diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 82e3e33ef7..8de92e7ff4 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -37,8 +37,8 @@ #include #include #include -#include #include +#include #include #endif // _WIN32 @@ -53,30 +53,30 @@ #include "baseapi.h" #include "blobclass.h" -#include "resultiterator.h" -#include "mutableiterator.h" -#include "thresholder.h" -#include "tesseractclass.h" -#include "pageres.h" -#include "paragraphs.h" -#include "tessvars.h" #include "control.h" #include "dict.h" -#include "pgedit.h" -#include "paramsd.h" -#include "output.h" -#include "globaloc.h" -#include "globals.h" #include "edgblob.h" #include "equationdetect.h" -#include "tessbox.h" +#include "globaloc.h" +#include "globals.h" #include "makerow.h" -#include "otsuthr.h" +#include "mutableiterator.h" +#include "openclwrapper.h" #include "osdetect.h" +#include "otsuthr.h" +#include "output.h" +#include "pageres.h" +#include "paragraphs.h" #include "params.h" +#include "paramsd.h" +#include "pgedit.h" #include "renderer.h" +#include "resultiterator.h" #include "strngs.h" -#include "openclwrapper.h" +#include "tessbox.h" +#include "tesseractclass.h" +#include "tessvars.h" +#include "thresholder.h" BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin"); @@ -103,43 +103,42 @@ const char* kOldVarsFile = "failed_vars.txt"; const int kMaxIntSize = 22; /* Add all available languages recursively. -*/ -static void addAvailableLanguages(const STRING &datadir, const STRING &base, - GenericVector* langs) -{ + */ +static void addAvailableLanguages(const STRING& datadir, const STRING& base, + GenericVector* langs) { const STRING base2 = (base.string()[0] == '\0') ? base : base + "/"; const size_t extlen = sizeof(kTrainedDataSuffix); #ifdef _WIN32 - WIN32_FIND_DATA data; - HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data); - if (handle != INVALID_HANDLE_VALUE) { - BOOL result = TRUE; - for (; result;) { - char *name = data.cFileName; - // Skip '.', '..', and hidden files - if (name[0] != '.') { - if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == - FILE_ATTRIBUTE_DIRECTORY) { - addAvailableLanguages(datadir, base2 + name, langs); - } else { - size_t len = strlen(name); - if (len > extlen && name[len - extlen] == '.' && - strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { - name[len - extlen] = '\0'; - langs->push_back(base2 + name); - } + WIN32_FIND_DATA data; + HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data); + if (handle != INVALID_HANDLE_VALUE) { + BOOL result = TRUE; + for (; result;) { + char* name = data.cFileName; + // Skip '.', '..', and hidden files + if (name[0] != '.') { + if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == + FILE_ATTRIBUTE_DIRECTORY) { + addAvailableLanguages(datadir, base2 + name, langs); + } else { + size_t len = strlen(name); + if (len > extlen && name[len - extlen] == '.' && + strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { + name[len - extlen] = '\0'; + langs->push_back(base2 + name); } } - result = FindNextFile(handle, &data); } - FindClose(handle); + result = FindNextFile(handle, &data); } + FindClose(handle); + } #else // _WIN32 DIR* dir = opendir((datadir + base).string()); if (dir != nullptr) { - dirent *de; + dirent* de; while ((de = readdir(dir))) { - char *name = de->d_name; + char* name = de->d_name; // Skip '.', '..', and hidden files if (name[0] != '.') { struct stat st; @@ -166,9 +165,9 @@ TessBaseAPI::TessBaseAPI() osd_tesseract_(nullptr), equ_detect_(nullptr), reader_(nullptr), - // Thresholder is initialized to nullptr here, but will be set before use by: - // A constructor of a derived API, SetThresholder(), or - // created implicitly when used in InternalSetImage. + // Thresholder is initialized to nullptr here, but will be set before use + // by: A constructor of a derived API, SetThresholder(), or created + // implicitly when used in InternalSetImage. thresholder_(nullptr), paragraph_models_(nullptr), block_list_(nullptr), @@ -187,16 +186,12 @@ TessBaseAPI::TessBaseAPI() image_width_(0), image_height_(0) {} -TessBaseAPI::~TessBaseAPI() { - End(); -} +TessBaseAPI::~TessBaseAPI() { End(); } /** * Returns the version identifier as a static string. Do not delete. */ -const char* TessBaseAPI::Version() { - return PACKAGE_VERSION; -} +const char* TessBaseAPI::Version() { return PACKAGE_VERSION; } /** * If compiled with OpenCL AND an available OpenCL @@ -210,7 +205,7 @@ const char* TessBaseAPI::Version() { #include "opencl_device_selection.h" #endif #endif -size_t TessBaseAPI::getOpenCLDevice(void **data) { +size_t TessBaseAPI::getOpenCLDevice(void** data) { #ifdef USE_OPENCL #if USE_DEVICE_SELECTION ds_device device = OpenclDevice::getDeviceSelection(); @@ -276,30 +271,30 @@ bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { tesseract_->params()); } -bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { - IntParam *p = ParamUtils::FindParam( +bool TessBaseAPI::GetIntVariable(const char* name, int* value) const { + IntParam* p = ParamUtils::FindParam( name, GlobalParams()->int_params, tesseract_->params()->int_params); if (p == nullptr) return false; *value = (int32_t)(*p); return true; } -bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { - BoolParam *p = ParamUtils::FindParam( +bool TessBaseAPI::GetBoolVariable(const char* name, bool* value) const { + BoolParam* p = ParamUtils::FindParam( name, GlobalParams()->bool_params, tesseract_->params()->bool_params); if (p == nullptr) return false; *value = (BOOL8)(*p); return true; } -const char *TessBaseAPI::GetStringVariable(const char *name) const { - StringParam *p = ParamUtils::FindParam( +const char* TessBaseAPI::GetStringVariable(const char* name) const { + StringParam* p = ParamUtils::FindParam( name, GlobalParams()->string_params, tesseract_->params()->string_params); return (p != nullptr) ? p->string() : nullptr; } -bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { - DoubleParam *p = ParamUtils::FindParam( +bool TessBaseAPI::GetDoubleVariable(const char* name, double* value) const { + DoubleParam* p = ParamUtils::FindParam( name, GlobalParams()->double_params, tesseract_->params()->double_params); if (p == nullptr) return false; *value = (double)(*p); @@ -307,12 +302,12 @@ bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { } /** Get value of named variable as a string, if it exists. */ -bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { +bool TessBaseAPI::GetVariableAsString(const char* name, STRING* val) { return ParamUtils::GetParamAsString(name, tesseract_->params(), val); } /** Print Tesseract parameters to the given file. */ -void TessBaseAPI::PrintVariables(FILE *fp) const { +void TessBaseAPI::PrintVariables(FILE* fp) const { ParamUtils::PrintParams(fp, tesseract_->params()); } @@ -325,9 +320,9 @@ void TessBaseAPI::PrintVariables(FILE *fp) const { * @return: 0 on success and -1 on initialization failure. */ int TessBaseAPI::Init(const char* datapath, const char* language, - OcrEngineMode oem, char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, + OcrEngineMode oem, char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, bool set_only_non_debug_params) { return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params, nullptr); @@ -388,7 +383,7 @@ int TessBaseAPI::Init(const char* data, int data_size, const char* language, *datapath_ = datapath; if ((strcmp(datapath_->string(), "") == 0) && (strcmp(tesseract_->datadir.string(), "") != 0)) - *datapath_ = tesseract_->datadir; + *datapath_ = tesseract_->datadir; if (language_ == nullptr) language_ = new STRING(language); @@ -414,8 +409,9 @@ int TessBaseAPI::Init(const char* data, int data_size, const char* language, * The returned string should NOT be deleted. */ const char* TessBaseAPI::GetInitLanguagesAsString() const { - return (language_ == nullptr || language_->string() == nullptr) ? - "" : language_->string(); + return (language_ == nullptr || language_->string() == nullptr) + ? "" + : language_->string(); } /** @@ -491,17 +487,15 @@ void TessBaseAPI::ReadDebugConfigFile(const char* filename) { * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). */ void TessBaseAPI::SetPageSegMode(PageSegMode mode) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; + if (tesseract_ == nullptr) tesseract_ = new Tesseract; tesseract_->tessedit_pageseg_mode.set_value(mode); } /** Return the current page segmentation mode. */ PageSegMode TessBaseAPI::GetPageSegMode() const { - if (tesseract_ == nullptr) - return PSM_SINGLE_BLOCK; + if (tesseract_ == nullptr) return PSM_SINGLE_BLOCK; return static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); + static_cast(tesseract_->tessedit_pageseg_mode)); } /** @@ -518,10 +512,8 @@ PageSegMode TessBaseAPI::GetPageSegMode() const { * as UTF8 and must be freed with the delete [] operator. */ char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, - int bytes_per_line, - int left, int top, - int width, int height) { + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height) { if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) return nullptr; // Nothing worth doing. @@ -540,8 +532,7 @@ char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, * adaptive data. */ void TessBaseAPI::ClearAdaptiveClassifier() { - if (tesseract_ == nullptr) - return; + if (tesseract_ == nullptr) return; tesseract_->ResetAdaptiveClassifier(); tesseract_->ResetDocumentDictionary(); } @@ -553,12 +544,12 @@ void TessBaseAPI::ClearAdaptiveClassifier() { * full image, so it may be followed immediately by a GetUTF8Text, and it * will automatically perform recognition. */ -void TessBaseAPI::SetImage(const unsigned char* imagedata, - int width, int height, - int bytes_per_pixel, int bytes_per_line) { +void TessBaseAPI::SetImage(const unsigned char* imagedata, int width, + int height, int bytes_per_pixel, + int bytes_per_line) { if (InternalSetImage()) { - thresholder_->SetImage(imagedata, width, height, - bytes_per_pixel, bytes_per_line); + thresholder_->SetImage(imagedata, width, height, bytes_per_pixel, + bytes_per_line); SetInputImage(thresholder_->GetPixRect()); } } @@ -591,8 +582,7 @@ void TessBaseAPI::SetImage(Pix* pix) { * can be recognized with the same image. */ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { - if (thresholder_ == nullptr) - return; + if (thresholder_ == nullptr) return; thresholder_->SetRectangle(left, top, width, height); ClearResults(); } @@ -629,8 +619,8 @@ Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { */ Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, Pixa** pixa, int** blockids, int** paraids) { - return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, - pixa, blockids, paraids); + return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, pixa, + blockids, paraids); } /** @@ -672,16 +662,13 @@ Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { * as an array of one element per component. delete [] after use. * If text_only is true, then only text components are returned. */ -Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, - bool text_only, bool raw_image, - const int raw_padding, +Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, bool text_only, + bool raw_image, const int raw_padding, Pixa** pixa, int** blockids, int** paraids) { PageIterator* page_it = GetIterator(); - if (page_it == nullptr) - page_it = AnalyseLayout(); - if (page_it == nullptr) - return nullptr; // Failed. + if (page_it == nullptr) page_it = AnalyseLayout(); + if (page_it == nullptr) return nullptr; // Failed. // Count the components to get a size for the arrays. int component_count = 0; @@ -690,37 +677,32 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, TessResultCallback* get_bbox = nullptr; if (raw_image) { // Get bounding box in original raw image with padding. - get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, - level, raw_padding, - &left, &top, &right, &bottom); + get_bbox = + NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, level, + raw_padding, &left, &top, &right, &bottom); } else { // Get bounding box from binarized imaged. Note that this could be // differently scaled from the original image. - get_bbox = NewPermanentTessCallback(page_it, - &PageIterator::BoundingBoxInternal, - level, &left, &top, &right, &bottom); + get_bbox = + NewPermanentTessCallback(page_it, &PageIterator::BoundingBoxInternal, + level, &left, &top, &right, &bottom); } do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) + if (get_bbox->Run() && (!text_only || PTIsTextType(page_it->BlockType()))) ++component_count; } while (page_it->Next(level)); Boxa* boxa = boxaCreate(component_count); - if (pixa != nullptr) - *pixa = pixaCreate(component_count); - if (blockids != nullptr) - *blockids = new int[component_count]; - if (paraids != nullptr) - *paraids = new int[component_count]; + if (pixa != nullptr) *pixa = pixaCreate(component_count); + if (blockids != nullptr) *blockids = new int[component_count]; + if (paraids != nullptr) *paraids = new int[component_count]; int blockid = 0; int paraid = 0; int component_index = 0; page_it->Begin(); do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) { + if (get_bbox->Run() && (!text_only || PTIsTextType(page_it->BlockType()))) { Box* lbox = boxCreate(left, top, right - left, bottom - top); boxaAddBox(boxa, lbox, L_INSERT); if (pixa != nullptr) { @@ -736,8 +718,7 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, } if (paraids != nullptr) { (*paraids)[component_index] = paraid; - if (page_it->IsAtFinalElement(RIL_PARA, level)) - ++paraid; + if (page_it->IsAtFinalElement(RIL_PARA, level)) ++paraid; } if (blockids != nullptr) { (*blockids)[component_index] = blockid; @@ -780,14 +761,13 @@ PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { if (FindLines() == 0) { - if (block_list_->empty()) - return nullptr; // The page was empty. + if (block_list_->empty()) return nullptr; // The page was empty. page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); DetectParagraphs(false); - return new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); + return new PageIterator(page_res_, tesseract_, + thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), rect_left_, + rect_top_, rect_width_, rect_height_); } return nullptr; } @@ -797,15 +777,13 @@ PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { * internal structures. */ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (FindLines() != 0) - return -1; + if (tesseract_ == nullptr) return -1; + if (FindLines() != 0) return -1; delete page_res_; if (block_list_->empty()) { - page_res_ = new PAGE_RES(false, block_list_, - &tesseract_->prev_word_best_choice_); - return 0; // Empty page. + page_res_ = + new PAGE_RES(false, block_list_, &tesseract_->prev_word_best_choice_); + return 0; // Empty page. } tesseract_->SetBlackAndWhitelist(); @@ -815,8 +793,8 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { } else if (tesseract_->tessedit_resegment_from_boxes) { page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); } else { - page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), - block_list_, &tesseract_->prev_word_best_choice_); + page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), block_list_, + &tesseract_->prev_word_best_choice_); } if (page_res_ == nullptr) { return -1; @@ -833,20 +811,20 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { if (truth_cb_ != nullptr) { tesseract_->wordrec_run_blamer.set_value(true); - PageIterator *page_it = new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - truth_cb_->Run(tesseract_->getDict().getUnicharset(), - image_height_, page_it, this->tesseract()->pix_grey()); + PageIterator* page_it = + new PageIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), rect_left_, + rect_top_, rect_width_, rect_height_); + truth_cb_->Run(tesseract_->getDict().getUnicharset(), image_height_, + page_it, this->tesseract()->pix_grey()); delete page_it; } int result = 0; if (tesseract_->interactive_display_mode) { - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED // The page_res is invalid after an interactive session, so cleanup // in a way that lets us continue to the next page without crashing. delete page_res_; @@ -857,10 +835,10 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { ExtractFontName(*output_file_, &fontname); tesseract_->ApplyBoxTraining(fontname, page_res_); } else if (tesseract_->tessedit_ambigs_training) { - FILE *training_output_file = tesseract_->init_recog_training(*input_file_); + FILE* training_output_file = tesseract_->init_recog_training(*input_file_); // OCR the page segmented into words by tesseract. - tesseract_->recog_training_segmented( - *input_file_, page_res_, monitor, training_output_file); + tesseract_->recog_training_segmented(*input_file_, page_res_, monitor, + training_output_file); fclose(training_output_file); } else { // Now run the main recognition. @@ -878,28 +856,25 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { /** Tests the chopper by exhaustively running chop_one_blob. */ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; + if (tesseract_ == nullptr) return -1; if (thresholder_ == nullptr || thresholder_->IsEmpty()) { tprintf("Please call SetImage before attempting recognition.\n"); return -1; } - if (page_res_ != nullptr) - ClearResults(); - if (FindLines() != 0) - return -1; + if (page_res_ != nullptr) ClearResults(); + if (FindLines() != 0) return -1; // Additional conditions under which chopper test cannot be run if (tesseract_->interactive_display_mode) return -1; recognition_done_ = true; - page_res_ = new PAGE_RES(false, block_list_, - &(tesseract_->prev_word_best_choice_)); + page_res_ = + new PAGE_RES(false, block_list_, &(tesseract_->prev_word_best_choice_)); PAGE_RES_IT page_res_it(page_res_); while (page_res_it.word() != nullptr) { - WERD_RES *word_res = page_res_it.word(); + WERD_RES* word_res = page_res_it.word(); GenericVector boxes; tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, page_res_it.row()->row, word_res); @@ -913,15 +888,12 @@ void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); } Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } -const char * TessBaseAPI::GetInputName() { - if (input_file_) - return input_file_->c_str(); +const char* TessBaseAPI::GetInputName() { + if (input_file_) return input_file_->c_str(); return nullptr; } -const char * TessBaseAPI::GetDatapath() { - return tesseract_->datadir.c_str(); -} +const char* TessBaseAPI::GetDatapath() { return tesseract_->datadir.c_str(); } int TessBaseAPI::GetSourceYResolution() { return thresholder_->GetSourceYResolution(); @@ -931,8 +903,7 @@ int TessBaseAPI::GetSourceYResolution() { // Seems convoluted, but is the easiest way I know of to meet multiple // goals. Support streaming from stdin, and also work on platforms // lacking fmemopen. -bool TessBaseAPI::ProcessPagesFileList(FILE *flist, - STRING *buf, +bool TessBaseAPI::ProcessPagesFileList(FILE* flist, STRING* buf, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer, @@ -968,14 +939,14 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); } chomp_string(pagename); - Pix *pix = pixRead(pagename); + Pix* pix = pixRead(pagename); if (pix == nullptr) { tprintf("Image file %s cannot be read!\n", pagename); return false; } tprintf("Page %d : %s\n", page, pagename); - bool r = ProcessPage(pix, page, pagename, retry_config, - timeout_millisec, renderer); + bool r = ProcessPage(pix, page, pagename, retry_config, timeout_millisec, + renderer); pixDestroy(&pix); if (!r) return false; if (tessedit_page_number >= 0) break; @@ -989,20 +960,18 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, return true; } -bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, - size_t size, +bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8* data, size_t size, const char* filename, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number) { #ifndef ANDROID_BUILD - Pix *pix = nullptr; + Pix* pix = nullptr; int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; size_t offset = 0; - for (; ; ++page) { - if (tessedit_page_number >= 0) - page = tessedit_page_number; + for (;; ++page) { + if (tessedit_page_number >= 0) page = tessedit_page_number; pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset) : pixReadFromMultipageTiff(filename, &offset); if (pix == nullptr) break; @@ -1010,8 +979,8 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, char page_str[kMaxIntSize]; snprintf(page_str, kMaxIntSize - 1, "%d", page); SetVariable("applybox_page", page_str); - bool r = ProcessPage(pix, page, filename, retry_config, - timeout_millisec, renderer); + bool r = ProcessPage(pix, page, filename, retry_config, timeout_millisec, + renderer); pixDestroy(&pix); if (!r) return false; if (tessedit_page_number >= 0) break; @@ -1065,27 +1034,25 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename, } if (stream_filelist) { - return ProcessPagesFileList(stdin, nullptr, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); + return ProcessPagesFileList(stdin, nullptr, retry_config, timeout_millisec, + renderer, tesseract_->tessedit_page_number); } // At this point we are officially in autodection territory. // That means any data in stdin must be buffered, to make it // seekable. std::string buf; - const l_uint8 *data = nullptr; + const l_uint8* data = nullptr; if (stdInput) { buf.assign((std::istreambuf_iterator(std::cin)), (std::istreambuf_iterator())); - data = reinterpret_cast(buf.data()); + data = reinterpret_cast(buf.data()); } // Here is our autodetection int format; - int r = (stdInput) ? - findFileFormatBuffer(data, &format) : - findFileFormat(filename, &format); + int r = (stdInput) ? findFileFormatBuffer(data, &format) + : findFileFormat(filename, &format); // Maybe we have a filelist if (r != 0 || format == IFF_UNKNOWN) { @@ -1098,9 +1065,8 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename, std::istreambuf_iterator()); s = u.c_str(); } - return ProcessPagesFileList(nullptr, &s, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); + return ProcessPagesFileList(nullptr, &s, retry_config, timeout_millisec, + renderer, tesseract_->tessedit_page_number); } // Maybe we have a TIFF which is potentially multipage @@ -1110,7 +1076,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename, format == IFF_TIFF_ZIP); // Fail early if we can, before producing any output - Pix *pix = nullptr; + Pix* pix = nullptr; if (!tiff) { pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename); if (pix == nullptr) { @@ -1125,12 +1091,11 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename, } // Produce output - r = (tiff) ? - ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number) : - ProcessPage(pix, 0, filename, retry_config, - timeout_millisec, renderer); + r = (tiff) ? ProcessPagesMultipageTiff( + data, buf.size(), filename, retry_config, timeout_millisec, + renderer, tesseract_->tessedit_page_number) + : ProcessPage(pix, 0, filename, retry_config, timeout_millisec, + renderer); // Clean up memory as needed pixDestroy(&pix); @@ -1209,12 +1174,11 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, * Recognize. The returned iterator must be deleted after use. */ LTRResultIterator* TessBaseAPI::GetLTRIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); + if (tesseract_ == nullptr || page_res_ == nullptr) return nullptr; + return new LTRResultIterator(page_res_, tesseract_, + thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), rect_left_, + rect_top_, rect_width_, rect_height_); } /** @@ -1226,12 +1190,11 @@ LTRResultIterator* TessBaseAPI::GetLTRIterator() { * DetectOS, or anything else that changes the internal PAGE_RES. */ ResultIterator* TessBaseAPI::GetIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return ResultIterator::StartOfParagraph(LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_)); + if (tesseract_ == nullptr || page_res_ == nullptr) return nullptr; + return ResultIterator::StartOfParagraph( + LTRResultIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), rect_left_, + rect_top_, rect_width_, rect_height_)); } /** @@ -1243,21 +1206,19 @@ ResultIterator* TessBaseAPI::GetIterator() { * DetectOS, or anything else that changes the internal PAGE_RES. */ MutableIterator* TessBaseAPI::GetMutableIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; + if (tesseract_ == nullptr || page_res_ == nullptr) return nullptr; return new MutableIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); + thresholder_->GetScaledYResolution(), rect_left_, + rect_top_, rect_width_, rect_height_); } /** Make a text string from the internal data structures. */ char* TessBaseAPI::GetUTF8Text() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) + if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) return nullptr; STRING text(""); - ResultIterator *it = GetIterator(); + ResultIterator* it = GetIterator(); do { if (it->Empty(RIL_PARA)) continue; const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); @@ -1272,7 +1233,7 @@ char* TessBaseAPI::GetUTF8Text() { /** * Gets the block orientation at the current iterator position. */ -static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { +static tesseract::Orientation GetBlockTextOrientation(const PageIterator* it) { tesseract::Orientation orientation; tesseract::WritingDirection writing_direction; tesseract::TextlineOrder textline_order; @@ -1290,9 +1251,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { * method currently only inserts a 'textangle' property to indicate the rotation * direction and does not add any baseline information to the hocr string. */ -static void AddBaselineCoordsTohOCR(const PageIterator *it, - PageIteratorLevel level, - STRING* hocr_str) { +static void AddBaselineCoordsTohOCR(const PageIterator* it, + PageIteratorLevel level, STRING* hocr_str) { tesseract::Orientation orientation = GetBlockTextOrientation(it); if (orientation != ORIENTATION_PAGE_UP) { hocr_str->add_str_int("; textangle ", 360 - orientation * 90); @@ -1304,8 +1264,7 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, // Try to get the baseline coordinates at this level. int x1, y1, x2, y2; - if (!it->Baseline(level, &x1, &y1, &x2, &y2)) - return; + if (!it->Baseline(level, &x1, &y1, &x2, &y2)) return; // Following the description of this field of the hOCR spec, we convert the // baseline coordinates so that "the bottom left of the bounding box is the // origin". @@ -1413,21 +1372,20 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { STRING hocr_str(""); - if (input_file_ == nullptr) - SetInputName(nullptr); + if (input_file_ == nullptr) SetInputName(nullptr); #ifdef _WIN32 // convert input name from ANSI encoding to utf-8 int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); - wchar_t *uni16_str = new WCHAR[str16_len]; + wchar_t* uni16_str = new WCHAR[str16_len]; str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, uni16_str, str16_len); - int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, - nullptr, nullptr); - char *utf8_str = new char[utf8_len]; - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, - utf8_len, nullptr, nullptr); + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, + 0, nullptr, nullptr); + char* utf8_str = new char[utf8_len]; + WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, + nullptr, nullptr); *input_file_ = utf8_str; delete[] uni16_str; delete[] utf8_str; @@ -1448,7 +1406,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { hocr_str.add_str_int("; ppageno ", page_number); hocr_str += "'>\n"; - ResultIterator *res_it = GetIterator(); + ResultIterator* res_it = GetIterator(); while (!res_it->Empty(RIL_BLOCK)) { if (res_it->Empty(RIL_WORD)) { res_it->Next(RIL_WORD); @@ -1489,11 +1447,11 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { int left, top, right, bottom; bool bold, italic, underlined, monospace, serif, smallcaps; int pointsize, font_id; - const char *font_name; + const char* font_name; res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); + font_name = + res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, + &serif, &smallcaps, &pointsize, &font_id); hocr_str.add_str_int(" title='bbox ", left); hocr_str.add_str_int(" ", top); hocr_str.add_str_int(" ", right); @@ -1561,7 +1519,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { } hocr_str += " \n"; - char *ret = new char[hocr_str.length() + 1]; + char* ret = new char[hocr_str.length() + 1]; strcpy(ret, hocr_str.string()); delete res_it; return ret; @@ -1691,8 +1649,8 @@ const int kBytesPer64BitNumber = 20; * space plus the newline and the maximum length of a UNICHAR. * Test against this on each iteration for safety. */ -const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + - UNICHAR_LEN; +const int kMaxBytesPerLine = + kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + UNICHAR_LEN; /** * The recognized text is returned as a char* which is coded @@ -1701,13 +1659,12 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetBoxText(int page_number) { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) + if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) return nullptr; int blob_count; int utf8_length = TextLength(&blob_count); - int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + - kMaxBytesPerLine; + int total_length = + blob_count * kBytesPerBoxFileLine + utf8_length + kMaxBytesPerLine; char* result = new char[total_length]; result[0] = '\0'; int output_length = 0; @@ -1720,16 +1677,14 @@ char* TessBaseAPI::GetBoxText(int page_number) { // Tesseract uses space for recognition failure. Fix to a reject // character, kTesseractReject so we don't create illegal box files. for (int i = 0; text[i] != '\0'; ++i) { - if (text[i] == ' ') - text[i] = kTesseractReject; + if (text[i] == ' ') text[i] = kTesseractReject; } snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number); output_length += strlen(result + output_length); // Just in case... - if (output_length + kMaxBytesPerLine > total_length) - break; + if (output_length + kMaxBytesPerLine > total_length) break; } } while (it->Next(RIL_SYMBOL)); delete it; @@ -1741,13 +1696,11 @@ char* TessBaseAPI::GetBoxText(int page_number) { * Maps characters out of the latin set into the latin set. * TODO(rays) incorporate this translation into unicharset. */ -const int kUniChs[] = { - 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0 -}; +const int kUniChs[] = {0x20ac, 0x201c, 0x201d, 0x2018, + 0x2019, 0x2022, 0x2014, 0}; /** Latin chars corresponding to the unicode chars above. */ -const int kLatinChs[] = { - 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0 -}; +const int kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, + 0x0027, 0x00b7, 0x002d, 0}; /** * The recognized text is returned as a char* which is coded @@ -1755,32 +1708,28 @@ const int kLatinChs[] = { * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetUNLVText() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) + if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) return nullptr; bool tilde_crunch_written = false; bool last_char_was_newline = true; bool last_char_was_tilde = false; int total_length = TextLength(nullptr); - PAGE_RES_IT page_res_it(page_res_); + PAGE_RES_IT page_res_it(page_res_); char* result = new char[total_length]; char* ptr = result; - for (page_res_it.restart_page(); page_res_it.word () != nullptr; + for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); + WERD_RES* word = page_res_it.word(); // Process the current word. if (word->unlv_crunch_mode != CR_NONE) { if (word->unlv_crunch_mode != CR_DELETE && (!tilde_crunch_written || (word->unlv_crunch_mode == CR_KEEP_SPACE && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && + word->word->space() > 0 && !word->word->flag(W_FUZZY_NON) && !word->word->flag(W_FUZZY_SP)))) { - if (!word->word->flag(W_BOL) && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)) { + if (!word->word->flag(W_BOL) && word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && !word->word->flag(W_FUZZY_SP)) { /* Write a space to separate from preceding good text */ *ptr++ = ' '; last_char_was_tilde = false; @@ -1803,8 +1752,8 @@ char* TessBaseAPI::GetUNLVText() { int i = 0; int offset = 0; - if (last_char_was_tilde && - word->word->space() == 0 && wordstr[offset] == ' ') { + if (last_char_was_tilde && word->word->space() == 0 && + wordstr[offset] == ' ') { // Prevent adjacent tilde across words - we know that adjacent tildes // within words have been removed. // Skip the first character. @@ -1816,13 +1765,11 @@ char* TessBaseAPI::GetUNLVText() { else last_char_was_newline = false; for (; i < length; offset += lengths[i++]) { - if (wordstr[offset] == ' ' || - wordstr[offset] == kTesseractReject) { + if (wordstr[offset] == ' ' || wordstr[offset] == kTesseractReject) { *ptr++ = kUNLVReject; last_char_was_tilde = true; } else { - if (word->reject_map[i].rejected()) - *ptr++ = kUNLVSuspect; + if (word->reject_map[i].rejected()) *ptr++ = kUNLVSuspect; UNICHAR ch(wordstr + offset, lengths[i]); int uni_ch = ch.first_uni(); for (int j = 0; kUniChs[j] != 0; ++j) { @@ -1928,30 +1875,29 @@ int TessBaseAPI::MeanTextConf() { int* conf = AllWordConfidences(); if (!conf) return 0; int sum = 0; - int *pt = conf; + int* pt = conf; while (*pt >= 0) sum += *pt++; if (pt != conf) sum /= pt - conf; - delete [] conf; + delete[] conf; return sum; } /** Returns an array of all word confidences, terminated by -1. */ int* TessBaseAPI::AllWordConfidences() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) + if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) return nullptr; int n_word = 0; PAGE_RES_IT res_it(page_res_); for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) n_word++; - int* conf = new int[n_word+1]; + int* conf = new int[n_word + 1]; n_word = 0; for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { - WERD_RES *word = res_it.word(); + WERD_RES* word = res_it.word(); WERD_CHOICE* choice = word->best_choice; int w_conf = static_cast(100 + 5 * choice->certainty()); - // This is the eq for converting Tesseract confidence to 1..100 + // This is the eq for converting Tesseract confidence to 1..100 if (w_conf < 0) w_conf = 0; if (w_conf > 100) w_conf = 100; conf[n_word++] = w_conf; @@ -1993,11 +1939,9 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { int w = 0; int t = 0; for (t = 0; text[t] != '\0'; ++t) { - if (text[t] == '\n' || text[t] == ' ') - continue; + if (text[t] == '\n' || text[t] == ' ') continue; while (wordstr[w] == ' ') ++w; - if (text[t] != wordstr[w]) - break; + if (text[t] != wordstr[w]) break; ++w; } if (text[t] != '\0' || wordstr[w] != '\0') { @@ -2033,8 +1977,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { * any Recognize or Get* operation. */ void TessBaseAPI::Clear() { - if (thresholder_ != nullptr) - thresholder_->Clear(); + if (thresholder_ != nullptr) thresholder_->Clear(); ClearResults(); if (tesseract_ != nullptr) SetInputImage(nullptr); } @@ -2088,15 +2031,14 @@ void TessBaseAPI::ClearPersistentCache() { * Check whether a word is valid according to Tesseract's language model * returns 0 if the word is invalid, non-zero if valid */ -int TessBaseAPI::IsValidWord(const char *word) { +int TessBaseAPI::IsValidWord(const char* word) { return tesseract_->getDict().valid_word(word); } // Returns true if utf8_character is defined in the UniCharset. -bool TessBaseAPI::IsValidCharacter(const char *utf8_character) { - return tesseract_->unicharset.contains_unichar(utf8_character); +bool TessBaseAPI::IsValidCharacter(const char* utf8_character) { + return tesseract_->unicharset.contains_unichar(utf8_character); } - // TODO(rays) Obsolete this function and replace with a more aptly named // function that returns image coordinates rather than tesseract coordinates. bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { @@ -2170,8 +2112,7 @@ bool TessBaseAPI::InternalSetImage() { tprintf("Please call Init before attempting to set an image.\n"); return false; } - if (thresholder_ == nullptr) - thresholder_ = new ImageThresholder; + if (thresholder_ == nullptr) thresholder_ = new ImageThresholder; ClearResults(); return true; } @@ -2184,8 +2125,7 @@ bool TessBaseAPI::InternalSetImage() { */ bool TessBaseAPI::Threshold(Pix** pix) { ASSERT_HOST(pix != nullptr); - if (*pix != nullptr) - pixDestroy(pix); + if (*pix != nullptr) pixDestroy(pix); // Zero resolution messes up the algorithms, so make sure it is credible. int y_res = thresholder_->GetScaledYResolution(); if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) { @@ -2195,13 +2135,11 @@ bool TessBaseAPI::Threshold(Pix** pix) { kMinCredibleResolution); thresholder_->SetSourceYResolution(kMinCredibleResolution); } - PageSegMode pageseg_mode = - static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); + PageSegMode pageseg_mode = static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; - thresholder_->GetImageSizes(&rect_left_, &rect_top_, - &rect_width_, &rect_height_, - &image_width_, &image_height_); + thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, + &rect_height_, &image_width_, &image_height_); if (!thresholder_->IsBinary()) { tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); @@ -2213,9 +2151,9 @@ bool TessBaseAPI::Threshold(Pix** pix) { // estimated resolution, rather than the image resolution, which may be // fabricated, but we will use the image resolution, if there is one, to // report output point sizes. - int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), - kMinCredibleResolution, - kMaxCredibleResolution); + int estimated_res = + ClipToRange(thresholder_->GetScaledEstimatedResolution(), + kMinCredibleResolution, kMaxCredibleResolution); if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { tprintf("Estimated resolution %d out of range! Corrected to %d\n", thresholder_->GetScaledEstimatedResolution(), estimated_res); @@ -2231,8 +2169,7 @@ int TessBaseAPI::FindLines() { tprintf("Please call SetImage before attempting recognition.\n"); return -1; } - if (recognition_done_) - ClearResults(); + if (recognition_done_) ClearResults(); if (!block_list_->empty()) { return 0; } @@ -2268,20 +2205,21 @@ int TessBaseAPI::FindLines() { osd_tesseract_ = new Tesseract; TessdataManager mgr(reader_); if (datapath_ == nullptr) { - tprintf("Warning: Auto orientation and script detection requested," - " but data path is undefined\n"); + tprintf( + "Warning: Auto orientation and script detection requested," + " but data path is undefined\n"); delete osd_tesseract_; osd_tesseract_ = nullptr; - } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, - "osd", OEM_TESSERACT_ONLY, - nullptr, 0, nullptr, nullptr, - false, &mgr) == 0) { + } else if (osd_tesseract_->init_tesseract( + datapath_->string(), nullptr, "osd", OEM_TESSERACT_ONLY, + nullptr, 0, nullptr, nullptr, false, &mgr) == 0) { osd_tess = osd_tesseract_; osd_tesseract_->set_source_resolution( thresholder_->GetSourceYResolution()); } else { - tprintf("Warning: Auto orientation and script detection requested," - " but osd language failed to load\n"); + tprintf( + "Warning: Auto orientation and script detection requested," + " but osd language failed to load\n"); delete osd_tesseract_; osd_tesseract_ = nullptr; } @@ -2324,28 +2262,25 @@ void TessBaseAPI::ClearResults() { * Also return the number of recognized blobs in blob_count. */ int TessBaseAPI::TextLength(int* blob_count) { - if (tesseract_ == nullptr || page_res_ == nullptr) - return 0; + if (tesseract_ == nullptr || page_res_ == nullptr) return 0; - PAGE_RES_IT page_res_it(page_res_); + PAGE_RES_IT page_res_it(page_res_); int total_length = 2; int total_blobs = 0; // Iterate over the data structures to extract the recognition result. - for (page_res_it.restart_page(); page_res_it.word () != nullptr; + for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); + WERD_RES* word = page_res_it.word(); WERD_CHOICE* choice = word->best_choice; if (choice != nullptr) { total_blobs += choice->length() + 2; total_length += choice->unichar_string().length() + 2; for (int i = 0; i < word->reject_map.length(); ++i) { - if (word->reject_map[i].rejected()) - ++total_length; + if (word->reject_map[i].rejected()) ++total_length; } } } - if (blob_count != nullptr) - *blob_count = total_blobs; + if (blob_count != nullptr) *blob_count = total_blobs; return total_length; } @@ -2354,15 +2289,13 @@ int TessBaseAPI::TextLength(int* blob_count) { * Returns true if the image was processed successfully. */ bool TessBaseAPI::DetectOS(OSResults* osr) { - if (tesseract_ == nullptr) - return false; + if (tesseract_ == nullptr) return false; ClearResults(); if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) { return false; } - if (input_file_ == nullptr) - input_file_ = new STRING(kInputFile); + if (input_file_ == nullptr) input_file_ = new STRING(kInputFile); return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; } @@ -2386,9 +2319,9 @@ void TessBaseAPI::set_min_orientation_margin(double margin) { */ void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, bool** vertical_writing) { - delete[] *block_orientation; + delete[] * block_orientation; *block_orientation = nullptr; - delete[] *vertical_writing; + delete[] * vertical_writing; *vertical_writing = nullptr; BLOCK_IT block_it(block_list_); @@ -2408,8 +2341,7 @@ void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, *vertical_writing = new bool[num_blocks]; block_it.move_to_first(); int i = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { if (!block_it.data()->pdblk.poly_block()->IsText()) { continue; } @@ -2417,7 +2349,7 @@ void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, float re_theta = re_rotation.angle(); FCOORD classify_rotation = block_it.data()->classify_rotation(); float classify_theta = classify_rotation.angle(); - double rot_theta = - (re_theta - classify_theta) * 2.0 / PI; + double rot_theta = -(re_theta - classify_theta) * 2.0 / PI; if (rot_theta < 0) rot_theta += 4; int num_rotations = static_cast(rot_theta + 0.5); (*block_orientation)[i] = num_rotations; @@ -2444,29 +2376,18 @@ BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { * This is to keep BLOCK_LIST pointer opaque * and let go of including the other headers. */ -void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { - delete block_list; -} +void TessBaseAPI::DeleteBlockList(BLOCK_LIST* block_list) { delete block_list; } - -ROW *TessBaseAPI::MakeTessOCRRow(float baseline, - float xheight, - float descender, +ROW* TessBaseAPI::MakeTessOCRRow(float baseline, float xheight, float descender, float ascender) { int32_t xstarts[] = {-32000}; double quad_coeffs[] = {0, 0, baseline}; - return new ROW(1, - xstarts, - quad_coeffs, - xheight, - ascender - (baseline + xheight), - descender - baseline, - 0, - 0); + return new ROW(1, xstarts, quad_coeffs, xheight, + ascender - (baseline + xheight), descender - baseline, 0, 0); } /** Creates a TBLOB* from the whole pix. */ -TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { +TBLOB* TessBaseAPI::MakeTBLOB(Pix* pix) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); @@ -2475,17 +2396,14 @@ TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { extract_edges(pix, &block); // Merge all C_BLOBs - C_BLOB_LIST *list = block.blob_list(); + C_BLOB_LIST* list = block.blob_list(); C_BLOB_IT c_blob_it(list); - if (c_blob_it.empty()) - return nullptr; + if (c_blob_it.empty()) return nullptr; // Move all the outlines to the first blob. C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); - for (c_blob_it.forward(); - !c_blob_it.at_first(); - c_blob_it.forward()) { - C_BLOB *c_blob = c_blob_it.data(); - ol_it.add_list_after(c_blob->out_list()); + for (c_blob_it.forward(); !c_blob_it.at_first(); c_blob_it.forward()) { + C_BLOB* c_blob = c_blob_it.data(); + ol_it.add_list_after(c_blob->out_list()); } // Convert the first blob to the output TBLOB. return TBLOB::PolygonalCopy(false, c_blob_it.data()); @@ -2496,26 +2414,26 @@ TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { * for normalization. The denorm is an optional parameter in which the * normalization-antidote is returned. */ -void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { +void TessBaseAPI::NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode) { TBOX box = tblob->bounding_box(); float x_center = (box.left() + box.right()) / 2.0f; float baseline = row->base_line(x_center); float scale = kBlnXHeight / row->x_height(); tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, - 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); + 0.0f, static_cast(kBlnBaselineOffset), false, + nullptr); } /** * Return a TBLOB * from the whole pix. * To be freed later with delete. */ -TBLOB *make_tesseract_blob(float baseline, float xheight, - float descender, float ascender, - bool numeric_mode, Pix* pix) { - TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); +TBLOB* make_tesseract_blob(float baseline, float xheight, float descender, + float ascender, bool numeric_mode, Pix* pix) { + TBLOB* tblob = TessBaseAPI::MakeTBLOB(pix); // Normalize TBLOB - ROW *row = + ROW* row = TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); delete row; @@ -2527,20 +2445,16 @@ TBLOB *make_tesseract_blob(float baseline, float xheight, * The image must be preloaded into pix_binary_ and be just an image * of a single character. */ -void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender) { +void TessBaseAPI::AdaptToCharacter(const char* unichar_repr, int length, + float baseline, float xheight, + float descender, float ascender) { UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); - TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, + TBLOB* blob = make_tesseract_blob(baseline, xheight, descender, ascender, tesseract_->classify_bln_numeric_mode, tesseract_->pix_binary()); float threshold; float best_rating = -100; - // Classify to get a raw choice. BLOB_CHOICE_LIST choices; tesseract_->AdaptiveClassifier(blob, &choices); @@ -2561,10 +2475,9 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, delete blob; } - PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { - PAGE_RES *page_res = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); + PAGE_RES* page_res = + new PAGE_RES(false, block_list, &(tesseract_->prev_word_best_choice_)); tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); return page_res; } @@ -2572,8 +2485,8 @@ PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result) { if (!pass1_result) - pass1_result = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); + pass1_result = + new PAGE_RES(false, block_list, &(tesseract_->prev_word_best_choice_)); tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); return pass1_result; } @@ -2583,9 +2496,9 @@ void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { GetIntVariable("paragraph_debug_level", &debug_level); if (paragraph_models_ == nullptr) paragraph_models_ = new GenericVector; - MutableIterator *result_it = GetMutableIterator(); + MutableIterator* result_it = GetMutableIterator(); do { // Detect paragraphs for this block - GenericVector models; + GenericVector models; ::tesseract::DetectParagraphs(debug_level, after_text_recognition, result_it, &models); *paragraph_models_ += models; @@ -2594,12 +2507,12 @@ void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { } struct TESS_CHAR : ELIST_LINK { - char *unicode_repr; + char* unicode_repr; int length; // of unicode_repr float cost; TBOX box; - TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { + TESS_CHAR(float _cost, const char* repr, int len = -1) : cost(_cost) { length = (len == -1 ? strlen(repr) : len); unicode_repr = new char[length + 1]; strncpy(unicode_repr, repr, length); @@ -2607,20 +2520,17 @@ struct TESS_CHAR : ELIST_LINK { TESS_CHAR() { // Satisfies ELISTIZE. } - ~TESS_CHAR() { - delete [] unicode_repr; - } + ~TESS_CHAR() { delete[] unicode_repr; } }; ELISTIZEH(TESS_CHAR) ELISTIZE(TESS_CHAR) static void add_space(TESS_CHAR_IT* it) { - TESS_CHAR *t = new TESS_CHAR(0, " "); + TESS_CHAR* t = new TESS_CHAR(0, " "); it->add_after_then_move(t); } - static float rating_to_cost(float rating) { rating = 100 + rating; // cuddled that to save from coverage profiler @@ -2634,25 +2544,23 @@ static float rating_to_cost(float rating) { * Extract the OCR results, costs (penalty points for uncertainty), * and the bounding boxes of the characters. */ -static void extract_result(TESS_CHAR_IT* out, - PAGE_RES* page_res) { +static void extract_result(TESS_CHAR_IT* out, PAGE_RES* page_res) { PAGE_RES_IT page_res_it(page_res); int word_count = 0; while (page_res_it.word() != nullptr) { - WERD_RES *word = page_res_it.word(); - const char *str = word->best_choice->unichar_string().string(); - const char *len = word->best_choice->unichar_lengths().string(); + WERD_RES* word = page_res_it.word(); + const char* str = word->best_choice->unichar_string().string(); + const char* len = word->best_choice->unichar_lengths().string(); TBOX real_rect = word->word->bounding_box(); - if (word_count) - add_space(out); + if (word_count) add_space(out); int n = strlen(len); for (int i = 0; i < n; i++) { - TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), - str, *len); + TESS_CHAR* tc = + new TESS_CHAR(rating_to_cost(word->best_choice->rating()), str, *len); tc->box = real_rect.intersection(word->box_word->BlobBox(i)); out->add_after_then_move(tc); - str += *len; + str += *len; len++; } page_res_it.forward(); @@ -2664,13 +2572,9 @@ static void extract_result(TESS_CHAR_IT* out, * Extract the OCR results, costs (penalty points for uncertainty), * and the bounding boxes of the characters. */ -int TessBaseAPI::TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, +int TessBaseAPI::TesseractExtractResult(char** text, int** lengths, + float** costs, int** x0, int** y0, + int** x1, int** y1, PAGE_RES* page_res) { TESS_CHAR_LIST tess_chars; TESS_CHAR_IT tess_chars_it(&tess_chars); @@ -2685,10 +2589,9 @@ int TessBaseAPI::TesseractExtractResult(char** text, *x1 = new int[n]; *y1 = new int[n]; int i = 0; - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); + for (tess_chars_it.mark_cycle_pt(); !tess_chars_it.cycled_list(); tess_chars_it.forward(), i++) { - TESS_CHAR *tc = tess_chars_it.data(); + TESS_CHAR* tc = tess_chars_it.data(); text_len += (*lengths)[i] = tc->length; (*costs)[i] = tc->cost; (*x0)[i] = tc->box.left(); @@ -2696,13 +2599,12 @@ int TessBaseAPI::TesseractExtractResult(char** text, (*x1)[i] = tc->box.right(); (*y1)[i] = tc->box.top(); } - char *p = *text = new char[text_len]; + char* p = *text = new char[text_len]; tess_chars_it.move_to_first(); - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); + for (tess_chars_it.mark_cycle_pt(); !tess_chars_it.cycled_list(); tess_chars_it.forward()) { - TESS_CHAR *tc = tess_chars_it.data(); + TESS_CHAR* tc = tess_chars_it.data(); strncpy(p, tc->unicode_repr, tc->length); p += tc->length; } @@ -2724,8 +2626,8 @@ void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, GenericVector bl_features; GenericVector cn_features; INT_FX_RESULT_STRUCT fx_info; - tesseract_->ExtractFeatures(*blob, false, &bl_features, - &cn_features, &fx_info, &outline_counts); + tesseract_->ExtractFeatures(*blob, false, &bl_features, &cn_features, + &fx_info, &outline_counts); if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { *num_features = 0; return; // Feature extraction failed. @@ -2736,32 +2638,28 @@ void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, if (feature_outline_index != nullptr) { int f = 0; for (int i = 0; i < outline_counts.size(); ++i) { - while (f < outline_counts[i]) - feature_outline_index[f++] = i; + while (f < outline_counts[i]) feature_outline_index[f++] = i; } } } // This method returns the row to which a box of specified dimensions would // belong. If no good match is found, it returns nullptr. -ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, - int left, int top, int right, int bottom) { +ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom) { TBOX box(left, bottom, right, top); BLOCK_IT b_it(blocks); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOCK* block = b_it.data(); - if (!box.major_overlap(block->pdblk.bounding_box())) - continue; + if (!box.major_overlap(block->pdblk.bounding_box())) continue; ROW_IT r_it(block->row_list()); for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { ROW* row = r_it.data(); - if (!box.major_overlap(row->bounding_box())) - continue; + if (!box.major_overlap(row->bounding_box())) continue; WERD_IT w_it(row->word_list()); for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { WERD* word = w_it.data(); - if (box.major_overlap(word->bounding_box())) - return row; + if (box.major_overlap(word->bounding_box())) return row; } } } @@ -2769,10 +2667,8 @@ ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, } /** Method to run adaptive classifier on a blob. */ -void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, +void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, + int* unichar_ids, float* ratings, int* num_matches_returned) { BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; tesseract_->AdaptiveClassifier(blob, choices); @@ -2797,7 +2693,7 @@ const char* TessBaseAPI::GetUnichar(int unichar_id) { } /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ -const Dawg *TessBaseAPI::GetDawg(int i) const { +const Dawg* TessBaseAPI::GetDawg(int i) const { if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; return tesseract_->getDict().GetDawg(i); } @@ -2808,17 +2704,29 @@ int TessBaseAPI::NumDawgs() const { } /** Escape a char string - remove <>&"' with HTML codes. */ -STRING HOcrEscape(const char* text) { +STRING +HOcrEscape(const char* text) { STRING ret; - const char *ptr; + const char* ptr; for (ptr = text; *ptr; ptr++) { switch (*ptr) { - case '<': ret += "<"; break; - case '>': ret += ">"; break; - case '&': ret += "&"; break; - case '"': ret += """; break; - case '\'': ret += "'"; break; - default: ret += *ptr; + case '<': + ret += "<"; + break; + case '>': + ret += ">"; + break; + case '&': + ret += "&"; + break; + case '"': + ret += """; + break; + case '\'': + ret += "'"; + break; + default: + ret += *ptr; } } return ret; diff --git a/src/api/baseapi.h b/src/api/baseapi.h index 029fbc64f0..ba57c698c0 100644 --- a/src/api/baseapi.h +++ b/src/api/baseapi.h @@ -24,18 +24,19 @@ // To avoid collision with other typenames include the ABSOLUTE MINIMUM // complexity of includes here. Use forward declarations wherever possible // and hide includes of complex types in baseapi.cpp. -#include "tess_version.h" #include "apitypes.h" #include "pageiterator.h" #include "platform.h" #include "publictypes.h" #include "resultiterator.h" #include "serialis.h" +#include "tess_version.h" #include "tesscallback.h" #include "thresholder.h" #include "unichar.h" -template class GenericVector; +template +class GenericVector; class PAGE_RES; class PAGE_RES_IT; class ParagraphModel; @@ -57,7 +58,7 @@ class UNICHARSET; class WERD_CHOICE_LIST; struct INT_FEATURE_STRUCT; -typedef INT_FEATURE_STRUCT *INT_FEATURE; +typedef INT_FEATURE_STRUCT* INT_FEATURE; struct TBLOB; namespace tesseract { @@ -74,20 +75,19 @@ class Tesseract; class Trie; class Wordrec; -typedef int (Dict::*DictFunc)(void* void_dawg_args, - UNICHAR_ID unichar_id, bool word_end) const; +typedef int (Dict::*DictFunc)(void* void_dawg_args, UNICHAR_ID unichar_id, + bool word_end) const; typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, const char* context, int context_bytes, const char* character, int character_bytes); -typedef float (Dict::*ParamsModelClassifyFunc)( - const char *lang, void *path); -typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, - const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, - BlamerBundle *blamer_bundle); -typedef TessCallback4 +typedef float (Dict::*ParamsModelClassifyFunc)(const char* lang, void* path); +typedef void (Wordrec::*FillLatticeFunc)(const MATRIX& ratings, + const WERD_CHOICE_LIST& best_choices, + const UNICHARSET& unicharset, + BlamerBundle* blamer_bundle); +typedef TessCallback4 TruthCallback; /** @@ -115,7 +115,7 @@ class TESS_API TessBaseAPI { * and returns sizeof(cl_device_id) * otherwise *device=nullptr and returns 0. */ - static size_t getOpenCLDevice(void **device); + static size_t getOpenCLDevice(void** device); /** * Writes the thresholded image to stderr as a PBM file on receipt of a @@ -137,7 +137,7 @@ class TESS_API TessBaseAPI { */ const char* GetInputName(); // Takes ownership of the input pix. - void SetInputImage(Pix *pix); + void SetInputImage(Pix* pix); Pix* GetInputImage(); int GetSourceYResolution(); const char* GetDatapath(); @@ -165,25 +165,25 @@ class TESS_API TessBaseAPI { * Returns true if the parameter was found among Tesseract parameters. * Fills in value with the value of the parameter. */ - bool GetIntVariable(const char *name, int *value) const; - bool GetBoolVariable(const char *name, bool *value) const; - bool GetDoubleVariable(const char *name, double *value) const; + bool GetIntVariable(const char* name, int* value) const; + bool GetBoolVariable(const char* name, bool* value) const; + bool GetDoubleVariable(const char* name, double* value) const; /** * Returns the pointer to the string that represents the value of the * parameter if it was found among Tesseract parameters. */ - const char *GetStringVariable(const char *name) const; + const char* GetStringVariable(const char* name) const; /** * Print Tesseract parameters to the given file. */ - void PrintVariables(FILE *fp) const; + void PrintVariables(FILE* fp) const; /** * Get value of named variable as a string, if it exists. */ - bool GetVariableAsString(const char *name, STRING *val); + bool GetVariableAsString(const char* name, STRING* val); /** * Instances are now mostly thread-safe and totally independent, @@ -198,8 +198,8 @@ class TESS_API TessBaseAPI { * * The datapath must be the name of the parent directory of tessdata and * must end in / . Any name after the last / will be stripped. - * The language is (usually) an ISO 639-3 string or nullptr will default to eng. - * It is entirely safe (and eventually will be efficient too) to call + * The language is (usually) an ISO 639-3 string or nullptr will default to + * eng. It is entirely safe (and eventually will be efficient too) to call * Init multiple times on the same instance to change language, or just * to reset the classifier. * The language may be a string of the form [~][+[~]]* indicating @@ -224,15 +224,16 @@ class TESS_API TessBaseAPI { * "debug" in the name will be set. */ int Init(const char* datapath, const char* language, OcrEngineMode mode, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, + char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, bool set_only_non_debug_params); int Init(const char* datapath, const char* language, OcrEngineMode oem) { return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); } int Init(const char* datapath, const char* language) { - return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); + return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, + false); } // In-memory version reads the traineddata file directly from the given // data[data_size] array, and/or reads data via a FileReader. @@ -315,9 +316,9 @@ class TESS_API TessBaseAPI { * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, * and one or more of the Get*Text functions below. */ - char* TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height); + char* TesseractRect(const unsigned char* imagedata, int bytes_per_pixel, + int bytes_per_line, int left, int top, int width, + int height); /** * Call between pages or documents etc to free up memory and forget @@ -331,7 +332,7 @@ class TESS_API TessBaseAPI { * get hold of the thresholded image, get the text in different formats, * get bounding boxes, confidences etc. */ - /* @{ */ + /* @{ */ /** * Provide an image for Tesseract to recognize. Format is as @@ -399,13 +400,13 @@ class TESS_API TessBaseAPI { * Can be called before or after Recognize. * If raw_image is true, then extract from the original image instead of the * thresholded image and pad by raw_padding pixels. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - * If paraids is not nullptr, the paragraph-id of each line within its block is - * also returned as an array of one element per line. delete [] after use. + * If blockids is not nullptr, the block-id of each line is also returned as + * an array of one element per line. delete [] after use. If paraids is not + * nullptr, the paragraph-id of each line within its block is also returned as + * an array of one element per line. delete [] after use. */ - Boxa* GetTextlines(const bool raw_image, const int raw_padding, - Pixa** pixa, int** blockids, int** paraids); + Boxa* GetTextlines(const bool raw_image, const int raw_padding, Pixa** pixa, + int** blockids, int** paraids); /* Helper method to extract from the thresholded image. (most common usage) */ @@ -418,8 +419,8 @@ class TESS_API TessBaseAPI { * pair, in reading order. Enables downstream handling of non-rectangular * regions. * Can be called before or after Recognize. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. + * If blockids is not nullptr, the block-id of each line is also returned as + * an array of one element per line. delete [] after use. */ Boxa* GetStrips(Pixa** pixa, int** blockids); @@ -446,22 +447,20 @@ class TESS_API TessBaseAPI { * Can be called before or after Recognize. * If blockids is not nullptr, the block-id of each component is also returned * as an array of one element per component. delete [] after use. - * If blockids is not nullptr, the paragraph-id of each component with its block - * is also returned as an array of one element per component. delete [] after - * use. - * If raw_image is true, then portions of the original image are extracted - * instead of the thresholded image and padded with raw_padding. - * If text_only is true, then only text components are returned. - */ - Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, const bool raw_image, - const int raw_padding, + * If blockids is not nullptr, the paragraph-id of each component with its + * block is also returned as an array of one element per component. delete [] + * after use. If raw_image is true, then portions of the original image are + * extracted instead of the thresholded image and padded with raw_padding. If + * text_only is true, then only text components are returned. + */ + Boxa* GetComponentImages(const PageIteratorLevel level, const bool text_only, + const bool raw_image, const int raw_padding, Pixa** pixa, int** blockids, int** paraids); // Helper function to get binary images with no padding (most common usage). - Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, + Boxa* GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa** pixa, int** blockids) { - return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); + return GetComponentImages(level, text_only, false, 0, pixa, blockids, + nullptr); } /** @@ -687,10 +686,9 @@ class TESS_API TessBaseAPI { * @warning temporary! This function will be removed from here and placed * in a separate API at some future time. */ - int IsValidWord(const char *word); + int IsValidWord(const char* word); // Returns true if utf8_character is defined in the UniCharset. - bool IsValidCharacter(const char *utf8_character); - + bool IsValidCharacter(const char* utf8_character); bool GetTextDirection(int* out_offset, float* out_slope); @@ -719,47 +717,44 @@ class TESS_API TessBaseAPI { * This method returns the row to which a box of specified dimensions would * belong. If no good match is found, it returns nullptr. */ - static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, - int right, int bottom); + static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, + int bottom); /** * Method to run adaptive classifier on a blob. * It returns at max num_max_matches results. */ - void RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned); + void RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, int* unichar_ids, + float* ratings, int* num_matches_returned); /** This method returns the string form of the specified unichar. */ const char* GetUnichar(int unichar_id); /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ - const Dawg *GetDawg(int i) const; + const Dawg* GetDawg(int i) const; /** Return the number of dawgs loaded into tesseract_ object. */ int NumDawgs() const; /** Returns a ROW object created from the input row specification. */ - static ROW *MakeTessOCRRow(float baseline, float xheight, - float descender, float ascender); + static ROW* MakeTessOCRRow(float baseline, float xheight, float descender, + float ascender); /** Returns a TBLOB corresponding to the entire input image. */ - static TBLOB *MakeTBLOB(Pix *pix); + static TBLOB* MakeTBLOB(Pix* pix); /** * This method baseline normalizes a TBLOB in-place. The input row is used * for normalization. The denorm is an optional parameter in which the * normalization-antidote is returned. */ - static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); + static void NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode); Tesseract* tesseract() const { return tesseract_; } OcrEngineMode oem() const { return last_oem_requested_; } - void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } + void InitTruthCallback(TruthCallback* cb) { truth_cb_ = cb; } void set_min_orientation_margin(double margin); @@ -779,11 +774,11 @@ class TESS_API TessBaseAPI { * and let go of including the other headers. */ static void DeleteBlockList(BLOCK_LIST* block_list); - /* @} */ + /* @} */ protected: - - /** Common code for setting the image. Returns true if Init has been called. */ + /** Common code for setting the image. Returns true if Init has been called. + */ TESS_LOCAL bool InternalSetImage(); /** @@ -823,12 +818,9 @@ class TESS_API TessBaseAPI { * Adapt to recognize the current image as the given character. * The image must be preloaded and be just an image of a single character. */ - TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender); + TESS_LOCAL void AdaptToCharacter(const char* unichar_repr, int length, + float baseline, float xheight, + float descender, float ascender); /** Recognize text doing one pass only, using settings for a given pass. */ TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); @@ -842,35 +834,30 @@ class TESS_API TessBaseAPI { * Extract the OCR results, costs (penalty points for uncertainty), * and the bounding boxes of the characters. */ - TESS_LOCAL static int TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res); + TESS_LOCAL static int TesseractExtractResult(char** text, int** lengths, + float** costs, int** x0, + int** y0, int** x1, int** y1, + PAGE_RES* page_res); TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } /* @} */ - protected: - Tesseract* tesseract_; ///< The underlying data object. - Tesseract* osd_tesseract_; ///< For orientation & script detection. - EquationDetect* equ_detect_; ///* paragraph_models_; - BLOCK_LIST* block_list_; ///< The page layout. - PAGE_RES* page_res_; ///< The page-level data. - STRING* input_file_; ///< Name used by training code. - STRING* output_file_; ///< Name used by debug code. - STRING* datapath_; ///< Current location of tessdata. - STRING* language_; ///< Last initialized language. + Tesseract* tesseract_; ///< The underlying data object. + Tesseract* osd_tesseract_; ///< For orientation & script detection. + EquationDetect* equ_detect_; ///< The equation detector. + FileReader reader_; ///< Reads files from any filesystem. + ImageThresholder* thresholder_; ///< Image thresholding module. + GenericVector* paragraph_models_; + BLOCK_LIST* block_list_; ///< The page layout. + PAGE_RES* page_res_; ///< The page-level data. + STRING* input_file_; ///< Name used by training code. + STRING* output_file_; ///< Name used by debug code. + STRING* datapath_; ///< Current location of tessdata. + STRING* language_; ///< Last initialized language. OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. - bool recognition_done_; ///< page_res_ contains recognition data. - TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES + bool recognition_done_; ///< page_res_ contains recognition data. + TruthCallback* truth_cb_; /// fxn for setting truth_* in WERD_RES /** * @defgroup ThresholderParams Thresholder Parameters @@ -887,16 +874,12 @@ class TESS_API TessBaseAPI { private: // A list of image filenames gets special consideration - bool ProcessPagesFileList(FILE *fp, - STRING *buf, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer, + bool ProcessPagesFileList(FILE* fp, STRING* buf, const char* retry_config, + int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number); // TIFF supports multipage so gets special consideration. - bool ProcessPagesMultipageTiff(const unsigned char *data, - size_t size, - const char* filename, - const char* retry_config, + bool ProcessPagesMultipageTiff(const unsigned char* data, size_t size, + const char* filename, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number); @@ -905,11 +888,12 @@ class TESS_API TessBaseAPI { // to set the title to an empty string. Using a single named // variable will hopefully reduce confusion if the situation changes // in the future. - const char *unknown_title_ = ""; + const char* unknown_title_ = ""; }; // class TessBaseAPI. /** Escape a char string - remove &<>"' with HTML codes. */ -STRING HOcrEscape(const char* text); +STRING +HOcrEscape(const char* text); } // namespace tesseract. #endif // TESSERACT_API_BASEAPI_H_ diff --git a/src/api/capi.cpp b/src/api/capi.cpp index 1cf614d3ac..a891d9e47b 100644 --- a/src/api/capi.cpp +++ b/src/api/capi.cpp @@ -16,794 +16,776 @@ /////////////////////////////////////////////////////////////////////// #ifndef TESS_CAPI_INCLUDE_BASEAPI -# define TESS_CAPI_INCLUDE_BASEAPI +#define TESS_CAPI_INCLUDE_BASEAPI #endif #include "capi.h" #include "genericvector.h" #include "strngs.h" -TESS_API const char* TESS_CALL TessVersion() -{ - return TessBaseAPI::Version(); -} +TESS_API const char* TESS_CALL TessVersion() { return TessBaseAPI::Version(); } -TESS_API void TESS_CALL TessDeleteText(char* text) -{ - delete [] text; -} +TESS_API void TESS_CALL TessDeleteText(char* text) { delete[] text; } -TESS_API void TESS_CALL TessDeleteTextArray(char** arr) -{ - for (char** pos = arr; *pos != nullptr; ++pos) - delete [] *pos; - delete [] arr; +TESS_API void TESS_CALL TessDeleteTextArray(char** arr) { + for (char** pos = arr; *pos != nullptr; ++pos) delete[] * pos; + delete[] arr; } -TESS_API void TESS_CALL TessDeleteIntArray(int* arr) -{ - delete [] arr; -} +TESS_API void TESS_CALL TessDeleteIntArray(int* arr) { delete[] arr; } -TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list) -{ - TessBaseAPI::DeleteBlockList(block_list); +TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list) { + TessBaseAPI::DeleteBlockList(block_list); } -TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase) -{ - return new TessTextRenderer(outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessTextRendererCreate(const char* outputbase) { + return new TessTextRenderer(outputbase); } -TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase) -{ - return new TessHOcrRenderer(outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessHOcrRendererCreate(const char* outputbase) { + return new TessHOcrRenderer(outputbase); } -TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info) -{ - return new TessHOcrRenderer(outputbase, font_info); +TESS_API TessResultRenderer* TESS_CALL +TessHOcrRendererCreate2(const char* outputbase, BOOL font_info) { + return new TessHOcrRenderer(outputbase, font_info); } -TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, - BOOL textonly) -{ - return new TessPDFRenderer(outputbase, datadir, textonly); +TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate( + const char* outputbase, const char* datadir, BOOL textonly) { + return new TessPDFRenderer(outputbase, datadir, textonly); } -TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase) -{ - return new TessUnlvRenderer(outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessUnlvRendererCreate(const char* outputbase) { + return new TessUnlvRenderer(outputbase); } -TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase) -{ - return new TessBoxTextRenderer(outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessBoxTextRendererCreate(const char* outputbase) { + return new TessBoxTextRenderer(outputbase); } -TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer) -{ - delete renderer; +TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer) { + delete renderer; } -TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next) -{ - renderer->insert(next); +TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, + TessResultRenderer* next) { + renderer->insert(next); } -TESS_API TessResultRenderer* TESS_CALL TessResultRendererNext(TessResultRenderer* renderer) -{ - return renderer->next(); +TESS_API TessResultRenderer* TESS_CALL +TessResultRendererNext(TessResultRenderer* renderer) { + return renderer->next(); } -TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title) -{ - return renderer->BeginDocument(title); +TESS_API BOOL TESS_CALL TessResultRendererBeginDocument( + TessResultRenderer* renderer, const char* title) { + return renderer->BeginDocument(title); } -TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api) -{ - return renderer->AddImage(api); +TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, + TessBaseAPI* api) { + return renderer->AddImage(api); } -TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer) -{ - return renderer->EndDocument(); +TESS_API BOOL TESS_CALL +TessResultRendererEndDocument(TessResultRenderer* renderer) { + return renderer->EndDocument(); } -TESS_API const char* TESS_CALL TessResultRendererExtention(TessResultRenderer* renderer) -{ - return renderer->file_extension(); +TESS_API const char* TESS_CALL +TessResultRendererExtention(TessResultRenderer* renderer) { + return renderer->file_extension(); } -TESS_API const char* TESS_CALL TessResultRendererTitle(TessResultRenderer* renderer) -{ - return renderer->title(); +TESS_API const char* TESS_CALL +TessResultRendererTitle(TessResultRenderer* renderer) { + return renderer->title(); } -TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer) -{ - return renderer->imagenum(); +TESS_API int TESS_CALL +TessResultRendererImageNum(TessResultRenderer* renderer) { + return renderer->imagenum(); } -TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate() -{ - return new TessBaseAPI; -} +TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate() { return new TessBaseAPI; } -TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle) -{ - delete handle; +TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle) { + delete handle; } -TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device) -{ - return handle->getOpenCLDevice(device); +TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, + void** device) { + return handle->getOpenCLDevice(device); } -TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, const char* name) -{ - handle->SetInputName(name); +TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, + const char* name) { + handle->SetInputName(name); } -TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle) -{ - return handle->GetInputName(); +TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle) { + return handle->GetInputName(); } -TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, Pix* pix) -{ - handle->SetInputImage(pix); +TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, + Pix* pix) { + handle->SetInputImage(pix); } -TESS_API Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle) -{ - return handle->GetInputImage(); +TESS_API Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle) { + return handle->GetInputImage(); } -TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle) -{ - return handle->GetSourceYResolution(); +TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle) { + return handle->GetSourceYResolution(); } -TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle) -{ - return handle->GetDatapath(); +TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle) { + return handle->GetDatapath(); } -TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name) -{ - handle->SetOutputName(name); +TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, + const char* name) { + handle->SetOutputName(name); } -TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value) -{ - return handle->SetVariable(name, value) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, + const char* name, + const char* value) { + return handle->SetVariable(name, value) ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value) -{ - return handle->SetVariable(name, value) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, + const char* name, + const char* value) { + return handle->SetVariable(name, value) ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, const char* name, int* value) -{ - return handle->GetIntVariable(name, value) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, + const char* name, + int* value) { + return handle->GetIntVariable(name, value) ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value) -{ - bool boolValue; - if (handle->GetBoolVariable(name, &boolValue)) - { - *value = boolValue ? TRUE : FALSE; - return TRUE; - } - else - { - return FALSE; - } +TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, + const char* name, + BOOL* value) { + bool boolValue; + if (handle->GetBoolVariable(name, &boolValue)) { + *value = boolValue ? TRUE : FALSE; + return TRUE; + } else { + return FALSE; + } } -TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value) -{ - return handle->GetDoubleVariable(name, value) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, + const char* name, + double* value) { + return handle->GetDoubleVariable(name, value) ? TRUE : FALSE; } -TESS_API const char* TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name) -{ - return handle->GetStringVariable(name); +TESS_API const char* TESS_CALL +TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name) { + return handle->GetStringVariable(name); } -TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp) -{ - handle->PrintVariables(fp); +TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, + FILE* fp) { + handle->PrintVariables(fp); } -TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename) -{ - FILE* fp = fopen(filename, "w"); - if (fp != nullptr) - { - handle->PrintVariables(fp); - fclose(fp); - return TRUE; - } - return FALSE; -} - -TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val) -{ - return handle->GetVariableAsString(name, val) ? TRUE : FALSE; -} - -TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, - TessOcrEngineMode mode, char** configs, int configs_size, - char** vars_vec, char** vars_values, size_t vars_vec_size, - BOOL set_only_non_debug_params) -{ - GenericVector varNames; - GenericVector varValues; - if (vars_vec != nullptr && vars_values != nullptr) { - for (size_t i = 0; i < vars_vec_size; i++) { - varNames.push_back(STRING(vars_vec[i])); - varValues.push_back(STRING(vars_values[i])); - } +TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile( + const TessBaseAPI* handle, const char* filename) { + FILE* fp = fopen(filename, "w"); + if (fp != nullptr) { + handle->PrintVariables(fp); + fclose(fp); + return TRUE; + } + return FALSE; +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, + const char* name, + STRING* val) { + return handle->GetVariableAsString(name, val) ? TRUE : FALSE; +} + +TESS_API int TESS_CALL TessBaseAPIInit4( + TessBaseAPI* handle, const char* datapath, const char* language, + TessOcrEngineMode mode, char** configs, int configs_size, char** vars_vec, + char** vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params) { + GenericVector varNames; + GenericVector varValues; + if (vars_vec != nullptr && vars_values != nullptr) { + for (size_t i = 0; i < vars_vec_size; i++) { + varNames.push_back(STRING(vars_vec[i])); + varValues.push_back(STRING(vars_values[i])); } + } - return handle->Init(datapath, language, mode, configs, configs_size, &varNames, &varValues, set_only_non_debug_params); + return handle->Init(datapath, language, mode, configs, configs_size, + &varNames, &varValues, set_only_non_debug_params); } - -TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem, - char** configs, int configs_size) -{ - return handle->Init(datapath, language, oem, configs, configs_size, nullptr, nullptr, false); +TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, + const char* datapath, + const char* language, + TessOcrEngineMode oem, char** configs, + int configs_size) { + return handle->Init(datapath, language, oem, configs, configs_size, nullptr, + nullptr, false); } -TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem) -{ - return handle->Init(datapath, language, oem); +TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, + const char* datapath, + const char* language, + TessOcrEngineMode oem) { + return handle->Init(datapath, language, oem); } -TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language) -{ - return handle->Init(datapath, language); +TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, + const char* datapath, + const char* language) { + return handle->Init(datapath, language); } -TESS_API const char* TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle) -{ - return handle->GetInitLanguagesAsString(); +TESS_API const char* TESS_CALL +TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle) { + return handle->GetInitLanguagesAsString(); } -TESS_API char** TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle) -{ - GenericVector languages; - handle->GetLoadedLanguagesAsVector(&languages); - char** arr = new char*[languages.size() + 1]; - for (int index = 0; index < languages.size(); ++index) - arr[index] = languages[index].strdup(); - arr[languages.size()] = nullptr; - return arr; +TESS_API char** TESS_CALL +TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle) { + GenericVector languages; + handle->GetLoadedLanguagesAsVector(&languages); + char** arr = new char*[languages.size() + 1]; + for (int index = 0; index < languages.size(); ++index) + arr[index] = languages[index].strdup(); + arr[languages.size()] = nullptr; + return arr; } -TESS_API char** TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle) -{ - GenericVector languages; - handle->GetAvailableLanguagesAsVector(&languages); - char** arr = new char*[languages.size() + 1]; - for (int index = 0; index < languages.size(); ++index) - arr[index] = languages[index].strdup(); - arr[languages.size()] = nullptr; - return arr; +TESS_API char** TESS_CALL +TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle) { + GenericVector languages; + handle->GetAvailableLanguagesAsVector(&languages); + char** arr = new char*[languages.size() + 1]; + for (int index = 0; index < languages.size(); ++index) + arr[index] = languages[index].strdup(); + arr[languages.size()] = nullptr; + return arr; } -TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language) -{ - return handle->InitLangMod(datapath, language); +TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, + const char* datapath, + const char* language) { + return handle->InitLangMod(datapath, language); } -TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle) -{ - handle->InitForAnalysePage(); +TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle) { + handle->InitForAnalysePage(); } -TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename) -{ - handle->ReadConfigFile(filename); +TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, + const char* filename) { + handle->ReadConfigFile(filename); } -TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename) -{ - handle->ReadDebugConfigFile(filename); +TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, + const char* filename) { + handle->ReadDebugConfigFile(filename); } -TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode) -{ - handle->SetPageSegMode(mode); +TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, + TessPageSegMode mode) { + handle->SetPageSegMode(mode); } -TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle) -{ - return handle->GetPageSegMode(); +TESS_API TessPageSegMode TESS_CALL +TessBaseAPIGetPageSegMode(const TessBaseAPI* handle) { + return handle->GetPageSegMode(); } -TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height) -{ - return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top, width, height); +TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, + const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, int left, int top, + int width, int height) { + return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, + top, width, height); } -TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) -{ - handle->ClearAdaptiveClassifier(); +TESS_API void TESS_CALL +TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) { + handle->ClearAdaptiveClassifier(); } -TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line) -{ - handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line); +TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, + const unsigned char* imagedata, + int width, int height, + int bytes_per_pixel, + int bytes_per_line) { + handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line); } -TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix) -{ - return handle->SetImage(pix); +TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, + struct Pix* pix) { + return handle->SetImage(pix); } -TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi) -{ - handle->SetSourceResolution(ppi); +TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, + int ppi) { + handle->SetSourceResolution(ppi); } -TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height) -{ - handle->SetRectangle(left, top, width, height); +TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, + int top, int width, + int height) { + handle->SetRectangle(left, top, width, height); } -TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder) -{ - handle->SetThresholder(thresholder); +TESS_API void TESS_CALL TessBaseAPISetThresholder( + TessBaseAPI* handle, TessImageThresholder* thresholder) { + handle->SetThresholder(thresholder); } -TESS_API struct Pix* TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle) -{ - return handle->GetThresholdedImage(); +TESS_API struct Pix* TESS_CALL +TessBaseAPIGetThresholdedImage(TessBaseAPI* handle) { + return handle->GetThresholdedImage(); } -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa) -{ - return handle->GetRegions(pixa); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, + struct Pixa** pixa) { + return handle->GetRegions(pixa); } -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids) -{ - return handle->GetTextlines(pixa, blockids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, + struct Pixa** pixa, + int** blockids) { + return handle->GetTextlines(pixa, blockids); } -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids) -{ - return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1( + TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids) { + return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids); } -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids) -{ - return handle->GetStrips(pixa, blockids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, + struct Pixa** pixa, + int** blockids) { + return handle->GetStrips(pixa, blockids); } -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa) -{ - return handle->GetWords(pixa); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, + struct Pixa** pixa) { + return handle->GetWords(pixa); } -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc) -{ - return handle->GetConnectedComponents(cc); +TESS_API struct Boxa* TESS_CALL +TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc) { + return handle->GetConnectedComponents(cc); } -TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, struct Pixa** pixa, int** blockids) -{ - return handle->GetComponentImages(level, text_only != FALSE, pixa, blockids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages( + TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, + struct Pixa** pixa, int** blockids) { + return handle->GetComponentImages(level, text_only != FALSE, pixa, blockids); } -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetComponentImages1( TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids) -{ - return handle->GetComponentImages(level, text_only != FALSE, raw_image, raw_padding, pixa, blockids, paraids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages1( + TessBaseAPI* handle, const TessPageIteratorLevel level, + const BOOL text_only, const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids) { + return handle->GetComponentImages(level, text_only != FALSE, raw_image, + raw_padding, pixa, blockids, paraids); } -TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle) -{ - return handle->GetThresholdedImageScaleFactor(); +TESS_API int TESS_CALL +TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle) { + return handle->GetThresholdedImageScaleFactor(); } -TESS_API TessPageIterator* TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle) -{ - return handle->AnalyseLayout(); +TESS_API TessPageIterator* TESS_CALL +TessBaseAPIAnalyseLayout(TessBaseAPI* handle) { + return handle->AnalyseLayout(); } -TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor) -{ - return handle->Recognize(monitor); +TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, + ETEXT_DESC* monitor) { + return handle->Recognize(monitor); } -TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor) -{ - return handle->RecognizeForChopTest(monitor); +TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, + ETEXT_DESC* monitor) { + return handle->RecognizeForChopTest(monitor); } -TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer) -{ - if (handle->ProcessPages(filename, retry_config, timeout_millisec, renderer)) - return TRUE; - else - return FALSE; +TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + if (handle->ProcessPages(filename, retry_config, timeout_millisec, renderer)) + return TRUE; + else + return FALSE; } -TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, TessResultRenderer* renderer) -{ - if (handle->ProcessPage(pix, page_index, filename, retry_config, timeout_millisec, renderer)) - return TRUE; - else - return FALSE; +TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, + struct Pix* pix, int page_index, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + if (handle->ProcessPage(pix, page_index, filename, retry_config, + timeout_millisec, renderer)) + return TRUE; + else + return FALSE; } -TESS_API TessResultIterator* TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle) -{ - return handle->GetIterator(); +TESS_API TessResultIterator* TESS_CALL +TessBaseAPIGetIterator(TessBaseAPI* handle) { + return handle->GetIterator(); } -TESS_API TessMutableIterator* TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle) -{ - return handle->GetMutableIterator(); +TESS_API TessMutableIterator* TESS_CALL +TessBaseAPIGetMutableIterator(TessBaseAPI* handle) { + return handle->GetMutableIterator(); } -TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle) -{ - return handle->GetUTF8Text(); +TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle) { + return handle->GetUTF8Text(); } -TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number) -{ - return handle->GetHOCRText(nullptr, page_number); +TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, + int page_number) { + return handle->GetHOCRText(nullptr, page_number); } -TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number) -{ - return handle->GetBoxText(page_number); +TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, + int page_number) { + return handle->GetBoxText(page_number); } -TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle) -{ - return handle->GetUNLVText(); +TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle) { + return handle->GetUNLVText(); } -TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle) -{ - return handle->MeanTextConf(); +TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle) { + return handle->MeanTextConf(); } -TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle) -{ - return handle->AllWordConfidences(); +TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle) { + return handle->AllWordConfidences(); } -TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr) -{ - return handle->AdaptToWordStr(mode, wordstr) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, + TessPageSegMode mode, + const char* wordstr) { + return handle->AdaptToWordStr(mode, wordstr) ? TRUE : FALSE; } -TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle) -{ - handle->Clear(); +TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle) { + handle->Clear(); } -TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle) -{ - handle->End(); -} +TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle) { handle->End(); } -TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word) -{ - return handle->IsValidWord(word); +TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, + const char* word) { + return handle->IsValidWord(word); } -TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope) -{ - return handle->GetTextDirection(out_offset, out_slope) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, + int* out_offset, + float* out_slope) { + return handle->GetTextDirection(out_offset, out_slope) ? TRUE : FALSE; } -TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f) -{ - handle->SetDictFunc(f); +TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, + TessDictFunc f) { + handle->SetDictFunc(f); } -TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle) -{ - handle->ClearPersistentCache(); +TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle) { + handle->ClearPersistentCache(); } -TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f) -{ - handle->SetProbabilityInContextFunc(f); +TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc( + TessBaseAPI* handle, TessProbabilityInContextFunc f) { + handle->SetProbabilityInContextFunc(f); } -TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results) -{ - return FALSE; // Unsafe ABI, return FALSE always +TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, + OSResults* results) { + return FALSE; // Unsafe ABI, return FALSE always } -TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, - int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) -{ - bool success; - success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf); - return (BOOL)success; +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript( + TessBaseAPI* handle, int* orient_deg, float* orient_conf, + const char** script_name, float* script_conf) { + bool success; + success = handle->DetectOrientationScript(orient_deg, orient_conf, + script_name, script_conf); + return (BOOL)success; } - -TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* FeatureOutlineIndex) -{ - handle->GetFeaturesForBlob(blob, int_features, num_features, FeatureOutlineIndex); +TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob( + TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, + int* num_features, int* FeatureOutlineIndex) { + handle->GetFeaturesForBlob(blob, int_features, num_features, + FeatureOutlineIndex); } -TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom) -{ - return TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom); +TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom) { + return TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom); } -TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches, - int* unichar_ids, float* ratings, int* num_matches_returned) -{ - handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, num_matches_returned); +TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier( + TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids, + float* ratings, int* num_matches_returned) { + handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, + num_matches_returned); } -TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id) -{ - return handle->GetUnichar(unichar_id); +TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, + int unichar_id) { + return handle->GetUnichar(unichar_id); } -TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i) -{ - return handle->GetDawg(i); +TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, + int i) { + return handle->GetDawg(i); } -TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle) -{ - return handle->NumDawgs(); +TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle) { + return handle->NumDawgs(); } -TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender) -{ - return TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); +TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, + float descender, float ascender) { + return TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); } -TESS_API TBLOB* TESS_CALL TessMakeTBLOB(struct Pix* pix) -{ - return TessBaseAPI::MakeTBLOB(pix); +TESS_API TBLOB* TESS_CALL TessMakeTBLOB(struct Pix* pix) { + return TessBaseAPI::MakeTBLOB(pix); } -TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode) -{ - TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode != FALSE); +TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, + BOOL numeric_mode) { + TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode != FALSE); } -TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle) -{ - return handle->oem(); +TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle) { + return handle->oem(); } -TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb) -{ - handle->InitTruthCallback(cb); +TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, + TessTruthCallback* cb) { + handle->InitTruthCallback(cb); } -TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin) -{ - handle->set_min_orientation_margin(margin); +TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, + double margin) { + handle->set_min_orientation_margin(margin); } -TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, bool** vertical_writing) -{ - handle->GetBlockTextOrientations(block_orientation, vertical_writing); +TESS_API void TESS_CALL TessBaseGetBlockTextOrientations( + TessBaseAPI* handle, int** block_orientation, bool** vertical_writing) { + handle->GetBlockTextOrientations(block_orientation, vertical_writing); } -TESS_API BLOCK_LIST* TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle) -{ - return handle->FindLinesCreateBlockList(); +TESS_API BLOCK_LIST* TESS_CALL +TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle) { + return handle->FindLinesCreateBlockList(); } -TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle) -{ - delete handle; +TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle) { + delete handle; } -TESS_API TessPageIterator* TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle) -{ - return new TessPageIterator(*handle); +TESS_API TessPageIterator* TESS_CALL +TessPageIteratorCopy(const TessPageIterator* handle) { + return new TessPageIterator(*handle); } -TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle) -{ - handle->Begin(); +TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle) { + handle->Begin(); } -TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level) -{ - return handle->Next(level) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, + TessPageIteratorLevel level) { + return handle->Next(level) ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level) -{ - return handle->IsAtBeginningOf(level) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf( + const TessPageIterator* handle, TessPageIteratorLevel level) { + return handle->IsAtBeginningOf(level) ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level, - TessPageIteratorLevel element) -{ - return handle->IsAtFinalElement(level, element) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement( + const TessPageIterator* handle, TessPageIteratorLevel level, + TessPageIteratorLevel element) { + return handle->IsAtFinalElement(level, element) ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level, - int* left, int* top, int* right, int* bottom) -{ - return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox( + const TessPageIterator* handle, TessPageIteratorLevel level, int* left, + int* top, int* right, int* bottom) { + return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE; } -TESS_API TessPolyBlockType TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle) -{ - return handle->BlockType(); +TESS_API TessPolyBlockType TESS_CALL +TessPageIteratorBlockType(const TessPageIterator* handle) { + return handle->BlockType(); } -TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level) -{ - return handle->GetBinaryImage(level); +TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage( + const TessPageIterator* handle, TessPageIteratorLevel level) { + return handle->GetBinaryImage(level); } -TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, - struct Pix* original_image, int* left, int* top) -{ - return handle->GetImage(level, padding, original_image, left, top); +TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage( + const TessPageIterator* handle, TessPageIteratorLevel level, int padding, + struct Pix* original_image, int* left, int* top) { + return handle->GetImage(level, padding, original_image, left, top); } -TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) -{ - return handle->Baseline(level, x1, y1, x2, y2) ? TRUE : FALSE; +TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, + TessPageIteratorLevel level, + int* x1, int* y1, int* x2, + int* y2) { + return handle->Baseline(level, x1, y1, x2, y2) ? TRUE : FALSE; } -TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation, - TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, - float* deskew_angle) -{ - handle->Orientation(orientation, writing_direction, textline_order, deskew_angle); +TESS_API void TESS_CALL TessPageIteratorOrientation( + TessPageIterator* handle, TessOrientation* orientation, + TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, + float* deskew_angle) { + handle->Orientation(orientation, writing_direction, textline_order, + deskew_angle); } -TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification, - BOOL *is_list_item, BOOL *is_crown, int *first_line_indent) -{ - bool bool_is_list_item, bool_is_crown; - handle->ParagraphInfo(justification, &bool_is_list_item, &bool_is_crown, first_line_indent); - if (is_list_item) - *is_list_item = bool_is_list_item ? TRUE : FALSE; - if (is_crown) - *is_crown = bool_is_crown ? TRUE : FALSE; +TESS_API void TESS_CALL TessPageIteratorParagraphInfo( + TessPageIterator* handle, TessParagraphJustification* justification, + BOOL* is_list_item, BOOL* is_crown, int* first_line_indent) { + bool bool_is_list_item, bool_is_crown; + handle->ParagraphInfo(justification, &bool_is_list_item, &bool_is_crown, + first_line_indent); + if (is_list_item) *is_list_item = bool_is_list_item ? TRUE : FALSE; + if (is_crown) *is_crown = bool_is_crown ? TRUE : FALSE; } - -TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle) -{ - delete handle; +TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle) { + delete handle; } -TESS_API TessResultIterator* TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle) -{ - return new TessResultIterator(*handle); +TESS_API TessResultIterator* TESS_CALL +TessResultIteratorCopy(const TessResultIterator* handle) { + return new TessResultIterator(*handle); } -TESS_API TessPageIterator* TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle) -{ - return handle; +TESS_API TessPageIterator* TESS_CALL +TessResultIteratorGetPageIterator(TessResultIterator* handle) { + return handle; } -TESS_API const TessPageIterator* TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle) -{ - return handle; +TESS_API const TessPageIterator* TESS_CALL +TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle) { + return handle; } -TESS_API TessChoiceIterator* TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle) -{ - return new TessChoiceIterator(*handle); +TESS_API TessChoiceIterator* TESS_CALL +TessResultIteratorGetChoiceIterator(const TessResultIterator* handle) { + return new TessChoiceIterator(*handle); } -TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level) -{ - return handle->Next(level); +TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, + TessPageIteratorLevel level) { + return handle->Next(level); } -TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level) -{ - return handle->GetUTF8Text(level); +TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text( + const TessResultIterator* handle, TessPageIteratorLevel level) { + return handle->GetUTF8Text(level); } -TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level) -{ - return handle->Confidence(level); +TESS_API float TESS_CALL TessResultIteratorConfidence( + const TessResultIterator* handle, TessPageIteratorLevel level) { + return handle->Confidence(level); } -TESS_API const char* TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle) -{ - return handle->WordRecognitionLanguage(); +TESS_API const char* TESS_CALL +TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle) { + return handle->WordRecognitionLanguage(); } -TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, - BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, - BOOL* is_smallcaps, int* pointsize, int* font_id) -{ - bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, bool_is_serif, bool_is_smallcaps; - const char* ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, &bool_is_serif, - &bool_is_smallcaps, pointsize, font_id); - if (is_bold) - *is_bold = bool_is_bold ? TRUE : FALSE; - if (is_italic) - *is_italic = bool_is_italic ? TRUE : FALSE; - if (is_underlined) - *is_underlined = bool_is_underlined ? TRUE : FALSE; - if (is_monospace) - *is_monospace = bool_is_monospace ? TRUE : FALSE; - if (is_serif) - *is_serif = bool_is_serif ? TRUE : FALSE; - if (is_smallcaps) - *is_smallcaps = bool_is_smallcaps ? TRUE : FALSE; - return ret; +TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes( + const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps, + int* pointsize, int* font_id) { + bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, + bool_is_serif, bool_is_smallcaps; + const char* ret = handle->WordFontAttributes( + &bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, + &bool_is_serif, &bool_is_smallcaps, pointsize, font_id); + if (is_bold) *is_bold = bool_is_bold ? TRUE : FALSE; + if (is_italic) *is_italic = bool_is_italic ? TRUE : FALSE; + if (is_underlined) *is_underlined = bool_is_underlined ? TRUE : FALSE; + if (is_monospace) *is_monospace = bool_is_monospace ? TRUE : FALSE; + if (is_serif) *is_serif = bool_is_serif ? TRUE : FALSE; + if (is_smallcaps) *is_smallcaps = bool_is_smallcaps ? TRUE : FALSE; + return ret; } -TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle) -{ - return handle->WordIsFromDictionary() ? TRUE : FALSE; +TESS_API BOOL TESS_CALL +TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle) { + return handle->WordIsFromDictionary() ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle) -{ - return handle->WordIsNumeric() ? TRUE : FALSE; +TESS_API BOOL TESS_CALL +TessResultIteratorWordIsNumeric(const TessResultIterator* handle) { + return handle->WordIsNumeric() ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle) -{ - return handle->SymbolIsSuperscript() ? TRUE : FALSE; +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle) { + return handle->SymbolIsSuperscript() ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle) -{ - return handle->SymbolIsSubscript() ? TRUE : FALSE; +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle) { + return handle->SymbolIsSubscript() ? TRUE : FALSE; } -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle) -{ - return handle->SymbolIsDropcap() ? TRUE : FALSE; +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle) { + return handle->SymbolIsDropcap() ? TRUE : FALSE; } -TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle) -{ - delete handle; +TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle) { + delete handle; } -TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle) -{ - return handle->Next(); +TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle) { + return handle->Next(); } -TESS_API const char* TESS_CALL TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle) -{ - return handle->GetUTF8Text(); +TESS_API const char* TESS_CALL +TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle) { + return handle->GetUTF8Text(); } -TESS_API float TESS_CALL TessChoiceIteratorConfidence(const TessChoiceIterator* handle) -{ - return handle->Confidence(); +TESS_API float TESS_CALL +TessChoiceIteratorConfidence(const TessChoiceIterator* handle) { + return handle->Confidence(); } diff --git a/src/api/capi.h b/src/api/capi.h index 8a0183b46f..6dbe2a4752 100644 --- a/src/api/capi.h +++ b/src/api/capi.h @@ -19,17 +19,17 @@ #define API_CAPI_H_ #if defined(TESSERACT_API_BASEAPI_H_) && !defined(TESS_CAPI_INCLUDE_BASEAPI) -# define TESS_CAPI_INCLUDE_BASEAPI +#define TESS_CAPI_INCLUDE_BASEAPI #endif #ifdef TESS_CAPI_INCLUDE_BASEAPI -# include "baseapi.h" -# include "pageiterator.h" -# include "resultiterator.h" -# include "renderer.h" +#include "baseapi.h" +#include "pageiterator.h" +#include "renderer.h" +#include "resultiterator.h" #else -# include "platform.h" -# include +#include +#include "platform.h" #endif #ifdef __cplusplus @@ -37,17 +37,17 @@ extern "C" { #endif #ifndef TESS_CALL -# if defined(WIN32) -# define TESS_CALL __cdecl -# else -# define TESS_CALL -# endif +#if defined(WIN32) +#define TESS_CALL __cdecl +#else +#define TESS_CALL +#endif #endif #ifndef BOOL -# define BOOL int -# define TRUE 1 -# define FALSE 0 +#define BOOL int +#define TRUE 1 +#define FALSE 0 #endif #ifdef TESS_CAPI_INCLUDE_BASEAPI @@ -89,18 +89,75 @@ typedef struct TessPageIterator TessPageIterator; typedef struct TessResultIterator TessResultIterator; typedef struct TessMutableIterator TessMutableIterator; typedef struct TessChoiceIterator TessChoiceIterator; -typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT } TessOcrEngineMode; -typedef enum TessPageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT, - PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT, - PSM_SPARSE_TEXT_OSD, PSM_COUNT } TessPageSegMode; -typedef enum TessPageIteratorLevel { RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL} TessPageIteratorLevel; -typedef enum TessPolyBlockType { PT_UNKNOWN, PT_FLOWING_TEXT, PT_HEADING_TEXT, PT_PULLOUT_TEXT, PT_EQUATION, PT_INLINE_EQUATION, - PT_TABLE, PT_VERTICAL_TEXT, PT_CAPTION_TEXT, PT_FLOWING_IMAGE, PT_HEADING_IMAGE, - PT_PULLOUT_IMAGE, PT_HORZ_LINE, PT_VERT_LINE, PT_NOISE, PT_COUNT } TessPolyBlockType; -typedef enum TessOrientation { ORIENTATION_PAGE_UP, ORIENTATION_PAGE_RIGHT, ORIENTATION_PAGE_DOWN, ORIENTATION_PAGE_LEFT } TessOrientation; -typedef enum TessParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT } TessParagraphJustification; -typedef enum TessWritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT, WRITING_DIRECTION_RIGHT_TO_LEFT, WRITING_DIRECTION_TOP_TO_BOTTOM } TessWritingDirection; -typedef enum TessTextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT, TEXTLINE_ORDER_RIGHT_TO_LEFT, TEXTLINE_ORDER_TOP_TO_BOTTOM } TessTextlineOrder; +typedef enum TessOcrEngineMode { + OEM_TESSERACT_ONLY, + OEM_LSTM_ONLY, + OEM_TESSERACT_LSTM_COMBINED, + OEM_DEFAULT +} TessOcrEngineMode; +typedef enum TessPageSegMode { + PSM_OSD_ONLY, + PSM_AUTO_OSD, + PSM_AUTO_ONLY, + PSM_AUTO, + PSM_SINGLE_COLUMN, + PSM_SINGLE_BLOCK_VERT_TEXT, + PSM_SINGLE_BLOCK, + PSM_SINGLE_LINE, + PSM_SINGLE_WORD, + PSM_CIRCLE_WORD, + PSM_SINGLE_CHAR, + PSM_SPARSE_TEXT, + PSM_SPARSE_TEXT_OSD, + PSM_COUNT +} TessPageSegMode; +typedef enum TessPageIteratorLevel { + RIL_BLOCK, + RIL_PARA, + RIL_TEXTLINE, + RIL_WORD, + RIL_SYMBOL +} TessPageIteratorLevel; +typedef enum TessPolyBlockType { + PT_UNKNOWN, + PT_FLOWING_TEXT, + PT_HEADING_TEXT, + PT_PULLOUT_TEXT, + PT_EQUATION, + PT_INLINE_EQUATION, + PT_TABLE, + PT_VERTICAL_TEXT, + PT_CAPTION_TEXT, + PT_FLOWING_IMAGE, + PT_HEADING_IMAGE, + PT_PULLOUT_IMAGE, + PT_HORZ_LINE, + PT_VERT_LINE, + PT_NOISE, + PT_COUNT +} TessPolyBlockType; +typedef enum TessOrientation { + ORIENTATION_PAGE_UP, + ORIENTATION_PAGE_RIGHT, + ORIENTATION_PAGE_DOWN, + ORIENTATION_PAGE_LEFT +} TessOrientation; +typedef enum TessParagraphJustification { + JUSTIFICATION_UNKNOWN, + JUSTIFICATION_LEFT, + JUSTIFICATION_CENTER, + JUSTIFICATION_RIGHT +} TessParagraphJustification; +typedef enum TessWritingDirection { + WRITING_DIRECTION_LEFT_TO_RIGHT, + WRITING_DIRECTION_RIGHT_TO_LEFT, + WRITING_DIRECTION_TOP_TO_BOTTOM +} TessWritingDirection; +typedef enum TessTextlineOrder { + TEXTLINE_ORDER_LEFT_TO_RIGHT, + TEXTLINE_ORDER_RIGHT_TO_LEFT, + TEXTLINE_ORDER_TOP_TO_BOTTOM +} TessTextlineOrder; typedef struct ETEXT_DESC ETEXT_DESC; #endif @@ -110,287 +167,384 @@ struct Pixa; /* General free functions */ -TESS_API const char* - TESS_CALL TessVersion(); -TESS_API void TESS_CALL TessDeleteText(char* text); -TESS_API void TESS_CALL TessDeleteTextArray(char** arr); -TESS_API void TESS_CALL TessDeleteIntArray(int* arr); +TESS_API const char* TESS_CALL TessVersion(); +TESS_API void TESS_CALL TessDeleteText(char* text); +TESS_API void TESS_CALL TessDeleteTextArray(char** arr); +TESS_API void TESS_CALL TessDeleteIntArray(int* arr); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list); +TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list); #endif /* Renderer API */ -TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); -TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, - BOOL textonly); -TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessTextRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessHOcrRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); +TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate( + const char* outputbase, const char* datadir, BOOL textonly); +TESS_API TessResultRenderer* TESS_CALL +TessUnlvRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessBoxTextRendererCreate(const char* outputbase); TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer); -TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next); -TESS_API TessResultRenderer* - TESS_CALL TessResultRendererNext(TessResultRenderer* renderer); -TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title); -TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api); -TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer); - -TESS_API const char* TESS_CALL TessResultRendererExtention(TessResultRenderer* renderer); -TESS_API const char* TESS_CALL TessResultRendererTitle(TessResultRenderer* renderer); +TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, + TessResultRenderer* next); +TESS_API TessResultRenderer* TESS_CALL +TessResultRendererNext(TessResultRenderer* renderer); +TESS_API BOOL TESS_CALL TessResultRendererBeginDocument( + TessResultRenderer* renderer, const char* title); +TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, + TessBaseAPI* api); +TESS_API BOOL TESS_CALL +TessResultRendererEndDocument(TessResultRenderer* renderer); + +TESS_API const char* TESS_CALL +TessResultRendererExtention(TessResultRenderer* renderer); +TESS_API const char* TESS_CALL +TessResultRendererTitle(TessResultRenderer* renderer); TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer); /* Base API */ -TESS_API TessBaseAPI* - TESS_CALL TessBaseAPICreate(); -TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle); +TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate(); +TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle); -TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device); +TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, + void** device); -TESS_API void TESS_CALL TessBaseAPISetInputName( TessBaseAPI* handle, const char* name); +TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, + const char* name); TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle); -TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, struct Pix* pix); -TESS_API struct Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle); +TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, + struct Pix* pix); +TESS_API struct Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle); -TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle); +TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle); TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle); -TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name); - -TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value); -TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value); - -TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable( const TessBaseAPI* handle, const char* name, int* value); -TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable( const TessBaseAPI* handle, const char* name, BOOL* value); -TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value); -TESS_API const char* - TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); - -TESS_API void TESS_CALL TessBaseAPIPrintVariables( const TessBaseAPI* handle, FILE* fp); -TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename); +TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, + const char* name); + +TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, + const char* name, + const char* value); +TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, + const char* name, + const char* value); + +TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, + const char* name, int* value); +TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, + const char* name, + BOOL* value); +TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, + const char* name, + double* value); +TESS_API const char* TESS_CALL +TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); + +TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, + FILE* fp); +TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile( + const TessBaseAPI* handle, const char* filename); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val); +TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, + const char* name, + STRING* val); #endif #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API int TESS_CALL TessBaseAPIInit(TessBaseAPI* handle, const char* datapath, const char* language, - TessOcrEngineMode mode, char** configs, int configs_size, - const STRING* vars_vec, size_t vars_vec_size, - const STRING* vars_values, size_t vars_values_size, BOOL set_only_init_params); +TESS_API int TESS_CALL TessBaseAPIInit( + TessBaseAPI* handle, const char* datapath, const char* language, + TessOcrEngineMode mode, char** configs, int configs_size, + const STRING* vars_vec, size_t vars_vec_size, const STRING* vars_values, + size_t vars_values_size, BOOL set_only_init_params); #endif -TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem, - char** configs, int configs_size); -TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem); -TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language); - -TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode mode, - char** configs, int configs_size, - char** vars_vec, char** vars_values, size_t vars_vec_size, - BOOL set_only_non_debug_params); - -TESS_API const char* - TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); -TESS_API char** - TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); -TESS_API char** - TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); - -TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language); -TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle); - -TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename); -TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename); - -TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode); -TESS_API TessPageSegMode - TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); - -TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height); - -TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle); - -TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line); -TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix); - -TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi); - -TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height); +TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, + const char* datapath, + const char* language, + TessOcrEngineMode oem, char** configs, + int configs_size); +TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, + const char* datapath, + const char* language, + TessOcrEngineMode oem); +TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, + const char* datapath, + const char* language); + +TESS_API int TESS_CALL TessBaseAPIInit4( + TessBaseAPI* handle, const char* datapath, const char* language, + TessOcrEngineMode mode, char** configs, int configs_size, char** vars_vec, + char** vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params); + +TESS_API const char* TESS_CALL +TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); +TESS_API char** TESS_CALL +TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); +TESS_API char** TESS_CALL +TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); + +TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, + const char* datapath, + const char* language); +TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, + const char* filename); +TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, + const char* filename); + +TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, + TessPageSegMode mode); +TESS_API TessPageSegMode TESS_CALL +TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); + +TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, + const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, int left, int top, + int width, int height); + +TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, + const unsigned char* imagedata, + int width, int height, + int bytes_per_pixel, + int bytes_per_line); +TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, + struct Pix* pix); + +TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, + int ppi); + +TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, + int top, int width, int height); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder); +TESS_API void TESS_CALL TessBaseAPISetThresholder( + TessBaseAPI* handle, TessImageThresholder* thresholder); #endif -TESS_API struct Pix* - TESS_CALL TessBaseAPIGetThresholdedImage( TessBaseAPI* handle); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetRegions( TessBaseAPI* handle, struct Pixa** pixa); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetTextlines( TessBaseAPI* handle, struct Pixa** pixa, int** blockids); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetTextlines1( TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetStrips( TessBaseAPI* handle, struct Pixa** pixa, int** blockids); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetWords( TessBaseAPI* handle, struct Pixa** pixa); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetComponentImages( TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - struct Pixa** pixa, int** blockids); -TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetComponentImages1( TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); - -TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle); - -TESS_API TessPageIterator* - TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle); - -TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor); -TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor); -TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer); -TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, TessResultRenderer* renderer); - -TESS_API TessResultIterator* - TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle); -TESS_API TessMutableIterator* - TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle); +TESS_API struct Pix* TESS_CALL +TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, + struct Pixa** pixa); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, + struct Pixa** pixa, + int** blockids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1( + TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, + struct Pixa** pixa, + int** blockids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, + struct Pixa** pixa); +TESS_API struct Boxa* TESS_CALL +TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages( + TessBaseAPI* handle, const TessPageIteratorLevel level, + const BOOL text_only, struct Pixa** pixa, int** blockids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages1( + TessBaseAPI* handle, const TessPageIteratorLevel level, + const BOOL text_only, const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids); + +TESS_API int TESS_CALL +TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle); + +TESS_API TessPageIterator* TESS_CALL +TessBaseAPIAnalyseLayout(TessBaseAPI* handle); + +TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, + ETEXT_DESC* monitor); +TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, + ETEXT_DESC* monitor); +TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer); +TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, + struct Pix* pix, int page_index, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer); + +TESS_API TessResultIterator* TESS_CALL +TessBaseAPIGetIterator(TessBaseAPI* handle); +TESS_API TessMutableIterator* TESS_CALL +TessBaseAPIGetMutableIterator(TessBaseAPI* handle); TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle); -TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number); -TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number); +TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, + int page_number); +TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, + int page_number); TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle); -TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle); -TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle); -TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr); +TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle); +TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle); +TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, + TessPageSegMode mode, + const char* wordstr); -TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle); -TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle); +TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle); +TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle); -TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word); -TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope); +TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, + const char* word); +TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, + int* out_offset, + float* out_slope); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f); -TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle); -TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f); - -TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f); - -// Call TessDeleteText(*best_script_name) to free memory allocated by this function -TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, - int* orient_deg, float* orient_conf, const char **script_name, float* script_conf); - -TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* FeatureOutlineIndex); - -TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom); -TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches, - int* unichar_ids, float* ratings, int* num_matches_returned); +TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, + TessDictFunc f); +TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle); +TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc( + TessBaseAPI* handle, TessProbabilityInContextFunc f); + +TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, + TessFillLatticeFunc f); + +// Call TessDeleteText(*best_script_name) to free memory allocated by this +// function +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript( + TessBaseAPI* handle, int* orient_deg, float* orient_conf, + const char** script_name, float* script_conf); + +TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob( + TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, + int* num_features, int* FeatureOutlineIndex); + +TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom); +TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier( + TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids, + float* ratings, int* num_matches_returned); #endif -TESS_API const char* - TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id); +TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, + int unichar_id); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API const TessDawg* - TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i); -TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle); +TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, + int i); +TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle); #endif #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender); -TESS_API TBLOB* - TESS_CALL TessMakeTBLOB(Pix* pix); -TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode); - -TESS_API TessOcrEngineMode - TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); -TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb); +TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, + float descender, float ascender); +TESS_API TBLOB* TESS_CALL TessMakeTBLOB(Pix* pix); +TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, + BOOL numeric_mode); + +TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); +TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, + TessTruthCallback* cb); #endif -TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin); +TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, + double margin); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, BOOL** vertical_writing); +TESS_API void TESS_CALL TessBaseGetBlockTextOrientations( + TessBaseAPI* handle, int** block_orientation, BOOL** vertical_writing); -TESS_API BLOCK_LIST* - TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); +TESS_API BLOCK_LIST* TESS_CALL +TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); #endif /* Page iterator */ -TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle); -TESS_API TessPageIterator* - TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle); - -TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle); -TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level); -TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level); -TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level, - TessPageIteratorLevel element); - -TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level, - int* left, int* top, int* right, int* bottom); -TESS_API TessPolyBlockType - TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle); - -TESS_API struct Pix* - TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level); -TESS_API struct Pix* - TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, - struct Pix* original_image, int* left, int* top); - -TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2); - -TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation, - TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, - float* deskew_angle); - -TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification, - BOOL *is_list_item, BOOL *is_crown, int *first_line_indent); +TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle); +TESS_API TessPageIterator* TESS_CALL +TessPageIteratorCopy(const TessPageIterator* handle); + +TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle); +TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, + TessPageIteratorLevel level); +TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf( + const TessPageIterator* handle, TessPageIteratorLevel level); +TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement( + const TessPageIterator* handle, TessPageIteratorLevel level, + TessPageIteratorLevel element); + +TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox( + const TessPageIterator* handle, TessPageIteratorLevel level, int* left, + int* top, int* right, int* bottom); +TESS_API TessPolyBlockType TESS_CALL +TessPageIteratorBlockType(const TessPageIterator* handle); + +TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage( + const TessPageIterator* handle, TessPageIteratorLevel level); +TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage( + const TessPageIterator* handle, TessPageIteratorLevel level, int padding, + struct Pix* original_image, int* left, int* top); + +TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, + TessPageIteratorLevel level, + int* x1, int* y1, int* x2, + int* y2); + +TESS_API void TESS_CALL TessPageIteratorOrientation( + TessPageIterator* handle, TessOrientation* orientation, + TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, + float* deskew_angle); + +TESS_API void TESS_CALL TessPageIteratorParagraphInfo( + TessPageIterator* handle, TessParagraphJustification* justification, + BOOL* is_list_item, BOOL* is_crown, int* first_line_indent); /* Result iterator */ -TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle); -TESS_API TessResultIterator* - TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle); -TESS_API TessPageIterator* - TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle); -TESS_API const TessPageIterator* - TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); -TESS_API TessChoiceIterator* - TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); - -TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level); -TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level); -TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level); -TESS_API const char* - TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); -TESS_API const char* - TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, - BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, - BOOL* is_smallcaps, int* pointsize, int* font_id); - -TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle); -TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle); -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle); -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle); -TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle); - -TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle); -TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle); -TESS_API const char* TESS_CALL TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle); -TESS_API float TESS_CALL TessChoiceIteratorConfidence(const TessChoiceIterator* handle); +TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle); +TESS_API TessResultIterator* TESS_CALL +TessResultIteratorCopy(const TessResultIterator* handle); +TESS_API TessPageIterator* TESS_CALL +TessResultIteratorGetPageIterator(TessResultIterator* handle); +TESS_API const TessPageIterator* TESS_CALL +TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); +TESS_API TessChoiceIterator* TESS_CALL +TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); + +TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, + TessPageIteratorLevel level); +TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text( + const TessResultIterator* handle, TessPageIteratorLevel level); +TESS_API float TESS_CALL TessResultIteratorConfidence( + const TessResultIterator* handle, TessPageIteratorLevel level); +TESS_API const char* TESS_CALL +TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); +TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes( + const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps, + int* pointsize, int* font_id); + +TESS_API BOOL TESS_CALL +TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL +TessResultIteratorWordIsNumeric(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle); + +TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle); +TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle); +TESS_API const char* TESS_CALL +TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle); +TESS_API float TESS_CALL +TessChoiceIteratorConfidence(const TessChoiceIterator* handle); #ifdef __cplusplus } diff --git a/src/api/pdfrenderer.cpp b/src/api/pdfrenderer.cpp index b700319dbd..f36bd5f522 100644 --- a/src/api/pdfrenderer.cpp +++ b/src/api/pdfrenderer.cpp @@ -180,10 +180,10 @@ static const int kMaxBytesPerCodepoint = 20; * PDF Renderer interface implementation **********************************************************************/ -TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, +TessPDFRenderer::TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly) : TessResultRenderer(outputbase, "pdf") { - obj_ = 0; + obj_ = 0; datadir_ = datadir; textonly_ = textonly; offsets_.push_back(0); @@ -194,9 +194,9 @@ void TessPDFRenderer::AppendPDFObjectDIY(size_t objectsize) { obj_++; } -void TessPDFRenderer::AppendPDFObject(const char *data) { +void TessPDFRenderer::AppendPDFObject(const char* data) { AppendPDFObjectDIY(strlen(data)); - AppendString((const char *)data); + AppendString((const char*)data); } // Helper function to prevent us from accidentally writing @@ -205,8 +205,7 @@ void TessPDFRenderer::AppendPDFObject(const char *data) { double prec(double x) { double kPrecision = 1000.0; double a = round(x * kPrecision) / kPrecision; - if (a == -0) - return 0; + if (a == -0) return 0; return a; } @@ -222,10 +221,10 @@ long dist2(int x1, int y1, int x2, int y2) { // left-to-right no matter what the reading order is. We need the // word baseline in reading order, so we do that conversion here. Returns // the word's baseline origin and length. -void GetWordBaseline(int writing_direction, int ppi, int height, - int word_x1, int word_y1, int word_x2, int word_y2, - int line_x1, int line_y1, int line_x2, int line_y2, - double *x0, double *y0, double *length) { +void GetWordBaseline(int writing_direction, int ppi, int height, int word_x1, + int word_y1, int word_x2, int word_y2, int line_x1, + int line_y1, int line_x2, int line_y2, double* x0, + double* y0, double* length) { if (writing_direction == WRITING_DIRECTION_RIGHT_TO_LEFT) { Swap(&word_x1, &word_x2); Swap(&word_y1, &word_y2); @@ -241,12 +240,13 @@ void GetWordBaseline(int writing_direction, int ppi, int height, y = line_y1; } else { double t = ((px - line_x2) * (line_x2 - line_x1) + - (py - line_y2) * (line_y2 - line_y1)) / l2; + (py - line_y2) * (line_y2 - line_y1)) / + l2; x = line_x2 + t * (line_x2 - line_x1); y = line_y2 + t * (line_y2 - line_y1); } - word_length = sqrt(static_cast(dist2(word_x1, word_y1, - word_x2, word_y2))); + word_length = + sqrt(static_cast(dist2(word_x1, word_y1, word_x2, word_y2))); word_length = word_length * 72.0 / ppi; x = x * 72 / ppi; y = height - (y * 72.0 / ppi); @@ -264,16 +264,15 @@ void GetWordBaseline(int writing_direction, int ppi, int height, // RTL // [ x' ] = [ a b ][ x ] = [-1 0 ] [ cos sin ][ x ] // [ y' ] [ c d ][ y ] [ 0 1 ] [-sin cos ][ y ] -void AffineMatrix(int writing_direction, - int line_x1, int line_y1, int line_x2, int line_y2, - double *a, double *b, double *c, double *d) { +void AffineMatrix(int writing_direction, int line_x1, int line_y1, int line_x2, + int line_y2, double* a, double* b, double* c, double* d) { double theta = atan2(static_cast(line_y1 - line_y2), static_cast(line_x2 - line_x1)); *a = cos(theta); *b = sin(theta); *c = -sin(theta); *d = cos(theta); - switch(writing_direction) { + switch (writing_direction) { case WRITING_DIRECTION_RIGHT_TO_LEFT: *a = -*a; *b = -*b; @@ -293,17 +292,15 @@ void AffineMatrix(int writing_direction, // these viewers. I chose this threshold large enough to absorb noise, // but small enough that lines probably won't cross each other if the // whole page is tilted at almost exactly the clipping threshold. -void ClipBaseline(int ppi, int x1, int y1, int x2, int y2, - int *line_x1, int *line_y1, - int *line_x2, int *line_y2) { +void ClipBaseline(int ppi, int x1, int y1, int x2, int y2, int* line_x1, + int* line_y1, int* line_x2, int* line_y2) { *line_x1 = x1; *line_y1 = y1; *line_x2 = x2; *line_y2 = y2; double rise = abs(y2 - y1) * 72 / ppi; double run = abs(x2 - x1) * 72 / ppi; - if (rise < 2.0 && 2.0 < run) - *line_y1 = *line_y2 = (y1 + y2) / 2; + if (rise < 2.0 && 2.0 < run) *line_y1 = *line_y2 = (y1 + y2) / 2; } bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) { @@ -317,14 +314,14 @@ bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) { int a = code - 0x010000; int high_surrogate = (0x03FF & (a >> 10)) + 0xD800; int low_surrogate = (0x03FF & a) + 0xDC00; - snprintf(utf16, kMaxBytesPerCodepoint, - "%04X%04X", high_surrogate, low_surrogate); + snprintf(utf16, kMaxBytesPerCodepoint, "%04X%04X", high_surrogate, + low_surrogate); } return true; } -char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, - double width, double height) { +char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, double width, + double height) { STRING pdf_str(""); double ppi = api->GetSourceYResolution(); @@ -358,12 +355,12 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, int line_x2 = 0; int line_y2 = 0; - ResultIterator *res_it = api->GetIterator(); + ResultIterator* res_it = api->GetIterator(); while (!res_it->Empty(RIL_BLOCK)) { if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - pdf_str += "BT\n3 Tr"; // Begin text object, use invisible ink - old_fontsize = 0; // Every block will declare its fontsize - new_block = true; // Every block will declare its affine matrix + pdf_str += "BT\n3 Tr"; // Begin text object, use invisible ink + old_fontsize = 0; // Every block will declare its fontsize + new_block = true; // Every block will declare its affine matrix } if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { @@ -383,8 +380,8 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, tesseract::Orientation orientation; tesseract::TextlineOrder textline_order; float deskew_angle; - res_it->Orientation(&orientation, &writing_direction, - &textline_order, &deskew_angle); + res_it->Orientation(&orientation, &writing_direction, &textline_order, + &deskew_angle); if (writing_direction != WRITING_DIRECTION_TOP_TO_BOTTOM) { switch (res_it->WordDirection()) { case DIR_LEFT_TO_RIGHT: @@ -404,15 +401,14 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, { int word_x1, word_y1, word_x2, word_y2; res_it->Baseline(RIL_WORD, &word_x1, &word_y1, &word_x2, &word_y2); - GetWordBaseline(writing_direction, ppi, height, - word_x1, word_y1, word_x2, word_y2, - line_x1, line_y1, line_x2, line_y2, - &x, &y, &word_length); + GetWordBaseline(writing_direction, ppi, height, word_x1, word_y1, word_x2, + word_y2, line_x1, line_y1, line_x2, line_y2, &x, &y, + &word_length); } if (writing_direction != old_writing_direction || new_block) { - AffineMatrix(writing_direction, - line_x1, line_y1, line_x2, line_y2, &a, &b, &c, &d); + AffineMatrix(writing_direction, line_x1, line_y1, line_x2, line_y2, &a, + &b, &c, &d); pdf_str.add_str_double(" ", prec(a)); // . This affine matrix pdf_str.add_str_double(" ", prec(b)); // . sets the coordinate pdf_str.add_str_double(" ", prec(c)); // . system for all @@ -426,7 +422,7 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, double dy = y - old_y; pdf_str.add_str_double(" ", prec(dx * a + dy * b)); pdf_str.add_str_double(" ", prec(dx * c + dy * d)); - pdf_str += (" Td "); // Relative moveto + pdf_str += (" Td "); // Relative moveto } old_x = x; old_y = y; @@ -442,8 +438,7 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, &smallcaps, &fontsize, &font_id); const int kDefaultFontsize = 8; - if (fontsize <= 0) - fontsize = kDefaultFontsize; + if (fontsize <= 0) fontsize = kDefaultFontsize; if (fontsize != old_fontsize) { char textfont[20]; snprintf(textfont, sizeof(textfont), "/f-0-0 %d Tf ", fontsize); @@ -475,19 +470,19 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, double h_stretch = kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len)); pdf_str.add_str_double("", h_stretch); - pdf_str += " Tz"; // horizontal stretch + pdf_str += " Tz"; // horizontal stretch pdf_str += " [ <"; - pdf_str += pdf_word; // UTF-16BE representation - pdf_str += "> ] TJ"; // show the text + pdf_str += pdf_word; // UTF-16BE representation + pdf_str += "> ] TJ"; // show the text } if (last_word_in_line) { pdf_str += " \n"; } if (last_word_in_block) { - pdf_str += "ET\n"; // end the text object + pdf_str += "ET\n"; // end the text object } } - char *ret = new char[pdf_str.length() + 1]; + char* ret = new char[pdf_str.length() + 1]; strcpy(ret, pdf_str.string()); delete res_it; return ret; @@ -533,9 +528,9 @@ bool TessPDFRenderer::BeginDocumentHandler() { " /Type /Font\n" ">>\n" "endobj\n", - 4L, // CIDFontType2 font - 6L // ToUnicode - ); + 4L, // CIDFontType2 font + 6L // ToUnicode + ); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); @@ -557,8 +552,8 @@ bool TessPDFRenderer::BeginDocumentHandler() { " /DW %d\n" ">>\n" "endobj\n", - 5L, // CIDToGIDMap - 7L, // Font descriptor + 5L, // CIDToGIDMap + 7L, // Font descriptor 1000 / kCharWidth); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); @@ -571,7 +566,7 @@ bool TessPDFRenderer::BeginDocumentHandler() { cidtogidmap[i] = (i % 2) ? 1 : 0; } size_t len; - unsigned char *comp = zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len); + unsigned char* comp = zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len); n = snprintf(buf, sizeof(buf), "5 0 obj\n" "<<\n" @@ -585,17 +580,17 @@ bool TessPDFRenderer::BeginDocumentHandler() { } AppendString(buf); long objsize = strlen(buf); - AppendData(reinterpret_cast(comp), len); + AppendData(reinterpret_cast(comp), len); objsize += len; lept_free(comp); - const char *endstream_endobj = + const char* endstream_endobj = "endstream\n" "endobj\n"; AppendString(endstream_endobj); objsize += strlen(endstream_endobj); AppendPDFObjectDIY(objsize); - const char *stream = + const char* stream = "/CIDInit /ProcSet findresource begin\n" "12 dict begin\n" "begincmap\n" @@ -625,7 +620,8 @@ bool TessPDFRenderer::BeginDocumentHandler() { "stream\n" "%s" "endstream\n" - "endobj\n", (unsigned long) strlen(stream), stream); + "endobj\n", + (unsigned long)strlen(stream), stream); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); @@ -635,8 +631,8 @@ bool TessPDFRenderer::BeginDocumentHandler() { "<<\n" " /Ascent %d\n" " /CapHeight %d\n" - " /Descent -1\n" // Spec says must be negative - " /Flags 5\n" // FixedPitch + Symbolic + " /Descent -1\n" // Spec says must be negative + " /Flags 5\n" // FixedPitch + Symbolic " /FontBBox [ 0 0 %d %d ]\n" " /FontFile2 %ld 0 R\n" " /FontName /GlyphLessFont\n" @@ -645,18 +641,15 @@ bool TessPDFRenderer::BeginDocumentHandler() { " /Type /FontDescriptor\n" ">>\n" "endobj\n", - 1000, - 1000, - 1000 / kCharWidth, - 1000, - 8L // Font data - ); + 1000, 1000, 1000 / kCharWidth, 1000, + 8L // Font data + ); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); n = snprintf(buf, sizeof(buf), "%s/pdf.ttf", datadir_); if (n >= sizeof(buf)) return false; - FILE *fp = fopen(buf, "rb"); + FILE* fp = fopen(buf, "rb"); if (!fp) { tprintf("Can not open file \"%s\"!\n", buf); return false; @@ -681,12 +674,13 @@ bool TessPDFRenderer::BeginDocumentHandler() { " /Length %ld\n" " /Length1 %ld\n" ">>\n" - "stream\n", size, size); + "stream\n", + size, size); if (n >= sizeof(buf)) { return false; } AppendString(buf); - objsize = strlen(buf); + objsize = strlen(buf); AppendData(buffer.get(), size); objsize += size; AppendString(endstream_endobj); @@ -695,29 +689,25 @@ bool TessPDFRenderer::BeginDocumentHandler() { return true; } -bool TessPDFRenderer::imageToPDFObj(Pix *pix, - char *filename, - long int objnum, - char **pdf_object, - long int *pdf_object_size) { +bool TessPDFRenderer::imageToPDFObj(Pix* pix, char* filename, long int objnum, + char** pdf_object, + long int* pdf_object_size) { size_t n; char b0[kBasicBufSize]; char b1[kBasicBufSize]; char b2[kBasicBufSize]; - if (!pdf_object_size || !pdf_object) - return false; + if (!pdf_object_size || !pdf_object) return false; *pdf_object = nullptr; *pdf_object_size = 0; - if (!filename) - return false; + if (!filename) return false; - L_Compressed_Data *cid = nullptr; + L_Compressed_Data* cid = nullptr; const int kJpegQuality = 85; int format, sad; findFileFormat(filename, &format); if (pixGetSpp(pix) == 4 && format == IFF_PNG) { - Pix *p1 = pixAlphaBlendUniform(pix, 0xffffff00); + Pix* p1 = pixAlphaBlendUniform(pix, 0xffffff00); sad = pixGenerateCIData(p1, L_FLATE_ENCODE, 0, 0, &cid); pixDestroy(&p1); } else { @@ -729,9 +719,9 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, return false; } - const char *group4 = ""; - const char *filter; - switch(cid->type) { + const char* group4 = ""; + const char* filter; + switch (cid->type) { case L_FLATE_ENCODE: filter = "/FlateDecode"; break; @@ -753,7 +743,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, // Maybe someday we will accept RGBA but today is not that day. // It requires creating an /SMask for the alpha channel. // http://stackoverflow.com/questions/14220221 - const char *colorspace; + const char* colorspace; if (cid->ncolors > 0) { n = snprintf(b0, sizeof(b0), " /ColorSpace [ /Indexed /DeviceRGB %d %s ]\n", @@ -785,7 +775,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, "<<\n" " /Length %ld\n" " /Subtype /Image\n", - objnum, (unsigned long) cid->nbytescomp); + objnum, (unsigned long)cid->nbytescomp); if (n >= sizeof(b1)) { l_CIDataDestroy(&cid); return false; @@ -806,14 +796,14 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, " >>\n" ">>\n" "stream\n", - cid->w, cid->h, cid->bps, filter, predictor, cid->spp, - group4, cid->w, cid->bps); + cid->w, cid->h, cid->bps, filter, predictor, cid->spp, group4, + cid->w, cid->bps); if (n >= sizeof(b2)) { l_CIDataDestroy(&cid); return false; } - const char *b3 = + const char* b3 = "endstream\n" "endobj\n"; @@ -826,7 +816,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len; *pdf_object = new char[*pdf_object_size]; - char *p = *pdf_object; + char* p = *pdf_object; memcpy(p, b1, b1_len); p += b1_len; memcpy(p, colorspace, colorspace_len); @@ -844,16 +834,15 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { size_t n; char buf[kBasicBufSize]; char buf2[kBasicBufSize]; - Pix *pix = api->GetInputImage(); - char *filename = (char *)api->GetInputName(); + Pix* pix = api->GetInputImage(); + char* filename = (char*)api->GetInputName(); int ppi = api->GetSourceYResolution(); - if (!pix || ppi <= 0) - return false; + if (!pix || ppi <= 0) return false; double width = pixGetWidth(pix) * 72.0 / ppi; double height = pixGetHeight(pix) * 72.0 / ppi; snprintf(buf2, sizeof(buf2), "/XObject << /Im1 %ld 0 R >>\n", obj_ + 2); - const char *xobject = (textonly_) ? "" : buf2; + const char* xobject = (textonly_) ? "" : buf2; // PAGE n = snprintf(buf, sizeof(buf), @@ -885,25 +874,26 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { const std::unique_ptr pdftext(GetPDFTextObjects(api, width, height)); const size_t pdftext_len = strlen(pdftext.get()); size_t len; - unsigned char *comp_pdftext = zlibCompress( - reinterpret_cast(pdftext.get()), pdftext_len, &len); + unsigned char* comp_pdftext = zlibCompress( + reinterpret_cast(pdftext.get()), pdftext_len, &len); long comp_pdftext_len = len; n = snprintf(buf, sizeof(buf), "%ld 0 obj\n" "<<\n" " /Length %ld /Filter /FlateDecode\n" ">>\n" - "stream\n", obj_, comp_pdftext_len); + "stream\n", + obj_, comp_pdftext_len); if (n >= sizeof(buf)) { lept_free(comp_pdftext); return false; } AppendString(buf); long objsize = strlen(buf); - AppendData(reinterpret_cast(comp_pdftext), comp_pdftext_len); + AppendData(reinterpret_cast(comp_pdftext), comp_pdftext_len); objsize += comp_pdftext_len; lept_free(comp_pdftext); - const char *b2 = + const char* b2 = "endstream\n" "endobj\n"; AppendString(b2); @@ -911,7 +901,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { AppendPDFObjectDIY(objsize); if (!textonly_) { - char *pdf_object = nullptr; + char* pdf_object = nullptr; if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) { return false; } @@ -922,7 +912,6 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { return true; } - bool TessPDFRenderer::EndDocumentHandler() { size_t n; char buf[kBasicBufSize]; @@ -940,13 +929,13 @@ bool TessPDFRenderer::EndDocumentHandler() { "%ld 0 obj\n" "<<\n" " /Type /Pages\n" - " /Kids [ ", kPagesObjectNumber); + " /Kids [ ", + kPagesObjectNumber); if (n >= sizeof(buf)) return false; AppendString(buf); - size_t pages_objsize = strlen(buf); + size_t pages_objsize = strlen(buf); for (size_t i = 0; i < pages_.unsigned_size(); i++) { - n = snprintf(buf, sizeof(buf), - "%ld 0 R ", pages_[i]); + n = snprintf(buf, sizeof(buf), "%ld 0 R ", pages_[i]); if (n >= sizeof(buf)) return false; AppendString(buf); pages_objsize += strlen(buf); @@ -955,11 +944,12 @@ bool TessPDFRenderer::EndDocumentHandler() { "]\n" " /Count %d\n" ">>\n" - "endobj\n", pages_.size()); + "endobj\n", + pages_.size()); if (n >= sizeof(buf)) return false; AppendString(buf); pages_objsize += strlen(buf); - offsets_.back() += pages_objsize; // manipulation #2 + offsets_.back() += pages_objsize; // manipulation #2 // INFO STRING utf16_title = "FEFF"; // byte_order_marker @@ -980,15 +970,16 @@ bool TessPDFRenderer::EndDocumentHandler() { " /Title <%s>\n" ">>\n" "endobj\n", - obj_, tesseract::TessBaseAPI::Version(), - datestr, utf16_title.c_str()); + obj_, tesseract::TessBaseAPI::Version(), datestr, + utf16_title.c_str()); lept_free(datestr); if (n >= sizeof(buf)) return false; AppendPDFObject(buf); n = snprintf(buf, sizeof(buf), "xref\n" "0 %ld\n" - "0000000000 65535 f \n", obj_); + "0000000000 65535 f \n", + obj_); if (n >= sizeof(buf)) return false; AppendString(buf); for (int i = 1; i < obj_; i++) { @@ -1007,8 +998,8 @@ bool TessPDFRenderer::EndDocumentHandler() { "%ld\n" "%%%%EOF\n", obj_, - 1L, // catalog - obj_ - 1, // info + 1L, // catalog + obj_ - 1, // info offsets_.back()); if (n >= sizeof(buf)) return false; AppendString(buf); diff --git a/src/api/renderer.cpp b/src/api/renderer.cpp index bd59a5d3a3..393ab90fb9 100644 --- a/src/api/renderer.cpp +++ b/src/api/renderer.cpp @@ -30,10 +30,11 @@ namespace tesseract { /********************************************************************** * Base Renderer interface implementation **********************************************************************/ -TessResultRenderer::TessResultRenderer(const char *outputbase, +TessResultRenderer::TessResultRenderer(const char* outputbase, const char* extension) : file_extension_(extension), - title_(""), imagenum_(-1), + title_(""), + imagenum_(-1), fout_(stdout), next_(nullptr), happy_(true) { @@ -108,21 +109,15 @@ void TessResultRenderer::AppendData(const char* s, int len) { if (n != len) happy_ = false; } -bool TessResultRenderer::BeginDocumentHandler() { - return happy_; -} - -bool TessResultRenderer::EndDocumentHandler() { - return happy_; -} +bool TessResultRenderer::BeginDocumentHandler() { return happy_; } +bool TessResultRenderer::EndDocumentHandler() { return happy_; } /********************************************************************** * UTF8 Text Renderer interface implementation **********************************************************************/ -TessTextRenderer::TessTextRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "txt") { -} +TessTextRenderer::TessTextRenderer(const char* outputbase) + : TessResultRenderer(outputbase, "txt") {} bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { const std::unique_ptr utf8(api->GetUTF8Text()); @@ -143,14 +138,14 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { /********************************************************************** * HOcr Text Renderer interface implementation **********************************************************************/ -TessHOcrRenderer::TessHOcrRenderer(const char *outputbase) +TessHOcrRenderer::TessHOcrRenderer(const char* outputbase) : TessResultRenderer(outputbase, "hocr") { - font_info_ = false; + font_info_ = false; } -TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) +TessHOcrRenderer::TessHOcrRenderer(const char* outputbase, bool font_info) : TessResultRenderer(outputbase, "hocr") { - font_info_ = font_info; + font_info_ = font_info; } bool TessHOcrRenderer::BeginDocumentHandler() { @@ -166,12 +161,11 @@ bool TessHOcrRenderer::BeginDocumentHandler() { "\n" " \n" + "' />\n" " \n" "\n\n"); @@ -229,9 +223,8 @@ bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { /********************************************************************** * UNLV Text Renderer interface implementation **********************************************************************/ -TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "unlv") { -} +TessUnlvRenderer::TessUnlvRenderer(const char* outputbase) + : TessResultRenderer(outputbase, "unlv") {} bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) { const std::unique_ptr unlv(api->GetUNLVText()); @@ -245,9 +238,8 @@ bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) { /********************************************************************** * BoxText Renderer interface implementation **********************************************************************/ -TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "box") { -} +TessBoxTextRenderer::TessBoxTextRenderer(const char* outputbase) + : TessResultRenderer(outputbase, "box") {} bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { const std::unique_ptr text(api->GetBoxText(imagenum())); diff --git a/src/api/renderer.h b/src/api/renderer.h index e06ee81e1e..f16d8e9e1b 100644 --- a/src/api/renderer.h +++ b/src/api/renderer.h @@ -43,96 +43,95 @@ class TessBaseAPI; * in addition to the heuristics for producing it. */ class TESS_API TessResultRenderer { - public: - virtual ~TessResultRenderer(); - - // Takes ownership of pointer so must be new'd instance. - // Renderers aren't ordered, but appends the sequences of next parameter - // and existing next(). The renderers should be unique across both lists. - void insert(TessResultRenderer* next); - - // Returns the next renderer or nullptr. - TessResultRenderer* next() { return next_; } - - /** - * Starts a new document with the given title. - * This clears the contents of the output data. - * Title should use UTF-8 encoding. - */ - bool BeginDocument(const char* title); - - /** - * Adds the recognized text from the source image to the current document. - * Invalid if BeginDocument not yet called. - * - * Note that this API is a bit weird but is designed to fit into the - * current TessBaseAPI implementation where the api has lots of state - * information that we might want to add in. - */ - bool AddImage(TessBaseAPI* api); - - /** - * Finishes the document and finalizes the output data - * Invalid if BeginDocument not yet called. - */ - bool EndDocument(); - - const char* file_extension() const { return file_extension_; } - const char* title() const { return title_.c_str(); } - - /** - * Returns the index of the last image given to AddImage - * (i.e. images are incremented whether the image succeeded or not) - * - * This is always defined. It means either the number of the - * current image, the last image ended, or in the completed document - * depending on when in the document lifecycle you are looking at it. - * Will return -1 if a document was never started. - */ - int imagenum() const { return imagenum_; } - - protected: - /** - * Called by concrete classes. - * - * outputbase is the name of the output file excluding - * extension. For example, "/path/to/chocolate-chip-cookie-recipe" - * - * extension indicates the file extension to be used for output - * files. For example "pdf" will produce a .pdf file, and "hocr" - * will produce .hocr files. - */ - TessResultRenderer(const char *outputbase, - const char* extension); - - // Hook for specialized handling in BeginDocument() - virtual bool BeginDocumentHandler(); - - // This must be overridden to render the OCR'd results - virtual bool AddImageHandler(TessBaseAPI* api) = 0; - - // Hook for specialized handling in EndDocument() - virtual bool EndDocumentHandler(); - - // Renderers can call this to append '\0' terminated strings into - // the output string returned by GetOutput. - // This method will grow the output buffer if needed. - void AppendString(const char* s); - - // Renderers can call this to append binary byte sequences into - // the output string returned by GetOutput. Note that s is not necessarily - // '\0' terminated (and can contain '\0' within it). - // This method will grow the output buffer if needed. - void AppendData(const char* s, int len); - - private: - const char* file_extension_; // standard extension for generated output - STRING title_; // title of document being renderered - int imagenum_; // index of last image added - - FILE* fout_; // output file pointer - TessResultRenderer* next_; // Can link multiple renderers together - bool happy_; // I get grumpy when the disk fills up, etc. + public: + virtual ~TessResultRenderer(); + + // Takes ownership of pointer so must be new'd instance. + // Renderers aren't ordered, but appends the sequences of next parameter + // and existing next(). The renderers should be unique across both lists. + void insert(TessResultRenderer* next); + + // Returns the next renderer or nullptr. + TessResultRenderer* next() { return next_; } + + /** + * Starts a new document with the given title. + * This clears the contents of the output data. + * Title should use UTF-8 encoding. + */ + bool BeginDocument(const char* title); + + /** + * Adds the recognized text from the source image to the current document. + * Invalid if BeginDocument not yet called. + * + * Note that this API is a bit weird but is designed to fit into the + * current TessBaseAPI implementation where the api has lots of state + * information that we might want to add in. + */ + bool AddImage(TessBaseAPI* api); + + /** + * Finishes the document and finalizes the output data + * Invalid if BeginDocument not yet called. + */ + bool EndDocument(); + + const char* file_extension() const { return file_extension_; } + const char* title() const { return title_.c_str(); } + + /** + * Returns the index of the last image given to AddImage + * (i.e. images are incremented whether the image succeeded or not) + * + * This is always defined. It means either the number of the + * current image, the last image ended, or in the completed document + * depending on when in the document lifecycle you are looking at it. + * Will return -1 if a document was never started. + */ + int imagenum() const { return imagenum_; } + + protected: + /** + * Called by concrete classes. + * + * outputbase is the name of the output file excluding + * extension. For example, "/path/to/chocolate-chip-cookie-recipe" + * + * extension indicates the file extension to be used for output + * files. For example "pdf" will produce a .pdf file, and "hocr" + * will produce .hocr files. + */ + TessResultRenderer(const char* outputbase, const char* extension); + + // Hook for specialized handling in BeginDocument() + virtual bool BeginDocumentHandler(); + + // This must be overridden to render the OCR'd results + virtual bool AddImageHandler(TessBaseAPI* api) = 0; + + // Hook for specialized handling in EndDocument() + virtual bool EndDocumentHandler(); + + // Renderers can call this to append '\0' terminated strings into + // the output string returned by GetOutput. + // This method will grow the output buffer if needed. + void AppendString(const char* s); + + // Renderers can call this to append binary byte sequences into + // the output string returned by GetOutput. Note that s is not necessarily + // '\0' terminated (and can contain '\0' within it). + // This method will grow the output buffer if needed. + void AppendData(const char* s, int len); + + private: + const char* file_extension_; // standard extension for generated output + STRING title_; // title of document being renderered + int imagenum_; // index of last image added + + FILE* fout_; // output file pointer + TessResultRenderer* next_; // Can link multiple renderers together + bool happy_; // I get grumpy when the disk fills up, etc. }; /** @@ -140,7 +139,7 @@ class TESS_API TessResultRenderer { */ class TESS_API TessTextRenderer : public TessResultRenderer { public: - explicit TessTextRenderer(const char *outputbase); + explicit TessTextRenderer(const char* outputbase); protected: virtual bool AddImageHandler(TessBaseAPI* api); @@ -151,8 +150,8 @@ class TESS_API TessTextRenderer : public TessResultRenderer { */ class TESS_API TessHOcrRenderer : public TessResultRenderer { public: - explicit TessHOcrRenderer(const char *outputbase, bool font_info); - explicit TessHOcrRenderer(const char *outputbase); + explicit TessHOcrRenderer(const char* outputbase, bool font_info); + explicit TessHOcrRenderer(const char* outputbase); protected: virtual bool BeginDocumentHandler(); @@ -177,7 +176,7 @@ class TESS_API TessTsvRenderer : public TessResultRenderer { virtual bool EndDocumentHandler(); private: - bool font_info_; // whether to print font information + bool font_info_; // whether to print font information }; /** @@ -187,7 +186,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer { public: // datadir is the location of the TESSDATA. We need it because // we load a custom PDF font from this location. - TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false); + TessPDFRenderer(const char* outputbase, const char* datadir, + bool textonly = false); protected: virtual bool BeginDocumentHandler(); @@ -203,26 +203,25 @@ class TESS_API TessPDFRenderer : public TessResultRenderer { long int obj_; // counter for PDF objects GenericVector offsets_; // offset of every PDF object in bytes GenericVector pages_; // object number for every /Page object - const char *datadir_; // where to find the custom font + const char* datadir_; // where to find the custom font bool textonly_; // skip images if set // Bookkeeping only. DIY = Do It Yourself. void AppendPDFObjectDIY(size_t objectsize); // Bookkeeping + emit data. - void AppendPDFObject(const char *data); + void AppendPDFObject(const char* data); // Create the /Contents object for an entire page. char* GetPDFTextObjects(TessBaseAPI* api, double width, double height); // Turn an image into a PDF object. Only transcode if we have to. - static bool imageToPDFObj(Pix *pix, char *filename, long int objnum, - char **pdf_object, long int *pdf_object_size); + static bool imageToPDFObj(Pix* pix, char* filename, long int objnum, + char** pdf_object, long int* pdf_object_size); }; - /** * Renders tesseract output into a plain UTF-8 text string */ class TESS_API TessUnlvRenderer : public TessResultRenderer { public: - explicit TessUnlvRenderer(const char *outputbase); + explicit TessUnlvRenderer(const char* outputbase); protected: virtual bool AddImageHandler(TessBaseAPI* api); @@ -233,7 +232,7 @@ class TESS_API TessUnlvRenderer : public TessResultRenderer { */ class TESS_API TessBoxTextRenderer : public TessResultRenderer { public: - explicit TessBoxTextRenderer(const char *outputbase); + explicit TessBoxTextRenderer(const char* outputbase); protected: virtual bool AddImageHandler(TessBaseAPI* api); diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp index 7a652a0826..cb0dadfed4 100644 --- a/src/api/tesseractmain.cpp +++ b/src/api/tesseractmain.cpp @@ -94,13 +94,13 @@ static void PrintVersionInfo() { } } } - } + } #endif - if (SIMDDetect::IsAVX512BWAvailable()) printf(" Found AVX512BW\n"); - if (SIMDDetect::IsAVX512FAvailable()) printf(" Found AVX512F\n"); - if (SIMDDetect::IsAVX2Available()) printf(" Found AVX2\n"); - if (SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n"); - if (SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n"); + if (SIMDDetect::IsAVX512BWAvailable()) printf(" Found AVX512BW\n"); + if (SIMDDetect::IsAVX512FAvailable()) printf(" Found AVX512F\n"); + if (SIMDDetect::IsAVX2Available()) printf(" Found AVX2\n"); + if (SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n"); + if (SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n"); } static void PrintHelpForPSM() { @@ -143,7 +143,8 @@ static void PrintHelpExtra(const char* program) { " %s --help | --help-extra | --help-psm | --help-oem | --version\n" " %s --list-langs [--tessdata-dir PATH]\n" " %s --print-parameters [options...] [configfile...]\n" - " %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n" + " %s imagename|imagelist|stdin outputbase|stdout [options...] " + "[configfile...]\n" "\n" "OCR options:\n" " --tessdata-dir PATH Specify the location of tessdata path.\n" @@ -156,8 +157,7 @@ static void PrintHelpExtra(const char* program) { " --oem NUM Specify OCR Engine mode.\n" "NOTE: These options must occur before any configfile.\n" "\n", - program, program, program, program - ); + program, program, program, program); PrintHelpForPSM(); printf("\n"); @@ -172,8 +172,7 @@ static void PrintHelpExtra(const char* program) { " --help-oem Show OCR Engine modes.\n" " -v, --version Show version information.\n" " --list-langs List available languages for tesseract engine.\n" - " --print-parameters Print tesseract parameters.\n" - ); + " --print-parameters Print tesseract parameters.\n"); } static void PrintHelpMessage(const char* program) { @@ -191,9 +190,9 @@ static void PrintHelpMessage(const char* program) { " --help Show this help message.\n" " --help-extra Show extra help for advanced users.\n" " --version Show version information.\n" - " --list-langs List available languages for tesseract engine.\n", - program, program, program - ); + " --list-langs List available languages for tesseract " + "engine.\n", + program, program, program); } static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, @@ -258,7 +257,8 @@ static void FixPageSegMode(tesseract::TessBaseAPI* api, static void checkArgValues(int arg, const char* mode, int count) { if (arg >= count || arg < 0) { - printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1); + printf("Invalid %s value, please enter a number between 0-%d\n", mode, + count - 1); exit(0); } } @@ -266,9 +266,8 @@ static void checkArgValues(int arg, const char* mode, int count) { // NOTE: arg_i is used here to avoid ugly *i so many times in this function static void ParseArgs(const int argc, char** argv, const char** lang, const char** image, const char** outputbase, - const char** datapath, - bool* list_langs, bool* print_parameters, - GenericVector* vars_vec, + const char** datapath, bool* list_langs, + bool* print_parameters, GenericVector* vars_vec, GenericVector* vars_values, int* arg_i, tesseract::PageSegMode* pagesegmode, tesseract::OcrEngineMode* enginemode) { @@ -321,7 +320,7 @@ static void ParseArgs(const int argc, char** argv, const char** lang, noocr = true; *list_langs = true; } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { - checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT); + checkArgValues(atoi(argv[i + 1]), "PSM", tesseract::PSM_COUNT); *pagesegmode = static_cast(atoi(argv[i + 1])); ++i; } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { @@ -467,8 +466,9 @@ int main(int argc, char** argv) { api.SetOutputName(outputbase); - const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), - argc - arg_i, &vars_vec, &vars_values, false); + const int init_failed = + api.Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i, + &vars_vec, &vars_values, false); SetVariablesFromCLArgs(&api, argc, argv); @@ -483,11 +483,11 @@ int main(int argc, char** argv) { } if (print_parameters) { - FILE* fout = stdout; - fprintf(stdout, "Tesseract parameters:\n"); - api.PrintVariables(fout); - api.End(); - return EXIT_SUCCESS; + FILE* fout = stdout; + fprintf(stdout, "Tesseract parameters:\n"); + api.PrintVariables(fout); + api.End(); + return EXIT_SUCCESS; } FixPageSegMode(&api, pagesegmode); diff --git a/src/arch/intsimdmatrixavx2.cpp b/src/arch/intsimdmatrixavx2.cpp index c8707e7304..937b066ace 100644 --- a/src/arch/intsimdmatrixavx2.cpp +++ b/src/arch/intsimdmatrixavx2.cpp @@ -20,8 +20,8 @@ #ifdef __AVX2__ #include -#include #include +#include #include namespace tesseract { diff --git a/src/ccmain/adaptions.cpp b/src/ccmain/adaptions.cpp index 59e8e5e8fb..20a558e367 100644 --- a/src/ccmain/adaptions.cpp +++ b/src/ccmain/adaptions.cpp @@ -19,17 +19,17 @@ **********************************************************************/ #ifdef __UNIX__ -#include +#include #endif -#include -#include -#include "tessbox.h" -#include "tessvars.h" -#include "memry.h" -#include "reject.h" -#include "control.h" -#include "stopper.h" -#include "tesseractclass.h" +#include +#include +#include "control.h" +#include "memry.h" +#include "reject.h" +#include "stopper.h" +#include "tessbox.h" +#include "tesseractclass.h" +#include "tessvars.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -37,21 +37,20 @@ #endif namespace tesseract { -bool Tesseract::word_adaptable( //should we adapt? - WERD_RES* word, - uint16_t mode) { +bool Tesseract::word_adaptable( // should we adapt? + WERD_RES* word, uint16_t mode) { if (tessedit_adaption_debug) { tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n", - word->best_choice == nullptr ? "" : - word->best_choice->unichar_string().string(), - word->best_choice->rating(), word->best_choice->certainty()); + word->best_choice == nullptr + ? "" + : word->best_choice->unichar_string().string(), + word->best_choice->rating(), word->best_choice->certainty()); } BOOL8 status = FALSE; BITS16 flags(mode); - enum MODES - { + enum MODES { ADAPTABLE_WERD, ACCEPTABLE_WERD, CHECK_DAWGS, @@ -68,45 +67,45 @@ bool Tesseract::word_adaptable( //should we adapt? return false; } - if (flags.bit (ADAPTABLE_WERD)) { + if (flags.bit(ADAPTABLE_WERD)) { status |= word->tess_would_adapt; // result of Classify::AdaptableWord() if (tessedit_adaption_debug && !status) { tprintf("tess_would_adapt bit is false\n"); } } - if (flags.bit (ACCEPTABLE_WERD)) { + if (flags.bit(ACCEPTABLE_WERD)) { status |= word->tess_accepted; if (tessedit_adaption_debug && !status) { tprintf("tess_accepted bit is false\n"); } } - if (!status) { // If not set then - return false; // ignore other checks + if (!status) { // If not set then + return false; // ignore other checks } - if (flags.bit (CHECK_DAWGS) && - (word->best_choice->permuter () != SYSTEM_DAWG_PERM) && - (word->best_choice->permuter () != FREQ_DAWG_PERM) && - (word->best_choice->permuter () != USER_DAWG_PERM) && - (word->best_choice->permuter () != NUMBER_PERM)) { + if (flags.bit(CHECK_DAWGS) && + (word->best_choice->permuter() != SYSTEM_DAWG_PERM) && + (word->best_choice->permuter() != FREQ_DAWG_PERM) && + (word->best_choice->permuter() != USER_DAWG_PERM) && + (word->best_choice->permuter() != NUMBER_PERM)) { if (tessedit_adaption_debug) tprintf("word not in dawgs\n"); return false; } - if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, false)) { + if (flags.bit(CHECK_ONE_ELL_CONFLICT) && one_ell_conflict(word, false)) { if (tessedit_adaption_debug) tprintf("word has ell conflict\n"); return false; } - if (flags.bit (CHECK_SPACES) && - (strchr(word->best_choice->unichar_string().string(), ' ') != nullptr)) { + if (flags.bit(CHECK_SPACES) && + (strchr(word->best_choice->unichar_string().string(), ' ') != nullptr)) { if (tessedit_adaption_debug) tprintf("word contains spaces\n"); return false; } - if (flags.bit (CHECK_AMBIG_WERD) && + if (flags.bit(CHECK_AMBIG_WERD) && word->best_choice->dangerous_ambig_found()) { if (tessedit_adaption_debug) tprintf("word is ambiguous\n"); return false; diff --git a/src/ccmain/applybox.cpp b/src/ccmain/applybox.cpp index 3b40889f91..64c5578316 100644 --- a/src/ccmain/applybox.cpp +++ b/src/ccmain/applybox.cpp @@ -26,11 +26,11 @@ #include "allheaders.h" #include "boxread.h" #include "chopper.h" +#include "genericvector.h" #include "pageres.h" +#include "tesseractclass.h" #include "unichar.h" #include "unicharset.h" -#include "tesseractclass.h" -#include "genericvector.h" /** Max number of blobs to classify together in FindSegmentation. */ const int kMaxGroupSize = 4; @@ -75,15 +75,13 @@ const double kMaxXHeightDeviationFraction = 0.125; namespace tesseract { -static void clear_any_old_text(BLOCK_LIST *block_list) { +static void clear_any_old_text(BLOCK_LIST* block_list) { BLOCK_IT block_it(block_list); - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { ROW_IT row_it(block_it.data()->row_list()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { WERD_IT word_it(row_it.data()->word_list()); - for (word_it.mark_cycle_pt(); - !word_it.cycled_list(); word_it.forward()) { + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { word_it.data()->set_text(""); } } @@ -110,9 +108,8 @@ static void clear_any_old_text(BLOCK_LIST *block_list) { // Instead, the correct_text member of WERD_RES is set, and this may be later // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords // is not required before calling ApplyBoxTraining. -PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname, - bool find_segmentation, - BLOCK_LIST *block_list) { +PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname, bool find_segmentation, + BLOCK_LIST* block_list) { GenericVector boxes; GenericVector texts, full_texts; if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts, @@ -129,8 +126,8 @@ PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname, // In word mode, we use the boxes to make a word for each box, but // in blob mode we use the existing words and maximally chop them first. - PAGE_RES* page_res = find_segmentation ? - nullptr : SetupApplyBoxes(boxes, block_list); + PAGE_RES* page_res = + find_segmentation ? nullptr : SetupApplyBoxes(boxes, block_list); clear_any_old_text(block_list); for (int i = 0; i < boxes.size() - 1; i++) { @@ -140,7 +137,7 @@ PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname, foundit = ResegmentCharBox(page_res, nullptr, boxes[i], boxes[i + 1], full_texts[i].string()); } else { - foundit = ResegmentCharBox(page_res, &boxes[i-1], boxes[i], + foundit = ResegmentCharBox(page_res, &boxes[i - 1], boxes[i], boxes[i + 1], full_texts[i].string()); } } else { @@ -171,11 +168,10 @@ PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname, } // Helper computes median xheight in the image. -static double MedianXHeight(BLOCK_LIST *block_list) { +static double MedianXHeight(BLOCK_LIST* block_list) { BLOCK_IT block_it(block_list); STATS xheights(0, block_it.data()->pdblk.bounding_box().height()); - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { ROW_IT row_it(block_it.data()->row_list()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { xheights.add(IntCastRounded(row_it.data()->x_height()), 1); @@ -186,7 +182,7 @@ static double MedianXHeight(BLOCK_LIST *block_list) { /// Any row xheight that is significantly different from the median is set /// to the median. -void Tesseract::PreenXHeights(BLOCK_LIST *block_list) { +void Tesseract::PreenXHeights(BLOCK_LIST* block_list) { const double median_xheight = MedianXHeight(block_list); const double max_deviation = kMaxXHeightDeviationFraction * median_xheight; // Strip all fuzzy space markers to simplify the PAGE_RES. @@ -194,13 +190,13 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) { for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOCK* block = b_it.data(); ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) { + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { ROW* row = r_it.data(); const double diff = fabs(row->x_height() - median_xheight); if (diff > max_deviation) { if (applybox_debug) { - tprintf("row xheight=%g, but median xheight = %g\n", - row->x_height(), median_xheight); + tprintf("row xheight=%g, but median xheight = %g\n", row->x_height(), + median_xheight); } row->set_x_height(static_cast(median_xheight)); } @@ -211,14 +207,14 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) { /// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: /// All fuzzy spaces are removed, and all the words are maximally chopped. PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector& boxes, - BLOCK_LIST *block_list) { + BLOCK_LIST* block_list) { PreenXHeights(block_list); // Strip all fuzzy space markers to simplify the PAGE_RES. BLOCK_IT b_it(block_list); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOCK* block = b_it.data(); ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) { + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { ROW* row = r_it.data(); WERD_IT w_it(row->word_list()); for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { @@ -236,8 +232,7 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector& boxes, PAGE_RES_IT pr_it(page_res); WERD_RES* word_res; while ((word_res = pr_it.word()) != nullptr) { - MaximallyChopWord(boxes, pr_it.block()->block, - pr_it.row()->row, word_res); + MaximallyChopWord(boxes, pr_it.block()->block, pr_it.row()->row, word_res); pr_it.forward(); } return page_res; @@ -247,14 +242,11 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector& boxes, /// The word_res will contain filled chopped_word, seam_array, denorm, /// box_word and best_state for the maximally chopped word. void Tesseract::MaximallyChopWord(const GenericVector& boxes, - BLOCK* block, ROW* row, - WERD_RES* word_res) { - if (!word_res->SetupForRecognition(unicharset, this, BestPix(), - tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - row, block)) { + BLOCK* block, ROW* row, WERD_RES* word_res) { + if (!word_res->SetupForRecognition( + unicharset, this, BestPix(), tessedit_ocr_engine_mode, nullptr, + classify_bln_numeric_mode, textord_use_cjk_fp_model, + poly_allow_detailed_fx, row, block)) { word_res->CloneChoppedToRebuild(); return; } @@ -293,9 +285,9 @@ void Tesseract::MaximallyChopWord(const GenericVector& boxes, left_choice->set_rating(rating); left_choice->set_certainty(-rating); // combine confidence w/ serial # - BLOB_CHOICE* right_choice = new BLOB_CHOICE(++right_chop_index, - rating - 0.125f, -rating, -1, - 0.0f, 0.0f, 0.0f, BCC_FAKE); + BLOB_CHOICE* right_choice = + new BLOB_CHOICE(++right_chop_index, rating - 0.125f, -rating, -1, + 0.0f, 0.0f, 0.0f, BCC_FAKE); blob_choices.insert(right_choice, blob_number + 1); } } @@ -316,7 +308,7 @@ void Tesseract::MaximallyChopWord(const GenericVector& boxes, /// miss metric gets the blob. static double BoxMissMetric(const TBOX& box1, const TBOX& box2) { const int overlap_area = box1.intersection(box2).area(); - double miss_metric = box1.area()- overlap_area; + double miss_metric = box1.area() - overlap_area; miss_metric /= box1.area(); miss_metric *= box2.area() - overlap_area; miss_metric /= box2.area(); @@ -333,7 +325,7 @@ static double BoxMissMetric(const TBOX& box1, const TBOX& box2) { /// /// This means that occasionally, blobs may be incorrectly segmented if the /// chopper fails to find a suitable chop point. -bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box, +bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box, const TBOX& box, const TBOX& next_box, const char* correct_text) { if (applybox_debug > 1) { @@ -343,8 +335,7 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box, WERD_RES* word_res; for (word_res = page_res_it.word(); word_res != nullptr; word_res = page_res_it.forward()) { - if (!word_res->box_word->bounding_box().major_overlap(box)) - continue; + if (!word_res->box_word->bounding_box().major_overlap(box)) continue; if (applybox_debug > 1) { tprintf("Checking word box:"); word_res->box_word->bounding_box().print(); @@ -355,8 +346,7 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box, int blob_count = 0; for (blob_count = 0; i + blob_count < word_len; ++blob_count) { TBOX blob_box = word_res->box_word->BlobBox(i + blob_count); - if (!blob_box.major_overlap(box)) - break; + if (!blob_box.major_overlap(box)) break; if (word_res->correct_text[i + blob_count].length() > 0) break; // Blob is claimed already. const double current_box_miss_metric = BoxMissMetric(blob_box, box); @@ -431,8 +421,8 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box, /// applying the blobs to box or next_box with the least non-overlap. /// @return false if the box was in error, which can only be caused by /// failing to find an overlapping blob for a box. -bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, - const TBOX& box, const TBOX& next_box, +bool Tesseract::ResegmentWordBox(BLOCK_LIST* block_list, const TBOX& box, + const TBOX& next_box, const char* correct_text) { if (applybox_debug > 1) { tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text); @@ -441,13 +431,11 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, BLOCK_IT b_it(block_list); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOCK* block = b_it.data(); - if (!box.major_overlap(block->pdblk.bounding_box())) - continue; + if (!box.major_overlap(block->pdblk.bounding_box())) continue; ROW_IT r_it(block->row_list()); for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { ROW* row = r_it.data(); - if (!box.major_overlap(row->bounding_box())) - continue; + if (!box.major_overlap(row->bounding_box())) continue; WERD_IT w_it(row->word_list()); for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { WERD* word = w_it.data(); @@ -457,15 +445,13 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, } if (word->text() != nullptr && word->text()[0] != '\0') continue; // Ignore words that are already done. - if (!box.major_overlap(word->bounding_box())) - continue; + if (!box.major_overlap(word->bounding_box())) continue; C_BLOB_IT blob_it(word->cblob_list()); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* blob = blob_it.data(); TBOX blob_box = blob->bounding_box(); - if (!blob_box.major_overlap(box)) - continue; + if (!blob_box.major_overlap(box)) continue; const double current_box_miss_metric = BoxMissMetric(blob_box, box); const double next_box_miss_metric = BoxMissMetric(blob_box, next_box); if (applybox_debug > 2) { @@ -532,15 +518,13 @@ bool Tesseract::ConvertStringToUnichars(const char* utf8, GenericVector* class_ids) { for (int step = 0; *utf8 != '\0'; utf8 += step) { const char* next_space = strchr(utf8, ' '); - if (next_space == nullptr) - next_space = utf8 + strlen(utf8); + if (next_space == nullptr) next_space = utf8 + strlen(utf8); step = next_space - utf8; UNICHAR_ID class_id = unicharset.unichar_to_id(utf8, step); if (class_id == INVALID_UNICHAR_ID) { return false; } - while (utf8[step] == ' ') - ++step; + while (utf8[step] == ' ') ++step; class_ids->push_back(class_id); } return true; @@ -560,9 +544,9 @@ bool Tesseract::FindSegmentation(const GenericVector& target_text, new GenericVector[word_length]; for (int i = 0; i < word_length; ++i) { for (int j = 1; j <= kMaxGroupSize && i + j <= word_length; ++j) { - BLOB_CHOICE_LIST* match_result = classify_piece( - word_res->seam_array, i, i + j - 1, "Applybox", - word_res->chopped_word, word_res->blamer_bundle); + BLOB_CHOICE_LIST* match_result = + classify_piece(word_res->seam_array, i, i + j - 1, "Applybox", + word_res->chopped_word, word_res->blamer_bundle); if (applybox_debug > 2) { tprintf("%d+%d:", i, j); print_ratings_list("Segment:", match_result, unicharset); @@ -578,9 +562,8 @@ bool Tesseract::FindSegmentation(const GenericVector& target_text, float best_rating = 0.0f; SearchForText(choices, 0, word_length, target_text, 0, 0.0f, &search_segmentation, &best_rating, &word_res->best_state); - for (int i = 0; i < word_length; ++i) - choices[i].delete_data_pointers(); - delete [] choices; + for (int i = 0; i < word_length; ++i) choices[i].delete_data_pointers(); + delete[] choices; if (word_res->best_state.empty()) { // Build the original segmentation and if it is the same length as the // truth, assume it will do. @@ -625,8 +608,8 @@ bool Tesseract::FindSegmentation(const GenericVector& target_text, void Tesseract::SearchForText(const GenericVector* choices, int choices_pos, int choices_length, const GenericVector& target_text, - int text_index, - float rating, GenericVector* segmentation, + int text_index, float rating, + GenericVector* segmentation, float* best_rating, GenericVector* best_segmentation) { const UnicharAmbigsVector& table = getDict().getUnicharAmbigs().dang_ambigs(); @@ -648,18 +631,16 @@ void Tesseract::SearchForText(const GenericVector* choices, AmbigSpec_IT spec_it(table[class_id]); for (spec_it.mark_cycle_pt(); !spec_it.cycled_list(); spec_it.forward()) { - const AmbigSpec *ambig_spec = spec_it.data(); + const AmbigSpec* ambig_spec = spec_it.data(); // We'll only do 1-1. if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID && ambig_spec->correct_ngram_id == target_text[text_index]) break; } - if (!spec_it.cycled_list()) - break; // Found an ambig. + if (!spec_it.cycled_list()) break; // Found an ambig. } } - if (choice_it.cycled_list()) - continue; // No match. + if (choice_it.cycled_list()) continue; // No match. segmentation->push_back(length); if (choices_pos + length == choices_length && text_index + 1 == target_text.size()) { @@ -680,7 +661,8 @@ void Tesseract::SearchForText(const GenericVector* choices, target_text[text_index], unicharset.id_to_unichar(target_text[text_index]), choice_it.data()->unichar_id() == target_text[text_index] - ? "Match" : "Ambig", + ? "Match" + : "Ambig", choices_pos, length); } SearchForText(choices, choices_pos + length, choices_length, target_text, @@ -748,8 +730,8 @@ void Tesseract::TidyUp(PAGE_RES* page_res) { if (applybox_debug > 0) { tprintf(" Found %d good blobs.\n", ok_blob_count); if (bad_blob_count > 0) { - tprintf(" Leaving %d unlabelled blobs in %d words.\n", - bad_blob_count, ok_word_count); + tprintf(" Leaving %d unlabelled blobs in %d words.\n", bad_blob_count, + ok_word_count); } if (unlabelled_words > 0) tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words); @@ -758,28 +740,27 @@ void Tesseract::TidyUp(PAGE_RES* page_res) { /** Logs a bad box by line in the box file and box coords.*/ void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box, - const char *box_ch, const char *err_msg) { + const char* box_ch, const char* err_msg) { tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n", - boxfile_lineno + 1, box_ch, - box.left(), box.bottom(), box.right(), box.top(), err_msg); + boxfile_lineno + 1, box_ch, box.left(), box.bottom(), box.right(), + box.top(), err_msg); } /** Creates a fake best_choice entry in each WERD_RES with the correct text.*/ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) { PAGE_RES_IT pr_it(page_res); - for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; + for (WERD_RES* word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) { - WERD_CHOICE* choice = new WERD_CHOICE(word_res->uch_set, - word_res->correct_text.size()); + WERD_CHOICE* choice = + new WERD_CHOICE(word_res->uch_set, word_res->correct_text.size()); for (int i = 0; i < word_res->correct_text.size(); ++i) { // The part before the first space is the real ground truth, and the // rest is the bounding box location and page number. GenericVector tokens; word_res->correct_text[i].split(' ', &tokens); UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].string()); - choice->append_unichar_id_space_allocated(char_id, - word_res->best_state[i], - 0.0f, 0.0f); + choice->append_unichar_id_space_allocated( + char_id, word_res->best_state[i], 0.0f, 0.0f); } word_res->ClearWordChoices(); word_res->LogNewRawChoice(choice); @@ -792,7 +773,7 @@ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) { void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) { PAGE_RES_IT pr_it(page_res); int word_count = 0; - for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; + for (WERD_RES* word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) { LearnWord(fontname.string(), word_res); ++word_count; @@ -800,5 +781,4 @@ void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) { tprintf("Generated training data for %d words\n", word_count); } - } // namespace tesseract diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index cfdc5e2f31..c614f795c8 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -23,13 +23,13 @@ #include "config_auto.h" #endif -#include #include +#include #include #ifdef __UNIX__ #include -#include #include +#include #endif #include #include "callcpp.h" @@ -49,15 +49,14 @@ #include "tessvars.h" #include "werdit.h" -#define MIN_FONT_ROW_COUNT 8 -#define MAX_XHEIGHT_DIFF 3 +#define MIN_FONT_ROW_COUNT 8 +#define MAX_XHEIGHT_DIFF 3 const char* const kBackUpConfigFile = "tempconfigdata.config"; // Min believable x-height for any text when refitting as a fraction of // original x-height const double kMinRefitXHeightFraction = 0.5; - /** * Make a word from the selected blobs and run Tess on them. * @@ -65,8 +64,7 @@ const double kMinRefitXHeightFraction = 0.5; * @param selection_box within this box */ namespace tesseract { -void Tesseract::recog_pseudo_word(PAGE_RES* page_res, - TBOX &selection_box) { +void Tesseract::recog_pseudo_word(PAGE_RES* page_res, TBOX& selection_box) { PAGE_RES_IT* it = make_pseudo_word(page_res, selection_box); if (it != nullptr) { recog_interactive(it); @@ -95,11 +93,12 @@ bool Tesseract::recog_interactive(PAGE_RES_IT* pr_it) { if (tessedit_debug_quality_metrics) { WERD_RES* word_res = pr_it->word(); word_char_quality(word_res, pr_it->row()->row, &char_qual, &good_char_qual); - tprintf("\n%d chars; word_blob_quality: %d; outline_errs: %d; " - "char_quality: %d; good_char_quality: %d\n", - word_res->reject_map.length(), - word_blob_quality(word_res, pr_it->row()->row), - word_outline_errs(word_res), char_qual, good_char_qual); + tprintf( + "\n%d chars; word_blob_quality: %d; outline_errs: %d; " + "char_quality: %d; good_char_quality: %d\n", + word_res->reject_map.length(), + word_blob_quality(word_res, pr_it->row()->row), + word_outline_errs(word_res), char_qual, good_char_qual); } return true; } @@ -121,8 +120,7 @@ bool Tesseract::recog_interactive(PAGE_RES_IT* pr_it) { // set only debug params from the word config file. bool Tesseract::ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box, - const char* word_config, - int pass) { + const char* word_config, int pass) { if (word_config != nullptr) { if (word_box.major_overlap(target_word_box)) { if (backup_config_file_ == nullptr) { @@ -130,15 +128,13 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box, FILE* config_fp = fopen(backup_config_file_, "wb"); ParamUtils::PrintParams(config_fp, params()); fclose(config_fp); - ParamUtils::ReadParamsFile(word_config, - SET_PARAM_CONSTRAINT_DEBUG_ONLY, + ParamUtils::ReadParamsFile(word_config, SET_PARAM_CONSTRAINT_DEBUG_ONLY, params()); } } else { if (backup_config_file_ != nullptr) { ParamUtils::ReadParamsFile(backup_config_file_, - SET_PARAM_CONSTRAINT_DEBUG_ONLY, - params()); + SET_PARAM_CONSTRAINT_DEBUG_ONLY, params()); backup_config_file_ = nullptr; } } @@ -149,10 +145,8 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box, } /** If tesseract is to be run, sets the words up ready for it. */ -void Tesseract::SetupAllWordsPassN(int pass_n, - const TBOX* target_word_box, - const char* word_config, - PAGE_RES* page_res, +void Tesseract::SetupAllWordsPassN(int pass_n, const TBOX* target_word_box, + const char* word_config, PAGE_RES* page_res, GenericVector* words) { // Prepare all the words. PAGE_RES_IT page_res_it(page_res); @@ -175,12 +169,10 @@ void Tesseract::SetupAllWordsPassN(int pass_n, void Tesseract::SetupWordPassN(int pass_n, WordData* word) { if (pass_n == 1 || !word->word->done) { if (pass_n == 1) { - word->word->SetupForRecognition(unicharset, this, BestPix(), - tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - word->row, word->block); + word->word->SetupForRecognition( + unicharset, this, BestPix(), tessedit_ocr_engine_mode, nullptr, + classify_bln_numeric_mode, textord_use_cjk_fp_model, + poly_allow_detailed_fx, word->row, word->block); } else if (pass_n == 2) { // TODO(rays) Should we do this on pass1 too? word->word->caps_height = 0.0; @@ -197,11 +189,10 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) { // LSTM doesn't get setup for pass2. if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) { word_res->SetupForRecognition( - lang_t->unicharset, lang_t, BestPix(), - lang_t->tessedit_ocr_engine_mode, nullptr, - lang_t->classify_bln_numeric_mode, - lang_t->textord_use_cjk_fp_model, - lang_t->poly_allow_detailed_fx, word->row, word->block); + lang_t->unicharset, lang_t, BestPix(), + lang_t->tessedit_ocr_engine_mode, nullptr, + lang_t->classify_bln_numeric_mode, lang_t->textord_use_cjk_fp_model, + lang_t->poly_allow_detailed_fx, word->row, word->block); } } } @@ -236,8 +227,8 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, } } if (monitor->deadline_exceeded() || - (monitor->cancel != nullptr && (*monitor->cancel)(monitor->cancel_this, - words->size()))) { + (monitor->cancel != nullptr && + (*monitor->cancel)(monitor->cancel_this, words->size()))) { // Timeout. Fake out the rest of the words. for (; w < words->size(); ++w) { (*words)[w].word->SetupFake(unicharset); @@ -247,8 +238,10 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, } if (word->word->tess_failed) { int s; - for (s = 0; s < word->lang_words.size() && - word->lang_words[s]->tess_failed; ++s) {} + for (s = 0; + s < word->lang_words.size() && word->lang_words[s]->tess_failed; + ++s) { + } // If all are failed, skip it. Image words are skipped by this test. if (s > word->lang_words.size()) continue; } @@ -298,19 +291,17 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, * @param dopasses 0 - all, 1 just pass 1, 2 passes 2 and higher */ -bool Tesseract::recog_all_words(PAGE_RES* page_res, - ETEXT_DESC* monitor, +bool Tesseract::recog_all_words(PAGE_RES* page_res, ETEXT_DESC* monitor, const TBOX* target_word_box, - const char* word_config, - int dopasses) { + const char* word_config, int dopasses) { PAGE_RES_IT page_res_it(page_res); if (tessedit_minimal_rej_pass1) { - tessedit_test_adaption.set_value (TRUE); - tessedit_minimal_rejection.set_value (TRUE); + tessedit_test_adaption.set_value(TRUE); + tessedit_minimal_rejection.set_value(TRUE); } - if (dopasses==0 || dopasses==1) { + if (dopasses == 0 || dopasses == 1) { page_res_it.restart_page(); // ****************** Pass 1 ******************* @@ -395,8 +386,8 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, // Fix fuzzy spaces. set_global_loc_code(LOC_FUZZY_SPACE); - if (!tessedit_test_adaption && tessedit_fix_fuzzy_spaces - && !tessedit_word_for_word && !right_to_left()) + if (!tessedit_test_adaption && tessedit_fix_fuzzy_spaces && + !tessedit_word_for_word && !right_to_left()) fix_fuzzy_spaces(monitor, stats_.word_count, page_res); // ****************** Pass 4 ******************* @@ -425,8 +416,8 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv)) output_pass(page_res_it, target_word_box); // end jetsoft - const PageSegMode pageseg_mode = static_cast( - static_cast(tessedit_pageseg_mode)); + const PageSegMode pageseg_mode = + static_cast(static_cast(tessedit_pageseg_mode)); textord_.CleanupSingleRowResult(pageseg_mode, page_res); // Remove empty words, as these mess up the result iterators. @@ -434,8 +425,8 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, page_res_it.forward()) { const WERD_RES* word = page_res_it.word(); const POLY_BLOCK* pb = page_res_it.block()->block != nullptr - ? page_res_it.block()->block->pdblk.poly_block() - : nullptr; + ? page_res_it.block()->block->pdblk.poly_block() + : nullptr; if (word->best_choice == nullptr || word->best_choice->length() == 0 || (word->best_choice->IsAllSpaces() && (pb == nullptr || pb->IsText()))) { page_res_it.DeleteCurrentWord(); @@ -448,11 +439,11 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, return true; } -void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { +void Tesseract::bigram_correction_pass(PAGE_RES* page_res) { PAGE_RES_IT word_it(page_res); - WERD_RES *w_prev = nullptr; - WERD_RES *w = word_it.word(); + WERD_RES* w_prev = nullptr; + WERD_RES* w = word_it.word(); while (true) { w_prev = w; while (word_it.forward() != nullptr && @@ -471,8 +462,8 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { continue; } // Two words sharing the same language model, excellent! - GenericVector overrides_word1; - GenericVector overrides_word2; + GenericVector overrides_word1; + GenericVector overrides_word2; const STRING orig_w1_str = w_prev->best_choice->unichar_string(); const STRING orig_w2_str = w->best_choice->unichar_string(); @@ -497,8 +488,8 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { continue; } if (tessedit_bigram_debug > 2) { - tprintf("Examining alt choices for \"%s %s\".\n", - orig_w1_str.string(), orig_w2_str.string()); + tprintf("Examining alt choices for \"%s %s\".\n", orig_w1_str.string(), + orig_w2_str.string()); } if (tessedit_bigram_debug > 1) { if (!w_prev->best_choices.singleton()) { @@ -512,7 +503,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { int best_idx = 0; WERD_CHOICE_IT prev_it(&w_prev->best_choices); for (prev_it.mark_cycle_pt(); !prev_it.cycled_list(); prev_it.forward()) { - WERD_CHOICE *p1 = prev_it.data(); + WERD_CHOICE* p1 = prev_it.data(); WERD_CHOICE strip1(w->uch_set); { int p1start, p1end; @@ -521,7 +512,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { } WERD_CHOICE_IT w_it(&w->best_choices); for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD_CHOICE *p2 = w_it.data(); + WERD_CHOICE* p2 = w_it.data(); WERD_CHOICE strip2(w->uch_set); { int p2start, p2end; @@ -546,8 +537,10 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { EqualIgnoringCaseAndTerminalPunct(*w->best_choice, *overrides_word2[best_idx])) { if (tessedit_bigram_debug > 1) { - tprintf("Top choice \"%s %s\" verified (sans case) by bigram " - "model.\n", orig_w1_str.string(), orig_w2_str.string()); + tprintf( + "Top choice \"%s %s\" verified (sans case) by bigram " + "model.\n", + orig_w1_str.string(), orig_w2_str.string()); } continue; } @@ -561,19 +554,21 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { } if (tessedit_bigram_debug > 0) { STRING choices_description; - int num_bigram_choices - = overrides_word1.size() * overrides_word2.size(); + int num_bigram_choices = + overrides_word1.size() * overrides_word2.size(); if (num_bigram_choices == 1) { choices_description = "This was the unique bigram choice."; } else { if (tessedit_bigram_debug > 1) { STRING bigrams_list; const int kMaxChoicesToPrint = 20; - for (int i = 0; i < overrides_word1.size() && - i < kMaxChoicesToPrint; i++) { - if (i > 0) { bigrams_list += ", "; } - WERD_CHOICE *p1 = overrides_word1[i]; - WERD_CHOICE *p2 = overrides_word2[i]; + for (int i = 0; + i < overrides_word1.size() && i < kMaxChoicesToPrint; i++) { + if (i > 0) { + bigrams_list += ", "; + } + WERD_CHOICE* p1 = overrides_word1[i]; + WERD_CHOICE* p2 = overrides_word2[i]; bigrams_list += p1->unichar_string() + " " + p2->unichar_string(); if (i == kMaxChoicesToPrint) { bigrams_list += " ..."; @@ -588,16 +583,14 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { } } tprintf("Replaced \"%s %s\" with \"%s %s\" with bigram model. %s\n", - orig_w1_str.string(), orig_w2_str.string(), - new_w1_str.string(), new_w2_str.string(), - choices_description.string()); + orig_w1_str.string(), orig_w2_str.string(), new_w1_str.string(), + new_w2_str.string(), choices_description.string()); } } } } -void Tesseract::rejection_passes(PAGE_RES* page_res, - ETEXT_DESC* monitor, +void Tesseract::rejection_passes(PAGE_RES* page_res, ETEXT_DESC* monitor, const TBOX* target_word_box, const char* word_config) { PAGE_RES_IT page_res_it(page_res); @@ -622,8 +615,8 @@ void Tesseract::rejection_passes(PAGE_RES* page_res, // changed by jetsoft // specific to its needs to extract one word when need if (target_word_box && - !ProcessTargetWord(word->word->bounding_box(), - *target_word_box, word_config, 4)) { + !ProcessTargetWord(word->word->bounding_box(), *target_word_box, + word_config, 4)) { page_res_it.forward(); continue; } @@ -639,8 +632,8 @@ void Tesseract::rejection_passes(PAGE_RES* page_res, stats_.doc_outline_errs += outline_errs; int16_t all_char_quality; int16_t accepted_all_char_quality; - word_char_quality(word, page_res_it.row()->row, - &all_char_quality, &accepted_all_char_quality); + word_char_quality(word, page_res_it.row()->row, &all_char_quality, + &accepted_all_char_quality); stats_.doc_char_quality += all_char_quality; const uint8_t permuter_type = word->best_choice->permuter(); if ((permuter_type == SYSTEM_DAWG_PERM) || @@ -650,39 +643,40 @@ void Tesseract::rejection_passes(PAGE_RES* page_res, stats_.doc_good_char_quality += accepted_all_char_quality; } check_debug_pt(word, 80); - if (tessedit_reject_bad_qual_wds && - (blob_quality == 0) && (outline_errs >= chars_in_word)) + if (tessedit_reject_bad_qual_wds && (blob_quality == 0) && + (outline_errs >= chars_in_word)) word->reject_map.rej_word_bad_quality(); check_debug_pt(word, 90); page_res_it.forward(); } if (tessedit_debug_quality_metrics) { - tprintf - ("QUALITY: num_chs= %d num_rejs= %d %5.3f blob_qual= %d %5.3f" - " outline_errs= %d %5.3f char_qual= %d %5.3f good_ch_qual= %d %5.3f\n", - page_res->char_count, page_res->rej_count, - page_res->rej_count / static_cast(page_res->char_count), - stats_.doc_blob_quality, - stats_.doc_blob_quality / static_cast(page_res->char_count), - stats_.doc_outline_errs, - stats_.doc_outline_errs / static_cast(page_res->char_count), - stats_.doc_char_quality, - stats_.doc_char_quality / static_cast(page_res->char_count), - stats_.doc_good_char_quality, - (stats_.good_char_count > 0) ? - (stats_.doc_good_char_quality / - static_cast(stats_.good_char_count)) : 0.0); + tprintf( + "QUALITY: num_chs= %d num_rejs= %d %5.3f blob_qual= %d %5.3f" + " outline_errs= %d %5.3f char_qual= %d %5.3f good_ch_qual= %d %5.3f\n", + page_res->char_count, page_res->rej_count, + page_res->rej_count / static_cast(page_res->char_count), + stats_.doc_blob_quality, + stats_.doc_blob_quality / static_cast(page_res->char_count), + stats_.doc_outline_errs, + stats_.doc_outline_errs / static_cast(page_res->char_count), + stats_.doc_char_quality, + stats_.doc_char_quality / static_cast(page_res->char_count), + stats_.doc_good_char_quality, + (stats_.good_char_count > 0) + ? (stats_.doc_good_char_quality / + static_cast(stats_.good_char_count)) + : 0.0); } bool good_quality_doc = - ((page_res->rej_count / static_cast(page_res->char_count)) <= - quality_rej_pc) && - (stats_.doc_blob_quality / static_cast(page_res->char_count) >= - quality_blob_pc) && - (stats_.doc_outline_errs / static_cast(page_res->char_count) <= - quality_outline_pc) && - (stats_.doc_char_quality / static_cast(page_res->char_count) >= - quality_char_pc); + ((page_res->rej_count / static_cast(page_res->char_count)) <= + quality_rej_pc) && + (stats_.doc_blob_quality / static_cast(page_res->char_count) >= + quality_blob_pc) && + (stats_.doc_outline_errs / static_cast(page_res->char_count) <= + quality_outline_pc) && + (stats_.doc_char_quality / static_cast(page_res->char_count) >= + quality_char_pc); // ****************** Pass 6 ******************* // Do whole document or whole block rejection pass @@ -696,16 +690,17 @@ void Tesseract::blamer_pass(PAGE_RES* page_res) { if (!wordrec_run_blamer) return; PAGE_RES_IT page_res_it(page_res); for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); + page_res_it.forward()) { + WERD_RES* word = page_res_it.word(); BlamerBundle::LastChanceBlame(wordrec_debug_blamer, word); page_res->blame_reasons[word->blamer_bundle->incorrect_result_reason()]++; } tprintf("Blame reasons:\n"); for (int bl = 0; bl < IRR_NUM_REASONS; ++bl) { - tprintf("%s %d\n", BlamerBundle::IncorrectReasonName( - static_cast(bl)), - page_res->blame_reasons[bl]); + tprintf("%s %d\n", + BlamerBundle::IncorrectReasonName( + static_cast(bl)), + page_res->blame_reasons[bl]); } if (page_res->misadaption_log.length() > 0) { tprintf("Misadaption log:\n"); @@ -719,9 +714,9 @@ void Tesseract::blamer_pass(PAGE_RES* page_res) { void Tesseract::script_pos_pass(PAGE_RES* page_res) { PAGE_RES_IT page_res_it(page_res); for (page_res_it.restart_page(); page_res_it.word() != nullptr; - page_res_it.forward()) { + page_res_it.forward()) { WERD_RES* word = page_res_it.word(); - if (word->word->flag(W_REP_CHAR)) { + if (word->word->flag(W_REP_CHAR)) { page_res_it.forward(); continue; } @@ -730,7 +725,8 @@ void Tesseract::script_pos_pass(PAGE_RES* page_res) { if (word_x_height < word->best_choice->min_x_height() || word_x_height > word->best_choice->max_x_height()) { word_x_height = (word->best_choice->min_x_height() + - word->best_choice->max_x_height()) / 2.0f; + word->best_choice->max_x_height()) / + 2.0f; } // Test for small caps. Word capheight must be close to block xheight, // and word must contain no lower case letters, and at least one upper case. @@ -748,8 +744,7 @@ void Tesseract::script_pos_pass(PAGE_RES* page_res) { else if (word->uch_set->get_islower(word->best_choice->unichar_id(i))) ++num_lower; } - if (num_upper > 0 && num_lower == 0) - word->small_caps = true; + if (num_upper > 0 && num_lower == 0) word->small_caps = true; } word->SetScriptPositions(); } @@ -798,10 +793,8 @@ static void EvaluateWordSpan(const PointerVector& words, // All the new_words are consumed (moved to best_words or deleted.) // The return value is the number of new_words used minus the number of // best_words that remain in the output. -static int SelectBestWords(double rating_ratio, - double certainty_margin, - bool debug, - PointerVector* new_words, +static int SelectBestWords(double rating_ratio, double certainty_margin, + bool debug, PointerVector* new_words, PointerVector* best_words) { // Process the smallest groups of words that have an overlapping word // boundary at the end. @@ -845,11 +838,11 @@ static int SelectBestWords(double rating_ratio, EvaluateWordSpan(*new_words, start_n, end_n, &n_rating, &n_certainty, &n_bad, &n_valid_permuter); bool new_better = false; - if (!n_bad && (b_bad || (n_certainty > b_certainty && - n_rating < b_rating) || - (!b_valid_permuter && n_valid_permuter && - n_rating < b_rating * rating_ratio && - n_certainty > b_certainty - certainty_margin))) { + if (!n_bad && + (b_bad || (n_certainty > b_certainty && n_rating < b_rating) || + (!b_valid_permuter && n_valid_permuter && + n_rating < b_rating * rating_ratio && + n_certainty > b_certainty - certainty_margin))) { // New is better. for (int i = start_n; i < end_n; ++i) { out_words.push_back((*new_words)[i]); @@ -866,11 +859,12 @@ static int SelectBestWords(double rating_ratio, } } if (debug) { - tprintf("%d new words %s than %d old words: r: %g v %g c: %g v %g" - " valid dict: %d v %d\n", - end_n - start_n, new_better ? "better" : "worse", - end_b - start_b, n_rating, b_rating, - n_certainty, b_certainty, n_valid_permuter, b_valid_permuter); + tprintf( + "%d new words %s than %d old words: r: %g v %g c: %g v %g" + " valid dict: %d v %d\n", + end_n - start_n, new_better ? "better" : "worse", end_b - start_b, + n_rating, b_rating, n_certainty, b_certainty, n_valid_permuter, + b_valid_permuter); } // Move on to the next group. b = end_b; @@ -891,8 +885,8 @@ int Tesseract::RetryWithLanguage(const WordData& word_data, WERD_RES** in_word, PointerVector* best_words) { if (debug) { - tprintf("Trying word using lang %s, oem %d\n", - lang.string(), static_cast(tessedit_ocr_engine_mode)); + tprintf("Trying word using lang %s, oem %d\n", lang.string(), + static_cast(tessedit_ocr_engine_mode)); } // Run the recognizer on the word. PointerVector new_words; @@ -910,8 +904,8 @@ int Tesseract::RetryWithLanguage(const WordData& word_data, // Initial version is a bit of a hack based on better certainty and rating // or a dictionary vs non-dictionary word. return SelectBestWords(classify_max_rating_ratio, - classify_max_certainty_margin, - debug, &new_words, best_words); + classify_max_certainty_margin, debug, &new_words, + best_words); } // Helper returns true if all the words are acceptable. @@ -961,7 +955,8 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT* pr_it, outlines[i] = nullptr; } } - real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, nullptr); + real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, + nullptr); AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, &target_blobs); int non_overlapped = 0; @@ -1103,8 +1098,8 @@ void Tesseract::AssignDiacriticsToNewBlobs( (*target_blobs)[j] = right_blob; } } - } else if (SelectGoodDiacriticOutlines(pass, noise_cert_punc, pr_it, nullptr, - outlines, num_blob_outlines, + } else if (SelectGoodDiacriticOutlines(pass, noise_cert_punc, pr_it, + nullptr, outlines, num_blob_outlines, &blob_wanted)) { if (debug_noise_removal) tprintf("Fitted between blobs\n"); for (int j = 0; j < blob_wanted.size(); ++j) { @@ -1294,22 +1289,23 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it, clock_t start_t = clock(); const bool debug = classify_debug_level > 0 || multilang_debug_level > 0; if (debug) { - tprintf("%s word with lang %s at:", - word->done ? "Already done" : "Processing", - most_recently_used_->lang.string()); + tprintf( + "%s word with lang %s at:", word->done ? "Already done" : "Processing", + most_recently_used_->lang.string()); word->word->bounding_box().print(); } if (word->done) { // If done on pass1, leave it as-is. - if (!word->tess_failed) - most_recently_used_ = word->tesseract; + if (!word->tess_failed) most_recently_used_ = word->tesseract; return; } int sub = sub_langs_.size(); if (most_recently_used_ != this) { // Get the index of the most_recently_used_. - for (sub = 0; sub < sub_langs_.size() && - most_recently_used_ != sub_langs_[sub]; ++sub) {} + for (sub = 0; + sub < sub_langs_.size() && most_recently_used_ != sub_langs_[sub]; + ++sub) { + } } most_recently_used_->RetryWithLanguage( *word_data, recognizer, debug, &word_data->lang_words[sub], &best_words); @@ -1350,7 +1346,7 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it, if (tessedit_timing_debug) { tprintf("%s (ocr took %.2f sec)\n", word->best_choice->unichar_string().string(), - static_cast(ocr_t-start_t)/CLOCKS_PER_SEC); + static_cast(ocr_t - start_t) / CLOCKS_PER_SEC); } } @@ -1366,14 +1362,14 @@ void Tesseract::classify_word_pass1(const WordData& word_data, ROW* row = word_data.row; BLOCK* block = word_data.block; prev_word_best_choice_ = word_data.prev_word != nullptr - ? word_data.prev_word->word->best_choice : nullptr; + ? word_data.prev_word->word->best_choice + : nullptr; #ifndef ANDROID_BUILD if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY || tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) { if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { LSTMRecognizeWord(*block, row, *in_word, out_words); - if (!out_words->empty()) - return; // Successful lstm recognition. + if (!out_words->empty()) return; // Successful lstm recognition. } if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { // No fallback allowed, so use a fake. @@ -1381,11 +1377,10 @@ void Tesseract::classify_word_pass1(const WordData& word_data, return; } // Fall back to tesseract for failed words or odd words. - (*in_word)->SetupForRecognition(unicharset, this, BestPix(), - OEM_TESSERACT_ONLY, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, row, block); + (*in_word)->SetupForRecognition( + unicharset, this, BestPix(), OEM_TESSERACT_ONLY, nullptr, + classify_bln_numeric_mode, textord_use_cjk_fp_model, + poly_allow_detailed_fx, row, block); } #endif WERD_RES* word = *in_word; @@ -1417,12 +1412,10 @@ void Tesseract::ReportXhtFixResult(bool accept_new_word, float new_x_ht, word->best_choice->unichar_string().string(), word->best_choice->debug_string().string()); word->reject_map.print(debug_fp); - tprintf(" -> %s = %s ", - new_word->best_choice->unichar_string().string(), + tprintf(" -> %s = %s ", new_word->best_choice->unichar_string().string(), new_word->best_choice->debug_string().string()); new_word->reject_map.print(debug_fp); - tprintf(" %s->%s %s %s\n", - word->guessed_x_ht ? "GUESS" : "CERT", + tprintf(" %s->%s %s %s\n", word->guessed_x_ht ? "GUESS" : "CERT", new_word->guessed_x_ht ? "GUESS" : "CERT", new_x_ht > 0.1 ? "STILL DOUBT" : "OK", accept_new_word ? "ACCEPTED" : ""); @@ -1432,10 +1425,9 @@ void Tesseract::ReportXhtFixResult(bool accept_new_word, float new_x_ht, // unicharset. // Returns true if the word was changed. // See the comment in fixxht.cpp for a description of the overall process. -bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) { +bool Tesseract::TrainedXheightFix(WERD_RES* word, BLOCK* block, ROW* row) { int original_misfits = CountMisfitTops(word); - if (original_misfits == 0) - return false; + if (original_misfits == 0) return false; float baseline_shift = 0.0f; float new_x_ht = ComputeCompatibleXheight(word, &baseline_shift); if (baseline_shift != 0.0f) { @@ -1451,14 +1443,14 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) { if (new_x_ht >= kMinRefitXHeightFraction * word->x_height) { // No test of return value here, as we are definitely making a change // to the word by shifting the baseline. - TestNewNormalization(original_misfits, baseline_shift, new_x_ht, - word, block, row); + TestNewNormalization(original_misfits, baseline_shift, new_x_ht, word, + block, row); } } return true; } else if (new_x_ht >= kMinRefitXHeightFraction * word->x_height) { - return TestNewNormalization(original_misfits, 0.0f, new_x_ht, - word, block, row); + return TestNewNormalization(original_misfits, 0.0f, new_x_ht, word, block, + row); } else { return false; } @@ -1466,9 +1458,9 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) { // Runs recognition with the test baseline shift and x-height and returns true // if there was an improvement in recognition result. -bool Tesseract::TestNewNormalization(int original_misfits, - float baseline_shift, float new_x_ht, - WERD_RES *word, BLOCK* block, ROW *row) { +bool Tesseract::TestNewNormalization(int original_misfits, float baseline_shift, + float new_x_ht, WERD_RES* word, + BLOCK* block, ROW* row) { bool accept_new_x_ht = false; WERD_RES new_x_ht_word(word->word); if (word->blamer_bundle != nullptr) { @@ -1479,27 +1471,26 @@ bool Tesseract::TestNewNormalization(int original_misfits, new_x_ht_word.baseline_shift = baseline_shift; new_x_ht_word.caps_height = 0.0; new_x_ht_word.SetupForRecognition( - unicharset, this, BestPix(), tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, textord_use_cjk_fp_model, + unicharset, this, BestPix(), tessedit_ocr_engine_mode, nullptr, + classify_bln_numeric_mode, textord_use_cjk_fp_model, poly_allow_detailed_fx, row, block); match_word_pass_n(2, &new_x_ht_word, row, block); if (!new_x_ht_word.tess_failed) { int new_misfits = CountMisfitTops(&new_x_ht_word); if (debug_x_ht_level >= 1) { tprintf("Old misfits=%d with x-height %f, new=%d with x-height %f\n", - original_misfits, word->x_height, - new_misfits, new_x_ht); + original_misfits, word->x_height, new_misfits, new_x_ht); tprintf("Old rating= %f, certainty=%f, new=%f, %f\n", word->best_choice->rating(), word->best_choice->certainty(), new_x_ht_word.best_choice->rating(), new_x_ht_word.best_choice->certainty()); } // The misfits must improve and either the rating or certainty. - accept_new_x_ht = new_misfits < original_misfits && - (new_x_ht_word.best_choice->certainty() > - word->best_choice->certainty() || - new_x_ht_word.best_choice->rating() < - word->best_choice->rating()); + accept_new_x_ht = + new_misfits < original_misfits && + (new_x_ht_word.best_choice->certainty() > + word->best_choice->certainty() || + new_x_ht_word.best_choice->rating() < word->best_choice->rating()); if (debug_x_ht_level >= 1) { ReportXhtFixResult(accept_new_x_ht, new_x_ht, word, &new_x_ht_word); } @@ -1528,14 +1519,14 @@ void Tesseract::classify_word_pass2(const WordData& word_data, BLOCK* block = word_data.block; WERD_RES* word = *in_word; prev_word_best_choice_ = word_data.prev_word != nullptr - ? word_data.prev_word->word->best_choice : nullptr; + ? word_data.prev_word->word->best_choice + : nullptr; set_global_subloc_code(SUBLOC_NORM); check_debug_pt(word, 30); if (!word->done) { word->caps_height = 0.0; - if (word->x_height == 0.0f) - word->x_height = row->x_height(); + if (word->x_height == 0.0f) word->x_height = row->x_height(); match_word_pass_n(2, word, row, block); check_debug_pt(word, 40); } @@ -1553,13 +1544,12 @@ void Tesseract::classify_word_pass2(const WordData& word_data, } #ifndef GRAPHICS_DISABLED if (tessedit_display_outwords) { - if (fx_win == nullptr) - create_fx_win(); + if (fx_win == nullptr) create_fx_win(); clear_fx_win(); word->rebuild_word->plot(fx_win); TBOX wbox = word->rebuild_word->bounding_box(); - fx_win->ZoomToRectangle(wbox.left(), wbox.top(), - wbox.right(), wbox.bottom()); + fx_win->ZoomToRectangle(wbox.left(), wbox.top(), wbox.right(), + wbox.bottom()); ScrollView::Update(); } #endif @@ -1567,31 +1557,28 @@ void Tesseract::classify_word_pass2(const WordData& word_data, check_debug_pt(word, 50); } - /** * match_word_pass2 * * Baseline normalize the word and pass it to Tess. */ -void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, - ROW *row, BLOCK* block) { +void Tesseract::match_word_pass_n(int pass_n, WERD_RES* word, ROW* row, + BLOCK* block) { if (word->tess_failed) return; tess_segment_pass_n(pass_n, word); if (!word->tess_failed) { - if (!word->word->flag (W_REP_CHAR)) { - word->fix_quotes(); - if (tessedit_fix_hyphens) - word->fix_hyphens(); + if (!word->word->flag(W_REP_CHAR)) { + word->fix_quotes(); + if (tessedit_fix_hyphens) word->fix_hyphens(); /* Don't trust fix_quotes! - though I think I've fixed the bug */ if (word->best_choice->length() != word->box_word->length()) { - tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;" - " #Blobs=%d\n", - word->best_choice->debug_string().string(), - word->best_choice->length(), - word->box_word->length()); - + tprintf( + "POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;" + " #Blobs=%d\n", + word->best_choice->debug_string().string(), + word->best_choice->length(), word->box_word->length()); } word->tess_accepted = tess_acceptable_word(word); @@ -1611,8 +1598,8 @@ static BLOB_CHOICE* FindBestMatchingChoice(UNICHAR_ID char_id, // Find the corresponding best BLOB_CHOICE from any position in the word_res. BLOB_CHOICE* best_choice = nullptr; for (int i = 0; i < word_res->best_choice->length(); ++i) { - BLOB_CHOICE* choice = FindMatchingChoice(char_id, - word_res->GetBlobChoices(i)); + BLOB_CHOICE* choice = + FindMatchingChoice(char_id, word_res->GetBlobChoices(i)); if (choice != nullptr) { if (best_choice == nullptr || choice->rating() < best_choice->rating()) best_choice = choice; @@ -1650,8 +1637,8 @@ static void CorrectRepcharChoices(BLOB_CHOICE* blob_choice, * where some of the characters disagree with the majority. */ void Tesseract::fix_rep_char(PAGE_RES_IT* page_res_it) { - WERD_RES *word_res = page_res_it->word(); - const WERD_CHOICE &word = *(word_res->best_choice); + WERD_RES* word_res = page_res_it->word(); + const WERD_CHOICE& word = *(word_res->best_choice); // Find the frequency of each unique character in the word. SortHelper rep_ch(word.length()); @@ -1660,7 +1647,7 @@ void Tesseract::fix_rep_char(PAGE_RES_IT* page_res_it) { } // Find the most frequent result. - UNICHAR_ID maxch_id = INVALID_UNICHAR_ID; // most common char + UNICHAR_ID maxch_id = INVALID_UNICHAR_ID; // most common char int max_count = rep_ch.MaxCount(&maxch_id); // Find the best exemplar of a classifier result for maxch_id. BLOB_CHOICE* best_choice = FindBestMatchingChoice(maxch_id, word_res); @@ -1688,8 +1675,9 @@ void Tesseract::fix_rep_char(PAGE_RES_IT* page_res_it) { word_res->reject_map.initialise(word.length()); } -ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( - const UNICHARSET& char_set, const char *s, const char *lengths) { +ACCEPTABLE_WERD_TYPE +Tesseract::acceptable_word_string(const UNICHARSET& char_set, const char* s, + const char* lengths) { int i = 0; int offset = 0; int leading_punct_count; @@ -1697,8 +1685,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( int hyphen_pos = -1; ACCEPTABLE_WERD_TYPE word_type = AC_UNACCEPTABLE; - if (strlen (lengths) > 20) - return word_type; + if (strlen(lengths) > 20) return word_type; /* Single Leading punctuation char*/ @@ -1732,13 +1719,12 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( char_set.get_islower(s + offset, lengths[i])) { offset += lengths[i++]; } - if (i < hyphen_pos + 3) - goto not_a_word; + if (i < hyphen_pos + 3) goto not_a_word; } } else { /* Allow "'s" in NON hyphenated lower case words */ - if (lengths[i] == 1 && (s[offset] == '\'') && - lengths[i + 1] == 1 && (s[offset + lengths[i]] == 's')) { + if (lengths[i] == 1 && (s[offset] == '\'') && lengths[i + 1] == 1 && + (s[offset + lengths[i]] == 's')) { offset += lengths[i++]; offset += lengths[i++]; } @@ -1755,13 +1741,12 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( offset += lengths[i++]; if (lengths[i] == 1 && s[offset] != '\0' && i > 0 && s[offset - lengths[i - 1]] != s[offset] && - STRING(chs_trailing_punct2).contains (s[offset])) + STRING(chs_trailing_punct2).contains(s[offset])) offset += lengths[i++]; - if (s[offset] != '\0') - word_type = AC_UNACCEPTABLE; + if (s[offset] != '\0') word_type = AC_UNACCEPTABLE; - not_a_word: +not_a_word: if (word_type == AC_UNACCEPTABLE) { /* Look for abbreviation string */ @@ -1775,8 +1760,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( offset += lengths[i++]; offset += lengths[i++]; } - } - else if (s[0] != '\0' && char_set.get_islower(s, lengths[0])) { + } else if (s[0] != '\0' && char_set.get_islower(s, lengths[0])) { word_type = AC_LC_ABBREV; while (s[offset] != '\0' && char_set.get_islower(s + offset, lengths[i]) && @@ -1785,8 +1769,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( offset += lengths[i++]; } } - if (s[offset] != '\0') - word_type = AC_UNACCEPTABLE; + if (s[offset] != '\0') word_type = AC_UNACCEPTABLE; } return word_type; @@ -1796,59 +1779,57 @@ bool Tesseract::check_debug_pt(WERD_RES* word, int location) { bool show_map_detail = false; int16_t i; - if (!test_pt) - return false; + if (!test_pt) return false; - tessedit_rejection_debug.set_value (FALSE); + tessedit_rejection_debug.set_value(FALSE); debug_x_ht_level.set_value(0); - if (word->word->bounding_box().contains(FCOORD (test_pt_x, test_pt_y))) { - if (location < 0) - return true; // For breakpoint use + if (word->word->bounding_box().contains(FCOORD(test_pt_x, test_pt_y))) { + if (location < 0) return true; // For breakpoint use tessedit_rejection_debug.set_value(TRUE); debug_x_ht_level.set_value(2); - tprintf ("\n\nTESTWD::"); + tprintf("\n\nTESTWD::"); switch (location) { case 0: - tprintf ("classify_word_pass1 start\n"); + tprintf("classify_word_pass1 start\n"); word->word->print(); break; case 10: - tprintf ("make_reject_map: initial map"); + tprintf("make_reject_map: initial map"); break; case 20: - tprintf ("make_reject_map: after NN"); + tprintf("make_reject_map: after NN"); break; case 30: - tprintf ("classify_word_pass2 - START"); + tprintf("classify_word_pass2 - START"); break; case 40: - tprintf ("classify_word_pass2 - Pre Xht"); + tprintf("classify_word_pass2 - Pre Xht"); break; case 50: - tprintf ("classify_word_pass2 - END"); + tprintf("classify_word_pass2 - END"); show_map_detail = true; break; case 60: - tprintf ("fixspace"); + tprintf("fixspace"); break; case 70: - tprintf ("MM pass START"); + tprintf("MM pass START"); break; case 80: - tprintf ("MM pass END"); + tprintf("MM pass END"); break; case 90: - tprintf ("After Poor quality rejection"); + tprintf("After Poor quality rejection"); break; case 100: - tprintf ("unrej_good_quality_words - START"); + tprintf("unrej_good_quality_words - START"); break; case 110: - tprintf ("unrej_good_quality_words - END"); + tprintf("unrej_good_quality_words - END"); break; case 120: - tprintf ("Write results pass"); + tprintf("Write results pass"); show_map_detail = true; break; } @@ -1866,8 +1847,8 @@ bool Tesseract::check_debug_pt(WERD_RES* word, int location) { } else { tprintf("null best choice\n"); } - tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); - tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); + tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); + tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); return true; } else { return false; @@ -1884,17 +1865,16 @@ static void find_modal_font( // good chars in word int16_t* font_out, // output font int8_t* font_count // output count ) { - int16_t font; //font index - int32_t count; //pile count + int16_t font; // font index + int32_t count; // pile count - if (fonts->get_total () > 0) { - font = (int16_t) fonts->mode (); + if (fonts->get_total() > 0) { + font = (int16_t)fonts->mode(); *font_out = font; - count = fonts->pile_count (font); + count = fonts->pile_count(font); *font_count = count < INT8_MAX ? count : INT8_MAX; - fonts->add (font, -*font_count); - } - else { + fonts->add(font, -*font_count); + } else { *font_out = -1; *font_count = 0; } @@ -1905,7 +1885,7 @@ static void find_modal_font( // good chars in word * * Get the fonts for the word. */ -void Tesseract::set_word_fonts(WERD_RES *word) { +void Tesseract::set_word_fonts(WERD_RES* word) { // Don't try to set the word fonts for an lstm word, as the configs // will be meaningless. if (word->chopped_word == nullptr) return; @@ -1939,8 +1919,8 @@ void Tesseract::set_word_fonts(WERD_RES *word) { int16_t font_id1 = -1, font_id2 = -1; for (int f = 0; f < fontinfo_size; ++f) { if (tessedit_debug_fonts && font_total_score[f] > 0) { - tprintf("Font %s, total score = %d\n", - fontinfo_table_.get(f).name, font_total_score[f]); + tprintf("Font %s, total score = %d\n", fontinfo_table_.get(f).name, + font_total_score[f]); } if (font_total_score[f] > score1) { score2 = score1; @@ -1962,13 +1942,12 @@ void Tesseract::set_word_fonts(WERD_RES *word) { const FontInfo fi = fontinfo_table_.get(font_id1); if (tessedit_debug_fonts) { if (word->fontinfo_id2_count > 0) { - tprintf("Word modal font=%s, score=%d, 2nd choice %s/%d\n", - fi.name, word->fontinfo_id_count, - fontinfo_table_.get(font_id2).name, + tprintf("Word modal font=%s, score=%d, 2nd choice %s/%d\n", fi.name, + word->fontinfo_id_count, fontinfo_table_.get(font_id2).name, word->fontinfo_id2_count); } else { - tprintf("Word modal font=%s, score=%d. No 2nd choice\n", - fi.name, word->fontinfo_id_count); + tprintf("Word modal font=%s, score=%d. No 2nd choice\n", fi.name, + word->fontinfo_id_count); } } word->italic = (fi.is_italic() ? 1 : -1) * word->fontinfo_id_count; @@ -1976,7 +1955,6 @@ void Tesseract::set_word_fonts(WERD_RES *word) { } } - /** * font_recognition_pass * @@ -1985,8 +1963,8 @@ void Tesseract::set_word_fonts(WERD_RES *word) { void Tesseract::font_recognition_pass(PAGE_RES* page_res) { PAGE_RES_IT page_res_it(page_res); - WERD_RES *word; // current word - STATS doc_fonts(0, font_table_size_); // font counters + WERD_RES* word; // current word + STATS doc_fonts(0, font_table_size_); // font counters // Gather font id statistics. for (page_res_it.restart_page(); page_res_it.word() != nullptr; @@ -1999,11 +1977,10 @@ void Tesseract::font_recognition_pass(PAGE_RES* page_res) { doc_fonts.add(word->fontinfo2->universal_id, word->fontinfo_id2_count); } } - int16_t doc_font; // modal font - int8_t doc_font_count; // modal font + int16_t doc_font; // modal font + int8_t doc_font_count; // modal font find_modal_font(&doc_fonts, &doc_font, &doc_font_count); - if (doc_font_count == 0) - return; + if (doc_font_count == 0) return; // Get the modal font pointer. const FontInfo* modal_font = nullptr; for (page_res_it.restart_page(); page_res_it.word() != nullptr; @@ -2013,7 +1990,8 @@ void Tesseract::font_recognition_pass(PAGE_RES* page_res) { modal_font = word->fontinfo; break; } - if (word->fontinfo2 != nullptr && word->fontinfo2->universal_id == doc_font) { + if (word->fontinfo2 != nullptr && + word->fontinfo2->universal_id == doc_font) { modal_font = word->fontinfo2; break; } @@ -2040,12 +2018,11 @@ void Tesseract::font_recognition_pass(PAGE_RES* page_res) { // If a word has multiple alternates check if the best choice is in the // dictionary. If not, replace it with an alternate that exists in the // dictionary. -void Tesseract::dictionary_correction_pass(PAGE_RES *page_res) { +void Tesseract::dictionary_correction_pass(PAGE_RES* page_res) { PAGE_RES_IT word_it(page_res); for (WERD_RES* word = word_it.word(); word != nullptr; word = word_it.forward()) { - if (word->best_choices.singleton()) - continue; // There are no alternates. + if (word->best_choices.singleton()) continue; // There are no alternates. const WERD_CHOICE* best = word->best_choice; if (word->tesseract->getDict().valid_word(*best) != 0) diff --git a/src/ccmain/control.h b/src/ccmain/control.h index 2f76a47cea..02346ab0ab 100644 --- a/src/ccmain/control.h +++ b/src/ccmain/control.h @@ -22,23 +22,22 @@ * Module-independent matcher controller. */ -#ifndef CONTROL_H -#define CONTROL_H +#ifndef CONTROL_H +#define CONTROL_H -#include "params.h" -#include "ocrblock.h" -#include "ratngs.h" -#include "statistc.h" -#include "pageres.h" +#include "ocrblock.h" +#include "pageres.h" +#include "params.h" +#include "ratngs.h" +#include "statistc.h" -enum ACCEPTABLE_WERD_TYPE -{ - AC_UNACCEPTABLE, ///< Unacceptable word - AC_LOWER_CASE, ///< ALL lower case - AC_UPPER_CASE, ///< ALL upper case - AC_INITIAL_CAP, ///< ALL but initial lc - AC_LC_ABBREV, ///< a.b.c. - AC_UC_ABBREV ///< A.B.C. +enum ACCEPTABLE_WERD_TYPE { + AC_UNACCEPTABLE, ///< Unacceptable word + AC_LOWER_CASE, ///< ALL lower case + AC_UPPER_CASE, ///< ALL upper case + AC_INITIAL_CAP, ///< ALL but initial lc + AC_LC_ABBREV, ///< a.b.c. + AC_UC_ABBREV ///< A.B.C. }; #endif diff --git a/src/ccmain/docqual.cpp b/src/ccmain/docqual.cpp index 8c0aedbe77..6bd59a1dc4 100644 --- a/src/ccmain/docqual.cpp +++ b/src/ccmain/docqual.cpp @@ -17,28 +17,25 @@ * **********************************************************************/ -#include -#include "docqual.h" -#include "reject.h" -#include "tesscallback.h" -#include "tessvars.h" -#include "globals.h" -#include "tesseractclass.h" +#include "docqual.h" +#include +#include "globals.h" +#include "reject.h" +#include "tesscallback.h" +#include "tesseractclass.h" +#include "tessvars.h" -namespace tesseract{ +namespace tesseract { // A little class to provide the callbacks as we have no pre-bound args. struct DocQualCallbacks { explicit DocQualCallbacks(WERD_RES* word0) - : word(word0), match_count(0), accepted_match_count(0) {} + : word(word0), match_count(0), accepted_match_count(0) {} - void CountMatchingBlobs(int index) { - ++match_count; - } + void CountMatchingBlobs(int index) { ++match_count; } void CountAcceptedBlobs(int index) { - if (word->reject_map[index].accepted()) - ++accepted_match_count; + if (word->reject_map[index].accepted()) ++accepted_match_count; ++match_count; } @@ -58,9 +55,9 @@ struct DocQualCallbacks { * ASSUME blobs in both initial word and box_word are in ascending order of * left hand blob edge. *************************************************************************/ -int16_t Tesseract::word_blob_quality(WERD_RES *word, ROW *row) { - if (word->bln_boxes == nullptr || - word->rebuild_word == nullptr || word->rebuild_word->blobs.empty()) +int16_t Tesseract::word_blob_quality(WERD_RES* word, ROW* row) { + if (word->bln_boxes == nullptr || word->rebuild_word == nullptr || + word->rebuild_word->blobs.empty()) return 0; DocQualCallbacks cb(word); @@ -70,7 +67,7 @@ int16_t Tesseract::word_blob_quality(WERD_RES *word, ROW *row) { return cb.match_count; } -int16_t Tesseract::word_outline_errs(WERD_RES *word) { +int16_t Tesseract::word_outline_errs(WERD_RES* word) { int16_t i = 0; int16_t err_count = 0; @@ -90,10 +87,9 @@ int16_t Tesseract::word_outline_errs(WERD_RES *word) { * Combination of blob quality and outline quality - how many good chars are * there? - I.e chars which pass the blob AND outline tests. *************************************************************************/ -void Tesseract::word_char_quality(WERD_RES *word, - ROW *row, - int16_t *match_count, - int16_t *accepted_match_count) { +void Tesseract::word_char_quality(WERD_RES* word, ROW* row, + int16_t* match_count, + int16_t* accepted_match_count) { if (word->bln_boxes == nullptr || word->rebuild_word == nullptr || word->rebuild_word->blobs.empty()) { *match_count = 0; @@ -113,9 +109,9 @@ void Tesseract::word_char_quality(WERD_RES *word, * unrej_good_chs() * Unreject POTENTIAL rejects if the blob passes the blob and outline checks *************************************************************************/ -void Tesseract::unrej_good_chs(WERD_RES *word, ROW *row) { - if (word->bln_boxes == nullptr || - word->rebuild_word == nullptr || word->rebuild_word->blobs.empty()) +void Tesseract::unrej_good_chs(WERD_RES* word, ROW* row) { + if (word->bln_boxes == nullptr || word->rebuild_word == nullptr || + word->rebuild_word->blobs.empty()) return; DocQualCallbacks cb(word); @@ -127,16 +123,16 @@ void Tesseract::unrej_good_chs(WERD_RES *word, ROW *row) { int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) { int expected_outline_count; - if (STRING (outlines_odd).contains (c)) + if (STRING(outlines_odd).contains(c)) return 0; // Don't use this char - else if (STRING (outlines_2).contains (c)) + else if (STRING(outlines_2).contains(c)) expected_outline_count = 2; else expected_outline_count = 1; - return abs (outline_count - expected_outline_count); + return abs(outline_count - expected_outline_count); } -void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, +void Tesseract::quality_based_rejection(PAGE_RES_IT& page_res_it, bool good_quality_doc) { if ((tessedit_good_quality_unrej && good_quality_doc)) unrej_good_quality_words(page_res_it); @@ -158,71 +154,67 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, * - CAN'T do it in a single pass without a bit of fiddling * - keep it simple but inefficient *************************************************************************/ -void Tesseract::unrej_good_quality_words( //unreject potential - PAGE_RES_IT &page_res_it) { - WERD_RES *word; - ROW_RES *current_row; - BLOCK_RES *current_block; +void Tesseract::unrej_good_quality_words( // unreject potential + PAGE_RES_IT& page_res_it) { + WERD_RES* word; + ROW_RES* current_row; + BLOCK_RES* current_block; int i; - page_res_it.restart_page (); - while (page_res_it.word () != nullptr) { - check_debug_pt (page_res_it.word (), 100); + page_res_it.restart_page(); + while (page_res_it.word() != nullptr) { + check_debug_pt(page_res_it.word(), 100); if (bland_unrej) { - word = page_res_it.word (); - for (i = 0; i < word->reject_map.length (); i++) { - if (word->reject_map[i].accept_if_good_quality ()) - word->reject_map[i].setrej_quality_accept (); + word = page_res_it.word(); + for (i = 0; i < word->reject_map.length(); i++) { + if (word->reject_map[i].accept_if_good_quality()) + word->reject_map[i].setrej_quality_accept(); } - page_res_it.forward (); - } - else if ((page_res_it.row ()->char_count > 0) && - ((page_res_it.row ()->rej_count / - (float) page_res_it.row ()->char_count) <= - quality_rowrej_pc)) { - word = page_res_it.word (); + page_res_it.forward(); + } else if ((page_res_it.row()->char_count > 0) && + ((page_res_it.row()->rej_count / + (float)page_res_it.row()->char_count) <= quality_rowrej_pc)) { + word = page_res_it.word(); if (word->reject_map.quality_recoverable_rejects() && (tessedit_unrej_any_wd || - acceptable_word_string(*word->uch_set, - word->best_choice->unichar_string().string(), - word->best_choice->unichar_lengths().string()) - != AC_UNACCEPTABLE)) { - unrej_good_chs(word, page_res_it.row ()->row); + acceptable_word_string( + *word->uch_set, word->best_choice->unichar_string().string(), + word->best_choice->unichar_lengths().string()) != + AC_UNACCEPTABLE)) { + unrej_good_chs(word, page_res_it.row()->row); } - page_res_it.forward (); - } - else { + page_res_it.forward(); + } else { /* Skip to end of dodgy row */ - current_row = page_res_it.row (); - while ((page_res_it.word () != nullptr) && - (page_res_it.row () == current_row)) - page_res_it.forward (); + current_row = page_res_it.row(); + while ((page_res_it.word() != nullptr) && + (page_res_it.row() == current_row)) + page_res_it.forward(); } - check_debug_pt (page_res_it.word (), 110); + check_debug_pt(page_res_it.word(), 110); } - page_res_it.restart_page (); + page_res_it.restart_page(); page_res_it.page_res->char_count = 0; page_res_it.page_res->rej_count = 0; current_block = nullptr; current_row = nullptr; - while (page_res_it.word () != nullptr) { - if (current_block != page_res_it.block ()) { - current_block = page_res_it.block (); + while (page_res_it.word() != nullptr) { + if (current_block != page_res_it.block()) { + current_block = page_res_it.block(); current_block->char_count = 0; current_block->rej_count = 0; } - if (current_row != page_res_it.row ()) { - current_row = page_res_it.row (); + if (current_row != page_res_it.row()) { + current_row = page_res_it.row(); current_row->char_count = 0; current_row->rej_count = 0; current_row->whole_word_rej_count = 0; } - page_res_it.rej_stat_word (); - page_res_it.forward (); + page_res_it.rej_stat_word(); + page_res_it.forward(); } } - /************************************************************************* * doc_and_block_rejection() * @@ -230,13 +222,12 @@ void Tesseract::unrej_good_quality_words( //unreject potential * If any block has too many rejects - reject all words in the block *************************************************************************/ -void Tesseract::doc_and_block_rejection( //reject big chunks - PAGE_RES_IT &page_res_it, - bool good_quality_doc) { +void Tesseract::doc_and_block_rejection( // reject big chunks + PAGE_RES_IT& page_res_it, bool good_quality_doc) { int16_t block_no = 0; int16_t row_no = 0; - BLOCK_RES *current_block; - ROW_RES *current_row; + BLOCK_RES* current_block; + ROW_RES* current_row; bool rej_word; bool prev_word_rejected; @@ -244,7 +235,8 @@ void Tesseract::doc_and_block_rejection( //reject big chunks int16_t accepted_char_quality; if (page_res_it.page_res->rej_count * 100.0 / - page_res_it.page_res->char_count > tessedit_reject_doc_percent) { + page_res_it.page_res->char_count > + tessedit_reject_doc_percent) { reject_whole_page(page_res_it); if (tessedit_debug_doc_rejection) { tprintf("REJECT ALL #chars: %d #Rejects: %d; \n", @@ -267,29 +259,27 @@ void Tesseract::doc_and_block_rejection( //reject big chunks block_no = current_block->block->pdblk.index(); if (current_block->char_count > 0 && (current_block->rej_count * 100.0 / current_block->char_count) > - tessedit_reject_block_percent) { + tessedit_reject_block_percent) { if (tessedit_debug_block_rejection) { - tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n", - block_no, current_block->char_count, - current_block->rej_count); + tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n", block_no, + current_block->char_count, current_block->rej_count); } prev_word_rejected = false; while ((word = page_res_it.word()) != nullptr && (page_res_it.block() == current_block)) { if (tessedit_preserve_blk_rej_perfect_wds) { rej_word = word->reject_map.reject_count() > 0 || - word->reject_map.length () < tessedit_preserve_min_wd_len; + word->reject_map.length() < tessedit_preserve_min_wd_len; if (rej_word && tessedit_dont_blkrej_good_wds && word->reject_map.length() >= tessedit_preserve_min_wd_len && acceptable_word_string( *word->uch_set, word->best_choice->unichar_string().string(), word->best_choice->unichar_lengths().string()) != - AC_UNACCEPTABLE) { - word_char_quality(word, page_res_it.row()->row, - &char_quality, + AC_UNACCEPTABLE) { + word_char_quality(word, page_res_it.row()->row, &char_quality, &accepted_char_quality); - rej_word = char_quality != word->reject_map.length(); + rej_word = char_quality != word->reject_map.length(); } } else { rej_word = true; @@ -300,8 +290,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks NOTE - this is NOT restricted to FUZZY spaces. - When tried this generated more space errors. */ - if (tessedit_use_reject_spaces && - prev_word_rejected && + if (tessedit_use_reject_spaces && prev_word_rejected && page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1) word->reject_spaces = true; @@ -330,35 +319,34 @@ void Tesseract::doc_and_block_rejection( //reject big chunks */ if (current_row->char_count > 0 && (current_row->rej_count * 100.0 / current_row->char_count) > - tessedit_reject_row_percent && + tessedit_reject_row_percent && (current_row->whole_word_rej_count * 100.0 / - current_row->rej_count) < - tessedit_whole_wd_rej_row_percent) { + current_row->rej_count) < tessedit_whole_wd_rej_row_percent) { if (tessedit_debug_block_rejection) { - tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n", - row_no, current_row->char_count, - current_row->rej_count); + tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n", row_no, + current_row->char_count, current_row->rej_count); } prev_word_rejected = false; while ((word = page_res_it.word()) != nullptr && - page_res_it.row () == current_row) { + page_res_it.row() == current_row) { /* Preserve words on good docs unless they are mostly rejected*/ if (!tessedit_row_rej_good_docs && good_quality_doc) { rej_word = word->reject_map.reject_count() / - static_cast(word->reject_map.length()) > - tessedit_good_doc_still_rowrej_wd; + static_cast(word->reject_map.length()) > + tessedit_good_doc_still_rowrej_wd; } else if (tessedit_preserve_row_rej_perfect_wds) { /* Preserve perfect words anyway */ - rej_word = word->reject_map.reject_count() > 0 || - word->reject_map.length () < tessedit_preserve_min_wd_len; + rej_word = + word->reject_map.reject_count() > 0 || + word->reject_map.length() < tessedit_preserve_min_wd_len; if (rej_word && tessedit_dont_rowrej_good_wds && word->reject_map.length() >= tessedit_preserve_min_wd_len && - acceptable_word_string(*word->uch_set, + acceptable_word_string( + *word->uch_set, word->best_choice->unichar_string().string(), word->best_choice->unichar_lengths().string()) != - AC_UNACCEPTABLE) { - word_char_quality(word, page_res_it.row()->row, - &char_quality, + AC_UNACCEPTABLE) { + word_char_quality(word, page_res_it.row()->row, &char_quality, &accepted_char_quality); rej_word = char_quality != word->reject_map.length(); } @@ -371,10 +359,9 @@ void Tesseract::doc_and_block_rejection( //reject big chunks NOTE - this is NOT restricted to FUZZY spaces. - When tried this generated more space errors. */ - if (tessedit_use_reject_spaces && - prev_word_rejected && + if (tessedit_use_reject_spaces && prev_word_rejected && page_res_it.prev_row() == page_res_it.row() && - word->word->space () == 1) + word->word->space() == 1) word->reject_spaces = true; word->reject_map.rej_word_row_rej(); } @@ -404,19 +391,19 @@ void Tesseract::doc_and_block_rejection( //reject big chunks * *************************************************************************/ -void reject_whole_page(PAGE_RES_IT &page_res_it) { - page_res_it.restart_page (); - while (page_res_it.word () != nullptr) { - page_res_it.word ()->reject_map.rej_word_doc_rej (); - page_res_it.forward (); +void reject_whole_page(PAGE_RES_IT& page_res_it) { + page_res_it.restart_page(); + while (page_res_it.word() != nullptr) { + page_res_it.word()->reject_map.rej_word_doc_rej(); + page_res_it.forward(); } - //whole page is rejected + // whole page is rejected page_res_it.page_res->rejected = true; } namespace tesseract { -void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) { - WERD_RES *word; +void Tesseract::tilde_crunch(PAGE_RES_IT& page_res_it) { + WERD_RES* word; GARBAGE_LEVEL garbage_level; PAGE_RES_IT copy_it; bool prev_potential_marked = false; @@ -432,75 +419,67 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) { } word = page_res_it.word(); - if (crunch_early_convert_bad_unlv_chs) - convert_bad_unlv_chs(word); + if (crunch_early_convert_bad_unlv_chs) convert_bad_unlv_chs(word); - if (crunch_early_merge_tess_fails) - word->merge_tess_fails(); + if (crunch_early_merge_tess_fails) word->merge_tess_fails(); - if (word->reject_map.accept_count () != 0) { + if (word->reject_map.accept_count() != 0) { found_terrible_word = false; - //Forget earlier potential crunches + // Forget earlier potential crunches prev_potential_marked = false; - } - else { + } else { ok_dict_word = safe_dict_word(word); garbage_level = garbage_word(word, ok_dict_word); if ((garbage_level != G_NEVER_CRUNCH) && - (terrible_word_crunch (word, garbage_level))) { + (terrible_word_crunch(word, garbage_level))) { if (crunch_debug > 0) { - tprintf ("T CRUNCHING: \"%s\"\n", - word->best_choice->unichar_string().string()); + tprintf("T CRUNCHING: \"%s\"\n", + word->best_choice->unichar_string().string()); } word->unlv_crunch_mode = CR_KEEP_SPACE; if (prev_potential_marked) { - while (copy_it.word () != word) { + while (copy_it.word() != word) { if (crunch_debug > 0) { - tprintf ("P1 CRUNCHING: \"%s\"\n", - copy_it.word()->best_choice->unichar_string().string()); + tprintf("P1 CRUNCHING: \"%s\"\n", + copy_it.word()->best_choice->unichar_string().string()); } - copy_it.word ()->unlv_crunch_mode = CR_KEEP_SPACE; - copy_it.forward (); + copy_it.word()->unlv_crunch_mode = CR_KEEP_SPACE; + copy_it.forward(); } prev_potential_marked = false; } found_terrible_word = true; - } - else if ((garbage_level != G_NEVER_CRUNCH) && - (potential_word_crunch (word, - garbage_level, ok_dict_word))) { + } else if ((garbage_level != G_NEVER_CRUNCH) && + (potential_word_crunch(word, garbage_level, ok_dict_word))) { if (found_terrible_word) { if (crunch_debug > 0) { - tprintf ("P2 CRUNCHING: \"%s\"\n", - word->best_choice->unichar_string().string()); + tprintf("P2 CRUNCHING: \"%s\"\n", + word->best_choice->unichar_string().string()); } word->unlv_crunch_mode = CR_KEEP_SPACE; - } - else if (!prev_potential_marked) { + } else if (!prev_potential_marked) { copy_it = page_res_it; prev_potential_marked = true; if (crunch_debug > 1) { - tprintf ("P3 CRUNCHING: \"%s\"\n", - word->best_choice->unichar_string().string()); + tprintf("P3 CRUNCHING: \"%s\"\n", + word->best_choice->unichar_string().string()); } } - } - else { + } else { found_terrible_word = false; - //Forget earlier potential crunches + // Forget earlier potential crunches prev_potential_marked = false; if (crunch_debug > 2) { - tprintf ("NO CRUNCH: \"%s\"\n", - word->best_choice->unichar_string().string()); + tprintf("NO CRUNCH: \"%s\"\n", + word->best_choice->unichar_string().string()); } } } - page_res_it.forward (); + page_res_it.forward(); } } - bool Tesseract::terrible_word_crunch(WERD_RES* word, GARBAGE_LEVEL garbage_level) { float rating_per_ch; @@ -512,30 +491,28 @@ bool Tesseract::terrible_word_crunch(WERD_RES* word, word->best_choice->unichar_string().unsigned_size())) crunch_mode = 1; else { - adjusted_len = word->reject_map.length (); - if (adjusted_len > crunch_rating_max) - adjusted_len = crunch_rating_max; - rating_per_ch = word->best_choice->rating () / adjusted_len; + adjusted_len = word->reject_map.length(); + if (adjusted_len > crunch_rating_max) adjusted_len = crunch_rating_max; + rating_per_ch = word->best_choice->rating() / adjusted_len; if (rating_per_ch > crunch_terrible_rating) crunch_mode = 2; else if (crunch_terrible_garbage && (garbage_level == G_TERRIBLE)) crunch_mode = 3; - else if ((word->best_choice->certainty () < crunch_poor_garbage_cert) && - (garbage_level != G_OK)) + else if ((word->best_choice->certainty() < crunch_poor_garbage_cert) && + (garbage_level != G_OK)) crunch_mode = 4; else if ((rating_per_ch > crunch_poor_garbage_rate) && - (garbage_level != G_OK)) + (garbage_level != G_OK)) crunch_mode = 5; } if (crunch_mode > 0) { if (crunch_debug > 2) { - tprintf ("Terrible_word_crunch (%d) on \"%s\"\n", - crunch_mode, word->best_choice->unichar_string().string()); + tprintf("Terrible_word_crunch (%d) on \"%s\"\n", crunch_mode, + word->best_choice->unichar_string().string()); } return true; - } - else + } else return false; } @@ -544,20 +521,19 @@ bool Tesseract::potential_word_crunch(WERD_RES* word, bool ok_dict_word) { float rating_per_ch; int adjusted_len; - const char *str = word->best_choice->unichar_string().string(); - const char *lengths = word->best_choice->unichar_lengths().string(); + const char* str = word->best_choice->unichar_string().string(); + const char* lengths = word->best_choice->unichar_lengths().string(); bool word_crunchable; int poor_indicator_count = 0; word_crunchable = !crunch_leave_accept_strings || word->reject_map.length() < 3 || - (acceptable_word_string(*word->uch_set, - str, lengths) == AC_UNACCEPTABLE && + (acceptable_word_string(*word->uch_set, str, lengths) == + AC_UNACCEPTABLE && !ok_dict_word); adjusted_len = word->reject_map.length(); - if (adjusted_len > 10) - adjusted_len = 10; + if (adjusted_len > 10) adjusted_len = 10; rating_per_ch = word->best_choice->rating() / adjusted_len; if (rating_per_ch > crunch_pot_poor_rate) { @@ -587,8 +563,8 @@ bool Tesseract::potential_word_crunch(WERD_RES* word, return poor_indicator_count >= crunch_pot_indicators; } -void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) { - WERD_RES *word; +void Tesseract::tilde_delete(PAGE_RES_IT& page_res_it) { + WERD_RES* word; PAGE_RES_IT copy_it; bool deleting_from_bol = false; bool marked_delete_point = false; @@ -601,63 +577,55 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) { while (page_res_it.word() != nullptr) { word = page_res_it.word(); - delete_mode = word_deletable (word, debug_delete_mode); + delete_mode = word_deletable(word, debug_delete_mode); if (delete_mode != CR_NONE) { - if (word->word->flag (W_BOL) || deleting_from_bol) { + if (word->word->flag(W_BOL) || deleting_from_bol) { if (crunch_debug > 0) { - tprintf ("BOL CRUNCH DELETING(%d): \"%s\"\n", - debug_delete_mode, - word->best_choice->unichar_string().string()); + tprintf("BOL CRUNCH DELETING(%d): \"%s\"\n", debug_delete_mode, + word->best_choice->unichar_string().string()); } word->unlv_crunch_mode = delete_mode; deleting_from_bol = true; } else if (word->word->flag(W_EOL)) { if (marked_delete_point) { while (copy_it.word() != word) { - x_delete_mode = word_deletable (copy_it.word (), - x_debug_delete_mode); + x_delete_mode = word_deletable(copy_it.word(), x_debug_delete_mode); if (crunch_debug > 0) { - tprintf ("EOL CRUNCH DELETING(%d): \"%s\"\n", - x_debug_delete_mode, - copy_it.word()->best_choice->unichar_string().string()); + tprintf("EOL CRUNCH DELETING(%d): \"%s\"\n", x_debug_delete_mode, + copy_it.word()->best_choice->unichar_string().string()); } - copy_it.word ()->unlv_crunch_mode = x_delete_mode; - copy_it.forward (); + copy_it.word()->unlv_crunch_mode = x_delete_mode; + copy_it.forward(); } } if (crunch_debug > 0) { - tprintf ("EOL CRUNCH DELETING(%d): \"%s\"\n", - debug_delete_mode, - word->best_choice->unichar_string().string()); + tprintf("EOL CRUNCH DELETING(%d): \"%s\"\n", debug_delete_mode, + word->best_choice->unichar_string().string()); } word->unlv_crunch_mode = delete_mode; deleting_from_bol = false; marked_delete_point = false; - } - else { + } else { if (!marked_delete_point) { copy_it = page_res_it; marked_delete_point = true; } } - } - else { + } else { deleting_from_bol = false; - //Forget earlier potential crunches + // Forget earlier potential crunches marked_delete_point = false; } /* The following step has been left till now as the tess fails are used to determine if the word is deletable. */ - if (!crunch_early_merge_tess_fails) - word->merge_tess_fails(); - page_res_it.forward (); + if (!crunch_early_merge_tess_fails) word->merge_tess_fails(); + page_res_it.forward(); } } - -void Tesseract::convert_bad_unlv_chs(WERD_RES *word_res) { +void Tesseract::convert_bad_unlv_chs(WERD_RES* word_res) { int i; UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-"); UNICHAR_ID unichar_space = word_res->uch_set->unichar_to_id(" "); @@ -666,20 +634,20 @@ void Tesseract::convert_bad_unlv_chs(WERD_RES *word_res) { for (i = 0; i < word_res->reject_map.length(); ++i) { if (word_res->best_choice->unichar_id(i) == unichar_tilde) { word_res->best_choice->set_unichar_id(unichar_dash, i); - if (word_res->reject_map[i].accepted ()) - word_res->reject_map[i].setrej_unlv_rej (); + if (word_res->reject_map[i].accepted()) + word_res->reject_map[i].setrej_unlv_rej(); } if (word_res->best_choice->unichar_id(i) == unichar_pow) { word_res->best_choice->set_unichar_id(unichar_space, i); - if (word_res->reject_map[i].accepted ()) - word_res->reject_map[i].setrej_unlv_rej (); + if (word_res->reject_map[i].accepted()) + word_res->reject_map[i].setrej_unlv_rej(); } } } -GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { - enum STATES - { +GARBAGE_LEVEL +Tesseract::garbage_word(WERD_RES* word, BOOL8 ok_dict_word) { + enum STATES { JUNK, FIRST_UPPER, FIRST_LOWER, @@ -688,8 +656,8 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { SUBSEQUENT_LOWER, SUBSEQUENT_NUM }; - const char *str = word->best_choice->unichar_string().string(); - const char *lengths = word->best_choice->unichar_lengths().string(); + const char* str = word->best_choice->unichar_string().string(); + const char* lengths = word->best_choice->unichar_lengths().string(); STATES state = JUNK; int len = 0; int isolated_digits = 0; @@ -710,7 +678,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { for (; *str != '\0'; str += *(lengths++)) { len++; - if (word->uch_set->get_isupper (str, *lengths)) { + if (word->uch_set->get_isupper(str, *lengths)) { total_alpha_count++; switch (state) { case SUBSEQUENT_UPPER: @@ -724,8 +692,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { if (longest_alpha_repetition_count < alpha_repetition_count) { longest_alpha_repetition_count = alpha_repetition_count; } - } - else { + } else { last_char = word->uch_set->unichar_to_id(str, *lengths); alpha_repetition_count = 1; } @@ -739,8 +706,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { upper_string_count = 1; break; } - } - else if (word->uch_set->get_islower (str, *lengths)) { + } else if (word->uch_set->get_islower(str, *lengths)) { total_alpha_count++; switch (state) { case SUBSEQUENT_LOWER: @@ -754,8 +720,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { if (longest_alpha_repetition_count < alpha_repetition_count) { longest_alpha_repetition_count = alpha_repetition_count; } - } - else { + } else { last_char = word->uch_set->unichar_to_id(str, *lengths); alpha_repetition_count = 1; } @@ -769,8 +734,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { lower_string_count = 1; break; } - } - else if (word->uch_set->get_isdigit (str, *lengths)) { + } else if (word->uch_set->get_isdigit(str, *lengths)) { total_digit_count++; switch (state) { case FIRST_NUM: @@ -784,8 +748,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { state = FIRST_NUM; break; } - } - else { + } else { if (*lengths == 1 && *str == ' ') tess_rejs++; else @@ -829,28 +792,26 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { longest_upper_run_len > crunch_leave_uc_strings) return G_NEVER_CRUNCH; } - if (word->reject_map.length() > 1 && - strpbrk(str, " ") == nullptr && + if (word->reject_map.length() > 1 && strpbrk(str, " ") == nullptr && (word->best_choice->permuter() == SYSTEM_DAWG_PERM || word->best_choice->permuter() == FREQ_DAWG_PERM || word->best_choice->permuter() == USER_DAWG_PERM || word->best_choice->permuter() == NUMBER_PERM || acceptable_word_string(*word->uch_set, str, lengths) != - AC_UNACCEPTABLE || ok_dict_word)) + AC_UNACCEPTABLE || + ok_dict_word)) return G_OK; - ok_chars = len - bad_char_count - isolated_digits - - isolated_alphas - tess_rejs; + ok_chars = + len - bad_char_count - isolated_digits - isolated_alphas - tess_rejs; if (crunch_debug > 3) { tprintf("garbage_word: \"%s\"\n", word->best_choice->unichar_string().string()); - tprintf("LEN: %d bad: %d iso_N: %d iso_A: %d rej: %d\n", - len, + tprintf("LEN: %d bad: %d iso_N: %d iso_A: %d rej: %d\n", len, bad_char_count, isolated_digits, isolated_alphas, tess_rejs); } - if (bad_char_count == 0 && - tess_rejs == 0 && + if (bad_char_count == 0 && tess_rejs == 0 && (len > isolated_digits + isolated_alphas || len <= 2)) return G_OK; @@ -859,23 +820,22 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { return G_TERRIBLE; if (len > 4) { - dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits + - isolated_alphas; - if (dodgy_chars > 5 || (dodgy_chars / (float) len) > 0.5) + dodgy_chars = + 2 * tess_rejs + bad_char_count + isolated_digits + isolated_alphas; + if (dodgy_chars > 5 || (dodgy_chars / (float)len) > 0.5) return G_DODGY; else return G_OK; } else { dodgy_chars = 2 * tess_rejs + bad_char_count; - if ((len == 4 && dodgy_chars > 2) || - (len == 3 && dodgy_chars > 2) || dodgy_chars >= len) + if ((len == 4 && dodgy_chars > 2) || (len == 3 && dodgy_chars > 2) || + dodgy_chars >= len) return G_DODGY; else return G_OK; } } - /************************************************************************* * word_deletable() * DELETE WERDS AT ENDS OF ROWS IF @@ -892,10 +852,11 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { * >75% of the outline BBs have longest dimension < 0.5xht *************************************************************************/ -CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) { - int word_len = word->reject_map.length (); +CRUNCH_MODE +Tesseract::word_deletable(WERD_RES* word, int16_t& delete_mode) { + int word_len = word->reject_map.length(); float rating_per_ch; - TBOX box; //BB of word + TBOX box; // BB of word if (word->unlv_crunch_mode == CR_NONE) { delete_mode = 0; @@ -910,7 +871,7 @@ CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) { if (word->rebuild_word != nullptr) { // Cube leaves rebuild_word nullptr. box = word->rebuild_word->bounding_box(); - if (box.height () < crunch_del_min_ht * kBlnXHeight) { + if (box.height() < crunch_del_min_ht * kBlnXHeight) { delete_mode = 4; return CR_DELETE; } @@ -921,40 +882,39 @@ CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) { } } - if ((failure_count (word) * 1.5) > word_len) { + if ((failure_count(word) * 1.5) > word_len) { delete_mode = 2; return CR_LOOSE_SPACE; } - if (word->best_choice->certainty () < crunch_del_cert) { + if (word->best_choice->certainty() < crunch_del_cert) { delete_mode = 7; return CR_LOOSE_SPACE; } - rating_per_ch = word->best_choice->rating () / word_len; + rating_per_ch = word->best_choice->rating() / word_len; if (rating_per_ch > crunch_del_rating) { delete_mode = 8; return CR_LOOSE_SPACE; } - if (box.top () < kBlnBaselineOffset - crunch_del_low_word * kBlnXHeight) { + if (box.top() < kBlnBaselineOffset - crunch_del_low_word * kBlnXHeight) { delete_mode = 9; return CR_LOOSE_SPACE; } - if (box.bottom () > - kBlnBaselineOffset + crunch_del_high_word * kBlnXHeight) { + if (box.bottom() > kBlnBaselineOffset + crunch_del_high_word * kBlnXHeight) { delete_mode = 10; return CR_LOOSE_SPACE; } - if (box.height () > crunch_del_max_ht * kBlnXHeight) { + if (box.height() > crunch_del_max_ht * kBlnXHeight) { delete_mode = 11; return CR_LOOSE_SPACE; } - if (box.width () < crunch_del_min_width * kBlnXHeight) { + if (box.width() < crunch_del_min_width * kBlnXHeight) { delete_mode = 3; return CR_LOOSE_SPACE; } @@ -963,20 +923,18 @@ CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) { return CR_NONE; } -int16_t Tesseract::failure_count(WERD_RES *word) { - const char *str = word->best_choice->unichar_string().string(); +int16_t Tesseract::failure_count(WERD_RES* word) { + const char* str = word->best_choice->unichar_string().string(); int tess_rejs = 0; for (; *str != '\0'; str++) { - if (*str == ' ') - tess_rejs++; + if (*str == ' ') tess_rejs++; } return tess_rejs; } - bool Tesseract::noise_outlines(TWERD* word) { - TBOX box; // BB of outline + TBOX box; // BB of outline int16_t outline_count = 0; int16_t small_outline_count = 0; int16_t max_dimension; @@ -991,8 +949,7 @@ bool Tesseract::noise_outlines(TWERD* word) { max_dimension = box.height(); else max_dimension = box.width(); - if (max_dimension < small_limit) - small_outline_count++; + if (max_dimension < small_limit) small_outline_count++; } } return small_outline_count >= outline_count; diff --git a/src/ccmain/docqual.h b/src/ccmain/docqual.h index dc032c5e2d..017a217616 100644 --- a/src/ccmain/docqual.h +++ b/src/ccmain/docqual.h @@ -17,19 +17,13 @@ * **********************************************************************/ -#ifndef DOCQUAL_H -#define DOCQUAL_H +#ifndef DOCQUAL_H +#define DOCQUAL_H -#include "control.h" +#include "control.h" -enum GARBAGE_LEVEL -{ - G_NEVER_CRUNCH, - G_OK, - G_DODGY, - G_TERRIBLE -}; +enum GARBAGE_LEVEL { G_NEVER_CRUNCH, G_OK, G_DODGY, G_TERRIBLE }; -int16_t word_blob_quality(WERD_RES *word, ROW *row); -void reject_whole_page(PAGE_RES_IT &page_res_it); +int16_t word_blob_quality(WERD_RES* word, ROW* row); +void reject_whole_page(PAGE_RES_IT& page_res_it); #endif diff --git a/src/ccmain/equationdetect.cpp b/src/ccmain/equationdetect.cpp index 84cbef9d7c..c121f3ceb8 100644 --- a/src/ccmain/equationdetect.cpp +++ b/src/ccmain/equationdetect.cpp @@ -21,8 +21,8 @@ #include #endif -#include #include +#include #include // Include automatically generated configuration file if running autoconf. @@ -91,16 +91,15 @@ inline bool IsTextOrEquationType(PolyBlockType type) { inline bool IsLeftIndented(const EquationDetect::IndentType type) { return type == EquationDetect::LEFT_INDENT || - type == EquationDetect::BOTH_INDENT; + type == EquationDetect::BOTH_INDENT; } inline bool IsRightIndented(const EquationDetect::IndentType type) { return type == EquationDetect::RIGHT_INDENT || - type == EquationDetect::BOTH_INDENT; + type == EquationDetect::BOTH_INDENT; } -EquationDetect::EquationDetect(const char* equ_datapath, - const char* equ_name) { +EquationDetect::EquationDetect(const char* equ_datapath, const char* equ_name) { const char* default_name = "equ"; if (equ_name == nullptr) { equ_name = default_name; @@ -111,8 +110,10 @@ EquationDetect::EquationDetect(const char* equ_datapath, if (equ_tesseract_.init_tesseract(equ_datapath, equ_name, OEM_TESSERACT_ONLY)) { - tprintf("Warning: equation region detection requested," - " but %s failed to load from %s\n", equ_name, equ_datapath); + tprintf( + "Warning: equation region detection requested," + " but %s failed to load from %s\n", + equ_name, equ_datapath); } cps_super_bbox_ = nullptr; @@ -139,8 +140,7 @@ int EquationDetect::LabelSpecialText(TO_BLOCK* to_block) { blob_lists.push_back(&(to_block->large_blobs)); for (int i = 0; i < blob_lists.size(); ++i) { BLOBNBOX_IT bbox_it(blob_lists[i]); - for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); - bbox_it.forward()) { + for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { bbox_it.data()->set_special_text_type(BSTT_NONE); } } @@ -148,8 +148,8 @@ int EquationDetect::LabelSpecialText(TO_BLOCK* to_block) { return 0; } -void EquationDetect::IdentifySpecialText( - BLOBNBOX *blobnbox, const int height_th) { +void EquationDetect::IdentifySpecialText(BLOBNBOX* blobnbox, + const int height_th) { ASSERT_HOST(blobnbox != nullptr); if (blobnbox->bounding_box().height() < height_th && height_th > 0) { // For small blob, we simply set to BSTT_NONE. @@ -169,8 +169,8 @@ void EquationDetect::IdentifySpecialText( const float scaling = static_cast(kBlnXHeight) / box.height(); const float x_orig = (box.left() + box.right()) / 2.0f, y_orig = box.bottom(); TBLOB* normed_blob = new TBLOB(*tblob); - normed_blob->Normalize(nullptr, nullptr, nullptr, x_orig, y_orig, scaling, scaling, - 0.0f, static_cast(kBlnBaselineOffset), + normed_blob->Normalize(nullptr, nullptr, nullptr, x_orig, y_orig, scaling, + scaling, 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); equ_tesseract_.AdaptiveClassifier(normed_blob, &ratings_equ); lang_tesseract_->AdaptiveClassifier(normed_blob, &ratings_lang); @@ -209,12 +209,13 @@ void EquationDetect::IdentifySpecialText( type = BSTT_MATH; } else if (lang_choice) { // For other cases: lang_score is similar or significantly higher. - type = EstimateTypeForUnichar( - lang_tesseract_->unicharset, lang_choice->unichar_id()); + type = EstimateTypeForUnichar(lang_tesseract_->unicharset, + lang_choice->unichar_id()); } - if (type == BSTT_NONE && lang_tesseract_->get_fontinfo_table().get( - lang_choice->fontinfo_id()).is_italic()) { + if (type == BSTT_NONE && lang_tesseract_->get_fontinfo_table() + .get(lang_choice->fontinfo_id()) + .is_italic()) { // For text symbol, we still check if it is italic. blobnbox->set_special_text_type(BSTT_ITALIC); } else { @@ -233,8 +234,9 @@ BlobSpecialTextType EquationDetect::EstimateTypeForUnichar( // Exclude some special texts that are likely to be confused as math symbol. static GenericVector ids_to_exclude; if (ids_to_exclude.empty()) { - static const STRING kCharsToEx[] = {"'", "`", "\"", "\\", ",", ".", - "〈", "〉", "《", "》", "」", "「", ""}; + static const STRING kCharsToEx[] = {"'", "`", "\"", "\\", ",", + ".", "〈", "〉", "《", "》", + "」", "「", ""}; int i = 0; while (kCharsToEx[i] != "") { ids_to_exclude.push_back( @@ -251,7 +253,7 @@ BlobSpecialTextType EquationDetect::EstimateTypeForUnichar( if (unicharset.get_isdigit(id) || (s.length() == 1 && kDigitsChars.contains(s[0]))) { return BSTT_DIGIT; - } else { + } else { return BSTT_MATH; } } @@ -262,14 +264,15 @@ void EquationDetect::IdentifySpecialText() { equ_tesseract_.tess_bn_matching.set_value(0); // Set the multiplier to zero for lang_tesseract_ to improve the accuracy. - const int classify_class_pruner = lang_tesseract_->classify_class_pruner_multiplier; + const int classify_class_pruner = + lang_tesseract_->classify_class_pruner_multiplier; const int classify_integer_matcher = lang_tesseract_->classify_integer_matcher_multiplier; lang_tesseract_->classify_class_pruner_multiplier.set_value(0); lang_tesseract_->classify_integer_matcher_multiplier.set_value(0); ColPartitionGridSearch gsearch(part_grid_); - ColPartition *part = nullptr; + ColPartition* part = nullptr; gsearch.StartFullSearch(); while ((part = gsearch.NextFullSearch()) != nullptr) { if (!IsTextOrEquationType(part->type())) { @@ -279,16 +282,14 @@ void EquationDetect::IdentifySpecialText() { BLOBNBOX_C_IT bbox_it(part->boxes()); // Compute the height threshold. GenericVector blob_heights; - for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); - bbox_it.forward()) { + for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { if (bbox_it.data()->special_text_type() != BSTT_SKIP) { blob_heights.push_back(bbox_it.data()->bounding_box().height()); } } blob_heights.sort(); - const int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2; - for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list(); - bbox_it.forward()) { + const int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2; + for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { if (bbox_it.data()->special_text_type() != BSTT_SKIP) { IdentifySpecialText(bbox_it.data(), height_th); } @@ -335,13 +336,13 @@ void EquationDetect::IdentifyBlobsToSkip(ColPartition* part) { } const float kWidthR = 0.4, kHeightR = 0.3; const bool xoverlap = blob_box.major_x_overlap(nextblob_box), - yoverlap = blob_box.y_overlap(nextblob_box); - const float widthR = static_cast( - std::min(nextblob_box.width(), blob_box.width())) / + yoverlap = blob_box.y_overlap(nextblob_box); + const float widthR = + static_cast(std::min(nextblob_box.width(), blob_box.width())) / std::max(nextblob_box.width(), blob_box.width()); - const float heightR = static_cast( - std::min(nextblob_box.height(), blob_box.height())) / - std::max(nextblob_box.height(), blob_box.height()); + const float heightR = static_cast(std::min(nextblob_box.height(), + blob_box.height())) / + std::max(nextblob_box.height(), blob_box.height()); if (xoverlap && yoverlap && widthR > kWidthR && heightR > kHeightR) { // Found one, set nextblob type and recompute blob_box. @@ -356,8 +357,8 @@ void EquationDetect::IdentifyBlobsToSkip(ColPartition* part) { } } -int EquationDetect::FindEquationParts( - ColPartitionGrid* part_grid, ColPartitionSet** best_columns) { +int EquationDetect::FindEquationParts(ColPartitionGrid* part_grid, + ColPartitionSet** best_columns) { if (!lang_tesseract_) { tprintf("Warning: lang_tesseract_ is nullptr!\n"); return -1; @@ -462,8 +463,7 @@ void EquationDetect::MergePartsByLocation() { } void EquationDetect::SearchByOverlap( - ColPartition* seed, - GenericVector* parts_overlap) { + ColPartition* seed, GenericVector* parts_overlap) { ASSERT_HOST(seed != nullptr && parts_overlap != nullptr); if (!IsTextOrEquationType(seed->type())) { return; @@ -477,7 +477,7 @@ void EquationDetect::SearchByOverlap( search.SetUniqueMode(true); // Search iteratively. - ColPartition *part; + ColPartition* part; GenericVector parts; const float kLargeOverlapTh = 0.95; const float kEquXOverlap = 0.4, kEquYOverlap = 0.5; @@ -489,7 +489,7 @@ void EquationDetect::SearchByOverlap( bool merge = false; const float x_overlap_fraction = part_box.x_overlap_fraction(seed_box), - y_overlap_fraction = part_box.y_overlap_fraction(seed_box); + y_overlap_fraction = part_box.y_overlap_fraction(seed_box); // If part is large overlapped with seed, then set merge to true. if (x_overlap_fraction >= kLargeOverlapTh && @@ -523,8 +523,7 @@ void EquationDetect::InsertPartAfterAbsorb(ColPartition* part) { // Call SetPartitionType to re-compute the attributes of part. const TBOX& part_box(part->bounding_box()); int grid_x, grid_y; - part_grid_->GridCoords( - part_box.left(), part_box.bottom(), &grid_x, &grid_y); + part_grid_->GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); part->SetPartitionType(resolution_, best_columns_[grid_y]); // Reset the types back. @@ -539,7 +538,7 @@ void EquationDetect::InsertPartAfterAbsorb(ColPartition* part) { void EquationDetect::IdentifySeedParts() { ColPartitionGridSearch gsearch(part_grid_); - ColPartition *part = nullptr; + ColPartition* part = nullptr; gsearch.StartFullSearch(); GenericVector seeds1, seeds2; @@ -568,7 +567,7 @@ void EquationDetect::IdentifySeedParts() { } else if (!IsRightIndented(indent) && part->boxes_count() > kTextBlobsTh) { // This is likely to be a text part, save the features. - const TBOX&box = part->bounding_box(); + const TBOX& box = part->bounding_box(); if (IsLeftIndented(indent)) { indented_texts_left.push_back(box.left()); } @@ -583,8 +582,8 @@ void EquationDetect::IdentifySeedParts() { float foreground_density_th = 0.15; // Default value. if (!texts_foreground_density.empty()) { // Use the median of the texts_foreground_density. - foreground_density_th = 0.8 * texts_foreground_density[ - texts_foreground_density.size() / 2]; + foreground_density_th = + 0.8 * texts_foreground_density[texts_foreground_density.size() / 2]; } for (int i = 0; i < seeds1.size(); ++i) { @@ -592,7 +591,7 @@ void EquationDetect::IdentifySeedParts() { if (CheckSeedFgDensity(foreground_density_th, seeds1[i]) && !(IsLeftIndented(IsIndented(seeds1[i])) && CountAlignment(indented_texts_left, box.left()) >= - kLeftIndentAlignmentCountTh)) { + kLeftIndentAlignmentCountTh)) { // Mark as PT_EQUATION type. seeds1[i]->set_type(PT_EQUATION); cp_seeds_.push_back(seeds1[i]); @@ -610,11 +609,11 @@ void EquationDetect::IdentifySeedParts() { } float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) { - Pix *pix_bi = lang_tesseract_->pix_binary(); + Pix* pix_bi = lang_tesseract_->pix_binary(); const int pix_height = pixGetHeight(pix_bi); - Box* box = boxCreate(tbox.left(), pix_height - tbox.top(), - tbox.width(), tbox.height()); - Pix *pix_sub = pixClipRectangle(pix_bi, box, nullptr); + Box* box = boxCreate(tbox.left(), pix_height - tbox.top(), tbox.width(), + tbox.height()); + Pix* pix_sub = pixClipRectangle(pix_bi, box, nullptr); l_float32 fract; pixForegroundFraction(pix_sub, &fract); pixDestroy(&pix_sub); @@ -646,7 +645,7 @@ bool EquationDetect::CheckSeedFgDensity(const float density_th, } void EquationDetect::SplitCPHor(ColPartition* part, - GenericVector* parts_splitted) { + GenericVector* parts_splitted) { ASSERT_HOST(part && parts_splitted); if (part->median_width() == 0 || part->boxes_count() == 0) { return; @@ -696,7 +695,7 @@ void EquationDetect::SplitCPHor(ColPartition* part, } void EquationDetect::SplitCPHorLite(ColPartition* part, - GenericVector* splitted_boxes) { + GenericVector* splitted_boxes) { ASSERT_HOST(part && splitted_boxes); splitted_boxes->clear(); if (part->median_width() == 0) { @@ -737,16 +736,15 @@ void EquationDetect::SplitCPHorLite(ColPartition* part, bool EquationDetect::CheckForSeed2( const GenericVector& indented_texts_left, - const float foreground_density_th, - ColPartition* part) { + const float foreground_density_th, ColPartition* part) { ASSERT_HOST(part); const TBOX& box = part->bounding_box(); // Check if it is aligned with any indented_texts_left. if (!indented_texts_left.empty() && CountAlignment(indented_texts_left, box.left()) >= - kLeftIndentAlignmentCountTh) { - return false; + kLeftIndentAlignmentCountTh) { + return false; } // Check the foreground density. @@ -757,8 +755,8 @@ bool EquationDetect::CheckForSeed2( return true; } -int EquationDetect::CountAlignment( - const GenericVector& sorted_vec, const int val) const { +int EquationDetect::CountAlignment(const GenericVector& sorted_vec, + const int val) const { if (sorted_vec.empty()) { return 0; } @@ -791,7 +789,7 @@ void EquationDetect::IdentifyInlineParts() { void EquationDetect::ComputeCPsSuperBBox() { ColPartitionGridSearch gsearch(part_grid_); - ColPartition *part = nullptr; + ColPartition* part = nullptr; gsearch.StartFullSearch(); delete cps_super_bbox_; cps_super_bbox_ = new TBOX(); @@ -803,10 +801,10 @@ void EquationDetect::ComputeCPsSuperBBox() { void EquationDetect::IdentifyInlinePartsHorizontal() { ASSERT_HOST(cps_super_bbox_); GenericVector new_seeds; - const int kMarginDiffTh = IntCastRounded( - 0.5 * lang_tesseract_->source_resolution()); - const int kGapTh = static_cast(roundf( - 1.0 * lang_tesseract_->source_resolution())); + const int kMarginDiffTh = + IntCastRounded(0.5 * lang_tesseract_->source_resolution()); + const int kGapTh = + static_cast(roundf(1.0 * lang_tesseract_->source_resolution())); ColPartitionGridSearch search(part_grid_); search.SetUniqueMode(true); // The center x coordinate of the cp_super_bbox_. @@ -815,19 +813,19 @@ void EquationDetect::IdentifyInlinePartsHorizontal() { ColPartition* part = cp_seeds_[i]; const TBOX& part_box(part->bounding_box()); const int left_margin = part_box.left() - cps_super_bbox_->left(), - right_margin = cps_super_bbox_->right() - part_box.right(); + right_margin = cps_super_bbox_->right() - part_box.right(); bool right_to_left; if (left_margin + kMarginDiffTh < right_margin && left_margin < kMarginDiffTh) { // part is left aligned, so we search if it has any right neighbor. - search.StartSideSearch( - part_box.right(), part_box.top(), part_box.bottom()); + search.StartSideSearch(part_box.right(), part_box.top(), + part_box.bottom()); right_to_left = false; } else if (left_margin > cps_cx) { // part locates on the right half on image, so search if it has any left // neighbor. - search.StartSideSearch( - part_box.left(), part_box.top(), part_box.bottom()); + search.StartSideSearch(part_box.left(), part_box.top(), + part_box.bottom()); right_to_left = true; } else { // part is not an inline equation. new_seeds.push_back(part); @@ -877,8 +875,8 @@ int EquationDetect::EstimateTextPartLineSpacing() { continue; } if (prev != nullptr) { - const TBOX ¤t_box = current->bounding_box(); - const TBOX &prev_box = prev->bounding_box(); + const TBOX& current_box = current->bounding_box(); + const TBOX& prev_box = prev->bounding_box(); // prev and current should be x major overlap and non y overlap. if (current_box.major_x_overlap(prev_box) && !current_box.y_overlap(prev_box)) { @@ -941,7 +939,7 @@ bool EquationDetect::IsInline(const bool search_bottom, // Look for its nearest vertical neighbor that hardly overlaps in y but // largely overlaps in x. ColPartitionGridSearch search(part_grid_); - ColPartition *neighbor = nullptr; + ColPartition* neighbor = nullptr; const TBOX& part_box(part->bounding_box()); const float kYGapRatioTh = 1.0; @@ -955,8 +953,8 @@ bool EquationDetect::IsInline(const bool search_bottom, search.SetUniqueMode(true); while ((neighbor = search.NextVerticalSearch(search_bottom)) != nullptr) { const TBOX& neighbor_box(neighbor->bounding_box()); - if (part_box.y_gap(neighbor_box) > kYGapRatioTh * - std::min(part_box.height(), neighbor_box.height())) { + if (part_box.y_gap(neighbor_box) > + kYGapRatioTh * std::min(part_box.height(), neighbor_box.height())) { // Finished searching. break; } @@ -966,14 +964,17 @@ bool EquationDetect::IsInline(const bool search_bottom, // Check if neighbor and part is inline similar. const float kHeightRatioTh = 0.5; - const int kYGapTh = textparts_linespacing > 0 ? - textparts_linespacing + static_cast(roundf(0.02 * resolution_)): - static_cast(roundf(0.05 * resolution_)); // Default value. - if (part_box.x_overlap(neighbor_box) && // Location feature. + const int kYGapTh = + textparts_linespacing > 0 + ? textparts_linespacing + + static_cast(roundf(0.02 * resolution_)) + : static_cast(roundf(0.05 * resolution_)); // Default value. + if (part_box.x_overlap(neighbor_box) && // Location feature. part_box.y_gap(neighbor_box) <= kYGapTh && // Line spacing. // Geo feature. static_cast(std::min(part_box.height(), neighbor_box.height())) / - std::max(part_box.height(), neighbor_box.height()) > kHeightRatioTh) { + std::max(part_box.height(), neighbor_box.height()) > + kHeightRatioTh) { return true; } } @@ -989,8 +990,8 @@ bool EquationDetect::CheckSeedBlobsCount(ColPartition* part) { const int kSeedMathDigitBlobsCount = 5; const int blobs = part->boxes_count(), - math_blobs = part->SpecialBlobsCount(BSTT_MATH), - digit_blobs = part->SpecialBlobsCount(BSTT_DIGIT); + math_blobs = part->SpecialBlobsCount(BSTT_MATH), + digit_blobs = part->SpecialBlobsCount(BSTT_DIGIT); if (blobs < kSeedBlobsCountTh || math_blobs <= kSeedMathBlobsCount || math_blobs + digit_blobs <= kSeedMathDigitBlobsCount) { return false; @@ -999,13 +1000,12 @@ bool EquationDetect::CheckSeedBlobsCount(ColPartition* part) { return true; } -bool EquationDetect::CheckSeedDensity( - const float math_density_high, - const float math_density_low, - const ColPartition* part) const { +bool EquationDetect::CheckSeedDensity(const float math_density_high, + const float math_density_low, + const ColPartition* part) const { ASSERT_HOST(part); - float math_digit_density = part->SpecialBlobsDensity(BSTT_MATH) - + part->SpecialBlobsDensity(BSTT_DIGIT); + float math_digit_density = part->SpecialBlobsDensity(BSTT_MATH) + + part->SpecialBlobsDensity(BSTT_DIGIT); float italic_density = part->SpecialBlobsDensity(BSTT_ITALIC); if (math_digit_density > math_density_high) { return true; @@ -1022,7 +1022,7 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition* part) { ASSERT_HOST(part); ColPartitionGridSearch search(part_grid_); - ColPartition *neighbor = nullptr; + ColPartition* neighbor = nullptr; const TBOX& part_box(part->bounding_box()); const int kXGapTh = static_cast(roundf(0.5 * resolution_)); const int kRadiusTh = static_cast(roundf(3.0 * resolution_)); @@ -1032,7 +1032,7 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition* part) { // perform the radius search, and check if we can find a neighboring parition // that locates on the top/bottom left of part. search.StartRadSearch((part_box.left() + part_box.right()) / 2, - (part_box.top() + part_box.bottom()) / 2, kRadiusTh); + (part_box.top() + part_box.bottom()) / 2, kRadiusTh); search.SetUniqueMode(true); bool left_indented = false, right_indented = false; while ((neighbor = search.NextRadSearch()) != nullptr && @@ -1083,7 +1083,7 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition* part) { } bool EquationDetect::ExpandSeed(ColPartition* seed) { - if (seed == nullptr || // This seed has been absorbed by other seeds. + if (seed == nullptr || // This seed has been absorbed by other seeds. seed->IsVerticalType()) { // We skip vertical type right now. return false; } @@ -1126,8 +1126,7 @@ bool EquationDetect::ExpandSeed(ColPartition* seed) { } void EquationDetect::ExpandSeedHorizontal( - const bool search_left, - ColPartition* seed, + const bool search_left, ColPartition* seed, GenericVector* parts_to_merge) { ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr); const float kYOverlapTh = 0.6; @@ -1140,7 +1139,7 @@ void EquationDetect::ExpandSeedHorizontal( search.SetUniqueMode(true); // Search iteratively. - ColPartition *part = nullptr; + ColPartition* part = nullptr; while ((part = search.NextSideSearch(search_left)) != nullptr) { if (part == seed) { continue; @@ -1182,8 +1181,7 @@ void EquationDetect::ExpandSeedHorizontal( } void EquationDetect::ExpandSeedVertical( - const bool search_bottom, - ColPartition* seed, + const bool search_bottom, ColPartition* seed, GenericVector* parts_to_merge) { ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr && cps_super_bbox_ != nullptr); @@ -1193,14 +1191,15 @@ void EquationDetect::ExpandSeedVertical( ColPartitionGridSearch search(part_grid_); const TBOX& seed_box(seed->bounding_box()); const int y = search_bottom ? seed_box.bottom() : seed_box.top(); - search.StartVerticalSearch( - cps_super_bbox_->left(), cps_super_bbox_->right(), y); + search.StartVerticalSearch(cps_super_bbox_->left(), cps_super_bbox_->right(), + y); search.SetUniqueMode(true); // Search iteratively. - ColPartition *part = nullptr; + ColPartition* part = nullptr; GenericVector parts; - int skipped_min_top = std::numeric_limits::max(), skipped_max_bottom = -1; + int skipped_min_top = std::numeric_limits::max(), + skipped_max_bottom = -1; while ((part = search.NextVerticalSearch(search_bottom)) != nullptr) { if (part == seed) { continue; @@ -1225,7 +1224,7 @@ void EquationDetect::ExpandSeedVertical( part->blob_type() != BRT_HLINE)) { skip_part = true; } else if (!IsNearSmallNeighbor(seed_box, part_box) || - !CheckSeedNeighborDensity(part)) { + !CheckSeedNeighborDensity(part)) { // For other types, it should be the near small neighbor of seed. skip_part = true; } @@ -1299,7 +1298,8 @@ bool EquationDetect::CheckSeedNeighborDensity(const ColPartition* part) const { // We check the math blobs density and the unclear blobs density. if (part->SpecialBlobsDensity(BSTT_MATH) + - part->SpecialBlobsDensity(BSTT_DIGIT) > kMathDigitDensityTh1 || + part->SpecialBlobsDensity(BSTT_DIGIT) > + kMathDigitDensityTh1 || part->SpecialBlobsDensity(BSTT_UNCLEAR) > kUnclearDensityTh) { return true; } @@ -1310,7 +1310,7 @@ bool EquationDetect::CheckSeedNeighborDensity(const ColPartition* part) const { void EquationDetect::ProcessMathBlockSatelliteParts() { // Iterate over part_grid_, and find all parts that are text type but not // equation type. - ColPartition *part = nullptr; + ColPartition* part = nullptr; GenericVector text_parts; ColPartitionGridSearch gsearch(part_grid_); gsearch.StartFullSearch(); @@ -1330,8 +1330,8 @@ void EquationDetect::ProcessMathBlockSatelliteParts() { if (text_parts.size() % 2 == 0 && text_parts.size() > 1) { const TBOX& text_box = text_parts[text_parts.size() / 2 - 1]->bounding_box(); - med_height = static_cast(roundf( - 0.5 * (text_box.height() + med_height))); + med_height = + static_cast(roundf(0.5 * (text_box.height() + med_height))); } // Iterate every text_parts and check if it is a math block satellite. @@ -1362,8 +1362,9 @@ bool EquationDetect::IsMathBlockSatellite( math_blocks->clear(); const TBOX& part_box(part->bounding_box()); // Find the top/bottom nearest neighbor of part. - ColPartition *neighbors[2]; - int y_gaps[2] = {std::numeric_limits::max(), std::numeric_limits::max()}; + ColPartition* neighbors[2]; + int y_gaps[2] = {std::numeric_limits::max(), + std::numeric_limits::max()}; // The horizontal boundary of the neighbors. int neighbors_left = std::numeric_limits::max(), neighbors_right = 0; for (int i = 0; i < 2; ++i) { @@ -1410,8 +1411,8 @@ bool EquationDetect::IsMathBlockSatellite( return true; } -ColPartition* EquationDetect::SearchNNVertical( - const bool search_bottom, const ColPartition* part) { +ColPartition* EquationDetect::SearchNNVertical(const bool search_bottom, + const ColPartition* part) { ASSERT_HOST(part); ColPartition *nearest_neighbor = nullptr, *neighbor = nullptr; const int kYGapTh = static_cast(roundf(resolution_ * 0.5)); @@ -1445,8 +1446,8 @@ ColPartition* EquationDetect::SearchNNVertical( return nearest_neighbor; } -bool EquationDetect::IsNearMathNeighbor( - const int y_gap, const ColPartition *neighbor) const { +bool EquationDetect::IsNearMathNeighbor(const int y_gap, + const ColPartition* neighbor) const { if (!neighbor) { return false; } @@ -1480,13 +1481,13 @@ void EquationDetect::PaintSpecialTexts(const STRING& outfile) const { } void EquationDetect::PaintColParts(const STRING& outfile) const { - Pix *pix = pixConvertTo32(lang_tesseract_->BestPix()); + Pix* pix = pixConvertTo32(lang_tesseract_->BestPix()); ColPartitionGridSearch gsearch(part_grid_); gsearch.StartFullSearch(); ColPartition* part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { const TBOX& tbox = part->bounding_box(); - Box *box = boxCreate(tbox.left(), pixGetHeight(pix) - tbox.top(), + Box* box = boxCreate(tbox.left(), pixGetHeight(pix) - tbox.top(), tbox.width(), tbox.height()); if (part->type() == PT_EQUATION) { pixRenderBoxArb(pix, box, 5, 255, 0, 0); diff --git a/src/ccmain/equationdetect.h b/src/ccmain/equationdetect.h index a0c29b9ebe..cfca805091 100644 --- a/src/ccmain/equationdetect.h +++ b/src/ccmain/equationdetect.h @@ -42,8 +42,7 @@ class ColPartitionSet; class EquationDetect : public EquationDetectBase { public: - EquationDetect(const char* equ_datapath, - const char* equ_language); + EquationDetect(const char* equ_datapath, const char* equ_language); ~EquationDetect(); enum IndentType { @@ -76,11 +75,11 @@ class EquationDetect : public EquationDetectBase { // Identify the special text type for one blob, and update its field. When // height_th is set (> 0), we will label the blob as BSTT_NONE if its height // is less than height_th. - void IdentifySpecialText(BLOBNBOX *blob, const int height_th); + void IdentifySpecialText(BLOBNBOX* blob, const int height_th); // Estimate the type for one unichar. - BlobSpecialTextType EstimateTypeForUnichar( - const UNICHARSET& unicharset, const UNICHAR_ID id) const; + BlobSpecialTextType EstimateTypeForUnichar(const UNICHARSET& unicharset, + const UNICHAR_ID id) const; // Compute special text type for each blobs in part_grid_. void IdentifySpecialText(); @@ -120,15 +119,12 @@ class EquationDetect : public EquationDetectBase { // 1. If its left is aligned with any coordinates in indented_texts_left, // which we assume have been sorted. // 2. If its foreground density is over foreground_density_th. - bool CheckForSeed2( - const GenericVector& indented_texts_left, - const float foreground_density_th, - ColPartition* part); + bool CheckForSeed2(const GenericVector& indented_texts_left, + const float foreground_density_th, ColPartition* part); // Count the number of values in sorted_vec that is close to val, used to // check if a partition is aligned with text partitions. - int CountAlignment( - const GenericVector& sorted_vec, const int val) const; + int CountAlignment(const GenericVector& sorted_vec, const int val) const; // Check for a seed candidate using the foreground pixel density. And we // return true if the density is below a certain threshold, because characters @@ -175,8 +171,7 @@ class EquationDetect : public EquationDetectBase { // Check if part is an inline equation zone. This should be called after we // identified the seed regions. - bool IsInline(const bool search_bottom, - const int textPartsLineSpacing, + bool IsInline(const bool search_bottom, const int textPartsLineSpacing, ColPartition* part); // For a given seed partition, we search the part_grid_ and see if there is @@ -188,16 +183,13 @@ class EquationDetect : public EquationDetectBase { // horizontally/vertically, find all parititions that can be // merged with seed, remove them from part_grid_, and put them into // parts_to_merge. - void ExpandSeedHorizontal(const bool search_left, - ColPartition* seed, + void ExpandSeedHorizontal(const bool search_left, ColPartition* seed, GenericVector* parts_to_merge); - void ExpandSeedVertical(const bool search_bottom, - ColPartition* seed, + void ExpandSeedVertical(const bool search_bottom, ColPartition* seed, GenericVector* parts_to_merge); // Check if a part_box is the small neighbor of seed_box. - bool IsNearSmallNeighbor(const TBOX& seed_box, - const TBOX& part_box) const; + bool IsNearSmallNeighbor(const TBOX& seed_box, const TBOX& part_box) const; // Perform the density check for part, which we assume is nearing a seed // partition. It returns true if the check passed. @@ -215,8 +207,8 @@ class EquationDetect : public EquationDetectBase { // Check if part is the satellite of one/two math blocks. If it is, we return // true, and save the blocks into math_blocks. - bool IsMathBlockSatellite( - ColPartition* part, GenericVector* math_blocks); + bool IsMathBlockSatellite(ColPartition* part, + GenericVector* math_blocks); // Search the nearest neighbor of part in one vertical direction as defined in // search_bottom. It returns the neighbor found that major x overlap with it, @@ -226,7 +218,7 @@ class EquationDetect : public EquationDetectBase { // Check if the neighbor with vertical distance of y_gap is a near and math // block partition. - bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const; + bool IsNearMathNeighbor(const int y_gap, const ColPartition* neighbor) const; // Generate the tiff file name for output/debug file. void GetOutputTiffName(const char* name, STRING* image_name) const; diff --git a/src/ccmain/fixspace.cpp b/src/ccmain/fixspace.cpp index e07378920a..d747dc5cb7 100644 --- a/src/ccmain/fixspace.cpp +++ b/src/ccmain/fixspace.cpp @@ -3,35 +3,35 @@ * Description: Implements a pass over the page res, exploring the alternative * spacing possibilities, trying to use context to improve the * word spacing -* Author: Phil Cheatle -* Created: Thu Oct 21 11:38:43 BST 1993 -* -* (C) Copyright 1993, Hewlett-Packard Ltd. -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** http://www.apache.org/licenses/LICENSE-2.0 -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -* -**********************************************************************/ + * Author: Phil Cheatle + * Created: Thu Oct 21 11:38:43 BST 1993 + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ +#include "fixspace.h" #include -#include "reject.h" -#include "statistc.h" #include "control.h" -#include "fixspace.h" #include "genblob.h" -#include "tessvars.h" -#include "tessbox.h" #include "globals.h" +#include "reject.h" +#include "statistc.h" +#include "tessbox.h" #include "tesseractclass.h" +#include "tessvars.h" -#define PERFECT_WERDS 999 -#define MAXSPACING 128 /*max expected spacing in pix */ +#define PERFECT_WERDS 999 +#define MAXSPACING 128 /*max expected spacing in pix */ namespace tesseract { @@ -45,18 +45,17 @@ namespace tesseract { * @param word_count count of words in doc * @param[out] page_res */ -void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, - int32_t word_count, - PAGE_RES *page_res) { +void Tesseract::fix_fuzzy_spaces(ETEXT_DESC* monitor, int32_t word_count, + PAGE_RES* page_res) { BLOCK_RES_IT block_res_it; ROW_RES_IT row_res_it; WERD_RES_IT word_res_it_from; WERD_RES_IT word_res_it_to; - WERD_RES *word_res; + WERD_RES* word_res; WERD_RES_LIST fuzzy_space_words; int16_t new_length; - bool prevent_null_wd_fixsp; // DON'T process blobless wds - int32_t word_index; // current word + bool prevent_null_wd_fixsp; // DON'T process blobless wds + int32_t word_index; // current word block_res_it.set_to_list(&page_res->block_res_list); word_index = 0; @@ -82,16 +81,14 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, if (monitor->deadline_exceeded() || (monitor->cancel != nullptr && (*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) - return; + return; } } if (!word_res_it_from.at_last()) { word_res_it_to = word_res_it_from; - prevent_null_wd_fixsp = - word_res->word->cblob_list()->empty(); - if (check_debug_pt(word_res, 60)) - debug_fix_space_level.set_value(10); + prevent_null_wd_fixsp = word_res->word->cblob_list()->empty(); + if (check_debug_pt(word_res, 60)) debug_fix_space_level.set_value(10); word_res_it_to.forward(); word_index++; if (monitor != nullptr) { @@ -100,9 +97,9 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, if (monitor->deadline_exceeded() || (monitor->cancel != nullptr && (*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) - return; + return; } - while (!word_res_it_to.at_last () && + while (!word_res_it_to.at_last() && (word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) || word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) { if (check_debug_pt(word_res, 60)) @@ -111,8 +108,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, prevent_null_wd_fixsp = true; word_res = word_res_it_to.forward(); } - if (check_debug_pt(word_res, 60)) - debug_fix_space_level.set_value(10); + if (check_debug_pt(word_res, 60)) debug_fix_space_level.set_value(10); if (word_res->word->cblob_list()->empty()) prevent_null_wd_fixsp = true; if (prevent_null_wd_fixsp) { @@ -120,19 +116,16 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, } else { fuzzy_space_words.assign_to_sublist(&word_res_it_from, &word_res_it_to); - fix_fuzzy_space_list(fuzzy_space_words, - row_res_it.data()->row, + fix_fuzzy_space_list(fuzzy_space_words, row_res_it.data()->row, block_res_it.data()->block); new_length = fuzzy_space_words.length(); word_res_it_from.add_list_before(&fuzzy_space_words); - for (; - !word_res_it_from.at_last() && new_length > 0; + for (; !word_res_it_from.at_last() && new_length > 0; new_length--) { word_res_it_from.forward(); } } - if (test_pt) - debug_fix_space_level.set_value(0); + if (test_pt) debug_fix_space_level.set_value(0); } fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, block_res_it.data()->block); @@ -142,8 +135,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, } } -void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, - ROW *row, +void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST& best_perm, ROW* row, BLOCK* block) { int16_t best_score; WERD_RES_LIST current_perm; @@ -153,8 +145,7 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, best_score = eval_word_spacing(best_perm); // default score dump_words(best_perm, best_score, 1, improved); - if (best_score != PERFECT_WERDS) - initialise_search(best_perm, current_perm); + if (best_score != PERFECT_WERDS) initialise_search(best_perm, current_perm); while ((best_score != PERFECT_WERDS) && !current_perm.empty()) { match_current_words(current_perm, row, block); @@ -166,19 +157,18 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, best_score = current_score; improved = true; } - if (current_score < PERFECT_WERDS) - transform_to_next_perm(current_perm); + if (current_score < PERFECT_WERDS) transform_to_next_perm(current_perm); } dump_words(best_perm, best_score, 3, improved); } } // namespace tesseract -void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) { +void initialise_search(WERD_RES_LIST& src_list, WERD_RES_LIST& new_list) { WERD_RES_IT src_it(&src_list); WERD_RES_IT new_it(&new_list); - WERD_RES *src_wd; - WERD_RES *new_wd; + WERD_RES* src_wd; + WERD_RES* new_wd; for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { src_wd = src_it.data(); @@ -191,12 +181,11 @@ void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) { } } - namespace tesseract { -void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, +void Tesseract::match_current_words(WERD_RES_LIST& words, ROW* row, BLOCK* block) { WERD_RES_IT word_it(&words); - WERD_RES *word; + WERD_RES* word; // Since we are not using PAGE_RES to iterate over words, we need to update // prev_word_best_choice_ before calling classify_word_pass2(). prev_word_best_choice_ = nullptr; @@ -236,7 +225,7 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, * confirmed. The only score is from the joined 1. "PS7a713/7a" scores 2. * */ -int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { +int16_t Tesseract::eval_word_spacing(WERD_RES_LIST& word_res_list) { WERD_RES_IT word_res_it(&word_res_list); int16_t total_score = 0; int16_t word_count = 0; @@ -244,7 +233,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { int16_t word_len; int16_t i; int16_t offset; - WERD_RES *word; // current word + WERD_RES* word; // current word int16_t prev_word_score = 0; bool prev_word_done = false; bool prev_char_1 = false; // prev ch a "1/I/l"? @@ -262,8 +251,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { word_count++; if (word->tess_failed) { total_score += prev_word_score; - if (prev_word_done) - done_word_count++; + if (prev_word_done) done_word_count++; prev_word_score = 0; prev_char_1 = false; prev_char_digit = false; @@ -277,15 +265,15 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { word_len = word->reject_map.length(); current_word_ok_so_far = false; if (!((prev_char_1 && digit_or_numeric_punct(word, 0)) || - (prev_char_digit && ( - (word_done && - word->best_choice->unichar_lengths().string()[0] == 1 && - word->best_choice->unichar_string()[0] == '1') || - (!word_done && STRING(conflict_set_I_l_1).contains( - word->best_choice->unichar_string()[0])))))) { + (prev_char_digit && + ((word_done && + word->best_choice->unichar_lengths().string()[0] == 1 && + word->best_choice->unichar_string()[0] == '1') || + (!word_done && + STRING(conflict_set_I_l_1) + .contains(word->best_choice->unichar_string()[0])))))) { total_score += prev_word_score; - if (prev_word_done) - done_word_count++; + if (prev_word_done) done_word_count++; current_word_ok_so_far = word_done; } @@ -301,8 +289,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { rejtn */ for (i = 0, prev_char_1 = false; i < word_len; i++) { current_char_1 = word->best_choice->unichar_string()[i] == '1'; - if (prev_char_1 || (current_char_1 && (i > 0))) - total_score++; + if (prev_char_1 || (current_char_1 && (i > 0))) total_score++; prev_char_1 = current_char_1; } @@ -312,19 +299,21 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { for (i = 0, offset = 0, prev_char_punct = false; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) { current_char_punct = - punct_chars.contains(word->best_choice->unichar_string()[offset]); - if (prev_char_punct || (current_char_punct && i > 0)) - total_score++; + punct_chars.contains(word->best_choice->unichar_string()[offset]); + if (prev_char_punct || (current_char_punct && i > 0)) total_score++; prev_char_punct = current_char_punct; } } prev_char_digit = digit_or_numeric_punct(word, word_len - 1); for (i = 0, offset = 0; i < word_len - 1; - offset += word->best_choice->unichar_lengths()[i++]); + offset += word->best_choice->unichar_lengths()[i++]) + ; prev_char_1 = - ((word_done && (word->best_choice->unichar_string()[offset] == '1')) - || (!word_done && STRING(conflict_set_I_l_1).contains( - word->best_choice->unichar_string()[offset]))); + ((word_done && + (word->best_choice->unichar_string()[offset] == '1')) || + (!word_done && + STRING(conflict_set_I_l_1) + .contains(word->best_choice->unichar_string()[offset]))); } /* Find next word */ do { @@ -332,32 +321,31 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { } while (word_res_it.data()->part_of_combo); } while (!word_res_it.at_first()); total_score += prev_word_score; - if (prev_word_done) - done_word_count++; + if (prev_word_done) done_word_count++; if (done_word_count == word_count) return PERFECT_WERDS; else return total_score; } -bool Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) { +bool Tesseract::digit_or_numeric_punct(WERD_RES* word, int char_position) { int i; int offset; for (i = 0, offset = 0; i < char_position; - offset += word->best_choice->unichar_lengths()[i++]); + offset += word->best_choice->unichar_lengths()[i++]) + ; return ( word->uch_set->get_isdigit( word->best_choice->unichar_string().string() + offset, word->best_choice->unichar_lengths()[i]) || (word->best_choice->permuter() == NUMBER_PERM && - STRING(numeric_punctuation).contains( - word->best_choice->unichar_string().string()[offset]))); + STRING(numeric_punctuation) + .contains(word->best_choice->unichar_string().string()[offset]))); } } // namespace tesseract - /** * @name transform_to_next_perm() * Examines the current word list to find the smallest word gap size. Then walks @@ -369,13 +357,13 @@ bool Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) { * If there are no more gaps then it DELETES the entire list and returns the * empty list to cause termination. */ -void transform_to_next_perm(WERD_RES_LIST &words) { +void transform_to_next_perm(WERD_RES_LIST& words) { WERD_RES_IT word_it(&words); WERD_RES_IT prev_word_it(&words); - WERD_RES *word; - WERD_RES *prev_word; - WERD_RES *combo; - WERD *copy_word; + WERD_RES* word; + WERD_RES* prev_word; + WERD_RES* combo; + WERD* copy_word; int16_t prev_right = -INT16_MAX; TBOX box; int16_t gap; @@ -387,14 +375,13 @@ void transform_to_next_perm(WERD_RES_LIST &words) { box = word->word->bounding_box(); if (prev_right > -INT16_MAX) { gap = box.left() - prev_right; - if (gap < min_gap) - min_gap = gap; + if (gap < min_gap) min_gap = gap; } prev_right = box.right(); } } if (min_gap < INT16_MAX) { - prev_right = -INT16_MAX; // back to start + prev_right = -INT16_MAX; // back to start word_it.set_to_list(&words); // Note: we can't use cycle_pt due to inserted combos at start of list. for (; (prev_right == -INT16_MAX) || !word_it.at_first(); @@ -446,8 +433,8 @@ void transform_to_next_perm(WERD_RES_LIST &words) { } namespace tesseract { -void Tesseract::dump_words(WERD_RES_LIST &perm, int16_t score, - int16_t mode, bool improved) { +void Tesseract::dump_words(WERD_RES_LIST& perm, int16_t score, int16_t mode, + bool improved) { WERD_RES_IT word_res_it(&perm); if (debug_fix_space_level > 0) { @@ -500,9 +487,8 @@ void Tesseract::dump_words(WERD_RES_LIST &perm, int16_t score, } } -bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) { - if (word->done) - return true; +bool Tesseract::fixspace_thinks_word_done(WERD_RES* word) { + if (word->done) return true; /* Use all the standard pass 2 conditions for mode 5 in set_done() in @@ -524,7 +510,6 @@ bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) { } } - /** * @name fix_sp_fp_word() * Test the current word to see if it can be split by deleting noise blobs. If @@ -532,9 +517,9 @@ bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) { * Return with the iterator pointing to the same place if the word is unchanged, * or the last of the replacement words. */ -void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, +void Tesseract::fix_sp_fp_word(WERD_RES_IT& word_res_it, ROW* row, BLOCK* block) { - WERD_RES *word_res; + WERD_RES* word_res; WERD_RES_LIST sub_word_list; WERD_RES_IT sub_word_list_it(&sub_word_list); int16_t blob_index; @@ -542,15 +527,12 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, float junk; word_res = word_res_it.data(); - if (word_res->word->flag(W_REP_CHAR) || - word_res->combination || - word_res->part_of_combo || - !word_res->word->flag(W_DONT_CHOP)) + if (word_res->word->flag(W_REP_CHAR) || word_res->combination || + word_res->part_of_combo || !word_res->word->flag(W_DONT_CHOP)) return; blob_index = worst_noise_blob(word_res, &junk); - if (blob_index < 0) - return; + if (blob_index < 0) return; if (debug_fix_space_level > 1) { tprintf("FP fixspace working on \"%s\"\n", @@ -566,13 +548,13 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, } } -void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, +void Tesseract::fix_noisy_space_list(WERD_RES_LIST& best_perm, ROW* row, BLOCK* block) { int16_t best_score; WERD_RES_IT best_perm_it(&best_perm); WERD_RES_LIST current_perm; WERD_RES_IT current_perm_it(¤t_perm); - WERD_RES *old_word_res; + WERD_RES* old_word_res; int16_t current_score; bool improved = false; @@ -583,7 +565,7 @@ void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, old_word_res = best_perm_it.data(); // Even deep_copy doesn't copy the underlying WERD unless its combination // flag is true!. - old_word_res->combination = true; // Kludge to force deep copy + old_word_res->combination = true; // Kludge to force deep copy current_perm_it.add_to_end(WERD_RES::deep_copy(old_word_res)); old_word_res->combination = false; // Undo kludge @@ -606,26 +588,25 @@ void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, dump_words(best_perm, best_score, 3, improved); } - /** * break_noisiest_blob_word() * Find the word with the blob which looks like the worst noise. * Break the word into two, deleting the noise blob. */ -void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { +void Tesseract::break_noisiest_blob_word(WERD_RES_LIST& words) { WERD_RES_IT word_it(&words); WERD_RES_IT worst_word_it; float worst_noise_score = 9999; - int worst_blob_index = -1; // Noisiest blob of noisiest wd - int blob_index; // of wds noisiest blob - float noise_score; // of wds noisiest blob - WERD_RES *word_res; + int worst_blob_index = -1; // Noisiest blob of noisiest wd + int blob_index; // of wds noisiest blob + float noise_score; // of wds noisiest blob + WERD_RES* word_res; C_BLOB_IT blob_it; C_BLOB_IT rej_cblob_it; C_BLOB_LIST new_blob_list; C_BLOB_IT new_blob_it; C_BLOB_IT new_rej_cblob_it; - WERD *new_word; + WERD* new_word; int16_t start_of_noise_blob; int16_t i; @@ -638,7 +619,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { } } if (worst_blob_index < 0) { - words.clear(); // signal termination + words.clear(); // signal termination return; } @@ -654,7 +635,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { new_blob_it.add_after_then_move(blob_it.extract()); } start_of_noise_blob = blob_it.data()->bounding_box().left(); - delete blob_it.extract(); // throw out noise blob + delete blob_it.extract(); // throw out noise blob new_word = new WERD(&new_blob_list, word_res->word); new_word->set_flag(W_EOL, FALSE); @@ -663,9 +644,8 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { new_rej_cblob_it.set_to_list(new_word->rej_cblob_list()); rej_cblob_it.set_to_list(word_res->word->rej_cblob_list()); - for (; - (!rej_cblob_it.empty() && - (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob)); + for (; (!rej_cblob_it.empty() && + (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob)); rej_cblob_it.forward()) { new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract()); } @@ -677,33 +657,31 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { word_res->ClearResults(); } -int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, - float *worst_noise_score) { +int16_t Tesseract::worst_noise_blob(WERD_RES* word_res, + float* worst_noise_score) { float noise_score[512]; int i; - int min_noise_blob; // 1st contender - int max_noise_blob; // last contender + int min_noise_blob; // 1st contender + int max_noise_blob; // last contender int non_noise_count; - int worst_noise_blob; // Worst blob + int worst_noise_blob; // Worst blob float small_limit = kBlnXHeight * fixsp_small_outlines_size; float non_noise_limit = kBlnXHeight * 0.8; - if (word_res->rebuild_word == nullptr) - return -1; // Can't handle cube words. + if (word_res->rebuild_word == nullptr) return -1; // Can't handle cube words. // Normalised. int blob_count = word_res->box_word->length(); ASSERT_HOST(blob_count <= 512); - if (blob_count < 5) - return -1; // too short to split + if (blob_count < 5) return -1; // too short to split - /* Get the noise scores for all blobs */ + /* Get the noise scores for all blobs */ - #ifndef SECURE_NAMES +#ifndef SECURE_NAMES if (debug_fix_space_level > 5) tprintf("FP fixspace Noise metrics for \"%s\": ", word_res->best_choice->unichar_string().string()); - #endif +#endif for (i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) { TBLOB* blob = word_res->rebuild_word->blobs[i]; @@ -712,11 +690,9 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, else noise_score[i] = blob_noise_score(blob); - if (debug_fix_space_level > 5) - tprintf("%1.1f ", noise_score[i]); + if (debug_fix_space_level > 5) tprintf("%1.1f ", noise_score[i]); } - if (debug_fix_space_level > 5) - tprintf("\n"); + if (debug_fix_space_level > 5) tprintf("\n"); /* Now find the worst one which is far enough away from the end of the word */ @@ -726,8 +702,7 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, non_noise_count++; } } - if (non_noise_count < fixsp_non_noise_limit) - return -1; + if (non_noise_count < fixsp_non_noise_limit) return -1; min_noise_blob = i; @@ -738,13 +713,11 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, non_noise_count++; } } - if (non_noise_count < fixsp_non_noise_limit) - return -1; + if (non_noise_count < fixsp_non_noise_limit) return -1; max_noise_blob = i; - if (min_noise_blob > max_noise_blob) - return -1; + if (min_noise_blob > max_noise_blob) return -1; *worst_noise_score = small_limit; worst_noise_blob = -1; @@ -757,13 +730,13 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, return worst_noise_blob; } -float Tesseract::blob_noise_score(TBLOB *blob) { - TBOX box; // BB of outline +float Tesseract::blob_noise_score(TBLOB* blob) { + TBOX box; // BB of outline int16_t outline_count = 0; int16_t max_dimension; int16_t largest_outline_dimension = 0; - for (TESSLINE* ol = blob->outlines; ol != nullptr; ol= ol->next) { + for (TESSLINE* ol = blob->outlines; ol != nullptr; ol = ol->next) { outline_count++; box = ol->bounding_box(); if (box.height() > box.width()) { @@ -792,7 +765,7 @@ float Tesseract::blob_noise_score(TBLOB *blob) { } } // namespace tesseract -void fixspace_dbg(WERD_RES *word) { +void fixspace_dbg(WERD_RES* word) { TBOX box = word->word->bounding_box(); bool show_map_detail = false; int16_t i; @@ -800,8 +773,7 @@ void fixspace_dbg(WERD_RES *word) { box.print(); tprintf(" \"%s\" ", word->best_choice->unichar_string().string()); tprintf("Blob count: %d (word); %d/%d (rebuild word)\n", - word->word->cblob_list()->length(), - word->rebuild_word->NumBlobs(), + word->word->cblob_list()->length(), word->rebuild_word->NumBlobs(), word->box_word->length()); word->reject_map.print(debug_fp); tprintf("\n"); @@ -817,7 +789,6 @@ void fixspace_dbg(WERD_RES *word) { tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); } - /** * fp_eval_word_spacing() * Evaluation function for fixed pitch word lists. @@ -827,19 +798,17 @@ void fixspace_dbg(WERD_RES *word) { * Penalise any potential noise chars */ namespace tesseract { -int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) { +int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST& word_res_list) { WERD_RES_IT word_it(&word_res_list); - WERD_RES *word; + WERD_RES* word; int16_t score = 0; int16_t i; float small_limit = kBlnXHeight * fixsp_small_outlines_size; for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { word = word_it.data(); - if (word->rebuild_word == nullptr) - continue; // Can't handle cube words. - if (word->done || - word->tess_accepted || + if (word->rebuild_word == nullptr) continue; // Can't handle cube words. + if (word->done || word->tess_accepted || word->best_choice->permuter() == SYSTEM_DAWG_PERM || word->best_choice->permuter() == FREQ_DAWG_PERM || word->best_choice->permuter() == USER_DAWG_PERM || @@ -857,8 +826,7 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) { } } } - if (score < 0) - score = 0; + if (score < 0) score = 0; return score; } diff --git a/src/ccmain/fixspace.h b/src/ccmain/fixspace.h index 5233bda275..6abd652f64 100644 --- a/src/ccmain/fixspace.h +++ b/src/ccmain/fixspace.h @@ -1,8 +1,8 @@ /****************************************************************** * File: fixspace.h (Formerly fixspace.h) * Description: Implements a pass over the page res, exploring the alternative - * spacing possibilities, trying to use context to improve the - word spacing + * spacing possibilities, trying to use +context to improve the word spacing * Author: Phil Cheatle * Created: Thu Oct 21 11:38:43 BST 1993 * @@ -19,13 +19,13 @@ * **********************************************************************/ -#ifndef FIXSPACE_H -#define FIXSPACE_H +#ifndef FIXSPACE_H +#define FIXSPACE_H -#include "pageres.h" -#include "params.h" +#include "pageres.h" +#include "params.h" -void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list); -void transform_to_next_perm(WERD_RES_LIST &words); -void fixspace_dbg(WERD_RES *word); +void initialise_search(WERD_RES_LIST& src_list, WERD_RES_LIST& new_list); +void transform_to_next_perm(WERD_RES_LIST& words); +void fixspace_dbg(WERD_RES* word); #endif diff --git a/src/ccmain/fixxht.cpp b/src/ccmain/fixxht.cpp index 03e3ada981..dd1582029c 100644 --- a/src/ccmain/fixxht.cpp +++ b/src/ccmain/fixxht.cpp @@ -17,11 +17,11 @@ * **********************************************************************/ -#include -#include #include -#include "params.h" +#include +#include #include "float2int.h" +#include "params.h" #include "tesseractclass.h" namespace tesseract { @@ -67,7 +67,7 @@ namespace tesseract { const int kMaxCharTopRange = 48; // Returns the number of misfit blob tops in this word. -int Tesseract::CountMisfitTops(WERD_RES *word_res) { +int Tesseract::CountMisfitTops(WERD_RES* word_res) { int bad_blobs = 0; int num_blobs = word_res->rebuild_word->NumBlobs(); for (int blob_id = 0; blob_id < num_blobs; ++blob_id) { @@ -75,22 +75,18 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) { UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id); if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) { int top = blob->bounding_box().top(); - if (top >= INT_FEAT_RANGE) - top = INT_FEAT_RANGE - 1; + if (top >= INT_FEAT_RANGE) top = INT_FEAT_RANGE - 1; int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, - &min_top, &max_top); - if (max_top - min_top > kMaxCharTopRange) - continue; - bool bad = top < min_top - x_ht_acceptance_tolerance || - top > max_top + x_ht_acceptance_tolerance; - if (bad) - ++bad_blobs; + unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, + &max_top); + if (max_top - min_top > kMaxCharTopRange) continue; + bool bad = top < min_top - x_ht_acceptance_tolerance || + top > max_top + x_ht_acceptance_tolerance; + if (bad) ++bad_blobs; if (debug_x_ht_level >= 1) { tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n", - unicharset.id_to_unichar(class_id), - bad ? "Misfit" : "OK", top, min_top, max_top, - static_cast(x_ht_acceptance_tolerance)); + unicharset.id_to_unichar(class_id), bad ? "Misfit" : "OK", top, + min_top, max_top, static_cast(x_ht_acceptance_tolerance)); } } } @@ -99,7 +95,7 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) { // Returns a new x-height maximally compatible with the result in word_res. // See comment above for overall algorithm. -float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, +float Tesseract::ComputeCompatibleXheight(WERD_RES* word_res, float* baseline_shift) { STATS top_stats(0, UINT8_MAX); STATS shift_stats(-UINT8_MAX, UINT8_MAX); @@ -115,37 +111,33 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, unicharset.get_isdigit(class_id)) { int top = blob->bounding_box().top() + bottom_shift; // Clip the top to the limit of normalized feature space. - if (top >= INT_FEAT_RANGE) - top = INT_FEAT_RANGE - 1; + if (top >= INT_FEAT_RANGE) top = INT_FEAT_RANGE - 1; int bottom = blob->bounding_box().bottom() + bottom_shift; int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, - &min_top, &max_top); + unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, + &max_top); // Chars with a wild top range would mess up the result so ignore them. - if (max_top - min_top > kMaxCharTopRange) - continue; + if (max_top - min_top > kMaxCharTopRange) continue; int misfit_dist = std::max((min_top - x_ht_acceptance_tolerance) - top, - top - (max_top + x_ht_acceptance_tolerance)); + top - (max_top + x_ht_acceptance_tolerance)); int height = top - kBlnBaselineOffset; if (debug_x_ht_level >= 2) { tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d: ", - unicharset.id_to_unichar(class_id), - height, min_bottom, max_bottom, min_top, max_top, - bottom, top); + unicharset.id_to_unichar(class_id), height, min_bottom, + max_bottom, min_top, max_top, bottom, top); } // Use only chars that fit in the expected bottom range, and where // the range of tops is sensibly near the xheight. if (min_bottom <= bottom + x_ht_acceptance_tolerance && bottom - x_ht_acceptance_tolerance <= max_bottom && min_top > kBlnBaselineOffset && - max_top - kBlnBaselineOffset >= kBlnXHeight && - misfit_dist > 0) { + max_top - kBlnBaselineOffset >= kBlnXHeight && misfit_dist > 0) { // Compute the x-height position using proportionality between the // actual height and expected height. - int min_xht = DivRounded(height * kBlnXHeight, - max_top - kBlnBaselineOffset); - int max_xht = DivRounded(height * kBlnXHeight, - min_top - kBlnBaselineOffset); + int min_xht = + DivRounded(height * kBlnXHeight, max_top - kBlnBaselineOffset); + int max_xht = + DivRounded(height * kBlnXHeight, min_top - kBlnBaselineOffset); if (debug_x_ht_level >= 2) { tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht); } @@ -166,8 +158,7 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, // of the actual bottom from the expected bottom, spread over the // range of its acceptance. int misfit_weight = abs(min_shift); - if (max_shift > min_shift) - misfit_weight /= max_shift - min_shift; + if (max_shift > min_shift) misfit_weight /= max_shift - min_shift; for (int y = min_shift; y <= max_shift; ++y) shift_stats.add(y, misfit_weight); } else { @@ -202,8 +193,8 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float new_xht = top_stats.median(); if (debug_x_ht_level >= 2) { tprintf("Median xht=%f\n", new_xht); - tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n", - new_xht, new_xht / word_res->denorm.y_scale()); + tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n", new_xht, + new_xht / word_res->denorm.y_scale()); } // The xheight must change by at least x_ht_min_change to be used. if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) diff --git a/src/ccmain/linerec.cpp b/src/ccmain/linerec.cpp index 858fbd6405..1f9fd06355 100644 --- a/src/ccmain/linerec.cpp +++ b/src/ccmain/linerec.cpp @@ -43,7 +43,7 @@ const float kWorstDictCertainty = -25.0f; // serialized DocumentData based on output_basename. void Tesseract::TrainLineRecognizer(const STRING& input_imagename, const STRING& output_basename, - BLOCK_LIST *block_list) { + BLOCK_LIST* block_list) { STRING lstmf_name = output_basename + ".lstmf"; DocumentData images(lstmf_name); if (applybox_page > 0) { @@ -56,8 +56,8 @@ void Tesseract::TrainLineRecognizer(const STRING& input_imagename, GenericVector boxes; GenericVector texts; // Get the boxes for this page, if there are any. - if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr, - nullptr) || + if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, + nullptr, nullptr) || boxes.empty()) { tprintf("Failed to read boxes from %s\n", input_imagename.string()); return; @@ -74,7 +74,7 @@ void Tesseract::TrainLineRecognizer(const STRING& input_imagename, // appends them to the given training_data. void Tesseract::TrainFromBoxes(const GenericVector& boxes, const GenericVector& texts, - BLOCK_LIST *block_list, + BLOCK_LIST* block_list, DocumentData* training_data) { int box_count = boxes.size(); // Process all the text lines in this page, as defined by the boxes. @@ -97,7 +97,8 @@ void Tesseract::TrainFromBoxes(const GenericVector& boxes, BLOCK_IT b_it(block_list); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOCK* block = b_it.data(); - if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) + if (block->pdblk.poly_block() != nullptr && + !block->pdblk.poly_block()->IsText()) continue; // Not a text block. TBOX block_box = block->pdblk.bounding_box(); block_box.rotate(block->re_rotation()); @@ -113,11 +114,10 @@ void Tesseract::TrainFromBoxes(const GenericVector& boxes, if (best_block == nullptr) { tprintf("No block overlapping textline: %s\n", line_str.string()); } else { - imagedata = GetLineData(line_box, boxes, texts, start_box, end_box, - *best_block); + imagedata = + GetLineData(line_box, boxes, texts, start_box, end_box, *best_block); } - if (imagedata != nullptr) - training_data->AddPageToDocument(imagedata); + if (imagedata != nullptr) training_data->AddPageToDocument(imagedata); // Don't let \t, which marks newlines in the box file, get into the line // content, as that makes the line unusable in training. while (end_box < texts.size() && texts[end_box] == "\t") ++end_box; @@ -133,8 +133,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box, int start_box, int end_box, const BLOCK& block) { TBOX revised_box; - ImageData* image_data = GetRectImage(line_box, block, kImagePadding, - &revised_box); + ImageData* image_data = + GetRectImage(line_box, block, kImagePadding, &revised_box); if (image_data == nullptr) return nullptr; image_data->set_page_number(applybox_page); // Copy the boxes and shift them so they are relative to the image. @@ -211,8 +211,7 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block, // Rotated the clipped revised box back to internal coordinates. FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y()); revised_box->rotate(rotation); - if (num_rotations != 2) - vertical_text = true; + if (num_rotations != 2) vertical_text = true; } return new ImageData(vertical_text, box_pix); } @@ -220,7 +219,7 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block, #ifndef ANDROID_BUILD // Recognizes a word or group of words, converting to WERD_RES in *words. // Analogous to classify_word_pass1, but can handle a group of words as well. -void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word, +void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW* row, WERD_RES* word, PointerVector* words) { TBOX word_box = word->word->bounding_box(); // Get the word image - no frills. @@ -281,15 +280,16 @@ void Tesseract::SearchWords(PointerVector* words) { word->tess_would_adapt = false; word->done = true; word->tesseract = this; - float word_certainty = std::min(word->space_certainty, - word->best_choice->certainty()); + float word_certainty = + std::min(word->space_certainty, word->best_choice->certainty()); word_certainty *= kCertaintyScale; if (getDict().stopper_debug_level >= 1) { - tprintf("Best choice certainty=%g, space=%g, scaled=%g, final=%g\n", - word->best_choice->certainty(), word->space_certainty, - std::min(word->space_certainty, word->best_choice->certainty()) * - kCertaintyScale, - word_certainty); + tprintf( + "Best choice certainty=%g, space=%g, scaled=%g, final=%g\n", + word->best_choice->certainty(), word->space_certainty, + std::min(word->space_certainty, word->best_choice->certainty()) * + kCertaintyScale, + word_certainty); word->best_choice->print(); } word->best_choice->set_certainty(word_certainty); diff --git a/src/ccmain/ltrresultiterator.cpp b/src/ccmain/ltrresultiterator.cpp index 5c1c7f9df2..60c75923f3 100644 --- a/src/ccmain/ltrresultiterator.cpp +++ b/src/ccmain/ltrresultiterator.cpp @@ -28,14 +28,13 @@ namespace tesseract { LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, - int rect_width, int rect_height) - : PageIterator(page_res, tesseract, scale, scaled_yres, - rect_left, rect_top, rect_width, rect_height), - line_separator_("\n"), - paragraph_separator_("\n") { -} + int scale, int scaled_yres, int rect_left, + int rect_top, int rect_width, + int rect_height) + : PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top, + rect_width, rect_height), + line_separator_("\n"), + paragraph_separator_("\n") {} // Returns the null terminated UTF-8 encoded text string for the current // object at the given level. Use delete [] to free after use. @@ -52,9 +51,9 @@ char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const { } else { bool eol = false; // end of line? bool eop = false; // end of paragraph? - do { // for each paragraph in a block - do { // for each text line in a paragraph - do { // for each word in a text line + do { // for each paragraph in a block + do { // for each text line in a paragraph + do { // for each word in a text line best_choice = res_it.word()->best_choice; ASSERT_HOST(best_choice != nullptr); text += best_choice->unichar_string(); @@ -65,7 +64,7 @@ char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const { text.truncate_at(text.length() - 1); text += line_separator_; eop = res_it.block() != res_it.prev_block() || - res_it.row()->row->para() != res_it.prev_row()->row->para(); + res_it.row()->row->para() != res_it.prev_row()->row->para(); } while (level != RIL_TEXTLINE && !eop); if (eop) text += paragraph_separator_; } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block()); @@ -77,12 +76,12 @@ char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const { } // Set the string inserted at the end of each text line. "\n" by default. -void LTRResultIterator::SetLineSeparator(const char *new_line) { +void LTRResultIterator::SetLineSeparator(const char* new_line) { line_separator_ = new_line; } // Set the string inserted at the end of each paragraph. "\n" by default. -void LTRResultIterator::SetParagraphSeparator(const char *new_para) { +void LTRResultIterator::SetParagraphSeparator(const char* new_para) { paragraph_separator_ = new_para; } @@ -126,7 +125,7 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const { break; case RIL_WORD: mean_certainty += best_choice->certainty(); - ++certainty_count; + ++certainty_count; break; case RIL_SYMBOL: mean_certainty += best_choice->certainty(blob_index_); @@ -158,20 +157,17 @@ void LTRResultIterator::RowAttributes(float* row_height, float* descenders, // the iterator itself, ie rendered invalid by various members of // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. // Pointsize is returned in printers points (1/72 inch.) -const char* LTRResultIterator::WordFontAttributes(bool* is_bold, - bool* is_italic, - bool* is_underlined, - bool* is_monospace, - bool* is_serif, - bool* is_smallcaps, - int* pointsize, - int* font_id) const { +const char* LTRResultIterator::WordFontAttributes( + bool* is_bold, bool* is_italic, bool* is_underlined, bool* is_monospace, + bool* is_serif, bool* is_smallcaps, int* pointsize, int* font_id) const { float row_height = it_->row()->row->x_height() + - it_->row()->row->ascenders() - it_->row()->row->descenders(); + it_->row()->row->ascenders() - + it_->row()->row->descenders(); // Convert from pixels to printers points. - *pointsize = scaled_yres_ > 0 - ? static_cast(row_height * kPointsPerInch / scaled_yres_ + 0.5) - : 0; + *pointsize = + scaled_yres_ > 0 + ? static_cast(row_height * kPointsPerInch / scaled_yres_ + 0.5) + : 0; if (it_->word() == nullptr) return nullptr; // Already at the end! if (it_->word()->fontinfo == nullptr) { *font_id = -1; @@ -191,7 +187,8 @@ const char* LTRResultIterator::WordFontAttributes(bool* is_bold, // Returns the name of the language used to recognize this word. const char* LTRResultIterator::WordRecognitionLanguage() const { - if (it_->word() == nullptr || it_->word()->tesseract == nullptr) return nullptr; + if (it_->word() == nullptr || it_->word()->tesseract == nullptr) + return nullptr; return it_->word()->tesseract->lang.string(); } @@ -200,12 +197,9 @@ StrongScriptDirection LTRResultIterator::WordDirection() const { if (it_->word() == nullptr) return DIR_NEUTRAL; bool has_rtl = it_->word()->AnyRtlCharsInWord(); bool has_ltr = it_->word()->AnyLtrCharsInWord(); - if (has_rtl && !has_ltr) - return DIR_RIGHT_TO_LEFT; - if (has_ltr && !has_rtl) - return DIR_LEFT_TO_RIGHT; - if (!has_ltr && !has_rtl) - return DIR_NEUTRAL; + if (has_rtl && !has_ltr) return DIR_RIGHT_TO_LEFT; + if (has_ltr && !has_rtl) return DIR_LEFT_TO_RIGHT; + if (!has_ltr && !has_rtl) return DIR_NEUTRAL; return DIR_MIX; } @@ -238,20 +232,21 @@ bool LTRResultIterator::HasBlamerInfo() const { // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle // of the current word. -const void *LTRResultIterator::GetParamsTrainingBundle() const { - return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr) ? - &(it_->word()->blamer_bundle->params_training_bundle()) : nullptr; +const void* LTRResultIterator::GetParamsTrainingBundle() const { + return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr) + ? &(it_->word()->blamer_bundle->params_training_bundle()) + : nullptr; } // Returns the pointer to the string with blamer information for this word. // Assumes that the word's blamer_bundle is not nullptr. -const char *LTRResultIterator::GetBlamerDebug() const { +const char* LTRResultIterator::GetBlamerDebug() const { return it_->word()->blamer_bundle->debug().string(); } // Returns the pointer to the string with misadaption information for this word. // Assumes that the word's blamer_bundle is not nullptr. -const char *LTRResultIterator::GetBlamerMisadaptionDebug() const { +const char* LTRResultIterator::GetBlamerMisadaptionDebug() const { return it_->word()->blamer_bundle->misadaption_debug().string(); } @@ -267,7 +262,7 @@ bool LTRResultIterator::HasTruthString() const { // Returns true if the given string is equivalent to the truth string for // the current word. -bool LTRResultIterator::EquivalentToTruth(const char *str) const { +bool LTRResultIterator::EquivalentToTruth(const char* str) const { if (!HasTruthString()) return false; ASSERT_HOST(it_->word()->uch_set != nullptr); WERD_CHOICE str_wd(str, *(it_->word()->uch_set)); @@ -291,7 +286,7 @@ char* LTRResultIterator::WordNormedUTF8Text() const { if (it_->word() == nullptr) return nullptr; // Already at the end! STRING ocr_text; WERD_CHOICE* best_choice = it_->word()->best_choice; - const UNICHARSET *unicharset = it_->word()->uch_set; + const UNICHARSET* unicharset = it_->word()->uch_set; ASSERT_HOST(best_choice != nullptr); for (int i = 0; i < best_choice->length(); ++i) { ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i)); @@ -304,7 +299,7 @@ char* LTRResultIterator::WordNormedUTF8Text() const { // Returns a pointer to serialized choice lattice. // Fills lattice_size with the number of bytes in lattice data. -const char *LTRResultIterator::WordLattice(int *lattice_size) const { +const char* LTRResultIterator::WordLattice(int* lattice_size) const { if (it_->word() == nullptr) return nullptr; // Already at the end! if (it_->word()->blamer_bundle == nullptr) return nullptr; *lattice_size = it_->word()->blamer_bundle->lattice_size(); @@ -317,7 +312,7 @@ const char *LTRResultIterator::WordLattice(int *lattice_size) const { bool LTRResultIterator::SymbolIsSuperscript() const { if (cblob_it_ == nullptr && it_->word() != nullptr) return it_->word()->best_choice->BlobPosition(blob_index_) == - SP_SUPERSCRIPT; + SP_SUPERSCRIPT; return false; } @@ -353,15 +348,12 @@ ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) { } } -ChoiceIterator::~ChoiceIterator() { - delete choice_it_; -} +ChoiceIterator::~ChoiceIterator() { delete choice_it_; } // Moves to the next choice for the symbol and returns false if there // are none left. bool ChoiceIterator::Next() { - if (choice_it_ == nullptr) - return false; + if (choice_it_ == nullptr) return false; choice_it_->forward(); return !choice_it_->cycled_list(); } @@ -369,8 +361,7 @@ bool ChoiceIterator::Next() { // Returns the null terminated UTF-8 encoded text string for the current // choice. Do NOT use delete [] to free after use. const char* ChoiceIterator::GetUTF8Text() const { - if (choice_it_ == nullptr) - return nullptr; + if (choice_it_ == nullptr) return nullptr; UNICHAR_ID id = choice_it_->data()->unichar_id(); return word_res_->uch_set->id_to_unichar_ext(id); } @@ -378,13 +369,11 @@ const char* ChoiceIterator::GetUTF8Text() const { // Returns the confidence of the current choice. // The number should be interpreted as a percent probability. (0.0f-100.0f) float ChoiceIterator::Confidence() const { - if (choice_it_ == nullptr) - return 0.0f; + if (choice_it_ == nullptr) return 0.0f; float confidence = 100 + 5 * choice_it_->data()->certainty(); if (confidence < 0.0f) confidence = 0.0f; if (confidence > 100.0f) confidence = 100.0f; return confidence; } - } // namespace tesseract. diff --git a/src/ccmain/ltrresultiterator.h b/src/ccmain/ltrresultiterator.h index 09e1230ac1..7a81b79e66 100644 --- a/src/ccmain/ltrresultiterator.h +++ b/src/ccmain/ltrresultiterator.h @@ -21,8 +21,8 @@ #ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_ #define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_ -#include "platform.h" #include "pageiterator.h" +#include "platform.h" #include "unichar.h" class BLOB_CHOICE_IT; @@ -45,6 +45,7 @@ class Tesseract; class TESS_API LTRResultIterator : public PageIterator { friend class ChoiceIterator; + public: // page_res and tesseract come directly from the BaseAPI. // The rectangle parameters are copied indirectly from the Thresholder, @@ -58,9 +59,8 @@ class TESS_API LTRResultIterator : public PageIterator { // The scaled_yres indicates the effective resolution of the binary image // that tesseract has been given by the Thresholder. // After the constructor, Begin has already been called. - LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, + LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale, + int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height); virtual ~LTRResultIterator() = default; @@ -83,10 +83,10 @@ class TESS_API LTRResultIterator : public PageIterator { char* GetUTF8Text(PageIteratorLevel level) const; // Set the string inserted at the end of each text line. "\n" by default. - void SetLineSeparator(const char *new_line); + void SetLineSeparator(const char* new_line); // Set the string inserted at the end of each paragraph. "\n" by default. - void SetParagraphSeparator(const char *new_para); + void SetParagraphSeparator(const char* new_para); // Returns the mean confidence of the current object at the given level. // The number should be interpreted as a percent probability. (0.0f-100.0f) @@ -106,14 +106,10 @@ class TESS_API LTRResultIterator : public PageIterator { // the iterator itself, ie rendered invalid by various members of // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. // Pointsize is returned in printers points (1/72 inch.) - const char* WordFontAttributes(bool* is_bold, - bool* is_italic, - bool* is_underlined, - bool* is_monospace, - bool* is_serif, - bool* is_smallcaps, - int* pointsize, - int* font_id) const; + const char* WordFontAttributes(bool* is_bold, bool* is_italic, + bool* is_underlined, bool* is_monospace, + bool* is_serif, bool* is_smallcaps, + int* pointsize, int* font_id) const; // Return the name of the language used to recognize this word. // On error, nullptr. Do not delete this pointer. @@ -136,22 +132,22 @@ class TESS_API LTRResultIterator : public PageIterator { // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle // of the current word. - const void *GetParamsTrainingBundle() const; + const void* GetParamsTrainingBundle() const; // Returns a pointer to the string with blamer information for this word. // Assumes that the word's blamer_bundle is not nullptr. - const char *GetBlamerDebug() const; + const char* GetBlamerDebug() const; // Returns a pointer to the string with misadaption information for this word. // Assumes that the word's blamer_bundle is not nullptr. - const char *GetBlamerMisadaptionDebug() const; + const char* GetBlamerMisadaptionDebug() const; // Returns true if a truth string was recorded for the current word. bool HasTruthString() const; // Returns true if the given string is equivalent to the truth string for // the current word. - bool EquivalentToTruth(const char *str) const; + bool EquivalentToTruth(const char* str) const; // Returns a null terminated UTF-8 encoded truth string for the current word. // Use delete [] to free after use. @@ -163,7 +159,7 @@ class TESS_API LTRResultIterator : public PageIterator { // Returns a pointer to serialized choice lattice. // Fills lattice_size with the number of bytes in lattice data. - const char *WordLattice(int *lattice_size) const; + const char* WordLattice(int* lattice_size) const; // ============= Functions that refer to symbols only ============. @@ -181,8 +177,8 @@ class TESS_API LTRResultIterator : public PageIterator { bool SymbolIsDropcap() const; protected: - const char *line_separator_; - const char *paragraph_separator_; + const char* line_separator_; + const char* paragraph_separator_; }; // Class to iterate over the classifier choices for a single RIL_SYMBOL. diff --git a/src/ccmain/mutableiterator.h b/src/ccmain/mutableiterator.h index 1b905f5cdf..df0433feb5 100644 --- a/src/ccmain/mutableiterator.h +++ b/src/ccmain/mutableiterator.h @@ -44,19 +44,18 @@ class Tesseract; class MutableIterator : public ResultIterator { public: // See argument descriptions in ResultIterator() - MutableIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, - int rect_width, int rect_height) - : ResultIterator( - LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left, - rect_top, rect_width, rect_height)) {} + MutableIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale, + int scaled_yres, int rect_left, int rect_top, int rect_width, + int rect_height) + : ResultIterator(LTRResultIterator(page_res, tesseract, scale, + scaled_yres, rect_left, rect_top, + rect_width, rect_height)) {} virtual ~MutableIterator() = default; // See PageIterator and ResultIterator for most calls. // Return access to Tesseract internals. - const PAGE_RES_IT *PageResIt() const { return it_; } + const PAGE_RES_IT* PageResIt() const { return it_; } }; } // namespace tesseract. diff --git a/src/ccmain/osdetect.cpp b/src/ccmain/osdetect.cpp index 5bbcc1763e..9a87859371 100644 --- a/src/ccmain/osdetect.cpp +++ b/src/ccmain/osdetect.cpp @@ -106,14 +106,13 @@ void OSResults::update_best_script(int orientation) { second = scripts_na[orientation][i]; } } - best_result.sconfidence = - (first / second - 1.0) / (kScriptAcceptRatio - 1.0); + best_result.sconfidence = (first / second - 1.0) / (kScriptAcceptRatio - 1.0); } int OSResults::get_best_script(int orientation_id) const { int max_id = -1; for (int j = 0; j < kMaxNumberOfScripts; ++j) { - const char *script = unicharset->get_script_from_script_id(j); + const char* script = unicharset->get_script_from_script_id(j); if (strcmp(script, "Common") && strcmp(script, "NULL")) { if (max_id == -1 || scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id]) @@ -136,7 +135,7 @@ void OSResults::print_scores(int orientation_id) const { for (int j = 0; j < kMaxNumberOfScripts; ++j) { if (scripts_na[orientation_id][j]) { tprintf("%12s\t: %f\n", unicharset->get_script_from_script_id(j), - scripts_na[orientation_id][j]); + scripts_na[orientation_id][j]); } } } @@ -155,9 +154,9 @@ void OSResults::accumulate(const OSResults& osr) { // Detect and erase horizontal/vertical lines and picture regions from the // image, so that non-text blobs are removed from consideration. -void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks, - TO_BLOCK_LIST *to_blocks) { - Pix *pix = tess->pix_binary(); +void remove_nontext_regions(tesseract::Tesseract* tess, BLOCK_LIST* blocks, + TO_BLOCK_LIST* to_blocks) { + Pix* pix = tess->pix_binary(); ASSERT_HOST(pix != nullptr); int vertical_x = 0; int vertical_y = 1; @@ -172,31 +171,29 @@ void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks, resolution = pixGetXRes(pix); } - tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, - &vertical_x, &vertical_y, - nullptr, &v_lines, &h_lines); + tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, + &vertical_y, nullptr, &v_lines, + &h_lines); Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr); if (im_pix != nullptr) { pixSubtract(pix, pix, im_pix); pixDestroy(&im_pix); } - tess->mutable_textord()->find_components(tess->pix_binary(), - blocks, to_blocks); + tess->mutable_textord()->find_components(tess->pix_binary(), blocks, + to_blocks); } // Find connected components in the page and process a subset until finished or // a stopping criterion is met. // Returns the number of blobs used in making the estimate. 0 implies failure. -int orientation_and_script_detection(STRING& filename, - OSResults* osr, +int orientation_and_script_detection(STRING& filename, OSResults* osr, tesseract::Tesseract* tess) { - STRING name = filename; //truncated name - const char *lastdot; //of name + STRING name = filename; // truncated name + const char* lastdot; // of name TBOX page_box; - lastdot = strrchr (name.string (), '.'); - if (lastdot != nullptr) - name[lastdot-name.string()] = '\0'; + lastdot = strrchr(name.string(), '.'); + if (lastdot != nullptr) name[lastdot - name.string()] = '\0'; ASSERT_HOST(tess->pix_binary() != nullptr) int width = pixGetWidth(tess->pix_binary()); @@ -212,16 +209,16 @@ int orientation_and_script_detection(STRING& filename, if (port_blocks.empty()) { // page segmentation did not succeed, so we need to find_components first. - tess->mutable_textord()->find_components(tess->pix_binary(), - &blocks, &port_blocks); + tess->mutable_textord()->find_components(tess->pix_binary(), &blocks, + &port_blocks); } else { page_box.set_left(0); page_box.set_bottom(0); page_box.set_right(width); page_box.set_top(height); // Filter_blobs sets up the TO_BLOCKs the same as find_components does. - tess->mutable_textord()->filter_blobs(page_box.topright(), - &port_blocks, true); + tess->mutable_textord()->filter_blobs(page_box.topright(), &port_blocks, + true); } return os_detect(&port_blocks, osr, tess); @@ -239,18 +236,17 @@ int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, BLOBNBOX_CLIST filtered_list; BLOBNBOX_C_IT filtered_it(&filtered_list); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward ()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { TO_BLOCK* to_block = block_it.data(); if (to_block->block->pdblk.poly_block() && - !to_block->block->pdblk.poly_block()->IsText()) continue; + !to_block->block->pdblk.poly_block()->IsText()) + continue; BLOBNBOX_IT bbox_it; bbox_it.set_to_list(&to_block->blobs); - for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list (); - bbox_it.forward ()) { + for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* bbox = bbox_it.data(); - C_BLOB* blob = bbox->cblob(); - TBOX box = blob->bounding_box(); + C_BLOB* blob = bbox->cblob(); + TBOX box = blob->bounding_box(); ++blobs_total; float y_x = fabs((box.height() * 1.0) / box.width()); @@ -276,8 +272,7 @@ int os_detect_blobs(const GenericVector* allowed_scripts, BLOBNBOX_CLIST* blob_list, OSResults* osr, tesseract::Tesseract* tess) { OSResults osr_; - if (osr == nullptr) - osr = &osr_; + if (osr == nullptr) osr = &osr_; osr->unicharset = &tess->unicharset; OrientationDetector o(allowed_scripts, osr); @@ -297,20 +292,20 @@ int os_detect_blobs(const GenericVector* allowed_scripts, BLOBNBOX** blobs = new BLOBNBOX*[filtered_it.length()]; int number_of_blobs = 0; - for (filtered_it.mark_cycle_pt (); !filtered_it.cycled_list (); - filtered_it.forward ()) { + for (filtered_it.mark_cycle_pt(); !filtered_it.cycled_list(); + filtered_it.forward()) { blobs[number_of_blobs++] = (BLOBNBOX*)filtered_it.data(); } QRSequenceGenerator sequence(number_of_blobs); int num_blobs_evaluated = 0; for (int i = 0; i < real_max; ++i) { - if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) - && i > kMinCharactersToTry) { + if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) && + i > kMinCharactersToTry) { break; } ++num_blobs_evaluated; } - delete [] blobs; + delete[] blobs; // Make sure the best_result is up-to-date int orientation = o.get_orientation(); @@ -321,10 +316,9 @@ int os_detect_blobs(const GenericVector* allowed_scripts, // Processes a single blob to estimate script and orientation. // Return true if estimate of orientation and script satisfies stopping // criteria. -bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, - ScriptDetector* s, OSResults* osr, - tesseract::Tesseract* tess) { - tess->tess_cn_matching.set_value(true); // turn it on +bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, ScriptDetector* s, + OSResults* osr, tesseract::Tesseract* tess) { + tess->tess_cn_matching.set_value(true); // turn it on tess->tess_bn_matching.set_value(false); C_BLOB* blob = bbox->cblob(); TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob); @@ -349,10 +343,9 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, x_origin = i == 1 ? box.left() : box.right(); } TBLOB* rotated_blob = new TBLOB(*tblob); - rotated_blob->Normalize(nullptr, ¤t_rotation, nullptr, - x_origin, y_origin, scaling, scaling, - 0.0f, static_cast(kBlnBaselineOffset), - false, nullptr); + rotated_blob->Normalize( + nullptr, ¤t_rotation, nullptr, x_origin, y_origin, scaling, + scaling, 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); tess->AdaptiveClassifier(rotated_blob, ratings + i); delete rotated_blob; current_rotation.rotate(rotation90); @@ -366,7 +359,6 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, return stop; } - OrientationDetector::OrientationDetector( const GenericVector* allowed_scripts, OSResults* osr) { osr_ = osr; @@ -385,8 +377,9 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) { BLOB_CHOICE* choice = nullptr; if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) { // Find the top choice in an allowed script. - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() && - choice == nullptr; choice_it.forward()) { + for (choice_it.mark_cycle_pt(); + !choice_it.cycled_list() && choice == nullptr; + choice_it.forward()) { int choice_script = choice_it.data()->script_id(); int s = 0; for (s = 0; s < allowed_scripts_->size(); ++s) { @@ -445,7 +438,6 @@ int OrientationDetector::get_orientation() { return osr_->best_result.orientation_id; } - ScriptDetector::ScriptDetector(const GenericVector* allowed_scripts, OSResults* osr, tesseract::Tesseract* tess) { osr_ = osr; @@ -461,14 +453,12 @@ ScriptDetector::ScriptDetector(const GenericVector* allowed_scripts, fraktur_id_ = tess_->unicharset.add_script(fraktur_script_); } - // Score the given blob and return true if it is now sure of the script after // adding this blob. void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { bool done[kMaxNumberOfScripts]; for (int i = 0; i < 4; ++i) { - for (int j = 0; j < kMaxNumberOfScripts; ++j) - done[j] = false; + for (int j = 0; j < kMaxNumberOfScripts; ++j) done[j] = false; BLOB_CHOICE_IT choice_it; choice_it.set_to_list(scores + i); @@ -509,13 +499,11 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { } if (strlen(prev_unichar) == 1) - if (unichar[0] >= '0' && unichar[0] <= '9') - break; + if (unichar[0] >= '0' && unichar[0] <= '9') break; // if script_count is >= 2, character is ambiguous, skip other matches // since they are useless. - if (script_count >= 2) - break; + if (script_count >= 2) break; } // Character is non ambiguous if (script_count == 1) { @@ -525,9 +513,9 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { // Workaround for Fraktur if (prev_id == latin_id_) { if (prev_fontinfo_id >= 0) { - const tesseract::FontInfo &fi = + const tesseract::FontInfo& fi = tess_->get_fontinfo_table().get(prev_fontinfo_id); - //printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name, + // printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name, // fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(), // fi.is_serif(), fi.is_fraktur(), // prev_unichar); @@ -539,12 +527,9 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { } // Update Japanese / Korean pseudo-scripts - if (prev_id == katakana_id_) - osr_->scripts_na[i][japanese_id_] += 1.0; - if (prev_id == hiragana_id_) - osr_->scripts_na[i][japanese_id_] += 1.0; - if (prev_id == hangul_id_) - osr_->scripts_na[i][korean_id_] += 1.0; + if (prev_id == katakana_id_) osr_->scripts_na[i][japanese_id_] += 1.0; + if (prev_id == hiragana_id_) osr_->scripts_na[i][japanese_id_] += 1.0; + if (prev_id == hangul_id_) osr_->scripts_na[i][korean_id_] += 1.0; if (prev_id == han_id_) { osr_->scripts_na[i][korean_id_] += kHanRatioInKorean; osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese; diff --git a/src/ccmain/osdetect.h b/src/ccmain/osdetect.h index 52b00968c9..5d367767ad 100644 --- a/src/ccmain/osdetect.h +++ b/src/ccmain/osdetect.h @@ -36,8 +36,8 @@ class Tesseract; const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; struct OSBestResult { - OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), - oconfidence(0.0) {} + OSBestResult() + : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {} int orientation_id; int script_id; float sconfidence; @@ -47,8 +47,7 @@ struct OSBestResult { struct OSResults { OSResults() : unicharset(nullptr) { for (int i = 0; i < 4; ++i) { - for (int j = 0; j < kMaxNumberOfScripts; ++j) - scripts_na[i][j] = 0; + for (int j = 0; j < kMaxNumberOfScripts; ++j) scripts_na[i][j] = 0; orientations[i] = 0; } } @@ -85,6 +84,7 @@ class OrientationDetector { OSResults* results); bool detect_blob(BLOB_CHOICE_LIST* scores); int get_orientation(); + private: OSResults* osr_; const GenericVector* allowed_scripts_; @@ -92,10 +92,11 @@ class OrientationDetector { class ScriptDetector { public: - ScriptDetector(const GenericVector* allowed_scripts, - OSResults* osr, tesseract::Tesseract* tess); + ScriptDetector(const GenericVector* allowed_scripts, OSResults* osr, + tesseract::Tesseract* tess); void detect_blob(BLOB_CHOICE_LIST* scores); bool must_stop(int orientation); + private: OSResults* osr_; static const char* korean_script_; @@ -113,22 +114,18 @@ class ScriptDetector { const GenericVector* allowed_scripts_; }; -int orientation_and_script_detection(STRING& filename, - OSResults*, +int orientation_and_script_detection(STRING& filename, OSResults*, tesseract::Tesseract*); -int os_detect(TO_BLOCK_LIST* port_blocks, - OSResults* osr, +int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, tesseract::Tesseract* tess); int os_detect_blobs(const GenericVector* allowed_scripts, - BLOBNBOX_CLIST* blob_list, - OSResults* osr, + BLOBNBOX_CLIST* blob_list, OSResults* osr, tesseract::Tesseract* tess); -bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, - ScriptDetector* s, OSResults*, - tesseract::Tesseract* tess); +bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, ScriptDetector* s, + OSResults*, tesseract::Tesseract* tess); // Helper method to convert an orientation index to its value in degrees. // The value represents the amount of clockwise rotation in degrees that must be diff --git a/src/ccmain/output.cpp b/src/ccmain/output.cpp index 64dfca1d4a..ac314d2bac 100644 --- a/src/ccmain/output.cpp +++ b/src/ccmain/output.cpp @@ -17,31 +17,31 @@ * **********************************************************************/ -#include #include +#include #ifdef __UNIX__ -#include -#include -#include +#include +#include +#include #endif -#include "helpers.h" -#include "tessvars.h" #include "control.h" -#include "reject.h" #include "docqual.h" -#include "output.h" #include "globals.h" +#include "helpers.h" +#include "output.h" +#include "reject.h" #include "tesseractclass.h" +#include "tessvars.h" -#define EPAPER_EXT ".ep" -#define PAGE_YSIZE 3508 -#define CTRL_INSET '\024' //dc4=text inset -#define CTRL_FONT '\016' //so=font change -#define CTRL_DEFAULT '\017' //si=default font -#define CTRL_SHIFT '\022' //dc2=x shift -#define CTRL_TAB '\011' //tab -#define CTRL_NEWLINE '\012' //newline -#define CTRL_HARDLINE '\015' //cr +#define EPAPER_EXT ".ep" +#define PAGE_YSIZE 3508 +#define CTRL_INSET '\024' // dc4=text inset +#define CTRL_FONT '\016' // so=font change +#define CTRL_DEFAULT '\017' // si=default font +#define CTRL_SHIFT '\022' // dc2=x shift +#define CTRL_TAB '\011' // tab +#define CTRL_NEWLINE '\012' // newline +#define CTRL_HARDLINE '\015' // cr /********************************************************************** * pixels_to_pts @@ -50,29 +50,28 @@ * number of points. **********************************************************************/ -int32_t pixels_to_pts( //convert coords - int32_t pixels, - int32_t pix_res //resolution - ) { - float pts; //converted value +int32_t pixels_to_pts( // convert coords + int32_t pixels, + int32_t pix_res // resolution +) { + float pts; // converted value pts = pixels * 72.0 / pix_res; - return (int32_t) (pts + 0.5); //round it + return (int32_t)(pts + 0.5); // round it } namespace tesseract { -void Tesseract::output_pass( //Tess output pass //send to api - PAGE_RES_IT &page_res_it, - const TBOX *target_word_box) { - BLOCK_RES *block_of_last_word; - bool force_eol; //During output - BLOCK *nextblock; //block of next word - WERD *nextword; //next word - - page_res_it.restart_page (); +void Tesseract::output_pass( // Tess output pass //send to api + PAGE_RES_IT& page_res_it, const TBOX* target_word_box) { + BLOCK_RES* block_of_last_word; + bool force_eol; // During output + BLOCK* nextblock; // block of next word + WERD* nextword; // next word + + page_res_it.restart_page(); block_of_last_word = nullptr; - while (page_res_it.word () != nullptr) { - check_debug_pt (page_res_it.word (), 120); + while (page_res_it.word() != nullptr) { + check_debug_pt(page_res_it.word(), 120); if (target_word_box) { TBOX current_word_box = page_res_it.word()->word->bounding_box(); @@ -85,32 +84,32 @@ void Tesseract::output_pass( //Tess output pass //send to api } } if (tessedit_write_block_separators && - block_of_last_word != page_res_it.block ()) { - block_of_last_word = page_res_it.block (); + block_of_last_word != page_res_it.block()) { + block_of_last_word = page_res_it.block(); } force_eol = (tessedit_write_block_separators && - (page_res_it.block () != page_res_it.next_block ())) || - (page_res_it.next_word () == nullptr); + (page_res_it.block() != page_res_it.next_block())) || + (page_res_it.next_word() == nullptr); - if (page_res_it.next_word () != nullptr) - nextword = page_res_it.next_word ()->word; + if (page_res_it.next_word() != nullptr) + nextword = page_res_it.next_word()->word; else nextword = nullptr; - if (page_res_it.next_block () != nullptr) - nextblock = page_res_it.next_block ()->block; + if (page_res_it.next_block() != nullptr) + nextblock = page_res_it.next_block()->block; else nextblock = nullptr; - //regardless of tilde crunching - write_results(page_res_it, - determine_newline_type(page_res_it.word()->word, - page_res_it.block()->block, - nextword, nextblock), force_eol); + // regardless of tilde crunching + write_results( + page_res_it, + determine_newline_type(page_res_it.word()->word, + page_res_it.block()->block, nextword, nextblock), + force_eol); page_res_it.forward(); } } - /************************************************************************* * write_results() * @@ -125,26 +124,22 @@ void Tesseract::output_pass( //Tess output pass //send to api *************************************************************************/ void Tesseract::write_results(PAGE_RES_IT& page_res_it, char newline_type, // type of newline - bool force_eol) { // override tilde crunch? - WERD_RES *word = page_res_it.word(); - const UNICHARSET &uchset = *word->uch_set; + bool force_eol) { // override tilde crunch? + WERD_RES* word = page_res_it.word(); + const UNICHARSET& uchset = *word->uch_set; int i; bool need_reject = false; UNICHAR_ID space = uchset.unichar_to_id(" "); - if ((word->unlv_crunch_mode != CR_NONE || - word->best_choice->length() == 0) && + if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->length() == 0) && !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) { if ((word->unlv_crunch_mode != CR_DELETE) && (!stats_.tilde_crunch_written || ((word->unlv_crunch_mode == CR_KEEP_SPACE) && - (word->word->space () > 0) && - !word->word->flag (W_FUZZY_NON) && - !word->word->flag (W_FUZZY_SP)))) { - if (!word->word->flag (W_BOL) && - (word->word->space () > 0) && - !word->word->flag (W_FUZZY_NON) && - !word->word->flag (W_FUZZY_SP)) { + (word->word->space() > 0) && !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)))) { + if (!word->word->flag(W_BOL) && (word->word->space() > 0) && + !word->word->flag(W_FUZZY_NON) && !word->word->flag(W_FUZZY_SP)) { stats_.last_char_was_tilde = false; } need_reject = true; @@ -158,14 +153,14 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it, stats_.write_results_empty_block = false; } - if ((word->word->flag (W_EOL) && !stats_.last_char_was_newline) || force_eol) { + if ((word->word->flag(W_EOL) && !stats_.last_char_was_newline) || + force_eol) { stats_.tilde_crunch_written = false; stats_.last_char_was_newline = true; stats_.last_char_was_tilde = false; } - if (force_eol) - stats_.write_results_empty_block = true; + if (force_eol) stats_.write_results_empty_block = true; return; } @@ -178,8 +173,7 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it, stats_.last_char_was_newline = false; stats_.write_results_empty_block = force_eol; // about to write a real word - if (unlv_tilde_crunching && - stats_.last_char_was_tilde && + if (unlv_tilde_crunching && stats_.last_char_was_tilde && (word->word->space() == 0) && !(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) && (word->best_choice->unichar_id(0) == space)) { @@ -188,28 +182,27 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it, word->MergeAdjacentBlobs(0); } if (newline_type || - (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes)) + (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes)) stats_.last_char_was_tilde = false; else { - if (word->reject_map.length () > 0) { + if (word->reject_map.length() > 0) { if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space) stats_.last_char_was_tilde = true; else stats_.last_char_was_tilde = false; - } - else if (word->word->space () > 0) + } else if (word->word->space() > 0) stats_.last_char_was_tilde = false; /* else it is unchanged as there are no output chars */ } - ASSERT_HOST (word->best_choice->length() == word->reject_map.length()); + ASSERT_HOST(word->best_choice->length() == word->reject_map.length()); set_unlv_suspects(word); - check_debug_pt (word, 120); + check_debug_pt(word, 120); if (tessedit_rejection_debug) { - tprintf ("Dict word: \"%s\": %d\n", - word->best_choice->debug_string().string(), - dict_word(*(word->best_choice))); + tprintf("Dict word: \"%s\": %d\n", + word->best_choice->debug_string().string(), + dict_word(*(word->best_choice))); } if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) { if (tessedit_zero_rejection) { @@ -238,31 +231,29 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it, * Return FALSE if not at end of line. **********************************************************************/ -char determine_newline_type( //test line ends - WERD *word, //word to do - BLOCK *block, //current block - WERD *next_word, //next word - BLOCK *next_block //block of next word - ) { - int16_t end_gap; //to right edge - int16_t width; //of next word - TBOX word_box; //bounding - TBOX next_box; //next word - TBOX block_box; //block bounding - - if (!word->flag (W_EOL)) - return FALSE; //not end of line +char determine_newline_type( // test line ends + WERD* word, // word to do + BLOCK* block, // current block + WERD* next_word, // next word + BLOCK* next_block // block of next word +) { + int16_t end_gap; // to right edge + int16_t width; // of next word + TBOX word_box; // bounding + TBOX next_box; // next word + TBOX block_box; // block bounding + + if (!word->flag(W_EOL)) return FALSE; // not end of line if (next_word == nullptr || next_block == nullptr || block != next_block) return CTRL_NEWLINE; - if (next_word->space () > 0) - return CTRL_HARDLINE; //it is tabbed - word_box = word->bounding_box (); - next_box = next_word->bounding_box (); - block_box = block->pdblk.bounding_box (); - //gap to eol - end_gap = block_box.right () - word_box.right (); - end_gap -= (int32_t) block->space (); - width = next_box.right () - next_box.left (); + if (next_word->space() > 0) return CTRL_HARDLINE; // it is tabbed + word_box = word->bounding_box(); + next_box = next_word->bounding_box(); + block_box = block->pdblk.bounding_box(); + // gap to eol + end_gap = block_box.right() - word_box.right(); + end_gap -= (int32_t)block->space(); + width = next_box.right() - next_box.left(); // tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n", // block_box.right(),word_box.right(),end_gap, // next_box.right(),next_box.left(),width, @@ -276,10 +267,13 @@ char determine_newline_type( //test line ends * character which is repeated - as determined earlier by fix_rep_char() *************************************************************************/ namespace tesseract { -UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated? +UNICHAR_ID +Tesseract::get_rep_char(WERD_RES* word) { // what char is repeated? int i; - for (i = 0; ((i < word->reject_map.length()) && - (word->reject_map[i].rejected())); ++i); + for (i = 0; + ((i < word->reject_map.length()) && (word->reject_map[i].rejected())); + ++i) + ; if (i < word->reject_map.length()) { return word->best_choice->unichar_id(i); @@ -298,10 +292,10 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated? * NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and * tessedit_minimal_rejection. *************************************************************************/ -void Tesseract::set_unlv_suspects(WERD_RES *word_res) { +void Tesseract::set_unlv_suspects(WERD_RES* word_res) { int len = word_res->reject_map.length(); - const WERD_CHOICE &word = *(word_res->best_choice); - const UNICHARSET &uchset = *word.unicharset(); + const WERD_CHOICE& word = *(word_res->best_choice); + const UNICHARSET& uchset = *word.unicharset(); int i; float rating_per_ch; @@ -313,13 +307,11 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) { return; } - if (suspect_level >= 3) - return; //Use defaults + if (suspect_level >= 3) return; // Use defaults /* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/ - if (safe_dict_word(word_res) && - (count_alphas(word) > suspect_short_words)) { + if (safe_dict_word(word_res) && (count_alphas(word) > suspect_short_words)) { /* Unreject alphas in dictionary words */ for (i = 0; i < len; ++i) { if (word_res->reject_map[i].rejected() && @@ -353,37 +345,34 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) { } } - if (suspect_level == 2) - return; + if (suspect_level == 2) return; if (!suspect_constrain_1Il || (word_res->reject_map.length() <= suspect_short_words)) { for (i = 0; i < len; i++) { if (word_res->reject_map[i].rejected()) { if ((word_res->reject_map[i].flag(R_1IL_CONFLICT) || - word_res->reject_map[i].flag(R_POSTNN_1IL))) + word_res->reject_map[i].flag(R_POSTNN_1IL))) word_res->reject_map[i].setrej_minimal_rej_accept(); - if (!suspect_constrain_1Il && - word_res->reject_map[i].flag(R_MM_REJECT)) + if (!suspect_constrain_1Il && word_res->reject_map[i].flag(R_MM_REJECT)) word_res->reject_map[i].setrej_minimal_rej_accept(); } } } - if (acceptable_word_string(*word_res->uch_set, - word.unichar_string().string(), + if (acceptable_word_string(*word_res->uch_set, word.unichar_string().string(), word.unichar_lengths().string()) != - AC_UNACCEPTABLE || + AC_UNACCEPTABLE || acceptable_number_string(word.unichar_string().string(), word.unichar_lengths().string())) { if (word_res->reject_map.length() > suspect_short_words) { for (i = 0; i < len; i++) { if (word_res->reject_map[i].rejected() && - (!word_res->reject_map[i].perm_rejected() || - word_res->reject_map[i].flag (R_1IL_CONFLICT) || - word_res->reject_map[i].flag (R_POSTNN_1IL) || - word_res->reject_map[i].flag (R_MM_REJECT))) { + (!word_res->reject_map[i].perm_rejected() || + word_res->reject_map[i].flag(R_1IL_CONFLICT) || + word_res->reject_map[i].flag(R_POSTNN_1IL) || + word_res->reject_map[i].flag(R_MM_REJECT))) { word_res->reject_map[i].setrej_minimal_rej_accept(); } } @@ -391,17 +380,15 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) { } } -int16_t Tesseract::count_alphas(const WERD_CHOICE &word) { +int16_t Tesseract::count_alphas(const WERD_CHOICE& word) { int count = 0; for (int i = 0; i < word.length(); ++i) { - if (word.unicharset()->get_isalpha(word.unichar_id(i))) - count++; + if (word.unicharset()->get_isalpha(word.unichar_id(i))) count++; } return count; } - -int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) { +int16_t Tesseract::count_alphanums(const WERD_CHOICE& word) { int count = 0; for (int i = 0; i < word.length(); ++i) { if (word.unicharset()->get_isalpha(word.unichar_id(i)) || @@ -411,13 +398,10 @@ int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) { return count; } - -bool Tesseract::acceptable_number_string(const char* s, - const char* lengths) { +bool Tesseract::acceptable_number_string(const char* s, const char* lengths) { bool prev_digit = false; - if (*lengths == 1 && *s == '(') - s++; + if (*lengths == 1 && *s == '(') s++; if (*lengths == 1 && ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-'))) @@ -429,11 +413,10 @@ bool Tesseract::acceptable_number_string(const char* s, else if (prev_digit && (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-')))) prev_digit = false; - else if (prev_digit && *lengths == 1 && - (*(s + *lengths) == '\0') && ((*s == '%') || (*s == ')'))) + else if (prev_digit && *lengths == 1 && (*(s + *lengths) == '\0') && + ((*s == '%') || (*s == ')'))) return true; - else if (prev_digit && - *lengths == 1 && (*s == '%') && + else if (prev_digit && *lengths == 1 && (*s == '%') && (*(lengths + 1) == 1 && *(s + *lengths) == ')') && (*(s + *lengths + *(lengths + 1)) == '\0')) return true; diff --git a/src/ccmain/output.h b/src/ccmain/output.h index 7f297c932b..d3c3f3d7f9 100644 --- a/src/ccmain/output.h +++ b/src/ccmain/output.h @@ -17,17 +17,17 @@ * **********************************************************************/ -#ifndef OUTPUT_H -#define OUTPUT_H +#ifndef OUTPUT_H +#define OUTPUT_H -#include "params.h" +#include "params.h" //#include "epapconv.h" -#include "pageres.h" +#include "pageres.h" /** test line ends */ -char determine_newline_type(WERD *word, ///< word to do - BLOCK *block, ///< current block - WERD *next_word, ///< next word - BLOCK *next_block ///< block of next word - ); +char determine_newline_type(WERD* word, ///< word to do + BLOCK* block, ///< current block + WERD* next_word, ///< next word + BLOCK* next_block ///< block of next word +); #endif diff --git a/src/ccmain/pageiterator.cpp b/src/ccmain/pageiterator.cpp index 6b346a2ee9..66ea174f58 100644 --- a/src/ccmain/pageiterator.cpp +++ b/src/ccmain/pageiterator.cpp @@ -97,7 +97,7 @@ const PageIterator& PageIterator::operator=(const PageIterator& src) { bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const { return (it_ == nullptr && it_ == other) || - ((other != nullptr) && (it_ != nullptr) && (*it_ == *other)); + ((other != nullptr) && (it_ != nullptr) && (*it_ == *other)); } // ============= Moving around within the page ============. @@ -109,7 +109,7 @@ void PageIterator::Begin() { } void PageIterator::RestartParagraph() { - if (it_->block() == nullptr) return; // At end of the document. + if (it_->block() == nullptr) return; // At end of the document. PAGE_RES_IT para(page_res_); PAGE_RES_IT next_para(para); next_para.forward_paragraph(); @@ -147,8 +147,7 @@ void PageIterator::RestartRow() { */ bool PageIterator::Next(PageIteratorLevel level) { if (it_->block() == nullptr) return false; // Already at the end! - if (it_->word() == nullptr) - level = RIL_BLOCK; + if (it_->word() == nullptr) level = RIL_BLOCK; switch (level) { case RIL_BLOCK: @@ -159,14 +158,14 @@ bool PageIterator::Next(PageIteratorLevel level) { break; case RIL_TEXTLINE: for (it_->forward_with_empties(); it_->row() == it_->prev_row(); - it_->forward_with_empties()); + it_->forward_with_empties()) + ; break; case RIL_WORD: it_->forward_with_empties(); break; case RIL_SYMBOL: - if (cblob_it_ != nullptr) - cblob_it_->forward(); + if (cblob_it_ != nullptr) cblob_it_->forward(); ++blob_index_; if (blob_index_ >= word_length_) it_->forward_with_empties(); @@ -185,14 +184,14 @@ bool PageIterator::Next(PageIteratorLevel level) { */ bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const { if (it_->block() == nullptr) return false; // Already at the end! - if (it_->word() == nullptr) return true; // In an image block. + if (it_->word() == nullptr) return true; // In an image block. switch (level) { case RIL_BLOCK: return blob_index_ == 0 && it_->block() != it_->prev_block(); case RIL_PARA: return blob_index_ == 0 && - (it_->block() != it_->prev_block() || - it_->row()->row->para() != it_->prev_row()->row->para()); + (it_->block() != it_->prev_block() || + it_->row()->row->para() != it_->prev_row()->row->para()); case RIL_TEXTLINE: return blob_index_ == 0 && it_->row() != it_->prev_row(); case RIL_WORD: @@ -221,8 +220,7 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level, if (next.Empty(element)) return true; // Reached the end of the page. while (element > level) { element = static_cast(element - 1); - if (!next.IsAtBeginningOf(element)) - return false; + if (!next.IsAtBeginningOf(element)) return false; } return true; } @@ -233,14 +231,11 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level, * equal to other: 0 * after other: 1 */ -int PageIterator::Cmp(const PageIterator &other) const { +int PageIterator::Cmp(const PageIterator& other) const { int word_cmp = it_->cmp(*other.it_); - if (word_cmp != 0) - return word_cmp; - if (blob_index_ < other.blob_index_) - return -1; - if (blob_index_ == other.blob_index_) - return 0; + if (word_cmp != 0) return word_cmp; + if (blob_index_ < other.blob_index_) return -1; + if (blob_index_ == other.blob_index_) return 0; return 1; } @@ -263,13 +258,12 @@ int PageIterator::Cmp(const PageIterator &other) const { * See comment on coordinate system above. * Returns false if there is no such object at the current position. */ -bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, - int* left, int* top, - int* right, int* bottom) const { - if (Empty(level)) - return false; +bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int* left, + int* top, int* right, + int* bottom) const { + if (Empty(level)) return false; TBOX box; - PARA *para = nullptr; + PARA* para = nullptr; switch (level) { case RIL_BLOCK: box = it_->block()->block->restricted_bounding_box(include_upper_dots_, @@ -323,22 +317,20 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, * See comment on coordinate system above. * Returns false if there is no such object at the current position. */ -bool PageIterator::BoundingBox(PageIteratorLevel level, - int* left, int* top, +bool PageIterator::BoundingBox(PageIteratorLevel level, int* left, int* top, int* right, int* bottom) const { return BoundingBox(level, 0, left, top, right, bottom); } bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, - int* left, int* top, - int* right, int* bottom) const { - if (!BoundingBoxInternal(level, left, top, right, bottom)) - return false; + int* left, int* top, int* right, + int* bottom) const { + if (!BoundingBoxInternal(level, left, top, right, bottom)) return false; // Convert to the coordinate system of the original image. - *left = ClipToRange(*left / scale_ + rect_left_ - padding, - rect_left_, rect_left_ + rect_width_); - *top = ClipToRange(*top / scale_ + rect_top_ - padding, - rect_top_, rect_top_ + rect_height_); + *left = ClipToRange(*left / scale_ + rect_left_ - padding, rect_left_, + rect_left_ + rect_width_); + *top = ClipToRange(*top / scale_ + rect_top_ - padding, rect_top_, + rect_top_ + rect_height_); *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding, *left, rect_left_ + rect_width_); *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding, @@ -408,8 +400,7 @@ Pta* PageIterator::BlockPolygon() const { */ Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const { int left, top, right, bottom; - if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) - return nullptr; + if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) return nullptr; if (level == RIL_SYMBOL && cblob_it_ != nullptr && cblob_it_->data()->area() != 0) return cblob_it_->data()->render(); @@ -423,9 +414,9 @@ Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const { int mask_x = left - mask_box.left(); int mask_y = top - (tesseract_->ImageHeight() - mask_box.top()); // AND the mask and pix, putting the result in pix. - pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix), - pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x), - std::max(0, mask_y)); + pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), + pixGetWidth(pix), pixGetHeight(pix), PIX_SRC & PIX_DST, mask, + std::max(0, mask_x), std::max(0, mask_y)); pixDestroy(&mask); } return pix; @@ -443,13 +434,10 @@ Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const { * Use pixDestroy to delete the image after use. */ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding, - Pix* original_img, - int* left, int* top) const { + Pix* original_img, int* left, int* top) const { int right, bottom; - if (!BoundingBox(level, left, top, &right, &bottom)) - return nullptr; - if (original_img == nullptr) - return GetBinaryImage(level); + if (!BoundingBox(level, left, top, &right, &bottom)) return nullptr; + if (original_img == nullptr) return GetBinaryImage(level); // Expand the box. *left = std::max(*left - padding, 0); @@ -469,8 +457,9 @@ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding, int width = pixGetWidth(grey_pix); int height = pixGetHeight(grey_pix); Pix* resized_mask = pixCreate(width, height, 1); - pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height, - PIX_SRC, mask, std::max(0, mask_x), std::max(0, mask_y)); + pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, + height, PIX_SRC, mask, std::max(0, mask_x), + std::max(0, mask_y)); pixDestroy(&mask); pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, 2 * padding + 1); @@ -486,14 +475,13 @@ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding, * The baseline is the line that passes through (x1, y1) and (x2, y2). * WARNING: with vertical text, baselines may be vertical! */ -bool PageIterator::Baseline(PageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) const { +bool PageIterator::Baseline(PageIteratorLevel level, int* x1, int* y1, int* x2, + int* y2) const { if (it_->word() == nullptr) return false; // Already at the end! ROW* row = it_->row()->row; WERD* word = it_->word()->word; - TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) - ? word->bounding_box() - : row->bounding_box(); + TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) ? word->bounding_box() + : row->bounding_box(); int left = box.left(); ICOORD startpt(left, static_cast(row->base_line(left) + 0.5)); int right = box.right(); @@ -508,10 +496,10 @@ bool PageIterator::Baseline(PageIteratorLevel level, return true; } -void PageIterator::Orientation(tesseract::Orientation *orientation, - tesseract::WritingDirection *writing_direction, - tesseract::TextlineOrder *textline_order, - float *deskew_angle) const { +void PageIterator::Orientation(tesseract::Orientation* orientation, + tesseract::WritingDirection* writing_direction, + tesseract::TextlineOrder* textline_order, + float* deskew_angle) const { BLOCK* block = it_->block()->block; // Orientation @@ -534,40 +522,35 @@ void PageIterator::Orientation(tesseract::Orientation *orientation, // Writing direction bool is_vertical_text = (block->classify_rotation().x() == 0.0); bool right_to_left = block->right_to_left(); - *writing_direction = - is_vertical_text - ? WRITING_DIRECTION_TOP_TO_BOTTOM - : (right_to_left - ? WRITING_DIRECTION_RIGHT_TO_LEFT - : WRITING_DIRECTION_LEFT_TO_RIGHT); + *writing_direction = is_vertical_text + ? WRITING_DIRECTION_TOP_TO_BOTTOM + : (right_to_left ? WRITING_DIRECTION_RIGHT_TO_LEFT + : WRITING_DIRECTION_LEFT_TO_RIGHT); // Textline Order const bool is_mongolian = false; // TODO(eger): fix me *textline_order = is_vertical_text - ? (is_mongolian - ? TEXTLINE_ORDER_LEFT_TO_RIGHT - : TEXTLINE_ORDER_RIGHT_TO_LEFT) - : TEXTLINE_ORDER_TOP_TO_BOTTOM; + ? (is_mongolian ? TEXTLINE_ORDER_LEFT_TO_RIGHT + : TEXTLINE_ORDER_RIGHT_TO_LEFT) + : TEXTLINE_ORDER_TOP_TO_BOTTOM; // Deskew angle FCOORD skew = block->skew(); // true horizontal for textlines *deskew_angle = -skew.angle(); } -void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, - bool *is_list_item, - bool *is_crown, - int *first_line_indent) const { +void PageIterator::ParagraphInfo(tesseract::ParagraphJustification* just, + bool* is_list_item, bool* is_crown, + int* first_line_indent) const { *just = tesseract::JUSTIFICATION_UNKNOWN; if (!it_->row() || !it_->row()->row || !it_->row()->row->para() || !it_->row()->row->para()->model) return; - PARA *para = it_->row()->row->para(); + PARA* para = it_->row()->row->para(); *is_list_item = para->is_list_item; *is_crown = para->is_very_first_or_continuation; - *first_line_indent = para->model->first_indent() - - para->model->body_indent(); + *first_line_indent = para->model->first_indent() - para->model->body_indent(); *just = para->model->justification(); } @@ -610,12 +593,11 @@ void PageIterator::BeginWord(int offset) { cblob_it_->set_to_list(word_->cblob_list()); } for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) { - if (cblob_it_ != nullptr) - cblob_it_->forward(); + if (cblob_it_ != nullptr) cblob_it_->forward(); } } -bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) { +bool PageIterator::SetWordBlamerBundle(BlamerBundle* blamer_bundle) { if (it_->word() != nullptr) { it_->word()->blamer_bundle = blamer_bundle; return true; diff --git a/src/ccmain/pageiterator.h b/src/ccmain/pageiterator.h index 6929080852..b7cada6c6d 100644 --- a/src/ccmain/pageiterator.h +++ b/src/ccmain/pageiterator.h @@ -21,8 +21,8 @@ #ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_ #define TESSERACT_CCMAIN_PAGEITERATOR_H_ -#include "publictypes.h" #include "platform.h" +#include "publictypes.h" struct BlamerBundle; class C_BLOB_IT; @@ -65,10 +65,9 @@ class TESS_API PageIterator { * that tesseract has been given by the Thresholder. * After the constructor, Begin has already been called. */ - PageIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, - int rect_width, int rect_height); + PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale, + int scaled_yres, int rect_left, int rect_top, int rect_width, + int rect_height); virtual ~PageIterator(); /** @@ -164,7 +163,7 @@ class TESS_API PageIterator { * equal to other: 0 * after other: 1 */ - int Cmp(const PageIterator &other) const; + int Cmp(const PageIterator& other) const; // ============= Accessing data ==============. // Coordinate system: @@ -203,17 +202,17 @@ class TESS_API PageIterator { * from a grey image. The padding argument to GetImage can be used to expand * the image to include more foreground pixels. See GetImage below. */ - bool BoundingBox(PageIteratorLevel level, - int* left, int* top, int* right, int* bottom) const; - bool BoundingBox(PageIteratorLevel level, const int padding, - int* left, int* top, int* right, int* bottom) const; + bool BoundingBox(PageIteratorLevel level, int* left, int* top, int* right, + int* bottom) const; + bool BoundingBox(PageIteratorLevel level, const int padding, int* left, + int* top, int* right, int* bottom) const; /** * Returns the bounding rectangle of the object in a coordinate system of the * working image rectangle having its origin at (rect_left_, rect_top_) with * respect to the original image and is scaled by a factor scale_. */ - bool BoundingBoxInternal(PageIteratorLevel level, - int* left, int* top, int* right, int* bottom) const; + bool BoundingBoxInternal(PageIteratorLevel level, int* left, int* top, + int* right, int* bottom) const; /** Returns whether there is no object of a given level. */ bool Empty(PageIteratorLevel level) const; @@ -261,8 +260,8 @@ class TESS_API PageIterator { * WARNING: with vertical text, baselines may be vertical! * Returns false if there is no baseline at the current position. */ - bool Baseline(PageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) const; + bool Baseline(PageIteratorLevel level, int* x1, int* y1, int* x2, + int* y2) const; /** * Returns orientation for the block the iterator points to. @@ -272,10 +271,10 @@ class TESS_API PageIterator { * block anti-clockwise for it to be level? * -Pi/4 <= deskew_angle <= Pi/4 */ - void Orientation(tesseract::Orientation *orientation, - tesseract::WritingDirection *writing_direction, - tesseract::TextlineOrder *textline_order, - float *deskew_angle) const; + void Orientation(tesseract::Orientation* orientation, + tesseract::WritingDirection* writing_direction, + tesseract::TextlineOrder* textline_order, + float* deskew_angle) const; /** * Returns information about the current paragraph, if available. @@ -305,16 +304,15 @@ class TESS_API PageIterator { * first_line_indent for subsequent paragraphs in this block * of text. */ - void ParagraphInfo(tesseract::ParagraphJustification *justification, - bool *is_list_item, - bool *is_crown, - int *first_line_indent) const; + void ParagraphInfo(tesseract::ParagraphJustification* justification, + bool* is_list_item, bool* is_crown, + int* first_line_indent) const; // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle // of the current word to the given pointer (takes ownership of the pointer) // and returns true. // Can only be used when iterating on the word level. - bool SetWordBlamerBundle(BlamerBundle *blamer_bundle); + bool SetWordBlamerBundle(BlamerBundle* blamer_bundle); protected: /** diff --git a/src/ccmain/pagesegmain.cpp b/src/ccmain/pagesegmain.cpp index 0d62c6c37d..cc344a00af 100644 --- a/src/ccmain/pagesegmain.cpp +++ b/src/ccmain/pagesegmain.cpp @@ -103,15 +103,14 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, int width = pixGetWidth(pix_binary_); int height = pixGetHeight(pix_binary_); // Get page segmentation mode. - PageSegMode pageseg_mode = static_cast( - static_cast(tessedit_pageseg_mode)); + PageSegMode pageseg_mode = + static_cast(static_cast(tessedit_pageseg_mode)); // If a UNLV zone file can be found, use that instead of segmentation. - if (!PSM_COL_FIND_ENABLED(pageseg_mode) && - input_file != nullptr && input_file->length() > 0) { + if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && + input_file->length() > 0) { STRING name = *input_file; const char* lastdot = strrchr(name.string(), '.'); - if (lastdot != nullptr) - name[lastdot - name.string()] = '\0'; + if (lastdot != nullptr) name[lastdot - name.string()] = '\0'; read_unlv_file(name, width, height, blocks); } if (blocks->empty()) { @@ -139,8 +138,7 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, auto_page_seg_ret_val = AutoPageSeg( pageseg_mode, blocks, &to_blocks, enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr); - if (pageseg_mode == PSM_OSD_ONLY) - return auto_page_seg_ret_val; + if (pageseg_mode == PSM_OSD_ONLY) return auto_page_seg_ret_val; // To create blobs from the image region bounds uncomment this line: // to_blocks.clear(); // Uncomment to go back to the old mode. } else { @@ -160,8 +158,7 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, } if (blocks->empty()) { - if (textord_debug_tabfind) - tprintf("Empty page\n"); + if (textord_debug_tabfind) tprintf("Empty page\n"); return 0; // AutoPageSeg found an empty page. } bool splitting = @@ -227,8 +224,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, to_block, photomask_pix, pix_thresholds_, pix_grey_, &pixa_debug_, &found_blocks, diacritic_blobs, to_blocks); - if (result >= 0) - finder->GetDeskewVectors(&deskew_, &reskew_); + if (result >= 0) finder->GetDeskewVectors(&deskew_, &reskew_); delete finder; } pixDestroy(&photomask_pix); @@ -259,10 +255,10 @@ static void AddAllScriptsConverted(const UNICHARSET& sid_set, * Sets up auto page segmentation, determines the orientation, and corrects it. * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to * facilitate testing. - * photo_mask_pix is a pointer to a nullptr pointer that will be filled on return - * with the leptonica photo mask, which must be pixDestroyed by the caller. - * to_blocks is an empty list that will be filled with (usually a single) - * block that is used during layout analysis. This ugly API is required + * photo_mask_pix is a pointer to a nullptr pointer that will be filled on + * return with the leptonica photo mask, which must be pixDestroyed by the + * caller. to_blocks is an empty list that will be filled with (usually a + * single) block that is used during layout analysis. This ugly API is required * because of the possibility of a unlv zone file. * TODO(rays) clean this up. * See AutoPageSeg for other arguments. @@ -283,10 +279,9 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( pixa_debug_.AddPix(pix_binary_, "PageSegInput"); } // Leptonica is used to find the rule/separator lines in the input. - LineFinder::FindAndRemoveLines(source_resolution_, - textord_tabfind_show_vlines, pix_binary_, - &vertical_x, &vertical_y, music_mask_pix, - &v_lines, &h_lines); + LineFinder::FindAndRemoveLines( + source_resolution_, textord_tabfind_show_vlines, pix_binary_, &vertical_x, + &vertical_y, music_mask_pix, &v_lines, &h_lines); if (tessedit_dump_pageseg_images) { pixa_debug_.AddPix(pix_binary_, "NoLines"); } @@ -340,11 +335,11 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; if (!vertical_text && textord_tabfind_vertical_text && PSM_ORIENTATION_ENABLED(pageseg_mode)) { - vertical_text = - finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio, - to_block, &osd_blobs); + vertical_text = finder->IsVerticallyAlignedText( + textord_tabfind_vertical_text_ratio, to_block, &osd_blobs); } - if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) { + if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && + osr != nullptr) { GenericVector osd_scripts; if (osd_tess != this) { // We are running osd as part of layout analysis, so constrain the @@ -373,11 +368,11 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( const char* best_script_str = osd_tess->unicharset.get_script_from_script_id(best_script_id); bool cjk = best_script_id == osd_tess->unicharset.han_sid() || - best_script_id == osd_tess->unicharset.hiragana_sid() || - best_script_id == osd_tess->unicharset.katakana_sid() || - strcmp("Japanese", best_script_str) == 0 || - strcmp("Korean", best_script_str) == 0 || - strcmp("Hangul", best_script_str) == 0; + best_script_id == osd_tess->unicharset.hiragana_sid() || + best_script_id == osd_tess->unicharset.katakana_sid() || + strcmp("Japanese", best_script_str) == 0 || + strcmp("Korean", best_script_str) == 0 || + strcmp("Hangul", best_script_str) == 0; if (cjk) { finder->set_cjk_script(true); } @@ -385,8 +380,10 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( // The margin is weak. if (!cjk && !vertical_text && osd_orientation == 2) { // upside down latin text is improbable with such a weak margin. - tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: " - "Don't rotate.\n", osd_margin); + tprintf( + "OSD: Weak margin (%.2f), horiz textlines, not CJK: " + "Don't rotate.\n", + osd_margin); osd_orientation = 0; } else { tprintf( diff --git a/src/ccmain/pagewalk.cpp b/src/ccmain/pagewalk.cpp index a02fe5f41c..2ba1f029fc 100644 --- a/src/ccmain/pagewalk.cpp +++ b/src/ccmain/pagewalk.cpp @@ -28,15 +28,14 @@ namespace tesseract { * to each word that overlaps the selection_box. */ void Tesseract::process_selected_words( - PAGE_RES* page_res, // blocks to check - TBOX& selection_box, - bool (tesseract::Tesseract::* word_processor)(PAGE_RES_IT* pr_it)) { + PAGE_RES* page_res, // blocks to check + TBOX& selection_box, + bool (tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it)) { for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != nullptr; page_res_it.forward()) { WERD* word = page_res_it.word()->word; if (word->bounding_box().overlap(selection_box)) { - if (!(this->*word_processor)(&page_res_it)) - return; + if (!(this->*word_processor)(&page_res_it)) return; } } } diff --git a/src/ccmain/par_control.cpp b/src/ccmain/par_control.cpp index 06ab70cf41..fed08bf2cd 100644 --- a/src/ccmain/par_control.cpp +++ b/src/ccmain/par_control.cpp @@ -27,9 +27,9 @@ namespace tesseract { struct BlobData { BlobData() : blob(nullptr), choices(nullptr) {} BlobData(int index, Tesseract* tess, const WERD_RES& word) - : blob(word.chopped_word->blobs[index]), - tesseract(tess), - choices(&(*word.ratings)(index, index)) {} + : blob(word.chopped_word->blobs[index]), + tesseract(tess), + choices(&(*word.ratings)(index, index)) {} TBLOB* blob; Tesseract* tesseract; @@ -57,14 +57,14 @@ void Tesseract::PrerecAllWordsPar(const GenericVector& words) { #pragma omp parallel for num_threads(10) #endif // _OPENMP for (int b = 0; b < blobs.size(); ++b) { - *blobs[b].choices = - blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, nullptr); + *blobs[b].choices = blobs[b].tesseract->classify_blob( + blobs[b].blob, "par", White, nullptr); } } else { // TODO(AMD) parallelize this. for (int b = 0; b < blobs.size(); ++b) { - *blobs[b].choices = - blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, nullptr); + *blobs[b].choices = blobs[b].tesseract->classify_blob( + blobs[b].blob, "par", White, nullptr); } } } diff --git a/src/ccmain/paragraphs.cpp b/src/ccmain/paragraphs.cpp index 136c752418..b7f888e97f 100644 --- a/src/ccmain/paragraphs.cpp +++ b/src/ccmain/paragraphs.cpp @@ -17,8 +17,8 @@ * **********************************************************************/ -#include #include +#include #include // std::unique_ptr #include "genericvector.h" @@ -40,22 +40,20 @@ namespace tesseract { // Special "weak" ParagraphModels. -const ParagraphModel *kCrownLeft - = reinterpret_cast(0xDEAD111F); -const ParagraphModel *kCrownRight - = reinterpret_cast(0xDEAD888F); +const ParagraphModel* kCrownLeft = + reinterpret_cast(0xDEAD111F); +const ParagraphModel* kCrownRight = + reinterpret_cast(0xDEAD888F); // Given the width of a typical space between words, what is the threshold // by which by which we think left and right alignments for paragraphs // can vary and still be aligned. -static int Epsilon(int space_pix) { - return space_pix * 4 / 5; -} +static int Epsilon(int space_pix) { return space_pix * 4 / 5; } -static bool AcceptableRowArgs( - int debug_level, int min_num_rows, const char *function_name, - const GenericVector *rows, - int row_start, int row_end) { +static bool AcceptableRowArgs(int debug_level, int min_num_rows, + const char* function_name, + const GenericVector* rows, + int row_start, int row_end) { if (row_start < 0 || row_end > rows->size() || row_start > row_end) { tprintf("Invalid arguments rows[%d, %d) while rows is of size %d.\n", row_start, row_end, rows->size()); @@ -63,8 +61,8 @@ static bool AcceptableRowArgs( } if (row_end - row_start < min_num_rows) { if (debug_level > 1) { - tprintf("# Too few rows[%d, %d) for %s.\n", - row_start, row_end, function_name); + tprintf("# Too few rows[%d, %d) for %s.\n", row_start, row_end, + function_name); } return false; } @@ -82,8 +80,8 @@ static STRING StrOf(int num) { // Given a row-major matrix of unicode text and a column separator, print // a formatted table. For ASCII, we get good column alignment. -static void PrintTable(const GenericVector > &rows, - const STRING &colsep) { +static void PrintTable(const GenericVector>& rows, + const STRING& colsep) { GenericVector max_col_widths; for (int r = 0; r < rows.size(); r++) { int num_columns = rows[r].size(); @@ -95,38 +93,35 @@ static void PrintTable(const GenericVector > &rows, if (c >= max_col_widths.size()) { max_col_widths.push_back(num_unicodes); } else { - if (num_unicodes > max_col_widths[c]) - max_col_widths[c] = num_unicodes; + if (num_unicodes > max_col_widths[c]) max_col_widths[c] = num_unicodes; } } } GenericVector col_width_patterns; for (int c = 0; c < max_col_widths.size(); c++) { - col_width_patterns.push_back( - STRING("%-") + StrOf(max_col_widths[c]) + "s"); + col_width_patterns.push_back(STRING("%-") + StrOf(max_col_widths[c]) + "s"); } for (int r = 0; r < rows.size(); r++) { for (int c = 0; c < rows[r].size(); c++) { - if (c > 0) - tprintf("%s", colsep.string()); + if (c > 0) tprintf("%s", colsep.string()); tprintf(col_width_patterns[c].string(), rows[r][c].string()); } tprintf("\n"); } } -STRING RtlEmbed(const STRING &word, bool rtlify) { - if (rtlify) - return STRING(kRLE) + word + STRING(kPDF); +STRING +RtlEmbed(const STRING& word, bool rtlify) { + if (rtlify) return STRING(kRLE) + word + STRING(kPDF); return word; } // Print the current thoughts of the paragraph detector. -static void PrintDetectorState(const ParagraphTheory &theory, - const GenericVector &rows) { - GenericVector > output; +static void PrintDetectorState(const ParagraphTheory& theory, + const GenericVector& rows) { + GenericVector> output; output.push_back(GenericVector()); output.back().push_back("#row"); output.back().push_back("space"); @@ -138,23 +133,21 @@ static void PrintDetectorState(const ParagraphTheory &theory, for (int i = 0; i < rows.size(); i++) { output.push_back(GenericVector()); - GenericVector &row = output.back(); + GenericVector& row = output.back(); const RowInfo& ri = *rows[i].ri_; row.push_back(StrOf(i)); row.push_back(StrOf(ri.average_interword_space)); row.push_back(ri.has_leaders ? ".." : " "); - row.push_back(RtlEmbed(ri.lword_text, !ri.ltr) + - "[" + StrOf(ri.lword_box.width()) + + row.push_back(RtlEmbed(ri.lword_text, !ri.ltr) + "[" + + StrOf(ri.lword_box.width()) + (ri.lword_likely_starts_idea ? "S" : "s") + (ri.lword_likely_ends_idea ? "E" : "e") + - (ri.lword_indicates_list_item ? "L" : "l") + - "]"); - row.push_back(RtlEmbed(ri.rword_text, !ri.ltr) + - "[" + StrOf(ri.rword_box.width()) + + (ri.lword_indicates_list_item ? "L" : "l") + "]"); + row.push_back(RtlEmbed(ri.rword_text, !ri.ltr) + "[" + + StrOf(ri.rword_box.width()) + (ri.rword_likely_starts_idea ? "S" : "s") + (ri.rword_likely_ends_idea ? "E" : "e") + - (ri.rword_indicates_list_item ? "L" : "l") + - "]"); + (ri.rword_indicates_list_item ? "L" : "l") + "]"); rows[i].AppendDebugInfo(theory, &row); row.push_back(RtlEmbed(ri.text, !ri.ltr)); } @@ -166,19 +159,16 @@ static void PrintDetectorState(const ParagraphTheory &theory, } } -static void DebugDump( - bool should_print, - const STRING &phase, - const ParagraphTheory &theory, - const GenericVector &rows) { - if (!should_print) - return; +static void DebugDump(bool should_print, const STRING& phase, + const ParagraphTheory& theory, + const GenericVector& rows) { + if (!should_print) return; tprintf("# %s\n", phase.string()); PrintDetectorState(theory, rows); } // Print out the text for rows[row_start, row_end) -static void PrintRowRange(const GenericVector &rows, +static void PrintRowRange(const GenericVector& rows, int row_start, int row_end) { tprintf("======================================\n"); for (int row = row_start; row < row_end; row++) { @@ -197,26 +187,26 @@ bool IsDigitLike(int ch) { return ch == 'o' || ch == 'O' || ch == 'l' || ch == 'I'; } -bool IsOpeningPunct(int ch) { - return strchr("'\"({[", ch) != nullptr; -} +bool IsOpeningPunct(int ch) { return strchr("'\"({[", ch) != nullptr; } -bool IsTerminalPunct(int ch) { - return strchr(":'\".?!]})", ch) != nullptr; -} +bool IsTerminalPunct(int ch) { return strchr(":'\".?!]})", ch) != nullptr; } // Return a pointer after consuming as much text as qualifies as roman numeral. -const char *SkipChars(const char *str, const char *toskip) { - while (*str != '\0' && strchr(toskip, *str)) { str++; } +const char* SkipChars(const char* str, const char* toskip) { + while (*str != '\0' && strchr(toskip, *str)) { + str++; + } return str; } -const char *SkipChars(const char *str, bool (*skip)(int)) { - while (*str != '\0' && skip(*str)) { str++; } +const char* SkipChars(const char* str, bool (*skip)(int)) { + while (*str != '\0' && skip(*str)) { + str++; + } return str; } -const char *SkipOne(const char *str, const char *toskip) { +const char* SkipOne(const char* str, const char* toskip) { if (*str != '\0' && strchr(toskip, *str)) return str + 1; return str; } @@ -224,19 +214,19 @@ const char *SkipOne(const char *str, const char *toskip) { // Return whether it is very likely that this is a numeral marker that could // start a list item. Some examples include: // A I iii. VI (2) 3.5. [C-4] -bool LikelyListNumeral(const STRING &word) { - const char *kRomans = "ivxlmdIVXLMD"; - const char *kDigits = "012345789"; - const char *kOpen = "[{("; - const char *kSep = ":;-.,"; - const char *kClose = "]})"; +bool LikelyListNumeral(const STRING& word) { + const char* kRomans = "ivxlmdIVXLMD"; + const char* kDigits = "012345789"; + const char* kOpen = "[{("; + const char* kSep = ":;-.,"; + const char* kClose = "]})"; int num_segments = 0; - const char *pos = word.string(); + const char* pos = word.string(); while (*pos != '\0' && num_segments < 3) { // skip up to two open parens. - const char *numeral_start = SkipOne(SkipOne(pos, kOpen), kOpen); - const char *numeral_end = SkipChars(numeral_start, kRomans); + const char* numeral_start = SkipOne(SkipOne(pos, kOpen), kOpen); + const char* numeral_end = SkipChars(numeral_start, kRomans); if (numeral_end != numeral_start) { // Got Roman Numeral. Great. } else { @@ -244,35 +234,32 @@ bool LikelyListNumeral(const STRING &word) { if (numeral_end == numeral_start) { // If there's a single latin letter, we can use that. numeral_end = SkipChars(numeral_start, IsLatinLetter); - if (numeral_end - numeral_start != 1) - break; + if (numeral_end - numeral_start != 1) break; } } // We got some sort of numeral. num_segments++; // Skip any trailing parens or punctuation. pos = SkipChars(SkipChars(numeral_end, kClose), kSep); - if (pos == numeral_end) - break; + if (pos == numeral_end) break; } return *pos == '\0'; } -bool LikelyListMark(const STRING &word) { - const char *kListMarks = "0Oo*.,+."; +bool LikelyListMark(const STRING& word) { + const char* kListMarks = "0Oo*.,+."; return word.size() == 1 && strchr(kListMarks, word[0]) != nullptr; } -bool AsciiLikelyListItem(const STRING &word) { +bool AsciiLikelyListItem(const STRING& word) { return LikelyListMark(word) || LikelyListNumeral(word); } // ========== Brain Dead Language Model (Tesseract Version) ================ // Return the first Unicode Codepoint from werd[pos]. -int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos) { - if (!u || !werd || pos > werd->length()) - return 0; +int UnicodeFor(const UNICHARSET* u, const WERD_CHOICE* werd, int pos) { + if (!u || !werd || pos > werd->length()) return 0; return UNICHAR(u->id_to_unichar(werd->unichar_id(pos)), -1).first_uni(); } @@ -280,8 +267,10 @@ int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos) { // does not have given character type. class UnicodeSpanSkipper { public: - UnicodeSpanSkipper(const UNICHARSET *unicharset, const WERD_CHOICE *word) - : u_(unicharset), word_(word) { wordlen_ = word->length(); } + UnicodeSpanSkipper(const UNICHARSET* unicharset, const WERD_CHOICE* word) + : u_(unicharset), word_(word) { + wordlen_ = word->length(); + } // Given an input position, return the first position >= pos not punc. int SkipPunc(int pos); @@ -293,8 +282,8 @@ class UnicodeSpanSkipper { int SkipAlpha(int pos); private: - const UNICHARSET *u_; - const WERD_CHOICE *word_; + const UNICHARSET* u_; + const WERD_CHOICE* word_; int wordlen_; }; @@ -305,12 +294,13 @@ int UnicodeSpanSkipper::SkipPunc(int pos) { int UnicodeSpanSkipper::SkipDigits(int pos) { while (pos < wordlen_ && (u_->get_isdigit(word_->unichar_id(pos)) || - IsDigitLike(UnicodeFor(u_, word_, pos)))) pos++; + IsDigitLike(UnicodeFor(u_, word_, pos)))) + pos++; return pos; } int UnicodeSpanSkipper::SkipRomans(int pos) { - const char *kRomans = "ivxlmdIVXLMD"; + const char* kRomans = "ivxlmdIVXLMD"; while (pos < wordlen_) { int ch = UnicodeFor(u_, word_, pos); if (ch >= 0xF0 || strchr(kRomans, ch) == 0) break; @@ -353,7 +343,7 @@ bool LikelyListMarkUnicode(int ch) { // Return whether it is very likely that this is a numeral marker that could // start a list item. Some examples include: // A I iii. VI (2) 3.5. [C-4] -bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) { +bool UniLikelyListItem(const UNICHARSET* u, const WERD_CHOICE* werd) { if (werd->length() == 1 && LikelyListMarkUnicode(UnicodeFor(u, werd, 0))) return true; @@ -369,16 +359,14 @@ bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) { if (numeral_end == numeral_start) { // If there's a single latin letter, we can use that. numeral_end = m.SkipAlpha(numeral_start); - if (numeral_end - numeral_start != 1) - break; + if (numeral_end - numeral_start != 1) break; } } // We got some sort of numeral. num_segments++; // Skip any trailing punctuation. pos = m.SkipPunc(numeral_end); - if (pos == numeral_end) - break; + if (pos == numeral_end) break; } return pos == werd->length(); } @@ -390,9 +378,9 @@ bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) { // is_list - this word might be a list number or bullet. // starts_idea - this word is likely to start a sentence. // ends_idea - this word is likely to end a sentence. -void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, - const STRING &utf8, - bool *is_list, bool *starts_idea, bool *ends_idea) { +void LeftWordAttributes(const UNICHARSET* unicharset, const WERD_CHOICE* werd, + const STRING& utf8, bool* is_list, bool* starts_idea, + bool* ends_idea) { *is_list = false; *starts_idea = false; *ends_idea = false; @@ -437,9 +425,9 @@ void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, // is_list - this word might be a list number or bullet. // starts_idea - this word is likely to start a sentence. // ends_idea - this word is likely to end a sentence. -void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, - const STRING &utf8, - bool *is_list, bool *starts_idea, bool *ends_idea) { +void RightWordAttributes(const UNICHARSET* unicharset, const WERD_CHOICE* werd, + const STRING& utf8, bool* is_list, bool* starts_idea, + bool* ends_idea) { *is_list = false; *starts_idea = false; *ends_idea = false; @@ -472,16 +460,16 @@ void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, // =============== Implementation of RowScratchRegisters ===================== /* static */ void RowScratchRegisters::AppendDebugHeaderFields( - GenericVector *header) { + GenericVector* header) { header->push_back("[lmarg,lind;rind,rmarg]"); header->push_back("model"); } -void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory, - GenericVector *dbg) const { +void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory& theory, + GenericVector* dbg) const { char s[30]; - snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]", - lmargin_, lindent_, rindent_, rmargin_); + snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]", lmargin_, lindent_, rindent_, + rmargin_); dbg->push_back(s); STRING model_string; model_string += static_cast(GetLineType()); @@ -489,10 +477,8 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory, int model_numbers = 0; for (int h = 0; h < hypotheses_.size(); h++) { - if (hypotheses_[h].model == nullptr) - continue; - if (model_numbers > 0) - model_string += ","; + if (hypotheses_[h].model == nullptr) continue; + if (model_numbers > 0) model_string += ","; if (StrongModel(hypotheses_[h].model)) { model_string += StrOf(1 + theory.IndexOf(hypotheses_[h].model)); } else if (hypotheses_[h].model == kCrownLeft) { @@ -502,13 +488,12 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory, } model_numbers++; } - if (model_numbers == 0) - model_string += "0"; + if (model_numbers == 0) model_string += "0"; dbg->push_back(model_string); } -void RowScratchRegisters::Init(const RowInfo &row) { +void RowScratchRegisters::Init(const RowInfo& row) { ri_ = &row; lmargin_ = 0; lindent_ = row.pix_ldistance; @@ -517,44 +502,47 @@ void RowScratchRegisters::Init(const RowInfo &row) { } LineType RowScratchRegisters::GetLineType() const { - if (hypotheses_.empty()) - return LT_UNKNOWN; + if (hypotheses_.empty()) return LT_UNKNOWN; bool has_start = false; bool has_body = false; for (int i = 0; i < hypotheses_.size(); i++) { switch (hypotheses_[i].ty) { - case LT_START: has_start = true; break; - case LT_BODY: has_body = true; break; + case LT_START: + has_start = true; + break; + case LT_BODY: + has_body = true; + break; default: tprintf("Encountered bad value in hypothesis list: %c\n", hypotheses_[i].ty); break; } } - if (has_start && has_body) - return LT_MULTIPLE; + if (has_start && has_body) return LT_MULTIPLE; return has_start ? LT_START : LT_BODY; } -LineType RowScratchRegisters::GetLineType(const ParagraphModel *model) const { - if (hypotheses_.empty()) - return LT_UNKNOWN; +LineType RowScratchRegisters::GetLineType(const ParagraphModel* model) const { + if (hypotheses_.empty()) return LT_UNKNOWN; bool has_start = false; bool has_body = false; for (int i = 0; i < hypotheses_.size(); i++) { - if (hypotheses_[i].model != model) - continue; + if (hypotheses_[i].model != model) continue; switch (hypotheses_[i].ty) { - case LT_START: has_start = true; break; - case LT_BODY: has_body = true; break; + case LT_START: + has_start = true; + break; + case LT_BODY: + has_body = true; + break; default: tprintf("Encountered bad value in hypothesis list: %c\n", hypotheses_[i].ty); break; } } - if (has_start && has_body) - return LT_MULTIPLE; + if (has_start && has_body) return LT_MULTIPLE; return has_start ? LT_START : LT_BODY; } @@ -578,58 +566,53 @@ void RowScratchRegisters::SetBodyLine() { } } -void RowScratchRegisters::AddStartLine(const ParagraphModel *model) { +void RowScratchRegisters::AddStartLine(const ParagraphModel* model) { hypotheses_.push_back_new(LineHypothesis(LT_START, model)); int old_idx = hypotheses_.get_index(LineHypothesis(LT_START, nullptr)); - if (old_idx >= 0) - hypotheses_.remove(old_idx); + if (old_idx >= 0) hypotheses_.remove(old_idx); } -void RowScratchRegisters::AddBodyLine(const ParagraphModel *model) { +void RowScratchRegisters::AddBodyLine(const ParagraphModel* model) { hypotheses_.push_back_new(LineHypothesis(LT_BODY, model)); int old_idx = hypotheses_.get_index(LineHypothesis(LT_BODY, nullptr)); - if (old_idx >= 0) - hypotheses_.remove(old_idx); + if (old_idx >= 0) hypotheses_.remove(old_idx); } -void RowScratchRegisters::StartHypotheses(SetOfModels *models) const { +void RowScratchRegisters::StartHypotheses(SetOfModels* models) const { for (int h = 0; h < hypotheses_.size(); h++) { if (hypotheses_[h].ty == LT_START && StrongModel(hypotheses_[h].model)) models->push_back_new(hypotheses_[h].model); } } -void RowScratchRegisters::StrongHypotheses(SetOfModels *models) const { +void RowScratchRegisters::StrongHypotheses(SetOfModels* models) const { for (int h = 0; h < hypotheses_.size(); h++) { if (StrongModel(hypotheses_[h].model)) models->push_back_new(hypotheses_[h].model); } } -void RowScratchRegisters::NonNullHypotheses(SetOfModels *models) const { +void RowScratchRegisters::NonNullHypotheses(SetOfModels* models) const { for (int h = 0; h < hypotheses_.size(); h++) { if (hypotheses_[h].model != nullptr) models->push_back_new(hypotheses_[h].model); } } -const ParagraphModel *RowScratchRegisters::UniqueStartHypothesis() const { - if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_START) - return nullptr; +const ParagraphModel* RowScratchRegisters::UniqueStartHypothesis() const { + if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_START) return nullptr; return hypotheses_[0].model; } -const ParagraphModel *RowScratchRegisters::UniqueBodyHypothesis() const { - if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_BODY) - return nullptr; +const ParagraphModel* RowScratchRegisters::UniqueBodyHypothesis() const { + if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_BODY) return nullptr; return hypotheses_[0].model; } // Discard any hypotheses whose model is not in the given list. void RowScratchRegisters::DiscardNonMatchingHypotheses( - const SetOfModels &models) { - if (models.empty()) - return; + const SetOfModels& models) { + if (models.empty()) return; for (int h = hypotheses_.size() - 1; h >= 0; h--) { if (!models.contains(hypotheses_[h].model)) { hypotheses_.remove(h); @@ -653,7 +636,7 @@ class SimpleClusterer { : max_cluster_width_(max_cluster_width) {} void Add(int value) { values_.push_back(value); } int size() const { return values_.size(); } - void GetClusters(GenericVector *clusters); + void GetClusters(GenericVector* clusters); private: int max_cluster_width_; @@ -661,17 +644,17 @@ class SimpleClusterer { }; // Return the index of the cluster closest to value. -int ClosestCluster(const GenericVector &clusters, int value) { +int ClosestCluster(const GenericVector& clusters, int value) { int best_index = 0; for (int i = 0; i < clusters.size(); i++) { if (abs(value - clusters[i].center) < abs(value - clusters[best_index].center)) - best_index = i; + best_index = i; } return best_index; } -void SimpleClusterer::GetClusters(GenericVector *clusters) { +void SimpleClusterer::GetClusters(GenericVector* clusters) { clusters->clear(); values_.sort(); for (int i = 0; i < values_.size();) { @@ -687,13 +670,11 @@ void SimpleClusterer::GetClusters(GenericVector *clusters) { // Calculate left- and right-indent tab stop values seen in // rows[row_start, row_end) given a tolerance of tolerance. -void CalculateTabStops(GenericVector *rows, - int row_start, int row_end, - int tolerance, - GenericVector *left_tabs, - GenericVector *right_tabs) { - if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end)) - return; +void CalculateTabStops(GenericVector* rows, int row_start, + int row_end, int tolerance, + GenericVector* left_tabs, + GenericVector* right_tabs) { + if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end)) return; // First pass: toss all left and right indents into clusterers. SimpleClusterer initial_lefts(tolerance); SimpleClusterer initial_rights(tolerance); @@ -803,13 +784,10 @@ void CalculateTabStops(GenericVector *rows, // Case 2b: Fully Justified. (eop_threshold > 0) // We mark a line as short (end of paragraph) if the offside indent // is greater than eop_threshold. -void MarkRowsWithModel(GenericVector *rows, - int row_start, int row_end, - const ParagraphModel *model, - bool ltr, +void MarkRowsWithModel(GenericVector* rows, int row_start, + int row_end, const ParagraphModel* model, bool ltr, int eop_threshold) { - if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) - return; + if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) return; for (int row = row_start; row < row_end; row++) { bool valid_first = ValidFirstLine(rows, row, model); bool valid_body = ValidBodyLine(rows, row, model); @@ -827,8 +805,8 @@ void MarkRowsWithModel(GenericVector *rows, after_eop = (*rows)[row - 1].lindent_ > eop_threshold; } } else { - after_eop = FirstWordWouldHaveFit((*rows)[row - 1], (*rows)[row], - model->justification()); + after_eop = FirstWordWouldHaveFit((*rows)[row - 1], (*rows)[row], + model->justification()); } } if (after_eop) { @@ -851,18 +829,20 @@ void MarkRowsWithModel(GenericVector *rows, // Further, this struct holds the data we amass for the (single) ParagraphModel // we'll assign to the text lines (assuming we get that far). struct GeometricClassifierState { - GeometricClassifierState(int dbg_level, - GenericVector *r, + GeometricClassifierState(int dbg_level, GenericVector* r, int r_start, int r_end) - : debug_level(dbg_level), rows(r), row_start(r_start), row_end(r_end), + : debug_level(dbg_level), + rows(r), + row_start(r_start), + row_end(r_end), margin(0) { tolerance = InterwordSpace(*r, r_start, r_end); - CalculateTabStops(r, r_start, r_end, tolerance, - &left_tabs, &right_tabs); + CalculateTabStops(r, r_start, r_end, tolerance, &left_tabs, &right_tabs); if (debug_level >= 3) { - tprintf("Geometry: TabStop cluster tolerance = %d; " - "%d left tabs; %d right tabs\n", - tolerance, left_tabs.size(), right_tabs.size()); + tprintf( + "Geometry: TabStop cluster tolerance = %d; " + "%d left tabs; %d right tabs\n", + tolerance, left_tabs.size(), right_tabs.size()); } ltr = (*r)[r_start].ri_->ltr; } @@ -878,7 +858,7 @@ struct GeometricClassifierState { } // Align tabs are the tab stops the text is aligned to. - const GenericVector &AlignTabs() const { + const GenericVector& AlignTabs() const { if (just == tesseract::JUSTIFICATION_RIGHT) return right_tabs; return left_tabs; } @@ -888,7 +868,7 @@ struct GeometricClassifierState { // Note that for a left-to-right text which is aligned to the right such as // this function comment, the offside tabs are the horizontal tab stops // marking the beginning of ("Note", "this" and "marking"). - const GenericVector &OffsideTabs() const { + const GenericVector& OffsideTabs() const { if (just == tesseract::JUSTIFICATION_RIGHT) return left_tabs; return right_tabs; } @@ -897,7 +877,7 @@ struct GeometricClassifierState { // to the right most right tab stop. bool IsFullRow(int i) const { return ClosestCluster(left_tabs, (*rows)[i].lindent_) == 0 && - ClosestCluster(right_tabs, (*rows)[i].rindent_) == 0; + ClosestCluster(right_tabs, (*rows)[i].rindent_) == 0; } int AlignsideTabIndex(int row_idx) const { @@ -907,13 +887,13 @@ struct GeometricClassifierState { // Given what we know about the paragraph justification (just), would the // first word of row_b have fit at the end of row_a? bool FirstWordWouldHaveFit(int row_a, int row_b) { - return ::tesseract::FirstWordWouldHaveFit( - (*rows)[row_a], (*rows)[row_b], just); + return ::tesseract::FirstWordWouldHaveFit((*rows)[row_a], (*rows)[row_b], + just); } void PrintRows() const { PrintRowRange(*rows, row_start, row_end); } - void Fail(int min_debug_level, const char *why) const { + void Fail(int min_debug_level, const char* why) const { if (debug_level < min_debug_level) return; tprintf("# %s\n", why); PrintRows(); @@ -928,7 +908,7 @@ struct GeometricClassifierState { // The Geometric Classifier was asked to find a single paragraph model // to fit the text rows (*rows)[row_start, row_end) - GenericVector *rows; + GenericVector* rows; int row_start; int row_end; @@ -981,10 +961,9 @@ struct GeometricClassifierState { // [script direction: first indent, body indent] // (A1) LtR: 2,0 RtL: 0,0 (B1) LtR: 0,0 RtL: 2,0 // (A2) LtR: 2,0 RtL: CrR (B2) LtR: CrL RtL: 2,0 -void GeometricClassifyThreeTabStopTextBlock( - int debug_level, - GeometricClassifierState &s, - ParagraphTheory *theory) { +void GeometricClassifyThreeTabStopTextBlock(int debug_level, + GeometricClassifierState& s, + ParagraphTheory* theory) { int num_rows = s.row_end - s.row_start; int num_full_rows = 0; int last_row_full = 0; @@ -1010,19 +989,20 @@ void GeometricClassifyThreeTabStopTextBlock( } if (debug_level > 0) { - tprintf("# Not enough variety for clear outline classification. " - "Guessing these are %s aligned based on script.\n", - s.ltr ? "left" : "right"); + tprintf( + "# Not enough variety for clear outline classification. " + "Guessing these are %s aligned based on script.\n", + s.ltr ? "left" : "right"); s.PrintRows(); } if (s.AlignTabs().size() == 2) { // case A1 or A2 s.first_indent = s.AlignTabs()[1].center; s.body_indent = s.AlignTabs()[0].center; - } else { // case B1 or B2 + } else { // case B1 or B2 if (num_rows - 1 == num_full_rows - last_row_full) { // case B2 - const ParagraphModel *model = s.ltr ? kCrownLeft : kCrownRight; + const ParagraphModel* model = s.ltr ? kCrownLeft : kCrownRight; (*s.rows)[s.row_start].AddStartLine(model); for (int i = s.row_start + 1; i < s.row_end; i++) { (*s.rows)[i].AddBodyLine(model); @@ -1031,13 +1011,13 @@ void GeometricClassifyThreeTabStopTextBlock( } else { // case B1 s.first_indent = s.body_indent = s.AlignTabs()[0].center; - s.eop_threshold = (s.OffsideTabs()[0].center + - s.OffsideTabs()[1].center) / 2; + s.eop_threshold = + (s.OffsideTabs()[0].center + s.OffsideTabs()[1].center) / 2; } } - const ParagraphModel *model = theory->AddModel(s.Model()); - MarkRowsWithModel(s.rows, s.row_start, s.row_end, model, - s.ltr, s.eop_threshold); + const ParagraphModel* model = theory->AddModel(s.Model()); + MarkRowsWithModel(s.rows, s.row_start, s.row_end, model, s.ltr, + s.eop_threshold); return; } @@ -1074,15 +1054,14 @@ void GeometricClassifyThreeTabStopTextBlock( // it's worth guessing that (A1b) is the correct interpretation if there are // far more "full" lines than "short" lines. void GeometricClassify(int debug_level, - GenericVector *rows, - int row_start, int row_end, - ParagraphTheory *theory) { + GenericVector* rows, int row_start, + int row_end, ParagraphTheory* theory) { if (!AcceptableRowArgs(debug_level, 4, __func__, rows, row_start, row_end)) return; if (debug_level > 1) { tprintf("###############################################\n"); - tprintf("##### GeometricClassify( rows[%d:%d) ) ####\n", - row_start, row_end); + tprintf("##### GeometricClassify( rows[%d:%d) ) ####\n", row_start, + row_end); tprintf("###############################################\n"); } RecomputeMarginsAndClearHypotheses(rows, row_start, row_end, 10); @@ -1174,13 +1153,12 @@ void GeometricClassify(int debug_level, } // At this point, we have our model. - const ParagraphModel *model = theory->AddModel(s.Model()); + const ParagraphModel* model = theory->AddModel(s.Model()); // Now all we have to do is figure out if the text is fully justified or not. // eop_threshold: default to fully justified unless we see evidence below. // See description on MarkRowsWithModel() - s.eop_threshold = - (s.OffsideTabs()[0].center + s.OffsideTabs()[1].center) / 2; + s.eop_threshold = (s.OffsideTabs()[0].center + s.OffsideTabs()[1].center) / 2; // If the text is not fully justified, re-set the eop_threshold to 0. if (s.AlignTabs().size() == 2) { // Paragraphs with a paragraph-start indent. @@ -1210,20 +1188,19 @@ void GeometricClassify(int debug_level, // =============== Implementation of ParagraphTheory ===================== -const ParagraphModel *ParagraphTheory::AddModel(const ParagraphModel &model) { +const ParagraphModel* ParagraphTheory::AddModel(const ParagraphModel& model) { for (int i = 0; i < models_->size(); i++) { - if ((*models_)[i]->Comparable(model)) - return (*models_)[i]; + if ((*models_)[i]->Comparable(model)) return (*models_)[i]; } - ParagraphModel *m = new ParagraphModel(model); + ParagraphModel* m = new ParagraphModel(model); models_->push_back(m); models_we_added_.push_back_new(m); return m; } -void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) { +void ParagraphTheory::DiscardUnusedModels(const SetOfModels& used_models) { for (int i = models_->size() - 1; i >= 0; i--) { - ParagraphModel *m = (*models_)[i]; + ParagraphModel* m = (*models_)[i]; if (!used_models.contains(m) && models_we_added_.contains(m)) { models_->remove(i); models_we_added_.remove(models_we_added_.get_index(m)); @@ -1235,10 +1212,10 @@ void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) { // Examine rows[start, end) and try to determine if an existing non-centered // paragraph model would fit them perfectly. If so, return a pointer to it. // If not, return nullptr. -const ParagraphModel *ParagraphTheory::Fits( - const GenericVector *rows, int start, int end) const { +const ParagraphModel* ParagraphTheory::Fits( + const GenericVector* rows, int start, int end) const { for (int m = 0; m < models_->size(); m++) { - const ParagraphModel *model = (*models_)[m]; + const ParagraphModel* model = (*models_)[m]; if (model->justification() != JUSTIFICATION_CENTER && RowsFitModel(rows, start, end, model)) return model; @@ -1246,52 +1223,49 @@ const ParagraphModel *ParagraphTheory::Fits( return nullptr; } -void ParagraphTheory::NonCenteredModels(SetOfModels *models) { +void ParagraphTheory::NonCenteredModels(SetOfModels* models) { for (int m = 0; m < models_->size(); m++) { - const ParagraphModel *model = (*models_)[m]; + const ParagraphModel* model = (*models_)[m]; if (model->justification() != JUSTIFICATION_CENTER) models->push_back_new(model); } } -int ParagraphTheory::IndexOf(const ParagraphModel *model) const { +int ParagraphTheory::IndexOf(const ParagraphModel* model) const { for (int i = 0; i < models_->size(); i++) { - if ((*models_)[i] == model) - return i; + if ((*models_)[i] == model) return i; } return -1; } -bool ValidFirstLine(const GenericVector *rows, - int row, const ParagraphModel *model) { +bool ValidFirstLine(const GenericVector* rows, int row, + const ParagraphModel* model) { if (!StrongModel(model)) { tprintf("ValidFirstLine() should only be called with strong models!\n"); } return StrongModel(model) && - model->ValidFirstLine( - (*rows)[row].lmargin_, (*rows)[row].lindent_, - (*rows)[row].rindent_, (*rows)[row].rmargin_); + model->ValidFirstLine((*rows)[row].lmargin_, (*rows)[row].lindent_, + (*rows)[row].rindent_, (*rows)[row].rmargin_); } -bool ValidBodyLine(const GenericVector *rows, - int row, const ParagraphModel *model) { +bool ValidBodyLine(const GenericVector* rows, int row, + const ParagraphModel* model) { if (!StrongModel(model)) { tprintf("ValidBodyLine() should only be called with strong models!\n"); } return StrongModel(model) && - model->ValidBodyLine( - (*rows)[row].lmargin_, (*rows)[row].lindent_, - (*rows)[row].rindent_, (*rows)[row].rmargin_); + model->ValidBodyLine((*rows)[row].lmargin_, (*rows)[row].lindent_, + (*rows)[row].rindent_, (*rows)[row].rmargin_); } -bool CrownCompatible(const GenericVector *rows, - int a, int b, const ParagraphModel *model) { +bool CrownCompatible(const GenericVector* rows, int a, + int b, const ParagraphModel* model) { if (model != kCrownRight && model != kCrownLeft) { tprintf("CrownCompatible() should only be called with crown models!\n"); return false; } - RowScratchRegisters &row_a = (*rows)[a]; - RowScratchRegisters &row_b = (*rows)[b]; + RowScratchRegisters& row_a = (*rows)[a]; + RowScratchRegisters& row_b = (*rows)[b]; if (model == kCrownRight) { return NearlyEqual(row_a.rindent_ + row_a.rmargin_, row_b.rindent_ + row_b.rmargin_, @@ -1302,14 +1276,12 @@ bool CrownCompatible(const GenericVector *rows, Epsilon(row_a.ri_->average_interword_space)); } - // =============== Implementation of ParagraphModelSmearer ==================== ParagraphModelSmearer::ParagraphModelSmearer( - GenericVector *rows, - int row_start, int row_end, ParagraphTheory *theory) - : theory_(theory), rows_(rows), row_start_(row_start), - row_end_(row_end) { + GenericVector* rows, int row_start, int row_end, + ParagraphTheory* theory) + : theory_(theory), rows_(rows), row_start_(row_start), row_end_(row_end) { if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) { row_start_ = 0; row_end_ = 0; @@ -1332,7 +1304,7 @@ void ParagraphModelSmearer::CalculateOpenModels(int row_start, int row_end) { if ((*rows_)[row].ri_->num_words == 0) { OpenModels(row + 1) = no_models; } else { - SetOfModels &opened = OpenModels(row); + SetOfModels& opened = OpenModels(row); (*rows_)[row].StartHypotheses(&opened); // Which models survive the transition from row to row + 1? @@ -1359,9 +1331,8 @@ void ParagraphModelSmearer::Smear() { // we have multiple LT_START hypotheses), see if there's a model that // was recently used (an "open" model) which might model it well. for (int i = row_start_; i < row_end_; i++) { - RowScratchRegisters &row = (*rows_)[i]; - if (row.ri_->num_words == 0) - continue; + RowScratchRegisters& row = (*rows_)[i]; + if (row.ri_->num_words == 0) continue; // Step One: // Figure out if there are "open" models which are left-alined or @@ -1371,9 +1342,14 @@ void ParagraphModelSmearer::Smear() { bool right_align_open = false; for (int m = 0; m < OpenModels(i).size(); m++) { switch (OpenModels(i)[m]->justification()) { - case JUSTIFICATION_LEFT: left_align_open = true; break; - case JUSTIFICATION_RIGHT: right_align_open = true; break; - default: left_align_open = right_align_open = true; + case JUSTIFICATION_LEFT: + left_align_open = true; + break; + case JUSTIFICATION_RIGHT: + right_align_open = true; + break; + default: + left_align_open = right_align_open = true; } } // Step Two: @@ -1385,16 +1361,15 @@ void ParagraphModelSmearer::Smear() { } else { if ((left_align_open && right_align_open) || (!left_align_open && !right_align_open)) { - likely_start = LikelyParagraphStart((*rows_)[i - 1], row, - JUSTIFICATION_LEFT) || - LikelyParagraphStart((*rows_)[i - 1], row, - JUSTIFICATION_RIGHT); + likely_start = + LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_LEFT) || + LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_RIGHT); } else if (left_align_open) { - likely_start = LikelyParagraphStart((*rows_)[i - 1], row, - JUSTIFICATION_LEFT); + likely_start = + LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_LEFT); } else { - likely_start = LikelyParagraphStart((*rows_)[i - 1], row, - JUSTIFICATION_RIGHT); + likely_start = + LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_RIGHT); } } @@ -1418,9 +1393,8 @@ void ParagraphModelSmearer::Smear() { theory_->NonCenteredModels(&last_line_models); } for (int m = 0; m < last_line_models.size(); m++) { - const ParagraphModel *model = last_line_models[m]; - if (ValidBodyLine(rows_, i, model)) - row.AddBodyLine(model); + const ParagraphModel* model = last_line_models[m]; + if (ValidBodyLine(rows_, i, model)) row.AddBodyLine(model); } } @@ -1451,8 +1425,8 @@ void ParagraphModelSmearer::Smear() { // Find out what ParagraphModels are actually used, and discard any // that are not. -void DiscardUnusedModels(const GenericVector &rows, - ParagraphTheory *theory) { +void DiscardUnusedModels(const GenericVector& rows, + ParagraphTheory* theory) { SetOfModels used_models; for (int i = 0; i < rows.size(); i++) { rows[i].StrongHypotheses(&used_models); @@ -1484,13 +1458,12 @@ void DiscardUnusedModels(const GenericVector &rows, // Comb backwards through the row scratch registers, and turn any // sequences of body lines of equivalent type abutted against the beginning // or a body or start line of a different type into a crown paragraph. -void DowngradeWeakestToCrowns(int debug_level, - ParagraphTheory *theory, - GenericVector *rows) { +void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory* theory, + GenericVector* rows) { int start; for (int end = rows->size(); end > 0; end = start) { // Search back for a body line of a unique type. - const ParagraphModel *model = nullptr; + const ParagraphModel* model = nullptr; while (end > 0 && (model = (*rows)[end - 1].UniqueBodyHypothesis()) == nullptr) { end--; @@ -1504,27 +1477,25 @@ void DowngradeWeakestToCrowns(int debug_level, StrongModel(model) && NearlyEqual(model->first_indent(), model->body_indent(), model->tolerance())) { - start--; + start--; } start++; // Now rows[start, end) is a sequence of unique body hypotheses of model. if (StrongModel(model) && model->justification() == JUSTIFICATION_CENTER) continue; if (!StrongModel(model)) { - while (start > 0 && - CrownCompatible(rows, start - 1, start, model)) + while (start > 0 && CrownCompatible(rows, start - 1, start, model)) start--; } - if (start == 0 || - (!StrongModel(model)) || + if (start == 0 || (!StrongModel(model)) || (StrongModel(model) && !ValidFirstLine(rows, start - 1, model))) { // crownify rows[start, end) - const ParagraphModel *crown_model = model; + const ParagraphModel* crown_model = model; if (StrongModel(model)) { - if (model->justification() == JUSTIFICATION_LEFT) - crown_model = kCrownLeft; - else - crown_model = kCrownRight; + if (model->justification() == JUSTIFICATION_LEFT) + crown_model = kCrownLeft; + else + crown_model = kCrownRight; } (*rows)[start].SetUnknown(); (*rows)[start].AddStartLine(crown_model); @@ -1537,7 +1508,6 @@ void DowngradeWeakestToCrowns(int debug_level, DiscardUnusedModels(*rows, theory); } - // Clear all hypotheses about lines [start, end) and reset margins. // // The empty space between the left of a row and the block boundary (and @@ -1555,35 +1525,32 @@ void DowngradeWeakestToCrowns(int debug_level, // user to specify the percentile (0..100) of indent values to use as // the common margin for each row in the run of rows[start, end). void RecomputeMarginsAndClearHypotheses( - GenericVector *rows, int start, int end, + GenericVector* rows, int start, int end, int percentile) { - if (!AcceptableRowArgs(0, 0, __func__, rows, start, end)) - return; + if (!AcceptableRowArgs(0, 0, __func__, rows, start, end)) return; int lmin, lmax, rmin, rmax; lmin = lmax = (*rows)[start].lmargin_ + (*rows)[start].lindent_; rmin = rmax = (*rows)[start].rmargin_ + (*rows)[start].rindent_; for (int i = start; i < end; i++) { - RowScratchRegisters &sr = (*rows)[i]; + RowScratchRegisters& sr = (*rows)[i]; sr.SetUnknown(); - if (sr.ri_->num_words == 0) - continue; + if (sr.ri_->num_words == 0) continue; UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax); UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax); } STATS lefts(lmin, lmax + 1); STATS rights(rmin, rmax + 1); for (int i = start; i < end; i++) { - RowScratchRegisters &sr = (*rows)[i]; - if (sr.ri_->num_words == 0) - continue; + RowScratchRegisters& sr = (*rows)[i]; + if (sr.ri_->num_words == 0) continue; lefts.add(sr.lmargin_ + sr.lindent_, 1); rights.add(sr.rmargin_ + sr.rindent_, 1); } int ignorable_left = lefts.ile(ClipToRange(percentile, 0, 100) / 100.0); int ignorable_right = rights.ile(ClipToRange(percentile, 0, 100) / 100.0); for (int i = start; i < end; i++) { - RowScratchRegisters &sr = (*rows)[i]; + RowScratchRegisters& sr = (*rows)[i]; int ldelta = ignorable_left - sr.lmargin_; sr.lmargin_ += ldelta; sr.lindent_ -= ldelta; @@ -1594,13 +1561,15 @@ void RecomputeMarginsAndClearHypotheses( } // Return the median inter-word space in rows[row_start, row_end). -int InterwordSpace(const GenericVector &rows, +int InterwordSpace(const GenericVector& rows, int row_start, int row_end) { if (row_end < row_start + 1) return 1; int word_height = (rows[row_start].ri_->lword_box.height() + - rows[row_end - 1].ri_->lword_box.height()) / 2; + rows[row_end - 1].ri_->lword_box.height()) / + 2; int word_width = (rows[row_start].ri_->lword_box.width() + - rows[row_end - 1].ri_->lword_box.width()) / 2; + rows[row_end - 1].ri_->lword_box.width()) / + 2; STATS spacing_widths(0, 5 + word_width); for (int i = row_start; i < row_end; i++) { if (rows[i].ri_->num_words > 1) { @@ -1608,20 +1577,18 @@ int InterwordSpace(const GenericVector &rows, } } int minimum_reasonable_space = word_height / 3; - if (minimum_reasonable_space < 2) - minimum_reasonable_space = 2; + if (minimum_reasonable_space < 2) minimum_reasonable_space = 2; int median = spacing_widths.median(); - return (median > minimum_reasonable_space) - ? median : minimum_reasonable_space; + return (median > minimum_reasonable_space) ? median + : minimum_reasonable_space; } // Return whether the first word on the after line can fit in the space at // the end of the before line (knowing which way the text is aligned and read). -bool FirstWordWouldHaveFit(const RowScratchRegisters &before, - const RowScratchRegisters &after, +bool FirstWordWouldHaveFit(const RowScratchRegisters& before, + const RowScratchRegisters& after, tesseract::ParagraphJustification justification) { - if (before.ri_->num_words == 0 || after.ri_->num_words == 0) - return true; + if (before.ri_->num_words == 0 || after.ri_->num_words == 0) return true; if (justification == JUSTIFICATION_UNKNOWN) { tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n"); @@ -1634,31 +1601,27 @@ bool FirstWordWouldHaveFit(const RowScratchRegisters &before, } available_space -= before.ri_->average_interword_space; - if (before.ri_->ltr) - return after.ri_->lword_box.width() < available_space; + if (before.ri_->ltr) return after.ri_->lword_box.width() < available_space; return after.ri_->rword_box.width() < available_space; } // Return whether the first word on the after line can fit in the space at // the end of the before line (not knowing which way the text goes) in a left // or right alignemnt. -bool FirstWordWouldHaveFit(const RowScratchRegisters &before, - const RowScratchRegisters &after) { - if (before.ri_->num_words == 0 || after.ri_->num_words == 0) - return true; +bool FirstWordWouldHaveFit(const RowScratchRegisters& before, + const RowScratchRegisters& after) { + if (before.ri_->num_words == 0 || after.ri_->num_words == 0) return true; int available_space = before.lindent_; - if (before.rindent_ > available_space) - available_space = before.rindent_; + if (before.rindent_ > available_space) available_space = before.rindent_; available_space -= before.ri_->average_interword_space; - if (before.ri_->ltr) - return after.ri_->lword_box.width() < available_space; + if (before.ri_->ltr) return after.ri_->lword_box.width() < available_space; return after.ri_->rword_box.width() < available_space; } -bool TextSupportsBreak(const RowScratchRegisters &before, - const RowScratchRegisters &after) { +bool TextSupportsBreak(const RowScratchRegisters& before, + const RowScratchRegisters& after) { if (before.ri_->ltr) { return before.ri_->rword_likely_ends_idea && after.ri_->lword_likely_starts_idea; @@ -1668,19 +1631,18 @@ bool TextSupportsBreak(const RowScratchRegisters &before, } } -bool LikelyParagraphStart(const RowScratchRegisters &before, - const RowScratchRegisters &after) { - return before.ri_->num_words == 0 || - (FirstWordWouldHaveFit(before, after) && - TextSupportsBreak(before, after)); +bool LikelyParagraphStart(const RowScratchRegisters& before, + const RowScratchRegisters& after) { + return before.ri_->num_words == 0 || (FirstWordWouldHaveFit(before, after) && + TextSupportsBreak(before, after)); } -bool LikelyParagraphStart(const RowScratchRegisters &before, - const RowScratchRegisters &after, +bool LikelyParagraphStart(const RowScratchRegisters& before, + const RowScratchRegisters& after, tesseract::ParagraphJustification j) { return before.ri_->num_words == 0 || - (FirstWordWouldHaveFit(before, after, j) && - TextSupportsBreak(before, after)); + (FirstWordWouldHaveFit(before, after, j) && + TextSupportsBreak(before, after)); } // Examine rows[start, end) and try to determine what sort of ParagraphModel @@ -1689,8 +1651,8 @@ bool LikelyParagraphStart(const RowScratchRegisters &before, // If the rows given could be a consistent start to a paragraph, set *consistent // true. ParagraphModel InternalParagraphModelByOutline( - const GenericVector *rows, - int start, int end, int tolerance, bool *consistent) { + const GenericVector* rows, int start, int end, + int tolerance, bool* consistent) { int ltr_line_count = 0; for (int i = start; i < end; i++) { ltr_line_count += static_cast((*rows)[i].ri_->ltr); @@ -1724,8 +1686,7 @@ ParagraphModel InternalParagraphModelByOutline( int cdiff = cmax - cmin; if (rdiff > tolerance && ldiff > tolerance) { if (cdiff < tolerance * 2) { - if (end - start < 3) - return ParagraphModel(); + if (end - start < 3) return ParagraphModel(); return ParagraphModel(JUSTIFICATION_CENTER, 0, 0, 0, tolerance); } *consistent = false; @@ -1790,9 +1751,8 @@ ParagraphModel InternalParagraphModelByOutline( // justification_ = JUSTIFICATION_UNKNOWN and print the paragraph to debug // output if we're debugging. ParagraphModel ParagraphModelByOutline( - int debug_level, - const GenericVector *rows, - int start, int end, int tolerance) { + int debug_level, const GenericVector* rows, int start, + int end, int tolerance) { bool unused_consistent; ParagraphModel retval = InternalParagraphModelByOutline( rows, start, end, tolerance, &unused_consistent); @@ -1804,12 +1764,11 @@ ParagraphModel ParagraphModelByOutline( } // Do rows[start, end) form a single instance of the given paragraph model? -bool RowsFitModel(const GenericVector *rows, - int start, int end, const ParagraphModel *model) { - if (!AcceptableRowArgs(0, 1, __func__, rows, start, end)) - return false; +bool RowsFitModel(const GenericVector* rows, int start, + int end, const ParagraphModel* model) { + if (!AcceptableRowArgs(0, 1, __func__, rows, start, end)) return false; if (!ValidFirstLine(rows, start, model)) return false; - for (int i = start + 1 ; i < end; i++) { + for (int i = start + 1; i < end; i++) { if (!ValidBodyLine(rows, i, model)) return false; } return true; @@ -1826,12 +1785,12 @@ bool RowsFitModel(const GenericVector *rows, // We only take the very strongest signals, as we don't want to get // confused and marking up centered text, poetry, or source code as // clearly part of a typical paragraph. -void MarkStrongEvidence(GenericVector *rows, - int row_start, int row_end) { +void MarkStrongEvidence(GenericVector* rows, int row_start, + int row_end) { // Record patently obvious body text. for (int i = row_start + 1; i < row_end; i++) { - const RowScratchRegisters &prev = (*rows)[i - 1]; - RowScratchRegisters &curr = (*rows)[i]; + const RowScratchRegisters& prev = (*rows)[i - 1]; + RowScratchRegisters& curr = (*rows)[i]; tesseract::ParagraphJustification typical_justification = prev.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT; if (!curr.ri_->rword_likely_starts_idea && @@ -1855,8 +1814,8 @@ void MarkStrongEvidence(GenericVector *rows, // First row: { - RowScratchRegisters &curr = (*rows)[row_start]; - RowScratchRegisters &next = (*rows)[row_start + 1]; + RowScratchRegisters& curr = (*rows)[row_start]; + RowScratchRegisters& next = (*rows)[row_start + 1]; tesseract::ParagraphJustification j = curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT; if (curr.GetLineType() == LT_UNKNOWN && @@ -1868,9 +1827,9 @@ void MarkStrongEvidence(GenericVector *rows, } // Middle rows for (int i = row_start + 1; i < row_end - 1; i++) { - RowScratchRegisters &prev = (*rows)[i - 1]; - RowScratchRegisters &curr = (*rows)[i]; - RowScratchRegisters &next = (*rows)[i + 1]; + RowScratchRegisters& prev = (*rows)[i - 1]; + RowScratchRegisters& curr = (*rows)[i]; + RowScratchRegisters& next = (*rows)[i + 1]; tesseract::ParagraphJustification j = curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT; if (curr.GetLineType() == LT_UNKNOWN && @@ -1881,8 +1840,8 @@ void MarkStrongEvidence(GenericVector *rows, } // Last row { // the short circuit at the top means we have at least two lines. - RowScratchRegisters &prev = (*rows)[row_end - 2]; - RowScratchRegisters &curr = (*rows)[row_end - 1]; + RowScratchRegisters& prev = (*rows)[row_end - 2]; + RowScratchRegisters& curr = (*rows)[row_end - 1]; tesseract::ParagraphJustification j = curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT; if (curr.GetLineType() == LT_UNKNOWN && @@ -1897,19 +1856,16 @@ void MarkStrongEvidence(GenericVector *rows, // rows[row_start, row_end) and create ParagraphModels for them if // they seem coherent. void ModelStrongEvidence(int debug_level, - GenericVector *rows, - int row_start, int row_end, - bool allow_flush_models, - ParagraphTheory *theory) { + GenericVector* rows, + int row_start, int row_end, bool allow_flush_models, + ParagraphTheory* theory) { if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end)) return; int start = row_start; while (start < row_end) { - while (start < row_end && (*rows)[start].GetLineType() != LT_START) - start++; - if (start >= row_end - 1) - break; + while (start < row_end && (*rows)[start].GetLineType() != LT_START) start++; + if (start >= row_end - 1) break; int tolerance = Epsilon((*rows)[start + 1].ri_->average_interword_space); int end = start; @@ -1921,9 +1877,10 @@ void ModelStrongEvidence(int debug_level, // If rows[row, end + 1) is not consistent, // just model rows[row, end) if (end < row_end - 1) { - RowScratchRegisters &next = (*rows)[end]; + RowScratchRegisters& next = (*rows)[end]; LineType lt = next.GetLineType(); - next_consistent = lt == LT_BODY || + next_consistent = + lt == LT_BODY || (lt == LT_UNKNOWN && !FirstWordWouldHaveFit((*rows)[end - 1], (*rows)[end])); } else { @@ -1950,10 +1907,10 @@ void ModelStrongEvidence(int debug_level, // do so and mark this sequence with that model. if (end > start + 1) { // emit a new paragraph if we have more than one line. - const ParagraphModel *model = nullptr; - ParagraphModel new_model = ParagraphModelByOutline( - debug_level, rows, start, end, - Epsilon(InterwordSpace(*rows, start, end))); + const ParagraphModel* model = nullptr; + ParagraphModel new_model = + ParagraphModelByOutline(debug_level, rows, start, end, + Epsilon(InterwordSpace(*rows, start, end))); if (new_model.justification() == JUSTIFICATION_UNKNOWN) { // couldn't create a good model, oh well. } else if (new_model.is_flush()) { @@ -1992,9 +1949,9 @@ void ModelStrongEvidence(int debug_level, // (3) Form models for any sequence of start + continuation lines. // (4) Smear the paragraph models to cover surrounding text. void StrongEvidenceClassify(int debug_level, - GenericVector *rows, + GenericVector* rows, int row_start, int row_end, - ParagraphTheory *theory) { + ParagraphTheory* theory) { if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end)) return; @@ -2021,15 +1978,14 @@ void StrongEvidenceClassify(int debug_level, smearer.Smear(); } -void SeparateSimpleLeaderLines(GenericVector *rows, +void SeparateSimpleLeaderLines(GenericVector* rows, int row_start, int row_end, - ParagraphTheory *theory) { + ParagraphTheory* theory) { for (int i = row_start + 1; i < row_end - 1; i++) { - if ((*rows)[i - 1].ri_->has_leaders && - (*rows)[i].ri_->has_leaders && + if ((*rows)[i - 1].ri_->has_leaders && (*rows)[i].ri_->has_leaders && (*rows)[i + 1].ri_->has_leaders) { - const ParagraphModel *model = theory->AddModel( - ParagraphModel(JUSTIFICATION_UNKNOWN, 0, 0, 0, 0)); + const ParagraphModel* model = + theory->AddModel(ParagraphModel(JUSTIFICATION_UNKNOWN, 0, 0, 0, 0)); (*rows)[i].AddStartLine(model); } } @@ -2038,15 +1994,13 @@ void SeparateSimpleLeaderLines(GenericVector *rows, // Collect sequences of unique hypotheses in row registers and create proper // paragraphs for them, referencing the paragraphs in row_owners. void ConvertHypothesizedModelRunsToParagraphs( - int debug_level, - const GenericVector &rows, - GenericVector *row_owners, - ParagraphTheory *theory) { + int debug_level, const GenericVector& rows, + GenericVector* row_owners, ParagraphTheory* theory) { int end = rows.size(); int start; for (; end > 0; end = start) { start = end - 1; - const ParagraphModel *model = nullptr; + const ParagraphModel* model = nullptr; // TODO(eger): Be smarter about dealing with multiple hypotheses. bool single_line_paragraph = false; SetOfModels models; @@ -2069,7 +2023,7 @@ void ConvertHypothesizedModelRunsToParagraphs( continue; } // rows[start, end) should be a paragraph. - PARA *p = new PARA(); + PARA* p = new PARA(); if (model == kCrownLeft || model == kCrownRight) { p->is_very_first_or_continuation = true; // Crown paragraph. @@ -2078,8 +2032,8 @@ void ConvertHypothesizedModelRunsToParagraphs( for (int row = end; row < rows.size(); row++) { if ((*row_owners)[row] && (ValidBodyLine(&rows, start, (*row_owners)[row]->model) && - (start == 0 || - ValidFirstLine(&rows, start, (*row_owners)[row]->model)))) { + (start == 0 || + ValidFirstLine(&rows, start, (*row_owners)[row]->model)))) { model = (*row_owners)[row]->model; break; } @@ -2087,13 +2041,13 @@ void ConvertHypothesizedModelRunsToParagraphs( if (model == kCrownLeft) { // No subsequent model fits, so cons one up. model = theory->AddModel(ParagraphModel( - JUSTIFICATION_LEFT, rows[start].lmargin_ + rows[start].lindent_, - 0, 0, Epsilon(rows[start].ri_->average_interword_space))); + JUSTIFICATION_LEFT, rows[start].lmargin_ + rows[start].lindent_, 0, + 0, Epsilon(rows[start].ri_->average_interword_space))); } else if (model == kCrownRight) { // No subsequent model fits, so cons one up. model = theory->AddModel(ParagraphModel( - JUSTIFICATION_RIGHT, rows[start].rmargin_ + rows[start].rmargin_, - 0, 0, Epsilon(rows[start].ri_->average_interword_space))); + JUSTIFICATION_RIGHT, rows[start].rmargin_ + rows[start].rmargin_, 0, + 0, Epsilon(rows[start].ri_->average_interword_space))); } } rows[start].SetUnknown(); @@ -2104,14 +2058,14 @@ void ConvertHypothesizedModelRunsToParagraphs( } p->model = model; p->has_drop_cap = rows[start].ri_->has_drop_cap; - p->is_list_item = - model->justification() == JUSTIFICATION_RIGHT - ? rows[start].ri_->rword_indicates_list_item - : rows[start].ri_->lword_indicates_list_item; + p->is_list_item = model->justification() == JUSTIFICATION_RIGHT + ? rows[start].ri_->rword_indicates_list_item + : rows[start].ri_->lword_indicates_list_item; for (int row = start; row < end; row++) { if ((*row_owners)[row] != nullptr) { - tprintf("Memory leak! ConvertHypothesizeModelRunsToParagraphs() called " - "more than once!\n"); + tprintf( + "Memory leak! ConvertHypothesizeModelRunsToParagraphs() called " + "more than once!\n"); delete (*row_owners)[row]; } (*row_owners)[row] = p; @@ -2136,7 +2090,7 @@ struct Interval { // (1) If a line is surrounded by lines of unknown type, it's weak. // (2) If two lines in a row are start lines for a given paragraph type, but // after that the same paragraph type does not continue, they're weak. -bool RowIsStranded(const GenericVector &rows, int row) { +bool RowIsStranded(const GenericVector& rows, int row) { SetOfModels row_models; rows[row].StrongHypotheses(&row_models); @@ -2148,11 +2102,17 @@ bool RowIsStranded(const GenericVector &rows, int row) { SetOfModels models; rows[i].NonNullHypotheses(&models); switch (rows[i].GetLineType(row_models[m])) { - case LT_START: run_length++; break; + case LT_START: + run_length++; + break; case LT_MULTIPLE: // explicit fall-through - case LT_BODY: run_length++; all_starts = false; break; + case LT_BODY: + run_length++; + all_starts = false; + break; case LT_UNKNOWN: // explicit fall-through - default: continues = false; + default: + continues = false; } } continues = true; @@ -2160,11 +2120,17 @@ bool RowIsStranded(const GenericVector &rows, int row) { SetOfModels models; rows[i].NonNullHypotheses(&models); switch (rows[i].GetLineType(row_models[m])) { - case LT_START: run_length++; break; + case LT_START: + run_length++; + break; case LT_MULTIPLE: // explicit fall-through - case LT_BODY: run_length++; all_starts = false; break; + case LT_BODY: + run_length++; + all_starts = false; + break; case LT_UNKNOWN: // explicit fall-through - default: continues = false; + default: + continues = false; } } if (run_length > 2 || (!all_starts && run_length > 1)) return false; @@ -2178,9 +2144,9 @@ bool RowIsStranded(const GenericVector &rows, int row) { // + Crown paragraphs not immediately followed by a strongly modeled line. // + Single line paragraphs surrounded by text that doesn't match the // model. -void LeftoverSegments(const GenericVector &rows, - GenericVector *to_fix, - int row_start, int row_end) { +void LeftoverSegments(const GenericVector& rows, + GenericVector* to_fix, int row_start, + int row_end) { to_fix->clear(); for (int i = row_start; i < row_end; i++) { bool needs_fixing = false; @@ -2229,13 +2195,12 @@ void LeftoverSegments(const GenericVector &rows, // Given a set of row_owners pointing to PARAs or nullptr (no paragraph known), // normalize each row_owner to point to an actual PARA, and output the // paragraphs in order onto paragraphs. -void CanonicalizeDetectionResults( - GenericVector *row_owners, - PARA_LIST *paragraphs) { - GenericVector &rows = *row_owners; +void CanonicalizeDetectionResults(GenericVector* row_owners, + PARA_LIST* paragraphs) { + GenericVector& rows = *row_owners; paragraphs->clear(); PARA_IT out(paragraphs); - PARA *formerly_null = nullptr; + PARA* formerly_null = nullptr; for (int i = 0; i < rows.size(); i++) { if (rows[i] == nullptr) { if (i == 0 || rows[i - 1] != formerly_null) { @@ -2261,11 +2226,9 @@ void CanonicalizeDetectionResults( // paragraphs - this is the actual list of PARA objects. // models - the list of paragraph models referenced by the PARA objects. // caller is responsible for deleting the models. -void DetectParagraphs(int debug_level, - GenericVector *row_infos, - GenericVector *row_owners, - PARA_LIST *paragraphs, - GenericVector *models) { +void DetectParagraphs(int debug_level, GenericVector* row_infos, + GenericVector* row_owners, PARA_LIST* paragraphs, + GenericVector* models) { GenericVector rows; ParagraphTheory theory(models); @@ -2295,8 +2258,8 @@ void DetectParagraphs(int debug_level, // followed by two lines that look like body lines, make a paragraph // model for that and see if that model applies throughout the text // (that is, "smear" it). - StrongEvidenceClassify(debug_level, &rows, - leftovers[i].begin, leftovers[i].end, &theory); + StrongEvidenceClassify(debug_level, &rows, leftovers[i].begin, + leftovers[i].end, &theory); // Pass 2b: // If we had any luck in pass 2a, we got part of the page and didn't @@ -2304,14 +2267,14 @@ void DetectParagraphs(int debug_level, // didn't find a model and reprocess them individually. GenericVector leftovers2; LeftoverSegments(rows, &leftovers2, leftovers[i].begin, leftovers[i].end); - bool pass2a_was_useful = leftovers2.size() > 1 || + bool pass2a_was_useful = + leftovers2.size() > 1 || (leftovers2.size() == 1 && (leftovers2[0].begin != 0 || leftovers2[0].end != rows.size())); if (pass2a_was_useful) { for (int j = 0; j < leftovers2.size(); j++) { - StrongEvidenceClassify(debug_level, &rows, - leftovers2[j].begin, leftovers2[j].end, - &theory); + StrongEvidenceClassify(debug_level, &rows, leftovers2[j].begin, + leftovers2[j].end, &theory); } } } @@ -2324,8 +2287,8 @@ void DetectParagraphs(int debug_level, // the geometric clues are simple enough that we could just use those. LeftoverSegments(rows, &leftovers, 0, rows.size()); for (int i = 0; i < leftovers.size(); i++) { - GeometricClassify(debug_level, &rows, - leftovers[i].begin, leftovers[i].end, &theory); + GeometricClassify(debug_level, &rows, leftovers[i].begin, leftovers[i].end, + &theory); } // Undo any flush models for which there's little evidence. @@ -2356,8 +2319,8 @@ void DetectParagraphs(int debug_level, // ============ Code interfacing with the rest of Tesseract ================== -void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, - RowInfo *info) { +void InitializeTextAndBoxesPreRecognition(const MutableIterator& it, + RowInfo* info) { // Set up text, lword_text, and rword_text (mostly for debug printing). STRING fake_text; PageIterator pit(static_cast(it)); @@ -2386,11 +2349,11 @@ void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, // Set up lword_box, rword_box, and num_words. PAGE_RES_IT page_res_it = *it.PageResIt(); - WERD_RES *word_res = page_res_it.restart_row(); - ROW_RES *this_row = page_res_it.row(); + WERD_RES* word_res = page_res_it.restart_row(); + ROW_RES* this_row = page_res_it.row(); - WERD_RES *lword = nullptr; - WERD_RES *rword = nullptr; + WERD_RES* lword = nullptr; + WERD_RES* rword = nullptr; info->num_words = 0; do { if (word_res) { @@ -2405,18 +2368,17 @@ void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, if (rword) info->rword_box = rword->word->bounding_box(); } - // Given a Tesseract Iterator pointing to a text line, fill in the paragraph // detector RowInfo with all relevant information from the row. -void InitializeRowInfo(bool after_recognition, - const MutableIterator &it, - RowInfo *info) { +void InitializeRowInfo(bool after_recognition, const MutableIterator& it, + RowInfo* info) { if (it.PageResIt()->row() != nullptr) { - ROW *row = it.PageResIt()->row()->row; + ROW* row = it.PageResIt()->row()->row; info->pix_ldistance = row->lmargin(); info->pix_rdistance = row->rmargin(); info->average_interword_space = - row->space() > 0 ? row->space() : std::max(static_cast(row->x_height()), 1); + row->space() > 0 ? row->space() + : std::max(static_cast(row->x_height()), 1); info->pix_xheight = row->x_height(); info->has_leaders = false; info->has_drop_cap = row->has_drop_cap(); @@ -2454,10 +2416,8 @@ void InitializeRowInfo(bool after_recognition, trailing_ws_idx--; if (trailing_ws_idx > 0) { int lspaces = info->pix_ldistance / info->average_interword_space; - for (int i = 0; i < lspaces; i++) - info->text += ' '; - for (int i = 0; i < trailing_ws_idx; i++) - info->text += text[i]; + for (int i = 0; i < lspaces; i++) info->text += ' '; + for (int i = 0; i < trailing_ws_idx; i++) info->text += text[i]; } if (info->text.size() == 0) { @@ -2465,9 +2425,9 @@ void InitializeRowInfo(bool after_recognition, } PAGE_RES_IT page_res_it = *it.PageResIt(); - GenericVector werds; - WERD_RES *word_res = page_res_it.restart_row(); - ROW_RES *this_row = page_res_it.row(); + GenericVector werds; + WERD_RES* word_res = page_res_it.restart_row(); + ROW_RES* this_row = page_res_it.row(); int num_leaders = 0; int ltr = 0; int rtl = 0; @@ -2489,13 +2449,11 @@ void InitializeRowInfo(bool after_recognition, info->rword_text = rword->best_choice->unichar_string().string(); info->lword_box = lword->word->bounding_box(); info->rword_box = rword->word->bounding_box(); - LeftWordAttributes(lword->uch_set, lword->best_choice, - info->lword_text, + LeftWordAttributes(lword->uch_set, lword->best_choice, info->lword_text, &info->lword_indicates_list_item, &info->lword_likely_starts_idea, &info->lword_likely_ends_idea); - RightWordAttributes(rword->uch_set, rword->best_choice, - info->rword_text, + RightWordAttributes(rword->uch_set, rword->best_choice, info->rword_text, &info->rword_indicates_list_item, &info->rword_likely_starts_idea, &info->rword_likely_ends_idea); @@ -2505,31 +2463,29 @@ void InitializeRowInfo(bool after_recognition, // This is called after rows have been identified and words are recognized. // Much of this could be implemented before word recognition, but text helps // to identify bulleted lists and gives good signals for sentence boundaries. -void DetectParagraphs(int debug_level, - bool after_text_recognition, - const MutableIterator *block_start, - GenericVector *models) { +void DetectParagraphs(int debug_level, bool after_text_recognition, + const MutableIterator* block_start, + GenericVector* models) { // Clear out any preconceived notions. if (block_start->Empty(RIL_TEXTLINE)) { return; } - BLOCK *block = block_start->PageResIt()->block()->block; + BLOCK* block = block_start->PageResIt()->block()->block; block->para_list()->clear(); - bool is_image_block = block->pdblk.poly_block() && !block->pdblk.poly_block()->IsText(); + bool is_image_block = + block->pdblk.poly_block() && !block->pdblk.poly_block()->IsText(); // Convert the Tesseract structures to RowInfos // for the paragraph detection algorithm. MutableIterator row(*block_start); - if (row.Empty(RIL_TEXTLINE)) - return; // end of input already. + if (row.Empty(RIL_TEXTLINE)) return; // end of input already. GenericVector row_infos; do { - if (!row.PageResIt()->row()) - continue; // empty row. + if (!row.PageResIt()->row()) continue; // empty row. row.PageResIt()->row()->row->set_para(nullptr); row_infos.push_back(RowInfo()); - RowInfo &ri = row_infos.back(); + RowInfo& ri = row_infos.back(); InitializeRowInfo(after_text_recognition, row, &ri); } while (!row.IsAtFinalElement(RIL_BLOCK, RIL_TEXTLINE) && row.Next(RIL_TEXTLINE)); @@ -2554,8 +2510,8 @@ void DetectParagraphs(int debug_level, } // Run the paragraph detection algorithm. - GenericVector row_owners; - GenericVector the_paragraphs; + GenericVector row_owners; + GenericVector the_paragraphs; if (!is_image_block) { DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(), models); @@ -2567,11 +2523,10 @@ void DetectParagraphs(int debug_level, // Now stitch in the row_owners into the rows. row = *block_start; for (int i = 0; i < row_owners.size(); i++) { - while (!row.PageResIt()->row()) - row.Next(RIL_TEXTLINE); + while (!row.PageResIt()->row()) row.Next(RIL_TEXTLINE); row.PageResIt()->row()->row->set_para(row_owners[i]); row.Next(RIL_TEXTLINE); } } -} // namespace +} // namespace tesseract diff --git a/src/ccmain/paragraphs.h b/src/ccmain/paragraphs.h index e92ff3b150..a4aab03468 100644 --- a/src/ccmain/paragraphs.h +++ b/src/ccmain/paragraphs.h @@ -20,12 +20,11 @@ #ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_ #define TESSERACT_CCMAIN_PARAGRAPHS_H_ -#include "rect.h" -#include "ocrpara.h" #include "genericvector.h" +#include "ocrpara.h" +#include "rect.h" #include "strngs.h" - class WERD; class UNICHARSET; @@ -40,23 +39,23 @@ class MutableIterator; class RowInfo { public: // Constant data derived from Tesseract output. - STRING text; // the full UTF-8 text of the line. - bool ltr; // whether the majority of the text is left-to-right - // TODO(eger) make this more fine-grained. + STRING text; // the full UTF-8 text of the line. + bool ltr; // whether the majority of the text is left-to-right + // TODO(eger) make this more fine-grained. bool has_leaders; // does the line contain leader dots (.....)? bool has_drop_cap; // does the line have a drop cap? int pix_ldistance; // distance to the left pblock boundary in pixels int pix_rdistance; // distance to the right pblock boundary in pixels float pix_xheight; // guessed xheight for the line - int average_interword_space; // average space between words in pixels. + int average_interword_space; // average space between words in pixels. int num_words; - TBOX lword_box; // in normalized (horiz text rows) space - TBOX rword_box; // in normalized (horiz text rows) space + TBOX lword_box; // in normalized (horiz text rows) space + TBOX rword_box; // in normalized (horiz text rows) space - STRING lword_text; // the UTF-8 text of the leftmost werd - STRING rword_text; // the UTF-8 text of the rightmost werd + STRING lword_text; // the UTF-8 text of the leftmost werd + STRING rword_text; // the UTF-8 text of the rightmost werd // The text of a paragraph typically starts with the start of an idea and // ends with the end of an idea. Here we define paragraph as something that @@ -87,22 +86,19 @@ class RowInfo { // paragraphs - this is the actual list of PARA objects. // models - the list of paragraph models referenced by the PARA objects. // caller is responsible for deleting the models. -void DetectParagraphs(int debug_level, - GenericVector *row_infos, - GenericVector *row_owners, - PARA_LIST *paragraphs, - GenericVector *models); +void DetectParagraphs(int debug_level, GenericVector* row_infos, + GenericVector* row_owners, PARA_LIST* paragraphs, + GenericVector* models); // Given a MutableIterator to the start of a block, run DetectParagraphs on // that block and commit the results to the underlying ROW and BLOCK structs, // saving the ParagraphModels in models. Caller owns the models. // We use unicharset during the function to answer questions such as "is the // first letter of this word upper case?" -void DetectParagraphs(int debug_level, - bool after_text_recognition, - const MutableIterator *block_start, - GenericVector *models); +void DetectParagraphs(int debug_level, bool after_text_recognition, + const MutableIterator* block_start, + GenericVector* models); -} // namespace +} // namespace tesseract #endif // TESSERACT_CCMAIN_PARAGRAPHS_H_ diff --git a/src/ccmain/paragraphs_internal.h b/src/ccmain/paragraphs_internal.h index ccf5334ed0..2e64322799 100644 --- a/src/ccmain/paragraphs_internal.h +++ b/src/ccmain/paragraphs_internal.h @@ -30,21 +30,21 @@ class WERD_CHOICE; namespace tesseract { // Return whether the given word is likely to be a list item start word. -bool AsciiLikelyListItem(const STRING &word); +bool AsciiLikelyListItem(const STRING& word); // Return the first Unicode Codepoint from werd[pos]. -int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos); +int UnicodeFor(const UNICHARSET* u, const WERD_CHOICE* werd, int pos); // Set right word attributes given either a unicharset and werd or a utf8 // string. -void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, - const STRING &utf8, - bool *is_list, bool *starts_idea, bool *ends_idea); +void RightWordAttributes(const UNICHARSET* unicharset, const WERD_CHOICE* werd, + const STRING& utf8, bool* is_list, bool* starts_idea, + bool* ends_idea); // Set left word attributes given either a unicharset and werd or a utf8 string. -void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, - const STRING &utf8, - bool *is_list, bool *starts_idea, bool *ends_idea); +void LeftWordAttributes(const UNICHARSET* unicharset, const WERD_CHOICE* werd, + const STRING& utf8, bool* is_list, bool* starts_idea, + bool* ends_idea); enum LineType { LT_START = 'S', // First line of a paragraph. @@ -64,42 +64,42 @@ enum LineType { // // Nonetheless, while building hypotheses, it is useful to mark the lines // of crown paragraphs temporarily as crowns, either aligned left or right. -extern const ParagraphModel *kCrownLeft; -extern const ParagraphModel *kCrownRight; +extern const ParagraphModel* kCrownLeft; +extern const ParagraphModel* kCrownRight; -inline bool StrongModel(const ParagraphModel *model) { +inline bool StrongModel(const ParagraphModel* model) { return model != nullptr && model != kCrownLeft && model != kCrownRight; } struct LineHypothesis { LineHypothesis() : ty(LT_UNKNOWN), model(nullptr) {} - LineHypothesis(LineType line_type, const ParagraphModel *m) + LineHypothesis(LineType line_type, const ParagraphModel* m) : ty(line_type), model(m) {} - LineHypothesis(const LineHypothesis &other) + LineHypothesis(const LineHypothesis& other) : ty(other.ty), model(other.model) {} - bool operator==(const LineHypothesis &other) const { + bool operator==(const LineHypothesis& other) const { return ty == other.ty && model == other.model; } LineType ty; - const ParagraphModel *model; + const ParagraphModel* model; }; class ParagraphTheory; // Forward Declaration -using SetOfModels = GenericVectorEqEq; +using SetOfModels = GenericVectorEqEq; // Row Scratch Registers are data generated by the paragraph detection // algorithm based on a RowInfo input. class RowScratchRegisters { public: // We presume row will outlive us. - void Init(const RowInfo &row); + void Init(const RowInfo& row); LineType GetLineType() const; - LineType GetLineType(const ParagraphModel *model) const; + LineType GetLineType(const ParagraphModel* model) const; // Mark this as a start line type, sans model. This is useful for the // initial marking of probable body lines or paragraph start lines. @@ -110,59 +110,65 @@ class RowScratchRegisters { void SetBodyLine(); // Record that this row fits as a paragraph start line in the given model, - void AddStartLine(const ParagraphModel *model); + void AddStartLine(const ParagraphModel* model); // Record that this row fits as a paragraph body line in the given model, - void AddBodyLine(const ParagraphModel *model); + void AddBodyLine(const ParagraphModel* model); // Clear all hypotheses about this line. void SetUnknown() { hypotheses_.truncate(0); } // Append all hypotheses of strong models that match this row as a start. - void StartHypotheses(SetOfModels *models) const; + void StartHypotheses(SetOfModels* models) const; // Append all hypotheses of strong models matching this row. - void StrongHypotheses(SetOfModels *models) const; + void StrongHypotheses(SetOfModels* models) const; // Append all hypotheses for this row. - void NonNullHypotheses(SetOfModels *models) const; + void NonNullHypotheses(SetOfModels* models) const; // Discard any hypotheses whose model is not in the given list. - void DiscardNonMatchingHypotheses(const SetOfModels &models); + void DiscardNonMatchingHypotheses(const SetOfModels& models); // If we have only one hypothesis and that is that this line is a paragraph // start line of a certain model, return that model. Else return nullptr. - const ParagraphModel *UniqueStartHypothesis() const; + const ParagraphModel* UniqueStartHypothesis() const; // If we have only one hypothesis and that is that this line is a paragraph // body line of a certain model, return that model. Else return nullptr. - const ParagraphModel *UniqueBodyHypothesis() const; + const ParagraphModel* UniqueBodyHypothesis() const; // Return the indentation for the side opposite of the aligned side. int OffsideIndent(tesseract::ParagraphJustification just) const { switch (just) { - case tesseract::JUSTIFICATION_RIGHT: return lindent_; - case tesseract::JUSTIFICATION_LEFT: return rindent_; - default: return lindent_ > rindent_ ? lindent_ : rindent_; + case tesseract::JUSTIFICATION_RIGHT: + return lindent_; + case tesseract::JUSTIFICATION_LEFT: + return rindent_; + default: + return lindent_ > rindent_ ? lindent_ : rindent_; } } // Return the indentation for the side the text is aligned to. int AlignsideIndent(tesseract::ParagraphJustification just) const { switch (just) { - case tesseract::JUSTIFICATION_RIGHT: return rindent_; - case tesseract::JUSTIFICATION_LEFT: return lindent_; - default: return lindent_ > rindent_ ? lindent_ : rindent_; + case tesseract::JUSTIFICATION_RIGHT: + return rindent_; + case tesseract::JUSTIFICATION_LEFT: + return lindent_; + default: + return lindent_ > rindent_ ? lindent_ : rindent_; } } // Append header fields to a vector of row headings. - static void AppendDebugHeaderFields(GenericVector *header); + static void AppendDebugHeaderFields(GenericVector* header); // Append data for this row to a vector of debug strings. - void AppendDebugInfo(const ParagraphTheory &theory, - GenericVector *dbg) const; + void AppendDebugInfo(const ParagraphTheory& theory, + GenericVector* dbg) const; - const RowInfo *ri_; + const RowInfo* ri_; // These four constants form a horizontal box model for the white space // on the edges of each line. At each point in the algorithm, the following @@ -185,39 +191,39 @@ class ParagraphTheory { public: // We presume models will outlive us, and that models will take ownership // of any ParagraphModel *'s we add. - explicit ParagraphTheory(GenericVector *models) + explicit ParagraphTheory(GenericVector* models) : models_(models) {} - GenericVector &models() { return *models_; } - const GenericVector &models() const { return *models_; } + GenericVector& models() { return *models_; } + const GenericVector& models() const { return *models_; } // Return an existing model if one that is Comparable() can be found. // Else, allocate a new copy of model to save and return a pointer to it. - const ParagraphModel *AddModel(const ParagraphModel &model); + const ParagraphModel* AddModel(const ParagraphModel& model); // Discard any models we've made that are not in the list of used models. - void DiscardUnusedModels(const SetOfModels &used_models); + void DiscardUnusedModels(const SetOfModels& used_models); // Return the set of all non-centered models. - void NonCenteredModels(SetOfModels *models); + void NonCenteredModels(SetOfModels* models); // If any of the non-centered paragraph models we know about fit // rows[start, end), return it. Else nullptr. - const ParagraphModel *Fits(const GenericVector *rows, + const ParagraphModel* Fits(const GenericVector* rows, int start, int end) const; - int IndexOf(const ParagraphModel *model) const; + int IndexOf(const ParagraphModel* model) const; private: - GenericVector *models_; - GenericVectorEqEq models_we_added_; + GenericVector* models_; + GenericVectorEqEq models_we_added_; }; -bool ValidFirstLine(const GenericVector *rows, - int row, const ParagraphModel *model); -bool ValidBodyLine(const GenericVector *rows, - int row, const ParagraphModel *model); -bool CrownCompatible(const GenericVector *rows, - int a, int b, const ParagraphModel *model); +bool ValidFirstLine(const GenericVector* rows, int row, + const ParagraphModel* model); +bool ValidBodyLine(const GenericVector* rows, int row, + const ParagraphModel* model); +bool CrownCompatible(const GenericVector* rows, int a, + int b, const ParagraphModel* model); // A class for smearing Paragraph Model hypotheses to surrounding rows. // The idea here is that StrongEvidenceClassify first marks only exceedingly @@ -228,9 +234,8 @@ bool CrownCompatible(const GenericVector *rows, // "smear" our models over the text. class ParagraphModelSmearer { public: - ParagraphModelSmearer(GenericVector *rows, - int row_start, int row_end, - ParagraphTheory *theory); + ParagraphModelSmearer(GenericVector* rows, int row_start, + int row_end, ParagraphTheory* theory); // Smear forward paragraph models from existing row markings to subsequent // text lines if they fit, and mark any thereafter still unmodeled rows @@ -245,12 +250,12 @@ class ParagraphModelSmearer { // either a body or start line in that model. void CalculateOpenModels(int row_start, int row_end); - SetOfModels &OpenModels(int row) { + SetOfModels& OpenModels(int row) { return open_models_[row - row_start_ + 1]; } - ParagraphTheory *theory_; - GenericVector *rows_; + ParagraphTheory* theory_; + GenericVector* rows_; int row_start_; int row_end_; @@ -269,39 +274,38 @@ class ParagraphModelSmearer { // percentile (0..100) value of the left and right row edges for this run of // rows. void RecomputeMarginsAndClearHypotheses( - GenericVector *rows, int start, int end, + GenericVector* rows, int start, int end, int percentile); // Return the median inter-word space in rows[row_start, row_end). -int InterwordSpace(const GenericVector &rows, +int InterwordSpace(const GenericVector& rows, int row_start, int row_end); // Return whether the first word on the after line can fit in the space at // the end of the before line (knowing which way the text is aligned and read). -bool FirstWordWouldHaveFit(const RowScratchRegisters &before, - const RowScratchRegisters &after, +bool FirstWordWouldHaveFit(const RowScratchRegisters& before, + const RowScratchRegisters& after, tesseract::ParagraphJustification justification); // Return whether the first word on the after line can fit in the space at // the end of the before line (not knowing the text alignment). -bool FirstWordWouldHaveFit(const RowScratchRegisters &before, - const RowScratchRegisters &after); +bool FirstWordWouldHaveFit(const RowScratchRegisters& before, + const RowScratchRegisters& after); // Do rows[start, end) form a single instance of the given paragraph model? -bool RowsFitModel(const GenericVector *rows, - int start, int end, const ParagraphModel *model); +bool RowsFitModel(const GenericVector* rows, int start, + int end, const ParagraphModel* model); // Do the text and geometry of two rows support a paragraph break between them? -bool LikelyParagraphStart(const RowScratchRegisters &before, - const RowScratchRegisters &after, +bool LikelyParagraphStart(const RowScratchRegisters& before, + const RowScratchRegisters& after, tesseract::ParagraphJustification j); // Given a set of row_owners pointing to PARAs or nullptr (no paragraph known), // normalize each row_owner to point to an actual PARA, and output the // paragraphs in order onto paragraphs. -void CanonicalizeDetectionResults( - GenericVector *row_owners, - PARA_LIST *paragraphs); +void CanonicalizeDetectionResults(GenericVector* row_owners, + PARA_LIST* paragraphs); -} // namespace +} // namespace tesseract #endif // TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_ diff --git a/src/ccmain/paramsd.cpp b/src/ccmain/paramsd.cpp index 8424c4860f..324e5ffadd 100644 --- a/src/ccmain/paramsd.cpp +++ b/src/ccmain/paramsd.cpp @@ -21,8 +21,8 @@ // tesseract from the ui. #ifdef _WIN32 #else -#include #include +#include #endif #include @@ -35,13 +35,11 @@ #ifndef GRAPHICS_DISABLED #include "paramsd.h" - #include "params.h" #include "scrollview.h" #include "svmnode.h" - -#define VARDIR "configs/" /*parameters files */ +#define VARDIR "configs/" /*parameters files */ #define MAX_ITEMS_IN_SUBMENU 30 // The following variables should remain static globals, since they @@ -88,20 +86,17 @@ ParamContent::ParamContent(tesseract::DoubleParam* it) { } // Gets a VC object identified by its ID. -ParamContent* ParamContent::GetParamContentById(int id) { - return vcMap[id]; -} +ParamContent* ParamContent::GetParamContentById(int id) { return vcMap[id]; } // Copy the first N words from the source string to the target string. // Words are delimited by "_". -void ParamsEditor::GetFirstWords( - const char *s, // source string - int n, // number of words - char *t // target string - ) { +void ParamsEditor::GetFirstWords(const char* s, // source string + int n, // number of words + char* t // target string +) { int full_length = strlen(s); - int reqd_len = 0; // No. of chars requird - const char *next_word = s; + int reqd_len = 0; // No. of chars requird + const char* next_word = s; while ((n > 0) && reqd_len < full_length) { reqd_len += strcspn(next_word, "_") + 1; @@ -109,30 +104,40 @@ void ParamsEditor::GetFirstWords( n--; } strncpy(t, s, reqd_len); - t[reqd_len] = '\0'; // ensure null terminal + t[reqd_len] = '\0'; // ensure null terminal } // Getter for the name. const char* ParamContent::GetName() const { - if (param_type_ == VT_INTEGER) { return iIt->name_str(); } - else if (param_type_ == VT_BOOLEAN) { return bIt->name_str(); } - else if (param_type_ == VT_DOUBLE) { return dIt->name_str(); } - else if (param_type_ == VT_STRING) { return sIt->name_str(); } - else + if (param_type_ == VT_INTEGER) { + return iIt->name_str(); + } else if (param_type_ == VT_BOOLEAN) { + return bIt->name_str(); + } else if (param_type_ == VT_DOUBLE) { + return dIt->name_str(); + } else if (param_type_ == VT_STRING) { + return sIt->name_str(); + } else return "ERROR: ParamContent::GetName()"; } // Getter for the description. const char* ParamContent::GetDescription() const { - if (param_type_ == VT_INTEGER) { return iIt->info_str(); } - else if (param_type_ == VT_BOOLEAN) { return bIt->info_str(); } - else if (param_type_ == VT_DOUBLE) { return dIt->info_str(); } - else if (param_type_ == VT_STRING) { return sIt->info_str(); } - else return nullptr; + if (param_type_ == VT_INTEGER) { + return iIt->info_str(); + } else if (param_type_ == VT_BOOLEAN) { + return bIt->info_str(); + } else if (param_type_ == VT_DOUBLE) { + return dIt->info_str(); + } else if (param_type_ == VT_STRING) { + return sIt->info_str(); + } else + return nullptr; } // Getter for the value. -STRING ParamContent::GetValue() const { +STRING +ParamContent::GetValue() const { STRING result; if (param_type_ == VT_INTEGER) { result.add_str_int("", *iIt); @@ -152,8 +157,8 @@ STRING ParamContent::GetValue() const { // Setter for the value. void ParamContent::SetValue(const char* val) { -// TODO (wanke) Test if the values actually are properly converted. -// (Quickly visible impacts?) + // TODO (wanke) Test if the values actually are properly converted. + // (Quickly visible impacts?) changed_ = true; if (param_type_ == VT_INTEGER) { iIt->set_value(atoi(val)); @@ -169,8 +174,7 @@ void ParamContent::SetValue(const char* val) { // Gets the up to the first 3 prefixes from s (split by _). // For example, tesseract_foo_bar will be split into tesseract,foo and bar. void ParamsEditor::GetPrefixes(const char* s, STRING* level_one, - STRING* level_two, - STRING* level_three) { + STRING* level_two, STRING* level_three) { char* p = new char[1024]; GetFirstWords(s, 1, p); *level_one = p; @@ -191,7 +195,7 @@ int ParamContent::Compare(const void* v1, const void* v2) { // Find all editable parameters used within tesseract and create a // SVMenuNode tree from it. // TODO (wanke): This is actually sort of hackish. -SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) { +SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract* tess) { SVMenuNode* mr = new SVMenuNode(); ParamContent_LIST vclist; ParamContent_IT vc_it(&vclist); @@ -203,7 +207,7 @@ SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) { int v, i; int num_iterations = (tess->params() == nullptr) ? 1 : 2; for (v = 0; v < num_iterations; ++v) { - tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params(); + tesseract::ParamsVectors* vec = (v == 0) ? GlobalParams() : tess->params(); for (i = 0; i < vec->int_params.size(); ++i) { vc_it.add_after_then_move(new ParamContent(vec->int_params[i])); } @@ -251,12 +255,12 @@ SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) { SVMenuNode* sv = mr->AddChild(tag.string()); if ((amount[tag.string()] <= MAX_ITEMS_IN_SUBMENU) || (amount[tag2.string()] <= 1)) { - sv->AddChild(vc->GetName(), vc->GetId(), - vc->GetValue().string(), vc->GetDescription()); + sv->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(), + vc->GetDescription()); } else { // Make subsubmenus. SVMenuNode* sv2 = sv->AddChild(tag2.string()); - sv2->AddChild(vc->GetName(), vc->GetId(), - vc->GetValue().string(), vc->GetDescription()); + sv2->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(), + vc->GetDescription()); } } } @@ -272,11 +276,10 @@ void ParamsEditor::Notify(const SVEvent* sve) { } else if (sve->command_id == writeCommands[1]) { WriteParams(param, true); } else { - ParamContent* vc = ParamContent::GetParamContentById( - sve->command_id); + ParamContent* vc = ParamContent::GetParamContentById(sve->command_id); vc->SetValue(param); - sv_window_->AddMessage("Setting %s to %s", - vc->GetName(), vc->GetValue().string()); + sv_window_->AddMessage("Setting %s to %s", vc->GetName(), + vc->GetValue().string()); } } } @@ -284,8 +287,7 @@ void ParamsEditor::Notify(const SVEvent* sve) { // Integrate the parameters editor as popupmenu into the existing scrollview // window (usually the pg editor). If sv == null, create a new empty // empty window and attach the parameters editor to that window (ugly). -ParamsEditor::ParamsEditor(tesseract::Tesseract* tess, - ScrollView* sv) { +ParamsEditor::ParamsEditor(tesseract::Tesseract* tess, ScrollView* sv) { if (sv == nullptr) { const char* name = "ParamEditorMAIN"; sv = new ScrollView(name, 1, 1, 200, 200, 300, 200); @@ -293,47 +295,48 @@ ParamsEditor::ParamsEditor(tesseract::Tesseract* tess, sv_window_ = sv; - //Only one event handler per window. - //sv->AddEventHandler((SVEventHandler*) this); + // Only one event handler per window. + // sv->AddEventHandler((SVEventHandler*) this); SVMenuNode* svMenuRoot = BuildListOfAllLeaves(tess); STRING paramfile; paramfile = tess->datadir; - paramfile += VARDIR; // parameters dir - paramfile += "edited"; // actual name + paramfile += VARDIR; // parameters dir + paramfile += "edited"; // actual name - SVMenuNode* std_menu = svMenuRoot->AddChild ("Build Config File"); + SVMenuNode* std_menu = svMenuRoot->AddChild("Build Config File"); - writeCommands[0] = nrParams+1; - std_menu->AddChild("All Parameters", writeCommands[0], - paramfile.string(), "Config file name?"); + writeCommands[0] = nrParams + 1; + std_menu->AddChild("All Parameters", writeCommands[0], paramfile.string(), + "Config file name?"); - writeCommands[1] = nrParams+2; - std_menu->AddChild ("changed_ Parameters Only", writeCommands[1], - paramfile.string(), "Config file name?"); + writeCommands[1] = nrParams + 2; + std_menu->AddChild("changed_ Parameters Only", writeCommands[1], + paramfile.string(), "Config file name?"); svMenuRoot->BuildMenu(sv, false); } - // Write all (changed_) parameters to a config file. -void ParamsEditor::WriteParams(char *filename, - bool changes_only) { - FILE *fp; // input file +void ParamsEditor::WriteParams(char* filename, bool changes_only) { + FILE* fp; // input file char msg_str[255]; - // if file exists - if ((fp = fopen (filename, "rb")) != nullptr) { + // if file exists + if ((fp = fopen(filename, "rb")) != nullptr) { fclose(fp); - sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename); + sprintf(msg_str, + "Overwrite file " + "%s" + "? (Y/N)", + filename); int a = sv_window_->ShowYesNoDialog(msg_str); if (a == 'n') { return; } // don't write } - - fp = fopen (filename, "wb"); // can we write to it? + fp = fopen(filename, "wb"); // can we write to it? if (fp == nullptr) { sv_window_->AddMessage( "Can't write to file " @@ -344,12 +347,11 @@ void ParamsEditor::WriteParams(char *filename, } for (std::map::iterator iter = vcMap.begin(); - iter != vcMap.end(); - ++iter) { + iter != vcMap.end(); ++iter) { ParamContent* cur = iter->second; if (!changes_only || cur->HasChanged()) { - fprintf(fp, "%-25s %-12s # %s\n", - cur->GetName(), cur->GetValue().string(), cur->GetDescription()); + fprintf(fp, "%-25s %-12s # %s\n", cur->GetName(), + cur->GetValue().string(), cur->GetDescription()); } } fclose(fp); diff --git a/src/ccmain/paramsd.h b/src/ccmain/paramsd.h index 2f21d14581..68fbd5868a 100644 --- a/src/ccmain/paramsd.h +++ b/src/ccmain/paramsd.h @@ -31,12 +31,7 @@ class SVMenuNode; // A list of all possible parameter types used. -enum ParamType { - VT_INTEGER, - VT_BOOLEAN, - VT_STRING, - VT_DOUBLE -}; +enum ParamType { VT_INTEGER, VT_BOOLEAN, VT_STRING, VT_DOUBLE }; // A rather hackish helper structure which can take any kind of parameter input // (defined by ParamType) and do a couple of common operations on them, like @@ -58,7 +53,6 @@ class ParamContent : public ELIST_LINK { explicit ParamContent(tesseract::BoolParam* it); explicit ParamContent(tesseract::DoubleParam* it); - // Getters and Setters. void SetValue(const char* val); STRING GetValue() const; @@ -100,18 +94,18 @@ class ParamsEditor : public SVEventHandler { private: // Gets the up to the first 3 prefixes from s (split by _). // For example, tesseract_foo_bar will be split into tesseract,foo and bar. - void GetPrefixes(const char* s, STRING* level_one, - STRING* level_two, STRING* level_three); + void GetPrefixes(const char* s, STRING* level_one, STRING* level_two, + STRING* level_three); // Gets the first n words (split by _) and puts them in t. // For example, tesseract_foo_bar with N=2 will yield tesseract_foo_. - void GetFirstWords(const char *s, // source string + void GetFirstWords(const char* s, // source string int n, // number of words - char *t); // target string + char* t); // target string // Find all editable parameters used within tesseract and create a // SVMenuNode tree from it. - SVMenuNode *BuildListOfAllLeaves(tesseract::Tesseract *tess); + SVMenuNode* BuildListOfAllLeaves(tesseract::Tesseract* tess); // Write all (changed_) parameters to a config file. void WriteParams(char* filename, bool changes_only); diff --git a/src/ccmain/pgedit.cpp b/src/ccmain/pgedit.cpp index 7c6b449f35..a6252b0256 100644 --- a/src/ccmain/pgedit.cpp +++ b/src/ccmain/pgedit.cpp @@ -22,33 +22,32 @@ #include "config_auto.h" #endif -#include "pgedit.h" +#include "pgedit.h" -#include -#include +#include +#include #include "blread.h" #include "control.h" -#include "paramsd.h" #include "pageres.h" -#include "tordmain.h" +#include "paramsd.h" #include "scrollview.h" -#include "svmnode.h" #include "statistc.h" +#include "svmnode.h" #include "tesseractclass.h" +#include "tordmain.h" #include "werdit.h" #ifndef GRAPHICS_DISABLED -#define ASC_HEIGHT (2 * kBlnBaselineOffset + kBlnXHeight) -#define X_HEIGHT (kBlnBaselineOffset + kBlnXHeight) -#define BL_HEIGHT kBlnBaselineOffset -#define DESC_HEIGHT 0 -#define MAXSPACING 128 /*max expected spacing in pix */ +#define ASC_HEIGHT (2 * kBlnBaselineOffset + kBlnXHeight) +#define X_HEIGHT (kBlnBaselineOffset + kBlnXHeight) +#define BL_HEIGHT kBlnBaselineOffset +#define DESC_HEIGHT 0 +#define MAXSPACING 128 /*max expected spacing in pix */ const ERRCODE EMPTYBLOCKLIST = "No blocks to edit"; -enum CMD_EVENTS -{ +enum CMD_EVENTS { NULL_CMD_EVENT, CHANGE_DISP_CMD_EVENT, DUMP_WERD_CMD_EVENT, @@ -105,13 +104,13 @@ ParamsEditor* pe; bool stillRunning = false; #ifdef __UNIX__ -FILE *debug_window = nullptr; // opened on demand +FILE* debug_window = nullptr; // opened on demand #endif -ScrollView* bln_word_window = nullptr; // baseline norm words +ScrollView* bln_word_window = nullptr; // baseline norm words CMD_EVENTS mode = CHANGE_DISP_CMD_EVENT; // selected words op -bool recog_done = false; // recog_all_words was called +bool recog_done = false; // recog_all_words was called // These variables should remain global, since they are only used for the // debug mode (in which only a single Tesseract thread/instance will be exist). @@ -121,10 +120,9 @@ BOOL8 display_image = FALSE; BOOL8 display_blocks = FALSE; BOOL8 display_baselines = FALSE; -PAGE_RES *current_page_res = nullptr; +PAGE_RES* current_page_res = nullptr; -STRING_VAR(editor_image_win_name, "EditorImage", - "Editor image window name"); +STRING_VAR(editor_image_win_name, "EditorImage", "Editor image window name"); INT_VAR(editor_image_xpos, 590, "Editor image X Pos"); INT_VAR(editor_image_ypos, 10, "Editor image Y Pos"); INT_VAR(editor_image_menuheight, 50, "Add to image height for menu bar"); @@ -132,11 +130,9 @@ INT_VAR(editor_image_word_bb_color, ScrollView::BLUE, "Word bounding box colour"); INT_VAR(editor_image_blob_bb_color, ScrollView::YELLOW, "Blob bounding box colour"); -INT_VAR(editor_image_text_color, ScrollView::WHITE, - "Correct text colour"); +INT_VAR(editor_image_text_color, ScrollView::WHITE, "Correct text colour"); -STRING_VAR(editor_dbwin_name, "EditorDBWin", - "Editor debug window name"); +STRING_VAR(editor_dbwin_name, "EditorDBWin", "Editor debug window name"); INT_VAR(editor_dbwin_xpos, 50, "Editor debug window X Pos"); INT_VAR(editor_dbwin_ypos, 500, "Editor debug window Y Pos"); INT_VAR(editor_dbwin_height, 24, "Editor debug window height"); @@ -148,7 +144,8 @@ INT_VAR(editor_word_ypos, 510, "Word window Y Pos"); INT_VAR(editor_word_height, 240, "Word window height"); INT_VAR(editor_word_width, 655, "Word window width"); -STRING_VAR(editor_debug_config_file, "", "Config file to apply to single words"); +STRING_VAR(editor_debug_config_file, "", + "Config file to apply to single words"); class BlnEventHandler : public SVEventHandler { public: @@ -166,12 +163,12 @@ class BlnEventHandler : public SVEventHandler { * @return a WINDOW for the word window, creating it if necessary */ ScrollView* bln_word_window_handle() { // return handle - // not opened yet + // not opened yet if (bln_word_window == nullptr) { pgeditor_msg("Creating BLN word window..."); - bln_word_window = new ScrollView(editor_word_name.string(), - editor_word_xpos, editor_word_ypos, editor_word_width, - editor_word_height, 4000, 4000, true); + bln_word_window = new ScrollView( + editor_word_name.string(), editor_word_xpos, editor_word_ypos, + editor_word_width, editor_word_height, 4000, 4000, true); BlnEventHandler* a = new BlnEventHandler(); bln_word_window->AddEventHandler(a); pgeditor_msg("Creating BLN word window...Done"); @@ -188,13 +185,9 @@ ScrollView* bln_word_window_handle() { // return handle void build_image_window(int width, int height) { delete image_win; - image_win = new ScrollView(editor_image_win_name.string(), - editor_image_xpos, editor_image_ypos, - width + 1, - height + editor_image_menuheight + 1, - width, - height, - true); + image_win = new ScrollView( + editor_image_win_name.string(), editor_image_xpos, editor_image_ypos, + width + 1, height + editor_image_menuheight + 1, width, height, true); } /** @@ -203,21 +196,18 @@ void build_image_window(int width, int height) { * Display normalized baseline, x-height, ascender limit and descender limit */ -void display_bln_lines(ScrollView* window, - ScrollView::Color colour, - float scale_factor, - float y_offset, - float minx, +void display_bln_lines(ScrollView* window, ScrollView::Color colour, + float scale_factor, float y_offset, float minx, float maxx) { window->Pen(colour); - window->Line(minx, y_offset + scale_factor * DESC_HEIGHT, - maxx, y_offset + scale_factor * DESC_HEIGHT); - window->Line(minx, y_offset + scale_factor * BL_HEIGHT, - maxx, y_offset + scale_factor * BL_HEIGHT); - window->Line(minx, y_offset + scale_factor * X_HEIGHT, - maxx, y_offset + scale_factor * X_HEIGHT); - window->Line(minx, y_offset + scale_factor * ASC_HEIGHT, - maxx, y_offset + scale_factor * ASC_HEIGHT); + window->Line(minx, y_offset + scale_factor * DESC_HEIGHT, maxx, + y_offset + scale_factor * DESC_HEIGHT); + window->Line(minx, y_offset + scale_factor * BL_HEIGHT, maxx, + y_offset + scale_factor * BL_HEIGHT); + window->Line(minx, y_offset + scale_factor * X_HEIGHT, maxx, + y_offset + scale_factor * X_HEIGHT); + window->Line(minx, y_offset + scale_factor * ASC_HEIGHT, maxx, + y_offset + scale_factor * ASC_HEIGHT); } /** @@ -232,14 +222,17 @@ void PGEventHandler::Notify(const SVEvent* event) { char myval = '0'; if (event->type == SVET_POPUP) { pe->Notify(event); - } // These are handled by ParamsEditor - else if (event->type == SVET_EXIT) { stillRunning = false; } - else if (event->type == SVET_MENU) { - if (strcmp(event->parameter, "true") == 0) { myval = 'T'; } - else if (strcmp(event->parameter, "false") == 0) { myval = 'F'; } - tess_->process_cmd_win_event(event->command_id, &myval); - } - else { + } // These are handled by ParamsEditor + else if (event->type == SVET_EXIT) { + stillRunning = false; + } else if (event->type == SVET_MENU) { + if (strcmp(event->parameter, "true") == 0) { + myval = 'T'; + } else if (strcmp(event->parameter, "false") == 0) { + myval = 'F'; + } + tess_->process_cmd_win_event(event->command_id, &myval); + } else { tess_->process_image_event(*event); } } @@ -250,7 +243,7 @@ void PGEventHandler::Notify(const SVEvent* event) { * Construct the menu tree used by the command window */ namespace tesseract { -SVMenuNode *Tesseract::build_menu_new() { +SVMenuNode* Tesseract::build_menu_new() { SVMenuNode* parent_menu; SVMenuNode* root_menu_item = new SVMenuNode(); @@ -283,7 +276,6 @@ SVMenuNode *Tesseract::build_menu_new() { parent_menu->AddChild("SmallCaps", SHOW_SMALLCAPS_CMD_EVENT); parent_menu->AddChild("DropCaps", SHOW_DROPCAPS_CMD_EVENT); - parent_menu = root_menu_item->AddChild("OTHER"); parent_menu->AddChild("Quit", QUIT_CMD_EVENT); @@ -302,7 +294,7 @@ SVMenuNode *Tesseract::build_menu_new() { * Redisplay page */ void Tesseract::do_re_display( - bool (tesseract::Tesseract::* word_painter)(PAGE_RES_IT* pr_it)) { + bool (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it)) { int block_count = 1; image_win->Clear(); @@ -317,7 +309,8 @@ void Tesseract::do_re_display( if (display_baselines && pr_it.row() != pr_it.prev_row()) pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN); if (display_blocks && pr_it.block() != pr_it.prev_block()) - pr_it.block()->block->pdblk.plot(image_win, block_count++, ScrollView::RED); + pr_it.block()->block->pdblk.plot(image_win, block_count++, + ScrollView::RED); } image_win->Update(); } @@ -330,10 +323,9 @@ void Tesseract::do_re_display( * */ -void Tesseract::pgeditor_main(int width, int height, PAGE_RES *page_res) { +void Tesseract::pgeditor_main(int width, int height, PAGE_RES* page_res) { current_page_res = page_res; - if (current_page_res->block_res_list.empty()) - return; + if (current_page_res->block_res_list.empty()) return; recog_done = false; stillRunning = true; @@ -359,16 +351,15 @@ void Tesseract::pgeditor_main(int width, int height, PAGE_RES *page_res) { } } // namespace tesseract - /** * pgeditor_msg() * * Display a message - in the command window if there is one, or to stdout */ -void pgeditor_msg( // message display - const char *msg) { - image_win->AddMessage(msg); +void pgeditor_msg( // message display + const char* msg) { + image_win->AddMessage(msg); } /** @@ -377,8 +368,8 @@ void pgeditor_msg( // message display * Display the coordinates of a point in the command window */ -void pgeditor_show_point( // display coords - SVEvent *event) { +void pgeditor_show_point( // display coords + SVEvent* event) { image_win->AddMessage("Pointing at(%d, %d)", event->x, event->y); } @@ -390,9 +381,9 @@ void pgeditor_show_point( // display coords */ namespace tesseract { -bool Tesseract::process_cmd_win_event( // UI command semantics - int32_t cmd_event, // which menu item? - char* new_value // any prompt data +bool Tesseract::process_cmd_win_event( // UI command semantics + int32_t cmd_event, // which menu item? + char* new_value // any prompt data ) { char msg[160]; bool exit = false; @@ -431,7 +422,7 @@ bool Tesseract::process_cmd_win_event( // UI command semantics case RECOG_WERDS: case RECOG_PSEUDO: case SHOW_BLOB_FEATURES: - mode =(CMD_EVENTS) cmd_event; + mode = (CMD_EVENTS)cmd_event; break; case DEBUG_WERD_CMD_EVENT: mode = DEBUG_WERD_CMD_EVENT; @@ -484,15 +475,15 @@ bool Tesseract::process_cmd_win_event( // UI command semantics do_re_display(&tesseract::Tesseract::word_set_display); break; case IMAGE_CMD_EVENT: - display_image =(new_value[0] == 'T'); + display_image = (new_value[0] == 'T'); do_re_display(&tesseract::Tesseract::word_display); break; case BLOCKS_CMD_EVENT: - display_blocks =(new_value[0] == 'T'); + display_blocks = (new_value[0] == 'T'); do_re_display(&tesseract::Tesseract::word_display); break; case BASELINES_CMD_EVENT: - display_baselines =(new_value[0] == 'T'); + display_baselines = (new_value[0] == 'T'); do_re_display(&tesseract::Tesseract::word_display); break; case SHOW_SUBSCRIPT_CMD_EVENT: @@ -543,12 +534,11 @@ bool Tesseract::process_cmd_win_event( // UI command semantics snprintf(msg, sizeof(msg), "Unrecognised event %" PRId32 "(%s)", cmd_event, new_value); image_win->AddMessage(msg); - break; + break; } return exit; } - /** * process_image_event() * @@ -558,17 +548,16 @@ bool Tesseract::process_cmd_win_event( // UI command semantics * If UP - for each word in the selected area do the operation defined by * the current mode. */ -void Tesseract::process_image_event( // action in image win - const SVEvent &event) { - // The following variable should remain static, since it is used by - // debug editor, which uses a single Tesseract instance. +void Tesseract::process_image_event( // action in image win + const SVEvent& event) { + // The following variable should remain static, since it is used by + // debug editor, which uses a single Tesseract instance. static ICOORD down; ICOORD up; TBOX selection_box; char msg[80]; - switch(event.type) { - + switch (event.type) { case SVET_SELECTION: if (event.type == SVET_SELECTION) { down.set_x(event.x + event.x_size); @@ -582,33 +571,29 @@ void Tesseract::process_image_event( // action in image win selection_box = TBOX(down, up); - switch(mode) { + switch (mode) { case CHANGE_DISP_CMD_EVENT: process_selected_words( - current_page_res, - selection_box, + current_page_res, selection_box, &tesseract::Tesseract::word_blank_and_set_display); break; - case DUMP_WERD_CMD_EVENT: - process_selected_words(current_page_res, - selection_box, + case DUMP_WERD_CMD_EVENT: + process_selected_words(current_page_res, selection_box, &tesseract::Tesseract::word_dumper); break; case SHOW_BLN_WERD_CMD_EVENT: - process_selected_words(current_page_res, - selection_box, + process_selected_words(current_page_res, selection_box, &tesseract::Tesseract::word_bln_display); break; case DEBUG_WERD_CMD_EVENT: debug_word(current_page_res, selection_box); break; case SHOW_POINT_CMD_EVENT: - break; // ignore up event + break; // ignore up event case RECOG_WERDS: image_win->AddMessage("Recogging selected words"); - this->process_selected_words(current_page_res, - selection_box, + this->process_selected_words(current_page_res, selection_box, &Tesseract::recog_interactive); break; case RECOG_PSEUDO: @@ -634,13 +619,12 @@ void Tesseract::process_image_event( // action in image win * * Process the whole image, but load word_config_ for the selected word(s). */ -void Tesseract::debug_word(PAGE_RES* page_res, const TBOX &selection_box) { +void Tesseract::debug_word(PAGE_RES* page_res, const TBOX& selection_box) { ResetAdaptiveClassifier(); recog_all_words(page_res, nullptr, &selection_box, word_config_.string(), 0); } } // namespace tesseract - /** * show_point() * @@ -654,32 +638,28 @@ void show_point(PAGE_RES* page_res, float x, float y) { const int kBufsize = 512; char msg[kBufsize]; - char *msg_ptr = msg; + char* msg_ptr = msg; msg_ptr += sprintf(msg_ptr, "Pt:(%0.3f, %0.3f) ", x, y); for (WERD_RES* word = pr_it.word(); word != nullptr; word = pr_it.forward()) { if (pr_it.row() != pr_it.prev_row() && pr_it.row()->row->bounding_box().contains(pt)) { - msg_ptr += sprintf(msg_ptr, "BL(x)=%0.3f ", - pr_it.row()->row->base_line(x)); + msg_ptr += + sprintf(msg_ptr, "BL(x)=%0.3f ", pr_it.row()->row->base_line(x)); } if (word->word->bounding_box().contains(pt)) { TBOX box = word->word->bounding_box(); - msg_ptr += sprintf(msg_ptr, "Wd(%d, %d)/(%d, %d) ", - box.left(), box.bottom(), - box.right(), box.top()); + msg_ptr += sprintf(msg_ptr, "Wd(%d, %d)/(%d, %d) ", box.left(), + box.bottom(), box.right(), box.top()); C_BLOB_IT cblob_it(word->word->cblob_list()); - for (cblob_it.mark_cycle_pt(); - !cblob_it.cycled_list(); + for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) { C_BLOB* cblob = cblob_it.data(); box = cblob->bounding_box(); if (box.contains(pt)) { - msg_ptr += sprintf(msg_ptr, - "CBlb(%d, %d)/(%d, %d) ", - box.left(), box.bottom(), - box.right(), box.top()); + msg_ptr += sprintf(msg_ptr, "CBlb(%d, %d)/(%d, %d) ", box.left(), + box.bottom(), box.right(), box.top()); } } } @@ -687,7 +667,6 @@ void show_point(PAGE_RES* page_res, float x, float y) { image_win->AddMessage(msg); } - /********************************************************************** * WERD PROCESSOR FUNCTIONS * ======================== @@ -716,7 +695,6 @@ bool Tesseract::word_blank_and_set_display(PAGE_RES_IT* pr_it) { return word_set_display(pr_it); } - /** * word_bln_display() * @@ -726,16 +704,14 @@ bool Tesseract::word_bln_display(PAGE_RES_IT* pr_it) { WERD_RES* word_res = pr_it->word(); if (word_res->chopped_word == nullptr) { // Setup word normalization parameters. - word_res->SetupForRecognition(unicharset, this, BestPix(), - tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - pr_it->row()->row, pr_it->block()->block); + word_res->SetupForRecognition( + unicharset, this, BestPix(), tessedit_ocr_engine_mode, nullptr, + classify_bln_numeric_mode, textord_use_cjk_fp_model, + poly_allow_detailed_fx, pr_it->row()->row, pr_it->block()->block); } bln_word_window_handle()->Clear(); - display_bln_lines(bln_word_window_handle(), ScrollView::CYAN, - 1.0, 0.0f, -1000.0f, 1000.0f); + display_bln_lines(bln_word_window_handle(), ScrollView::CYAN, 1.0, 0.0f, + -1000.0f, 1000.0f); C_BLOB_IT it(word_res->word->cblob_list()); ScrollView::Color color = WERD::NextColor(ScrollView::BLACK); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { @@ -747,8 +723,6 @@ bool Tesseract::word_bln_display(PAGE_RES_IT* pr_it) { return true; } - - /** * word_display() Word Processor * @@ -757,11 +731,11 @@ bool Tesseract::word_bln_display(PAGE_RES_IT* pr_it) { bool Tesseract::word_display(PAGE_RES_IT* pr_it) { WERD_RES* word_res = pr_it->word(); WERD* word = word_res->word; - TBOX word_bb; // word bounding box - int word_height; // ht of word BB + TBOX word_bb; // word bounding box + int word_height; // ht of word BB bool displayed_something = false; - float shift; // from bot left - C_BLOB_IT c_it; // cblob iterator + float shift; // from bot left + C_BLOB_IT c_it; // cblob iterator if (color_mode != CM_RAINBOW && word_res->box_word != nullptr) { BoxWord* box_word = word_res->box_word; @@ -781,24 +755,19 @@ bool Tesseract::word_display(PAGE_RES_IT* pr_it) { color = ScrollView::RED; break; case CM_ITALIC: - if (font_info.is_italic()) - color = ScrollView::RED; + if (font_info.is_italic()) color = ScrollView::RED; break; case CM_BOLD: - if (font_info.is_bold()) - color = ScrollView::RED; + if (font_info.is_bold()) color = ScrollView::RED; break; case CM_FIXEDPITCH: - if (font_info.is_fixed_pitch()) - color = ScrollView::RED; + if (font_info.is_fixed_pitch()) color = ScrollView::RED; break; case CM_SERIF: - if (font_info.is_serif()) - color = ScrollView::RED; + if (font_info.is_serif()) color = ScrollView::RED; break; case CM_SMALLCAPS: - if (word_res->small_caps) - color = ScrollView::RED; + if (word_res->small_caps) color = ScrollView::RED; break; case CM_DROPCAPS: if (best_choice->BlobPosition(i) == SP_DROPCAP) @@ -819,16 +788,14 @@ bool Tesseract::word_display(PAGE_RES_IT* pr_it) { Note the double coercions of(COLOUR)((int32_t)editor_image_word_bb_color) etc. are to keep the compiler happy. */ - // display bounding box + // display bounding box if (word->display_flag(DF_BOX)) { - word->bounding_box().plot(image_win, - (ScrollView::Color)((int32_t) - editor_image_word_bb_color), - (ScrollView::Color)((int32_t) - editor_image_word_bb_color)); - - ScrollView::Color c = (ScrollView::Color) - ((int32_t) editor_image_blob_bb_color); + word->bounding_box().plot( + image_win, (ScrollView::Color)((int32_t)editor_image_word_bb_color), + (ScrollView::Color)((int32_t)editor_image_word_bb_color)); + + ScrollView::Color c = + (ScrollView::Color)((int32_t)editor_image_blob_bb_color); image_win->Pen(c); c_it.set_to_list(word->cblob_list()); for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) @@ -836,15 +803,15 @@ bool Tesseract::word_display(PAGE_RES_IT* pr_it) { displayed_something = true; } - // display edge steps - if (word->display_flag(DF_EDGE_STEP)) { // edgesteps available - word->plot(image_win); // rainbow colors + // display edge steps + if (word->display_flag(DF_EDGE_STEP)) { // edgesteps available + word->plot(image_win); // rainbow colors displayed_something = true; } - // display poly approx + // display poly approx if (word->display_flag(DF_POLYGONAL)) { - // need to convert + // need to convert TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word); tword->plot(image_win); delete tword; @@ -861,7 +828,7 @@ bool Tesseract::word_display(PAGE_RES_IT* pr_it) { !(word_res->blamer_bundle != nullptr && word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) { text = ""; - const BlamerBundle *blamer_bundle = word_res->blamer_bundle; + const BlamerBundle* blamer_bundle = word_res->blamer_bundle; if (blamer_bundle == nullptr) { text += "NULL"; } else { @@ -875,8 +842,9 @@ bool Tesseract::word_display(PAGE_RES_IT* pr_it) { word_res->best_choice->string_and_lengths(&best_choice_str, nullptr); } text += best_choice_str; - IncorrectResultReason reason = (blamer_bundle == nullptr) ? - IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason(); + IncorrectResultReason reason = + (blamer_bundle == nullptr) ? IRR_PAGE_LAYOUT + : blamer_bundle->incorrect_result_reason(); ASSERT_HOST(reason < IRR_NUM_REASONS) blame += " ["; blame += BlamerBundle::IncorrectReasonName(reason); @@ -901,11 +869,10 @@ bool Tesseract::word_display(PAGE_RES_IT* pr_it) { displayed_something = true; } - if (!displayed_something) // display BBox anyway - word->bounding_box().plot(image_win, - (ScrollView::Color)((int32_t) editor_image_word_bb_color), - (ScrollView::Color)((int32_t) - editor_image_word_bb_color)); + if (!displayed_something) // display BBox anyway + word->bounding_box().plot( + image_win, (ScrollView::Color)((int32_t)editor_image_word_bb_color), + (ScrollView::Color)((int32_t)editor_image_word_bb_color)); return true; } #endif // GRAPHICS_DISABLED @@ -946,7 +913,7 @@ bool Tesseract::word_set_display(PAGE_RES_IT* pr_it) { word->set_display_flag(DF_POLYGONAL, word_display_mode.bit(DF_POLYGONAL)); word->set_display_flag(DF_EDGE_STEP, word_display_mode.bit(DF_EDGE_STEP)); word->set_display_flag(DF_BN_POLYGONAL, - word_display_mode.bit(DF_BN_POLYGONAL)); + word_display_mode.bit(DF_BN_POLYGONAL)); word->set_display_flag(DF_BLAMER, word_display_mode.bit(DF_BLAMER)); return word_display(pr_it); } @@ -959,12 +926,10 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res, if (it != nullptr) { WERD_RES* word_res = it->word(); word_res->x_height = it->row()->row->x_height(); - word_res->SetupForRecognition(unicharset, this, BestPix(), - tessedit_ocr_engine_mode, nullptr, - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - it->row()->row, it->block()->block); + word_res->SetupForRecognition( + unicharset, this, BestPix(), tessedit_ocr_engine_mode, nullptr, + classify_bln_numeric_mode, textord_use_cjk_fp_model, + poly_allow_detailed_fx, it->row()->row, it->block()->block); TWERD* bln_word = word_res->chopped_word; TBLOB* bln_blob = bln_word->blobs[0]; INT_FX_RESULT_STRUCT fx_info; @@ -990,7 +955,6 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res, } } - #endif // GRAPHICS_DISABLED } // namespace tesseract diff --git a/src/ccmain/pgedit.h b/src/ccmain/pgedit.h index e9fc693679..db8cd4a32a 100644 --- a/src/ccmain/pgedit.h +++ b/src/ccmain/pgedit.h @@ -17,15 +17,15 @@ // /////////////////////////////////////////////////////////////////////// -#ifndef PGEDIT_H -#define PGEDIT_H +#ifndef PGEDIT_H +#define PGEDIT_H -#include "ocrblock.h" -#include "ocrrow.h" -#include "werd.h" -#include "rect.h" -#include "params.h" -#include "tesseractclass.h" +#include "ocrblock.h" +#include "ocrrow.h" +#include "params.h" +#include "rect.h" +#include "tesseractclass.h" +#include "werd.h" class ScrollView; class SVMenuNode; @@ -34,54 +34,49 @@ struct SVEvent; // A small event handler class to process incoming events to // this window. class PGEventHandler : public SVEventHandler { - public: - PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) { - } - void Notify(const SVEvent* sve); - private: - tesseract::Tesseract* tess_; + public: + PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {} + void Notify(const SVEvent* sve); + + private: + tesseract::Tesseract* tess_; }; -extern BLOCK_LIST *current_block_list; -extern STRING_VAR_H (editor_image_win_name, "EditorImage", -"Editor image window name"); -extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos"); -extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos"); -extern INT_VAR_H (editor_image_height, 680, "Editor image height"); -extern INT_VAR_H (editor_image_width, 655, "Editor image width"); -extern INT_VAR_H (editor_image_word_bb_color, BLUE, -"Word bounding box colour"); -extern INT_VAR_H (editor_image_blob_bb_color, YELLOW, -"Blob bounding box colour"); -extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour"); -extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin", -"Editor debug window name"); -extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos"); -extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos"); -extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height"); -extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width"); -extern STRING_VAR_H (editor_word_name, "BlnWords", -"BL normalised word window"); -extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos"); -extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos"); -extern INT_VAR_H (editor_word_height, 240, "Word window height"); -extern INT_VAR_H (editor_word_width, 655, "Word window width"); -extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image"); +extern BLOCK_LIST* current_block_list; +extern STRING_VAR_H(editor_image_win_name, "EditorImage", + "Editor image window name"); +extern INT_VAR_H(editor_image_xpos, 590, "Editor image X Pos"); +extern INT_VAR_H(editor_image_ypos, 10, "Editor image Y Pos"); +extern INT_VAR_H(editor_image_height, 680, "Editor image height"); +extern INT_VAR_H(editor_image_width, 655, "Editor image width"); +extern INT_VAR_H(editor_image_word_bb_color, BLUE, "Word bounding box colour"); +extern INT_VAR_H(editor_image_blob_bb_color, YELLOW, + "Blob bounding box colour"); +extern INT_VAR_H(editor_image_text_color, WHITE, "Correct text colour"); +extern STRING_VAR_H(editor_dbwin_name, "EditorDBWin", + "Editor debug window name"); +extern INT_VAR_H(editor_dbwin_xpos, 50, "Editor debug window X Pos"); +extern INT_VAR_H(editor_dbwin_ypos, 500, "Editor debug window Y Pos"); +extern INT_VAR_H(editor_dbwin_height, 24, "Editor debug window height"); +extern INT_VAR_H(editor_dbwin_width, 80, "Editor debug window width"); +extern STRING_VAR_H(editor_word_name, "BlnWords", "BL normalised word window"); +extern INT_VAR_H(editor_word_xpos, 60, "Word window X Pos"); +extern INT_VAR_H(editor_word_ypos, 510, "Word window Y Pos"); +extern INT_VAR_H(editor_word_height, 240, "Word window height"); +extern INT_VAR_H(editor_word_width, 655, "Word window width"); +extern double_VAR_H(editor_smd_scale_factor, 1.0, "Scaling for smd image"); -ScrollView* bln_word_window_handle(); //return handle +ScrollView* bln_word_window_handle(); // return handle void build_image_window(int width, int height); -void display_bln_lines(ScrollView window, - ScrollView::Color colour, - float scale_factor, - float y_offset, - float minx, +void display_bln_lines(ScrollView window, ScrollView::Color colour, + float scale_factor, float y_offset, float minx, float maxx); - //function to call -void pgeditor_msg( //message display - const char *msg); -void pgeditor_show_point( //display coords - SVEvent *event); - //put bln word in box +// function to call +void pgeditor_msg( // message display + const char* msg); +void pgeditor_show_point( // display coords + SVEvent* event); +// put bln word in box void show_point(PAGE_RES* page_res, float x, float y); #endif diff --git a/src/ccmain/recogtraining.cpp b/src/ccmain/recogtraining.cpp index d9c0af8a31..915a7fd0a8 100644 --- a/src/ccmain/recogtraining.cpp +++ b/src/ccmain/recogtraining.cpp @@ -33,24 +33,24 @@ const int16_t kMaxBoxEdgeDiff = 2; // Sets flags necessary for recognition in the training mode. // Opens and returns the pointer to the output file. -FILE *Tesseract::init_recog_training(const STRING &fname) { +FILE* Tesseract::init_recog_training(const STRING& fname) { if (tessedit_ambigs_training) { - tessedit_tess_adaption_mode.set_value(0); // turn off adaption - tessedit_enable_doc_dict.set_value(0); // turn off document dictionary + tessedit_tess_adaption_mode.set_value(0); // turn off adaption + tessedit_enable_doc_dict.set_value(0); // turn off document dictionary // Explore all segmentations. getDict().stopper_no_acceptable_choices.set_value(1); } STRING output_fname = fname; - const char *lastdot = strrchr(output_fname.string(), '.'); + const char* lastdot = strrchr(output_fname.string(), '.'); if (lastdot != nullptr) output_fname[lastdot - output_fname.string()] = '\0'; output_fname += ".txt"; - FILE *output_file = open_file(output_fname.string(), "a+"); + FILE* output_file = open_file(output_fname.string(), "a+"); return output_file; } // Copies the bounding box from page_res_it->word() to the given TBOX. -bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) { +bool read_t(PAGE_RES_IT* page_res_it, TBOX* tbox) { while (page_res_it->block() != nullptr && page_res_it->word() == nullptr) page_res_it->forward(); @@ -76,16 +76,16 @@ bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) { // match to those specified by the input box file. For each word (ngram in a // single bounding box from the input box file) it outputs the ocred result, // the correct label, rating and certainty. -void Tesseract::recog_training_segmented(const STRING &fname, - PAGE_RES *page_res, - volatile ETEXT_DESC *monitor, - FILE *output_file) { +void Tesseract::recog_training_segmented(const STRING& fname, + PAGE_RES* page_res, + volatile ETEXT_DESC* monitor, + FILE* output_file) { STRING box_fname = fname; - const char *lastdot = strrchr(box_fname.string(), '.'); + const char* lastdot = strrchr(box_fname.string(), '.'); if (lastdot != nullptr) box_fname[lastdot - box_fname.string()] = '\0'; box_fname += ".box"; // ReadNextBox() will close box_file - FILE *box_file = open_file(box_fname.string(), "r"); + FILE* box_file = open_file(box_fname.string(), "r"); PAGE_RES_IT page_res_it; page_res_it.page_res = page_res; @@ -100,8 +100,8 @@ void Tesseract::recog_training_segmented(const STRING &fname, int examined_words = 0; do { keep_going = read_t(&page_res_it, &tbox); - keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label, - &bbox); + keep_going &= + ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox); // Align bottom left points of the TBOXes. while (keep_going && !NearlyEqual(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) { @@ -109,8 +109,8 @@ void Tesseract::recog_training_segmented(const STRING &fname, page_res_it.forward(); keep_going = read_t(&page_res_it, &tbox); } else { - keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, - &bbox); + keep_going = + ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox); } } while (keep_going && @@ -119,16 +119,16 @@ void Tesseract::recog_training_segmented(const STRING &fname, page_res_it.forward(); keep_going = read_t(&page_res_it, &tbox); } else { - keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, - &bbox); + keep_going = + ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox); } } // OCR the word if top right points of the TBOXes are similar. if (keep_going && NearlyEqual(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) && NearlyEqual(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) { - ambigs_classify_and_output(label.string(), &page_res_it, output_file); - examined_words++; + ambigs_classify_and_output(label.string(), &page_res_it, output_file); + examined_words++; } page_res_it.forward(); } while (keep_going); @@ -147,38 +147,37 @@ void Tesseract::recog_training_segmented(const STRING &fname, } } if (examined_words < 0.85 * total_words) { - tprintf("TODO(antonova): clean up recog_training_segmented; " - " It examined only a small fraction of the ambigs image.\n"); + tprintf( + "TODO(antonova): clean up recog_training_segmented; " + " It examined only a small fraction of the ambigs image.\n"); } - tprintf("recog_training_segmented: examined %d / %d words.\n", - examined_words, total_words); + tprintf("recog_training_segmented: examined %d / %d words.\n", examined_words, + total_words); } // Helper prints the given set of blob choices. static void PrintPath(int length, const BLOB_CHOICE** blob_choices, - const UNICHARSET& unicharset, - const char *label, FILE *output_file) { + const UNICHARSET& unicharset, const char* label, + FILE* output_file) { float rating = 0.0f; float certainty = 0.0f; for (int i = 0; i < length; ++i) { const BLOB_CHOICE* blob_choice = blob_choices[i]; fprintf(output_file, "%s", - unicharset.id_to_unichar(blob_choice->unichar_id())); + unicharset.id_to_unichar(blob_choice->unichar_id())); rating += blob_choice->rating(); if (certainty > blob_choice->certainty()) certainty = blob_choice->certainty(); } - fprintf(output_file, "\t%s\t%.4f\t%.4f\n", - label, rating, certainty); + fprintf(output_file, "\t%s\t%.4f\t%.4f\n", label, rating, certainty); } // Helper recursively prints all paths through the ratings matrix, starting // at column col. -static void PrintMatrixPaths(int col, int dim, - const MATRIX& ratings, +static void PrintMatrixPaths(int col, int dim, const MATRIX& ratings, int length, const BLOB_CHOICE** blob_choices, - const UNICHARSET& unicharset, - const char *label, FILE *output_file) { + const UNICHARSET& unicharset, const char* label, + FILE* output_file) { for (int row = col; row < dim && row - col < ratings.bandwidth(); ++row) { if (ratings.get(col, row) != NOT_CLASSIFIED) { BLOB_CHOICE_IT bc_it(ratings.get(col, row)); @@ -199,16 +198,16 @@ static void PrintMatrixPaths(int col, int dim, // raw choice as a result of the classification. For words labeled with a // single unichar also outputs all alternatives from blob_choices of the // best choice. -void Tesseract::ambigs_classify_and_output(const char *label, +void Tesseract::ambigs_classify_and_output(const char* label, PAGE_RES_IT* pr_it, - FILE *output_file) { + FILE* output_file) { // Classify word. fflush(stdout); WordData word_data(*pr_it); SetupWordPassN(1, &word_data); classify_word_and_language(1, pr_it, &word_data); WERD_RES* werd_res = word_data.word; - WERD_CHOICE *best_choice = werd_res->best_choice; + WERD_CHOICE* best_choice = werd_res->best_choice; ASSERT_HOST(best_choice != nullptr); // Compute the number of unichars in the label. @@ -221,9 +220,9 @@ void Tesseract::ambigs_classify_and_output(const char *label, // Dump all paths through the ratings matrix (which is normally small). int dim = werd_res->ratings->dimension(); const BLOB_CHOICE** blob_choices = new const BLOB_CHOICE*[dim]; - PrintMatrixPaths(0, dim, *werd_res->ratings, 0, blob_choices, - unicharset, label, output_file); - delete [] blob_choices; + PrintMatrixPaths(0, dim, *werd_res->ratings, 0, blob_choices, unicharset, + label, output_file); + delete[] blob_choices; } } // namespace tesseract diff --git a/src/ccmain/reject.cpp b/src/ccmain/reject.cpp index 88202b3f4e..1fcac6b324 100644 --- a/src/ccmain/reject.cpp +++ b/src/ccmain/reject.cpp @@ -17,21 +17,21 @@ * **********************************************************************/ -#include "tessvars.h" +#include "tessvars.h" #ifdef __UNIX__ -#include -#include +#include +#include #endif -#include "scanutils.h" -#include -#include -#include "genericvector.h" -#include "reject.h" -#include "control.h" -#include "docqual.h" -#include "globaloc.h" // For err_exit. -#include "globals.h" -#include "helpers.h" +#include +#include +#include "control.h" +#include "docqual.h" +#include "genericvector.h" +#include "globaloc.h" // For err_exit. +#include "globals.h" +#include "helpers.h" +#include "reject.h" +#include "scanutils.h" #include "tesseractclass.h" @@ -40,7 +40,8 @@ #include "config_auto.h" #endif -CLISTIZEH (STRING) CLISTIZE (STRING) +CLISTIZEH(STRING) +CLISTIZE(STRING) /************************************************************************* * set_done() @@ -49,22 +50,24 @@ CLISTIZEH (STRING) CLISTIZE (STRING) *************************************************************************/ namespace tesseract { -void Tesseract::set_done(WERD_RES *word, int16_t pass) { - word->done = word->tess_accepted && +void Tesseract::set_done(WERD_RES* word, int16_t pass) { + word->done = + word->tess_accepted && (strchr(word->best_choice->unichar_string().string(), ' ') == nullptr); bool word_is_ambig = word->best_choice->dangerous_ambig_found(); bool word_from_dict = word->best_choice->permuter() == SYSTEM_DAWG_PERM || - word->best_choice->permuter() == FREQ_DAWG_PERM || - word->best_choice->permuter() == USER_DAWG_PERM; + word->best_choice->permuter() == FREQ_DAWG_PERM || + word->best_choice->permuter() == USER_DAWG_PERM; if (word->done && (pass == 1) && (!word_from_dict || word_is_ambig) && one_ell_conflict(word, false)) { if (tessedit_rejection_debug) tprintf("one_ell_conflict detected\n"); word->done = FALSE; } - if (word->done && ((!word_from_dict && - word->best_choice->permuter() != NUMBER_PERM) || word_is_ambig)) { + if (word->done && + ((!word_from_dict && word->best_choice->permuter() != NUMBER_PERM) || + word_is_ambig)) { if (tessedit_rejection_debug) tprintf("non-dict or ambig word detected\n"); - word->done = FALSE; + word->done = FALSE; } if (tessedit_rejection_debug) { tprintf("set_done(): done=%d\n", word->done); @@ -72,7 +75,6 @@ void Tesseract::set_done(WERD_RES *word, int16_t pass) { } } - /************************************************************************* * make_reject_map() * @@ -80,43 +82,43 @@ void Tesseract::set_done(WERD_RES *word, int16_t pass) { * * Sets a reject map for the word. *************************************************************************/ -void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) { +void Tesseract::make_reject_map(WERD_RES* word, ROW* row, int16_t pass) { int i; int offset; flip_0O(word); - check_debug_pt(word, -1); // For trap only - set_done(word, pass); // Set acceptance + check_debug_pt(word, -1); // For trap only + set_done(word, pass); // Set acceptance word->reject_map.initialise(word->best_choice->unichar_lengths().length()); reject_blanks(word); /* 0: Rays original heuristic - the baseline */ if (tessedit_reject_mode == 0) { - if (!word->done) - reject_poor_matches(word); + if (!word->done) reject_poor_matches(word); } else if (tessedit_reject_mode == 5) { /* - 5: Reject I/1/l from words where there is no strong contextual confirmation; - the whole of any unacceptable words (incl PERM rej of dubious 1/I/ls); - and the whole of any words which are very small + 5: Reject I/1/l from words where there is no strong contextual + confirmation; the whole of any unacceptable words (incl PERM rej of + dubious 1/I/ls); and the whole of any words which are very small */ if (kBlnXHeight / word->denorm.y_scale() <= min_sane_x_ht_pixels) { word->reject_map.rej_word_small_xht(); } else { one_ell_conflict(word, true); /* - Originally the code here just used the done flag. Now I have duplicated - and unpacked the conditions for setting the done flag so that each - mechanism can be turned on or off independently. This works WITHOUT - affecting the done flag setting. + Originally the code here just used the done flag. Now I have + duplicated and unpacked the conditions for setting the done flag so + that each mechanism can be turned on or off independently. This works + WITHOUT affecting the done flag setting. */ if (rej_use_tess_accepted && !word->tess_accepted) - word->reject_map.rej_word_not_tess_accepted (); + word->reject_map.rej_word_not_tess_accepted(); if (rej_use_tess_blanks && - (strchr (word->best_choice->unichar_string().string (), ' ') != nullptr)) - word->reject_map.rej_word_contains_blanks (); + (strchr(word->best_choice->unichar_string().string(), ' ') != + nullptr)) + word->reject_map.rej_word_contains_blanks(); WERD_CHOICE* best_choice = word->best_choice; if (rej_use_good_perm) { @@ -124,10 +126,9 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) { best_choice->permuter() == FREQ_DAWG_PERM || best_choice->permuter() == USER_DAWG_PERM) && (!rej_use_sensible_wd || - acceptable_word_string(*word->uch_set, - best_choice->unichar_string().string(), - best_choice->unichar_lengths().string()) != - AC_UNACCEPTABLE)) { + acceptable_word_string( + *word->uch_set, best_choice->unichar_string().string(), + best_choice->unichar_lengths().string()) != AC_UNACCEPTABLE)) { // PASSED TEST } else if (best_choice->permuter() == NUMBER_PERM) { if (rej_alphas_in_number_perm) { @@ -153,14 +154,13 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) { err_exit(); } - if (tessedit_image_border > -1) - reject_edge_blobs(word); + if (tessedit_image_border > -1) reject_edge_blobs(word); - check_debug_pt (word, 10); + check_debug_pt(word, 10); if (tessedit_rejection_debug) { - tprintf("Permuter Type = %d\n", word->best_choice->permuter ()); - tprintf("Certainty: %f Rating: %f\n", - word->best_choice->certainty (), word->best_choice->rating ()); + tprintf("Permuter Type = %d\n", word->best_choice->permuter()); + tprintf("Certainty: %f Rating: %f\n", word->best_choice->certainty(), + word->best_choice->rating()); tprintf("Dict word: %d\n", dict_word(*(word->best_choice))); } @@ -169,37 +169,35 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) { } } // namespace tesseract - -void reject_blanks(WERD_RES *word) { +void reject_blanks(WERD_RES* word) { int16_t i; int16_t offset; for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0'; offset += word->best_choice->unichar_lengths()[i], i += 1) { if (word->best_choice->unichar_string()[offset] == ' ') - //rej unrecognised blobs - word->reject_map[i].setrej_tess_failure (); + // rej unrecognised blobs + word->reject_map[i].setrej_tess_failure(); } } namespace tesseract { -void Tesseract::reject_I_1_L(WERD_RES *word) { +void Tesseract::reject_I_1_L(WERD_RES* word) { int16_t i; int16_t offset; for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0'; offset += word->best_choice->unichar_lengths()[i], i += 1) { - if (STRING (conflict_set_I_l_1). - contains (word->best_choice->unichar_string()[offset])) { - //rej 1Il conflict - word->reject_map[i].setrej_1Il_conflict (); + if (STRING(conflict_set_I_l_1) + .contains(word->best_choice->unichar_string()[offset])) { + // rej 1Il conflict + word->reject_map[i].setrej_1Il_conflict(); } } } } // namespace tesseract - -void reject_poor_matches(WERD_RES *word) { +void reject_poor_matches(WERD_RES* word) { float threshold = compute_reject_threshold(word->best_choice); for (int i = 0; i < word->best_choice->length(); ++i) { if (word->best_choice->unichar_id(i) == UNICHAR_SPACE) @@ -209,7 +207,6 @@ void reject_poor_matches(WERD_RES *word) { } } - /********************************************************************** * compute_reject_threshold * @@ -219,11 +216,11 @@ void reject_poor_matches(WERD_RES *word) { **********************************************************************/ float compute_reject_threshold(WERD_CHOICE* word) { - float threshold; // rejection threshold - float bestgap = 0.0f; // biggest gap - float gapstart; // bottom of gap - // super iterator - BLOB_CHOICE_IT choice_it; // real iterator + float threshold; // rejection threshold + float bestgap = 0.0f; // biggest gap + float gapstart; // bottom of gap + // super iterator + BLOB_CHOICE_IT choice_it; // real iterator int blob_count = word->length(); GenericVector ratings; @@ -232,7 +229,7 @@ float compute_reject_threshold(WERD_CHOICE* word) { ratings[i] = word->certainty(i); } ratings.sort(); - gapstart = ratings[0] - 1; // all reject if none better + gapstart = ratings[0] - 1; // all reject if none better if (blob_count >= 3) { for (int index = 0; index < blob_count - 1; index++) { if (ratings[index + 1] - ratings[index] > bestgap) { @@ -247,7 +244,6 @@ float compute_reject_threshold(WERD_CHOICE* word) { return threshold; } - /************************************************************************* * reject_edge_blobs() * @@ -255,7 +251,7 @@ float compute_reject_threshold(WERD_CHOICE* word) { * in the word which are too close to the edge as they could be clipped. *************************************************************************/ namespace tesseract { -void Tesseract::reject_edge_blobs(WERD_RES *word) { +void Tesseract::reject_edge_blobs(WERD_RES* word) { TBOX word_box = word->word->bounding_box(); // Use the box_word as it is already denormed back to image coordinates. int blobcount = word->box_word->length(); @@ -285,14 +281,14 @@ void Tesseract::reject_edge_blobs(WERD_RES *word) { * - A bundle of contextual heuristics! **********************************************************************/ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { - const char *word; - const char *lengths; - int16_t word_len; //its length + const char* word; + const char* lengths; + int16_t word_len; // its length int16_t first_alphanum_index_; int16_t first_alphanum_offset_; int16_t i; int16_t offset; - bool non_conflict_set_char; //non conf set a/n? + bool non_conflict_set_char; // non conf set a/n? bool conflict = false; bool allow_1s; ACCEPTABLE_WERD_TYPE word_type; @@ -300,15 +296,14 @@ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { bool dict_word_ok; int dict_word_type; - word = word_res->best_choice->unichar_string().string (); + word = word_res->best_choice->unichar_string().string(); lengths = word_res->best_choice->unichar_lengths().string(); word_len = strlen(lengths); /* If there are no occurrences of the conflict set characters then the word is OK. */ - if (strpbrk(word, conflict_set_I_l_1.string ()) == nullptr) - return false; + if (strpbrk(word, conflict_set_I_l_1.string()) == nullptr) return false; /* There is a conflict if there are NO other (confirmed) alphanumerics apart @@ -319,11 +314,10 @@ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { (i < word_len) && !non_conflict_set_char; offset += lengths[i++]) non_conflict_set_char = (word_res->uch_set->get_isalpha(word + offset, lengths[i]) || - word_res->uch_set->get_isdigit(word + offset, lengths[i])) && - !STRING (conflict_set_I_l_1).contains (word[offset]); + word_res->uch_set->get_isdigit(word + offset, lengths[i])) && + !STRING(conflict_set_I_l_1).contains(word[offset]); if (!non_conflict_set_char) { - if (update_map) - reject_I_1_L(word_res); + if (update_map) reject_I_1_L(word_res); return true; } @@ -333,31 +327,29 @@ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { is, then there is a potential error otherwise the word is ok. */ - dict_perm_type = (word_res->best_choice->permuter () == SYSTEM_DAWG_PERM) || - (word_res->best_choice->permuter () == USER_DAWG_PERM) || - (rej_trust_doc_dawg && - (word_res->best_choice->permuter () == DOC_DAWG_PERM)) || - (word_res->best_choice->permuter () == FREQ_DAWG_PERM); + dict_perm_type = (word_res->best_choice->permuter() == SYSTEM_DAWG_PERM) || + (word_res->best_choice->permuter() == USER_DAWG_PERM) || + (rej_trust_doc_dawg && + (word_res->best_choice->permuter() == DOC_DAWG_PERM)) || + (word_res->best_choice->permuter() == FREQ_DAWG_PERM); dict_word_type = dict_word(*(word_res->best_choice)); dict_word_ok = (dict_word_type > 0) && - (rej_trust_doc_dawg || (dict_word_type != DOC_DAWG_PERM)); + (rej_trust_doc_dawg || (dict_word_type != DOC_DAWG_PERM)); if ((rej_1Il_use_dict_word && dict_word_ok) || - (rej_1Il_trust_permuter_type && dict_perm_type) || - (dict_perm_type && dict_word_ok)) { - first_alphanum_index_ = first_alphanum_index (word, lengths); - first_alphanum_offset_ = first_alphanum_offset (word, lengths); + (rej_1Il_trust_permuter_type && dict_perm_type) || + (dict_perm_type && dict_word_ok)) { + first_alphanum_index_ = first_alphanum_index(word, lengths); + first_alphanum_offset_ = first_alphanum_offset(word, lengths); if (lengths[first_alphanum_index_] == 1 && word[first_alphanum_offset_] == 'I') { word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; if (safe_dict_word(word_res) > 0) { word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I'; if (update_map) - word_res->reject_map[first_alphanum_index_]. - setrej_1Il_conflict(); + word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict(); return true; - } - else { + } else { word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I'; return false; } @@ -369,11 +361,9 @@ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { if (safe_dict_word(word_res) > 0) { word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; if (update_map) - word_res->reject_map[first_alphanum_index_]. - setrej_1Il_conflict(); + word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict(); return true; - } - else { + } else { word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; return false; } @@ -392,8 +382,8 @@ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { REGARDLESS OF PERMUTER, see if flipping a leading I/l generates a dictionary word. */ - first_alphanum_index_ = first_alphanum_index (word, lengths); - first_alphanum_offset_ = first_alphanum_offset (word, lengths); + first_alphanum_index_ = first_alphanum_index(word, lengths); + first_alphanum_offset_ = first_alphanum_offset(word, lengths); if (lengths[first_alphanum_index_] == 1 && word[first_alphanum_offset_] == 'l') { word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I'; @@ -401,9 +391,8 @@ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { return false; else word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; - } - else if (lengths[first_alphanum_index_] == 1 && - word[first_alphanum_offset_] == 'I') { + } else if (lengths[first_alphanum_index_] == 1 && + word[first_alphanum_offset_] == 'I') { word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l'; if (safe_dict_word(word_res) > 0) return false; @@ -416,18 +405,17 @@ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { reject any non 1 conflict chs Else reject all conflict chs */ - if (word_contains_non_1_digit (word, lengths)) { - allow_1s = (alpha_count (word, lengths) == 0) || - (word_res->best_choice->permuter () == NUMBER_PERM); + if (word_contains_non_1_digit(word, lengths)) { + allow_1s = (alpha_count(word, lengths) == 0) || + (word_res->best_choice->permuter() == NUMBER_PERM); int16_t offset; conflict = false; for (i = 0, offset = 0; word[offset] != '\0'; offset += word_res->best_choice->unichar_lengths()[i++]) { if ((!allow_1s || (word[offset] != '1')) && - STRING (conflict_set_I_l_1).contains (word[offset])) { - if (update_map) - word_res->reject_map[i].setrej_1Il_conflict (); + STRING(conflict_set_I_l_1).contains(word[offset])) { + if (update_map) word_res->reject_map[i].setrej_1Il_conflict(); conflict = true; } } @@ -439,30 +427,24 @@ bool Tesseract::one_ell_conflict(WERD_RES* word_res, bool update_map) { */ word_type = acceptable_word_string(*word_res->uch_set, word, lengths); if ((word_type == AC_LOWER_CASE) || (word_type == AC_INITIAL_CAP)) { - first_alphanum_index_ = first_alphanum_index (word, lengths); - first_alphanum_offset_ = first_alphanum_offset (word, lengths); - if (STRING (conflict_set_I_l_1).contains (word[first_alphanum_offset_])) { + first_alphanum_index_ = first_alphanum_index(word, lengths); + first_alphanum_offset_ = first_alphanum_offset(word, lengths); + if (STRING(conflict_set_I_l_1).contains(word[first_alphanum_offset_])) { if (update_map) - word_res->reject_map[first_alphanum_index_]. - setrej_1Il_conflict (); + word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict(); return true; - } - else + } else return false; - } - else if (word_type == AC_UPPER_CASE) { + } else if (word_type == AC_UPPER_CASE) { return false; - } - else { - if (update_map) - reject_I_1_L(word_res); + } else { + if (update_map) reject_I_1_L(word_res); return true; } } - -int16_t Tesseract::first_alphanum_index(const char *word, - const char *word_lengths) { +int16_t Tesseract::first_alphanum_index(const char* word, + const char* word_lengths) { int16_t i; int16_t offset; @@ -474,8 +456,8 @@ int16_t Tesseract::first_alphanum_index(const char *word, return -1; } -int16_t Tesseract::first_alphanum_offset(const char *word, - const char *word_lengths) { +int16_t Tesseract::first_alphanum_offset(const char* word, + const char* word_lengths) { int16_t i; int16_t offset; @@ -487,27 +469,24 @@ int16_t Tesseract::first_alphanum_offset(const char *word, return -1; } -int16_t Tesseract::alpha_count(const char *word, - const char *word_lengths) { +int16_t Tesseract::alpha_count(const char* word, const char* word_lengths) { int16_t i; int16_t offset; int16_t count = 0; for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) { - if (unicharset.get_isalpha (word + offset, word_lengths[i])) - count++; + if (unicharset.get_isalpha(word + offset, word_lengths[i])) count++; } return count; } - bool Tesseract::word_contains_non_1_digit(const char* word, const char* word_lengths) { int16_t i; int16_t offset; for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) { - if (unicharset.get_isdigit (word + offset, word_lengths[i]) && + if (unicharset.get_isdigit(word + offset, word_lengths[i]) && (word_lengths[i] != 1 || word[offset] != '1')) return true; } @@ -518,12 +497,12 @@ bool Tesseract::word_contains_non_1_digit(const char* word, * dont_allow_1Il() * Don't unreject LONE accepted 1Il conflict set chars *************************************************************************/ -void Tesseract::dont_allow_1Il(WERD_RES *word) { +void Tesseract::dont_allow_1Il(WERD_RES* word) { int i = 0; int offset; int word_len = word->reject_map.length(); - const char *s = word->best_choice->unichar_string().string(); - const char *lengths = word->best_choice->unichar_lengths().string(); + const char* s = word->best_choice->unichar_string().string(); + const char* lengths = word->best_choice->unichar_lengths().string(); bool accepted_1Il = false; for (i = 0, offset = 0; i < word_len; @@ -534,55 +513,50 @@ void Tesseract::dont_allow_1Il(WERD_RES *word) { } else { if (word->uch_set->get_isalpha(s + offset, lengths[i]) || word->uch_set->get_isdigit(s + offset, lengths[i])) - return; // >=1 non 1Il ch accepted + return; // >=1 non 1Il ch accepted } } } - if (!accepted_1Il) - return; //Nothing to worry about + if (!accepted_1Il) return; // Nothing to worry about for (i = 0, offset = 0; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) { if (STRING(conflict_set_I_l_1).contains(s[offset]) && - word->reject_map[i].accepted()) + word->reject_map[i].accepted()) word->reject_map[i].setrej_postNN_1Il(); } } - -int16_t Tesseract::count_alphanums(WERD_RES *word_res) { +int16_t Tesseract::count_alphanums(WERD_RES* word_res) { int count = 0; - const WERD_CHOICE *best_choice = word_res->best_choice; + const WERD_CHOICE* best_choice = word_res->best_choice; for (int i = 0; i < word_res->reject_map.length(); ++i) { if ((word_res->reject_map[i].accepted()) && (word_res->uch_set->get_isalpha(best_choice->unichar_id(i)) || - word_res->uch_set->get_isdigit(best_choice->unichar_id(i)))) { + word_res->uch_set->get_isdigit(best_choice->unichar_id(i)))) { count++; } } return count; } - // reject all if most rejected. -void Tesseract::reject_mostly_rejects(WERD_RES *word) { +void Tesseract::reject_mostly_rejects(WERD_RES* word) { /* Reject the whole of the word if the fraction of rejects exceeds a limit */ - if ((float) word->reject_map.reject_count() / word->reject_map.length() >= - rej_whole_of_mostly_reject_word_fract) + if ((float)word->reject_map.reject_count() / word->reject_map.length() >= + rej_whole_of_mostly_reject_word_fract) word->reject_map.rej_word_mostly_rej(); } - bool Tesseract::repeated_nonalphanum_wd(WERD_RES* word, ROW* row) { int16_t char_quality; int16_t accepted_char_quality; - if (word->best_choice->unichar_lengths().length() <= 1) - return false; + if (word->best_choice->unichar_lengths().length() <= 1) return false; - if (!STRING(ok_repeated_ch_non_alphanum_wds). - contains(word->best_choice->unichar_string()[0])) + if (!STRING(ok_repeated_ch_non_alphanum_wds) + .contains(word->best_choice->unichar_string()[0])) return false; UNICHAR_ID uch_id = word->best_choice->unichar_id(0); @@ -592,15 +566,15 @@ bool Tesseract::repeated_nonalphanum_wd(WERD_RES* word, ROW* row) { word_char_quality(word, row, &char_quality, &accepted_char_quality); - if ((word->best_choice->unichar_lengths().length () == char_quality) && - (char_quality == accepted_char_quality)) + if ((word->best_choice->unichar_lengths().length() == char_quality) && + (char_quality == accepted_char_quality)) return true; else return false; } -int16_t Tesseract::safe_dict_word(const WERD_RES *werd_res) { - const WERD_CHOICE &word = *werd_res->best_choice; +int16_t Tesseract::safe_dict_word(const WERD_RES* werd_res) { + const WERD_CHOICE& word = *werd_res->best_choice; int dict_word_type = werd_res->tesseract->dict_word(word); return dict_word_type == DOC_DAWG_PERM ? 0 : dict_word_type; } @@ -608,16 +582,15 @@ int16_t Tesseract::safe_dict_word(const WERD_RES *werd_res) { // Note: After running this function word_res->ratings // might not contain the right BLOB_CHOICE corresponding to each character // in word_res->best_choice. -void Tesseract::flip_hyphens(WERD_RES *word_res) { - WERD_CHOICE *best_choice = word_res->best_choice; +void Tesseract::flip_hyphens(WERD_RES* word_res) { + WERD_CHOICE* best_choice = word_res->best_choice; int i; int prev_right = -9999; int next_left; TBOX out_box; float aspect_ratio; - if (tessedit_lower_flip_hyphen <= 1) - return; + if (tessedit_lower_flip_hyphen <= 1) return; int num_blobs = word_res->rebuild_word->NumBlobs(); UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-"); @@ -631,7 +604,7 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) { // Don't touch small or touching blobs - it is too dangerous. if ((out_box.width() > 8 * word_res->denorm.x_scale()) && (out_box.left() > prev_right) && (out_box.right() < next_left)) { - aspect_ratio = out_box.width() / (float) out_box.height(); + aspect_ratio = out_box.width() / (float)out_box.height(); if (word_res->uch_set->eq(best_choice->unichar_id(i), ".")) { if (aspect_ratio >= tessedit_upper_flip_hyphen && word_res->uch_set->contains_unichar_id(unichar_dash) && @@ -642,19 +615,18 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) { word_res->reject_map[i].setrej_hyphen_accept(); } if ((aspect_ratio > tessedit_lower_flip_hyphen) && - word_res->reject_map[i].accepted()) - //Suspected HYPHEN - word_res->reject_map[i].setrej_hyphen (); - } - else if (best_choice->unichar_id(i) == unichar_dash) { + word_res->reject_map[i].accepted()) + // Suspected HYPHEN + word_res->reject_map[i].setrej_hyphen(); + } else if (best_choice->unichar_id(i) == unichar_dash) { if ((aspect_ratio >= tessedit_upper_flip_hyphen) && - (word_res->reject_map[i].rejected())) + (word_res->reject_map[i].rejected())) word_res->reject_map[i].setrej_hyphen_accept(); - //Certain HYPHEN + // Certain HYPHEN if ((aspect_ratio <= tessedit_lower_flip_hyphen) && - (word_res->reject_map[i].accepted())) - //Suspected HYPHEN + (word_res->reject_map[i].accepted())) + // Suspected HYPHEN word_res->reject_map[i].setrej_hyphen(); } } @@ -665,13 +637,12 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) { // Note: After running this function word_res->ratings // might not contain the right BLOB_CHOICE corresponding to each character // in word_res->best_choice. -void Tesseract::flip_0O(WERD_RES *word_res) { - WERD_CHOICE *best_choice = word_res->best_choice; +void Tesseract::flip_0O(WERD_RES* word_res) { + WERD_CHOICE* best_choice = word_res->best_choice; int i; TBOX out_box; - if (!tessedit_flip_0O) - return; + if (!tessedit_flip_0O) return; int num_blobs = word_res->rebuild_word->NumBlobs(); for (i = 0; i < best_choice->length() && i < num_blobs; ++i) { @@ -680,8 +651,8 @@ void Tesseract::flip_0O(WERD_RES *word_res) { word_res->uch_set->get_isdigit(best_choice->unichar_id(i))) { out_box = blob->bounding_box(); if ((out_box.top() < kBlnBaselineOffset + kBlnXHeight) || - (out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4)) - return; //Beware words with sub/superscripts + (out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4)) + return; // Beware words with sub/superscripts } } UNICHAR_ID unichar_0 = word_res->uch_set->unichar_to_id("0"); @@ -696,74 +667,74 @@ void Tesseract::flip_0O(WERD_RES *word_res) { if (best_choice->unichar_id(i) == unichar_0 || best_choice->unichar_id(i) == unichar_O) { /* A0A */ - if ((i+1) < best_choice->length() && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i-1)) && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i+1))) { + if ((i + 1) < best_choice->length() && + non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) && + non_O_upper(*word_res->uch_set, best_choice->unichar_id(i + 1))) { best_choice->set_unichar_id(unichar_O, i); } /* A00A */ - if (non_O_upper(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+1) < best_choice->length() && - (best_choice->unichar_id(i+1) == unichar_0 || - best_choice->unichar_id(i+1) == unichar_O) && - (i+2) < best_choice->length() && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i+2))) { + if (non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) && + (i + 1) < best_choice->length() && + (best_choice->unichar_id(i + 1) == unichar_0 || + best_choice->unichar_id(i + 1) == unichar_O) && + (i + 2) < best_choice->length() && + non_O_upper(*word_res->uch_set, best_choice->unichar_id(i + 2))) { best_choice->set_unichar_id(unichar_O, i); i++; } /* AA0 */ if ((i > 1) && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i-2)) && - non_O_upper(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (((i+1) < best_choice->length() && - !word_res->uch_set->get_isdigit(best_choice->unichar_id(i+1)) && - !word_res->uch_set->eq(best_choice->unichar_id(i+1), "l") && - !word_res->uch_set->eq(best_choice->unichar_id(i+1), "I")) || + non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 2)) && + non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) && + (((i + 1) < best_choice->length() && + !word_res->uch_set->get_isdigit(best_choice->unichar_id(i + 1)) && + !word_res->uch_set->eq(best_choice->unichar_id(i + 1), "l") && + !word_res->uch_set->eq(best_choice->unichar_id(i + 1), "I")) || (i == best_choice->length() - 1))) { best_choice->set_unichar_id(unichar_O, i); } /* 9O9 */ - if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+1) < best_choice->length() && - non_0_digit(*word_res->uch_set, best_choice->unichar_id(i+1))) { + if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) && + (i + 1) < best_choice->length() && + non_0_digit(*word_res->uch_set, best_choice->unichar_id(i + 1))) { best_choice->set_unichar_id(unichar_0, i); } /* 9OOO */ - if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+2) < best_choice->length() && - (best_choice->unichar_id(i+1) == unichar_0 || - best_choice->unichar_id(i+1) == unichar_O) && - (best_choice->unichar_id(i+2) == unichar_0 || - best_choice->unichar_id(i+2) == unichar_O)) { + if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) && + (i + 2) < best_choice->length() && + (best_choice->unichar_id(i + 1) == unichar_0 || + best_choice->unichar_id(i + 1) == unichar_O) && + (best_choice->unichar_id(i + 2) == unichar_0 || + best_choice->unichar_id(i + 2) == unichar_O)) { best_choice->set_unichar_id(unichar_0, i); - best_choice->set_unichar_id(unichar_0, i+1); - best_choice->set_unichar_id(unichar_0, i+2); + best_choice->set_unichar_id(unichar_0, i + 1); + best_choice->set_unichar_id(unichar_0, i + 2); i += 2; } /* 9OO */ - if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+2) < best_choice->length() && - (best_choice->unichar_id(i+1) == unichar_0 || - best_choice->unichar_id(i+1) == unichar_O) && - !word_res->uch_set->get_isupper(best_choice->unichar_id(i+2))) { + if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) && + (i + 2) < best_choice->length() && + (best_choice->unichar_id(i + 1) == unichar_0 || + best_choice->unichar_id(i + 1) == unichar_O) && + !word_res->uch_set->get_isupper(best_choice->unichar_id(i + 2))) { best_choice->set_unichar_id(unichar_0, i); - best_choice->set_unichar_id(unichar_0, i+1); + best_choice->set_unichar_id(unichar_0, i + 1); i++; } /* 9O */ - if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i-1)) && - (i+1) < best_choice->length() && - !word_res->uch_set->get_isupper(best_choice->unichar_id(i+1))) { + if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) && + (i + 1) < best_choice->length() && + !word_res->uch_set->get_isupper(best_choice->unichar_id(i + 1))) { best_choice->set_unichar_id(unichar_0, i); } /* 9[.,]OOO.. */ if ((i > 1) && - (word_res->uch_set->eq(best_choice->unichar_id(i-1), ".") || - word_res->uch_set->eq(best_choice->unichar_id(i-1), ",")) && - (word_res->uch_set->get_isdigit(best_choice->unichar_id(i-2)) || - best_choice->unichar_id(i-2) == unichar_O)) { - if (best_choice->unichar_id(i-2) == unichar_O) { - best_choice->set_unichar_id(unichar_0, i-2); + (word_res->uch_set->eq(best_choice->unichar_id(i - 1), ".") || + word_res->uch_set->eq(best_choice->unichar_id(i - 1), ",")) && + (word_res->uch_set->get_isdigit(best_choice->unichar_id(i - 2)) || + best_choice->unichar_id(i - 2) == unichar_O)) { + if (best_choice->unichar_id(i - 2) == unichar_O) { + best_choice->set_unichar_id(unichar_0, i - 2); } while (i < best_choice->length() && (best_choice->unichar_id(i) == unichar_O || diff --git a/src/ccmain/reject.h b/src/ccmain/reject.h index a4ccd49639..5cdb1f77d6 100644 --- a/src/ccmain/reject.h +++ b/src/ccmain/reject.h @@ -17,18 +17,18 @@ * **********************************************************************/ -#ifndef REJECT_H -#define REJECT_H +#ifndef REJECT_H +#define REJECT_H -#include "params.h" -#include "pageres.h" +#include "pageres.h" +#include "params.h" -void reject_blanks(WERD_RES *word); -void reject_poor_matches(WERD_RES *word); +void reject_blanks(WERD_RES* word); +void reject_poor_matches(WERD_RES* word); float compute_reject_threshold(WERD_CHOICE* word); bool word_contains_non_1_digit(const char* word, const char* word_lengths); -void dont_allow_1Il(WERD_RES *word); -void flip_hyphens(WERD_RES *word); -void flip_0O(WERD_RES *word); +void dont_allow_1Il(WERD_RES* word); +void flip_hyphens(WERD_RES* word); +void flip_0O(WERD_RES* word); bool non_0_digit(const char* str, int length); #endif diff --git a/src/ccmain/resultiterator.cpp b/src/ccmain/resultiterator.cpp index 5c502cbdbb..d16e76ca81 100644 --- a/src/ccmain/resultiterator.cpp +++ b/src/ccmain/resultiterator.cpp @@ -30,13 +30,13 @@ namespace tesseract { -ResultIterator::ResultIterator(const LTRResultIterator &resit) +ResultIterator::ResultIterator(const LTRResultIterator& resit) : LTRResultIterator(resit) { in_minor_direction_ = false; at_beginning_of_minor_run_ = false; preserve_interword_spaces_ = false; - BoolParam *p = ParamUtils::FindParam( + BoolParam* p = ParamUtils::FindParam( "preserve_interword_spaces", GlobalParams()->bool_params, tesseract_->params()->bool_params); if (p != nullptr) preserve_interword_spaces_ = (bool)(*p); @@ -45,8 +45,8 @@ ResultIterator::ResultIterator(const LTRResultIterator &resit) MoveToLogicalStartOfTextline(); } -ResultIterator *ResultIterator::StartOfParagraph( - const LTRResultIterator &resit) { +ResultIterator* ResultIterator::StartOfParagraph( + const LTRResultIterator& resit) { return new ResultIterator(resit); } @@ -55,8 +55,7 @@ bool ResultIterator::ParagraphIsLtr() const { } bool ResultIterator::CurrentParagraphIsLtr() const { - if (!it_->word()) - return true; // doesn't matter. + if (!it_->word()) return true; // doesn't matter. LTRResultIterator it(*this); it.RestartParagraph(); // Try to figure out the ltr-ness of the paragraph. The rules below @@ -94,16 +93,14 @@ bool ResultIterator::CurrentParagraphIsLtr() const { num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0; num_ltr += rightmost_ltr ? 1 : 0; } - if (leftmost_rtl) - return false; - if (rightmost_ltr) - return true; + if (leftmost_rtl) return false; + if (rightmost_ltr) return true; // First line is ambiguous. Take statistics on the whole paragraph. if (!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)) do { - StrongScriptDirection dir = it.WordDirection(); - num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0; - num_ltr += (dir == DIR_LEFT_TO_RIGHT) ? 1 : 0; - } while (it.Next(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)); + StrongScriptDirection dir = it.WordDirection(); + num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0; + num_ltr += (dir == DIR_LEFT_TO_RIGHT) ? 1 : 0; + } while (it.Next(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)); return num_ltr >= num_rtl; } @@ -112,14 +109,13 @@ const int ResultIterator::kMinorRunEnd = -2; const int ResultIterator::kComplexWord = -3; void ResultIterator::CalculateBlobOrder( - GenericVector *blob_indices) const { + GenericVector* blob_indices) const { bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_; blob_indices->clear(); if (Empty(RIL_WORD)) return; if (context_is_ltr || it_->word()->UnicharsInReadingOrder()) { // Easy! just return the blobs in order; - for (int i = 0; i < word_length_; i++) - blob_indices->push_back(i); + for (int i = 0; i < word_length_; i++) blob_indices->push_back(i); return; } @@ -152,13 +148,17 @@ void ResultIterator::CalculateBlobOrder( for (int i = 0; i < word_length_; i++) { if (letter_types[i] == U_EURO_NUM_TERM) { int j = i + 1; - while (j < word_length_ && letter_types[j] == U_EURO_NUM_TERM) { j++; } + while (j < word_length_ && letter_types[j] == U_EURO_NUM_TERM) { + j++; + } if (j < word_length_ && letter_types[j] == U_EURO_NUM) { // The sequence [i..j] should be converted to all European Numbers. for (int k = i; k < j; k++) letter_types[k] = U_EURO_NUM; } j = i - 1; - while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) { j--; } + while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) { + j--; + } if (j > -1 && letter_types[j] == U_EURO_NUM) { // The sequence [j..i] should be converted to all European Numbers. for (int k = j; k <= i; k++) letter_types[k] = U_EURO_NUM; @@ -200,7 +200,8 @@ void ResultIterator::CalculateBlobOrder( } else { // left to right sequence. scan to the beginning. int j = i - 1; - for (; j >= 0 && letter_types[j] != U_RTL; j--) { } // pass + for (; j >= 0 && letter_types[j] != U_RTL; j--) { + } // pass // Now (j, i] is LTR for (int k = j + 1; k <= i; k++) blob_indices->push_back(k); i = j; @@ -209,34 +210,42 @@ void ResultIterator::CalculateBlobOrder( ASSERT_HOST(blob_indices->size() == word_length_); } -static void PrintScriptDirs(const GenericVector &dirs) { +static void PrintScriptDirs(const GenericVector& dirs) { for (int i = 0; i < dirs.size(); i++) { switch (dirs[i]) { - case DIR_NEUTRAL: tprintf ("N "); break; - case DIR_LEFT_TO_RIGHT: tprintf("L "); break; - case DIR_RIGHT_TO_LEFT: tprintf("R "); break; - case DIR_MIX: tprintf("Z "); break; - default: tprintf("? "); break; + case DIR_NEUTRAL: + tprintf("N "); + break; + case DIR_LEFT_TO_RIGHT: + tprintf("L "); + break; + case DIR_RIGHT_TO_LEFT: + tprintf("R "); + break; + case DIR_MIX: + tprintf("Z "); + break; + default: + tprintf("? "); + break; } } tprintf("\n"); } void ResultIterator::CalculateTextlineOrder( - bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVectorEqEq *word_indices) const { + bool paragraph_is_ltr, const LTRResultIterator& resit, + GenericVectorEqEq* word_indices) const { GenericVector directions; CalculateTextlineOrder(paragraph_is_ltr, resit, &directions, word_indices); } void ResultIterator::CalculateTextlineOrder( - bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVector *dirs_arg, - GenericVectorEqEq *word_indices) const { + bool paragraph_is_ltr, const LTRResultIterator& resit, + GenericVector* dirs_arg, + GenericVectorEqEq* word_indices) const { GenericVector dirs; - GenericVector *directions; + GenericVector* directions; directions = (dirs_arg != nullptr) ? dirs_arg : &dirs; directions->truncate(0); @@ -254,8 +263,8 @@ void ResultIterator::CalculateTextlineOrder( void ResultIterator::CalculateTextlineOrder( bool paragraph_is_ltr, - const GenericVector &word_dirs, - GenericVectorEqEq *reading_order) { + const GenericVector& word_dirs, + GenericVectorEqEq* reading_order) { reading_order->truncate(0); if (word_dirs.size() == 0) return; @@ -302,11 +311,9 @@ void ResultIterator::CalculateTextlineOrder( for (int i = start; i != end;) { if (word_dirs[i] == minor_direction) { int j = i; - while (j != end && word_dirs[j] != major_direction) - j += major_step; + while (j != end && word_dirs[j] != major_direction) j += major_step; if (j == end) j -= major_step; - while (j != i && word_dirs[j] != minor_direction) - j -= major_step; + while (j != i && word_dirs[j] != minor_direction) j -= major_step; // [j..i] is a minor direction run. reading_order->push_back(kMinorRunStart); for (int k = j; k != i; k -= major_step) { @@ -359,7 +366,7 @@ bool ResultIterator::IsAtFirstSymbolOfWord() const { return blob_order.size() == 0 || blob_order[0] == blob_index_; } -void ResultIterator::AppendSuffixMarks(STRING *text) const { +void ResultIterator::AppendSuffixMarks(STRING* text) const { if (!it_->word()) return; bool reading_direction_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_; @@ -369,8 +376,7 @@ void ResultIterator::AppendSuffixMarks(STRING *text) const { // direction's mark; else if this was a complex word, insert the // current reading order's mark. GenericVectorEqEq textline_order; - CalculateTextlineOrder(current_paragraph_is_ltr_, - *this, &textline_order); + CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &textline_order); int this_word_index = LTRWordIndex(); int i = textline_order.get_index(this_word_index); if (i < 0) return; @@ -398,8 +404,10 @@ void ResultIterator::MoveToLogicalStartOfTextline() { &word_indices); int i = 0; for (; i < word_indices.size() && word_indices[i] < 0; i++) { - if (word_indices[i] == kMinorRunStart) in_minor_direction_ = true; - else if (word_indices[i] == kMinorRunEnd) in_minor_direction_ = false; + if (word_indices[i] == kMinorRunStart) + in_minor_direction_ = true; + else if (word_indices[i] == kMinorRunEnd) + in_minor_direction_ = false; } if (in_minor_direction_) at_beginning_of_minor_run_ = true; if (i >= word_indices.size()) return; @@ -419,7 +427,7 @@ void ResultIterator::Begin() { } bool ResultIterator::Next(PageIteratorLevel level) { - if (it_->block() == nullptr) return false; // already at end! + if (it_->block() == nullptr) return false; // already at end! switch (level) { case RIL_BLOCK: // explicit fall-through case RIL_PARA: // explicit fall-through @@ -433,8 +441,7 @@ bool ResultIterator::Next(PageIteratorLevel level) { in_minor_direction_ = false; MoveToLogicalStartOfTextline(); return it_->block() != nullptr; - case RIL_SYMBOL: - { + case RIL_SYMBOL: { GenericVector blob_order; CalculateBlobOrder(&blob_order); int next_blob = 0; @@ -455,9 +462,7 @@ bool ResultIterator::Next(PageIteratorLevel level) { if (it_->word() == nullptr) return Next(RIL_BLOCK); GenericVectorEqEq word_indices; int this_word_index = LTRWordIndex(); - CalculateTextlineOrder(current_paragraph_is_ltr_, - *this, - &word_indices); + CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &word_indices); int final_real_index = word_indices.size() - 1; while (final_real_index > 0 && word_indices[final_real_index] < 0) final_real_index--; @@ -471,8 +476,8 @@ bool ResultIterator::Next(PageIteratorLevel level) { at_beginning_of_minor_run_ = (word_indices[j - 1] == kMinorRunStart); // awesome, we move to word_indices[j] if (BidiDebug(3)) { - tprintf("Next(RIL_WORD): %d -> %d\n", - this_word_index, word_indices[j]); + tprintf("Next(RIL_WORD): %d -> %d\n", this_word_index, + word_indices[j]); } PageIterator::RestartRow(); for (int k = 0; k < word_indices[j]; k++) { @@ -495,7 +500,7 @@ bool ResultIterator::Next(PageIteratorLevel level) { bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const { if (it_->block() == nullptr) return false; // Already at the end! - if (it_->word() == nullptr) return true; // In an image block. + if (it_->word() == nullptr) return true; // In an image block. if (level == RIL_SYMBOL) return true; // Always at beginning of a symbol. bool at_word_start = IsAtFirstSymbolOfWord(); @@ -511,13 +516,13 @@ bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const { // now we move to the left-most word... line_start.RestartRow(); bool at_block_start = at_textline_start && - line_start.it_->block() != line_start.it_->prev_block(); + line_start.it_->block() != line_start.it_->prev_block(); if (level == RIL_BLOCK) return at_block_start; - bool at_para_start = at_block_start || - (at_textline_start && - line_start.it_->row()->row->para() != - line_start.it_->prev_row()->row->para()); + bool at_para_start = + at_block_start || + (at_textline_start && line_start.it_->row()->row->para() != + line_start.it_->prev_row()->row->para()); if (level == RIL_PARA) return at_para_start; ASSERT_HOST(false); // shouldn't happen. @@ -543,8 +548,7 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, if (next.Empty(element)) return true; // Reached the end of the page. while (element > level) { element = static_cast(element - 1); - if (!next.IsAtBeginningOf(element)) - return false; + if (!next.IsAtBeginningOf(element)) return false; } return true; } @@ -563,38 +567,32 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const { if (it_->word() == nullptr) return nullptr; // Already at the end! STRING text; switch (level) { - case RIL_BLOCK: - { - ResultIterator pp(*this); - do { - pp.AppendUTF8ParagraphText(&text); - } while (pp.Next(RIL_PARA) && pp.it_->block() == it_->block()); - } - break; + case RIL_BLOCK: { + ResultIterator pp(*this); + do { + pp.AppendUTF8ParagraphText(&text); + } while (pp.Next(RIL_PARA) && pp.it_->block() == it_->block()); + } break; case RIL_PARA: AppendUTF8ParagraphText(&text); break; - case RIL_TEXTLINE: - { - ResultIterator it(*this); - it.MoveToLogicalStartOfTextline(); - it.IterateAndAppendUTF8TextlineText(&text); - } - break; + case RIL_TEXTLINE: { + ResultIterator it(*this); + it.MoveToLogicalStartOfTextline(); + it.IterateAndAppendUTF8TextlineText(&text); + } break; case RIL_WORD: AppendUTF8WordText(&text); break; - case RIL_SYMBOL: - { - bool reading_direction_is_ltr = + case RIL_SYMBOL: { + bool reading_direction_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_; - if (at_beginning_of_minor_run_) { - text += reading_direction_is_ltr ? kLRM : kRLM; - } - text = it_->word()->BestUTF8(blob_index_, false); - if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text); + if (at_beginning_of_minor_run_) { + text += reading_direction_is_ltr ? kLRM : kRLM; } - break; + text = it_->word()->BestUTF8(blob_index_, false); + if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text); + } break; } int length = text.length() + 1; char* result = new char[length]; @@ -602,7 +600,7 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const { return result; } -void ResultIterator::AppendUTF8WordText(STRING *text) const { +void ResultIterator::AppendUTF8WordText(STRING* text) const { if (!it_->word()) return; ASSERT_HOST(it_->word()->best_choice != nullptr); bool reading_direction_is_ltr = @@ -619,7 +617,7 @@ void ResultIterator::AppendUTF8WordText(STRING *text) const { AppendSuffixMarks(text); } -void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) { +void ResultIterator::IterateAndAppendUTF8TextlineText(STRING* text) { if (Empty(RIL_WORD)) { Next(RIL_WORD); return; @@ -627,8 +625,8 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) { if (BidiDebug(1)) { GenericVectorEqEq textline_order; GenericVector dirs; - CalculateTextlineOrder(current_paragraph_is_ltr_, - *this, &dirs, &textline_order); + CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &dirs, + &textline_order); tprintf("Strong Script dirs [%p/P=%s]: ", it_->row(), current_paragraph_is_ltr_ ? "ltr" : "rtl"); PrintScriptDirs(dirs); @@ -663,7 +661,7 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) { } } -void ResultIterator::AppendUTF8ParagraphText(STRING *text) const { +void ResultIterator::AppendUTF8ParagraphText(STRING* text) const { ResultIterator it(*this); it.RestartParagraph(); it.MoveToLogicalStartOfTextline(); @@ -675,9 +673,9 @@ void ResultIterator::AppendUTF8ParagraphText(STRING *text) const { bool ResultIterator::BidiDebug(int min_level) const { int debug_level = 1; - IntParam *p = ParamUtils::FindParam( - "bidi_debug", GlobalParams()->int_params, - tesseract_->params()->int_params); + IntParam* p = + ParamUtils::FindParam("bidi_debug", GlobalParams()->int_params, + tesseract_->params()->int_params); if (p != nullptr) debug_level = (int32_t)(*p); return debug_level >= min_level; } diff --git a/src/ccmain/resultiterator.h b/src/ccmain/resultiterator.h index f3d0e98885..49b80e4c9d 100644 --- a/src/ccmain/resultiterator.h +++ b/src/ccmain/resultiterator.h @@ -22,11 +22,13 @@ #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_ #define TESSERACT_CCMAIN_RESULT_ITERATOR_H_ -#include "platform.h" #include "ltrresultiterator.h" +#include "platform.h" -template class GenericVector; -template class GenericVectorEqEq; +template +class GenericVector; +template +class GenericVectorEqEq; class BLOB_CHOICE_IT; class WERD_RES; class STRING; @@ -37,7 +39,7 @@ class Tesseract; class TESS_API ResultIterator : public LTRResultIterator { public: - static ResultIterator *StartOfParagraph(const LTRResultIterator &resit); + static ResultIterator* StartOfParagraph(const LTRResultIterator& resit); /** * ResultIterator is copy constructible! @@ -78,7 +80,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * Implement PageIterator's IsAtFinalElement correctly in a BiDi context. * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we * point at the last word in a paragraph. See PageIterator for full comment. - */ + */ virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const; @@ -91,13 +93,13 @@ class TESS_API ResultIterator : public LTRResultIterator { /** * Returns the null terminated UTF-8 encoded text string for the current * object at the given level. Use delete [] to free after use. - */ + */ virtual char* GetUTF8Text(PageIteratorLevel level) const; /** * Return whether the current paragraph's dominant reading direction * is left-to-right (as opposed to right-to-left). - */ + */ bool ParagraphIsLtr() const; // ============= Exposed only for testing =============. @@ -126,8 +128,8 @@ class TESS_API ResultIterator : public LTRResultIterator { */ static void CalculateTextlineOrder( bool paragraph_is_ltr, - const GenericVector &word_dirs, - GenericVectorEqEq *reading_order); + const GenericVector& word_dirs, + GenericVectorEqEq* reading_order); static const int kMinorRunStart; static const int kMinorRunEnd; @@ -140,7 +142,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * it resets to the beginning of the paragraph instead of staying wherever * resit might have pointed. */ - TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit); + TESS_LOCAL explicit ResultIterator(const LTRResultIterator& resit); private: /** @@ -161,13 +163,13 @@ class TESS_API ResultIterator : public LTRResultIterator { * right-to-left characters and was treated as neutral. */ void CalculateTextlineOrder(bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVectorEqEq *indices) const; + const LTRResultIterator& resit, + GenericVectorEqEq* indices) const; /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */ void CalculateTextlineOrder(bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVector *ssd, - GenericVectorEqEq *indices) const; + const LTRResultIterator& resit, + GenericVector* ssd, + GenericVectorEqEq* indices) const; /** * What is the index of the current word in a strict left-to-right reading @@ -179,7 +181,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * Given an iterator pointing at a word, returns the logical reading order * of blob indices for the word. */ - void CalculateBlobOrder(GenericVector *blob_indices) const; + void CalculateBlobOrder(GenericVector* blob_indices) const; /** Precondition: current_paragraph_is_ltr_ is set. */ void MoveToLogicalStartOfTextline(); @@ -200,10 +202,10 @@ class TESS_API ResultIterator : public LTRResultIterator { * Append any extra marks that should be appended to this word when printed. * Mostly, these are Unicode BiDi control characters. */ - void AppendSuffixMarks(STRING *text) const; + void AppendSuffixMarks(STRING* text) const; /** Appends the current word in reading order to the given buffer.*/ - void AppendUTF8WordText(STRING *text) const; + void AppendUTF8WordText(STRING* text) const; /** * Appends the text of the current text line, *assuming this iterator is @@ -212,7 +214,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * Each textline is terminated in a single newline character. * If the textline ends a paragraph, it gets a second terminal newline. */ - void IterateAndAppendUTF8TextlineText(STRING *text); + void IterateAndAppendUTF8TextlineText(STRING* text); /** * Appends the text of the current paragraph in reading order @@ -220,7 +222,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * Each textline is terminated in a single newline character, and the * paragraph gets an extra newline at the end. */ - void AppendUTF8ParagraphText(STRING *text) const; + void AppendUTF8ParagraphText(STRING* text) const; /** Returns whether the bidi_debug flag is set to at least min_level. */ bool BidiDebug(int min_level) const; diff --git a/src/ccmain/superscript.cpp b/src/ccmain/superscript.cpp index 75e12b6472..9141e09300 100644 --- a/src/ccmain/superscript.cpp +++ b/src/ccmain/superscript.cpp @@ -20,21 +20,19 @@ #include "normalis.h" #include "tesseractclass.h" -static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) { +static int LeadingUnicharsToChopped(WERD_RES* word, int num_unichars) { int num_chopped = 0; - for (int i = 0; i < num_unichars; i++) - num_chopped += word->best_state[i]; + for (int i = 0; i < num_unichars; i++) num_chopped += word->best_state[i]; return num_chopped; } -static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) { +static int TrailingUnicharsToChopped(WERD_RES* word, int num_unichars) { int num_chopped = 0; for (int i = 0; i < num_unichars; i++) num_chopped += word->best_state[word->best_state.size() - 1 - i]; return num_chopped; } - namespace tesseract { /** @@ -43,10 +41,10 @@ namespace tesseract { * or superscript letter based only on y position. Also do this for the * right side. */ -void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, - int super_y_bottom, int sub_y_top, - ScriptPos *leading_pos, int *num_leading_outliers, - ScriptPos *trailing_pos, int *num_trailing_outliers) { +void YOutlierPieces(WERD_RES* word, int rebuilt_blob_index, int super_y_bottom, + int sub_y_top, ScriptPos* leading_pos, + int* num_leading_outliers, ScriptPos* trailing_pos, + int* num_trailing_outliers) { ScriptPos sp_unused1, sp_unused2; int unused1, unused2; if (!leading_pos) leading_pos = &sp_unused1; @@ -98,9 +96,8 @@ void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, * * @return Whether we modified the given word. */ -bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { - if (word->tess_failed || word->word->flag(W_REP_CHAR) || - !word->best_choice) { +bool Tesseract::SubAndSuperscriptFix(WERD_RES* word) { + if (word->tess_failed || word->word->flag(W_REP_CHAR) || !word->best_choice) { return false; } int num_leading, num_trailing; @@ -110,12 +107,11 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { // Calculate the number of whole suspicious characters at the edges. GetSubAndSuperscriptCandidates( - word, &num_leading, &sp_leading, &leading_certainty, - &num_trailing, &sp_trailing, &trailing_certainty, - &avg_certainty, &unlikely_threshold); + word, &num_leading, &sp_leading, &leading_certainty, &num_trailing, + &sp_trailing, &trailing_certainty, &avg_certainty, &unlikely_threshold); - const char *leading_pos = sp_leading == SP_SUBSCRIPT ? "sub" : "super"; - const char *trailing_pos = sp_trailing == SP_SUBSCRIPT ? "sub" : "super"; + const char* leading_pos = sp_leading == SP_SUBSCRIPT ? "sub" : "super"; + const char* trailing_pos = sp_trailing == SP_SUBSCRIPT ? "sub" : "super"; int num_blobs = word->best_choice->length(); @@ -127,15 +123,14 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { if (num_leading + num_trailing < num_blobs && unlikely_threshold < 0.0) { int super_y_bottom = kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom; - int sub_y_top = - kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top; + int sub_y_top = kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top; int last_word_char = num_blobs - 1 - num_trailing; float last_char_certainty = word->best_choice->certainty(last_word_char); if (word->best_choice->unichar_id(last_word_char) != 0 && last_char_certainty <= unlikely_threshold) { ScriptPos rpos; - YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top, - nullptr, nullptr, &rpos, &num_remainder_trailing); + YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top, nullptr, + nullptr, &rpos, &num_remainder_trailing); if (num_trailing > 0 && rpos != sp_trailing) num_remainder_trailing = 0; if (num_remainder_trailing > 0 && last_char_certainty < trailing_certainty) { @@ -143,14 +138,14 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { } } bool another_blob_available = (num_remainder_trailing == 0) || - num_leading + num_trailing + 1 < num_blobs; + num_leading + num_trailing + 1 < num_blobs; int first_char_certainty = word->best_choice->certainty(num_leading); if (another_blob_available && word->best_choice->unichar_id(num_leading) != 0 && first_char_certainty <= unlikely_threshold) { ScriptPos lpos; - YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top, - &lpos, &num_remainder_leading, nullptr, nullptr); + YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top, &lpos, + &num_remainder_leading, nullptr, nullptr); if (num_leading > 0 && lpos != sp_leading) num_remainder_leading = 0; if (num_remainder_leading > 0 && first_char_certainty < leading_certainty) { @@ -160,8 +155,9 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { } // If nothing to do, bail now. - if (num_leading + num_trailing + - num_remainder_leading + num_remainder_trailing == 0) { + if (num_leading + num_trailing + num_remainder_leading + + num_remainder_trailing == + 0) { return false; } @@ -184,8 +180,7 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { if (superscript_debug >= 2) { tprintf(" Certainties -- Average: %.2f Unlikely thresh: %.2f ", avg_certainty, unlikely_threshold); - if (num_leading) - tprintf("Orig. leading (min): %.2f ", leading_certainty); + if (num_leading) tprintf("Orig. leading (min): %.2f ", leading_certainty); if (num_trailing) tprintf("Orig. trailing (min): %.2f ", trailing_certainty); tprintf("\n"); @@ -202,10 +197,10 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { int retry_leading = 0; int retry_trailing = 0; bool is_good = false; - WERD_RES *revised = TrySuperscriptSplits( - num_chopped_leading, leading_certainty, sp_leading, - num_chopped_trailing, trailing_certainty, sp_trailing, - word, &is_good, &retry_leading, &retry_trailing); + WERD_RES* revised = TrySuperscriptSplits( + num_chopped_leading, leading_certainty, sp_leading, num_chopped_trailing, + trailing_certainty, sp_trailing, word, &is_good, &retry_leading, + &retry_trailing); if (is_good) { word->ConsumeWordResults(revised); } else if (retry_leading || retry_trailing) { @@ -213,10 +208,10 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { LeadingUnicharsToChopped(revised, retry_leading); int retry_chopped_trailing = TrailingUnicharsToChopped(revised, retry_trailing); - WERD_RES *revised2 = TrySuperscriptSplits( + WERD_RES* revised2 = TrySuperscriptSplits( retry_chopped_leading, leading_certainty, sp_leading, - retry_chopped_trailing, trailing_certainty, sp_trailing, - revised, &is_good, &retry_leading, &retry_trailing); + retry_chopped_trailing, trailing_certainty, sp_trailing, revised, + &is_good, &retry_leading, &retry_trailing); if (is_good) { word->ConsumeWordResults(revised2); } @@ -250,23 +245,18 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) { * @param[out] unlikely_threshold the threshold (on certainty) we used to * select "bad enough" outlier characters. */ -void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, - int *num_rebuilt_leading, - ScriptPos *leading_pos, - float *leading_certainty, - int *num_rebuilt_trailing, - ScriptPos *trailing_pos, - float *trailing_certainty, - float *avg_certainty, - float *unlikely_threshold) { +void Tesseract::GetSubAndSuperscriptCandidates( + const WERD_RES* word, int* num_rebuilt_leading, ScriptPos* leading_pos, + float* leading_certainty, int* num_rebuilt_trailing, + ScriptPos* trailing_pos, float* trailing_certainty, float* avg_certainty, + float* unlikely_threshold) { *avg_certainty = *unlikely_threshold = 0.0f; *num_rebuilt_leading = *num_rebuilt_trailing = 0; *leading_certainty = *trailing_certainty = 0.0f; int super_y_bottom = kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom; - int sub_y_top = - kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top; + int sub_y_top = kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top; // Step one: Get an average certainty for "normally placed" characters. @@ -319,8 +309,7 @@ void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, *avg_certainty = normal_certainty_total / num_normal; *unlikely_threshold = superscript_worse_certainty * (*avg_certainty); } - if (num_normal == 0 || - (leading_outliers == 0 && trailing_outliers == 0)) { + if (num_normal == 0 || (leading_outliers == 0 && trailing_outliers == 0)) { return; } @@ -328,8 +317,7 @@ void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, // and have much lower certainty than average // Calculate num_leading and leading_certainty. for (*leading_certainty = 0.0f, *num_rebuilt_leading = 0; - *num_rebuilt_leading < leading_outliers; - (*num_rebuilt_leading)++) { + *num_rebuilt_leading < leading_outliers; (*num_rebuilt_leading)++) { float char_certainty = word->best_choice->certainty(*num_rebuilt_leading); if (char_certainty > *unlikely_threshold) { break; @@ -341,8 +329,7 @@ void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, // Calculate num_trailing and trailing_certainty. for (*trailing_certainty = 0.0f, *num_rebuilt_trailing = 0; - *num_rebuilt_trailing < trailing_outliers; - (*num_rebuilt_trailing)++) { + *num_rebuilt_trailing < trailing_outliers; (*num_rebuilt_trailing)++) { int blob_idx = num_blobs - 1 - *num_rebuilt_trailing; float char_certainty = word->best_choice->certainty(blob_idx); if (char_certainty > *unlikely_threshold) { @@ -354,7 +341,6 @@ void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, } } - /** * Try splitting off the given number of (chopped) blobs from the front and * back of the given word and recognizing the pieces. @@ -379,24 +365,22 @@ void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, * and trailing blobs / unichars. * @return A word which is the result of re-recognizing as asked. */ -WERD_RES *Tesseract::TrySuperscriptSplits( +WERD_RES* Tesseract::TrySuperscriptSplits( int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, - int num_chopped_trailing, float trailing_certainty, - ScriptPos trailing_pos, - WERD_RES *word, - bool *is_good, - int *retry_rebuild_leading, int *retry_rebuild_trailing) { + int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, + WERD_RES* word, bool* is_good, int* retry_rebuild_leading, + int* retry_rebuild_trailing) { int num_chopped = word->chopped_word->NumBlobs(); *retry_rebuild_leading = *retry_rebuild_trailing = 0; // Chop apart the word into up to three pieces. - BlamerBundle *bb0 = nullptr; - BlamerBundle *bb1 = nullptr; - WERD_RES *prefix = nullptr; - WERD_RES *core = nullptr; - WERD_RES *suffix = nullptr; + BlamerBundle* bb0 = nullptr; + BlamerBundle* bb1 = nullptr; + WERD_RES* prefix = nullptr; + WERD_RES* core = nullptr; + WERD_RES* suffix = nullptr; if (num_chopped_leading > 0) { prefix = new WERD_RES(*word); split_word(prefix, num_chopped_leading, &core, &bb0); @@ -460,14 +444,16 @@ WERD_RES *Tesseract::TrySuperscriptSplits( // Evaluate whether we think the results are believably better // than what we already had. - bool good_prefix = !prefix || BelievableSuperscript( - superscript_debug >= 1, *prefix, - superscript_bettered_certainty * leading_certainty, - retry_rebuild_leading, nullptr); - bool good_suffix = !suffix || BelievableSuperscript( - superscript_debug >= 1, *suffix, - superscript_bettered_certainty * trailing_certainty, - nullptr, retry_rebuild_trailing); + bool good_prefix = + !prefix || + BelievableSuperscript(superscript_debug >= 1, *prefix, + superscript_bettered_certainty * leading_certainty, + retry_rebuild_leading, nullptr); + bool good_suffix = + !suffix || + BelievableSuperscript(superscript_debug >= 1, *suffix, + superscript_bettered_certainty * trailing_certainty, + nullptr, retry_rebuild_trailing); *is_good = good_prefix && good_suffix; if (!*is_good && !*retry_rebuild_leading && !*retry_rebuild_trailing) { @@ -499,7 +485,6 @@ WERD_RES *Tesseract::TrySuperscriptSplits( return core; } - /** * Return whether this is believable superscript or subscript text. * @@ -518,33 +503,31 @@ WERD_RES *Tesseract::TrySuperscriptSplits( * @param[out] right_ok How many right-side characters were ok? * @return Whether the complete best choice is believable as a superscript. */ -bool Tesseract::BelievableSuperscript(bool debug, - const WERD_RES &word, - float certainty_threshold, - int *left_ok, - int *right_ok) const { +bool Tesseract::BelievableSuperscript(bool debug, const WERD_RES& word, + float certainty_threshold, int* left_ok, + int* right_ok) const { int initial_ok_run_count = 0; int ok_run_count = 0; float worst_certainty = 0.0f; - const WERD_CHOICE &wc = *word.best_choice; + const WERD_CHOICE& wc = *word.best_choice; const UnicityTable& fontinfo_table = get_fontinfo_table(); for (int i = 0; i < wc.length(); i++) { - TBLOB *blob = word.rebuild_word->blobs[i]; + TBLOB* blob = word.rebuild_word->blobs[i]; UNICHAR_ID unichar_id = wc.unichar_id(i); float char_certainty = wc.certainty(i); bool bad_certainty = char_certainty < certainty_threshold; bool is_punc = wc.unicharset()->get_ispunctuation(unichar_id); bool is_italic = word.fontinfo && word.fontinfo->is_italic(); - BLOB_CHOICE *choice = word.GetBlobChoice(i); + BLOB_CHOICE* choice = word.GetBlobChoice(i); if (choice && fontinfo_table.size() > 0) { // Get better information from the specific choice, if available. int font_id1 = choice->fontinfo_id(); - bool font1_is_italic = font_id1 >= 0 - ? fontinfo_table.get(font_id1).is_italic() : false; + bool font1_is_italic = + font_id1 >= 0 ? fontinfo_table.get(font_id1).is_italic() : false; int font_id2 = choice->fontinfo_id2(); is_italic = font1_is_italic && - (font_id2 < 0 || fontinfo_table.get(font_id2).is_italic()); + (font_id2 < 0 || fontinfo_table.get(font_id2).is_italic()); } float height_fraction = 1.0f; @@ -552,9 +535,8 @@ bool Tesseract::BelievableSuperscript(bool debug, float normal_height = char_height; if (wc.unicharset()->top_bottom_useful()) { int min_bot, max_bot, min_top, max_top; - wc.unicharset()->get_top_bottom(unichar_id, - &min_bot, &max_bot, - &min_top, &max_top); + wc.unicharset()->get_top_bottom(unichar_id, &min_bot, &max_bot, &min_top, + &max_top); float hi_height = max_top - max_bot; float lo_height = min_top - min_bot; normal_height = (hi_height + lo_height) / 2; @@ -573,15 +555,18 @@ bool Tesseract::BelievableSuperscript(bool debug, if (is_punc) { tprintf(" Rejecting: punctuation present.\n"); } - const char *char_str = wc.unicharset()->id_to_unichar(unichar_id); + const char* char_str = wc.unicharset()->id_to_unichar(unichar_id); if (bad_certainty) { - tprintf(" Rejecting: don't believe character %s with certainty %.2f " - "which is less than threshold %.2f\n", char_str, - char_certainty, certainty_threshold); + tprintf( + " Rejecting: don't believe character %s with certainty %.2f " + "which is less than threshold %.2f\n", + char_str, char_certainty, certainty_threshold); } if (bad_height) { - tprintf(" Rejecting: character %s seems too small @ %.2f versus " - "expected %.2f\n", char_str, char_height, normal_height); + tprintf( + " Rejecting: character %s seems too small @ %.2f versus " + "expected %.2f\n", + char_str, char_height, normal_height); } } if (bad_certainty || bad_height || is_punc || is_italic) { @@ -607,5 +592,4 @@ bool Tesseract::BelievableSuperscript(bool debug, return all_ok; } - } // namespace tesseract diff --git a/src/ccmain/tessbox.cpp b/src/ccmain/tessbox.cpp index 64b3eb68f2..cb8bf01d44 100644 --- a/src/ccmain/tessbox.cpp +++ b/src/ccmain/tessbox.cpp @@ -17,8 +17,8 @@ * **********************************************************************/ -#include "mfoutline.h" #include "tessbox.h" +#include "mfoutline.h" #include "tesseractclass.h" #define EXTERN @@ -32,7 +32,7 @@ */ namespace tesseract { -void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES *word) { +void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES* word) { int saved_enable_assoc = 0; int saved_chop_enable = 0; @@ -47,8 +47,7 @@ void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES *word) { else set_pass2(); recog_word(word); - if (word->best_choice == nullptr) - word->SetupFake(*word->uch_set); + if (word->best_choice == nullptr) word->SetupFake(*word->uch_set); if (word->word->flag(W_DONT_CHOP)) { wordrec_enable_assoc.set_value(saved_enable_assoc); chop_enable.set_value(saved_chop_enable); @@ -66,13 +65,12 @@ bool Tesseract::tess_acceptable_word(WERD_RES* word) { return getDict().AcceptableResult(word); } - /** * @name tess_add_doc_word * * Add the given word to the document dictionary */ -void Tesseract::tess_add_doc_word(WERD_CHOICE *word_choice) { +void Tesseract::tess_add_doc_word(WERD_CHOICE* word_choice) { getDict().add_document_word(*word_choice); } } // namespace tesseract diff --git a/src/ccmain/tessbox.h b/src/ccmain/tessbox.h index d655a20401..b14b24a955 100644 --- a/src/ccmain/tessbox.h +++ b/src/ccmain/tessbox.h @@ -17,10 +17,10 @@ * **********************************************************************/ -#ifndef TESSBOX_H -#define TESSBOX_H +#ifndef TESSBOX_H +#define TESSBOX_H -#include "ratngs.h" +#include "ratngs.h" #include "tesseractclass.h" // TODO(ocr-team): Delete this along with other empty header files. diff --git a/src/ccmain/tessedit.cpp b/src/ccmain/tessedit.cpp index e07be68233..0d81d11050 100644 --- a/src/ccmain/tessedit.cpp +++ b/src/ccmain/tessedit.cpp @@ -24,40 +24,40 @@ #include "config_auto.h" #endif -#include "stderr.h" -#include "basedir.h" -#include "tessvars.h" -#include "control.h" -#include "reject.h" -#include "pageres.h" -#include "nwmain.h" -#include "pgedit.h" -#include "tprintf.h" -#include "tessedit.h" -#include "stopper.h" -#include "intmatcher.h" +#include "basedir.h" #include "chop.h" -#include "efio.h" +#include "control.h" #include "danerror.h" +#include "efio.h" #include "globals.h" +#include "intmatcher.h" +#include "nwmain.h" +#include "pageres.h" +#include "pgedit.h" +#include "reject.h" +#include "stderr.h" +#include "stopper.h" +#include "tessedit.h" +#include "tessvars.h" +#include "tprintf.h" #ifndef ANDROID_BUILD #include "lstmrecognizer.h" #endif -#include "tesseractclass.h" #include "params.h" +#include "tesseractclass.h" -#define VARDIR "configs/" /*variables files */ - // config under api -#define API_CONFIG "configs/api_config" +#define VARDIR "configs/" /*variables files */ + // config under api +#define API_CONFIG "configs/api_config" -ETEXT_DESC *global_monitor = nullptr; // progress monitor +ETEXT_DESC* global_monitor = nullptr; // progress monitor namespace tesseract { // Read a "config" file containing a set of variable, value pairs. // Searches the standard places: tessdata/configs, tessdata/tessconfigs // and also accepts a relative or absolute path name. -void Tesseract::read_config_file(const char *filename, +void Tesseract::read_config_file(const char* filename, SetParamConstraint constraint) { STRING path = datadir; path += "configs/"; @@ -89,11 +89,11 @@ void Tesseract::read_config_file(const char *filename, // the config files specified on the command line or left as the default // OEM_TESSERACT_ONLY if none of the configs specify this variable. bool Tesseract::init_tesseract_lang_data( - const char *arg0, const char *textbase, const char *language, - OcrEngineMode oem, char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, bool set_only_non_debug_params, - TessdataManager *mgr) { + const char* arg0, const char* textbase, const char* language, + OcrEngineMode oem, char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, bool set_only_non_debug_params, + TessdataManager* mgr) { // Set the basename, compute the data directory. main_setup(arg0, textbase); @@ -107,8 +107,9 @@ bool Tesseract::init_tesseract_lang_data( STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix; if (!mgr->is_loaded() && !mgr->Init(tessdata_path.string())) { tprintf("Error opening data file %s\n", tessdata_path.string()); - tprintf("Please make sure the TESSDATA_PREFIX environment variable is set" - " to your \"tessdata\" directory.\n"); + tprintf( + "Please make sure the TESSDATA_PREFIX environment variable is set" + " to your \"tessdata\" directory.\n"); return false; } if (oem == OEM_DEFAULT) { @@ -130,8 +131,9 @@ bool Tesseract::init_tesseract_lang_data( this->params()); } - SetParamConstraint set_params_constraint = set_only_non_debug_params ? - SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY : SET_PARAM_CONSTRAINT_NONE; + SetParamConstraint set_params_constraint = + set_only_non_debug_params ? SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY + : SET_PARAM_CONSTRAINT_NONE; // Load tesseract variables from config files. This is done after loading // language-specific variables from [lang].traineddata file, so that custom // config files can override values in [lang].traineddata file. @@ -152,8 +154,8 @@ bool Tesseract::init_tesseract_lang_data( } } - if (((STRING &)tessedit_write_params_to_file).length() > 0) { - FILE *params_file = fopen(tessedit_write_params_to_file.string(), "wb"); + if (((STRING&)tessedit_write_params_to_file).length() > 0) { + FILE* params_file = fopen(tessedit_write_params_to_file.string(), "wb"); if (params_file != nullptr) { ParamUtils::PrintParams(params_file, this->params()); fclose(params_file); @@ -219,8 +221,8 @@ bool Tesseract::init_tesseract_lang_data( // Init ParamsModel. // Load pass1 and pass2 weights (for now these two sets are the same, but in // the future separate sets of weights can be generated). - for (int p = ParamsModel::PTRAIN_PASS1; - p < ParamsModel::PTRAIN_NUM_PASSES; ++p) { + for (int p = ParamsModel::PTRAIN_PASS1; p < ParamsModel::PTRAIN_NUM_PASSES; + ++p) { language_model_->getParamsModel().SetPass( static_cast(p)); if (mgr->GetComponent(TESSDATA_PARAMS_MODEL, &fp)) { @@ -237,8 +239,7 @@ bool Tesseract::init_tesseract_lang_data( static bool IsStrInList(const STRING& str, const GenericVector& str_list) { for (int i = 0; i < str_list.size(); ++i) { - if (str_list[i] == str) - return true; + if (str_list[i] == str) return true; } return false; } @@ -255,8 +256,7 @@ void Tesseract::ParseLanguageString(const char* lang_str, while (remains.length() > 0) { // Find the start of the lang code and which vector to add to. const char* start = remains.string(); - while (*start == '+') - ++start; + while (*start == '+') ++start; GenericVector* target = to_load; if (*start == '~') { target = not_to_load; @@ -265,8 +265,7 @@ void Tesseract::ParseLanguageString(const char* lang_str, // Find the index of the end of the lang code in string start. int end = strlen(start); const char* plus = strchr(start, '+'); - if (plus != nullptr && plus - start < end) - end = plus - start; + if (plus != nullptr && plus - start < end) end = plus - start; STRING lang_code(start); lang_code.truncate_at(end); STRING next(start + end); @@ -282,13 +281,13 @@ void Tesseract::ParseLanguageString(const char* lang_str, // string and recursively any additional languages required by any language // traineddata file (via tessedit_load_sublangs in its config) that is loaded. // See init_tesseract_internal for args. -int Tesseract::init_tesseract(const char *arg0, const char *textbase, - const char *language, OcrEngineMode oem, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, +int Tesseract::init_tesseract(const char* arg0, const char* textbase, + const char* language, OcrEngineMode oem, + char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, bool set_only_non_debug_params, - TessdataManager *mgr) { + TessdataManager* mgr) { GenericVector langs_to_load; GenericVector langs_not_to_load; ParseLanguageString(language, &langs_to_load, &langs_not_to_load); @@ -301,8 +300,8 @@ int Tesseract::init_tesseract(const char *arg0, const char *textbase, // Load the rest into sub_langs_. for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) { if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) { - const char *lang_str = langs_to_load[lang_index].string(); - Tesseract *tess_to_init; + const char* lang_str = langs_to_load[lang_index].string(); + Tesseract* tess_to_init; if (!loaded_primary) { tess_to_init = this; } else { @@ -380,13 +379,13 @@ int Tesseract::init_tesseract(const char *arg0, const char *textbase, // in vars_vec. // If set_only_init_params is true, then only the initialization variables // will be set. -int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase, - const char *language, OcrEngineMode oem, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, +int Tesseract::init_tesseract_internal(const char* arg0, const char* textbase, + const char* language, OcrEngineMode oem, + char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, bool set_only_non_debug_params, - TessdataManager *mgr) { + TessdataManager* mgr) { if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params, mgr)) { @@ -400,7 +399,7 @@ int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase, bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY; program_editup(textbase, init_tesseract ? mgr : nullptr, init_tesseract ? mgr : nullptr); - return 0; //Normal exit + return 0; // Normal exit } // Helper builds the all_fonts table by adding new fonts from new_fonts. @@ -444,8 +443,8 @@ void Tesseract::SetupUniversalFontIds() { } // init the LM component -int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase, - const char *language, TessdataManager *mgr) { +int Tesseract::init_tesseract_lm(const char* arg0, const char* textbase, + const char* language, TessdataManager* mgr) { if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY, nullptr, 0, nullptr, nullptr, false, mgr)) return -1; @@ -455,14 +454,11 @@ int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase, return 0; } -void Tesseract::end_tesseract() { - end_recog(); -} +void Tesseract::end_tesseract() { end_recog(); } /* Define command type identifiers */ -enum CMD_EVENTS -{ +enum CMD_EVENTS { ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, diff --git a/src/ccmain/tessedit.h b/src/ccmain/tessedit.h index 5510e19585..aa9f0c8d84 100644 --- a/src/ccmain/tessedit.h +++ b/src/ccmain/tessedit.h @@ -17,13 +17,13 @@ * **********************************************************************/ -#ifndef TESSEDIT_H -#define TESSEDIT_H +#ifndef TESSEDIT_H +#define TESSEDIT_H -#include "blobs.h" -#include "pgedit.h" +#include "blobs.h" +#include "pgedit.h" - //progress monitor -extern ETEXT_DESC *global_monitor; +// progress monitor +extern ETEXT_DESC* global_monitor; #endif diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index 60db38615a..2a16582cef 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -541,18 +541,14 @@ Tesseract::~Tesseract() { #endif } -Dict& Tesseract::getDict() -{ - if (0 == Classify::getDict().NumDawgs() && AnyLSTMLang()) - { - if (lstm_recognizer_ && lstm_recognizer_->GetDict()) - { - return *const_cast(lstm_recognizer_->GetDict()); - } +Dict& Tesseract::getDict() { + if (0 == Classify::getDict().NumDawgs() && AnyLSTMLang()) { + if (lstm_recognizer_ && lstm_recognizer_->GetDict()) { + return *const_cast(lstm_recognizer_->GetDict()); } - return Classify::getDict(); } - + return Classify::getDict(); +} void Tesseract::Clear() { STRING debug_name = imagebasename + "_debug.pdf"; @@ -565,8 +561,7 @@ void Tesseract::Clear() { reskew_ = FCOORD(1.0f, 0.0f); splitter_.Clear(); scaled_factor_ = -1; - for (int i = 0; i < sub_langs_.size(); ++i) - sub_langs_[i]->Clear(); + for (int i = 0; i < sub_langs_.size(); ++i) sub_langs_[i]->Clear(); } void Tesseract::SetEquationDetect(EquationDetect* detector) { @@ -610,11 +605,11 @@ void Tesseract::PrepareForPageseg() { // Find the max splitter strategy over all langs. ShiroRekhaSplitter::SplitStrategy max_pageseg_strategy = static_cast( - static_cast(pageseg_devanagari_split_strategy)); + static_cast(pageseg_devanagari_split_strategy)); for (int i = 0; i < sub_langs_.size(); ++i) { ShiroRekhaSplitter::SplitStrategy pageseg_strategy = - static_cast( - static_cast(sub_langs_[i]->pageseg_devanagari_split_strategy)); + static_cast(static_cast( + sub_langs_[i]->pageseg_devanagari_split_strategy)); if (pageseg_strategy > max_pageseg_strategy) max_pageseg_strategy = pageseg_strategy; pixDestroy(&sub_langs_[i]->pix_binary_); @@ -636,18 +631,17 @@ void Tesseract::PrepareForPageseg() { // Note that this method resets pix_binary_ to the original binarized image, // which may be different from the image actually used for OCR depending on the // value of devanagari_ocr_split_strategy. -void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list, - Tesseract* osd_tess, OSResults* osr) { +void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list, Tesseract* osd_tess, + OSResults* osr) { // Find the max splitter strategy over all langs. ShiroRekhaSplitter::SplitStrategy max_ocr_strategy = static_cast( - static_cast(ocr_devanagari_split_strategy)); + static_cast(ocr_devanagari_split_strategy)); for (int i = 0; i < sub_langs_.size(); ++i) { ShiroRekhaSplitter::SplitStrategy ocr_strategy = static_cast( - static_cast(sub_langs_[i]->ocr_devanagari_split_strategy)); - if (ocr_strategy > max_ocr_strategy) - max_ocr_strategy = ocr_strategy; + static_cast(sub_langs_[i]->ocr_devanagari_split_strategy)); + if (ocr_strategy > max_ocr_strategy) max_ocr_strategy = ocr_strategy; } // Utilize the segmentation information available. splitter_.set_segmentation_block_list(block_list); @@ -664,8 +658,8 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list, if (splitter_.HasDifferentSplitStrategies()) { BLOCK block("", TRUE, 0, 0, 0, 0, pixGetWidth(pix_binary_), pixGetHeight(pix_binary_)); - Pix* pix_for_ocr = split_for_ocr ? splitter_.splitted_image() : - splitter_.orig_pix(); + Pix* pix_for_ocr = + split_for_ocr ? splitter_.splitted_image() : splitter_.orig_pix(); extract_edges(pix_for_ocr, &block); splitter_.RefreshSegmentationWithNewBlobs(block.blob_list()); } diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index c3a14032f0..03cc862569 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -51,7 +51,6 @@ class WERD; class WERD_CHOICE; class WERD_RES; - // Top-level class for all tesseract global instance data. // This class either holds or points to all data used by an instance // of Tesseract, including the memory allocator. When this is @@ -105,18 +104,18 @@ class Tesseract; // A collection of various variables for statistics and debugging. struct TesseractStats { TesseractStats() - : adaption_word_number(0), - doc_blob_quality(0), - doc_outline_errs(0), - doc_char_quality(0), - good_char_count(0), - doc_good_char_quality(0), - word_count(0), - dict_words(0), - tilde_crunch_written(false), - last_char_was_newline(true), - last_char_was_tilde(false), - write_results_empty_block(true) {} + : adaption_word_number(0), + doc_blob_quality(0), + doc_outline_errs(0), + doc_char_quality(0), + good_char_count(0), + doc_good_char_quality(0), + word_count(0), + dict_words(0), + tilde_crunch_written(false), + last_char_was_newline(true), + last_char_was_tilde(false), + write_results_empty_block(true) {} int32_t adaption_word_number; int16_t doc_blob_quality; @@ -124,8 +123,8 @@ struct TesseractStats { int16_t doc_char_quality; int16_t good_char_count; int16_t doc_good_char_quality; - int32_t word_count; // count of word in the document - int32_t dict_words; // number of dicitionary words in the document + int32_t word_count; // count of word in the document + int32_t dict_words; // number of dicitionary words in the document STRING dump_words_str; // accumulator used by dump_words() // Flags used by write_results() bool tilde_crunch_written; @@ -136,12 +135,15 @@ struct TesseractStats { // Struct to hold all the pointers to relevant data for processing a word. struct WordData { - WordData() : word(nullptr), row(nullptr), block(nullptr), prev_word(nullptr) {} + WordData() + : word(nullptr), row(nullptr), block(nullptr), prev_word(nullptr) {} explicit WordData(const PAGE_RES_IT& page_res_it) - : word(page_res_it.word()), row(page_res_it.row()->row), - block(page_res_it.block()->block), prev_word(nullptr) {} + : word(page_res_it.word()), + row(page_res_it.row()->row), + block(page_res_it.block()->block), + prev_word(nullptr) {} WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res) - : word(word_res), row(row_in), block(block_in), prev_word(nullptr) {} + : word(word_res), row(row_in), block(block_in), prev_word(nullptr) {} WERD_RES* word; ROW* row; @@ -152,11 +154,11 @@ struct WordData { // Definition of a Tesseract WordRecognizer. The WordData provides the context // of row/block, in_word holds an initialized, possibly pre-classified word, -// that the recognizer may or may not consume (but if so it sets *in_word=nullptr) -// and produces one or more output words in out_words, which may be the -// consumed in_word, or may be generated independently. -// This api allows both a conventional tesseract classifier to work, or a -// line-level classifier that generates multiple words from a merged input. +// that the recognizer may or may not consume (but if so it sets +// *in_word=nullptr) and produces one or more output words in out_words, which +// may be the consumed in_word, or may be generated independently. This api +// allows both a conventional tesseract classifier to work, or a line-level +// classifier that generates multiple words from a merged input. typedef void (Tesseract::*WordRecognizer)(const WordData& word_data, WERD_RES** in_word, PointerVector* out_words); @@ -166,7 +168,7 @@ class Tesseract : public Wordrec { Tesseract(); ~Tesseract(); - // Return appropriate dictionary + // Return appropriate dictionary Dict& getDict() override; // Clear as much used memory as possible without resetting the adaptive @@ -181,20 +183,14 @@ class Tesseract : public Wordrec { void SetEquationDetect(EquationDetect* detector); // Simple accessors. - const FCOORD& reskew() const { - return reskew_; - } + const FCOORD& reskew() const { return reskew_; } // Destroy any existing pix and return a pointer to the pointer. Pix** mutable_pix_binary() { pixDestroy(&pix_binary_); return &pix_binary_; } - Pix* pix_binary() const { - return pix_binary_; - } - Pix* pix_grey() const { - return pix_grey_; - } + Pix* pix_binary() const { return pix_binary_; } + Pix* pix_grey() const { return pix_grey_; } void set_pix_grey(Pix* grey_pix) { pixDestroy(&pix_grey_); pix_grey_ = grey_pix; @@ -229,44 +225,22 @@ class Tesseract : public Wordrec { pixDestroy(&pix_thresholds_); pix_thresholds_ = thresholds; } - int source_resolution() const { - return source_resolution_; - } - void set_source_resolution(int ppi) { - source_resolution_ = ppi; - } - int ImageWidth() const { - return pixGetWidth(pix_binary_); - } - int ImageHeight() const { - return pixGetHeight(pix_binary_); - } - Pix* scaled_color() const { - return scaled_color_; - } - int scaled_factor() const { - return scaled_factor_; - } + int source_resolution() const { return source_resolution_; } + void set_source_resolution(int ppi) { source_resolution_ = ppi; } + int ImageWidth() const { return pixGetWidth(pix_binary_); } + int ImageHeight() const { return pixGetHeight(pix_binary_); } + Pix* scaled_color() const { return scaled_color_; } + int scaled_factor() const { return scaled_factor_; } void SetScaledColor(int factor, Pix* color) { scaled_factor_ = factor; scaled_color_ = color; } - const Textord& textord() const { - return textord_; - } - Textord* mutable_textord() { - return &textord_; - } + const Textord& textord() const { return textord_; } + Textord* mutable_textord() { return &textord_; } - bool right_to_left() const { - return right_to_left_; - } - int num_sub_langs() const { - return sub_langs_.size(); - } - Tesseract* get_sub_lang(int index) const { - return sub_langs_[index]; - } + bool right_to_left() const { return right_to_left_; } + int num_sub_langs() const { return sub_langs_.size(); } + Tesseract* get_sub_lang(int index) const { return sub_langs_[index]; } // Returns true if any language uses Tesseract (as opposed to LSTM). bool AnyTessLang() const { if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true; @@ -298,8 +272,8 @@ class Tesseract : public Wordrec { // Uses the strategy specified in the global variable // ocr_devanagari_split_strategy for performing splitting while preparing for // Tesseract ocr. - void PrepareForTessOCR(BLOCK_LIST* block_list, - Tesseract* osd_tess, OSResults* osr); + void PrepareForTessOCR(BLOCK_LIST* block_list, Tesseract* osd_tess, + OSResults* osr); int SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr); @@ -320,23 +294,20 @@ class Tesseract : public Wordrec { // serialized DocumentData based on output_basename. void TrainLineRecognizer(const STRING& input_imagename, const STRING& output_basename, - BLOCK_LIST *block_list); + BLOCK_LIST* block_list); // Generates training data for training a line recognizer, eg LSTM. // Breaks the boxes into lines, normalizes them, converts to ImageData and // appends them to the given training_data. void TrainFromBoxes(const GenericVector& boxes, const GenericVector& texts, - BLOCK_LIST *block_list, - DocumentData* training_data); + BLOCK_LIST* block_list, DocumentData* training_data); // Returns an Imagedata containing the image of the given textline, // and ground truth boxes/truth text if available in the input. // The image is not normalized in any way. - ImageData* GetLineData(const TBOX& line_box, - const GenericVector& boxes, - const GenericVector& texts, - int start_box, int end_box, - const BLOCK& block); + ImageData* GetLineData(const TBOX& line_box, const GenericVector& boxes, + const GenericVector& texts, int start_box, + int end_box, const BLOCK& block); // Helper gets the image of a rectangle, using the block.re_rotation() if // needed to get to the image, and rotating the result back to horizontal // layout. (CJK characters will be on their left sides) The vertical text flag @@ -347,7 +318,7 @@ class Tesseract : public Wordrec { TBOX* revised_box) const; // Recognizes a word or group of words, converting to WERD_RES in *words. // Analogous to classify_word_pass1, but can handle a group of words as well. - void LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word, + void LSTMRecognizeWord(const BLOCK& block, ROW* row, WERD_RES* word, PointerVector* words); // Apply segmentation search to the given set of words, within the constraints // of the existing ratings matrix. If there is already a best_choice on a word @@ -358,27 +329,20 @@ class Tesseract : public Wordrec { bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box, const char* word_config, int pass); // Sets up the words ready for whichever engine is to be run - void SetupAllWordsPassN(int pass_n, - const TBOX* target_word_box, - const char* word_config, - PAGE_RES* page_res, + void SetupAllWordsPassN(int pass_n, const TBOX* target_word_box, + const char* word_config, PAGE_RES* page_res, GenericVector* words); // Sets up the single word ready for whichever engine is to be run. void SetupWordPassN(int pass_n, WordData* word); // Runs word recognition on all the words. - bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, - PAGE_RES_IT* pr_it, + bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, PAGE_RES_IT* pr_it, GenericVector* words); - bool recog_all_words(PAGE_RES* page_res, - ETEXT_DESC* monitor, - const TBOX* target_word_box, - const char* word_config, + bool recog_all_words(PAGE_RES* page_res, ETEXT_DESC* monitor, + const TBOX* target_word_box, const char* word_config, int dopasses); - void rejection_passes(PAGE_RES* page_res, - ETEXT_DESC* monitor, - const TBOX* target_word_box, - const char* word_config); - void bigram_correction_pass(PAGE_RES *page_res); + void rejection_passes(PAGE_RES* page_res, ETEXT_DESC* monitor, + const TBOX* target_word_box, const char* word_config); + void bigram_correction_pass(PAGE_RES* page_res); void blamer_pass(PAGE_RES* page_res); // Sets script positions and detects smallcaps on all output words. void script_pos_pass(PAGE_RES* page_res); @@ -429,80 +393,68 @@ class Tesseract : public Wordrec { STRING* best_str, float* c2); void classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it, WordData* word_data); - void classify_word_pass1(const WordData& word_data, - WERD_RES** in_word, + void classify_word_pass1(const WordData& word_data, WERD_RES** in_word, PointerVector* out_words); void recog_pseudo_word(PAGE_RES* page_res, // blocks to check - TBOX &selection_box); + TBOX& selection_box); void fix_rep_char(PAGE_RES_IT* page_res_it); ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET& char_set, - const char *s, - const char *lengths); - void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block); - void classify_word_pass2(const WordData& word_data, - WERD_RES** in_word, + const char* s, + const char* lengths); + void match_word_pass_n(int pass_n, WERD_RES* word, ROW* row, BLOCK* block); + void classify_word_pass2(const WordData& word_data, WERD_RES** in_word, PointerVector* out_words); - void ReportXhtFixResult(bool accept_new_word, float new_x_ht, - WERD_RES* word, WERD_RES* new_word); - bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row); - bool TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row); + void ReportXhtFixResult(bool accept_new_word, float new_x_ht, WERD_RES* word, + WERD_RES* new_word); + bool RunOldFixXht(WERD_RES* word, BLOCK* block, ROW* row); + bool TrainedXheightFix(WERD_RES* word, BLOCK* block, ROW* row); // Runs recognition with the test baseline shift and x-height and returns true // if there was an improvement in recognition result. bool TestNewNormalization(int original_misfits, float baseline_shift, - float new_x_ht, WERD_RES *word, BLOCK* block, - ROW *row); + float new_x_ht, WERD_RES* word, BLOCK* block, + ROW* row); bool recog_interactive(PAGE_RES_IT* pr_it); // Set fonts of this word. - void set_word_fonts(WERD_RES *word); + void set_word_fonts(WERD_RES* word); void font_recognition_pass(PAGE_RES* page_res); void dictionary_correction_pass(PAGE_RES* page_res); bool check_debug_pt(WERD_RES* word, int location); //// superscript.cpp //////////////////////////////////////////////////// - bool SubAndSuperscriptFix(WERD_RES *word_res); - void GetSubAndSuperscriptCandidates(const WERD_RES *word, - int *num_rebuilt_leading, - ScriptPos *leading_pos, - float *leading_certainty, - int *num_rebuilt_trailing, - ScriptPos *trailing_pos, - float *trailing_certainty, - float *avg_certainty, - float *unlikely_threshold); - WERD_RES *TrySuperscriptSplits(int num_chopped_leading, - float leading_certainty, - ScriptPos leading_pos, + bool SubAndSuperscriptFix(WERD_RES* word_res); + void GetSubAndSuperscriptCandidates( + const WERD_RES* word, int* num_rebuilt_leading, ScriptPos* leading_pos, + float* leading_certainty, int* num_rebuilt_trailing, + ScriptPos* trailing_pos, float* trailing_certainty, float* avg_certainty, + float* unlikely_threshold); + WERD_RES* TrySuperscriptSplits(int num_chopped_leading, + float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, - ScriptPos trailing_pos, - WERD_RES *word, - bool *is_good, - int *retry_leading, - int *retry_trailing); - bool BelievableSuperscript(bool debug, - const WERD_RES &word, - float certainty_threshold, - int *left_ok, - int *right_ok) const; + ScriptPos trailing_pos, WERD_RES* word, + bool* is_good, int* retry_leading, + int* retry_trailing); + bool BelievableSuperscript(bool debug, const WERD_RES& word, + float certainty_threshold, int* left_ok, + int* right_ok) const; //// output.h ////////////////////////////////////////////////////////// - void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box); + void output_pass(PAGE_RES_IT& page_res_it, const TBOX* target_word_box); void write_results(PAGE_RES_IT& page_res_it, // full info char newline_type, // type of newline - bool force_eol // override tilde crunch? + bool force_eol // override tilde crunch? ); - void set_unlv_suspects(WERD_RES *word); - UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated? - bool acceptable_number_string(const char* s, - const char* lengths); - int16_t count_alphanums(const WERD_CHOICE &word); - int16_t count_alphas(const WERD_CHOICE &word); + void set_unlv_suspects(WERD_RES* word); + UNICHAR_ID get_rep_char(WERD_RES* word); // what char is repeated? + bool acceptable_number_string(const char* s, const char* lengths); + int16_t count_alphanums(const WERD_CHOICE& word); + int16_t count_alphas(const WERD_CHOICE& word); //// tessedit.h //////////////////////////////////////////////////////// - void read_config_file(const char *filename, SetParamConstraint constraint); + void read_config_file(const char* filename, SetParamConstraint constraint); // Initialize for potentially a set of languages defined by the language // string and recursively any additional languages required by any language // traineddata file (via tessedit_load_sublangs in its config) that is loaded. @@ -512,12 +464,11 @@ class Tesseract : public Wordrec { int configs_size, const GenericVector* vars_vec, const GenericVector* vars_values, bool set_only_init_params, TessdataManager* mgr); - int init_tesseract(const char *datapath, - const char *language, + int init_tesseract(const char* datapath, const char* language, OcrEngineMode oem) { TessdataManager mgr; - return init_tesseract(datapath, nullptr, language, oem, nullptr, 0, nullptr, nullptr, - false, &mgr); + return init_tesseract(datapath, nullptr, language, oem, nullptr, 0, nullptr, + nullptr, false, &mgr); } // Common initialization for a single language. // arg0 is the datapath for the tessdata directory, which could be the @@ -560,24 +511,23 @@ class Tesseract : public Wordrec { bool set_only_init_params, TessdataManager* mgr); - void ParseLanguageString(const char* lang_str, - GenericVector* to_load, + void ParseLanguageString(const char* lang_str, GenericVector* to_load, GenericVector* not_to_load); //// pgedit.h ////////////////////////////////////////////////////////// - SVMenuNode *build_menu_new(); - #ifndef GRAPHICS_DISABLED + SVMenuNode* build_menu_new(); +#ifndef GRAPHICS_DISABLED void pgeditor_main(int width, int height, PAGE_RES* page_res); - #endif // GRAPHICS_DISABLED - void process_image_event( // action in image win - const SVEvent &event); - bool process_cmd_win_event( // UI command semantics - int32_t cmd_event, // which menu item? - char* new_value // any prompt data +#endif // GRAPHICS_DISABLED + void process_image_event( // action in image win + const SVEvent& event); + bool process_cmd_win_event( // UI command semantics + int32_t cmd_event, // which menu item? + char* new_value // any prompt data ); - void debug_word(PAGE_RES* page_res, const TBOX &selection_box); + void debug_word(PAGE_RES* page_res, const TBOX& selection_box); void do_re_display( - bool (tesseract::Tesseract::* word_painter)(PAGE_RES_IT* pr_it)); + bool (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it)); bool word_display(PAGE_RES_IT* pr_it); bool word_bln_display(PAGE_RES_IT* pr_it); bool word_blank_and_set_display(PAGE_RES_IT* pr_its); @@ -588,107 +538,92 @@ class Tesseract : public Wordrec { void blob_feature_display(PAGE_RES* page_res, const TBOX& selection_box); //// reject.h ////////////////////////////////////////////////////////// // make rej map for word - void make_reject_map(WERD_RES *word, ROW *row, int16_t pass); + void make_reject_map(WERD_RES* word, ROW* row, int16_t pass); bool one_ell_conflict(WERD_RES* word_res, bool update_map); - int16_t first_alphanum_index(const char *word, - const char *word_lengths); - int16_t first_alphanum_offset(const char *word, - const char *word_lengths); - int16_t alpha_count(const char *word, - const char *word_lengths); - bool word_contains_non_1_digit(const char* word, - const char* word_lengths); - void dont_allow_1Il(WERD_RES *word); - int16_t count_alphanums( //how many alphanums - WERD_RES *word); - void flip_0O(WERD_RES *word); + int16_t first_alphanum_index(const char* word, const char* word_lengths); + int16_t first_alphanum_offset(const char* word, const char* word_lengths); + int16_t alpha_count(const char* word, const char* word_lengths); + bool word_contains_non_1_digit(const char* word, const char* word_lengths); + void dont_allow_1Il(WERD_RES* word); + int16_t count_alphanums( // how many alphanums + WERD_RES* word); + void flip_0O(WERD_RES* word); bool non_0_digit(const UNICHARSET& ch_set, UNICHAR_ID unichar_id); bool non_O_upper(const UNICHARSET& ch_set, UNICHAR_ID unichar_id); bool repeated_nonalphanum_wd(WERD_RES* word, ROW* row); - void nn_match_word( //Match a word - WERD_RES *word, - ROW *row); - void nn_recover_rejects(WERD_RES *word, ROW *row); - void set_done( //set done flag - WERD_RES *word, - int16_t pass); - int16_t safe_dict_word(const WERD_RES *werd_res); // is best_choice in dict? - void flip_hyphens(WERD_RES *word); - void reject_I_1_L(WERD_RES *word); - void reject_edge_blobs(WERD_RES *word); - void reject_mostly_rejects(WERD_RES *word); + void nn_match_word( // Match a word + WERD_RES* word, ROW* row); + void nn_recover_rejects(WERD_RES* word, ROW* row); + void set_done( // set done flag + WERD_RES* word, int16_t pass); + int16_t safe_dict_word(const WERD_RES* werd_res); // is best_choice in dict? + void flip_hyphens(WERD_RES* word); + void reject_I_1_L(WERD_RES* word); + void reject_edge_blobs(WERD_RES* word); + void reject_mostly_rejects(WERD_RES* word); //// adaptions.h /////////////////////////////////////////////////////// - bool word_adaptable( //should we adapt? - WERD_RES* word, - uint16_t mode); + bool word_adaptable( // should we adapt? + WERD_RES* word, uint16_t mode); //// tfacepp.cpp /////////////////////////////////////////////////////// void recog_word_recursive(WERD_RES* word); - void recog_word(WERD_RES *word); + void recog_word(WERD_RES* word); void split_and_recog_word(WERD_RES* word); - void split_word(WERD_RES *word, - int split_pt, - WERD_RES **right_piece, - BlamerBundle **orig_blamer_bundle) const; - void join_words(WERD_RES *word, - WERD_RES *word2, - BlamerBundle *orig_bb) const; + void split_word(WERD_RES* word, int split_pt, WERD_RES** right_piece, + BlamerBundle** orig_blamer_bundle) const; + void join_words(WERD_RES* word, WERD_RES* word2, BlamerBundle* orig_bb) const; //// fixspace.cpp /////////////////////////////////////////////////////// - bool digit_or_numeric_punct(WERD_RES *word, int char_position); - int16_t eval_word_spacing(WERD_RES_LIST &word_res_list); - void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block); - int16_t fp_eval_word_spacing(WERD_RES_LIST &word_res_list); - void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block); - void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block); - void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block); - void fix_fuzzy_spaces( //find fuzzy words - ETEXT_DESC *monitor, //progress monitor - int32_t word_count, //count of words in doc - PAGE_RES *page_res); - void dump_words(WERD_RES_LIST &perm, int16_t score, - int16_t mode, bool improved); - bool fixspace_thinks_word_done(WERD_RES *word); - int16_t worst_noise_blob(WERD_RES *word_res, float *worst_noise_score); - float blob_noise_score(TBLOB *blob); - void break_noisiest_blob_word(WERD_RES_LIST &words); + bool digit_or_numeric_punct(WERD_RES* word, int char_position); + int16_t eval_word_spacing(WERD_RES_LIST& word_res_list); + void match_current_words(WERD_RES_LIST& words, ROW* row, BLOCK* block); + int16_t fp_eval_word_spacing(WERD_RES_LIST& word_res_list); + void fix_noisy_space_list(WERD_RES_LIST& best_perm, ROW* row, BLOCK* block); + void fix_fuzzy_space_list(WERD_RES_LIST& best_perm, ROW* row, BLOCK* block); + void fix_sp_fp_word(WERD_RES_IT& word_res_it, ROW* row, BLOCK* block); + void fix_fuzzy_spaces( // find fuzzy words + ETEXT_DESC* monitor, // progress monitor + int32_t word_count, // count of words in doc + PAGE_RES* page_res); + void dump_words(WERD_RES_LIST& perm, int16_t score, int16_t mode, + bool improved); + bool fixspace_thinks_word_done(WERD_RES* word); + int16_t worst_noise_blob(WERD_RES* word_res, float* worst_noise_score); + float blob_noise_score(TBLOB* blob); + void break_noisiest_blob_word(WERD_RES_LIST& words); //// docqual.cpp //////////////////////////////////////////////////////// - GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word); - bool potential_word_crunch(WERD_RES* word, - GARBAGE_LEVEL garbage_level, + GARBAGE_LEVEL garbage_word(WERD_RES* word, BOOL8 ok_dict_word); + bool potential_word_crunch(WERD_RES* word, GARBAGE_LEVEL garbage_level, bool ok_dict_word); - void tilde_crunch(PAGE_RES_IT &page_res_it); - void unrej_good_quality_words( //unreject potential - PAGE_RES_IT &page_res_it); - void doc_and_block_rejection( //reject big chunks - PAGE_RES_IT &page_res_it, - bool good_quality_doc); - void quality_based_rejection(PAGE_RES_IT &page_res_it, - bool good_quality_doc); - void convert_bad_unlv_chs(WERD_RES *word_res); - void tilde_delete(PAGE_RES_IT &page_res_it); - int16_t word_blob_quality(WERD_RES *word, ROW *row); - void word_char_quality(WERD_RES *word, ROW *row, int16_t *match_count, - int16_t *accepted_match_count); - void unrej_good_chs(WERD_RES *word, ROW *row); + void tilde_crunch(PAGE_RES_IT& page_res_it); + void unrej_good_quality_words( // unreject potential + PAGE_RES_IT& page_res_it); + void doc_and_block_rejection( // reject big chunks + PAGE_RES_IT& page_res_it, bool good_quality_doc); + void quality_based_rejection(PAGE_RES_IT& page_res_it, bool good_quality_doc); + void convert_bad_unlv_chs(WERD_RES* word_res); + void tilde_delete(PAGE_RES_IT& page_res_it); + int16_t word_blob_quality(WERD_RES* word, ROW* row); + void word_char_quality(WERD_RES* word, ROW* row, int16_t* match_count, + int16_t* accepted_match_count); + void unrej_good_chs(WERD_RES* word, ROW* row); int16_t count_outline_errs(char c, int16_t outline_count); - int16_t word_outline_errs(WERD_RES *word); + int16_t word_outline_errs(WERD_RES* word); bool terrible_word_crunch(WERD_RES* word, GARBAGE_LEVEL garbage_level); - CRUNCH_MODE word_deletable(WERD_RES *word, int16_t &delete_mode); - int16_t failure_count(WERD_RES *word); + CRUNCH_MODE word_deletable(WERD_RES* word, int16_t& delete_mode); + int16_t failure_count(WERD_RES* word); bool noise_outlines(TWERD* word); //// pagewalk.cpp /////////////////////////////////////////////////////// - void - process_selected_words( - PAGE_RES* page_res, // blocks to check - //function to call - TBOX& selection_box, - bool (tesseract::Tesseract::* word_processor)(PAGE_RES_IT* pr_it)); + void process_selected_words( + PAGE_RES* page_res, // blocks to check + // function to call + TBOX& selection_box, + bool (tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it)); //// tessbox.cpp /////////////////////////////////////////////////////// - void tess_add_doc_word( //test acceptability - WERD_CHOICE *word_choice //after context - ); - void tess_segment_pass_n(int pass_n, WERD_RES *word); - bool tess_acceptable_word(WERD_RES *word); + void tess_add_doc_word( // test acceptability + WERD_CHOICE* word_choice // after context + ); + void tess_segment_pass_n(int pass_n, WERD_RES* word); + bool tess_acceptable_word(WERD_RES* word); //// applybox.cpp ////////////////////////////////////////////////////// // Applies the box file based on the image name fname, and resegments @@ -712,21 +647,21 @@ class Tesseract : public Wordrec { // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords // is not required before calling ApplyBoxTraining. PAGE_RES* ApplyBoxes(const STRING& fname, bool find_segmentation, - BLOCK_LIST *block_list); + BLOCK_LIST* block_list); // Any row xheight that is significantly different from the median is set // to the median. - void PreenXHeights(BLOCK_LIST *block_list); + void PreenXHeights(BLOCK_LIST* block_list); // Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: // All fuzzy spaces are removed, and all the words are maximally chopped. PAGE_RES* SetupApplyBoxes(const GenericVector& boxes, - BLOCK_LIST *block_list); + BLOCK_LIST* block_list); // Tests the chopper by exhaustively running chop_one_blob. // The word_res will contain filled chopped_word, seam_array, denorm, // box_word and best_state for the maximally chopped word. - void MaximallyChopWord(const GenericVector& boxes, - BLOCK* block, ROW* row, WERD_RES* word_res); + void MaximallyChopWord(const GenericVector& boxes, BLOCK* block, + ROW* row, WERD_RES* word_res); // Gather consecutive blobs that match the given box into the best_state // and corresponding correct_text. // Fights over which box owns which blobs are settled by pre-chopping and @@ -735,7 +670,7 @@ class Tesseract : public Wordrec { // failing to find an appropriate blob for a box. // This means that occasionally, blobs may be incorrectly segmented if the // chopper fails to find a suitable chop point. - bool ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box, + bool ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box, const TBOX& box, const TBOX& next_box, const char* correct_text); // Consume all source blobs that strongly overlap the given box, @@ -744,9 +679,8 @@ class Tesseract : public Wordrec { // applying the blobs to box or next_box with the least non-overlap. // Returns false if the box was in error, which can only be caused by // failing to find an overlapping blob for a box. - bool ResegmentWordBox(BLOCK_LIST *block_list, - const TBOX& box, const TBOX& next_box, - const char* correct_text); + bool ResegmentWordBox(BLOCK_LIST* block_list, const TBOX& box, + const TBOX& next_box, const char* correct_text); // Resegments the words by running the classifier in an attempt to find the // correct segmentation that produces the required string. void ReSegmentByClassification(PAGE_RES* page_res); @@ -771,17 +705,17 @@ class Tesseract : public Wordrec { void SearchForText(const GenericVector* choices, int choices_pos, int choices_length, const GenericVector& target_text, - int text_index, - float rating, GenericVector* segmentation, - float* best_rating, GenericVector* best_segmentation); + int text_index, float rating, + GenericVector* segmentation, float* best_rating, + GenericVector* best_segmentation); // Counts up the labelled words and the blobs within. // Deletes all unused or emptied words, counting the unused ones. // Resets W_BOL and W_EOL flags correctly. // Builds the rebuild_word and rebuilds the box_word. void TidyUp(PAGE_RES* page_res); // Logs a bad box by line in the box file and box coords. - void ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch, - const char *err_msg); + void ReportFailedBox(int boxfile_lineno, TBOX box, const char* box_ch, + const char* err_msg); // Creates a fake best_choice entry in each WERD_RES with the correct text. void CorrectClassifyWords(PAGE_RES* page_res); // Call LearnWord to extract features for labelled blobs within each word. @@ -790,18 +724,18 @@ class Tesseract : public Wordrec { //// fixxht.cpp /////////////////////////////////////////////////////// // Returns the number of misfit blob tops in this word. - int CountMisfitTops(WERD_RES *word_res); + int CountMisfitTops(WERD_RES* word_res); // Returns a new x-height in pixels (original image coords) that is // maximally compatible with the result in word_res. // Returns 0.0f if no x-height is found that is better than the current // estimate. - float ComputeCompatibleXheight(WERD_RES *word_res, float* baseline_shift); + float ComputeCompatibleXheight(WERD_RES* word_res, float* baseline_shift); //// Data members /////////////////////////////////////////////////////// // TODO(ocr-team): Find and remove obsolete parameters. BOOL_VAR_H(tessedit_resegment_from_boxes, false, "Take segmentation and labeling from box file"); BOOL_VAR_H(tessedit_resegment_from_line_boxes, false, - "Conversion of word/line box file to char box file"); + "Conversion of word/line box file to char box file"); BOOL_VAR_H(tessedit_train_from_boxes, false, "Generate training data from boxed chars"); BOOL_VAR_H(tessedit_make_boxes_from_boxes, false, @@ -819,8 +753,7 @@ class Tesseract : public Wordrec { " to loading and running the most accurate available."); STRING_VAR_H(tessedit_char_blacklist, "", "Blacklist of chars not to recognize"); - STRING_VAR_H(tessedit_char_whitelist, "", - "Whitelist of chars to recognize"); + STRING_VAR_H(tessedit_char_whitelist, "", "Whitelist of chars to recognize"); STRING_VAR_H(tessedit_char_unblacklist, "", "List of chars to override tessedit_char_blacklist"); BOOL_VAR_H(tessedit_ambigs_training, false, @@ -854,8 +787,7 @@ class Tesseract : public Wordrec { BOOL_VAR_H(tessedit_display_outwords, false, "Draw output words"); BOOL_VAR_H(tessedit_dump_choices, false, "Dump char choices"); BOOL_VAR_H(tessedit_timing_debug, false, "Print timing stats"); - BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true, - "Try to improve fuzzy spaces"); + BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true, "Try to improve fuzzy spaces"); BOOL_VAR_H(tessedit_unrej_any_wd, false, "Don't bother with word plausibility"); BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?"); @@ -868,7 +800,8 @@ class Tesseract : public Wordrec { "Enable correction based on the word bigram dictionary."); BOOL_VAR_H(tessedit_enable_dict_correction, false, "Enable single word correction based on the dictionary."); - INT_VAR_H(tessedit_bigram_debug, 0, "Amount of debug output for bigram " + INT_VAR_H(tessedit_bigram_debug, 0, + "Amount of debug output for bigram " "correction."); BOOL_VAR_H(enable_noise_removal, true, "Remove and conditionally reassign small outlines when they" @@ -946,24 +879,19 @@ class Tesseract : public Wordrec { "Apply row rejection to good docs"); double_VAR_H(tessedit_good_doc_still_rowrej_wd, 1.1, "rej good doc wd if more than this fraction rejected"); - BOOL_VAR_H(tessedit_reject_bad_qual_wds, true, - "Reject all bad quality wds"); + BOOL_VAR_H(tessedit_reject_bad_qual_wds, true, "Reject all bad quality wds"); BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats"); BOOL_VAR_H(tessedit_debug_quality_metrics, false, "Output data to debug file"); BOOL_VAR_H(bland_unrej, false, "unrej potential with no checks"); - double_VAR_H(quality_rowrej_pc, 1.1, - "good_quality_doc gte good char limit"); - BOOL_VAR_H(unlv_tilde_crunching, true, - "Mark v.bad words for tilde crunch"); - BOOL_VAR_H(hocr_font_info, false, - "Add font info to hocr output"); + double_VAR_H(quality_rowrej_pc, 1.1, "good_quality_doc gte good char limit"); + BOOL_VAR_H(unlv_tilde_crunching, true, "Mark v.bad words for tilde crunch"); + BOOL_VAR_H(hocr_font_info, false, "Add font info to hocr output"); BOOL_VAR_H(crunch_early_merge_tess_fails, true, "Before word crunch?"); BOOL_VAR_H(crunch_early_convert_bad_unlv_chs, false, "Take out ~^ early?"); double_VAR_H(crunch_terrible_rating, 80.0, "crunch rating lt this"); BOOL_VAR_H(crunch_terrible_garbage, true, "As it says"); - double_VAR_H(crunch_poor_garbage_cert, -9.0, - "crunch garbage cert lt this"); + double_VAR_H(crunch_poor_garbage_cert, -9.0, "crunch garbage cert lt this"); double_VAR_H(crunch_poor_garbage_rate, 60, "crunch garbage rating lt this"); double_VAR_H(crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this"); double_VAR_H(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this"); @@ -973,8 +901,7 @@ class Tesseract : public Wordrec { double_VAR_H(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this"); double_VAR_H(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this"); double_VAR_H(crunch_del_min_width, 3.0, "Del if word width lt xht x this"); - double_VAR_H(crunch_del_high_word, 1.5, - "Del if word gt xht x this above bl"); + double_VAR_H(crunch_del_high_word, 1.5, "Del if word gt xht x this above bl"); double_VAR_H(crunch_del_low_word, 0.5, "Del if word gt xht x this below bl"); double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this"); INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch"); @@ -990,22 +917,22 @@ class Tesseract : public Wordrec { "Don't crunch words with long lower case strings"); INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions"); INT_VAR_H(crunch_debug, 0, "As it says"); - INT_VAR_H(fixsp_non_noise_limit, 1, - "How many non-noise blbs either side?"); + INT_VAR_H(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?"); double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this"); BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctation joins"); INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing"); INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug"); - STRING_VAR_H(numeric_punctuation, ".,", - "Punct. chs expected WITHIN numbers"); + STRING_VAR_H(numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers"); INT_VAR_H(x_ht_acceptance_tolerance, 8, "Max allowed deviation of blob top outside of font data"); INT_VAR_H(x_ht_min_change, 8, "Min change in xht before actually trying it"); INT_VAR_H(superscript_debug, 0, "Debug level for sub & superscript fixer"); - double_VAR_H(superscript_worse_certainty, 2.0, "How many times worse " + double_VAR_H(superscript_worse_certainty, 2.0, + "How many times worse " "certainty does a superscript position glyph need to be for us " "to try classifying it as a char with a different baseline?"); - double_VAR_H(superscript_bettered_certainty, 0.97, "What reduction in " + double_VAR_H(superscript_bettered_certainty, 0.97, + "What reduction in " "badness do we think sufficient to choose a superscript over " "what we'd thought. For example, a value of 0.6 means we want " "to reduce badness of certainty by 40%"); @@ -1018,13 +945,12 @@ class Tesseract : public Wordrec { "above the baseline for us to reconsider whether it's a " "subscript."); double_VAR_H(superscript_min_y_bottom, 0.3, - "Minimum bottom of a character measured as a multiple of " - "x-height above the baseline for us to reconsider whether it's " - "a superscript."); + "Minimum bottom of a character measured as a multiple of " + "x-height above the baseline for us to reconsider whether it's " + "a superscript."); BOOL_VAR_H(tessedit_write_block_separators, false, "Write block separators in output"); - BOOL_VAR_H(tessedit_write_rep_codes, false, - "Write repetition char code"); + BOOL_VAR_H(tessedit_write_rep_codes, false, "Write repetition char code"); BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file"); BOOL_VAR_H(tessedit_create_txt, false, "Write .txt output file"); BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file"); @@ -1032,11 +958,9 @@ class Tesseract : public Wordrec { BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file"); BOOL_VAR_H(textonly_pdf, false, "Create PDF with only one invisible text layer"); - STRING_VAR_H(unrecognised_char, "|", - "Output char for unidentified blobs"); + STRING_VAR_H(unrecognised_char, "|", "Output char for unidentified blobs"); INT_VAR_H(suspect_level, 99, "Suspect marker level"); - INT_VAR_H(suspect_space_level, 100, - "Min suspect level for rejecting spaces"); + INT_VAR_H(suspect_space_level, 100, "Min suspect level for rejecting spaces"); INT_VAR_H(suspect_short_words, 2, "Don't Suspect dict wds longer than this"); BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected"); double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit"); @@ -1051,10 +975,8 @@ class Tesseract : public Wordrec { INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm"); BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug"); BOOL_VAR_H(tessedit_flip_0O, true, "Contextual 0O O0 flips"); - double_VAR_H(tessedit_lower_flip_hyphen, 1.5, - "Aspect ratio dot/hyphen test"); - double_VAR_H(tessedit_upper_flip_hyphen, 1.8, - "Aspect ratio dot/hyphen test"); + double_VAR_H(tessedit_lower_flip_hyphen, 1.5, "Aspect ratio dot/hyphen test"); + double_VAR_H(tessedit_upper_flip_hyphen, 1.8, "Aspect ratio dot/hyphen test"); BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector"); BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test"); BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Don't double check"); @@ -1065,8 +987,7 @@ class Tesseract : public Wordrec { BOOL_VAR_H(rej_alphas_in_number_perm, false, "Extend permuter check"); double_VAR_H(rej_whole_of_mostly_reject_word_fract, 0.85, "if >this fract"); INT_VAR_H(tessedit_image_border, 2, "Rej blbs near image edge limit"); - STRING_VAR_H(ok_repeated_ch_non_alphanum_wds, "-?*\075", - "Allow NN to unrej"); + STRING_VAR_H(ok_repeated_ch_non_alphanum_wds, "-?*\075", "Allow NN to unrej"); STRING_VAR_H(conflict_set_I_l_1, "Il1[]", "Il1 conflict set"); INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this"); BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes"); @@ -1107,14 +1028,12 @@ class Tesseract : public Wordrec { "Page separator (default is form feed control character)"); //// ambigsrecog.cpp ///////////////////////////////////////////////////////// - FILE *init_recog_training(const STRING &fname); - void recog_training_segmented(const STRING &fname, - PAGE_RES *page_res, - volatile ETEXT_DESC *monitor, - FILE *output_file); - void ambigs_classify_and_output(const char *label, - PAGE_RES_IT* pr_it, - FILE *output_file); + FILE* init_recog_training(const STRING& fname); + void recog_training_segmented(const STRING& fname, PAGE_RES* page_res, + volatile ETEXT_DESC* monitor, + FILE* output_file); + void ambigs_classify_and_output(const char* label, PAGE_RES_IT* pr_it, + FILE* output_file); private: // The filename of a backup config file. If not null, then we currently diff --git a/src/ccmain/tessvars.cpp b/src/ccmain/tessvars.cpp index d7d7b6cf51..6ef27edda6 100644 --- a/src/ccmain/tessvars.cpp +++ b/src/ccmain/tessvars.cpp @@ -19,6 +19,6 @@ #include -#include "tessvars.h" +#include "tessvars.h" -FILE *debug_fp = stderr; // write debug stuff here +FILE* debug_fp = stderr; // write debug stuff here diff --git a/src/ccmain/tessvars.h b/src/ccmain/tessvars.h index fcb5beb924..5a2c9b5fa5 100644 --- a/src/ccmain/tessvars.h +++ b/src/ccmain/tessvars.h @@ -17,11 +17,10 @@ * **********************************************************************/ -#ifndef TESSVARS_H -#define TESSVARS_H +#ifndef TESSVARS_H +#define TESSVARS_H #include - -extern FILE *debug_fp; // write debug stuff here +extern FILE* debug_fp; // write debug stuff here #endif diff --git a/src/ccmain/tfacepp.cpp b/src/ccmain/tfacepp.cpp index 746bf8531a..2c7fe4e240 100644 --- a/src/ccmain/tfacepp.cpp +++ b/src/ccmain/tfacepp.cpp @@ -28,8 +28,6 @@ #define MAX_UNDIVIDED_LENGTH 24 - - /********************************************************************** * recog_word * @@ -37,9 +35,10 @@ * Convert the output back to editor form. **********************************************************************/ namespace tesseract { -void Tesseract::recog_word(WERD_RES *word) { - if (wordrec_skip_no_truth_words && (word->blamer_bundle == nullptr || - word->blamer_bundle->incorrect_result_reason() == IRR_NO_TRUTH)) { +void Tesseract::recog_word(WERD_RES* word) { + if (wordrec_skip_no_truth_words && + (word->blamer_bundle == nullptr || + word->blamer_bundle->incorrect_result_reason() == IRR_NO_TRUTH)) { if (classify_debug_level) tprintf("No truth for word - skipping\n"); word->tess_failed = true; return; @@ -48,10 +47,11 @@ void Tesseract::recog_word(WERD_RES *word) { recog_word_recursive(word); word->SetupBoxWord(); if (word->best_choice->length() != word->box_word->length()) { - tprintf("recog_word ASSERT FAIL String:\"%s\"; " - "Strlen=%d; #Blobs=%d\n", - word->best_choice->debug_string().string(), - word->best_choice->length(), word->box_word->length()); + tprintf( + "recog_word ASSERT FAIL String:\"%s\"; " + "Strlen=%d; #Blobs=%d\n", + word->best_choice->debug_string().string(), word->best_choice->length(), + word->box_word->length()); } ASSERT_HOST(word->best_choice->length() == word->box_word->length()); // Check that the ratings matrix size matches the sum of all the @@ -64,8 +64,8 @@ void Tesseract::recog_word(WERD_RES *word) { if (tessedit_override_permuter) { /* Override the permuter type if a straight dictionary check disagrees. */ uint8_t perm_type = word->best_choice->permuter(); - if ((perm_type != SYSTEM_DAWG_PERM) && - (perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) { + if ((perm_type != SYSTEM_DAWG_PERM) && (perm_type != FREQ_DAWG_PERM) && + (perm_type != USER_DAWG_PERM)) { uint8_t real_dict_perm_type = dict_word(*word->best_choice); if (((real_dict_perm_type == SYSTEM_DAWG_PERM) || (real_dict_perm_type == FREQ_DAWG_PERM) || @@ -77,8 +77,8 @@ void Tesseract::recog_word(WERD_RES *word) { } if (tessedit_rejection_debug && perm_type != word->best_choice->permuter()) { - tprintf("Permuter Type Flipped from %d to %d\n", - perm_type, word->best_choice->permuter()); + tprintf("Permuter Type Flipped from %d to %d\n", perm_type, + word->best_choice->permuter()); } } // Factored out from control.cpp @@ -94,14 +94,13 @@ void Tesseract::recog_word(WERD_RES *word) { } } - /********************************************************************** * recog_word_recursive * * Convert the word to tess form and pass it to the tess segmenter. * Convert the output back to editor form. **********************************************************************/ -void Tesseract::recog_word_recursive(WERD_RES *word) { +void Tesseract::recog_word_recursive(WERD_RES* word) { int word_length = word->chopped_word->NumBlobs(); // no of blobs if (word_length > MAX_UNDIVIDED_LENGTH) { return split_and_recog_word(word); @@ -112,10 +111,11 @@ void Tesseract::recog_word_recursive(WERD_RES *word) { // Do sanity checks and minor fixes on best_choice. if (word->best_choice->length() > word_length) { word->best_choice->make_bad(); // should never happen - tprintf("recog_word: Discarded long string \"%s\"" - " (%d characters vs %d blobs)\n", - word->best_choice->unichar_string().string(), - word->best_choice->length(), word_length); + tprintf( + "recog_word: Discarded long string \"%s\"" + " (%d characters vs %d blobs)\n", + word->best_choice->unichar_string().string(), + word->best_choice->length(), word_length); tprintf("Word is at:"); word->word->bounding_box().print(); } @@ -128,14 +128,13 @@ void Tesseract::recog_word_recursive(WERD_RES *word) { } } - /********************************************************************** * split_and_recog_word * * Split the word into 2 smaller pieces at the largest gap. * Recognize the pieces and stick the results back together. **********************************************************************/ -void Tesseract::split_and_recog_word(WERD_RES *word) { +void Tesseract::split_and_recog_word(WERD_RES* word) { // Find the biggest blob gap in the chopped_word. int bestgap = -INT32_MAX; int split_index = 0; @@ -150,8 +149,8 @@ void Tesseract::split_and_recog_word(WERD_RES *word) { } ASSERT_HOST(split_index > 0); - WERD_RES *word2 = nullptr; - BlamerBundle *orig_bb = nullptr; + WERD_RES* word2 = nullptr; + BlamerBundle* orig_bb = nullptr; split_word(word, split_index, &word2, &orig_bb); // Recognize the first part of the word. @@ -162,7 +161,6 @@ void Tesseract::split_and_recog_word(WERD_RES *word) { join_words(word, word2, orig_bb); } - /********************************************************************** * split_word * @@ -173,22 +171,20 @@ void Tesseract::split_and_recog_word(WERD_RES *word) { * and will now be owned by the caller. New blamer bundles are forged for the * two pieces. **********************************************************************/ -void Tesseract::split_word(WERD_RES *word, - int split_pt, - WERD_RES **right_piece, - BlamerBundle **orig_blamer_bundle) const { - ASSERT_HOST(split_pt >0 && split_pt < word->chopped_word->NumBlobs()); +void Tesseract::split_word(WERD_RES* word, int split_pt, WERD_RES** right_piece, + BlamerBundle** orig_blamer_bundle) const { + ASSERT_HOST(split_pt > 0 && split_pt < word->chopped_word->NumBlobs()); // Save a copy of the blamer bundle so we can try to reconstruct it below. - BlamerBundle *orig_bb = + BlamerBundle* orig_bb = word->blamer_bundle ? new BlamerBundle(*word->blamer_bundle) : nullptr; - WERD_RES *word2 = new WERD_RES(*word); + WERD_RES* word2 = new WERD_RES(*word); // blow away the copied chopped_word, as we want to work with // the blobs from the input chopped_word so seam_arrays can be merged. - TWERD *chopped = word->chopped_word; - TWERD *chopped2 = new TWERD; + TWERD* chopped = word->chopped_word; + TWERD* chopped2 = new TWERD; chopped2->blobs.reserve(chopped->NumBlobs() - split_pt); for (int i = split_pt; i < chopped->NumBlobs(); ++i) { chopped2->blobs.push_back(chopped->blobs[i]); @@ -198,7 +194,7 @@ void Tesseract::split_word(WERD_RES *word, delete word2->chopped_word; word2->chopped_word = nullptr; - const UNICHARSET &unicharset = *word->uch_set; + const UNICHARSET& unicharset = *word->uch_set; word->ClearResults(); word2->ClearResults(); word->chopped_word = chopped; @@ -214,15 +210,14 @@ void Tesseract::split_word(WERD_RES *word, word2->blamer_bundle = new BlamerBundle(); orig_bb->SplitBundle(chopped->blobs.back()->bounding_box().right(), word2->chopped_word->blobs[0]->bounding_box().left(), - wordrec_debug_blamer, - word->blamer_bundle, word2->blamer_bundle); + wordrec_debug_blamer, word->blamer_bundle, + word2->blamer_bundle); } *right_piece = word2; *orig_blamer_bundle = orig_bb; } - /********************************************************************** * join_words * @@ -231,9 +226,8 @@ void Tesseract::split_word(WERD_RES *word, * onto the right of word and then delete word2. * Also, if orig_bb is provided, stitch it back into word. **********************************************************************/ -void Tesseract::join_words(WERD_RES *word, - WERD_RES *word2, - BlamerBundle *orig_bb) const { +void Tesseract::join_words(WERD_RES* word, WERD_RES* word2, + BlamerBundle* orig_bb) const { TBOX prev_box = word->chopped_word->blobs.back()->bounding_box(); TBOX blob_box = word2->chopped_word->blobs[0]->bounding_box(); // Tack the word2 outputs onto the end of the word outputs. @@ -243,8 +237,9 @@ void Tesseract::join_words(WERD_RES *word, word2->rebuild_word->blobs.clear(); TPOINT split_pt; split_pt.x = (prev_box.right() + blob_box.left()) / 2; - split_pt.y = (prev_box.top() + prev_box.bottom() + - blob_box.top() + blob_box.bottom()) / 4; + split_pt.y = (prev_box.top() + prev_box.bottom() + blob_box.top() + + blob_box.bottom()) / + 4; // Move the word2 seams onto the end of the word1 seam_array. // Since the seam list is one element short, an empty seam marking the // end of the last blob in the first word is needed first. @@ -281,16 +276,15 @@ void Tesseract::join_words(WERD_RES *word, // finished with them. int bc2_index = 1; for (bc2_it.forward(); !bc2_it.at_first(); bc2_it.forward(), ++bc2_index) { - if (total_joined_choices >= kTooManyAltChoices && - bc2_index > kAltsPerPiece) + if (total_joined_choices >= kTooManyAltChoices && bc2_index > kAltsPerPiece) break; int bc1_index = 0; for (bc1_it.move_to_first(); bc1_index < num_word1_choices; - ++bc1_index, bc1_it.forward()) { + ++bc1_index, bc1_it.forward()) { if (total_joined_choices >= kTooManyAltChoices && bc1_index > kAltsPerPiece) break; - WERD_CHOICE *wc = new WERD_CHOICE(*bc1_it.data()); + WERD_CHOICE* wc = new WERD_CHOICE(*bc1_it.data()); *wc += *bc2_it.data(); jc_it.add_after_then_move(wc); ++total_joined_choices; @@ -319,5 +313,4 @@ void Tesseract::join_words(WERD_RES *word, delete word2; } - } // namespace tesseract diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index 5a193c32a9..56696c6c51 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -30,26 +30,24 @@ namespace tesseract { ImageThresholder::ImageThresholder() - : pix_(nullptr), - image_width_(0), image_height_(0), - pix_channels_(0), pix_wpl_(0), - scale_(1), yres_(300), estimated_res_(300) { + : pix_(nullptr), + image_width_(0), + image_height_(0), + pix_channels_(0), + pix_wpl_(0), + scale_(1), + yres_(300), + estimated_res_(300) { SetRectangle(0, 0, 0, 0); } -ImageThresholder::~ImageThresholder() { - Clear(); -} +ImageThresholder::~ImageThresholder() { Clear(); } // Destroy the Pix if there is one, freeing memory. -void ImageThresholder::Clear() { - pixDestroy(&pix_); -} +void ImageThresholder::Clear() { pixDestroy(&pix_); } // Return true if no image has been set. -bool ImageThresholder::IsEmpty() const { - return pix_ == nullptr; -} +bool ImageThresholder::IsEmpty() const { return pix_ == nullptr; } // SetImage makes a copy of all the image data, so it may be deleted // immediately after this call. @@ -59,57 +57,59 @@ bool ImageThresholder::IsEmpty() const { // Binary images of 1 bit per pixel may also be given but they must be // byte packed with the MSB of the first byte being the first pixel, and a // one pixel is WHITE. For binary images set bytes_per_pixel=0. -void ImageThresholder::SetImage(const unsigned char* imagedata, - int width, int height, - int bytes_per_pixel, int bytes_per_line) { +void ImageThresholder::SetImage(const unsigned char* imagedata, int width, + int height, int bytes_per_pixel, + int bytes_per_line) { int bpp = bytes_per_pixel * 8; if (bpp == 0) bpp = 1; Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); l_uint32* data = pixGetData(pix); int wpl = pixGetWpl(pix); switch (bpp) { - case 1: - for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { - for (int x = 0; x < width; ++x) { - if (imagedata[x / 8] & (0x80 >> (x % 8))) - CLEAR_DATA_BIT(data, x); - else - SET_DATA_BIT(data, x); + case 1: + for (int y = 0; y < height; + ++y, data += wpl, imagedata += bytes_per_line) { + for (int x = 0; x < width; ++x) { + if (imagedata[x / 8] & (0x80 >> (x % 8))) + CLEAR_DATA_BIT(data, x); + else + SET_DATA_BIT(data, x); + } } - } - break; + break; - case 8: - // Greyscale just copies the bytes in the right order. - for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { - for (int x = 0; x < width; ++x) - SET_DATA_BYTE(data, x, imagedata[x]); - } - break; + case 8: + // Greyscale just copies the bytes in the right order. + for (int y = 0; y < height; + ++y, data += wpl, imagedata += bytes_per_line) { + for (int x = 0; x < width; ++x) SET_DATA_BYTE(data, x, imagedata[x]); + } + break; - case 24: - // Put the colors in the correct places in the line buffer. - for (int y = 0; y < height; ++y, imagedata += bytes_per_line) { - for (int x = 0; x < width; ++x, ++data) { - SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]); - SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]); - SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]); + case 24: + // Put the colors in the correct places in the line buffer. + for (int y = 0; y < height; ++y, imagedata += bytes_per_line) { + for (int x = 0; x < width; ++x, ++data) { + SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]); + SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]); + SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]); + } } - } - break; + break; - case 32: - // Maintain byte order consistency across different endianness. - for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) { - for (int x = 0; x < width; ++x) { - data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) | - (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]; + case 32: + // Maintain byte order consistency across different endianness. + for (int y = 0; y < height; + ++y, imagedata += bytes_per_line, data += wpl) { + for (int x = 0; x < width; ++x) { + data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) | + (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]; + } } - } - break; + break; - default: - tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp); + default: + tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp); } pixSetYRes(pix, 300); SetImage(pix); @@ -129,9 +129,9 @@ void ImageThresholder::SetRectangle(int left, int top, int width, int height) { // original image (not just within the rectangle). // Left and top are enough with top-down coordinates, but // the height of the rectangle and the image are needed for bottom-up. -void ImageThresholder::GetImageSizes(int* left, int* top, - int* width, int* height, - int* imagewidth, int* imageheight) { +void ImageThresholder::GetImageSizes(int* left, int* top, int* width, + int* height, int* imagewidth, + int* imageheight) { *left = rect_left_; *top = rect_top_; *width = rect_width_; @@ -146,8 +146,7 @@ void ImageThresholder::GetImageSizes(int* left, int* top, // immediately after, but may not go away until after the Thresholder has // finished with it. void ImageThresholder::SetImage(const Pix* pix) { - if (pix_ != nullptr) - pixDestroy(&pix_); + if (pix_ != nullptr) pixDestroy(&pix_); Pix* src = const_cast(pix); int depth; pixGetDimensions(src, &image_width_, &image_height_, &depth); @@ -216,8 +215,8 @@ Pix* ImageThresholder::GetPixRectThresholds() { Pix* pix_thresholds = pixCreate(width, height, 8); int threshold = thresholds[0] > 0 ? thresholds[0] : 128; pixSetAllArbitrary(pix_thresholds, threshold); - delete [] thresholds; - delete [] hi_values; + delete[] thresholds; + delete[] hi_values; return pix_thresholds; } @@ -252,8 +251,8 @@ Pix* ImageThresholder::GetPixRectGrey() { Pix* pix = GetPixRect(); // May have to be reduced to grey. int depth = pixGetDepth(pix); if (depth != 8) { - Pix* result = depth < 8 ? pixConvertTo8(pix, false) - : pixConvertRGBToLuminance(pix); + Pix* result = + depth < 8 ? pixConvertTo8(pix, false) : pixConvertRGBToLuminance(pix); pixDestroy(&pix); return result; } @@ -272,8 +271,8 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix, // only use opencl if compiled w/ OpenCL and selected device is opencl #ifdef USE_OPENCL OpenclDevice od; - if ((num_channels == 4 || num_channels == 1) && - od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) { + if ((num_channels == 4 || num_channels == 1) && od.selectedDeviceIsOpenCL() && + rect_top_ == 0 && rect_left_ == 0) { od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, thresholds, hi_values, out_pix /*pix_OCL*/, rect_height_, rect_width_, @@ -284,8 +283,8 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix, #ifdef USE_OPENCL } #endif - delete [] thresholds; - delete [] hi_values; + delete[] thresholds; + delete[] hi_values; PERF_COUNT_END } @@ -294,8 +293,7 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix, /// from the class, using thresholds/hi_values to the output pix. /// NOTE that num_channels is the size of the thresholds and hi_values // arrays and also the bytes per pixel in src_pix. -void ImageThresholder::ThresholdRectToPix(Pix* src_pix, - int num_channels, +void ImageThresholder::ThresholdRectToPix(Pix* src_pix, int num_channels, const int* thresholds, const int* hi_values, Pix** pix) const { diff --git a/src/ccmain/thresholder.h b/src/ccmain/thresholder.h index 30b1d37cc6..0642cd22fa 100644 --- a/src/ccmain/thresholder.h +++ b/src/ccmain/thresholder.h @@ -67,18 +67,12 @@ class TESS_API ImageThresholder { int* imagewidth, int* imageheight); /// Return true if the source image is color. - bool IsColor() const { - return pix_channels_ >= 3; - } + bool IsColor() const { return pix_channels_ >= 3; } /// Returns true if the source image is binary. - bool IsBinary() const { - return pix_channels_ == 0; - } + bool IsBinary() const { return pix_channels_ == 0; } - int GetScaleFactor() const { - return scale_; - } + int GetScaleFactor() const { return scale_; } // Set the resolution of the source image in pixels per inch. // This should be called right after SetImage(), and will let us return @@ -87,25 +81,17 @@ class TESS_API ImageThresholder { yres_ = ppi; estimated_res_ = ppi; } - int GetSourceYResolution() const { - return yres_; - } - int GetScaledYResolution() const { - return scale_ * yres_; - } + int GetSourceYResolution() const { return yres_; } + int GetScaledYResolution() const { return scale_ * yres_; } // Set the resolution of the source image in pixels per inch, as estimated // by the thresholder from the text size found during thresholding. // This value will be used to set internal size thresholds during recognition // and will not influence the output "point size." The default value is // the same as the source resolution. (yres_) - void SetEstimatedResolution(int ppi) { - estimated_res_ = ppi; - } + void SetEstimatedResolution(int ppi) { estimated_res_ = ppi; } // Returns the estimated resolution, including any active scaling. // This value will be used to set internal size thresholds during recognition. - int GetScaledEstimatedResolution() const { - return scale_ * estimated_res_; - } + int GetScaledEstimatedResolution() const { return scale_ * estimated_res_; } /// Pix vs raw, which to use? Pix is the preferred input for efficiency, /// since raw buffers are copied. @@ -151,8 +137,8 @@ class TESS_API ImageThresholder { /// Return true if we are processing the full image. bool IsFullImage() const { - return rect_left_ == 0 && rect_top_ == 0 && - rect_width_ == image_width_ && rect_height_ == image_height_; + return rect_left_ == 0 && rect_top_ == 0 && rect_width_ == image_width_ && + rect_height_ == image_height_; } // Otsu thresholds the rectangle, taking the rectangle from *this. @@ -162,27 +148,26 @@ class TESS_API ImageThresholder { /// from the class, using thresholds/hi_values to the output pix. /// NOTE that num_channels is the size of the thresholds and hi_values // arrays and also the bytes per pixel in src_pix. - void ThresholdRectToPix(Pix* src_pix, int num_channels, - const int* thresholds, const int* hi_values, - Pix** pix) const; + void ThresholdRectToPix(Pix* src_pix, int num_channels, const int* thresholds, + const int* hi_values, Pix** pix) const; protected: /// Clone or other copy of the source Pix. /// The pix will always be PixDestroy()ed on destruction of the class. - Pix* pix_; + Pix* pix_; - int image_width_; //< Width of source pix_. - int image_height_; //< Height of source pix_. - int pix_channels_; //< Number of 8-bit channels in pix_. - int pix_wpl_; //< Words per line of pix_. + int image_width_; //< Width of source pix_. + int image_height_; //< Height of source pix_. + int pix_channels_; //< Number of 8-bit channels in pix_. + int pix_wpl_; //< Words per line of pix_. // Limits of image rectangle to be processed. - int scale_; //< Scale factor from original image. - int yres_; //< y pixels/inch in source image. - int estimated_res_; //< Resolution estimate from text size. - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; + int scale_; //< Scale factor from original image. + int yres_; //< y pixels/inch in source image. + int estimated_res_; //< Resolution estimate from text size. + int rect_left_; + int rect_top_; + int rect_width_; + int rect_height_; }; } // namespace tesseract. diff --git a/src/ccmain/werdit.cpp b/src/ccmain/werdit.cpp index 274ec0b429..8c30239ce4 100644 --- a/src/ccmain/werdit.cpp +++ b/src/ccmain/werdit.cpp @@ -38,8 +38,7 @@ PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box) { WERD* word = word_res->word; if (word->bounding_box().overlap(selection_box)) { C_BLOB_IT blob_it(word->cblob_list()); - for (blob_it.mark_cycle_pt(); - !blob_it.cycled_list(); blob_it.forward()) { + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* blob = blob_it.data(); if (blob->bounding_box().overlap(selection_box)) { new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob)); diff --git a/src/ccmain/werdit.h b/src/ccmain/werdit.h index 4366dbb08c..bc3b9d42aa 100644 --- a/src/ccmain/werdit.h +++ b/src/ccmain/werdit.h @@ -17,10 +17,10 @@ * **********************************************************************/ -#ifndef WERDIT_H -#define WERDIT_H +#ifndef WERDIT_H +#define WERDIT_H -#include "pageres.h" +#include "pageres.h" PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box); diff --git a/src/ccstruct/blamer.cpp b/src/ccstruct/blamer.cpp index a84e87f20d..ddda5fb17d 100644 --- a/src/ccstruct/blamer.cpp +++ b/src/ccstruct/blamer.cpp @@ -38,26 +38,19 @@ const char kBlameNoTruthSplit[] = "no_tr_spl"; const char kBlameNoTruth[] = "no_tr"; const char kBlameUnknown[] = "unkn"; -const char * const kIncorrectResultReasonNames[] = { - kBlameCorrect, - kBlameClassifier, - kBlameChopper, - kBlameClassLMTradeoff, - kBlamePageLayout, - kBlameSegsearchHeur, - kBlameSegsearchPP, - kBlameClassOldLMTradeoff, - kBlameAdaption, - kBlameNoTruthSplit, - kBlameNoTruth, - kBlameUnknown -}; +const char* const kIncorrectResultReasonNames[] = { + kBlameCorrect, kBlameClassifier, + kBlameChopper, kBlameClassLMTradeoff, + kBlamePageLayout, kBlameSegsearchHeur, + kBlameSegsearchPP, kBlameClassOldLMTradeoff, + kBlameAdaption, kBlameNoTruthSplit, + kBlameNoTruth, kBlameUnknown}; -const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) { +const char* BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) { return kIncorrectResultReasonNames[irr]; } -const char *BlamerBundle::IncorrectReason() const { +const char* BlamerBundle::IncorrectReason() const { return kIncorrectResultReasonNames[incorrect_result_reason_]; } @@ -120,9 +113,8 @@ bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE* word_choice) const { return truth_str == normed_choice_str; } -void BlamerBundle::FillDebugString(const STRING &msg, - const WERD_CHOICE *choice, - STRING *debug) { +void BlamerBundle::FillDebugString(const STRING& msg, const WERD_CHOICE* choice, + STRING* debug) { (*debug) += "Truth "; for (int i = 0; i < this->truth_text_.length(); ++i) { (*debug) += this->truth_text_[i]; @@ -150,15 +142,15 @@ void BlamerBundle::SetupNormTruthWord(const DENORM& denorm) { TPOINT norm_topleft; TPOINT norm_botright; for (int b = 0; b < truth_word_.length(); ++b) { - const TBOX &box = truth_word_.BlobBox(b); + const TBOX& box = truth_word_.BlobBox(b); topleft.x = box.left(); topleft.y = box.top(); botright.x = box.right(); botright.y = box.bottom(); denorm.NormTransform(nullptr, topleft, &norm_topleft); denorm.NormTransform(nullptr, botright, &norm_botright); - TBOX norm_box(norm_topleft.x, norm_botright.y, - norm_botright.x, norm_topleft.y); + TBOX norm_box(norm_topleft.x, norm_botright.y, norm_botright.x, + norm_topleft.y); norm_truth_word_.InsertBox(b, norm_box); } } @@ -173,8 +165,7 @@ void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, // Find truth boxes that correspond to the split in the blobs. int b; int begin2_truth_index = -1; - if (incorrect_result_reason_ != IRR_NO_TRUTH && - truth_has_char_boxes_) { + if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) { debug_str = "Looking for truth split at"; debug_str.add_str_int(" end1_x ", word1_right); debug_str.add_str_int(" begin2_x ", word2_left); @@ -184,9 +175,9 @@ void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, for (b = 1; b < norm_truth_word_.length(); ++b) { norm_truth_word_.BlobBox(b).print_to_str(&debug_str); if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < - norm_box_tolerance_) && + norm_box_tolerance_) && (abs(word2_left - norm_truth_word_.BlobBox(b).left()) < - norm_box_tolerance_)) { + norm_box_tolerance_)) { begin2_truth_index = b; debug_str += "Split found"; break; @@ -202,7 +193,7 @@ void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, bundle1->norm_box_tolerance_ = norm_box_tolerance_; bundle2->truth_has_char_boxes_ = true; bundle2->norm_box_tolerance_ = norm_box_tolerance_; - BlamerBundle *curr_bb = bundle1; + BlamerBundle* curr_bb = bundle1; for (b = 0; b < norm_truth_word_.length(); ++b) { if (b == begin2_truth_index) curr_bb = bundle2; curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b)); @@ -214,8 +205,7 @@ void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, bundle2->incorrect_result_reason_ = IRR_NO_TRUTH; } else { debug_str += "Truth split not found"; - debug_str += truth_has_char_boxes_ ? - "\n" : " (no truth char boxes)\n"; + debug_str += truth_has_char_boxes_ ? "\n" : " (no truth char boxes)\n"; bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug); bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug); } @@ -258,28 +248,27 @@ void BlamerBundle::BlameClassifier(const UNICHARSET& unicharset, const TBOX& blob_box, const BLOB_CHOICE_LIST& choices, bool debug) { - if (!truth_has_char_boxes_ || - incorrect_result_reason_ != IRR_CORRECT) + if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) return; // Nothing to do here. for (int b = 0; b < norm_truth_word_.length(); ++b) { - const TBOX &truth_box = norm_truth_word_.BlobBox(b); + const TBOX& truth_box = norm_truth_word_.BlobBox(b); // Note that we are more strict on the bounding box boundaries here // than in other places (chopper, segmentation search), since we do // not have the ability to check the previous and next bounding box. - if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_/2)) { + if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) { bool found = false; bool incorrect_adapted = false; UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID; - const char *truth_str = truth_text_[b].string(); + const char* truth_str = truth_text_[b].string(); // We promise not to modify the list or its contents, using a // const BLOB_CHOICE* below. BLOB_CHOICE_IT choices_it(const_cast(&choices)); for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); - choices_it.forward()) { + choices_it.forward()) { const BLOB_CHOICE* choice = choices_it.data(); - if (strcmp(truth_str, unicharset.get_normed_unichar( - choice->unichar_id())) == 0) { + if (strcmp(truth_str, + unicharset.get_normed_unichar(choice->unichar_id())) == 0) { found = true; break; } else if (choice->IsAdapted()) { @@ -320,7 +309,7 @@ void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) { int16_t truth_x = -1; while (box_index < truth_word_.length() && blob_index < num_blobs) { truth_x = norm_truth_word_.BlobBox(box_index).right(); - TBLOB * curr_blob = word->chopped_word->blobs[blob_index]; + TBLOB* curr_blob = word->chopped_word->blobs[blob_index]; if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) { ++blob_index; continue; // encountered an extra chop, keep looking @@ -338,7 +327,7 @@ void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) { debug_str.add_str_int("Detected missing chop (tolerance=", norm_box_tolerance_); debug_str += ") at Bounding Box="; - TBLOB * curr_blob = word->chopped_word->blobs[blob_index]; + TBLOB* curr_blob = word->chopped_word->blobs[blob_index]; curr_blob->bounding_box().print_to_str(&debug_str); debug_str.add_str_int("\nNo chop for truth at x=", truth_x); } else { @@ -348,7 +337,7 @@ void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) { } debug_str += "\nMaximally chopped word boxes:\n"; for (blob_index = 0; blob_index < num_blobs; ++blob_index) { - TBLOB * curr_blob = word->chopped_word->blobs[blob_index]; + TBLOB* curr_blob = word->chopped_word->blobs[blob_index]; curr_blob->bounding_box().print_to_str(&debug_str); debug_str += '\n'; } @@ -366,16 +355,16 @@ void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) { // Blames the classifier if best_choice is classifier's top choice and is a // dictionary word (i.e. language model could not have helped). // Otherwise, blames the language model (formerly permuter word adjustment). -void BlamerBundle::BlameClassifierOrLangModel( - const WERD_RES* word, - const UNICHARSET& unicharset, bool valid_permuter, bool debug) { +void BlamerBundle::BlameClassifierOrLangModel(const WERD_RES* word, + const UNICHARSET& unicharset, + bool valid_permuter, bool debug) { if (valid_permuter) { // Find out whether best choice is a top choice. best_choice_is_dict_and_top_choice_ = true; for (int i = 0; i < word->best_choice->length(); ++i) { BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i)); ASSERT_HOST(!blob_choice_it.empty()); - BLOB_CHOICE *first_choice = nullptr; + BLOB_CHOICE* first_choice = nullptr; for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list(); blob_choice_it.forward()) { // find first non-fragment choice if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) { @@ -399,7 +388,7 @@ void BlamerBundle::BlameClassifierOrLangModel( debug_str = "Classifier/Old LM tradeoff is to blame"; } SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER - : IRR_CLASS_OLD_LM_TRADEOFF, + : IRR_CLASS_OLD_LM_TRADEOFF, debug_str, word->best_choice, debug); } @@ -417,8 +406,8 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD* word, bool debug) { if (num_blobs == 0) return; // No blobs to play with. int blob_index = 0; int16_t next_box_x = word->blobs[blob_index]->bounding_box().right(); - for (int truth_idx = 0; blob_index < num_blobs && - truth_idx < norm_truth_word_.length(); + for (int truth_idx = 0; + blob_index < num_blobs && truth_idx < norm_truth_word_.length(); ++blob_index) { ++next_box_col; int16_t curr_box_x = next_box_x; @@ -434,18 +423,20 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD* word, bool debug) { (blob_index + 1 >= num_blobs || // next box can't be included next_box_x > truth_x + norm_box_tolerance_)) { correct_segmentation_cols_.push_back(curr_box_col); - correct_segmentation_rows_.push_back(next_box_col-1); + correct_segmentation_rows_.push_back(next_box_col - 1); ++truth_idx; debug_str.add_str_int("col=", curr_box_col); - debug_str.add_str_int(" row=", next_box_col-1); + debug_str.add_str_int(" row=", next_box_col - 1); debug_str += "\n"; curr_box_col = next_box_col; } } if (blob_index < num_blobs || // trailing blobs correct_segmentation_cols_.length() != norm_truth_word_.length()) { - debug_str.add_str_int("Blamer failed to find correct segmentation" - " (tolerance=", norm_box_tolerance_); + debug_str.add_str_int( + "Blamer failed to find correct segmentation" + " (tolerance=", + norm_box_tolerance_); if (blob_index >= num_blobs) debug_str += " blob == nullptr"; debug_str += ")\n"; debug_str.add_str_int(" path length ", correct_segmentation_cols_.length()); @@ -458,11 +449,10 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD* word, bool debug) { } // Returns true if a guided segmentation search is needed. -bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const { +bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE* best_choice) const { return incorrect_result_reason_ == IRR_CORRECT && - !segsearch_is_looking_for_blame_ && - truth_has_char_boxes_ && - !ChoiceIsCorrect(best_choice); + !segsearch_is_looking_for_blame_ && truth_has_char_boxes_ && + !ChoiceIsCorrect(best_choice); } // Setup ready to guide the segmentation search to the correct segmentation. @@ -470,9 +460,9 @@ bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const { // It calls into LMPainPoints::GenerateForBlamer by pre-binding the // WERD_RES, and the LMPainPoints itself. // pp_cb must be a permanent callback, and should be deleted by the caller. -void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, +void BlamerBundle::InitForSegSearch(const WERD_CHOICE* best_choice, MATRIX* ratings, UNICHAR_ID wildcard_id, - bool debug, STRING *debug_str, + bool debug, STRING* debug_str, TessResultCallback2* cb) { segsearch_is_looking_for_blame_ = true; if (debug) { @@ -486,8 +476,7 @@ void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]); *debug_str += "\n"; if (!ratings->Classified(correct_segmentation_cols_[idx], - correct_segmentation_rows_[idx], - wildcard_id) && + correct_segmentation_rows_[idx], wildcard_id) && !cb->Run(correct_segmentation_cols_[idx], correct_segmentation_rows_[idx])) { segsearch_is_looking_for_blame_ = false; @@ -503,8 +492,8 @@ bool BlamerBundle::GuidedSegsearchStillGoing() const { } // The segmentation search has ended. Sets the blame appropriately. -void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, - bool debug, STRING *debug_str) { +void BlamerBundle::FinishSegSearch(const WERD_CHOICE* best_choice, bool debug, + STRING* debug_str) { // If we are still looking for blame (i.e. best_choice is incorrect, but a // path representing the correct segmentation could be constructed), we can // blame segmentation search pain point prioritization if the rating of the @@ -523,13 +512,11 @@ void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, *debug_str += " with permuter "; *debug_str += best_choice->permuter_name(); SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug); - } else if (best_correctly_segmented_rating_ < - best_choice->rating()) { + } else if (best_correctly_segmented_rating_ < best_choice->rating()) { *debug_str += "Correct segmentation state was not explored"; SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug); } else { - if (best_correctly_segmented_rating_ >= - WERD_CHOICE::kBadRating) { + if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) { *debug_str += "Correct segmentation paths were pruned by LM\n"; } else { debug_str->add_str_double("Best correct segmentation rating ", @@ -571,11 +558,11 @@ void BlamerBundle::LastChanceBlame(bool debug, WERD_RES* word) { // Sets the misadaption debug if this word is incorrect, as this word is // being adapted to. -void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice, +void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE* best_choice, bool debug) { if (incorrect_result_reason_ != IRR_NO_TRUTH && !ChoiceIsCorrect(best_choice)) { - misadaption_debug_ ="misadapt to word ("; + misadaption_debug_ = "misadapt to word ("; misadaption_debug_ += best_choice->permuter_name(); misadaption_debug_ += "): "; FillDebugString("", best_choice, &misadaption_debug_); diff --git a/src/ccstruct/blamer.h b/src/ccstruct/blamer.h index f7d13d68fe..77d651e557 100644 --- a/src/ccstruct/blamer.h +++ b/src/ccstruct/blamer.h @@ -86,11 +86,14 @@ enum IncorrectResultReason { // Blamer-related information to determine the source of errors. struct BlamerBundle { - static const char *IncorrectReasonName(IncorrectResultReason irr); - BlamerBundle() : truth_has_char_boxes_(false), - incorrect_result_reason_(IRR_CORRECT), - lattice_data_(nullptr) { ClearResults(); } - BlamerBundle(const BlamerBundle &other) { + static const char* IncorrectReasonName(IncorrectResultReason irr); + BlamerBundle() + : truth_has_char_boxes_(false), + incorrect_result_reason_(IRR_CORRECT), + lattice_data_(nullptr) { + ClearResults(); + } + BlamerBundle(const BlamerBundle& other) { this->CopyTruth(other); this->CopyResults(other); } @@ -99,8 +102,7 @@ struct BlamerBundle { // Accessors. STRING TruthString() const { STRING truth_str; - for (int i = 0; i < truth_text_.length(); ++i) - truth_str += truth_text_[i]; + for (int i = 0; i < truth_text_.length(); ++i) truth_str += truth_text_[i]; return truth_str; } IncorrectResultReason incorrect_result_reason() const { @@ -113,12 +115,8 @@ struct BlamerBundle { bool HasDebugInfo() const { return debug_.length() > 0 || misadaption_debug_.length() > 0; } - const STRING& debug() const { - return debug_; - } - const STRING& misadaption_debug() const { - return misadaption_debug_; - } + const STRING& debug() const { return debug_; } + const STRING& misadaption_debug() const { return misadaption_debug_; } void UpdateBestRating(float rating) { if (rating < best_correctly_segmented_rating_) best_correctly_segmented_rating_ = rating; @@ -130,20 +128,18 @@ struct BlamerBundle { // in the correct segmentation path at the given index. bool MatrixPositionCorrect(int index, const MATRIX_COORD& coord) { return correct_segmentation_cols_[index] == coord.col && - correct_segmentation_rows_[index] == coord.row; + correct_segmentation_rows_[index] == coord.row; } void set_best_choice_is_dict_and_top_choice(bool value) { best_choice_is_dict_and_top_choice_ = value; } - const char* lattice_data() const { - return lattice_data_; - } + const char* lattice_data() const { return lattice_data_; } int lattice_size() const { return lattice_size_; // size of lattice_data in bytes } void set_lattice_data(const char* data, int size) { lattice_size_ = size; - delete [] lattice_data_; + delete[] lattice_data_; lattice_data_ = new char[lattice_size_]; memcpy(lattice_data_, data, lattice_size_); } @@ -157,12 +153,12 @@ struct BlamerBundle { // Functions to setup the blamer. // Whole word string, whole word bounding box. - void SetWordTruth(const UNICHARSET& unicharset, - const char* truth_str, const TBOX& word_box); + void SetWordTruth(const UNICHARSET& unicharset, const char* truth_str, + const TBOX& word_box); // Single "character" string, "character" bounding box. // May be called multiple times to indicate the characters in a word. - void SetSymbolTruth(const UNICHARSET& unicharset, - const char* char_str, const TBOX& char_box); + void SetSymbolTruth(const UNICHARSET& unicharset, const char* char_str, + const TBOX& char_box); // Marks that there is something wrong with the truth text, like it contains // reject characters. void SetRejectedTruth(); @@ -184,14 +180,14 @@ struct BlamerBundle { lattice_data_ = nullptr; lattice_size_ = 0; } - void CopyTruth(const BlamerBundle &other) { + void CopyTruth(const BlamerBundle& other) { truth_has_char_boxes_ = other.truth_has_char_boxes_; truth_word_ = other.truth_word_; truth_text_ = other.truth_text_; incorrect_result_reason_ = (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT); } - void CopyResults(const BlamerBundle &other) { + void CopyResults(const BlamerBundle& other) { norm_truth_word_ = other.norm_truth_word_; norm_box_tolerance_ = other.norm_box_tolerance_; incorrect_result_reason_ = other.incorrect_result_reason_; @@ -209,11 +205,11 @@ struct BlamerBundle { lattice_data_ = nullptr; } } - const char *IncorrectReason() const; + const char* IncorrectReason() const; // Appends choice and truth details to the given debug string. - void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, - STRING *debug); + void FillDebugString(const STRING& msg, const WERD_CHOICE* choice, + STRING* debug); // Sets up the norm_truth_word from truth_word using the given DENORM. void SetupNormTruthWord(const DENORM& denorm); @@ -230,11 +226,8 @@ struct BlamerBundle { // If a blob with the same bounding box as one of the truth character // bounding boxes is not classified as the corresponding truth character // blames character classifier for incorrect answer. - void BlameClassifier(const UNICHARSET& unicharset, - const TBOX& blob_box, - const BLOB_CHOICE_LIST& choices, - bool debug); - + void BlameClassifier(const UNICHARSET& unicharset, const TBOX& blob_box, + const BLOB_CHOICE_LIST& choices, bool debug); // Checks whether chops were made at all the character bounding box // boundaries in word->truth_word. If not - blames the chopper for an @@ -245,28 +238,27 @@ struct BlamerBundle { // Blames the classifier if best_choice is classifier's top choice and is a // dictionary word (i.e. language model could not have helped). // Otherwise, blames the language model (formerly permuter word adjustment). - void BlameClassifierOrLangModel( - const WERD_RES* word, - const UNICHARSET& unicharset, bool valid_permuter, bool debug); + void BlameClassifierOrLangModel(const WERD_RES* word, + const UNICHARSET& unicharset, + bool valid_permuter, bool debug); // Sets up the correct_segmentation_* to mark the correct bounding boxes. void SetupCorrectSegmentation(const TWERD* word, bool debug); // Returns true if a guided segmentation search is needed. - bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const; + bool GuidedSegsearchNeeded(const WERD_CHOICE* best_choice) const; // Setup ready to guide the segmentation search to the correct segmentation. // The callback pp_cb is used to avoid a cyclic dependency. // It calls into LMPainPoints::GenerateForBlamer by pre-binding the // WERD_RES, and the LMPainPoints itself. // pp_cb must be a permanent callback, and should be deleted by the caller. - void InitForSegSearch(const WERD_CHOICE *best_choice, - MATRIX* ratings, UNICHAR_ID wildcard_id, - bool debug, STRING *debug_str, + void InitForSegSearch(const WERD_CHOICE* best_choice, MATRIX* ratings, + UNICHAR_ID wildcard_id, bool debug, STRING* debug_str, TessResultCallback2* pp_cb); // Returns true if the guided segsearch is in progress. bool GuidedSegsearchStillGoing() const; // The segmentation search has ended. Sets the blame appropriately. - void FinishSegSearch(const WERD_CHOICE *best_choice, - bool debug, STRING *debug_str); + void FinishSegSearch(const WERD_CHOICE* best_choice, bool debug, + STRING* debug_str); // If the bundle is null or still does not indicate the correct result, // fix it and use some backup reason for the blame. @@ -274,11 +266,11 @@ struct BlamerBundle { // Sets the misadaption debug if this word is incorrect, as this word is // being adapted to. - void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug); + void SetMisAdaptionDebug(const WERD_CHOICE* best_choice, bool debug); private: - void SetBlame(IncorrectResultReason irr, const STRING &msg, - const WERD_CHOICE *choice, bool debug) { + void SetBlame(IncorrectResultReason irr, const STRING& msg, + const WERD_CHOICE* choice, bool debug) { incorrect_result_reason_ = irr; debug_ = IncorrectReason(); debug_ += " to blame: "; @@ -320,11 +312,10 @@ struct BlamerBundle { // classifier's top choice. bool best_choice_is_dict_and_top_choice_; // Serialized segmentation search lattice. - char *lattice_data_; + char* lattice_data_; int lattice_size_; // size of lattice_data in bytes // Information about hypotheses (paths) explored by the segmentation search. tesseract::ParamsTrainingBundle params_training_bundle_; }; - #endif // TESSERACT_CCSTRUCT_BLAMER_H_ diff --git a/src/ccstruct/blckerr.h b/src/ccstruct/blckerr.h index e306163983..00a79c6b71 100644 --- a/src/ccstruct/blckerr.h +++ b/src/ccstruct/blckerr.h @@ -17,10 +17,10 @@ * **********************************************************************/ -#ifndef BLCKERR_H -#define BLCKERR_H +#ifndef BLCKERR_H +#define BLCKERR_H -#include "errcode.h" +#include "errcode.h" const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds"; const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line"; diff --git a/src/ccstruct/blobbox.cpp b/src/ccstruct/blobbox.cpp index 9d89d74441..1174e7c958 100644 --- a/src/ccstruct/blobbox.cpp +++ b/src/ccstruct/blobbox.cpp @@ -22,15 +22,15 @@ #include "config_auto.h" #endif -#include "blobbox.h" #include "allheaders.h" +#include "blobbox.h" #include "blobs.h" #include "helpers.h" #include "normalis.h" #include -#define PROJECTION_MARGIN 10 //arbitrary +#define PROJECTION_MARGIN 10 // arbitrary #define EXTERN ELISTIZE(BLOBNBOX) @@ -86,15 +86,14 @@ void BLOBNBOX::rotate_box(FCOORD rotation) { * * Merge this blob with the given blob, which should be after this. **********************************************************************/ -void BLOBNBOX::merge( //merge blobs - BLOBNBOX *nextblob //blob to join with - ) { - box += nextblob->box; //merge boxes +void BLOBNBOX::merge( // merge blobs + BLOBNBOX* nextblob // blob to join with +) { + box += nextblob->box; // merge boxes set_diacritic_box(box); nextblob->joined = TRUE; } - // Merge this with other, taking the outlines from other. // Other is not deleted, but left for the caller to handle. void BLOBNBOX::really_merge(BLOBNBOX* other) { @@ -105,7 +104,6 @@ void BLOBNBOX::really_merge(BLOBNBOX* other) { compute_bounding_box(); } - /********************************************************************** * BLOBNBOX::chop * @@ -114,59 +112,56 @@ void BLOBNBOX::really_merge(BLOBNBOX* other) { * with the relevant bounding boxes. **********************************************************************/ -void BLOBNBOX::chop( //chop blobs - BLOBNBOX_IT *start_it, //location of this - BLOBNBOX_IT *end_it, //iterator - FCOORD rotation, //for landscape - float xheight //of line - ) { - int16_t blobcount; //no of blobs - BLOBNBOX *newblob; //fake blob - BLOBNBOX *blob; //current blob - int16_t blobindex; //number of chop - int16_t leftx; //left edge of blob - float blobwidth; //width of each - float rightx; //right edge to scan - float ymin, ymax; //limits of new blob - float test_ymin, test_ymax; //limits of part blob - ICOORD bl, tr; //corners of box - BLOBNBOX_IT blob_it; //blob iterator - - //get no of chops - blobcount = (int16_t) floor (box.width () / xheight); +void BLOBNBOX::chop( // chop blobs + BLOBNBOX_IT* start_it, // location of this + BLOBNBOX_IT* end_it, // iterator + FCOORD rotation, // for landscape + float xheight // of line +) { + int16_t blobcount; // no of blobs + BLOBNBOX* newblob; // fake blob + BLOBNBOX* blob; // current blob + int16_t blobindex; // number of chop + int16_t leftx; // left edge of blob + float blobwidth; // width of each + float rightx; // right edge to scan + float ymin, ymax; // limits of new blob + float test_ymin, test_ymax; // limits of part blob + ICOORD bl, tr; // corners of box + BLOBNBOX_IT blob_it; // blob iterator + + // get no of chops + blobcount = (int16_t)floor(box.width() / xheight); if (blobcount > 1 && cblob_ptr != nullptr) { - //width of each - blobwidth = (float) (box.width () + 1) / blobcount; - for (blobindex = blobcount - 1, rightx = box.right (); - blobindex >= 0; blobindex--, rightx -= blobwidth) { - ymin = (float) INT32_MAX; - ymax = (float) -INT32_MAX; + // width of each + blobwidth = (float)(box.width() + 1) / blobcount; + for (blobindex = blobcount - 1, rightx = box.right(); blobindex >= 0; + blobindex--, rightx -= blobwidth) { + ymin = (float)INT32_MAX; + ymax = (float)-INT32_MAX; blob_it = *start_it; do { - blob = blob_it.data (); - find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, - rightx, - /*rotation, */ test_ymin, test_ymax); - blob_it.forward (); + blob = blob_it.data(); + find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, rightx, + /*rotation, */ test_ymin, test_ymax); + blob_it.forward(); UpdateRange(test_ymin, test_ymax, &ymin, &ymax); - } - while (blob != end_it->data ()); + } while (blob != end_it->data()); if (ymin < ymax) { - leftx = (int16_t) floor (rightx - blobwidth); - if (leftx < box.left ()) - leftx = box.left (); //clip to real box - bl = ICOORD (leftx, (int16_t) floor (ymin)); - tr = ICOORD ((int16_t) ceil (rightx), (int16_t) ceil (ymax)); + leftx = (int16_t)floor(rightx - blobwidth); + if (leftx < box.left()) leftx = box.left(); // clip to real box + bl = ICOORD(leftx, (int16_t)floor(ymin)); + tr = ICOORD((int16_t)ceil(rightx), (int16_t)ceil(ymax)); if (blobindex == 0) - box = TBOX (bl, tr); //change box + box = TBOX(bl, tr); // change box else { newblob = new BLOBNBOX; - //box is all it has - newblob->box = TBOX (bl, tr); - //stay on current + // box is all it has + newblob->box = TBOX(bl, tr); + // stay on current newblob->base_char_top_ = tr.y(); newblob->base_char_bottom_ = bl.y(); - end_it->add_after_stay_put (newblob); + end_it->add_after_stay_put(newblob); } } } @@ -194,8 +189,8 @@ void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { // the min is less, the max is replaced with the min. // The objective is to catch cases where there is only a single neighbour // and avoid reporting the other gap as a ridiculously large number -void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max, - int* v_min, int* v_max) const { +void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max, int* v_min, + int* v_max) const { int max_dimension = std::max(box.width(), box.height()); int gaps[BND_COUNT]; NeighbourGaps(gaps); @@ -224,8 +219,7 @@ int BLOBNBOX::GoodTextBlob() const { int score = 0; for (int dir = 0; dir < BND_COUNT; ++dir) { BlobNeighbourDir bnd = static_cast(dir); - if (good_stroke_neighbour(bnd)) - ++score; + if (good_stroke_neighbour(bnd)) ++score; } return score; } @@ -236,8 +230,7 @@ int BLOBNBOX::NoisyNeighbours() const { for (int dir = 0; dir < BND_COUNT; ++dir) { BlobNeighbourDir bnd = static_cast(dir); BLOBNBOX* blob = neighbour(bnd); - if (blob != nullptr && blob->region_type() == BRT_NOISE) - ++count; + if (blob != nullptr && blob->region_type() == BRT_NOISE) ++count; } return count; } @@ -306,12 +299,11 @@ bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other, // no information in the blob. double p_width = area_stroke_width(); double n_p_width = other.area_stroke_width(); - float h_tolerance = horz_stroke_width_ * fractional_tolerance - + constant_tolerance; - float v_tolerance = vert_stroke_width_ * fractional_tolerance - + constant_tolerance; - double p_tolerance = p_width * fractional_tolerance - + constant_tolerance; + float h_tolerance = + horz_stroke_width_ * fractional_tolerance + constant_tolerance; + float v_tolerance = + vert_stroke_width_ * fractional_tolerance + constant_tolerance; + double p_tolerance = p_width * fractional_tolerance + constant_tolerance; bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f; bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f; bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, @@ -333,8 +325,7 @@ TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) { float bottom = box.bottom(); if (cblob_ptr != nullptr) { find_cblob_limits(cblob_ptr, static_cast(left), - static_cast(right), no_rotation, - bottom, top); + static_cast(right), no_rotation, bottom, top); } if (top < bottom) { @@ -409,14 +400,11 @@ void BLOBNBOX::ComputeEdgeOffsets(Pix* thresholds, Pix* grey, } } - #ifndef GRAPHICS_DISABLED // Helper to draw all the blobs on the list in the given body_colour, // with child outlines in the child_colour. -void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list, - ScrollView::Color body_colour, - ScrollView::Color child_colour, - ScrollView* win) { +void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list, ScrollView::Color body_colour, + ScrollView::Color child_colour, ScrollView* win) { BLOBNBOX_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { it.data()->plot(win, body_colour, child_colour); @@ -428,13 +416,11 @@ void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list, // child_colour. void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list, ScrollView::Color body_colour, - ScrollView::Color child_colour, - ScrollView* win) { + ScrollView::Color child_colour, ScrollView* win) { BLOBNBOX_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); - if (blob->DeletableNoise()) - blob->plot(win, body_colour, child_colour); + if (blob->DeletableNoise()) blob->plot(win, body_colour, child_colour); } } @@ -454,20 +440,14 @@ ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, case BRT_VERT_TEXT: if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) return ScrollView::GREEN; - if (flow_type == BTFT_CHAIN) - return ScrollView::LIME_GREEN; + if (flow_type == BTFT_CHAIN) return ScrollView::LIME_GREEN; return ScrollView::YELLOW; case BRT_TEXT: - if (flow_type == BTFT_STRONG_CHAIN) - return ScrollView::BLUE; - if (flow_type == BTFT_TEXT_ON_IMAGE) - return ScrollView::LIGHT_BLUE; - if (flow_type == BTFT_CHAIN) - return ScrollView::MEDIUM_BLUE; - if (flow_type == BTFT_LEADER) - return ScrollView::WHEAT; - if (flow_type == BTFT_NONTEXT) - return ScrollView::PINK; + if (flow_type == BTFT_STRONG_CHAIN) return ScrollView::BLUE; + if (flow_type == BTFT_TEXT_ON_IMAGE) return ScrollView::LIGHT_BLUE; + if (flow_type == BTFT_CHAIN) return ScrollView::MEDIUM_BLUE; + if (flow_type == BTFT_LEADER) return ScrollView::WHEAT; + if (flow_type == BTFT_NONTEXT) return ScrollView::PINK; return ScrollView::MAGENTA; default: return ScrollView::GREY; @@ -482,8 +462,7 @@ ScrollView::Color BLOBNBOX::BoxColor() const { void BLOBNBOX::plot(ScrollView* window, // window to draw in ScrollView::Color blob_colour, // for outer bits ScrollView::Color child_colour) { // for holes - if (cblob_ptr != nullptr) - cblob_ptr->plot(window, blob_colour, child_colour); + if (cblob_ptr != nullptr) cblob_ptr->plot(window, blob_colour, child_colour); } #endif /********************************************************************** @@ -493,39 +472,38 @@ void BLOBNBOX::plot(ScrollView* window, // window to draw in * between the given x limits. **********************************************************************/ -void find_cblob_limits( //get y limits - C_BLOB *blob, //blob to search - float leftx, //x limits - float rightx, - FCOORD rotation, //for landscape - float &ymin, //output y limits - float &ymax) { - int16_t stepindex; //current point - ICOORD pos; //current coords - ICOORD vec; //rotated step - C_OUTLINE *outline; //current outline - //outlines - C_OUTLINE_IT out_it = blob->out_list (); - - ymin = (float) INT32_MAX; - ymax = (float) -INT32_MAX; - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - outline = out_it.data (); - pos = outline->start_pos (); //get coords - pos.rotate (rotation); - for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { - //inside - if (pos.x () >= leftx && pos.x () <= rightx) { +void find_cblob_limits( // get y limits + C_BLOB* blob, // blob to search + float leftx, // x limits + float rightx, + FCOORD rotation, // for landscape + float& ymin, // output y limits + float& ymax) { + int16_t stepindex; // current point + ICOORD pos; // current coords + ICOORD vec; // rotated step + C_OUTLINE* outline; // current outline + // outlines + C_OUTLINE_IT out_it = blob->out_list(); + + ymin = (float)INT32_MAX; + ymax = (float)-INT32_MAX; + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.data(); + pos = outline->start_pos(); // get coords + pos.rotate(rotation); + for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { + // inside + if (pos.x() >= leftx && pos.x() <= rightx) { UpdateRange(pos.y(), &ymin, &ymax); } - vec = outline->step (stepindex); - vec.rotate (rotation); - pos += vec; //move to next + vec = outline->step(stepindex); + vec.rotate(rotation); + pos += vec; // move to next } } } - /********************************************************************** * find_cblob_vlimits * @@ -533,36 +511,35 @@ void find_cblob_limits( //get y limits * between the given x limits. **********************************************************************/ -void find_cblob_vlimits( //get y limits - C_BLOB *blob, //blob to search - float leftx, //x limits - float rightx, - float &ymin, //output y limits - float &ymax) { - int16_t stepindex; //current point - ICOORD pos; //current coords - ICOORD vec; //rotated step - C_OUTLINE *outline; //current outline - //outlines - C_OUTLINE_IT out_it = blob->out_list (); - - ymin = (float) INT32_MAX; - ymax = (float) -INT32_MAX; - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - outline = out_it.data (); - pos = outline->start_pos (); //get coords - for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { - //inside - if (pos.x () >= leftx && pos.x () <= rightx) { +void find_cblob_vlimits( // get y limits + C_BLOB* blob, // blob to search + float leftx, // x limits + float rightx, + float& ymin, // output y limits + float& ymax) { + int16_t stepindex; // current point + ICOORD pos; // current coords + ICOORD vec; // rotated step + C_OUTLINE* outline; // current outline + // outlines + C_OUTLINE_IT out_it = blob->out_list(); + + ymin = (float)INT32_MAX; + ymax = (float)-INT32_MAX; + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.data(); + pos = outline->start_pos(); // get coords + for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { + // inside + if (pos.x() >= leftx && pos.x() <= rightx) { UpdateRange(pos.y(), &ymin, &ymax); } - vec = outline->step (stepindex); - pos += vec; //move to next + vec = outline->step(stepindex); + pos += vec; // move to next } } } - /********************************************************************** * find_cblob_hlimits * @@ -570,31 +547,31 @@ void find_cblob_vlimits( //get y limits * between the given y limits. **********************************************************************/ -void find_cblob_hlimits( //get x limits - C_BLOB *blob, //blob to search - float bottomy, //y limits - float topy, - float &xmin, //output x limits - float &xmax) { - int16_t stepindex; //current point - ICOORD pos; //current coords - ICOORD vec; //rotated step - C_OUTLINE *outline; //current outline - //outlines - C_OUTLINE_IT out_it = blob->out_list (); - - xmin = (float) INT32_MAX; - xmax = (float) -INT32_MAX; - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - outline = out_it.data (); - pos = outline->start_pos (); //get coords - for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { - //inside - if (pos.y () >= bottomy && pos.y () <= topy) { +void find_cblob_hlimits( // get x limits + C_BLOB* blob, // blob to search + float bottomy, // y limits + float topy, + float& xmin, // output x limits + float& xmax) { + int16_t stepindex; // current point + ICOORD pos; // current coords + ICOORD vec; // rotated step + C_OUTLINE* outline; // current outline + // outlines + C_OUTLINE_IT out_it = blob->out_list(); + + xmin = (float)INT32_MAX; + xmax = (float)-INT32_MAX; + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.data(); + pos = outline->start_pos(); // get coords + for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { + // inside + if (pos.y() >= bottomy && pos.y() <= topy) { UpdateRange(pos.x(), &xmin, &xmax); } - vec = outline->step (stepindex); - pos += vec; //move to next + vec = outline->step(stepindex); + pos += vec; // move to next } } } @@ -605,23 +582,22 @@ void find_cblob_hlimits( //get x limits * Rotate the copy by the given vector and return a C_BLOB. **********************************************************************/ -C_BLOB *crotate_cblob( //rotate it - C_BLOB *blob, //blob to search - FCOORD rotation //for landscape - ) { - C_OUTLINE_LIST out_list; //output outlines - //input outlines - C_OUTLINE_IT in_it = blob->out_list (); - //output outlines +C_BLOB* crotate_cblob( // rotate it + C_BLOB* blob, // blob to search + FCOORD rotation // for landscape +) { + C_OUTLINE_LIST out_list; // output outlines + // input outlines + C_OUTLINE_IT in_it = blob->out_list(); + // output outlines C_OUTLINE_IT out_it = &out_list; - for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) { - out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation)); + for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) { + out_it.add_after_then_move(new C_OUTLINE(in_it.data(), rotation)); } - return new C_BLOB (&out_list); + return new C_BLOB(&out_list); } - /********************************************************************** * box_next * @@ -630,27 +606,26 @@ C_BLOB *crotate_cblob( //rotate it * Then move the iterator on to the start of the next blob. **********************************************************************/ -TBOX box_next( //get bounding box - BLOBNBOX_IT *it //iterator to blobds - ) { - BLOBNBOX *blob; //current blob - TBOX result; //total box +TBOX box_next( // get bounding box + BLOBNBOX_IT* it // iterator to blobds +) { + BLOBNBOX* blob; // current blob + TBOX result; // total box - blob = it->data (); - result = blob->bounding_box (); + blob = it->data(); + result = blob->bounding_box(); do { - it->forward (); - blob = it->data (); + it->forward(); + blob = it->data(); if (blob->cblob() == nullptr) - //was pre-chopped - result += blob->bounding_box (); + // was pre-chopped + result += blob->bounding_box(); } - //until next real blob + // until next real blob while ((blob->cblob() == nullptr) || blob->joined_to_prev()); return result; } - /********************************************************************** * box_next_pre_chopped * @@ -659,51 +634,50 @@ TBOX box_next( //get bounding box * Then move the iterator on to the start of the next pre-chopped blob. **********************************************************************/ -TBOX box_next_pre_chopped( //get bounding box - BLOBNBOX_IT *it //iterator to blobds - ) { - BLOBNBOX *blob; //current blob - TBOX result; //total box +TBOX box_next_pre_chopped( // get bounding box + BLOBNBOX_IT* it // iterator to blobds +) { + BLOBNBOX* blob; // current blob + TBOX result; // total box - blob = it->data (); - result = blob->bounding_box (); + blob = it->data(); + result = blob->bounding_box(); do { - it->forward (); - blob = it->data (); + it->forward(); + blob = it->data(); } - //until next real blob - while (blob->joined_to_prev ()); + // until next real blob + while (blob->joined_to_prev()); return result; } - /********************************************************************** * TO_ROW::TO_ROW * * Constructor to make a row from a blob. **********************************************************************/ -TO_ROW::TO_ROW ( //constructor -BLOBNBOX * blob, //first blob -float top, //corrected top -float bottom, //of row -float row_size //ideal +TO_ROW::TO_ROW( // constructor + BLOBNBOX* blob, // first blob + float top, // corrected top + float bottom, // of row + float row_size // ideal ) { clear(); y_min = bottom; y_max = top; initial_y_min = bottom; - float diff; //in size - BLOBNBOX_IT it = &blobs; //list of blobs + float diff; // in size + BLOBNBOX_IT it = &blobs; // list of blobs - it.add_to_end (blob); + it.add_to_end(blob); diff = top - bottom - row_size; if (diff > 0) { y_max -= diff / 2; y_min += diff / 2; } - //very small object + // very small object else if ((top - bottom) * 3 < row_size) { diff = row_size / 3 + bottom - top; y_max += diff / 2; @@ -712,13 +686,14 @@ float row_size //ideal } void TO_ROW::print() const { - tprintf("pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g," - " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g," - " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n", - pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp, - spacing, xheight, y_origin, xheight_evidence, ascrise, descdrop, - body_size, min_space, max_nonspace, space_threshold, kern_size, - space_size); + tprintf( + "pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g," + " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g," + " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n", + pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp, + spacing, xheight, y_origin, xheight_evidence, ascrise, descdrop, + body_size, min_space, max_nonspace, space_threshold, kern_size, + space_size); } /********************************************************************** @@ -727,93 +702,85 @@ void TO_ROW::print() const { * Add the blob to the end of the row. **********************************************************************/ -void TO_ROW::add_blob( //constructor - BLOBNBOX *blob, //first blob - float top, //corrected top - float bottom, //of row - float row_size //ideal - ) { - float allowed; //allowed expansion - float available; //expansion - BLOBNBOX_IT it = &blobs; //list of blobs - - it.add_to_end (blob); +void TO_ROW::add_blob( // constructor + BLOBNBOX* blob, // first blob + float top, // corrected top + float bottom, // of row + float row_size // ideal +) { + float allowed; // allowed expansion + float available; // expansion + BLOBNBOX_IT it = &blobs; // list of blobs + + it.add_to_end(blob); allowed = row_size + y_min - y_max; if (allowed > 0) { available = top > y_max ? top - y_max : 0; if (bottom < y_min) - //total available - available += y_min - bottom; + // total available + available += y_min - bottom; if (available > 0) { - available += available; //do it gradually - if (available < allowed) - available = allowed; - if (bottom < y_min) - y_min -= (y_min - bottom) * allowed / available; - if (top > y_max) - y_max += (top - y_max) * allowed / available; + available += available; // do it gradually + if (available < allowed) available = allowed; + if (bottom < y_min) y_min -= (y_min - bottom) * allowed / available; + if (top > y_max) y_max += (top - y_max) * allowed / available; } } } - /********************************************************************** * TO_ROW:insert_blob * * Add the blob to the row in the correct position. **********************************************************************/ -void TO_ROW::insert_blob( //constructor - BLOBNBOX *blob //first blob - ) { - BLOBNBOX_IT it = &blobs; //list of blobs +void TO_ROW::insert_blob( // constructor + BLOBNBOX* blob // first blob +) { + BLOBNBOX_IT it = &blobs; // list of blobs - if (it.empty ()) - it.add_before_then_move (blob); + if (it.empty()) + it.add_before_then_move(blob); else { - it.mark_cycle_pt (); - while (!it.cycled_list () - && it.data ()->bounding_box ().left () <= - blob->bounding_box ().left ()) - it.forward (); - if (it.cycled_list ()) - it.add_to_end (blob); + it.mark_cycle_pt(); + while (!it.cycled_list() && + it.data()->bounding_box().left() <= blob->bounding_box().left()) + it.forward(); + if (it.cycled_list()) + it.add_to_end(blob); else - it.add_before_stay_put (blob); + it.add_before_stay_put(blob); } } - /********************************************************************** * TO_ROW::compute_vertical_projection * * Compute the vertical projection of a TO_ROW from its blobs. **********************************************************************/ -void TO_ROW::compute_vertical_projection() { //project whole row - TBOX row_box; //bound of row - BLOBNBOX *blob; //current blob - TBOX blob_box; //bounding box - BLOBNBOX_IT blob_it = blob_list (); - - if (blob_it.empty ()) - return; - row_box = blob_it.data ()->bounding_box (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) - row_box += blob_it.data ()->bounding_box (); - - projection.set_range (row_box.left () - PROJECTION_MARGIN, - row_box.right () + PROJECTION_MARGIN); - projection_left = row_box.left () - PROJECTION_MARGIN; - projection_right = row_box.right () + PROJECTION_MARGIN; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { +void TO_ROW::compute_vertical_projection() { // project whole row + TBOX row_box; // bound of row + BLOBNBOX* blob; // current blob + TBOX blob_box; // bounding box + BLOBNBOX_IT blob_it = blob_list(); + + if (blob_it.empty()) return; + row_box = blob_it.data()->bounding_box(); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) + row_box += blob_it.data()->bounding_box(); + + projection.set_range(row_box.left() - PROJECTION_MARGIN, + row_box.right() + PROJECTION_MARGIN); + projection_left = row_box.left() - PROJECTION_MARGIN; + projection_right = row_box.right() + PROJECTION_MARGIN; + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { blob = blob_it.data(); if (blob->cblob() != nullptr) vertical_cblob_projection(blob->cblob(), &projection); } } - /********************************************************************** * TO_ROW::clear * @@ -854,7 +821,6 @@ void TO_ROW::clear() { num_repeated_sets_ = -1; } - /********************************************************************** * vertical_cblob_projection * @@ -862,19 +828,18 @@ void TO_ROW::clear() { * and add to the given STATS. **********************************************************************/ -void vertical_cblob_projection( //project outlines - C_BLOB *blob, //blob to project - STATS *stats //output - ) { - //outlines of blob - C_OUTLINE_IT out_it = blob->out_list (); +void vertical_cblob_projection( // project outlines + C_BLOB* blob, // blob to project + STATS* stats // output +) { + // outlines of blob + C_OUTLINE_IT out_it = blob->out_list(); - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - vertical_coutline_projection (out_it.data (), stats); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + vertical_coutline_projection(out_it.data(), stats); } } - /********************************************************************** * vertical_coutline_projection * @@ -882,43 +847,42 @@ void vertical_cblob_projection( //project outlines * and add to the given STATS. **********************************************************************/ -void vertical_coutline_projection( //project outlines - C_OUTLINE *outline, //outline to project - STATS *stats //output - ) { - ICOORD pos; //current point - ICOORD step; //edge step - int32_t length; //of outline - int16_t stepindex; //current step - C_OUTLINE_IT out_it = outline->child (); - - pos = outline->start_pos (); - length = outline->pathlength (); +void vertical_coutline_projection( // project outlines + C_OUTLINE* outline, // outline to project + STATS* stats // output +) { + ICOORD pos; // current point + ICOORD step; // edge step + int32_t length; // of outline + int16_t stepindex; // current step + C_OUTLINE_IT out_it = outline->child(); + + pos = outline->start_pos(); + length = outline->pathlength(); for (stepindex = 0; stepindex < length; stepindex++) { - step = outline->step (stepindex); - if (step.x () > 0) { - stats->add (pos.x (), -pos.y ()); - } else if (step.x () < 0) { - stats->add (pos.x () - 1, pos.y ()); + step = outline->step(stepindex); + if (step.x() > 0) { + stats->add(pos.x(), -pos.y()); + } else if (step.x() < 0) { + stats->add(pos.x() - 1, pos.y()); } pos += step; } - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - vertical_coutline_projection (out_it.data (), stats); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + vertical_coutline_projection(out_it.data(), stats); } } - /********************************************************************** * TO_BLOCK::TO_BLOCK * * Constructor to make a TO_BLOCK from a real block. **********************************************************************/ -TO_BLOCK::TO_BLOCK( //make a block - BLOCK *src_block //real block - ) { +TO_BLOCK::TO_BLOCK( // make a block + BLOCK* src_block // real block +) { clear(); block = src_block; } @@ -958,7 +922,6 @@ void TO_BLOCK::clear() { key_row = nullptr; } - TO_BLOCK::~TO_BLOCK() { // Any residual BLOBNBOXes at this stage own their blobs, so delete them. clear_blobnboxes(&blobs); @@ -974,8 +937,7 @@ TO_BLOCK::~TO_BLOCK() { // medium and large are by height. // SIDE-EFFECT: reset all blobs to initial state by calling Init(). static void SizeFilterBlobs(int min_height, int max_height, - BLOBNBOX_LIST* src_list, - BLOBNBOX_LIST* noise_list, + BLOBNBOX_LIST* src_list, BLOBNBOX_LIST* noise_list, BLOBNBOX_LIST* small_list, BLOBNBOX_LIST* medium_list, BLOBNBOX_LIST* large_list) { @@ -988,8 +950,7 @@ static void SizeFilterBlobs(int min_height, int max_height, blob->ReInit(); int width = blob->bounding_box().width(); int height = blob->bounding_box().height(); - if (height < min_height && - (width < min_height || width > max_height)) + if (height < min_height && (width < min_height || width > max_height)) noise_it.add_after_then_move(blob); else if (height > max_height) large_it.add_after_then_move(blob); @@ -1012,14 +973,14 @@ void TO_BLOCK::ReSetAndReFilterBlobs() { BLOBNBOX_LIST small_list; BLOBNBOX_LIST medium_list; BLOBNBOX_LIST large_list; - SizeFilterBlobs(min_height, max_height, &blobs, - &noise_list, &small_list, &medium_list, &large_list); - SizeFilterBlobs(min_height, max_height, &large_blobs, - &noise_list, &small_list, &medium_list, &large_list); - SizeFilterBlobs(min_height, max_height, &small_blobs, - &noise_list, &small_list, &medium_list, &large_list); - SizeFilterBlobs(min_height, max_height, &noise_blobs, - &noise_list, &small_list, &medium_list, &large_list); + SizeFilterBlobs(min_height, max_height, &blobs, &noise_list, &small_list, + &medium_list, &large_list); + SizeFilterBlobs(min_height, max_height, &large_blobs, &noise_list, + &small_list, &medium_list, &large_list); + SizeFilterBlobs(min_height, max_height, &small_blobs, &noise_list, + &small_list, &medium_list, &large_list); + SizeFilterBlobs(min_height, max_height, &noise_blobs, &noise_list, + &small_list, &medium_list, &large_list); BLOBNBOX_IT blob_it(&blobs); blob_it.add_list_after(&medium_list); blob_it.set_to_list(&large_blobs); @@ -1081,7 +1042,7 @@ void TO_BLOCK::plot_graded_blobs(ScrollView* win) { **********************************************************************/ void plot_blob_list(ScrollView* win, // window to draw in - BLOBNBOX_LIST *list, // blob list + BLOBNBOX_LIST* list, // blob list ScrollView::Color body_colour, // colour to draw ScrollView::Color child_colour) { // colour of child BLOBNBOX_IT it = list; diff --git a/src/ccstruct/blobbox.h b/src/ccstruct/blobbox.h index 76401889fa..3323c91a65 100644 --- a/src/ccstruct/blobbox.h +++ b/src/ccstruct/blobbox.h @@ -17,20 +17,19 @@ * **********************************************************************/ -#ifndef BLOBBOX_H -#define BLOBBOX_H - -#include "clst.h" -#include "elst2.h" -#include "werd.h" -#include "ocrblock.h" -#include "statistc.h" - -enum PITCH_TYPE -{ - PITCH_DUNNO, // insufficient data - PITCH_DEF_FIXED, // definitely fixed - PITCH_MAYBE_FIXED, // could be +#ifndef BLOBBOX_H +#define BLOBBOX_H + +#include "clst.h" +#include "elst2.h" +#include "ocrblock.h" +#include "statistc.h" +#include "werd.h" + +enum PITCH_TYPE { + PITCH_DUNNO, // insufficient data + PITCH_DEF_FIXED, // definitely fixed + PITCH_MAYBE_FIXED, // could be PITCH_DEF_PROP, PITCH_MAYBE_PROP, PITCH_CORR_FIXED, @@ -64,27 +63,21 @@ enum BlobRegionType { BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented. BRT_TEXT, // Convincing text. - BRT_COUNT // Number of possibilities. + BRT_COUNT // Number of possibilities. }; // enum for elements of arrays that refer to neighbours. // NOTE: keep in this order, so ^2 can be used to flip direction. -enum BlobNeighbourDir { - BND_LEFT, - BND_BELOW, - BND_RIGHT, - BND_ABOVE, - BND_COUNT -}; +enum BlobNeighbourDir { BND_LEFT, BND_BELOW, BND_RIGHT, BND_ABOVE, BND_COUNT }; // enum for special type of text characters, such as math symbol or italic. enum BlobSpecialTextType { - BSTT_NONE, // No special. - BSTT_ITALIC, // Italic style. - BSTT_DIGIT, // Digit symbols. - BSTT_MATH, // Mathmatical symobls (not including digit). + BSTT_NONE, // No special. + BSTT_ITALIC, // Italic style. + BSTT_DIGIT, // Digit symbols. + BSTT_MATH, // Mathmatical symobls (not including digit). BSTT_UNCLEAR, // Characters with low recognition rate. - BSTT_SKIP, // Characters that we skip labeling (usually too small). + BSTT_SKIP, // Characters that we skip labeling (usually too small). BSTT_COUNT }; @@ -124,330 +117,223 @@ class ColPartition; } class BLOBNBOX; -ELISTIZEH (BLOBNBOX) -class BLOBNBOX:public ELIST_LINK -{ - public: - BLOBNBOX() { - ConstructionInit(); - } - explicit BLOBNBOX(C_BLOB *srcblob) { - box = srcblob->bounding_box(); - ConstructionInit(); - cblob_ptr = srcblob; - area = static_cast(srcblob->area()); - } - ~BLOBNBOX() { - if (owns_cblob_) delete cblob_ptr; - } - static BLOBNBOX* RealBlob(C_OUTLINE* outline) { - C_BLOB* blob = new C_BLOB(outline); - return new BLOBNBOX(blob); - } +ELISTIZEH(BLOBNBOX) +class BLOBNBOX : public ELIST_LINK { + public: + BLOBNBOX() { ConstructionInit(); } + explicit BLOBNBOX(C_BLOB* srcblob) { + box = srcblob->bounding_box(); + ConstructionInit(); + cblob_ptr = srcblob; + area = static_cast(srcblob->area()); + } + ~BLOBNBOX() { + if (owns_cblob_) delete cblob_ptr; + } + static BLOBNBOX* RealBlob(C_OUTLINE* outline) { + C_BLOB* blob = new C_BLOB(outline); + return new BLOBNBOX(blob); + } - // Rotates the box and the underlying blob. - void rotate(FCOORD rotation); - - // Methods that act on the box without touching the underlying blob. - // Reflect the box in the y-axis, leaving the underlying blob untouched. - void reflect_box_in_y_axis(); - // Rotates the box by the angle given by rotation. - // If the blob is a diacritic, then only small rotations for skew - // correction can be applied. - void rotate_box(FCOORD rotation); - // Moves just the box by the given vector. - void translate_box(ICOORD v) { - if (IsDiacritic()) { - box.move(v); - base_char_top_ += v.y(); - base_char_bottom_ += v.y(); - } else { - box.move(v); - set_diacritic_box(box); - } + // Rotates the box and the underlying blob. + void rotate(FCOORD rotation); + + // Methods that act on the box without touching the underlying blob. + // Reflect the box in the y-axis, leaving the underlying blob untouched. + void reflect_box_in_y_axis(); + // Rotates the box by the angle given by rotation. + // If the blob is a diacritic, then only small rotations for skew + // correction can be applied. + void rotate_box(FCOORD rotation); + // Moves just the box by the given vector. + void translate_box(ICOORD v) { + if (IsDiacritic()) { + box.move(v); + base_char_top_ += v.y(); + base_char_bottom_ += v.y(); + } else { + box.move(v); + set_diacritic_box(box); } - void merge(BLOBNBOX *nextblob); - void really_merge(BLOBNBOX* other); - void chop( // fake chop blob - BLOBNBOX_IT *start_it, // location of this - BLOBNBOX_IT *blob_it, // iterator - FCOORD rotation, // for landscape - float xheight); // line height - - void NeighbourGaps(int gaps[BND_COUNT]) const; - void MinMaxGapsClipped(int* h_min, int* h_max, - int* v_min, int* v_max) const; - void CleanNeighbours(); - // Returns positive if there is at least one side neighbour that has a - // similar stroke width and is not on the other side of a rule line. - int GoodTextBlob() const; - // Returns the number of side neighbours that are of type BRT_NOISE. - int NoisyNeighbours() const; - - // Returns true if the blob is noise and has no owner. - bool DeletableNoise() const { - return owner() == nullptr && region_type() == BRT_NOISE; - } - - // Returns true, and sets vert_possible/horz_possible if the blob has some - // feature that makes it individually appear to flow one way. - // eg if it has a high aspect ratio, yet has a complex shape, such as a - // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1. - bool DefiniteIndividualFlow(); - - // Returns true if there is no tabstop violation in merging this and other. - bool ConfirmNoTabViolation(const BLOBNBOX& other) const; - - // Returns true if other has a similar stroke width to this. - bool MatchingStrokeWidth(const BLOBNBOX& other, - double fractional_tolerance, - double constant_tolerance) const; - - // Returns a bounding box of the outline contained within the - // given horizontal range. - TBOX BoundsWithinLimits(int left, int right); - - // Estimates and stores the baseline position based on the shape of the - // outline. - void EstimateBaselinePosition(); + } + void merge(BLOBNBOX* nextblob); + void really_merge(BLOBNBOX* other); + void chop( // fake chop blob + BLOBNBOX_IT* start_it, // location of this + BLOBNBOX_IT* blob_it, // iterator + FCOORD rotation, // for landscape + float xheight); // line height + + void NeighbourGaps(int gaps[BND_COUNT]) const; + void MinMaxGapsClipped(int* h_min, int* h_max, int* v_min, int* v_max) const; + void CleanNeighbours(); + // Returns positive if there is at least one side neighbour that has a + // similar stroke width and is not on the other side of a rule line. + int GoodTextBlob() const; + // Returns the number of side neighbours that are of type BRT_NOISE. + int NoisyNeighbours() const; + + // Returns true if the blob is noise and has no owner. + bool DeletableNoise() const { + return owner() == nullptr && region_type() == BRT_NOISE; + } - // Simple accessors. - const TBOX& bounding_box() const { - return box; - } - // Set the bounding box. Use with caution. - // Normally use compute_bounding_box instead. - void set_bounding_box(const TBOX& new_box) { - box = new_box; - base_char_top_ = box.top(); - base_char_bottom_ = box.bottom(); - } - void compute_bounding_box() { - box = cblob_ptr->bounding_box(); - base_char_top_ = box.top(); - base_char_bottom_ = box.bottom(); - baseline_y_ = box.bottom(); - } - const TBOX& reduced_box() const { - return red_box; - } - void set_reduced_box(TBOX new_box) { - red_box = new_box; - reduced = true; - } - int32_t enclosed_area() const { - return area; - } - bool joined_to_prev() const { - return joined != 0; - } - bool red_box_set() const { - return reduced != 0; - } - int repeated_set() const { - return repeated_set_; - } - void set_repeated_set(int set_id) { - repeated_set_ = set_id; - } - C_BLOB *cblob() const { - return cblob_ptr; - } - TabType left_tab_type() const { - return left_tab_type_; - } - void set_left_tab_type(TabType new_type) { - left_tab_type_ = new_type; - } - TabType right_tab_type() const { - return right_tab_type_; - } - void set_right_tab_type(TabType new_type) { - right_tab_type_ = new_type; - } - BlobRegionType region_type() const { - return region_type_; - } - void set_region_type(BlobRegionType new_type) { - region_type_ = new_type; - } - BlobSpecialTextType special_text_type() const { - return spt_type_; - } - void set_special_text_type(BlobSpecialTextType new_type) { - spt_type_ = new_type; - } - BlobTextFlowType flow() const { - return flow_; - } - void set_flow(BlobTextFlowType value) { - flow_ = value; - } - bool vert_possible() const { - return vert_possible_; - } - void set_vert_possible(bool value) { - vert_possible_ = value; - } - bool horz_possible() const { - return horz_possible_; - } - void set_horz_possible(bool value) { - horz_possible_ = value; - } - int left_rule() const { - return left_rule_; - } - void set_left_rule(int new_left) { - left_rule_ = new_left; - } - int right_rule() const { - return right_rule_; - } - void set_right_rule(int new_right) { - right_rule_ = new_right; - } - int left_crossing_rule() const { - return left_crossing_rule_; - } - void set_left_crossing_rule(int new_left) { - left_crossing_rule_ = new_left; - } - int right_crossing_rule() const { - return right_crossing_rule_; - } - void set_right_crossing_rule(int new_right) { - right_crossing_rule_ = new_right; - } - float horz_stroke_width() const { - return horz_stroke_width_; - } - void set_horz_stroke_width(float width) { - horz_stroke_width_ = width; - } - float vert_stroke_width() const { - return vert_stroke_width_; - } - void set_vert_stroke_width(float width) { - vert_stroke_width_ = width; - } - float area_stroke_width() const { - return area_stroke_width_; - } - tesseract::ColPartition* owner() const { - return owner_; - } - void set_owner(tesseract::ColPartition* new_owner) { - owner_ = new_owner; - } - bool leader_on_left() const { - return leader_on_left_; - } - void set_leader_on_left(bool flag) { - leader_on_left_ = flag; - } - bool leader_on_right() const { - return leader_on_right_; - } - void set_leader_on_right(bool flag) { - leader_on_right_ = flag; - } - BLOBNBOX* neighbour(BlobNeighbourDir n) const { - return neighbours_[n]; - } - bool good_stroke_neighbour(BlobNeighbourDir n) const { - return good_stroke_neighbours_[n]; - } - void set_neighbour(BlobNeighbourDir n, BLOBNBOX* neighbour, bool good) { - neighbours_[n] = neighbour; - good_stroke_neighbours_[n] = good; - } - bool IsDiacritic() const { - return base_char_top_ != box.top() || base_char_bottom_ != box.bottom(); - } - int base_char_top() const { - return base_char_top_; - } - int base_char_bottom() const { - return base_char_bottom_; - } - int baseline_position() const { - return baseline_y_; - } - int line_crossings() const { - return line_crossings_; - } - void set_line_crossings(int value) { - line_crossings_ = value; - } - void set_diacritic_box(const TBOX& diacritic_box) { - base_char_top_ = diacritic_box.top(); - base_char_bottom_ = diacritic_box.bottom(); - } - BLOBNBOX* base_char_blob() const { - return base_char_blob_; - } - void set_base_char_blob(BLOBNBOX* blob) { - base_char_blob_ = blob; - } - void set_owns_cblob(bool value) { owns_cblob_ = value; } + // Returns true, and sets vert_possible/horz_possible if the blob has some + // feature that makes it individually appear to flow one way. + // eg if it has a high aspect ratio, yet has a complex shape, such as a + // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1. + bool DefiniteIndividualFlow(); + + // Returns true if there is no tabstop violation in merging this and other. + bool ConfirmNoTabViolation(const BLOBNBOX& other) const; + + // Returns true if other has a similar stroke width to this. + bool MatchingStrokeWidth(const BLOBNBOX& other, double fractional_tolerance, + double constant_tolerance) const; + + // Returns a bounding box of the outline contained within the + // given horizontal range. + TBOX BoundsWithinLimits(int left, int right); + + // Estimates and stores the baseline position based on the shape of the + // outline. + void EstimateBaselinePosition(); + + // Simple accessors. + const TBOX& bounding_box() const { return box; } + // Set the bounding box. Use with caution. + // Normally use compute_bounding_box instead. + void set_bounding_box(const TBOX& new_box) { + box = new_box; + base_char_top_ = box.top(); + base_char_bottom_ = box.bottom(); + } + void compute_bounding_box() { + box = cblob_ptr->bounding_box(); + base_char_top_ = box.top(); + base_char_bottom_ = box.bottom(); + baseline_y_ = box.bottom(); + } + const TBOX& reduced_box() const { return red_box; } + void set_reduced_box(TBOX new_box) { + red_box = new_box; + reduced = true; + } + int32_t enclosed_area() const { return area; } + bool joined_to_prev() const { return joined != 0; } + bool red_box_set() const { return reduced != 0; } + int repeated_set() const { return repeated_set_; } + void set_repeated_set(int set_id) { repeated_set_ = set_id; } + C_BLOB* cblob() const { return cblob_ptr; } + TabType left_tab_type() const { return left_tab_type_; } + void set_left_tab_type(TabType new_type) { left_tab_type_ = new_type; } + TabType right_tab_type() const { return right_tab_type_; } + void set_right_tab_type(TabType new_type) { right_tab_type_ = new_type; } + BlobRegionType region_type() const { return region_type_; } + void set_region_type(BlobRegionType new_type) { region_type_ = new_type; } + BlobSpecialTextType special_text_type() const { return spt_type_; } + void set_special_text_type(BlobSpecialTextType new_type) { + spt_type_ = new_type; + } + BlobTextFlowType flow() const { return flow_; } + void set_flow(BlobTextFlowType value) { flow_ = value; } + bool vert_possible() const { return vert_possible_; } + void set_vert_possible(bool value) { vert_possible_ = value; } + bool horz_possible() const { return horz_possible_; } + void set_horz_possible(bool value) { horz_possible_ = value; } + int left_rule() const { return left_rule_; } + void set_left_rule(int new_left) { left_rule_ = new_left; } + int right_rule() const { return right_rule_; } + void set_right_rule(int new_right) { right_rule_ = new_right; } + int left_crossing_rule() const { return left_crossing_rule_; } + void set_left_crossing_rule(int new_left) { left_crossing_rule_ = new_left; } + int right_crossing_rule() const { return right_crossing_rule_; } + void set_right_crossing_rule(int new_right) { + right_crossing_rule_ = new_right; + } + float horz_stroke_width() const { return horz_stroke_width_; } + void set_horz_stroke_width(float width) { horz_stroke_width_ = width; } + float vert_stroke_width() const { return vert_stroke_width_; } + void set_vert_stroke_width(float width) { vert_stroke_width_ = width; } + float area_stroke_width() const { return area_stroke_width_; } + tesseract::ColPartition* owner() const { return owner_; } + void set_owner(tesseract::ColPartition* new_owner) { owner_ = new_owner; } + bool leader_on_left() const { return leader_on_left_; } + void set_leader_on_left(bool flag) { leader_on_left_ = flag; } + bool leader_on_right() const { return leader_on_right_; } + void set_leader_on_right(bool flag) { leader_on_right_ = flag; } + BLOBNBOX* neighbour(BlobNeighbourDir n) const { return neighbours_[n]; } + bool good_stroke_neighbour(BlobNeighbourDir n) const { + return good_stroke_neighbours_[n]; + } + void set_neighbour(BlobNeighbourDir n, BLOBNBOX* neighbour, bool good) { + neighbours_[n] = neighbour; + good_stroke_neighbours_[n] = good; + } + bool IsDiacritic() const { + return base_char_top_ != box.top() || base_char_bottom_ != box.bottom(); + } + int base_char_top() const { return base_char_top_; } + int base_char_bottom() const { return base_char_bottom_; } + int baseline_position() const { return baseline_y_; } + int line_crossings() const { return line_crossings_; } + void set_line_crossings(int value) { line_crossings_ = value; } + void set_diacritic_box(const TBOX& diacritic_box) { + base_char_top_ = diacritic_box.top(); + base_char_bottom_ = diacritic_box.bottom(); + } + BLOBNBOX* base_char_blob() const { return base_char_blob_; } + void set_base_char_blob(BLOBNBOX* blob) { base_char_blob_ = blob; } + void set_owns_cblob(bool value) { owns_cblob_ = value; } - bool UniquelyVertical() const { - return vert_possible_ && !horz_possible_; - } - bool UniquelyHorizontal() const { - return horz_possible_ && !vert_possible_; - } + bool UniquelyVertical() const { return vert_possible_ && !horz_possible_; } + bool UniquelyHorizontal() const { return horz_possible_ && !vert_possible_; } - // Returns true if the region type is text. - static bool IsTextType(BlobRegionType type) { - return type == BRT_TEXT || type == BRT_VERT_TEXT; - } - // Returns true if the region type is image. - static bool IsImageType(BlobRegionType type) { - return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE; - } - // Returns true if the region type is line. - static bool IsLineType(BlobRegionType type) { - return type == BRT_HLINE || type == BRT_VLINE; - } - // Returns true if the region type cannot be merged. - static bool UnMergeableType(BlobRegionType type) { - return IsLineType(type) || IsImageType(type); - } - // Helper to call CleanNeighbours on all blobs on the list. - static void CleanNeighbours(BLOBNBOX_LIST* blobs); - // Helper to delete all the deletable blobs on the list. - static void DeleteNoiseBlobs(BLOBNBOX_LIST* blobs); - // Helper to compute edge offsets for all the blobs on the list. - // See coutln.h for an explanation of edge offsets. - static void ComputeEdgeOffsets(Pix* thresholds, Pix* grey, - BLOBNBOX_LIST* blobs); + // Returns true if the region type is text. + static bool IsTextType(BlobRegionType type) { + return type == BRT_TEXT || type == BRT_VERT_TEXT; + } + // Returns true if the region type is image. + static bool IsImageType(BlobRegionType type) { + return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE; + } + // Returns true if the region type is line. + static bool IsLineType(BlobRegionType type) { + return type == BRT_HLINE || type == BRT_VLINE; + } + // Returns true if the region type cannot be merged. + static bool UnMergeableType(BlobRegionType type) { + return IsLineType(type) || IsImageType(type); + } + // Helper to call CleanNeighbours on all blobs on the list. + static void CleanNeighbours(BLOBNBOX_LIST* blobs); + // Helper to delete all the deletable blobs on the list. + static void DeleteNoiseBlobs(BLOBNBOX_LIST* blobs); + // Helper to compute edge offsets for all the blobs on the list. + // See coutln.h for an explanation of edge offsets. + static void ComputeEdgeOffsets(Pix* thresholds, Pix* grey, + BLOBNBOX_LIST* blobs); #ifndef GRAPHICS_DISABLED - // Helper to draw all the blobs on the list in the given body_colour, - // with child outlines in the child_colour. - static void PlotBlobs(BLOBNBOX_LIST* list, - ScrollView::Color body_colour, - ScrollView::Color child_colour, - ScrollView* win); - // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the - // given list in the given body_colour, with child outlines in the - // child_colour. - static void PlotNoiseBlobs(BLOBNBOX_LIST* list, - ScrollView::Color body_colour, - ScrollView::Color child_colour, - ScrollView* win); - - static ScrollView::Color TextlineColor(BlobRegionType region_type, - BlobTextFlowType flow_type); - - // Keep in sync with BlobRegionType. - ScrollView::Color BoxColor() const; - - void plot(ScrollView* window, // window to draw in - ScrollView::Color blob_colour, // for outer bits - ScrollView::Color child_colour); // for holes + // Helper to draw all the blobs on the list in the given body_colour, + // with child outlines in the child_colour. + static void PlotBlobs(BLOBNBOX_LIST* list, ScrollView::Color body_colour, + ScrollView::Color child_colour, ScrollView* win); + // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the + // given list in the given body_colour, with child outlines in the + // child_colour. + static void PlotNoiseBlobs(BLOBNBOX_LIST* list, ScrollView::Color body_colour, + ScrollView::Color child_colour, ScrollView* win); + + static ScrollView::Color TextlineColor(BlobRegionType region_type, + BlobTextFlowType flow_type); + + // Keep in sync with BlobRegionType. + ScrollView::Color BoxColor() const; + + void plot(ScrollView* window, // window to draw in + ScrollView::Color blob_colour, // for outer bits + ScrollView::Color child_colour); // for holes #endif // Initializes the bulk of the members to default values for use at @@ -476,8 +362,8 @@ class BLOBNBOX:public ELIST_LINK right_rule_ = 0; left_crossing_rule_ = 0; right_crossing_rule_ = 0; - if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr - && cblob()->perimeter()!=0) + if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr && + cblob()->perimeter() != 0) area_stroke_width_ = 2.0f * area / cblob()->perimeter(); owner_ = nullptr; base_char_top_ = box.top(); @@ -500,10 +386,10 @@ class BLOBNBOX:public ELIST_LINK } private: - C_BLOB *cblob_ptr; // edgestep blob + C_BLOB* cblob_ptr; // edgestep blob TBOX box; // bounding box TBOX red_box; // bounding box - int area:30; // enclosed area + int area : 30; // enclosed area unsigned joined : 1; // joined to prev unsigned reduced : 1; // reduced box set int repeated_set_; // id of the set of repeated blobs @@ -511,337 +397,303 @@ class BLOBNBOX:public ELIST_LINK TabType right_tab_type_; // Indicates tab-stop assessment BlobRegionType region_type_; // Type of region this blob belongs to BlobTextFlowType flow_; // Quality of text flow. - int16_t left_rule_; // x-coord of nearest but not crossing rule line - int16_t right_rule_; // x-coord of nearest but not crossing rule line - int16_t left_crossing_rule_; // x-coord of nearest or crossing rule line - int16_t right_crossing_rule_; // x-coord of nearest or crossing rule line - int16_t base_char_top_; // y-coord of top/bottom of diacritic base, - int16_t base_char_bottom_; // if it exists else top/bottom of this blob. - int16_t baseline_y_; // Estimate of baseline position. - int line_crossings_; // Number of line intersections touched. - BLOBNBOX* base_char_blob_; // The blob that was the base char. - float horz_stroke_width_; // Median horizontal stroke width - float vert_stroke_width_; // Median vertical stroke width - float area_stroke_width_; // Stroke width from area/perimeter ratio. + int16_t left_rule_; // x-coord of nearest but not crossing rule line + int16_t right_rule_; // x-coord of nearest but not crossing rule line + int16_t left_crossing_rule_; // x-coord of nearest or crossing rule line + int16_t right_crossing_rule_; // x-coord of nearest or crossing rule line + int16_t base_char_top_; // y-coord of top/bottom of diacritic base, + int16_t base_char_bottom_; // if it exists else top/bottom of this blob. + int16_t baseline_y_; // Estimate of baseline position. + int line_crossings_; // Number of line intersections touched. + BLOBNBOX* base_char_blob_; // The blob that was the base char. + float horz_stroke_width_; // Median horizontal stroke width + float vert_stroke_width_; // Median vertical stroke width + float area_stroke_width_; // Stroke width from area/perimeter ratio. tesseract::ColPartition* owner_; // Who will delete me when I am not needed - BlobSpecialTextType spt_type_; // Special text type. + BlobSpecialTextType spt_type_; // Special text type. BLOBNBOX* neighbours_[BND_COUNT]; bool good_stroke_neighbours_[BND_COUNT]; - bool horz_possible_; // Could be part of horizontal flow. - bool vert_possible_; // Could be part of vertical flow. - bool leader_on_left_; // There is a leader to the left. - bool leader_on_right_; // There is a leader to the right. + bool horz_possible_; // Could be part of horizontal flow. + bool vert_possible_; // Could be part of vertical flow. + bool leader_on_left_; // There is a leader to the left. + bool leader_on_right_; // There is a leader to the right. // Iff true, then the destructor should delete the cblob_ptr. // TODO(rays) migrate all uses to correctly setting this flag instead of // deleting the C_BLOB before deleting the BLOBNBOX. bool owns_cblob_; }; -class TO_ROW: public ELIST2_LINK -{ - public: - static const int kErrorWeight = 3; - - TO_ROW() { - clear(); - } //empty - TO_ROW( //constructor - BLOBNBOX *blob, //from first blob - float top, //of row //target height - float bottom, - float row_size); - - void print() const; - float max_y() const { //access function - return y_max; - } - float min_y() const { - return y_min; - } - float mean_y() const { - return (y_min + y_max) / 2.0f; - } - float initial_min_y() const { - return initial_y_min; - } - float line_m() const { //access to line fit - return m; - } - float line_c() const { - return c; - } - float line_error() const { - return error; - } - float parallel_c() const { - return para_c; - } - float parallel_error() const { - return para_error; - } - float believability() const { //baseline goodness - return credibility; - } - float intercept() const { //real parallel_c - return y_origin; - } - void add_blob( //put in row - BLOBNBOX *blob, //blob to add - float top, //of row //target height - float bottom, - float row_size); - void insert_blob( //put in row in order - BLOBNBOX *blob); - - BLOBNBOX_LIST *blob_list() { //get list - return &blobs; - } +class TO_ROW : public ELIST2_LINK { + public: + static const int kErrorWeight = 3; - void set_line( //set line spec - float new_m, //line to set - float new_c, - float new_error) { - m = new_m; - c = new_c; - error = new_error; - } - void set_parallel_line( //set fixed gradient line - float gradient, //page gradient - float new_c, - float new_error) { - para_c = new_c; - para_error = new_error; - credibility = - (float) (blobs.length () - kErrorWeight * new_error); - y_origin = (float) (new_c / sqrt (1 + gradient * gradient)); - //real intercept - } - void set_limits( //set min,max - float new_min, //bottom and - float new_max) { //top of row - y_min = new_min; - y_max = new_max; - } - void compute_vertical_projection(); - //get projection + TO_ROW() { clear(); } // empty + TO_ROW( // constructor + BLOBNBOX* blob, // from first blob + float top, // of row //target height + float bottom, float row_size); - bool rep_chars_marked() const { - return num_repeated_sets_ != -1; - } - void clear_rep_chars_marked() { - num_repeated_sets_ = -1; - } - int num_repeated_sets() const { - return num_repeated_sets_; - } - void set_num_repeated_sets(int num_sets) { - num_repeated_sets_ = num_sets; - } + void print() const; + float max_y() const { // access function + return y_max; + } + float min_y() const { return y_min; } + float mean_y() const { return (y_min + y_max) / 2.0f; } + float initial_min_y() const { return initial_y_min; } + float line_m() const { // access to line fit + return m; + } + float line_c() const { return c; } + float line_error() const { return error; } + float parallel_c() const { return para_c; } + float parallel_error() const { return para_error; } + float believability() const { // baseline goodness + return credibility; + } + float intercept() const { // real parallel_c + return y_origin; + } + void add_blob( // put in row + BLOBNBOX* blob, // blob to add + float top, // of row //target height + float bottom, float row_size); + void insert_blob( // put in row in order + BLOBNBOX* blob); + + BLOBNBOX_LIST* blob_list() { // get list + return &blobs; + } - // true when dead - bool merged; - bool all_caps; // had no ascenders - bool used_dm_model; // in guessing pitch - int16_t projection_left; // start of projection - int16_t projection_right; // start of projection - PITCH_TYPE pitch_decision; // how strong is decision - float fixed_pitch; // pitch or 0 - float fp_space; // sp if fixed pitch - float fp_nonsp; // nonsp if fixed pitch - float pr_space; // sp if prop - float pr_nonsp; // non sp if prop - float spacing; // to "next" row - float xheight; // of line - int xheight_evidence; // number of blobs of height xheight - float ascrise; // ascenders - float descdrop; // descenders - float body_size; // of CJK characters. Assumed to be - // xheight+ascrise for non-CJK text. - int32_t min_space; // min size for real space - int32_t max_nonspace; // max size of non-space - int32_t space_threshold; // space vs nonspace - float kern_size; // average non-space - float space_size; // average space - WERD_LIST rep_words; // repeated chars - ICOORDELT_LIST char_cells; // fixed pitch cells - QSPLINE baseline; // curved baseline - STATS projection; // vertical projection - - private: - void clear(); // clear all values to reasonable defaults - - BLOBNBOX_LIST blobs; //blobs in row - float y_min; //coords - float y_max; - float initial_y_min; - float m, c; //line spec - float error; //line error - float para_c; //constrained fit - float para_error; - float y_origin; //rotated para_c; - float credibility; //baseline believability - int num_repeated_sets_; // number of sets of repeated blobs - // set to -1 if we have not searched - // for repeated blobs in this row yet + void set_line( // set line spec + float new_m, // line to set + float new_c, float new_error) { + m = new_m; + c = new_c; + error = new_error; + } + void set_parallel_line( // set fixed gradient line + float gradient, // page gradient + float new_c, float new_error) { + para_c = new_c; + para_error = new_error; + credibility = (float)(blobs.length() - kErrorWeight * new_error); + y_origin = (float)(new_c / sqrt(1 + gradient * gradient)); + // real intercept + } + void set_limits( // set min,max + float new_min, // bottom and + float new_max) { // top of row + y_min = new_min; + y_max = new_max; + } + void compute_vertical_projection(); + // get projection + + bool rep_chars_marked() const { return num_repeated_sets_ != -1; } + void clear_rep_chars_marked() { num_repeated_sets_ = -1; } + int num_repeated_sets() const { return num_repeated_sets_; } + void set_num_repeated_sets(int num_sets) { num_repeated_sets_ = num_sets; } + + // true when dead + bool merged; + bool all_caps; // had no ascenders + bool used_dm_model; // in guessing pitch + int16_t projection_left; // start of projection + int16_t projection_right; // start of projection + PITCH_TYPE pitch_decision; // how strong is decision + float fixed_pitch; // pitch or 0 + float fp_space; // sp if fixed pitch + float fp_nonsp; // nonsp if fixed pitch + float pr_space; // sp if prop + float pr_nonsp; // non sp if prop + float spacing; // to "next" row + float xheight; // of line + int xheight_evidence; // number of blobs of height xheight + float ascrise; // ascenders + float descdrop; // descenders + float body_size; // of CJK characters. Assumed to be + // xheight+ascrise for non-CJK text. + int32_t min_space; // min size for real space + int32_t max_nonspace; // max size of non-space + int32_t space_threshold; // space vs nonspace + float kern_size; // average non-space + float space_size; // average space + WERD_LIST rep_words; // repeated chars + ICOORDELT_LIST char_cells; // fixed pitch cells + QSPLINE baseline; // curved baseline + STATS projection; // vertical projection + + private: + void clear(); // clear all values to reasonable defaults + + BLOBNBOX_LIST blobs; // blobs in row + float y_min; // coords + float y_max; + float initial_y_min; + float m, c; // line spec + float error; // line error + float para_c; // constrained fit + float para_error; + float y_origin; // rotated para_c; + float credibility; // baseline believability + int num_repeated_sets_; // number of sets of repeated blobs + // set to -1 if we have not searched + // for repeated blobs in this row yet }; -ELIST2IZEH (TO_ROW) -class TO_BLOCK:public ELIST_LINK -{ - public: - TO_BLOCK() : pitch_decision(PITCH_DUNNO) { - clear(); - } //empty - TO_BLOCK( //constructor - BLOCK *src_block); //real block - ~TO_BLOCK(); - - void clear(); // clear all scalar members. - - TO_ROW_LIST *get_rows() { //access function - return &row_list; - } +ELIST2IZEH(TO_ROW) +class TO_BLOCK : public ELIST_LINK { + public: + TO_BLOCK() : pitch_decision(PITCH_DUNNO) { clear(); } // empty + TO_BLOCK( // constructor + BLOCK* src_block); // real block + ~TO_BLOCK(); - // Rotate all the blobnbox lists and the underlying block. Then update the - // median size statistic from the blobs list. - void rotate(const FCOORD& rotation) { - BLOBNBOX_LIST* blobnbox_list[] = {&blobs, &underlines, &noise_blobs, - &small_blobs, &large_blobs, nullptr}; - for (BLOBNBOX_LIST** list = blobnbox_list; *list != nullptr; ++list) { - BLOBNBOX_IT it(*list); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - it.data()->rotate(rotation); - } - } - // Rotate the block - ASSERT_HOST(block->pdblk.poly_block() != nullptr); - block->rotate(rotation); - // Update the median size statistic from the blobs list. - STATS widths(0, block->pdblk.bounding_box().width()); - STATS heights(0, block->pdblk.bounding_box().height()); - BLOBNBOX_IT blob_it(&blobs); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - widths.add(blob_it.data()->bounding_box().width(), 1); - heights.add(blob_it.data()->bounding_box().height(), 1); + void clear(); // clear all scalar members. + + TO_ROW_LIST* get_rows() { // access function + return &row_list; + } + + // Rotate all the blobnbox lists and the underlying block. Then update the + // median size statistic from the blobs list. + void rotate(const FCOORD& rotation) { + BLOBNBOX_LIST* blobnbox_list[] = {&blobs, &underlines, &noise_blobs, + &small_blobs, &large_blobs, nullptr}; + for (BLOBNBOX_LIST** list = blobnbox_list; *list != nullptr; ++list) { + BLOBNBOX_IT it(*list); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->rotate(rotation); } - block->set_median_size(static_cast(widths.median() + 0.5), - static_cast(heights.median() + 0.5)); } + // Rotate the block + ASSERT_HOST(block->pdblk.poly_block() != nullptr); + block->rotate(rotation); + // Update the median size statistic from the blobs list. + STATS widths(0, block->pdblk.bounding_box().width()); + STATS heights(0, block->pdblk.bounding_box().height()); + BLOBNBOX_IT blob_it(&blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + widths.add(blob_it.data()->bounding_box().width(), 1); + heights.add(blob_it.data()->bounding_box().height(), 1); + } + block->set_median_size(static_cast(widths.median() + 0.5), + static_cast(heights.median() + 0.5)); + } - void print_rows() { //debug info - TO_ROW_IT row_it = &row_list; - TO_ROW *row; + void print_rows() { // debug info + TO_ROW_IT row_it = &row_list; + TO_ROW* row; - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); - row_it.forward()) { - row = row_it.data(); - tprintf("Row range (%g,%g), para_c=%g, blobcount=%" PRId32 "\n", - row->min_y(), row->max_y(), row->parallel_c(), - row->blob_list()->length()); - } + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + tprintf("Row range (%g,%g), para_c=%g, blobcount=%" PRId32 "\n", + row->min_y(), row->max_y(), row->parallel_c(), + row->blob_list()->length()); } + } - // Reorganizes the blob lists with a different definition of small, medium - // and large, compared to the original definition. - // Height is still the primary filter key, but medium width blobs of small - // height become medium, and very wide blobs of small height stay small. - void ReSetAndReFilterBlobs(); + // Reorganizes the blob lists with a different definition of small, medium + // and large, compared to the original definition. + // Height is still the primary filter key, but medium width blobs of small + // height become medium, and very wide blobs of small height stay small. + void ReSetAndReFilterBlobs(); - // Deletes noise blobs from all lists where not owned by a ColPartition. - void DeleteUnownedNoise(); + // Deletes noise blobs from all lists where not owned by a ColPartition. + void DeleteUnownedNoise(); - // Computes and stores the edge offsets on each blob for use in feature - // extraction, using greyscale if the supplied grey and thresholds pixes - // are 8-bit or otherwise (if nullptr or not 8 bit) the original binary - // edge step outlines. - // Thresholds must either be the same size as grey or an integer down-scale - // of grey. - // See coutln.h for an explanation of edge offsets. - void ComputeEdgeOffsets(Pix* thresholds, Pix* grey); + // Computes and stores the edge offsets on each blob for use in feature + // extraction, using greyscale if the supplied grey and thresholds pixes + // are 8-bit or otherwise (if nullptr or not 8 bit) the original binary + // edge step outlines. + // Thresholds must either be the same size as grey or an integer down-scale + // of grey. + // See coutln.h for an explanation of edge offsets. + void ComputeEdgeOffsets(Pix* thresholds, Pix* grey); #ifndef GRAPHICS_DISABLED - // Draw the noise blobs from all lists in red. - void plot_noise_blobs(ScrollView* to_win); - // Draw the blobs on on the various lists in the block in different colors. - void plot_graded_blobs(ScrollView* to_win); + // Draw the noise blobs from all lists in red. + void plot_noise_blobs(ScrollView* to_win); + // Draw the blobs on on the various lists in the block in different colors. + void plot_graded_blobs(ScrollView* to_win); #endif - BLOBNBOX_LIST blobs; //medium size - BLOBNBOX_LIST underlines; //underline blobs - BLOBNBOX_LIST noise_blobs; //very small - BLOBNBOX_LIST small_blobs; //fairly small - BLOBNBOX_LIST large_blobs; //big blobs - BLOCK *block; //real block - PITCH_TYPE pitch_decision; //how strong is decision - float line_spacing; //estimate - // line_size is a lower-bound estimate of the font size in pixels of - // the text in the block (with ascenders and descenders), being a small - // (1.25) multiple of the median height of filtered blobs. - // In most cases the font size will be bigger, but it will be closer - // if the text is allcaps, or in a no-x-height script. - float line_size; //estimate - float max_blob_size; //line assignment limit - float baseline_offset; //phase shift - float xheight; //median blob size - float fixed_pitch; //pitch or 0 - float kern_size; //average non-space - float space_size; //average space - int32_t min_space; //min definite space - int32_t max_nonspace; //max definite - float fp_space; //sp if fixed pitch - float fp_nonsp; //nonsp if fixed pitch - float pr_space; //sp if prop - float pr_nonsp; //non sp if prop - TO_ROW *key_row; //starting row - - private: - TO_ROW_LIST row_list; //temporary rows + BLOBNBOX_LIST blobs; // medium size + BLOBNBOX_LIST underlines; // underline blobs + BLOBNBOX_LIST noise_blobs; // very small + BLOBNBOX_LIST small_blobs; // fairly small + BLOBNBOX_LIST large_blobs; // big blobs + BLOCK* block; // real block + PITCH_TYPE pitch_decision; // how strong is decision + float line_spacing; // estimate + // line_size is a lower-bound estimate of the font size in pixels of + // the text in the block (with ascenders and descenders), being a small + // (1.25) multiple of the median height of filtered blobs. + // In most cases the font size will be bigger, but it will be closer + // if the text is allcaps, or in a no-x-height script. + float line_size; // estimate + float max_blob_size; // line assignment limit + float baseline_offset; // phase shift + float xheight; // median blob size + float fixed_pitch; // pitch or 0 + float kern_size; // average non-space + float space_size; // average space + int32_t min_space; // min definite space + int32_t max_nonspace; // max definite + float fp_space; // sp if fixed pitch + float fp_nonsp; // nonsp if fixed pitch + float pr_space; // sp if prop + float pr_nonsp; // non sp if prop + TO_ROW* key_row; // starting row + + private: + TO_ROW_LIST row_list; // temporary rows }; -ELISTIZEH (TO_BLOCK) -extern double_VAR_H (textord_error_weight, 3, -"Weighting for error in believability"); -void find_cblob_limits( //get y limits - C_BLOB *blob, //blob to search - float leftx, //x limits - float rightx, - FCOORD rotation, //for landscape - float &ymin, //output y limits - float &ymax); -void find_cblob_vlimits( //get y limits - C_BLOB *blob, //blob to search - float leftx, //x limits - float rightx, - float &ymin, //output y limits - float &ymax); -void find_cblob_hlimits( //get x limits - C_BLOB *blob, //blob to search - float bottomy, //y limits - float topy, - float &xmin, //output x limits - float &xymax); -C_BLOB *crotate_cblob( //rotate it - C_BLOB *blob, //blob to search - FCOORD rotation //for landscape - ); -TBOX box_next( //get bounding box - BLOBNBOX_IT *it //iterator to blobds - ); -TBOX box_next_pre_chopped( //get bounding box - BLOBNBOX_IT *it //iterator to blobds - ); -void vertical_cblob_projection( //project outlines - C_BLOB *blob, //blob to project - STATS *stats //output - ); -void vertical_coutline_projection( //project outlines - C_OUTLINE *outline, //outline to project - STATS *stats //output - ); +ELISTIZEH(TO_BLOCK) +extern double_VAR_H(textord_error_weight, 3, + "Weighting for error in believability"); +void find_cblob_limits( // get y limits + C_BLOB* blob, // blob to search + float leftx, // x limits + float rightx, + FCOORD rotation, // for landscape + float& ymin, // output y limits + float& ymax); +void find_cblob_vlimits( // get y limits + C_BLOB* blob, // blob to search + float leftx, // x limits + float rightx, + float& ymin, // output y limits + float& ymax); +void find_cblob_hlimits( // get x limits + C_BLOB* blob, // blob to search + float bottomy, // y limits + float topy, + float& xmin, // output x limits + float& xymax); +C_BLOB* crotate_cblob( // rotate it + C_BLOB* blob, // blob to search + FCOORD rotation // for landscape +); +TBOX box_next( // get bounding box + BLOBNBOX_IT* it // iterator to blobds +); +TBOX box_next_pre_chopped( // get bounding box + BLOBNBOX_IT* it // iterator to blobds +); +void vertical_cblob_projection( // project outlines + C_BLOB* blob, // blob to project + STATS* stats // output +); +void vertical_coutline_projection( // project outlines + C_OUTLINE* outline, // outline to project + STATS* stats // output +); #ifndef GRAPHICS_DISABLED -void plot_blob_list(ScrollView* win, // window to draw in - BLOBNBOX_LIST *list, // blob list - ScrollView::Color body_colour, // colour to draw - ScrollView::Color child_colour); // colour of child -#endif // GRAPHICS_DISABLED +void plot_blob_list(ScrollView* win, // window to draw in + BLOBNBOX_LIST* list, // blob list + ScrollView::Color body_colour, // colour to draw + ScrollView::Color child_colour); // colour of child +#endif // GRAPHICS_DISABLED #endif diff --git a/src/ccstruct/blobs.cpp b/src/ccstruct/blobs.cpp index ebe97a8e16..b596919eed 100644 --- a/src/ccstruct/blobs.cpp +++ b/src/ccstruct/blobs.cpp @@ -109,16 +109,15 @@ TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { if (outline->src_outline != nullptr) { // ASSUMPTION: This function is only ever called from ApproximateOutline // and therefore either all points have a src_outline or all do not. - // Just as SetupFromPos sets the vectors from the vertices, setup the - // step_count members to indicate the (positive) number of original - // C_OUTLINE steps to the next vertex. - EDGEPT* pt = outline; - do { - pt->step_count = pt->next->start_step - pt->start_step; - if (pt->step_count < 0) - pt->step_count += pt->src_outline->pathlength(); - pt = pt->next; - } while (pt != outline); + // Just as SetupFromPos sets the vectors from the vertices, setup the + // step_count members to indicate the (positive) number of original + // C_OUTLINE steps to the next vertex. + EDGEPT* pt = outline; + do { + pt->step_count = pt->next->start_step - pt->start_step; + if (pt->step_count < 0) pt->step_count += pt->src_outline->pathlength(); + pt = pt->next; + } while (pt != outline); } result->SetupFromPos(); return result; @@ -153,8 +152,7 @@ void TESSLINE::CopyFrom(const TESSLINE& src) { // Deletes owned data. void TESSLINE::Clear() { - if (loop == nullptr) - return; + if (loop == nullptr) return; EDGEPT* this_edge = loop; do { @@ -179,10 +177,10 @@ void TESSLINE::Normalize(const DENORM& denorm) { void TESSLINE::Rotate(const FCOORD rot) { EDGEPT* pt = loop; do { - int tmp = static_cast(floor(pt->pos.x * rot.x() - - pt->pos.y * rot.y() + 0.5)); - pt->pos.y = static_cast(floor(pt->pos.y * rot.x() + - pt->pos.x * rot.y() + 0.5)); + int tmp = static_cast( + floor(pt->pos.x * rot.x() - pt->pos.y * rot.y() + 0.5)); + pt->pos.y = static_cast( + floor(pt->pos.y * rot.x() + pt->pos.x * rot.y() + 0.5)); pt->pos.x = tmp; pt = pt->next; } while (pt != loop); @@ -235,14 +233,10 @@ void TESSLINE::ComputeBoundingBox() { EDGEPT* this_edge = loop; do { if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { - if (this_edge->pos.x < minx) - minx = this_edge->pos.x; - if (this_edge->pos.y < miny) - miny = this_edge->pos.y; - if (this_edge->pos.x > maxx) - maxx = this_edge->pos.x; - if (this_edge->pos.y > maxy) - maxy = this_edge->pos.y; + if (this_edge->pos.x < minx) minx = this_edge->pos.x; + if (this_edge->pos.y < miny) miny = this_edge->pos.y; + if (this_edge->pos.x > maxx) maxx = this_edge->pos.x; + if (this_edge->pos.y > maxy) maxy = this_edge->pos.y; } this_edge = this_edge->next; } while (this_edge != loop); @@ -258,8 +252,8 @@ void TESSLINE::ComputeBoundingBox() { // this is the left and right edge of the outline perpendicular to the // given direction, but to get the distance units correct, you would // have to divide by the modulus of vec. -void TESSLINE::MinMaxCrossProduct(const TPOINT vec, - int* min_xp, int* max_xp) const { +void TESSLINE::MinMaxCrossProduct(const TPOINT vec, int* min_xp, + int* max_xp) const { *min_xp = INT32_MAX; *max_xp = INT32_MIN; EDGEPT* this_edge = loop; @@ -320,8 +314,7 @@ EDGEPT* TESSLINE::FindBestStartPt() const { // of the resulting list of TESSLINEs. static TESSLINE** ApproximateOutlineList(bool allow_detailed_fx, C_OUTLINE_LIST* outlines, - bool children, - TESSLINE** tail) { + bool children, TESSLINE** tail) { C_OUTLINE_IT ol_it(outlines); for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { C_OUTLINE* outline = ol_it.data(); @@ -375,11 +368,12 @@ TBLOB* TBLOB::ClassifyNormalizeIfNeeded() const { const FCOORD& rotation = denorm_.block()->classify_rotation(); // Move the rotated blob back to the same y-position so that we // can still distinguish similar glyphs with differeny y-position. - float target_y = kBlnBaselineOffset + + float target_y = + kBlnBaselineOffset + (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle); rotated_blob->Normalize(nullptr, &rotation, &denorm_, x_middle, y_middle, - 1.0f, 1.0f, 0.0f, target_y, - denorm_.inverse(), denorm_.pix()); + 1.0f, 1.0f, 0.0f, target_y, denorm_.inverse(), + denorm_.pix()); } return rotated_blob; } @@ -412,13 +406,10 @@ void TBLOB::Clear() { // Sets up the built-in DENORM and normalizes the blob in-place. // For parameters see DENORM::SetupNormalization, plus the inverse flag for // this blob and the Pix for the full image. -void TBLOB::Normalize(const BLOCK* block, - const FCOORD* rotation, - const DENORM* predecessor, - float x_origin, float y_origin, - float x_scale, float y_scale, - float final_xshift, float final_yshift, - bool inverse, Pix* pix) { +void TBLOB::Normalize(const BLOCK* block, const FCOORD* rotation, + const DENORM* predecessor, float x_origin, float y_origin, + float x_scale, float y_scale, float final_xshift, + float final_yshift, bool inverse, Pix* pix) { denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale, final_xshift, final_yshift); denorm_.set_inverse(inverse); @@ -441,28 +432,32 @@ void TBLOB::Normalize(const BLOCK* block, // Rotates by the given rotation in place. void TBLOB::Rotate(const FCOORD rotation) { - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) { + for (TESSLINE* outline = outlines; outline != nullptr; + outline = outline->next) { outline->Rotate(rotation); } } // Moves by the given vec in place. void TBLOB::Move(const ICOORD vec) { - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) { + for (TESSLINE* outline = outlines; outline != nullptr; + outline = outline->next) { outline->Move(vec); } } // Scales by the given factor in place. void TBLOB::Scale(float factor) { - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) { + for (TESSLINE* outline = outlines; outline != nullptr; + outline = outline->next) { outline->Scale(factor); } } // Recomputes the bounding boxes of the outlines. void TBLOB::ComputeBoundingBoxes() { - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) { + for (TESSLINE* outline = outlines; outline != nullptr; + outline = outline->next) { outline->ComputeBoundingBox(); } } @@ -470,7 +465,8 @@ void TBLOB::ComputeBoundingBoxes() { // Returns the number of outlines. int TBLOB::NumOutlines() const { int result = 0; - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) + for (TESSLINE* outline = outlines; outline != nullptr; + outline = outline->next) ++result; return result; } @@ -482,9 +478,8 @@ int TBLOB::NumOutlines() const { * bounding box of the union of all top-level outlines in the blob. **********************************************************************/ TBOX TBLOB::bounding_box() const { - if (outlines == nullptr) - return TBOX(0, 0, 0, 0); - TESSLINE *outline = outlines; + if (outlines == nullptr) return TBOX(0, 0, 0, 0); + TESSLINE* outline = outlines; TBOX box = outline->bounding_box(); for (outline = outline->next; outline != nullptr; outline = outline->next) { box += outline->bounding_box(); @@ -495,7 +490,8 @@ TBOX TBLOB::bounding_box() const { // Finds and deletes any duplicate outlines in this blob, without deleting // their EDGEPTs. void TBLOB::EliminateDuplicateOutlines() { - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) { + for (TESSLINE* outline = outlines; outline != nullptr; + outline = outline->next) { TESSLINE* last_outline = outline; for (TESSLINE* other_outline = outline->next; other_outline != nullptr; last_outline = other_outline, other_outline = other_outline->next) { @@ -525,7 +521,8 @@ void TBLOB::CorrectBlobOrder(TBLOB* next) { #ifndef GRAPHICS_DISABLED void TBLOB::plot(ScrollView* window, ScrollView::Color color, ScrollView::Color child_color) { - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) + for (TESSLINE* outline = outlines; outline != nullptr; + outline = outline->next) outline->plot(window, color, child_color); } #endif // GRAPHICS_DISABLED @@ -570,17 +567,15 @@ void TBLOB::GetPreciseBoundingBox(TBOX* precise_box) const { // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. void TBLOB::GetEdgeCoords(const TBOX& box, - GenericVector >* x_coords, - GenericVector >* y_coords) const { + GenericVector>* x_coords, + GenericVector>* y_coords) const { GenericVector empty; x_coords->init_to_size(box.height(), empty); y_coords->init_to_size(box.width(), empty); CollectEdges(box, nullptr, nullptr, x_coords, y_coords); // Sort the output vectors. - for (int i = 0; i < x_coords->size(); ++i) - (*x_coords)[i].sort(); - for (int i = 0; i < y_coords->size(); ++i) - (*y_coords)[i].sort(); + for (int i = 0; i < x_coords->size(); ++i) (*x_coords)[i].sort(); + for (int i = 0; i < y_coords->size(); ++i) (*y_coords)[i].sort(); } // Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over @@ -612,13 +607,14 @@ static void SegmentLLSQ(const FCOORD& pt1, const FCOORD& pt2, // bottom-left of the bounding box, hence indices to x_coords, y_coords // are clipped to ([0,x_limit], [0,y_limit]). // See GetEdgeCoords above for a description of x_coords, y_coords. -static void SegmentCoords(const FCOORD& pt1, const FCOORD& pt2, - int x_limit, int y_limit, - GenericVector >* x_coords, - GenericVector >* y_coords) { +static void SegmentCoords(const FCOORD& pt1, const FCOORD& pt2, int x_limit, + int y_limit, + GenericVector>* x_coords, + GenericVector>* y_coords) { FCOORD step(pt2); step -= pt1; - int start = ClipToRange(IntCastRounded(std::min(pt1.x(), pt2.x())), 0, x_limit); + int start = + ClipToRange(IntCastRounded(std::min(pt1.x(), pt2.x())), 0, x_limit); int end = ClipToRange(IntCastRounded(std::max(pt1.x(), pt2.x())), 0, x_limit); for (int x = start; x < end; ++x) { int y = IntCastRounded(pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x()); @@ -641,20 +637,20 @@ static void SegmentBBox(const FCOORD& pt1, const FCOORD& pt2, TBOX* bbox) { int x1 = IntCastRounded(std::min(pt1.x(), pt2.x())); int x2 = IntCastRounded(std::max(pt1.x(), pt2.x())); if (x2 > x1) { - int y1 = IntCastRounded(pt1.y() + step.y() * (x1 + 0.5 - pt1.x()) / - step.x()); - int y2 = IntCastRounded(pt1.y() + step.y() * (x2 - 0.5 - pt1.x()) / - step.x()); + int y1 = + IntCastRounded(pt1.y() + step.y() * (x1 + 0.5 - pt1.x()) / step.x()); + int y2 = + IntCastRounded(pt1.y() + step.y() * (x2 - 0.5 - pt1.x()) / step.x()); TBOX point(x1, std::min(y1, y2), x2, std::max(y1, y2)); *bbox += point; } int y1 = IntCastRounded(std::min(pt1.y(), pt2.y())); int y2 = IntCastRounded(std::max(pt1.y(), pt2.y())); if (y2 > y1) { - int x1 = IntCastRounded(pt1.x() + step.x() * (y1 + 0.5 - pt1.y()) / - step.y()); - int x2 = IntCastRounded(pt1.x() + step.x() * (y2 - 0.5 - pt1.y()) / - step.y()); + int x1 = + IntCastRounded(pt1.x() + step.x() * (y1 + 0.5 - pt1.y()) / step.y()); + int x2 = + IntCastRounded(pt1.x() + step.x() * (y2 - 0.5 - pt1.y()) / step.y()); TBOX point(std::min(x1, x2), y1, std::max(x1, x2), y2); *bbox += point; } @@ -672,10 +668,9 @@ static void SegmentBBox(const FCOORD& pt1, const FCOORD& pt2, TBOX* bbox) { // indices into x_coords, y_coords are offset by box.botleft(). static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt, const DENORM& denorm, const TBOX& box, - TBOX* bounding_box, - LLSQ* accumulator, - GenericVector > *x_coords, - GenericVector > *y_coords) { + TBOX* bounding_box, LLSQ* accumulator, + GenericVector>* x_coords, + GenericVector>* y_coords) { const C_OUTLINE* outline = startpt->src_outline; int x_limit = box.width() - 1; int y_limit = box.height() - 1; @@ -694,8 +689,7 @@ static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt, // bounds of the outline steps/ due to wrap-around, so we use % step_length // everywhere, except for start_index. int end_index = lastpt->start_step + lastpt->step_count; - if (end_index <= start_index) - end_index += step_length; + if (end_index <= start_index) end_index += step_length; // pos is the integer coordinates of the binary image steps. ICOORD pos = outline->position_at_index(start_index); FCOORD origin(box.left(), box.bottom()); @@ -718,8 +712,8 @@ static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt, // with a greyscale image, the positioning of the edge there may be a // fictitious extrapolation, so previous processing has eliminated it. if (outline->edge_strength_at_index(index % step_length) > 0) { - FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, - index % step_length); + FCOORD f_pos = + outline->sub_pixel_pos_at_index(pos, index % step_length); FCOORD pos_normed; denorm.NormTransform(root_denorm, f_pos, &pos_normed); pos_normed -= origin; @@ -731,8 +725,8 @@ static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt, SegmentLLSQ(pos_normed, prev_normed, accumulator); } if (x_coords != nullptr && y_coords != nullptr) { - SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, - x_coords, y_coords); + SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, x_coords, + y_coords); } prev_normed = pos_normed; } @@ -764,10 +758,9 @@ static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt, // llsq and/or the x_coords/y_coords. Both are used in different kinds of // normalization. // For a description of x_coords, y_coords, see GetEdgeCoords above. -void TBLOB::CollectEdges(const TBOX& box, - TBOX* bounding_box, LLSQ* llsq, - GenericVector >* x_coords, - GenericVector >* y_coords) const { +void TBLOB::CollectEdges(const TBOX& box, TBOX* bounding_box, LLSQ* llsq, + GenericVector>* x_coords, + GenericVector>* y_coords) const { // Iterate the outlines. for (const TESSLINE* ol = outlines; ol != nullptr; ol = ol->next) { // Iterate the polygon. @@ -783,8 +776,8 @@ void TBLOB::CollectEdges(const TBOX& box, } while (last_pt != loop_pt && !last_pt->IsHidden() && last_pt->src_outline == pt->src_outline); last_pt = last_pt->prev; - CollectEdgesOfRun(pt, last_pt, denorm_, box, - bounding_box, llsq, x_coords, y_coords); + CollectEdgesOfRun(pt, last_pt, denorm_, box, bounding_box, llsq, x_coords, + y_coords); pt = last_pt; } while ((pt = pt->next) != loop_pt); } @@ -809,8 +802,7 @@ TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) { void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, - const TBOX* norm_box, - DENORM* word_denorm) { + const TBOX* norm_box, DENORM* word_denorm) { TBOX word_box = bounding_box(); if (norm_box != nullptr) word_box = *norm_box; float word_middle = (word_box.left() + word_box.right()) / 2.0f; @@ -848,8 +840,8 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, } if (word_denorm != nullptr) { word_denorm->SetupNormalization(block, nullptr, nullptr, word_middle, - input_y_offset, scale, scale, - 0.0f, final_y_offset); + input_y_offset, scale, scale, 0.0f, + final_y_offset); word_denorm->set_inverse(inverse); word_denorm->set_pix(pix); } @@ -890,7 +882,7 @@ TBOX TWERD::bounding_box() const { // Merges the blobs from start to end, not including end, and deletes // the blobs between start and end. void TWERD::MergeBlobs(int start, int end) { - if (start >= blobs.size() - 1) return; // Nothing to do. + if (start >= blobs.size() - 1) return; // Nothing to do. TESSLINE* outline = blobs[start]->outlines; for (int i = start + 1; i < end && i < blobs.size(); ++i) { TBLOB* next_blob = blobs[i]; @@ -899,8 +891,7 @@ void TWERD::MergeBlobs(int start, int end) { blobs[start]->outlines = next_blob->outlines; outline = blobs[start]->outlines; } else { - while (outline->next != nullptr) - outline = outline->next; + while (outline->next != nullptr) outline = outline->next; outline->next = next_blob->outlines; next_blob->outlines = nullptr; } @@ -931,34 +922,34 @@ void TWERD::plot(ScrollView* window) { * separated using divide_blobs. Sets the location to be used in the * call to divide_blobs. **********************************************************************/ -bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location) { +bool divisible_blob(TBLOB* blob, bool italic_blob, TPOINT* location) { if (blob->outlines == nullptr || blob->outlines->next == nullptr) return false; // Need at least 2 outlines for it to be possible. int max_gap = 0; - TPOINT vertical = italic_blob ? kDivisibleVerticalItalic - : kDivisibleVerticalUpright; + TPOINT vertical = + italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright; for (TESSLINE* outline1 = blob->outlines; outline1 != nullptr; outline1 = outline1->next) { - if (outline1->is_hole) - continue; // Holes do not count as separable. + if (outline1->is_hole) continue; // Holes do not count as separable. TPOINT mid_pt1( - static_cast((outline1->topleft.x + outline1->botright.x) / 2), - static_cast((outline1->topleft.y + outline1->botright.y) / 2)); + static_cast((outline1->topleft.x + outline1->botright.x) / 2), + static_cast((outline1->topleft.y + outline1->botright.y) / 2)); int mid_prod1 = CROSS(mid_pt1, vertical); int min_prod1, max_prod1; outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1); for (TESSLINE* outline2 = outline1->next; outline2 != nullptr; outline2 = outline2->next) { - if (outline2->is_hole) - continue; // Holes do not count as separable. - TPOINT mid_pt2( - static_cast((outline2->topleft.x + outline2->botright.x) / 2), - static_cast((outline2->topleft.y + outline2->botright.y) / 2)); + if (outline2->is_hole) continue; // Holes do not count as separable. + TPOINT mid_pt2(static_cast( + (outline2->topleft.x + outline2->botright.x) / 2), + static_cast( + (outline2->topleft.y + outline2->botright.y) / 2)); int mid_prod2 = CROSS(mid_pt2, vertical); int min_prod2, max_prod2; outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2); int mid_gap = abs(mid_prod2 - mid_prod1); - int overlap = std::min(max_prod1, max_prod2) - std::max(min_prod1, min_prod2); + int overlap = + std::min(max_prod1, max_prod2) - std::max(min_prod1, min_prod2); if (mid_gap - overlap / 4 > max_gap) { max_gap = mid_gap - overlap / 4; *location = mid_pt1; @@ -980,21 +971,21 @@ bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location) { * other blob. The ones whose x location is less than that point are * retained in the original blob. **********************************************************************/ -void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, +void divide_blobs(TBLOB* blob, TBLOB* other_blob, bool italic_blob, const TPOINT& location) { - TPOINT vertical = italic_blob ? kDivisibleVerticalItalic - : kDivisibleVerticalUpright; - TESSLINE *outline1 = nullptr; - TESSLINE *outline2 = nullptr; + TPOINT vertical = + italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright; + TESSLINE* outline1 = nullptr; + TESSLINE* outline2 = nullptr; - TESSLINE *outline = blob->outlines; + TESSLINE* outline = blob->outlines; blob->outlines = nullptr; int location_prod = CROSS(location, vertical); while (outline != nullptr) { TPOINT mid_pt( - static_cast((outline->topleft.x + outline->botright.x) / 2), - static_cast((outline->topleft.y + outline->botright.y) / 2)); + static_cast((outline->topleft.x + outline->botright.x) / 2), + static_cast((outline->topleft.y + outline->botright.y) / 2)); int mid_prod = CROSS(mid_pt, vertical); if (mid_prod < location_prod) { // Outline is in left blob. @@ -1014,8 +1005,6 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, outline = outline->next; } - if (outline1) - outline1->next = nullptr; - if (outline2) - outline2->next = nullptr; + if (outline1) outline1->next = nullptr; + if (outline2) outline2->next = nullptr; } diff --git a/src/ccstruct/blobs.h b/src/ccstruct/blobs.h index 26101f339b..2a7e1c76a2 100644 --- a/src/ccstruct/blobs.h +++ b/src/ccstruct/blobs.h @@ -45,12 +45,12 @@ class WERD; /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ -#define EDGEPTFLAGS 4 /*concavity,length etc. */ +#define EDGEPTFLAGS 4 /*concavity,length etc. */ struct TPOINT { - TPOINT(): x(0), y(0) {} + TPOINT() : x(0), y(0) {} TPOINT(int16_t vx, int16_t vy) : x(vx), y(vy) {} - TPOINT(const ICOORD &ic) : x(ic.x()), y(ic.y()) {} + TPOINT(const ICOORD& ic) : x(ic.x()), y(ic.y()) {} void operator+=(const TPOINT& other) { x += other.x; @@ -68,19 +68,21 @@ struct TPOINT { static bool IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0, const TPOINT& b1); - int16_t x; // absolute x coord. - int16_t y; // absolute y coord. + int16_t x; // absolute x coord. + int16_t y; // absolute y coord. }; -using VECTOR = TPOINT; // structure for coordinates. +using VECTOR = TPOINT; // structure for coordinates. struct EDGEPT { EDGEPT() - : next(nullptr), prev(nullptr), src_outline(nullptr), start_step(0), step_count(0) { + : next(nullptr), + prev(nullptr), + src_outline(nullptr), + start_step(0), + step_count(0) { memset(flags, 0, EDGEPTFLAGS * sizeof(flags[0])); } - EDGEPT(const EDGEPT& src) : next(nullptr), prev(nullptr) { - CopyFrom(src); - } + EDGEPT(const EDGEPT& src) : next(nullptr), prev(nullptr) { CopyFrom(src); } EDGEPT& operator=(const EDGEPT& src) { CopyFrom(src); return *this; @@ -144,34 +146,24 @@ struct EDGEPT { } // Accessors to hide or reveal a cut edge from feature extractors. - void Hide() { - flags[0] = true; - } - void Reveal() { - flags[0] = false; - } - bool IsHidden() const { - return flags[0] != 0; - } - void MarkChop() { - flags[2] = true; - } - bool IsChopPt() const { - return flags[2] != 0; - } - - TPOINT pos; // position - VECTOR vec; // vector to next point + void Hide() { flags[0] = true; } + void Reveal() { flags[0] = false; } + bool IsHidden() const { return flags[0] != 0; } + void MarkChop() { flags[2] = true; } + bool IsChopPt() const { return flags[2] != 0; } + + TPOINT pos; // position + VECTOR vec; // vector to next point // TODO(rays) Remove flags and replace with // is_hidden, runlength, dir, and fixed. The only use // of the flags other than is_hidden is in polyaprx.cpp. - char flags[EDGEPTFLAGS]; // concavity, length etc - EDGEPT* next; // anticlockwise element - EDGEPT* prev; // clockwise element - C_OUTLINE* src_outline; // Outline it came from. + char flags[EDGEPTFLAGS]; // concavity, length etc + EDGEPT* next; // anticlockwise element + EDGEPT* prev; // clockwise element + C_OUTLINE* src_outline; // Outline it came from. // The following fields are not used if src_outline is nullptr. - int start_step; // Location of pos in src_outline. - int step_count; // Number of steps used (may wrap around). + int start_step; // Location of pos in src_outline. + int step_count; // Number of steps used (may wrap around). }; // For use in chop and findseam to keep a list of which EDGEPTs were inserted. @@ -182,9 +174,7 @@ struct TESSLINE { TESSLINE(const TESSLINE& src) : loop(nullptr), next(nullptr) { CopyFrom(src); } - ~TESSLINE() { - Clear(); - } + ~TESSLINE() { Clear(); } TESSLINE& operator=(const TESSLINE& src) { CopyFrom(src); return *this; @@ -232,40 +222,35 @@ struct TESSLINE { } // Returns true if the point is contained within the outline box. bool Contains(const TPOINT& pt) const { - return topleft.x <= pt.x && pt.x <= botright.x && - botright.y <= pt.y && pt.y <= topleft.y; + return topleft.x <= pt.x && pt.x <= botright.x && botright.y <= pt.y && + pt.y <= topleft.y; } - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED void plot(ScrollView* window, ScrollView::Color color, ScrollView::Color child_color); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED // Returns the first outline point that has a different src_outline to its // predecessor, or, if all the same, the lowest indexed point. EDGEPT* FindBestStartPt() const; - int BBArea() const { return (botright.x - topleft.x) * (topleft.y - botright.y); } - TPOINT topleft; // Top left of loop. - TPOINT botright; // Bottom right of loop. - TPOINT start; // Start of loop. - bool is_hole; // True if this is a hole/child outline. - EDGEPT *loop; // Edgeloop. - TESSLINE *next; // Next outline in blob. -}; // Outline structure. + TPOINT topleft; // Top left of loop. + TPOINT botright; // Bottom right of loop. + TPOINT start; // Start of loop. + bool is_hole; // True if this is a hole/child outline. + EDGEPT* loop; // Edgeloop. + TESSLINE* next; // Next outline in blob. +}; // Outline structure. struct TBLOB { TBLOB() : outlines(nullptr) {} - TBLOB(const TBLOB& src) : outlines(nullptr) { - CopyFrom(src); - } - ~TBLOB() { - Clear(); - } + TBLOB(const TBLOB& src) : outlines(nullptr) { CopyFrom(src); } + ~TBLOB() { Clear(); } TBLOB& operator=(const TBLOB& src) { CopyFrom(src); return *this; @@ -291,13 +276,10 @@ struct TBLOB { // Sets up the built-in DENORM and normalizes the blob in-place. // For parameters see DENORM::SetupNormalization, plus the inverse flag for // this blob and the Pix for the full image. - void Normalize(const BLOCK* block, - const FCOORD* rotation, - const DENORM* predecessor, - float x_origin, float y_origin, - float x_scale, float y_scale, - float final_xshift, float final_yshift, - bool inverse, Pix* pix); + void Normalize(const BLOCK* block, const FCOORD* rotation, + const DENORM* predecessor, float x_origin, float y_origin, + float x_scale, float y_scale, float final_xshift, + float final_yshift, bool inverse, Pix* pix); // Rotates by the given rotation in place. void Rotate(const FCOORD rotation); // Moves by the given vec in place. @@ -337,18 +319,17 @@ struct TBLOB { // increasing x. void CorrectBlobOrder(TBLOB* next); - const DENORM& denorm() const { - return denorm_; - } + const DENORM& denorm() const { return denorm_; } - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED void plot(ScrollView* window, ScrollView::Color color, ScrollView::Color child_color); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED int BBArea() const { int total_area = 0; - for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) + for (TESSLINE* outline = outlines; outline != nullptr; + outline = outline->next) total_area += outline->BBArea(); return total_area; } @@ -371,10 +352,10 @@ struct TBLOB { // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. void GetEdgeCoords(const TBOX& box, - GenericVector >* x_coords, - GenericVector >* y_coords) const; + GenericVector>* x_coords, + GenericVector>* y_coords) const; - TESSLINE *outlines; // List of outlines in blob. + TESSLINE* outlines; // List of outlines in blob. private: // TODO(rays) Someday the data members will be private too. // For all the edge steps in all the outlines, or polygonal approximation @@ -382,24 +363,19 @@ struct TBLOB { // llsq and/or the x_coords/y_coords. Both are used in different kinds of // normalization. // For a description of x_coords, y_coords, see GetEdgeCoords above. - void CollectEdges(const TBOX& box, - TBOX* bounding_box, LLSQ* llsq, - GenericVector >* x_coords, - GenericVector >* y_coords) const; + void CollectEdges(const TBOX& box, TBOX* bounding_box, LLSQ* llsq, + GenericVector>* x_coords, + GenericVector>* y_coords) const; private: // DENORM indicating the transformations that this blob has undergone so far. DENORM denorm_; -}; // Blob structure. +}; // Blob structure. struct TWERD { TWERD() : latin_script(false) {} - TWERD(const TWERD& src) { - CopyFrom(src); - } - ~TWERD() { - Clear(); - } + TWERD(const TWERD& src) { CopyFrom(src); } + ~TWERD() { Clear(); } TWERD& operator=(const TWERD& src) { CopyFrom(src); return *this; @@ -411,8 +387,7 @@ struct TWERD { // DENORMs in the blobs. void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, - tesseract::OcrEngineMode hint, - const TBOX* norm_box, + tesseract::OcrEngineMode hint, const TBOX* norm_box, DENORM* word_denorm); // Copies the data and the blobs, but leaves next untouched. void CopyFrom(const TWERD& src); @@ -422,9 +397,7 @@ struct TWERD { void ComputeBoundingBoxes(); // Returns the number of blobs in the word. - int NumBlobs() const { - return blobs.size(); - } + int NumBlobs() const { return blobs.size(); } TBOX bounding_box() const; // Merges the blobs from start to end, not including end, and deletes @@ -433,17 +406,17 @@ struct TWERD { void plot(ScrollView* window); - GenericVector blobs; // Blobs in word. - bool latin_script; // This word is in a latin-based script. + GenericVector blobs; // Blobs in word. + bool latin_script; // This word is in a latin-based script. }; /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ // TODO(rays) Make divisible_blob and divide_blobs members of TBLOB. -bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location); +bool divisible_blob(TBLOB* blob, bool italic_blob, TPOINT* location); -void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, +void divide_blobs(TBLOB* blob, TBLOB* other_blob, bool italic_blob, const TPOINT& location); #endif diff --git a/src/ccstruct/blread.cpp b/src/ccstruct/blread.cpp index 1c2d68b09b..b46677be33 100644 --- a/src/ccstruct/blread.cpp +++ b/src/ccstruct/blread.cpp @@ -17,15 +17,15 @@ * **********************************************************************/ -#include +#include #ifdef __UNIX__ -#include +#include #endif -#include "scanutils.h" -#include "fileerr.h" -#include "blread.h" +#include "blread.h" +#include "fileerr.h" +#include "scanutils.h" -#define UNLV_EXT ".uzn" // unlv zone file +#define UNLV_EXT ".uzn" // unlv zone file /********************************************************************** * read_unlv_file @@ -33,38 +33,38 @@ * Read a whole unlv zone file to make a list of blocks. **********************************************************************/ -bool read_unlv_file( //print list of sides - STRING name, //basename of file - int32_t xsize, //image size - int32_t ysize, //image size - BLOCK_LIST *blocks //output list - ) { - FILE *pdfp; //file pointer - BLOCK *block; //current block - int x; //current top-down coords +bool read_unlv_file( // print list of sides + STRING name, // basename of file + int32_t xsize, // image size + int32_t ysize, // image size + BLOCK_LIST* blocks // output list +) { + FILE* pdfp; // file pointer + BLOCK* block; // current block + int x; // current top-down coords int y; - int width; //of current block + int width; // of current block int height; - BLOCK_IT block_it = blocks; //block iterator + BLOCK_IT block_it = blocks; // block iterator - name += UNLV_EXT; //add extension - if ((pdfp = fopen (name.string (), "rb")) == nullptr) { - return false; //didn't read one + name += UNLV_EXT; // add extension + if ((pdfp = fopen(name.string(), "rb")) == nullptr) { + return false; // didn't read one } else { while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) { - //make rect block - block = new BLOCK (name.string (), TRUE, 0, 0, - (int16_t) x, (int16_t) (ysize - y - height), - (int16_t) (x + width), (int16_t) (ysize - y)); - //on end of list - block_it.add_to_end (block); + // make rect block + block = new BLOCK(name.string(), TRUE, 0, 0, (int16_t)x, + (int16_t)(ysize - y - height), (int16_t)(x + width), + (int16_t)(ysize - y)); + // on end of list + block_it.add_to_end(block); } fclose(pdfp); } return true; } -void FullPageBlock(int width, int height, BLOCK_LIST *blocks) { +void FullPageBlock(int width, int height, BLOCK_LIST* blocks) { BLOCK_IT block_it(blocks); BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); block_it.add_to_end(block); diff --git a/src/ccstruct/blread.h b/src/ccstruct/blread.h index de943ce343..782fd76842 100644 --- a/src/ccstruct/blread.h +++ b/src/ccstruct/blread.h @@ -17,17 +17,17 @@ * **********************************************************************/ -#ifndef BLREAD_H -#define BLREAD_H +#ifndef BLREAD_H +#define BLREAD_H -#include "params.h" -#include "ocrblock.h" +#include "ocrblock.h" +#include "params.h" -bool read_unlv_file( //print list of sides - STRING name, //basename of file - int32_t xsize, //image size - int32_t ysize, //image size - BLOCK_LIST *blocks //output list - ); -void FullPageBlock(int width, int height, BLOCK_LIST *blocks); +bool read_unlv_file( // print list of sides + STRING name, // basename of file + int32_t xsize, // image size + int32_t ysize, // image size + BLOCK_LIST* blocks // output list +); +void FullPageBlock(int width, int height, BLOCK_LIST* blocks); #endif diff --git a/src/ccstruct/boxread.cpp b/src/ccstruct/boxread.cpp index 277b8ddba7..d8731c22b2 100644 --- a/src/ccstruct/boxread.cpp +++ b/src/ccstruct/boxread.cpp @@ -48,10 +48,8 @@ FILE* OpenBoxFile(const STRING& fname) { // Each of the output vectors is optional (may be nullptr). // Returns false if no boxes are found. bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename, - GenericVector* boxes, - GenericVector* texts, - GenericVector* box_texts, - GenericVector* pages) { + GenericVector* boxes, GenericVector* texts, + GenericVector* box_texts, GenericVector* pages) { GenericVector box_data; if (!tesseract::LoadDataFromFile(BoxFileName(filename), &box_data)) return false; @@ -64,11 +62,9 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename, // Reads all boxes from the string. Otherwise, as ReadAllBoxes. bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, - bool continue_on_failure, - GenericVector* boxes, + bool continue_on_failure, GenericVector* boxes, GenericVector* texts, - GenericVector* box_texts, - GenericVector* pages) { + GenericVector* box_texts, GenericVector* pages) { STRING box_str(box_data); GenericVector lines; box_str.split('\n', &lines); @@ -100,9 +96,10 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, } // Returns the box file name corresponding to the given image_filename. -STRING BoxFileName(const STRING& image_filename) { +STRING +BoxFileName(const STRING& image_filename) { STRING box_filename = image_filename; - const char *lastdot = strrchr(box_filename.string(), '.'); + const char* lastdot = strrchr(box_filename.string(), '.'); if (lastdot != nullptr) box_filename.truncate_at(lastdot - box_filename.string()); @@ -121,25 +118,25 @@ STRING BoxFileName(const STRING& image_filename) { // for valid utf-8 and allows space or tab between fields. // utf8_str is set with the unichar string, and bounding box with the box. // If there are page numbers in the file, it reads them all. -bool ReadNextBox(int *line_number, FILE* box_file, - STRING* utf8_str, TBOX* bounding_box) { +bool ReadNextBox(int* line_number, FILE* box_file, STRING* utf8_str, + TBOX* bounding_box) { return ReadNextBox(-1, line_number, box_file, utf8_str, bounding_box); } // As ReadNextBox above, but get a specific page number. (0-based) // Use -1 to read any page number. Files without page number all // read as if they are page 0. -bool ReadNextBox(int target_page, int *line_number, FILE* box_file, +bool ReadNextBox(int target_page, int* line_number, FILE* box_file, STRING* utf8_str, TBOX* bounding_box) { int page = 0; - char buff[kBoxReadBufSize]; // boxfile read buffer - char *buffptr = buff; + char buff[kBoxReadBufSize]; // boxfile read buffer + char* buffptr = buff; while (fgets(buff, sizeof(buff) - 1, box_file)) { (*line_number)++; buffptr = buff; - const unsigned char *ubuf = reinterpret_cast(buffptr); + const unsigned char* ubuf = reinterpret_cast(buffptr); if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) buffptr += 3; // Skip unicode file designation. // Check for blank lines in box file @@ -152,7 +149,7 @@ bool ReadNextBox(int target_page, int *line_number, FILE* box_file, continue; } if (target_page >= 0 && target_page != page) - continue; // Not on the appropriate page. + continue; // Not on the appropriate page. return true; // Successfully read a box. } } @@ -170,19 +167,18 @@ bool ReadNextBox(int target_page, int *line_number, FILE* box_file, // See applyybox.cpp for more information. bool ParseBoxFileStr(const char* boxfile_str, int* page_number, STRING* utf8_str, TBOX* bounding_box) { - *bounding_box = TBOX(); // Initialize it to empty. + *bounding_box = TBOX(); // Initialize it to empty. *utf8_str = ""; char uch[kBoxReadBufSize]; - const char *buffptr = boxfile_str; + const char* buffptr = boxfile_str; // Read the unichar without messing up on Tibetan. // According to issue 253 the utf-8 surrogates 85 and A0 are treated // as whitespace by sscanf, so it is more reliable to just find // ascii space and tab. int uch_len = 0; // Skip unicode file designation, if present. - const unsigned char *ubuf = reinterpret_cast(buffptr); - if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) - buffptr += 3; + const unsigned char* ubuf = reinterpret_cast(buffptr); + if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) buffptr += 3; // Allow a single blank as the UTF-8 string. Check for empty string and // then blindly eat the first character. if (*buffptr == '\0') return false; @@ -194,8 +190,8 @@ bool ParseBoxFileStr(const char* boxfile_str, int* page_number, if (*buffptr != '\0') ++buffptr; int x_min, y_min, x_max, y_max; *page_number = 0; - int count = sscanf(buffptr, "%d %d %d %d %d", - &x_min, &y_min, &x_max, &y_max, page_number); + int count = sscanf(buffptr, "%d %d %d %d %d", &x_min, &y_min, &x_max, &y_max, + page_number); if (count != 5 && count != 4) { tprintf("Bad box coordinates in boxfile string! %s\n", ubuf); return false; @@ -214,8 +210,8 @@ bool ParseBoxFileStr(const char* boxfile_str, int* page_number, tesseract::UNICHAR ch(uch + used, uch_len - used); int new_used = ch.utf8_len(); if (new_used == 0) { - tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n", - uch + used, uch[used], used + 1); + tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n", uch + used, + uch[used], used + 1); return false; } used += new_used; diff --git a/src/ccstruct/boxread.h b/src/ccstruct/boxread.h index 3473f6534a..27388ceb97 100644 --- a/src/ccstruct/boxread.h +++ b/src/ccstruct/boxread.h @@ -42,24 +42,21 @@ FILE* OpenBoxFile(const STRING& fname); // Each of the output vectors is optional (may be nullptr). // Returns false if no boxes are found. bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename, - GenericVector* boxes, - GenericVector* texts, - GenericVector* box_texts, - GenericVector* pages); + GenericVector* boxes, GenericVector* texts, + GenericVector* box_texts, GenericVector* pages); // Reads all boxes from the string. Otherwise, as ReadAllBoxes. // continue_on_failure allows reading to continue even if an invalid box is // encountered and will return true if it succeeds in reading some boxes. // It otherwise gives up and returns false on encountering an invalid box. bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, - bool continue_on_failure, - GenericVector* boxes, + bool continue_on_failure, GenericVector* boxes, GenericVector* texts, - GenericVector* box_texts, - GenericVector* pages); + GenericVector* box_texts, GenericVector* pages); // Returns the box file name corresponding to the given image_filename. -STRING BoxFileName(const STRING& image_filename); +STRING +BoxFileName(const STRING& image_filename); // ReadNextBox factors out the code to interpret a line of a box // file so that applybox and unicharset_extractor interpret the same way. @@ -69,12 +66,12 @@ STRING BoxFileName(const STRING& image_filename); // for valid utf-8 and allows space or tab between fields. // utf8_str is set with the unichar string, and bounding box with the box. // If there are page numbers in the file, it reads them all. -bool ReadNextBox(int *line_number, FILE* box_file, - STRING* utf8_str, TBOX* bounding_box); +bool ReadNextBox(int* line_number, FILE* box_file, STRING* utf8_str, + TBOX* bounding_box); // As ReadNextBox above, but get a specific page number. (0-based) // Use -1 to read any page number. Files without page number all // read as if they are page 0. -bool ReadNextBox(int target_page, int *line_number, FILE* box_file, +bool ReadNextBox(int target_page, int* line_number, FILE* box_file, STRING* utf8_str, TBOX* bounding_box); // Parses the given box file string into a page_number, utf8_str, and diff --git a/src/ccstruct/boxword.cpp b/src/ccstruct/boxword.cpp index 7d81edc6fd..e55956c445 100644 --- a/src/ccstruct/boxword.cpp +++ b/src/ccstruct/boxword.cpp @@ -17,8 +17,8 @@ // /////////////////////////////////////////////////////////////////////// -#include "blobs.h" #include "boxword.h" +#include "blobs.h" #include "normalis.h" #include "ocrblock.h" #include "pageres.h" @@ -30,12 +30,9 @@ namespace tesseract { // the word bounding box. const int kBoxClipTolerance = 2; -BoxWord::BoxWord() : length_(0) { -} +BoxWord::BoxWord() : length_(0) {} -BoxWord::BoxWord(const BoxWord& src) { - CopyFrom(src); -} +BoxWord::BoxWord(const BoxWord& src) { CopyFrom(src); } BoxWord& BoxWord::operator=(const BoxWord& src) { CopyFrom(src); @@ -47,8 +44,7 @@ void BoxWord::CopyFrom(const BoxWord& src) { length_ = src.length_; boxes_.clear(); boxes_.reserve(length_); - for (int i = 0; i < length_; ++i) - boxes_.push_back(src.boxes_[i]); + for (int i = 0; i < length_; ++i) boxes_.push_back(src.boxes_[i]); } // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to @@ -93,15 +89,14 @@ void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) { for (int i = 0; i < length_; ++i) { TBOX box = boxes_[i]; // Expand by a single pixel, as the poly approximation error is 1 pixel. - box = TBOX(box.left() - 1, box.bottom() - 1, - box.right() + 1, box.top() + 1); + box = + TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1); // Now find the original box that matches. TBOX original_box; C_BLOB_IT b_it(original_word->cblob_list()); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { TBOX blob_box = b_it.data()->bounding_box(); - if (block != nullptr) - blob_box.rotate(block->re_rotation()); + if (block != nullptr) blob_box.rotate(block->re_rotation()); if (blob_box.major_overlap(box)) { original_box += blob_box; } @@ -119,8 +114,7 @@ void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) { box.set_bottom(original_box.bottom()); } original_box = original_word->bounding_box(); - if (block != nullptr) - original_box.rotate(block->re_rotation()); + if (block != nullptr) original_box.rotate(block->re_rotation()); boxes_[i] = box.intersection(original_box); } ComputeBoundingBox(); @@ -131,15 +125,13 @@ void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) { void BoxWord::MergeBoxes(int start, int end) { start = ClipToRange(start, 0, length_); end = ClipToRange(end, 0, length_); - if (end <= start + 1) - return; + if (end <= start + 1) return; for (int i = start + 1; i < end; ++i) { boxes_[start] += boxes_[i]; } int shrinkage = end - 1 - start; length_ -= shrinkage; - for (int i = start + 1; i < length_; ++i) - boxes_[i] = boxes_[i + shrinkage]; + for (int i = start + 1; i < length_; ++i) boxes_[i] = boxes_[i + shrinkage]; boxes_.truncate(length_); } @@ -180,8 +172,7 @@ void BoxWord::DeleteAllBoxes() { // Computes the bounding box of the word. void BoxWord::ComputeBoundingBox() { bbox_ = TBOX(); - for (int i = 0; i < length_; ++i) - bbox_ += boxes_[i]; + for (int i = 0; i < length_; ++i) bbox_ += boxes_[i]; } // This and other putatively are the same, so call the (permanent) callback @@ -191,8 +182,7 @@ void BoxWord::ProcessMatchedBlobs(const TWERD& other, TessCallback1* cb) const { for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) { TBOX blob_box = other.blobs[i]->bounding_box(); - if (blob_box == boxes_[i]) - cb->Run(i); + if (blob_box == boxes_[i]) cb->Run(i); } delete cb; } diff --git a/src/ccstruct/boxword.h b/src/ccstruct/boxword.h index 04ecdc0d2b..4c1aeb5c28 100644 --- a/src/ccstruct/boxword.h +++ b/src/ccstruct/boxword.h @@ -79,13 +79,9 @@ class BoxWord { // The callback is deleted on completion. void ProcessMatchedBlobs(const TWERD& other, TessCallback1* cb) const; - const TBOX& bounding_box() const { - return bbox_; - } + const TBOX& bounding_box() const { return bbox_; } int length() const { return length_; } - const TBOX& BlobBox(int index) const { - return boxes_[index]; - } + const TBOX& BlobBox(int index) const { return boxes_[index]; } private: void ComputeBoundingBox(); diff --git a/src/ccstruct/ccstruct.cpp b/src/ccstruct/ccstruct.cpp index 151df33e1e..f8d367a57c 100644 --- a/src/ccstruct/ccstruct.cpp +++ b/src/ccstruct/ccstruct.cpp @@ -18,14 +18,15 @@ #include "ccstruct.h" -namespace tesseract { +namespace tesseract { // APPROXIMATIONS of the fractions of the character cell taken by // the descenders, ascenders, and x-height. const double CCStruct::kDescenderFraction = 0.25; const double CCStruct::kXHeightFraction = 0.5; const double CCStruct::kAscenderFraction = 0.25; -const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction / +const double CCStruct::kXHeightCapRatio = + CCStruct::kXHeightFraction / (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction); -} +} // namespace tesseract diff --git a/src/ccstruct/ccstruct.h b/src/ccstruct/ccstruct.h index 1f860cdce2..169a685112 100644 --- a/src/ccstruct/ccstruct.h +++ b/src/ccstruct/ccstruct.h @@ -34,7 +34,7 @@ class CCStruct : public CUtil { static const double kXHeightFraction; // = 0.5; static const double kAscenderFraction; // = 0.25; // Derived value giving the x-height as a fraction of cap-height. - static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender). + static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender). }; class Tesseract; diff --git a/src/ccstruct/coutln.cpp b/src/ccstruct/coutln.cpp index 6680b3be3d..2cdb3ed1c6 100644 --- a/src/ccstruct/coutln.cpp +++ b/src/ccstruct/coutln.cpp @@ -17,8 +17,8 @@ * **********************************************************************/ -#include #include +#include #ifdef __UNIX__ #include #endif @@ -34,10 +34,9 @@ #include "config_auto.h" #endif -ELISTIZE (C_OUTLINE) -ICOORD C_OUTLINE::step_coords[4] = { - ICOORD (-1, 0), ICOORD (0, -1), ICOORD (1, 0), ICOORD (0, 1) -}; +ELISTIZE(C_OUTLINE) +ICOORD C_OUTLINE::step_coords[4] = {ICOORD(-1, 0), ICOORD(0, -1), ICOORD(1, 0), + ICOORD(0, 1)}; /** * @name C_OUTLINE::C_OUTLINE @@ -52,22 +51,22 @@ ICOORD C_OUTLINE::step_coords[4] = { C_OUTLINE::C_OUTLINE(CRACKEDGE* startpt, ICOORD bot_left, ICOORD top_right, int16_t length) : box(bot_left, top_right), start(startpt->pos), offsets(nullptr) { - int16_t stepindex; //index to step - CRACKEDGE *edgept; //current point + int16_t stepindex; // index to step + CRACKEDGE* edgept; // current point - stepcount = length; //no of steps + stepcount = length; // no of steps if (length == 0) { steps = nullptr; return; } - //get memory - steps = (uint8_t *) alloc_mem (step_mem()); + // get memory + steps = (uint8_t*)alloc_mem(step_mem()); memset(steps, 0, step_mem()); edgept = startpt; for (stepindex = 0; stepindex < length; stepindex++) { - //set compact step - set_step (stepindex, edgept->stepdir); + // set compact step + set_step(stepindex, edgept->stepdir); edgept = edgept->next; } } @@ -77,23 +76,24 @@ C_OUTLINE::C_OUTLINE(CRACKEDGE* startpt, ICOORD bot_left, ICOORD top_right, * * Constructor to build a C_OUTLINE from a C_OUTLINE_FRAG. */ -C_OUTLINE::C_OUTLINE ( -//constructor - //steps to copy -ICOORD startpt, DIR128 * new_steps, -int16_t length //length of loop -):start (startpt), offsets(nullptr) { - int8_t dirdiff; //direction difference - DIR128 prevdir; //previous direction - DIR128 dir; //current direction - DIR128 lastdir; //dir of last step - TBOX new_box; //easy bounding - int16_t stepindex; //index to step - int16_t srcindex; //source steps - ICOORD pos; //current position +C_OUTLINE::C_OUTLINE( + // constructor + // steps to copy + ICOORD startpt, DIR128* new_steps, + int16_t length // length of loop + ) + : start(startpt), offsets(nullptr) { + int8_t dirdiff; // direction difference + DIR128 prevdir; // previous direction + DIR128 dir; // current direction + DIR128 lastdir; // dir of last step + TBOX new_box; // easy bounding + int16_t stepindex; // index to step + int16_t srcindex; // source steps + ICOORD pos; // current position pos = startpt; - stepcount = length; // No. of steps. + stepcount = length; // No. of steps. ASSERT_HOST(length >= 0); steps = static_cast(alloc_mem(step_mem())); // Get memory. memset(steps, 0, step_mem()); @@ -101,34 +101,31 @@ int16_t length //length of loop lastdir = new_steps[length - 1]; prevdir = lastdir; for (stepindex = 0, srcindex = 0; srcindex < length; - stepindex++, srcindex++) { - new_box = TBOX (pos, pos); + stepindex++, srcindex++) { + new_box = TBOX(pos, pos); box += new_box; - //copy steps + // copy steps dir = new_steps[srcindex]; set_step(stepindex, dir); dirdiff = dir - prevdir; - pos += step (stepindex); + pos += step(stepindex); if ((dirdiff == 64 || dirdiff == -64) && stepindex > 0) { - stepindex -= 2; //cancel there-and-back - prevdir = stepindex >= 0 ? step_dir (stepindex) : lastdir; - } - else + stepindex -= 2; // cancel there-and-back + prevdir = stepindex >= 0 ? step_dir(stepindex) : lastdir; + } else prevdir = dir; } - ASSERT_HOST (pos.x () == startpt.x () && pos.y () == startpt.y ()); + ASSERT_HOST(pos.x() == startpt.x() && pos.y() == startpt.y()); do { - dirdiff = step_dir (stepindex - 1) - step_dir (0); + dirdiff = step_dir(stepindex - 1) - step_dir(0); if (dirdiff == 64 || dirdiff == -64) { - start += step (0); - stepindex -= 2; //cancel there-and-back - for (int i = 0; i < stepindex; ++i) - set_step(i, step_dir(i + 1)); + start += step(0); + stepindex -= 2; // cancel there-and-back + for (int i = 0; i < stepindex; ++i) set_step(i, step_dir(i + 1)); } - } - while (stepindex > 1 && (dirdiff == 64 || dirdiff == -64)); + } while (stepindex > 1 && (dirdiff == 64 || dirdiff == -64)); stepcount = stepindex; - ASSERT_HOST (stepcount >= 4); + ASSERT_HOST(stepcount >= 4); } /** @@ -140,15 +137,15 @@ int16_t length //length of loop */ C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(nullptr) { - TBOX new_box; //easy bounding - int16_t stepindex; //index to step - int16_t dirdiff; //direction change - ICOORD pos; //current position - ICOORD prevpos; //previous dest point - - ICOORD destpos; //destination point - int16_t destindex; //index to step - DIR128 dir; //coded direction + TBOX new_box; // easy bounding + int16_t stepindex; // index to step + int16_t dirdiff; // direction change + ICOORD pos; // current position + ICOORD prevpos; // previous dest point + + ICOORD destpos; // destination point + int16_t destindex; // index to step + DIR128 dir; // coded direction uint8_t new_step; stepcount = srcline->stepcount * 2; @@ -158,8 +155,8 @@ C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(nullptr) { box.rotate(rotation); return; } - //get memory - steps = (uint8_t *) alloc_mem (step_mem()); + // get memory + steps = (uint8_t*)alloc_mem(step_mem()); memset(steps, 0, step_mem()); for (int iteration = 0; iteration < 2; ++iteration) { @@ -167,27 +164,26 @@ C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(nullptr) { DIR128 round2 = iteration != 0 ? 32 : 0; pos = srcline->start; prevpos = pos; - prevpos.rotate (rotation); + prevpos.rotate(rotation); start = prevpos; - box = TBOX (start, start); + box = TBOX(start, start); destindex = 0; for (stepindex = 0; stepindex < srcline->stepcount; stepindex++) { - pos += srcline->step (stepindex); + pos += srcline->step(stepindex); destpos = pos; - destpos.rotate (rotation); + destpos.rotate(rotation); // tprintf("%i %i %i %i ", destpos.x(), destpos.y(), pos.x(), pos.y()); - while (destpos.x () != prevpos.x () || destpos.y () != prevpos.y ()) { - dir = DIR128 (FCOORD (destpos - prevpos)); - dir += 64; //turn to step style - new_step = dir.get_dir (); + while (destpos.x() != prevpos.x() || destpos.y() != prevpos.y()) { + dir = DIR128(FCOORD(destpos - prevpos)); + dir += 64; // turn to step style + new_step = dir.get_dir(); // tprintf(" %i\n", new_step); if (new_step & 31) { set_step(destindex++, dir + round1); prevpos += step(destindex - 1); - if (destindex < 2 - || ((dirdiff = - step_dir (destindex - 1) - step_dir (destindex - 2)) != - -64 && dirdiff != 64)) { + if (destindex < 2 || ((dirdiff = step_dir(destindex - 1) - + step_dir(destindex - 2)) != -64 && + dirdiff != 64)) { set_step(destindex++, dir + round2); prevpos += step(destindex - 1); } else { @@ -197,43 +193,40 @@ C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(nullptr) { set_step(destindex - 1, dir + round2); prevpos += step(destindex - 1); } - } - else { + } else { set_step(destindex++, dir); prevpos += step(destindex - 1); } - while (destindex >= 2 && - ((dirdiff = - step_dir (destindex - 1) - step_dir (destindex - 2)) == -64 || - dirdiff == 64)) { + while (destindex >= 2 && ((dirdiff = step_dir(destindex - 1) - + step_dir(destindex - 2)) == -64 || + dirdiff == 64)) { prevpos -= step(destindex - 1); prevpos -= step(destindex - 2); - destindex -= 2; // Forget u turn + destindex -= 2; // Forget u turn } - //ASSERT_HOST(prevpos.x() == destpos.x() && prevpos.y() == destpos.y()); - new_box = TBOX (destpos, destpos); + // ASSERT_HOST(prevpos.x() == destpos.x() && prevpos.y() == + // destpos.y()); + new_box = TBOX(destpos, destpos); box += new_box; } } - ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ()); - dirdiff = step_dir (destindex - 1) - step_dir (0); + ASSERT_HOST(destpos.x() == start.x() && destpos.y() == start.y()); + dirdiff = step_dir(destindex - 1) - step_dir(0); while ((dirdiff == 64 || dirdiff == -64) && destindex > 1) { - start += step (0); + start += step(0); destindex -= 2; - for (int i = 0; i < destindex; ++i) - set_step(i, step_dir(i + 1)); - dirdiff = step_dir (destindex - 1) - step_dir (0); + for (int i = 0; i < destindex; ++i) set_step(i, step_dir(i + 1)); + dirdiff = step_dir(destindex - 1) - step_dir(0); } - if (destindex >= 4) - break; + if (destindex >= 4) break; } ASSERT_HOST(destindex <= stepcount); stepcount = destindex; destpos = start; for (stepindex = 0; stepindex < stepcount; stepindex++) { - destpos += step (stepindex); + destpos += step(stepindex); } - ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ()); + ASSERT_HOST(destpos.x() == start.x() && destpos.y() == start.y()); } // Build a fake outline, given just a bounding box and append to the list. @@ -254,29 +247,29 @@ void C_OUTLINE::FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines) { */ int32_t C_OUTLINE::area() const { - int stepindex; //current step - int32_t total_steps; //steps to do - int32_t total; //total area - ICOORD pos; //position of point - ICOORD next_step; //step to next pix + int stepindex; // current step + int32_t total_steps; // steps to do + int32_t total; // total area + ICOORD pos; // position of point + ICOORD next_step; // step to next pix // We aren't going to modify the list, or its contents, but there is // no const iterator. C_OUTLINE_IT it(const_cast(&children)); - pos = start_pos (); - total_steps = pathlength (); + pos = start_pos(); + total_steps = pathlength(); total = 0; for (stepindex = 0; stepindex < total_steps; stepindex++) { - //all intersected - next_step = step (stepindex); - if (next_step.x () < 0) - total += pos.y (); - else if (next_step.x () > 0) - total -= pos.y (); + // all intersected + next_step = step(stepindex); + if (next_step.x() < 0) + total += pos.y(); + else if (next_step.x() > 0) + total -= pos.y(); pos += next_step; } - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - total += it.data ()->area ();//add areas of children + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) + total += it.data()->area(); // add areas of children return total; } @@ -288,7 +281,7 @@ int32_t C_OUTLINE::area() const { */ int32_t C_OUTLINE::perimeter() const { - int32_t total_steps; // Return value. + int32_t total_steps; // Return value. // We aren't going to modify the list, or its contents, but there is // no const iterator. C_OUTLINE_IT it(const_cast(&children)); @@ -307,24 +300,23 @@ int32_t C_OUTLINE::perimeter() const { */ int32_t C_OUTLINE::outer_area() const { - int stepindex; //current step - int32_t total_steps; //steps to do - int32_t total; //total area - ICOORD pos; //position of point - ICOORD next_step; //step to next pix - - pos = start_pos (); - total_steps = pathlength (); - if (total_steps == 0) - return box.area(); + int stepindex; // current step + int32_t total_steps; // steps to do + int32_t total; // total area + ICOORD pos; // position of point + ICOORD next_step; // step to next pix + + pos = start_pos(); + total_steps = pathlength(); + if (total_steps == 0) return box.area(); total = 0; for (stepindex = 0; stepindex < total_steps; stepindex++) { - //all intersected - next_step = step (stepindex); - if (next_step.x () < 0) - total += pos.y (); - else if (next_step.x () > 0) - total -= pos.y (); + // all intersected + next_step = step(stepindex); + if (next_step.x() < 0) + total += pos.y(); + else if (next_step.x() > 0) + total -= pos.y(); pos += next_step; } @@ -339,20 +331,20 @@ int32_t C_OUTLINE::outer_area() const { */ int32_t C_OUTLINE::count_transitions(int32_t threshold) { - bool first_was_max_x; //what was first + bool first_was_max_x; // what was first bool first_was_max_y; - bool looking_for_max_x; //what is next + bool looking_for_max_x; // what is next bool looking_for_min_x; - bool looking_for_max_y; //what is next + bool looking_for_max_y; // what is next bool looking_for_min_y; - int stepindex; //current step - int32_t total_steps; //steps to do - //current limits + int stepindex; // current step + int32_t total_steps; // steps to do + // current limits int32_t max_x, min_x, max_y, min_y; - int32_t initial_x, initial_y; //initial limits - int32_t total; //total changes - ICOORD pos; //position of point - ICOORD next_step; //step to next pix + int32_t initial_x, initial_y; // initial limits + int32_t total; // total changes + ICOORD pos; // position of point + ICOORD next_step; // step to next pix pos = start_pos(); total_steps = pathlength(); @@ -366,14 +358,13 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) { first_was_max_x = false; first_was_max_y = false; initial_x = pos.x(); - initial_y = pos.y(); //stop uninit warning + initial_y = pos.y(); // stop uninit warning for (stepindex = 0; stepindex < total_steps; stepindex++) { - //all intersected + // all intersected next_step = step(stepindex); pos += next_step; if (next_step.x() < 0) { - if (looking_for_max_x && pos.x() < min_x) - min_x = pos.x(); + if (looking_for_max_x && pos.x() < min_x) min_x = pos.x(); if (looking_for_min_x && max_x - pos.x() > threshold) { if (looking_for_max_x) { initial_x = max_x; @@ -382,15 +373,13 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) { total++; looking_for_max_x = true; looking_for_min_x = false; - min_x = pos.x(); //reset min + min_x = pos.x(); // reset min } - } - else if (next_step.x() > 0) { - if (looking_for_min_x && pos.x() > max_x) - max_x = pos.x(); + } else if (next_step.x() > 0) { + if (looking_for_min_x && pos.x() > max_x) max_x = pos.x(); if (looking_for_max_x && pos.x() - min_x > threshold) { if (looking_for_min_x) { - initial_x = min_x; //remember first min + initial_x = min_x; // remember first min first_was_max_x = true; } total++; @@ -398,27 +387,23 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) { looking_for_min_x = true; max_x = pos.x(); } - } - else if (next_step.y() < 0) { - if (looking_for_max_y && pos.y() < min_y) - min_y = pos.y(); + } else if (next_step.y() < 0) { + if (looking_for_max_y && pos.y() < min_y) min_y = pos.y(); if (looking_for_min_y && max_y - pos.y() > threshold) { if (looking_for_max_y) { - initial_y = max_y; //remember first max + initial_y = max_y; // remember first max first_was_max_y = false; } total++; looking_for_max_y = true; looking_for_min_y = false; - min_y = pos.y(); //reset min + min_y = pos.y(); // reset min } - } - else { - if (looking_for_min_y && pos.y() > max_y) - max_y = pos.y(); + } else { + if (looking_for_min_y && pos.y() > max_y) max_y = pos.y(); if (looking_for_max_y && pos.y() - min_y > threshold) { if (looking_for_min_y) { - initial_y = min_y; //remember first min + initial_y = min_y; // remember first min first_was_max_y = true; } total++; @@ -427,15 +412,13 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) { max_y = pos.y(); } } - } if (first_was_max_x && looking_for_min_x) { if (max_x - initial_x > threshold) total++; else total--; - } - else if (!first_was_max_x && looking_for_max_x) { + } else if (!first_was_max_x && looking_for_max_x) { if (initial_x - min_x > threshold) total++; else @@ -446,8 +429,7 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) { total++; else total--; - } - else if (!first_was_max_y && looking_for_max_y) { + } else if (!first_was_max_y && looking_for_max_y) { if (initial_y - min_y > threshold) total++; else @@ -464,28 +446,27 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) { * @param other other outline */ -bool -C_OUTLINE::operator<(const C_OUTLINE& other) const { - int16_t count = 0; //winding count - ICOORD pos; //position of point - int32_t stepindex; //index to cstep +bool C_OUTLINE::operator<(const C_OUTLINE& other) const { + int16_t count = 0; // winding count + ICOORD pos; // position of point + int32_t stepindex; // index to cstep - if (!box.overlap (other.box)) - return false; //can't be contained - if (stepcount == 0) - return other.box.contains(this->box); + if (!box.overlap(other.box)) return false; // can't be contained + if (stepcount == 0) return other.box.contains(this->box); pos = start; - for (stepindex = 0; stepindex < stepcount - && (count = other.winding_number (pos)) == INTERSECTING; stepindex++) - pos += step (stepindex); //try all points + for (stepindex = 0; stepindex < stepcount && + (count = other.winding_number(pos)) == INTERSECTING; + stepindex++) + pos += step(stepindex); // try all points if (count == INTERSECTING) { - //all intersected + // all intersected pos = other.start; - for (stepindex = 0; stepindex < other.stepcount - && (count = winding_number (pos)) == INTERSECTING; stepindex++) - //try other way round - pos += other.step (stepindex); + for (stepindex = 0; stepindex < other.stepcount && + (count = winding_number(pos)) == INTERSECTING; + stepindex++) + // try other way round + pos += other.step(stepindex); return count == INTERSECTING || count == 0; } return count != 0; @@ -499,34 +480,33 @@ C_OUTLINE::operator<(const C_OUTLINE& other) const { */ int16_t C_OUTLINE::winding_number(ICOORD point) const { - int16_t stepindex; //index to cstep - int16_t count; //winding count - ICOORD vec; //to current point - ICOORD stepvec; //step vector - int32_t cross; //cross product + int16_t stepindex; // index to cstep + int16_t count; // winding count + ICOORD vec; // to current point + ICOORD stepvec; // step vector + int32_t cross; // cross product - vec = start - point; //vector to it + vec = start - point; // vector to it count = 0; for (stepindex = 0; stepindex < stepcount; stepindex++) { - stepvec = step (stepindex); //get the step - //crossing the line - if (vec.y () <= 0 && vec.y () + stepvec.y () > 0) { - cross = vec * stepvec; //cross product + stepvec = step(stepindex); // get the step + // crossing the line + if (vec.y() <= 0 && vec.y() + stepvec.y() > 0) { + cross = vec * stepvec; // cross product if (cross > 0) - count++; //crossing right half + count++; // crossing right half else if (cross == 0) - return INTERSECTING; //going through point - } - else if (vec.y () > 0 && vec.y () + stepvec.y () <= 0) { + return INTERSECTING; // going through point + } else if (vec.y() > 0 && vec.y() + stepvec.y() <= 0) { cross = vec * stepvec; if (cross < 0) - count--; //crossing back + count--; // crossing back else if (cross == 0) - return INTERSECTING; //illegal + return INTERSECTING; // illegal } - vec += stepvec; //sum vectors + vec += stepvec; // sum vectors } - return count; //winding number + return count; // winding number } /** @@ -535,26 +515,25 @@ int16_t C_OUTLINE::winding_number(ICOORD point) const { * @return the sum direction delta of the outline. */ -int16_t C_OUTLINE::turn_direction() const { //winding number - DIR128 prevdir; //previous direction - DIR128 dir; //current direction - int16_t stepindex; //index to cstep - int8_t dirdiff; //direction difference - int16_t count; //winding count +int16_t C_OUTLINE::turn_direction() const { // winding number + DIR128 prevdir; // previous direction + DIR128 dir; // current direction + int16_t stepindex; // index to cstep + int8_t dirdiff; // direction difference + int16_t count; // winding count - if (stepcount == 0) - return 128; + if (stepcount == 0) return 128; count = 0; - prevdir = step_dir (stepcount - 1); + prevdir = step_dir(stepcount - 1); for (stepindex = 0; stepindex < stepcount; stepindex++) { - dir = step_dir (stepindex); + dir = step_dir(stepindex); dirdiff = dir - prevdir; - ASSERT_HOST (dirdiff == 0 || dirdiff == 32 || dirdiff == -32); + ASSERT_HOST(dirdiff == 0 || dirdiff == 32 || dirdiff == -32); count += dirdiff; prevdir = dir; } - ASSERT_HOST (count == 128 || count == -128); - return count; //winding number + ASSERT_HOST(count == 128 || count == -128); + return count; // winding number } /** @@ -563,19 +542,19 @@ int16_t C_OUTLINE::turn_direction() const { //winding number * Reverse the direction of an outline. */ -void C_OUTLINE::reverse() { //reverse drection - DIR128 halfturn = MODULUS / 2; //amount to shift - DIR128 stepdir; //direction of step - int16_t stepindex; //index to cstep - int16_t farindex; //index to other side - int16_t halfsteps; //half of stepcount +void C_OUTLINE::reverse() { // reverse drection + DIR128 halfturn = MODULUS / 2; // amount to shift + DIR128 stepdir; // direction of step + int16_t stepindex; // index to cstep + int16_t farindex; // index to other side + int16_t halfsteps; // half of stepcount halfsteps = (stepcount + 1) / 2; for (stepindex = 0; stepindex < halfsteps; stepindex++) { farindex = stepcount - stepindex - 1; - stepdir = step_dir (stepindex); - set_step (stepindex, step_dir (farindex) + halfturn); - set_step (farindex, stepdir + halfturn); + stepdir = step_dir(stepindex); + set_step(stepindex, step_dir(farindex) + halfturn); + set_step(farindex, stepdir + halfturn); } } @@ -589,11 +568,11 @@ void C_OUTLINE::reverse() { //reverse drection void C_OUTLINE::move(const ICOORD vec) { C_OUTLINE_IT it(&children); // iterator - box.move (vec); + box.move(vec); start += vec; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - it.data ()->move (vec); // move child outlines + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) + it.data()->move(vec); // move child outlines } /** @@ -649,9 +628,8 @@ void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) { * on the given (x,y). If the cell would go outside the image, it is padded * with white. */ -static void ComputeGradient(const l_uint32* data, int wpl, - int x, int y, int width, int height, - ICOORD* gradient) { +static void ComputeGradient(const l_uint32* data, int wpl, int x, int y, + int width, int height, ICOORD* gradient) { const l_uint32* line = data + y * wpl; int pix_x_y = x < width && y < height ? GET_DATA_BYTE(line, x) : 255; int pix_x_prevy = x < width && y > 0 ? GET_DATA_BYTE(line - wpl, x) : 255; @@ -667,10 +645,9 @@ static void ComputeGradient(const l_uint32* data, int wpl, * best_y if a new max. */ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign, - int x, int y, int height, - int* best_diff, int* best_sum, int* best_y) { - if (y <= 0 || y >= height) - return false; + int x, int y, int height, int* best_diff, + int* best_sum, int* best_y) { + if (y <= 0 || y >= height) return false; const l_uint32* line = data + y * wpl; int pixel1 = GET_DATA_BYTE(line - wpl, x); int pixel2 = GET_DATA_BYTE(line, x); @@ -688,11 +665,10 @@ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign, * by the input image line, returning true if the difference matches diff_sign * and updating the best_diff, best_sum, best_x if a new max. */ -static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign, - int x, int width, - int* best_diff, int* best_sum, int* best_x) { - if (x <= 0 || x >= width) - return false; +static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign, int x, + int width, int* best_diff, int* best_sum, + int* best_x) { + if (x <= 0 || x >= width) return false; int pixel1 = GET_DATA_BYTE(line, x - 1); int pixel2 = GET_DATA_BYTE(line, x); int diff = (pixel2 - pixel1) * diff_sign; @@ -726,7 +702,7 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); bool negative = flag(COUT_INVERSE); - delete [] offsets; + delete[] offsets; offsets = new EdgeOffset[stepcount]; ICOORD pos = start; ICOORD prev_gradient; @@ -755,8 +731,8 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) { int y = height - pt1.y; int best_sum = 0; int best_y = y; - EvaluateVerticalDiff(data, wpl, diff_sign, x, y, height, - &best_diff, &best_sum, &best_y); + EvaluateVerticalDiff(data, wpl, diff_sign, x, y, height, &best_diff, + &best_sum, &best_y); // Find the strongest edge. int test_y = y; do { @@ -768,8 +744,8 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) { --test_y; } while (EvaluateVerticalDiff(data, wpl, diff_sign, x, test_y, height, &best_diff, &best_sum, &best_y)); - offset = diff_sign * (best_sum / 2 - threshold) + - (y - best_y) * best_diff; + offset = + diff_sign * (best_sum / 2 - threshold) + (y - best_y) * best_diff; } else if (pt1.x == pt2.x && abs(gradient.x()) * 2 >= abs(gradient.y())) { // Vertical step. diff_sign == 1 indicates black on the left. int diff_sign = (pt1.y > pt2.y) == negative ? 1 : -1; @@ -778,8 +754,8 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) { const l_uint32* line = pixGetData(pix) + y * wpl; int best_sum = 0; int best_x = x; - EvaluateHorizontalDiff(line, diff_sign, x, width, - &best_diff, &best_sum, &best_x); + EvaluateHorizontalDiff(line, diff_sign, x, width, &best_diff, &best_sum, + &best_x); // Find the strongest edge. int test_x = x; do { @@ -791,11 +767,10 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) { --test_x; } while (EvaluateHorizontalDiff(line, diff_sign, test_x, width, &best_diff, &best_sum, &best_x)); - offset = diff_sign * (threshold - best_sum / 2) + - (best_x - x) * best_diff; + offset = + diff_sign * (threshold - best_sum / 2) + (best_x - x) * best_diff; } - offsets[s].offset_numerator = - ClipToRange(offset, -INT8_MAX, INT8_MAX); + offsets[s].offset_numerator = ClipToRange(offset, -INT8_MAX, INT8_MAX); offsets[s].pixel_diff = ClipToRange(best_diff, 0, UINT8_MAX); if (negative) gradient = -gradient; // Compute gradient angle quantized to 256 directions, rotated by 64 (pi/2) @@ -836,7 +811,7 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) { * outline, without losing important detail. */ void C_OUTLINE::ComputeBinaryOffsets() { - delete [] offsets; + delete[] offsets; offsets = new EdgeOffset[stepcount]; // Count of the number of steps in each direction in the sliding window. int dir_counts[4]; @@ -867,9 +842,10 @@ void C_OUTLINE::ComputeBinaryOffsets() { int offset = 0; // Use only steps that have a count of >=2 OR the strong U-turn with a // single d and 2 at d-1 and 2 at d+1 (mod 4). - if (dir_counts[dir_index] >= 2 || (dir_counts[dir_index] == 1 && - dir_counts[Modulo(dir_index - 1, 4)] == 2 && - dir_counts[Modulo(dir_index + 1, 4)] == 2)) { + if (dir_counts[dir_index] >= 2 || + (dir_counts[dir_index] == 1 && + dir_counts[Modulo(dir_index - 1, 4)] == 2 && + dir_counts[Modulo(dir_index + 1, 4)] == 2)) { // Valid step direction. best_diff = dir_counts[dir_index]; int edge_pos = step_vec.x() == 0 ? pos.x() : pos.y(); @@ -878,8 +854,7 @@ void C_OUTLINE::ComputeBinaryOffsets() { // See ASCII art above. offset = pos_totals[dir_index] - best_diff * edge_pos; } - offsets[s].offset_numerator = - ClipToRange(offset, -INT8_MAX, INT8_MAX); + offsets[s].offset_numerator = ClipToRange(offset, -INT8_MAX, INT8_MAX); offsets[s].pixel_diff = ClipToRange(best_diff, 0, UINT8_MAX); // The direction is just the vector from start to end of the window. FCOORD direction(head_pos.x() - tail_pos.x(), head_pos.y() - tail_pos.y()); @@ -897,8 +872,8 @@ void C_OUTLINE::render(int left, int top, Pix* pix) const { for (int stepindex = 0; stepindex < stepcount; ++stepindex) { ICOORD next_step = step(stepindex); if (next_step.y() < 0) { - pixRasterop(pix, 0, top - pos.y(), pos.x() - left, 1, - PIX_NOT(PIX_DST), nullptr, 0, 0); + pixRasterop(pix, 0, top - pos.y(), pos.x() - left, 1, PIX_NOT(PIX_DST), + nullptr, 0, 0); } else if (next_step.y() > 0) { pixRasterop(pix, 0, top - pos.y() - 1, pos.x() - left, 1, PIX_NOT(PIX_DST), nullptr, 0, 0); @@ -941,11 +916,11 @@ void C_OUTLINE::render_outline(int left, int top, Pix* pix) const { #ifndef GRAPHICS_DISABLED void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const { - int16_t stepindex; // index to cstep - ICOORD pos; // current position - DIR128 stepdir; // direction of step + int16_t stepindex; // index to cstep + ICOORD pos; // current position + DIR128 stepdir; // direction of step - pos = start; // current position + pos = start; // current position window->Pen(colour); if (stepcount == 0) { window->Rectangle(box.left(), box.top(), box.right(), box.bottom()); @@ -955,9 +930,9 @@ void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const { stepindex = 0; while (stepindex < stepcount) { - pos += step(stepindex); // step to next + pos += step(stepindex); // step to next stepdir = step_dir(stepindex); - stepindex++; // count steps + stepindex++; // count steps // merge straight lines while (stepindex < stepcount && stepdir.get_dir() == step_dir(stepindex).get_dir()) { @@ -980,7 +955,7 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, return; } const DENORM* root_denorm = denorm.RootDenorm(); - ICOORD pos = start; // current position + ICOORD pos = start; // current position FCOORD f_pos = sub_pixel_pos_at_index(pos, 0); FCOORD pos_normed; denorm.NormTransform(root_denorm, f_pos, &pos_normed); @@ -1011,15 +986,13 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, C_OUTLINE& C_OUTLINE::operator=(const C_OUTLINE& source) { box = source.box; start = source.start; - if (steps != nullptr) - free_mem(steps); + if (steps != nullptr) free_mem(steps); stepcount = source.stepcount; - steps = (uint8_t *) alloc_mem (step_mem()); - memmove (steps, source.steps, step_mem()); - if (!children.empty ()) - children.clear (); + steps = (uint8_t*)alloc_mem(step_mem()); + memmove(steps, source.steps, step_mem()); + if (!children.empty()) children.clear(); children.deep_copy(&source.children, &deep_copy); - delete [] offsets; + delete[] offsets; if (source.offsets != nullptr) { offsets = new EdgeOffset[stepcount]; memcpy(offsets, source.offsets, stepcount * sizeof(*offsets)); @@ -1048,6 +1021,5 @@ void C_OUTLINE::increment_step(int s, int increment, ICOORD* pos, *pos += step_vec; } -ICOORD C_OUTLINE::chain_step(int chaindir) { - return step_coords[chaindir % 4]; -} +ICOORD +C_OUTLINE::chain_step(int chaindir) { return step_coords[chaindir % 4]; } diff --git a/src/ccstruct/coutln.h b/src/ccstruct/coutln.h index 00faa61993..617d55e984 100644 --- a/src/ccstruct/coutln.h +++ b/src/ccstruct/coutln.h @@ -17,26 +17,25 @@ * **********************************************************************/ -#ifndef COUTLN_H -#define COUTLN_H +#ifndef COUTLN_H +#define COUTLN_H -#include "crakedge.h" -#include "mod128.h" -#include "bits16.h" -#include "rect.h" -#include "blckerr.h" -#include "scrollview.h" +#include "bits16.h" +#include "blckerr.h" +#include "crakedge.h" +#include "mod128.h" +#include "rect.h" +#include "scrollview.h" class DENORM; -#define INTERSECTING INT16_MAX//no winding number +#define INTERSECTING INT16_MAX // no winding number - //mask to get step -#define STEP_MASK 3 +// mask to get step +#define STEP_MASK 3 -enum C_OUTLINE_FLAGS -{ - COUT_INVERSE //White on black blob +enum C_OUTLINE_FLAGS { + COUT_INVERSE // White on black blob }; // Simple struct to hold the 3 values needed to compute a more precise edge @@ -62,230 +61,221 @@ struct EdgeOffset { uint8_t direction; }; -class DLLSYM C_OUTLINE; //forward declaration +class DLLSYM C_OUTLINE; // forward declaration struct Pix; -ELISTIZEH (C_OUTLINE) -class DLLSYM C_OUTLINE:public ELIST_LINK { +ELISTIZEH(C_OUTLINE) +class DLLSYM C_OUTLINE : public ELIST_LINK { public: - C_OUTLINE() { //empty constructor - steps = nullptr; - offsets = nullptr; - } - C_OUTLINE( //constructor - CRACKEDGE *startpt, //from edge detector - ICOORD bot_left, //bounding box //length of loop - ICOORD top_right, - int16_t length); - C_OUTLINE(ICOORD startpt, //start of loop - DIR128 *new_steps, //steps in loop - int16_t length); //length of loop - //outline to copy - C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation); //and rotate + C_OUTLINE() { // empty constructor + steps = nullptr; + offsets = nullptr; + } + C_OUTLINE( // constructor + CRACKEDGE* startpt, // from edge detector + ICOORD bot_left, // bounding box //length of loop + ICOORD top_right, int16_t length); + C_OUTLINE(ICOORD startpt, // start of loop + DIR128* new_steps, // steps in loop + int16_t length); // length of loop + // outline to copy + C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation); // and rotate - // Build a fake outline, given just a bounding box and append to the list. - static void FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines); + // Build a fake outline, given just a bounding box and append to the list. + static void FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines); - ~C_OUTLINE () { //destructor - if (steps != nullptr) - free_mem(steps); - steps = nullptr; - delete [] offsets; - } + ~C_OUTLINE() { // destructor + if (steps != nullptr) free_mem(steps); + steps = nullptr; + delete[] offsets; + } - bool flag( //test flag - C_OUTLINE_FLAGS mask) const { //flag to test - return flags.bit(mask); - } - void set_flag( //set flag value - C_OUTLINE_FLAGS mask, //flag to test - bool value) { //value to set - flags.set_bit(mask, value); - } + bool flag( // test flag + C_OUTLINE_FLAGS mask) const { // flag to test + return flags.bit(mask); + } + void set_flag( // set flag value + C_OUTLINE_FLAGS mask, // flag to test + bool value) { // value to set + flags.set_bit(mask, value); + } - C_OUTLINE_LIST *child() { //get child list - return &children; - } + C_OUTLINE_LIST* child() { // get child list + return &children; + } - //access function - const TBOX &bounding_box() const { - return box; - } - void set_step( //set a step - int16_t stepindex, //index of step - int8_t stepdir) { //chain code - int shift = stepindex%4 * 2; - uint8_t mask = 3 << shift; - steps[stepindex/4] = ((stepdir << shift) & mask) | - (steps[stepindex/4] & ~mask); - //squeeze 4 into byte - } - void set_step( //set a step - int16_t stepindex, //index of step - DIR128 stepdir) { //direction - //clean it - int8_t chaindir = stepdir.get_dir() >> (DIRBITS - 2); - //difference - set_step(stepindex, chaindir); - //squeeze 4 into byte - } + // access function + const TBOX& bounding_box() const { return box; } + void set_step( // set a step + int16_t stepindex, // index of step + int8_t stepdir) { // chain code + int shift = stepindex % 4 * 2; + uint8_t mask = 3 << shift; + steps[stepindex / 4] = + ((stepdir << shift) & mask) | (steps[stepindex / 4] & ~mask); + // squeeze 4 into byte + } + void set_step( // set a step + int16_t stepindex, // index of step + DIR128 stepdir) { // direction + // clean it + int8_t chaindir = stepdir.get_dir() >> (DIRBITS - 2); + // difference + set_step(stepindex, chaindir); + // squeeze 4 into byte + } - int32_t pathlength() const { //get path length - return stepcount; - } - // Return step at a given index as a DIR128. - DIR128 step_dir(int index) const { - return DIR128((int16_t)(((steps[index/4] >> (index%4 * 2)) & STEP_MASK) << - (DIRBITS - 2))); - } - // Return the step vector for the given outline position. - ICOORD step(int index) const { // index of step - return step_coords[chain_code(index)]; - } - // get start position - const ICOORD &start_pos() const { - return start; - } - // Returns the position at the given index on the outline. - // NOT to be used lightly, as it has to iterate the outline to find out. - ICOORD position_at_index(int index) const { - ICOORD pos = start; - for (int i = 0; i < index; ++i) - pos += step(i); - return pos; - } - // Returns the sub-pixel accurate position given the integer position pos - // at the given index on the outline. pos may be a return value of - // position_at_index, or computed by repeatedly adding step to the - // start_pos() in the usual way. - FCOORD sub_pixel_pos_at_index(const ICOORD& pos, int index) const { - const ICOORD& step_to_next(step(index)); - FCOORD f_pos(pos.x() + step_to_next.x() / 2.0f, - pos.y() + step_to_next.y() / 2.0f); - if (offsets != nullptr && offsets[index].pixel_diff > 0) { - float offset = offsets[index].offset_numerator; - offset /= offsets[index].pixel_diff; - if (step_to_next.x() != 0) - f_pos.set_y(f_pos.y() + offset); - else - f_pos.set_x(f_pos.x() + offset); - } - return f_pos; - } - // Returns the step direction for the given index or -1 if there is none. - int direction_at_index(int index) const { - if (offsets != nullptr && offsets[index].pixel_diff > 0) - return offsets[index].direction; - return -1; - } - // Returns the edge strength for the given index. - // If there are no recorded edge strengths, returns 1 (assuming the image - // is binary). Returns 0 if the gradient direction conflicts with the - // step direction, indicating that this position could be skipped. - int edge_strength_at_index(int index) const { - if (offsets != nullptr) - return offsets[index].pixel_diff; - return 1; - } - // Return the step as a chain code (0-3) related to the standard feature - // direction of binary_angle_plus_pi by: - // chain_code * 64 = feature direction. - int chain_code(int index) const { // index of step - return (steps[index / 4] >> (index % 4 * 2)) & STEP_MASK; + int32_t pathlength() const { // get path length + return stepcount; + } + // Return step at a given index as a DIR128. + DIR128 step_dir(int index) const { + return DIR128((int16_t)(((steps[index / 4] >> (index % 4 * 2)) & STEP_MASK) + << (DIRBITS - 2))); + } + // Return the step vector for the given outline position. + ICOORD step(int index) const { // index of step + return step_coords[chain_code(index)]; + } + // get start position + const ICOORD& start_pos() const { return start; } + // Returns the position at the given index on the outline. + // NOT to be used lightly, as it has to iterate the outline to find out. + ICOORD position_at_index(int index) const { + ICOORD pos = start; + for (int i = 0; i < index; ++i) pos += step(i); + return pos; + } + // Returns the sub-pixel accurate position given the integer position pos + // at the given index on the outline. pos may be a return value of + // position_at_index, or computed by repeatedly adding step to the + // start_pos() in the usual way. + FCOORD sub_pixel_pos_at_index(const ICOORD& pos, int index) const { + const ICOORD& step_to_next(step(index)); + FCOORD f_pos(pos.x() + step_to_next.x() / 2.0f, + pos.y() + step_to_next.y() / 2.0f); + if (offsets != nullptr && offsets[index].pixel_diff > 0) { + float offset = offsets[index].offset_numerator; + offset /= offsets[index].pixel_diff; + if (step_to_next.x() != 0) + f_pos.set_y(f_pos.y() + offset); + else + f_pos.set_x(f_pos.x() + offset); } + return f_pos; + } + // Returns the step direction for the given index or -1 if there is none. + int direction_at_index(int index) const { + if (offsets != nullptr && offsets[index].pixel_diff > 0) + return offsets[index].direction; + return -1; + } + // Returns the edge strength for the given index. + // If there are no recorded edge strengths, returns 1 (assuming the image + // is binary). Returns 0 if the gradient direction conflicts with the + // step direction, indicating that this position could be skipped. + int edge_strength_at_index(int index) const { + if (offsets != nullptr) return offsets[index].pixel_diff; + return 1; + } + // Return the step as a chain code (0-3) related to the standard feature + // direction of binary_angle_plus_pi by: + // chain_code * 64 = feature direction. + int chain_code(int index) const { // index of step + return (steps[index / 4] >> (index % 4 * 2)) & STEP_MASK; + } - int32_t area() const; // Returns area of self and 1st level children. - int32_t perimeter() const; // Total perimeter of self and 1st level children. - int32_t outer_area() const; // Returns area of self only. - int32_t count_transitions( //count maxima - int32_t threshold); //size threshold + int32_t area() const; // Returns area of self and 1st level children. + int32_t perimeter() const; // Total perimeter of self and 1st level children. + int32_t outer_area() const; // Returns area of self only. + int32_t count_transitions( // count maxima + int32_t threshold); // size threshold - bool operator< ( //containment test - const C_OUTLINE & other) const; - bool operator> ( //containment test - C_OUTLINE & other) const - { - return other < *this; //use the < to do it - } - int16_t winding_number( //get winding number - ICOORD testpt) const; //around this point - //get direction - int16_t turn_direction() const; - void reverse(); //reverse direction + bool operator<( // containment test + const C_OUTLINE& other) const; + bool operator>( // containment test + C_OUTLINE& other) const { + return other < *this; // use the < to do it + } + int16_t winding_number( // get winding number + ICOORD testpt) const; // around this point + // get direction + int16_t turn_direction() const; + void reverse(); // reverse direction - void move( // reposition outline - const ICOORD vec); // by vector + void move( // reposition outline + const ICOORD vec); // by vector - // Returns true if *this and its children are legally nested. - // The outer area of a child should have the opposite sign to the - // parent. If not, it means we have discarded an outline in between - // (probably due to excessive length). - bool IsLegallyNested() const; + // Returns true if *this and its children are legally nested. + // The outer area of a child should have the opposite sign to the + // parent. If not, it means we have discarded an outline in between + // (probably due to excessive length). + bool IsLegallyNested() const; - // If this outline is smaller than the given min_size, delete this and - // remove from its list, via *it, after checking that *it points to this. - // Otherwise, if any children of this are too small, delete them. - // On entry, *it must be an iterator pointing to this. If this gets deleted - // then this is extracted from *it, so an iteration can continue. - void RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it); + // If this outline is smaller than the given min_size, delete this and + // remove from its list, via *it, after checking that *it points to this. + // Otherwise, if any children of this are too small, delete them. + // On entry, *it must be an iterator pointing to this. If this gets deleted + // then this is extracted from *it, so an iteration can continue. + void RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it); - // Adds sub-pixel resolution EdgeOffsets for the outline if the supplied - // pix is 8-bit. Does nothing otherwise. - void ComputeEdgeOffsets(int threshold, Pix* pix); - // Adds sub-pixel resolution EdgeOffsets for the outline using only - // a binary image source. - void ComputeBinaryOffsets(); + // Adds sub-pixel resolution EdgeOffsets for the outline if the supplied + // pix is 8-bit. Does nothing otherwise. + void ComputeEdgeOffsets(int threshold, Pix* pix); + // Adds sub-pixel resolution EdgeOffsets for the outline using only + // a binary image source. + void ComputeBinaryOffsets(); - // Renders the outline to the given pix, with left and top being - // the coords of the upper-left corner of the pix. - void render(int left, int top, Pix* pix) const; + // Renders the outline to the given pix, with left and top being + // the coords of the upper-left corner of the pix. + void render(int left, int top, Pix* pix) const; - // Renders just the outline to the given pix (no fill), with left and top - // being the coords of the upper-left corner of the pix. - void render_outline(int left, int top, Pix* pix) const; + // Renders just the outline to the given pix (no fill), with left and top + // being the coords of the upper-left corner of the pix. + void render_outline(int left, int top, Pix* pix) const; - #ifndef GRAPHICS_DISABLED - void plot( //draw one - ScrollView* window, //window to draw in - ScrollView::Color colour) const; //colour to draw it - // Draws the outline in the given colour, normalized using the given denorm, - // making use of sub-pixel accurate information if available. - void plot_normed(const DENORM& denorm, ScrollView::Color colour, - ScrollView* window) const; - #endif // GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED + void plot( // draw one + ScrollView* window, // window to draw in + ScrollView::Color colour) const; // colour to draw it + // Draws the outline in the given colour, normalized using the given denorm, + // making use of sub-pixel accurate information if available. + void plot_normed(const DENORM& denorm, ScrollView::Color colour, + ScrollView* window) const; +#endif // GRAPHICS_DISABLED - C_OUTLINE& operator=(const C_OUTLINE& source); + C_OUTLINE& operator=(const C_OUTLINE& source); - static C_OUTLINE* deep_copy(const C_OUTLINE* src) { - C_OUTLINE* outline = new C_OUTLINE; - *outline = *src; - return outline; - } + static C_OUTLINE* deep_copy(const C_OUTLINE* src) { + C_OUTLINE* outline = new C_OUTLINE; + *outline = *src; + return outline; + } - static ICOORD chain_step(int chaindir); + static ICOORD chain_step(int chaindir); - // The maximum length of any outline. The stepcount is stored as 16 bits, - // but it is probably not a good idea to increase this constant by much - // and switch to 32 bits, as it plays an important role in keeping huge - // outlines invisible, which prevents bad speed behavior. - static const int kMaxOutlineLength = 16000; + // The maximum length of any outline. The stepcount is stored as 16 bits, + // but it is probably not a good idea to increase this constant by much + // and switch to 32 bits, as it plays an important role in keeping huge + // outlines invisible, which prevents bad speed behavior. + static const int kMaxOutlineLength = 16000; - private: - // Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals - // by the step, increment, and vertical step ? x : y position * increment - // at step s Mod stepcount respectively. Used to add or subtract the - // direction and position to/from accumulators of a small neighbourhood. - void increment_step(int s, int increment, ICOORD* pos, int* dir_counts, - int* pos_totals) const; - int step_mem() const { return (stepcount+3) / 4; } + private: + // Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals + // by the step, increment, and vertical step ? x : y position * increment + // at step s Mod stepcount respectively. Used to add or subtract the + // direction and position to/from accumulators of a small neighbourhood. + void increment_step(int s, int increment, ICOORD* pos, int* dir_counts, + int* pos_totals) const; + int step_mem() const { return (stepcount + 3) / 4; } - TBOX box; // bounding box - ICOORD start; // start coord - int16_t stepcount; // no of steps - BITS16 flags; // flags about outline - uint8_t *steps; // step array - EdgeOffset* offsets; // Higher precision edge. - C_OUTLINE_LIST children; // child elements - static ICOORD step_coords[4]; + TBOX box; // bounding box + ICOORD start; // start coord + int16_t stepcount; // no of steps + BITS16 flags; // flags about outline + uint8_t* steps; // step array + EdgeOffset* offsets; // Higher precision edge. + C_OUTLINE_LIST children; // child elements + static ICOORD step_coords[4]; }; #endif diff --git a/src/ccstruct/crakedge.h b/src/ccstruct/crakedge.h index 8a7e79b13b..1aeac5c093 100644 --- a/src/ccstruct/crakedge.h +++ b/src/ccstruct/crakedge.h @@ -1,6 +1,6 @@ /********************************************************************** - * File: crakedge.h (Formerly: crkedge.h) - * Description: Sturctures for the Crack following edge detector. + * File: crakedge.h (Formerly: + *crkedge.h) Description: Sturctures for the Crack following edge detector. * Author: Ray Smith * Created: Fri Mar 22 16:06:38 GMT 1991 * @@ -17,21 +17,21 @@ * **********************************************************************/ -#ifndef CRAKEDGE_H -#define CRAKEDGE_H +#ifndef CRAKEDGE_H +#define CRAKEDGE_H -#include "points.h" -#include "mod128.h" +#include "mod128.h" +#include "points.h" class CRACKEDGE { public: CRACKEDGE() = default; - ICOORD pos; /*position of crack */ - int8_t stepx; //edge step + ICOORD pos; /*position of crack */ + int8_t stepx; // edge step int8_t stepy; - int8_t stepdir; //chaincode - CRACKEDGE *prev; /*previous point */ - CRACKEDGE *next; /*next point */ + int8_t stepdir; // chaincode + CRACKEDGE* prev; /*previous point */ + CRACKEDGE* next; /*next point */ }; #endif diff --git a/src/ccstruct/detlinefit.cpp b/src/ccstruct/detlinefit.cpp index 91c2f9c43c..ccf3b471ed 100644 --- a/src/ccstruct/detlinefit.cpp +++ b/src/ccstruct/detlinefit.cpp @@ -18,8 +18,8 @@ /////////////////////////////////////////////////////////////////////// #include "detlinefit.h" -#include "statistc.h" #include "ndminx.h" +#include "statistc.h" #include "tprintf.h" #include @@ -38,8 +38,7 @@ const int kMinPointsForErrorCount = 16; // mis-fitted points, which will get square-rooted for true distance. const int kMaxRealDistance = 2.0; -DetLineFit::DetLineFit() : square_length_(0.0) { -} +DetLineFit::DetLineFit() : square_length_(0.0) {} // Delete all Added points. void DetLineFit::Clear() { @@ -48,9 +47,7 @@ void DetLineFit::Clear() { } // Add a new point. Takes a copy - the pt doesn't need to stay in scope. -void DetLineFit::Add(const ICOORD& pt) { - pts_.push_back(PointWidth(pt, 0)); -} +void DetLineFit::Add(const ICOORD& pt) { pts_.push_back(PointWidth(pt, 0)); } // Associates a half-width with the given point if a point overlaps the // previous point by more than half the width, and its distance is further // than the previous point, then the more distant point is ignored in the @@ -62,8 +59,8 @@ void DetLineFit::Add(const ICOORD& pt, int halfwidth) { // Fits a line to the points, ignoring the skip_first initial points and the // skip_last final points, returning the fitted line as a pair of points, // and the upper quartile error. -double DetLineFit::Fit(int skip_first, int skip_last, - ICOORD* pt1, ICOORD* pt2) { +double DetLineFit::Fit(int skip_first, int skip_last, ICOORD* pt1, + ICOORD* pt2) { // Do something sensible with no points. if (pts_.empty()) { pt1->set_x(0); @@ -127,9 +124,9 @@ double DetLineFit::Fit(int skip_first, int skip_last, // [min_dist, max_dist]. Returns the resulting error metric using the same // reduced set of points. // *Makes use of floating point arithmetic* -double DetLineFit::ConstrainedFit(const FCOORD& direction, - double min_dist, double max_dist, - bool debug, ICOORD* line_pt) { +double DetLineFit::ConstrainedFit(const FCOORD& direction, double min_dist, + double max_dist, bool debug, + ICOORD* line_pt) { ComputeConstrainedDistances(direction, min_dist, max_dist); // Do something sensible with no points or computed distances. if (pts_.empty() || distances_.empty()) { @@ -141,8 +138,8 @@ double DetLineFit::ConstrainedFit(const FCOORD& direction, *line_pt = distances_[median_index].data; if (debug) { tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n", - direction.x(), direction.y(), - line_pt->x(), line_pt->y(), distances_.size()); + direction.x(), direction.y(), line_pt->x(), line_pt->y(), + distances_.size()); for (int i = 0; i < distances_.size(); ++i) { tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(), distances_[i].data.y(), distances_[i].key); @@ -191,8 +188,8 @@ double DetLineFit::ConstrainedFit(double m, float* c) { double cos = 1.0 / sqrt(1.0 + m * m); FCOORD direction(cos, m * cos); ICOORD line_pt; - double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false, - &line_pt); + double error = + ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false, &line_pt); *c = line_pt.y() - line_pt.x() * m; return error; } @@ -235,8 +232,7 @@ int DetLineFit::NumberOfMisfittedPoints(double threshold) const { int num_dists = distances_.size(); // Get the absolute values of the errors. for (int i = 0; i < num_dists; ++i) { - if (distances_[i].key > threshold) - ++num_misfits; + if (distances_[i].key > threshold) ++num_misfits; } return num_misfits; } diff --git a/src/ccstruct/detlinefit.h b/src/ccstruct/detlinefit.h index 82940437ba..3931e602a2 100644 --- a/src/ccstruct/detlinefit.h +++ b/src/ccstruct/detlinefit.h @@ -72,9 +72,7 @@ class DetLineFit { // Fits a line to the points, returning the fitted line as a pair of // points, and the upper quartile error. - double Fit(ICOORD* pt1, ICOORD* pt2) { - return Fit(0, 0, pt1, pt2); - } + double Fit(ICOORD* pt1, ICOORD* pt2) { return Fit(0, 0, pt1, pt2); } // Fits a line to the points, ignoring the skip_first initial points and the // skip_last final points, returning the fitted line as a pair of points, // and the upper quartile error. @@ -86,9 +84,8 @@ class DetLineFit { // [min_dist, max_dist]. Returns the resulting error metric using the same // reduced set of points. // *Makes use of floating point arithmetic* - double ConstrainedFit(const FCOORD& direction, - double min_dist, double max_dist, - bool debug, ICOORD* line_pt); + double ConstrainedFit(const FCOORD& direction, double min_dist, + double max_dist, bool debug, ICOORD* line_pt); // Returns true if there were enough points at the last call to Fit or // ConstrainedFit for the fitted points to be used on a badly fitted line. @@ -112,7 +109,7 @@ class DetLineFit { struct PointWidth { PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {} PointWidth(const ICOORD& pt0, int halfwidth0) - : pt(pt0), halfwidth(halfwidth0) {} + : pt(pt0), halfwidth(halfwidth0) {} ICOORD pt; int halfwidth; @@ -141,8 +138,8 @@ class DetLineFit { // Computes all the cross product distances of the points perpendicular to // the given direction, ignoring distances outside of the give distance range, // storing the actual (signed) cross products in distances_. - void ComputeConstrainedDistances(const FCOORD& direction, - double min_dist, double max_dist); + void ComputeConstrainedDistances(const FCOORD& direction, double min_dist, + double max_dist); // Stores all the source points in the order they were given and their // halfwidths, if any. diff --git a/src/ccstruct/dppoint.cpp b/src/ccstruct/dppoint.cpp index fe952bd9a4..e3a751ca1b 100644 --- a/src/ccstruct/dppoint.cpp +++ b/src/ccstruct/dppoint.cpp @@ -30,11 +30,9 @@ namespace tesseract { DPPoint* DPPoint::Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint* points) { if (size <= 0 || max_step < min_step || min_step >= size) - return nullptr; // Degenerate, but not necessarily an error. + return nullptr; // Degenerate, but not necessarily an error. ASSERT_HOST(min_step > 0); // Infinite loop possible if this is not true. - if (debug) - tprintf("min = %d, max=%d\n", - min_step, max_step); + if (debug) tprintf("min = %d, max=%d\n", min_step, max_step); // Evaluate the total cost at each point. for (int i = 0; i < size; ++i) { for (int offset = min_step; offset <= max_step; ++offset) { @@ -46,8 +44,8 @@ DPPoint* DPPoint::Solve(int min_step, int max_step, bool debug, } points[i].total_cost_ += points[i].local_cost_; if (debug) { - tprintf("At point %d, local cost=%d, total_cost=%d, steps=%d\n", - i, points[i].local_cost_, points[i].total_cost_, + tprintf("At point %d, local cost=%d, total_cost=%d, steps=%d\n", i, + points[i].local_cost_, points[i].total_cost_, points[i].total_steps_); } } diff --git a/src/ccstruct/dppoint.h b/src/ccstruct/dppoint.h index 6008c09ccc..903533f7b2 100644 --- a/src/ccstruct/dppoint.h +++ b/src/ccstruct/dppoint.h @@ -49,9 +49,13 @@ class DPPoint { typedef int64_t (DPPoint::*CostFunc)(const DPPoint* prev); DPPoint() - : local_cost_(0), total_cost_(INT32_MAX), total_steps_(1), best_prev_(nullptr), - n_(0), sig_x_(0), sig_xsq_(0) { - } + : local_cost_(0), + total_cost_(INT32_MAX), + total_steps_(1), + best_prev_(nullptr), + n_(0), + sig_x_(0), + sig_xsq_(0) {} // Solve the dynamic programming problem for the given array of points, with // the given size and cost function. @@ -65,18 +69,10 @@ class DPPoint { int64_t CostWithVariance(const DPPoint* prev); // Accessors. - int total_cost() const { - return total_cost_; - } - int Pathlength() const { - return total_steps_; - } - const DPPoint* best_prev() const { - return best_prev_; - } - void AddLocalCost(int new_cost) { - local_cost_ += new_cost; - } + int total_cost() const { return total_cost_; } + int Pathlength() const { return total_steps_; } + const DPPoint* best_prev() const { return best_prev_; } + void AddLocalCost(int new_cost) { local_cost_ += new_cost; } private: // Code common to different cost functions. @@ -85,15 +81,15 @@ class DPPoint { void UpdateIfBetter(int64_t cost, int32_t steps, const DPPoint* prev, int32_t n, int32_t sig_x, int64_t sig_xsq); - int32_t local_cost_; // Cost of this point on its own. - int32_t total_cost_; // Sum of all costs in best path to here. - // During cost calculations local_cost is excluded. - int32_t total_steps_; // Number of steps in best path to here. + int32_t local_cost_; // Cost of this point on its own. + int32_t total_cost_; // Sum of all costs in best path to here. + // During cost calculations local_cost is excluded. + int32_t total_steps_; // Number of steps in best path to here. const DPPoint* best_prev_; // Pointer to prev point in best path from here. // Information for computing the variance part of the cost. - int32_t n_; // Number of steps in best path to here for variance. - int32_t sig_x_; // Sum of step sizes for computing variance. - int64_t sig_xsq_; // Sum of squares of steps for computing variance. + int32_t n_; // Number of steps in best path to here for variance. + int32_t sig_x_; // Sum of step sizes for computing variance. + int64_t sig_xsq_; // Sum of squares of steps for computing variance. }; } // namespace tesseract. diff --git a/src/ccstruct/fontinfo.cpp b/src/ccstruct/fontinfo.cpp index 30f3748abb..e9eaa1bca1 100644 --- a/src/ccstruct/fontinfo.cpp +++ b/src/ccstruct/fontinfo.cpp @@ -42,8 +42,7 @@ FontInfoTable::FontInfoTable() { set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback)); } -FontInfoTable::~FontInfoTable() { -} +FontInfoTable::~FontInfoTable() {} // Writes to the given file. Returns false in case of error. bool FontInfoTable::Serialize(FILE* fp) const { @@ -62,8 +61,7 @@ bool FontInfoTable::SetContainsFontProperties( int font_id, const GenericVector& font_set) const { uint32_t properties = get(font_id).properties; for (int f = 0; f < font_set.size(); ++f) { - if (get(font_set[f].fontinfo_id).properties == properties) - return true; + if (get(font_set[f].fontinfo_id).properties == properties) return true; } return false; } @@ -75,8 +73,7 @@ bool FontInfoTable::SetContainsMultipleFontProperties( int first_font = font_set[0].fontinfo_id; uint32_t properties = get(first_font).properties; for (int f = 1; f < font_set.size(); ++f) { - if (get(font_set[f].fontinfo_id).properties != properties) - return true; + if (get(font_set[f].fontinfo_id).properties != properties) return true; } return false; } @@ -94,7 +91,7 @@ void FontInfoTable::MoveSpacingInfoFrom(FontInfoTable* other) { push_back(other->get(i)); other->get(i).name = nullptr; } else { - delete [] get(target_index).spacing_vec; + delete[] get(target_index).spacing_vec; get(target_index).spacing_vec = other->get(i).spacing_vec; } other->get(i).spacing_vec = nullptr; @@ -115,7 +112,6 @@ void FontInfoTable::MoveTo(UnicityTable* target) { } } - // Compare FontInfo structures. bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2) { // The font properties are required to be the same for two font with the same @@ -126,11 +122,9 @@ bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2) { } // Compare FontSet structures. bool CompareFontSet(const FontSet& fs1, const FontSet& fs2) { - if (fs1.size != fs2.size) - return false; + if (fs1.size != fs2.size) return false; for (int i = 0; i < fs1.size; ++i) { - if (fs1.configs[i] != fs2.configs[i]) - return false; + if (fs1.configs[i] != fs2.configs[i]) return false; } return true; } @@ -143,9 +137,7 @@ void FontInfoDeleteCallback(FontInfo f) { } delete[] f.name; } -void FontSetDeleteCallback(FontSet fs) { - delete[] fs.configs; -} +void FontSetDeleteCallback(FontSet fs) { delete[] fs.configs; } /*---------------------------------------------------------------------------*/ // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. @@ -177,7 +169,7 @@ bool read_spacing_info(TFile* f, FontInfo* fi) { if (vec_size == 0) return true; fi->init_spacing(vec_size); for (int i = 0; i < vec_size; ++i) { - FontSpacingInfo *fs = new FontSpacingInfo(); + FontSpacingInfo* fs = new FontSpacingInfo(); if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1) != 1 || f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1) != 1 || f->FReadEndian(&kern_size, sizeof(kern_size), 1) != 1) { @@ -200,10 +192,10 @@ bool read_spacing_info(TFile* f, FontInfo* fi) { bool write_spacing_info(FILE* f, const FontInfo& fi) { int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size(); - if (fwrite(&vec_size, sizeof(vec_size), 1, f) != 1) return false; + if (fwrite(&vec_size, sizeof(vec_size), 1, f) != 1) return false; int16_t x_gap_invalid = -1; for (int i = 0; i < vec_size; ++i) { - FontSpacingInfo *fs = fi.spacing_vec->get(i); + FontSpacingInfo* fs = fi.spacing_vec->get(i); int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size(); if (fs == nullptr) { // Valid to have the identical fwrites. Writing invalid x-gaps. diff --git a/src/ccstruct/fontinfo.h b/src/ccstruct/fontinfo.h index 4dd8bf6f6d..ea8c0bb2f7 100644 --- a/src/ccstruct/fontinfo.h +++ b/src/ccstruct/fontinfo.h @@ -17,7 +17,6 @@ // /////////////////////////////////////////////////////////////////////// - #ifndef TESSERACT_CCSTRUCT_FONTINFO_H_ #define TESSERACT_CCSTRUCT_FONTINFO_H_ @@ -25,7 +24,8 @@ #include "host.h" #include "unichar.h" -template class UnicityTable; +template +class UnicityTable; namespace tesseract { @@ -60,7 +60,8 @@ struct FontSpacingInfo { * serif, fraktur */ struct FontInfo { - FontInfo() : name(nullptr), properties(0), universal_id(0), spacing_vec(nullptr) {} + FontInfo() + : name(nullptr), properties(0), universal_id(0), spacing_vec(nullptr) {} ~FontInfo() = default; // Writes to the given file. Returns false in case of error. @@ -71,30 +72,30 @@ struct FontInfo { // Reserves unicharset_size spots in spacing_vec. void init_spacing(int unicharset_size) { - spacing_vec = new GenericVector(); + spacing_vec = new GenericVector(); spacing_vec->init_to_size(unicharset_size, nullptr); } // Adds the given pointer to FontSpacingInfo to spacing_vec member // (FontInfo class takes ownership of the pointer). // Note: init_spacing should be called before calling this function. - void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) { + void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo* spacing_info) { ASSERT_HOST(spacing_vec != nullptr && spacing_vec->size() > uch_id); (*spacing_vec)[uch_id] = spacing_info; } // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID. - const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const { - return (spacing_vec == nullptr || spacing_vec->size() <= uch_id) ? - nullptr : (*spacing_vec)[uch_id]; + const FontSpacingInfo* get_spacing(UNICHAR_ID uch_id) const { + return (spacing_vec == nullptr || spacing_vec->size() <= uch_id) + ? nullptr + : (*spacing_vec)[uch_id]; } // Fills spacing with the value of the x gap expected between the two given // UNICHAR_IDs. Returns true on success. - bool get_spacing(UNICHAR_ID prev_uch_id, - UNICHAR_ID uch_id, - int *spacing) const { - const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id); - const FontSpacingInfo *fsi = this->get_spacing(uch_id); + bool get_spacing(UNICHAR_ID prev_uch_id, UNICHAR_ID uch_id, + int* spacing) const { + const FontSpacingInfo* prev_fsi = this->get_spacing(prev_uch_id); + const FontSpacingInfo* fsi = this->get_spacing(uch_id); if (prev_fsi == nullptr || fsi == nullptr) return false; int i = 0; for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) { @@ -122,7 +123,7 @@ struct FontInfo { // ResultIterator::WordFontAttributes. int32_t universal_id; // Horizontal spacing between characters (indexed by UNICHAR_ID). - GenericVector *spacing_vec; + GenericVector* spacing_vec; }; // Every class (character) owns a FontSet that represents all the fonts that can @@ -135,8 +136,8 @@ struct FontInfo { // the FontInfo in the FontSet structure, it's better to share FontInfos among // FontSets (Classify::fontinfo_table_). struct FontSet { - int size; - int* configs; // FontInfo ids + int size; + int* configs; // FontInfo ids }; // Class that adds a bit of functionality on top of GenericVector to diff --git a/src/ccstruct/genblob.cpp b/src/ccstruct/genblob.cpp index 03ae60b9a4..55a6f54204 100644 --- a/src/ccstruct/genblob.cpp +++ b/src/ccstruct/genblob.cpp @@ -17,8 +17,8 @@ * **********************************************************************/ -#include "genblob.h" -#include "stepblob.h" +#include "genblob.h" +#include "stepblob.h" /********************************************************************** * c_blob_comparator() @@ -27,12 +27,12 @@ * order of left edge. **********************************************************************/ -int c_blob_comparator( // sort blobs - const void *blob1p, // ptr to ptr to blob1 - const void *blob2p // ptr to ptr to blob2 - ) { - C_BLOB *blob1 = *(C_BLOB **) blob1p; - C_BLOB *blob2 = *(C_BLOB **) blob2p; +int c_blob_comparator( // sort blobs + const void* blob1p, // ptr to ptr to blob1 + const void* blob2p // ptr to ptr to blob2 +) { + C_BLOB* blob1 = *(C_BLOB**)blob1p; + C_BLOB* blob2 = *(C_BLOB**)blob2p; - return blob1->bounding_box ().left () - blob2->bounding_box ().left (); + return blob1->bounding_box().left() - blob2->bounding_box().left(); } diff --git a/src/ccstruct/genblob.h b/src/ccstruct/genblob.h index 2fb55a1a41..b26ce75d3f 100644 --- a/src/ccstruct/genblob.h +++ b/src/ccstruct/genblob.h @@ -17,11 +17,11 @@ * **********************************************************************/ -#ifndef GENBLOB_H -#define GENBLOB_H +#ifndef GENBLOB_H +#define GENBLOB_H // Sort function to sort blobs by ascending left edge. -int c_blob_comparator(const void *blob1p, // ptr to ptr to blob1 - const void *blob2p); +int c_blob_comparator(const void* blob1p, // ptr to ptr to blob1 + const void* blob2p); #endif diff --git a/src/ccstruct/hpdsizes.h b/src/ccstruct/hpdsizes.h index f4d886a0b4..fb5a199d0e 100644 --- a/src/ccstruct/hpdsizes.h +++ b/src/ccstruct/hpdsizes.h @@ -7,11 +7,11 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef HPDSIZES_H -#define HPDSIZES_H +#ifndef HPDSIZES_H +#define HPDSIZES_H -#define NUM_TEXT_ATTR 10 -#define NUM_BLOCK_ATTR 7 +#define NUM_TEXT_ATTR 10 +#define NUM_BLOCK_ATTR 7 #define MAXLENGTH 128 -#define NUM_BACKGROUNDS 8 +#define NUM_BACKGROUNDS 8 #endif diff --git a/src/ccstruct/imagedata.cpp b/src/ccstruct/imagedata.cpp index 8bc7b73ef6..52148cb251 100644 --- a/src/ccstruct/imagedata.cpp +++ b/src/ccstruct/imagedata.cpp @@ -42,14 +42,12 @@ const int kMaxReadAhead = 8; namespace tesseract { -WordFeature::WordFeature() : x_(0), y_(0), dir_(0) { -} +WordFeature::WordFeature() : x_(0), y_(0), dir_(0) {} WordFeature::WordFeature(const FCOORD& fcoord, uint8_t dir) - : x_(IntCastRounded(fcoord.x())), - y_(ClipToRange(IntCastRounded(fcoord.y()), 0, UINT8_MAX)), - dir_(dir) { -} + : x_(IntCastRounded(fcoord.x())), + y_(ClipToRange(IntCastRounded(fcoord.y()), 0, UINT8_MAX)), + dir_(dir) {} // Computes the maximum x and y value in the features. void WordFeature::ComputeSize(const GenericVector& features, @@ -74,7 +72,7 @@ void WordFeature::Draw(const GenericVector& features, window->SetCursor(IntCastRounded(pos.x() - dir.x()), IntCastRounded(pos.y() - dir.y())); window->DrawTo(IntCastRounded(pos.x() + dir.x()), - IntCastRounded(pos.y() + dir.y())); + IntCastRounded(pos.y() + dir.y())); } #endif } @@ -119,15 +117,13 @@ int FloatWordFeature::SortByXBucket(const void* v1, const void* v2) { return x_diff; } -ImageData::ImageData() : page_number_(-1), vertical_text_(false) { -} +ImageData::ImageData() : page_number_(-1), vertical_text_(false) {} // Takes ownership of the pix and destroys it. ImageData::ImageData(bool vertical, Pix* pix) - : page_number_(0), vertical_text_(vertical) { + : page_number_(0), vertical_text_(vertical) { SetPix(pix); } -ImageData::~ImageData() { -} +ImageData::~ImageData() {} // Builds and returns an ImageData from the basic data. Note that imagedata, // truth_text, and box_text are all the actual file data, NOT filenames. @@ -209,14 +205,10 @@ bool ImageData::SkipDeSerialize(TFile* fp) { } // Saves the given Pix as a PNG-encoded string and destroys it. -void ImageData::SetPix(Pix* pix) { - SetPixInternal(pix, &image_data_); -} +void ImageData::SetPix(Pix* pix) { SetPixInternal(pix, &image_data_); } // Returns the Pix image for *this. Must be pixDestroyed after use. -Pix* ImageData::GetPix() const { - return GetPixInternal(image_data_); -} +Pix* ImageData::GetPix() const { return GetPixInternal(image_data_); } // Gets anything and everything with a non-nullptr pointer, prescaled to a // given target_height (if 0, then the original image height), and aligned. @@ -239,8 +231,7 @@ Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor, float im_factor = static_cast(target_height) / input_height; if (scaled_width != nullptr) *scaled_width = IntCastRounded(im_factor * input_width); - if (scaled_height != nullptr) - *scaled_height = target_height; + if (scaled_height != nullptr) *scaled_height = target_height; // Get the scaled image. Pix* pix = pixScale(src_pix, im_factor, im_factor); if (pix == nullptr) { @@ -268,9 +259,7 @@ Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor, return pix; } -int ImageData::MemoryUsed() const { - return image_data_.size(); -} +int ImageData::MemoryUsed() const { return image_data_.size(); } // Draws the data in a new window. void ImageData::Display() const { @@ -281,10 +270,9 @@ void ImageData::Display() const { if (pix == nullptr) return; int width = pixGetWidth(pix); int height = pixGetHeight(pix); - ScrollView* win = new ScrollView("Imagedata", 100, 100, - 2 * (width + 2 * kTextSize), - 2 * (height + 4 * kTextSize), - width + 10, height + 3 * kTextSize, true); + ScrollView* win = new ScrollView( + "Imagedata", 100, 100, 2 * (width + 2 * kTextSize), + 2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true); win->Image(pix, 0, height - 1); pixDestroy(&pix); // Draw the boxes. @@ -353,14 +341,15 @@ bool ImageData::AddBoxes(const char* box_text) { GenericVector boxes; GenericVector texts; GenericVector box_pages; - if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text, + if (ReadMemBoxes(page_number_, + /*skip_blanks*/ false, box_text, /*continue_on_failure*/ true, &boxes, &texts, nullptr, &box_pages)) { AddBoxes(boxes, texts, box_pages); return true; } else { - tprintf("Error: No boxes for page %d from image %s!\n", - page_number_, imagefilename_.string()); + tprintf("Error: No boxes for page %d from image %s!\n", page_number_, + imagefilename_.string()); } } return false; @@ -453,8 +442,8 @@ const ImageData* DocumentData::GetPage(int index) { bool needs_loading = pages_offset_ != index; pages_mutex_.Unlock(); if (needs_loading) LoadPageInBackground(index); - // We can't directly load the page, or the background load will delete it - // while the caller is using it, so give it a chance to work. + // We can't directly load the page, or the background load will delete it + // while the caller is using it, so give it a chance to work. #if defined(__MINGW32__) sleep(1); #else @@ -603,8 +592,7 @@ bool DocumentCache::AddToCache(DocumentData* data) { // Finds and returns a document by name. DocumentData* DocumentCache::FindDocument(const STRING& document_name) const { for (int i = 0; i < documents_.size(); ++i) { - if (documents_[i]->document_name() == document_name) - return documents_[i]; + if (documents_[i]->document_name() == document_name) return documents_[i]; } return nullptr; } diff --git a/src/ccstruct/imagedata.h b/src/ccstruct/imagedata.h index ad8d2b9610..e56f22c82c 100644 --- a/src/ccstruct/imagedata.h +++ b/src/ccstruct/imagedata.h @@ -20,7 +20,6 @@ #ifndef TESSERACT_IMAGE_IMAGEDATA_H_ #define TESSERACT_IMAGE_IMAGEDATA_H_ - #include "genericvector.h" #include "normalis.h" #include "rect.h" @@ -121,39 +120,17 @@ class ImageData { static bool SkipDeSerialize(tesseract::TFile* fp); // Other accessors. - const STRING& imagefilename() const { - return imagefilename_; - } - void set_imagefilename(const STRING& name) { - imagefilename_ = name; - } - int page_number() const { - return page_number_; - } - void set_page_number(int num) { - page_number_ = num; - } - const GenericVector& image_data() const { - return image_data_; - } - const STRING& language() const { - return language_; - } - void set_language(const STRING& lang) { - language_ = lang; - } - const STRING& transcription() const { - return transcription_; - } - const GenericVector& boxes() const { - return boxes_; - } - const GenericVector& box_texts() const { - return box_texts_; - } - const STRING& box_text(int index) const { - return box_texts_[index]; - } + const STRING& imagefilename() const { return imagefilename_; } + void set_imagefilename(const STRING& name) { imagefilename_ = name; } + int page_number() const { return page_number_; } + void set_page_number(int num) { page_number_ = num; } + const GenericVector& image_data() const { return image_data_; } + const STRING& language() const { return language_; } + void set_language(const STRING& lang) { language_ = lang; } + const STRING& transcription() const { return transcription_; } + const GenericVector& boxes() const { return boxes_; } + const GenericVector& box_texts() const { return box_texts_; } + const STRING& box_text(int index) const { return box_texts_[index]; } // Saves the given Pix as a PNG-encoded string and destroys it. void SetPix(Pix* pix); // Returns the Pix image for *this. Must be pixDestroyed after use. @@ -162,8 +139,8 @@ class ImageData { // given target_height (if 0, then the original image height), and aligned. // Also returns (if not nullptr) the width and height of the scaled image. // The return value is the scaled Pix, which must be pixDestroyed after use, - // and scale_factor (if not nullptr) is set to the scale factor that was applied - // to the image to achieve the target_height. + // and scale_factor (if not nullptr) is set to the scale factor that was + // applied to the image to achieve the target_height. Pix* PreScale(int target_height, int max_height, float* scale_factor, int* scaled_width, int* scaled_height, GenericVector* boxes) const; @@ -190,7 +167,7 @@ class ImageData { private: STRING imagefilename_; // File to read image from. - int32_t page_number_; // Page number if multi-page tif or -1. + int32_t page_number_; // Page number if multi-page tif or -1. GenericVector image_data_; // PNG file data. STRING language_; // Language code for image. STRING transcription_; // UTF-8 ground truth of image. @@ -339,9 +316,7 @@ class DocumentCache { return GetPageRoundRobin(serial); } - const PointerVector& documents() const { - return documents_; - } + const PointerVector& documents() const { return documents_; } // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache // strategy, could take a long time. int TotalPages(); @@ -373,5 +348,4 @@ class DocumentCache { } // namespace tesseract - #endif // TESSERACT_IMAGE_IMAGEDATA_H_ diff --git a/src/ccstruct/ipoints.h b/src/ccstruct/ipoints.h index 099982ccda..d8dce00a19 100644 --- a/src/ccstruct/ipoints.h +++ b/src/ccstruct/ipoints.h @@ -17,10 +17,10 @@ * **********************************************************************/ -#ifndef IPOINTS_H -#define IPOINTS_H +#ifndef IPOINTS_H +#define IPOINTS_H -#include +#include /********************************************************************** * operator! @@ -28,413 +28,369 @@ * Rotate an ICOORD 90 degrees anticlockwise. **********************************************************************/ -inline ICOORD -operator! ( //rotate 90 deg anti -const ICOORD & src //thing to rotate +inline ICOORD operator!( // rotate 90 deg anti + const ICOORD& src // thing to rotate ) { - ICOORD result; //output + ICOORD result; // output result.xcoord = -src.ycoord; result.ycoord = src.xcoord; return result; } - /********************************************************************** * operator- * * Unary minus of an ICOORD. **********************************************************************/ -inline ICOORD -operator- ( //unary minus -const ICOORD & src //thing to minus +inline ICOORD operator-( // unary minus + const ICOORD& src // thing to minus ) { - ICOORD result; //output + ICOORD result; // output result.xcoord = -src.xcoord; result.ycoord = -src.ycoord; return result; } - /********************************************************************** * operator+ * * Add 2 ICOORDS. **********************************************************************/ -inline ICOORD -operator+ ( //sum vectors -const ICOORD & op1, //operands -const ICOORD & op2) { - ICOORD sum; //result +inline ICOORD operator+( // sum vectors + const ICOORD& op1, // operands + const ICOORD& op2) { + ICOORD sum; // result sum.xcoord = op1.xcoord + op2.xcoord; sum.ycoord = op1.ycoord + op2.ycoord; return sum; } - /********************************************************************** * operator+= * * Add 2 ICOORDS. **********************************************************************/ -inline ICOORD & -operator+= ( //sum vectors -ICOORD & op1, //operands -const ICOORD & op2) { +inline ICOORD& operator+=( // sum vectors + ICOORD& op1, // operands + const ICOORD& op2) { op1.xcoord += op2.xcoord; op1.ycoord += op2.ycoord; return op1; } - /********************************************************************** * operator- * * Subtract 2 ICOORDS. **********************************************************************/ -inline ICOORD -operator- ( //subtract vectors -const ICOORD & op1, //operands -const ICOORD & op2) { - ICOORD sum; //result +inline ICOORD operator-( // subtract vectors + const ICOORD& op1, // operands + const ICOORD& op2) { + ICOORD sum; // result sum.xcoord = op1.xcoord - op2.xcoord; sum.ycoord = op1.ycoord - op2.ycoord; return sum; } - /********************************************************************** * operator-= * * Subtract 2 ICOORDS. **********************************************************************/ -inline ICOORD & -operator-= ( //sum vectors -ICOORD & op1, //operands -const ICOORD & op2) { +inline ICOORD& operator-=( // sum vectors + ICOORD& op1, // operands + const ICOORD& op2) { op1.xcoord -= op2.xcoord; op1.ycoord -= op2.ycoord; return op1; } - /********************************************************************** * operator% * * Scalar product of 2 ICOORDS. **********************************************************************/ -inline int32_t -operator% ( //scalar product -const ICOORD & op1, //operands -const ICOORD & op2) { +inline int32_t operator%( // scalar product + const ICOORD& op1, // operands + const ICOORD& op2) { return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord; } - /********************************************************************** * operator* * * Cross product of 2 ICOORDS. **********************************************************************/ -inline int32_t operator *( //cross product - const ICOORD &op1, //operands - const ICOORD &op2) { +inline int32_t operator*( // cross product + const ICOORD& op1, // operands + const ICOORD& op2) { return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord; } - /********************************************************************** * operator* * * Scalar multiply of an ICOORD. **********************************************************************/ -inline ICOORD operator *( //scalar multiply - const ICOORD &op1, //operands - int16_t scale) { - ICOORD result; //output +inline ICOORD operator*( // scalar multiply + const ICOORD& op1, // operands + int16_t scale) { + ICOORD result; // output result.xcoord = op1.xcoord * scale; result.ycoord = op1.ycoord * scale; return result; } - -inline ICOORD operator *( //scalar multiply - int16_t scale, - const ICOORD &op1 //operands - ) { - ICOORD result; //output +inline ICOORD operator*( // scalar multiply + int16_t scale, + const ICOORD& op1 // operands +) { + ICOORD result; // output result.xcoord = op1.xcoord * scale; result.ycoord = op1.ycoord * scale; return result; } - /********************************************************************** * operator*= * * Scalar multiply of an ICOORD. **********************************************************************/ -inline ICOORD & -operator*= ( //scalar multiply -ICOORD & op1, //operands -int16_t scale) { +inline ICOORD& operator*=( // scalar multiply + ICOORD& op1, // operands + int16_t scale) { op1.xcoord *= scale; op1.ycoord *= scale; return op1; } - /********************************************************************** * operator/ * * Scalar divide of an ICOORD. **********************************************************************/ -inline ICOORD -operator/ ( //scalar divide -const ICOORD & op1, //operands -int16_t scale) { - ICOORD result; //output +inline ICOORD operator/( // scalar divide + const ICOORD& op1, // operands + int16_t scale) { + ICOORD result; // output result.xcoord = op1.xcoord / scale; result.ycoord = op1.ycoord / scale; return result; } - /********************************************************************** * operator/= * * Scalar divide of an ICOORD. **********************************************************************/ -inline ICOORD & -operator/= ( //scalar divide -ICOORD & op1, //operands -int16_t scale) { +inline ICOORD& operator/=( // scalar divide + ICOORD& op1, // operands + int16_t scale) { op1.xcoord /= scale; op1.ycoord /= scale; return op1; } - /********************************************************************** * ICOORD::rotate * * Rotate an ICOORD by the given (normalized) (cos,sin) vector. **********************************************************************/ -inline void ICOORD::rotate( //rotate by vector - const FCOORD& vec) { +inline void ICOORD::rotate( // rotate by vector + const FCOORD& vec) { int16_t tmp; - tmp = (int16_t) floor (xcoord * vec.x () - ycoord * vec.y () + 0.5); - ycoord = (int16_t) floor (ycoord * vec.x () + xcoord * vec.y () + 0.5); + tmp = (int16_t)floor(xcoord * vec.x() - ycoord * vec.y() + 0.5); + ycoord = (int16_t)floor(ycoord * vec.x() + xcoord * vec.y() + 0.5); xcoord = tmp; } - /********************************************************************** * operator! * * Rotate an FCOORD 90 degrees anticlockwise. **********************************************************************/ -inline FCOORD -operator! ( //rotate 90 deg anti -const FCOORD & src //thing to rotate +inline FCOORD operator!( // rotate 90 deg anti + const FCOORD& src // thing to rotate ) { - FCOORD result; //output + FCOORD result; // output result.xcoord = -src.ycoord; result.ycoord = src.xcoord; return result; } - /********************************************************************** * operator- * * Unary minus of an FCOORD. **********************************************************************/ -inline FCOORD -operator- ( //unary minus -const FCOORD & src //thing to minus +inline FCOORD operator-( // unary minus + const FCOORD& src // thing to minus ) { - FCOORD result; //output + FCOORD result; // output result.xcoord = -src.xcoord; result.ycoord = -src.ycoord; return result; } - /********************************************************************** * operator+ * * Add 2 FCOORDS. **********************************************************************/ -inline FCOORD -operator+ ( //sum vectors -const FCOORD & op1, //operands -const FCOORD & op2) { - FCOORD sum; //result +inline FCOORD operator+( // sum vectors + const FCOORD& op1, // operands + const FCOORD& op2) { + FCOORD sum; // result sum.xcoord = op1.xcoord + op2.xcoord; sum.ycoord = op1.ycoord + op2.ycoord; return sum; } - /********************************************************************** * operator+= * * Add 2 FCOORDS. **********************************************************************/ -inline FCOORD & -operator+= ( //sum vectors -FCOORD & op1, //operands -const FCOORD & op2) { +inline FCOORD& operator+=( // sum vectors + FCOORD& op1, // operands + const FCOORD& op2) { op1.xcoord += op2.xcoord; op1.ycoord += op2.ycoord; return op1; } - /********************************************************************** * operator- * * Subtract 2 FCOORDS. **********************************************************************/ -inline FCOORD -operator- ( //subtract vectors -const FCOORD & op1, //operands -const FCOORD & op2) { - FCOORD sum; //result +inline FCOORD operator-( // subtract vectors + const FCOORD& op1, // operands + const FCOORD& op2) { + FCOORD sum; // result sum.xcoord = op1.xcoord - op2.xcoord; sum.ycoord = op1.ycoord - op2.ycoord; return sum; } - /********************************************************************** * operator-= * * Subtract 2 FCOORDS. **********************************************************************/ -inline FCOORD & -operator-= ( //sum vectors -FCOORD & op1, //operands -const FCOORD & op2) { +inline FCOORD& operator-=( // sum vectors + FCOORD& op1, // operands + const FCOORD& op2) { op1.xcoord -= op2.xcoord; op1.ycoord -= op2.ycoord; return op1; } - /********************************************************************** * operator% * * Scalar product of 2 FCOORDS. **********************************************************************/ -inline float -operator% ( //scalar product -const FCOORD & op1, //operands -const FCOORD & op2) { +inline float operator%( // scalar product + const FCOORD& op1, // operands + const FCOORD& op2) { return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord; } - /********************************************************************** * operator* * * Cross product of 2 FCOORDS. **********************************************************************/ -inline float operator *( //cross product - const FCOORD &op1, //operands - const FCOORD &op2) { +inline float operator*( // cross product + const FCOORD& op1, // operands + const FCOORD& op2) { return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord; } - /********************************************************************** * operator* * * Scalar multiply of an FCOORD. **********************************************************************/ -inline FCOORD operator *( //scalar multiply - const FCOORD &op1, //operands - float scale) { - FCOORD result; //output +inline FCOORD operator*( // scalar multiply + const FCOORD& op1, // operands + float scale) { + FCOORD result; // output result.xcoord = op1.xcoord * scale; result.ycoord = op1.ycoord * scale; return result; } - -inline FCOORD operator *( //scalar multiply - float scale, - const FCOORD &op1 //operands - ) { - FCOORD result; //output +inline FCOORD operator*( // scalar multiply + float scale, + const FCOORD& op1 // operands +) { + FCOORD result; // output result.xcoord = op1.xcoord * scale; result.ycoord = op1.ycoord * scale; return result; } - /********************************************************************** * operator*= * * Scalar multiply of an FCOORD. **********************************************************************/ -inline FCOORD & -operator*= ( //scalar multiply -FCOORD & op1, //operands -float scale) { +inline FCOORD& operator*=( // scalar multiply + FCOORD& op1, // operands + float scale) { op1.xcoord *= scale; op1.ycoord *= scale; return op1; } - /********************************************************************** * operator/ * * Scalar divide of an FCOORD. **********************************************************************/ -inline FCOORD -operator/ ( //scalar divide -const FCOORD & op1, //operands -float scale) { - FCOORD result; //output +inline FCOORD operator/( // scalar divide + const FCOORD& op1, // operands + float scale) { + FCOORD result; // output if (scale != 0) { result.xcoord = op1.xcoord / scale; @@ -443,17 +399,15 @@ float scale) { return result; } - /********************************************************************** * operator/= * * Scalar divide of an FCOORD. **********************************************************************/ -inline FCOORD & -operator/= ( //scalar divide -FCOORD & op1, //operands -float scale) { +inline FCOORD& operator/=( // scalar divide + FCOORD& op1, // operands + float scale) { if (scale != 0) { op1.xcoord /= scale; op1.ycoord /= scale; @@ -461,19 +415,18 @@ float scale) { return op1; } - /********************************************************************** * rotate * * Rotate an FCOORD by the given (normalized) (cos,sin) vector. **********************************************************************/ -inline void FCOORD::rotate( //rotate by vector - const FCOORD vec) { +inline void FCOORD::rotate( // rotate by vector + const FCOORD vec) { float tmp; - tmp = xcoord * vec.x () - ycoord * vec.y (); - ycoord = ycoord * vec.x () + xcoord * vec.y (); + tmp = xcoord * vec.x() - ycoord * vec.y(); + ycoord = ycoord * vec.x() + xcoord * vec.y(); xcoord = tmp; } diff --git a/src/ccstruct/linlsq.cpp b/src/ccstruct/linlsq.cpp index 0dc9b5fb82..d9b37f98c3 100644 --- a/src/ccstruct/linlsq.cpp +++ b/src/ccstruct/linlsq.cpp @@ -17,10 +17,10 @@ * **********************************************************************/ -#include -#include -#include "errcode.h" -#include "linlsq.h" +#include "linlsq.h" +#include +#include +#include "errcode.h" const ERRCODE EMPTY_LLSQ = "Can't delete from an empty LLSQ"; @@ -30,25 +30,24 @@ const ERRCODE EMPTY_LLSQ = "Can't delete from an empty LLSQ"; * Function to initialize a LLSQ. **********************************************************************/ -void LLSQ::clear() { // initialize - total_weight = 0.0; // no elements - sigx = 0.0; // update accumulators +void LLSQ::clear() { // initialize + total_weight = 0.0; // no elements + sigx = 0.0; // update accumulators sigy = 0.0; sigxx = 0.0; sigxy = 0.0; sigyy = 0.0; } - /********************************************************************** * LLSQ::add * * Add an element to the accumulator. **********************************************************************/ -void LLSQ::add(double x, double y) { // add an element - total_weight++; // count elements - sigx += x; // update accumulators +void LLSQ::add(double x, double y) { // add an element + total_weight++; // count elements + sigx += x; // update accumulators sigy += y; sigxx += x * x; sigxy += x * y; @@ -57,7 +56,7 @@ void LLSQ::add(double x, double y) { // add an element // Adds an element with a specified weight. void LLSQ::add(double x, double y, double weight) { total_weight += weight; - sigx += x * weight; // update accumulators + sigx += x * weight; // update accumulators sigy += y * weight; sigxx += x * x * weight; sigxy += x * y * weight; @@ -66,32 +65,30 @@ void LLSQ::add(double x, double y, double weight) { // Adds a whole LLSQ. void LLSQ::add(const LLSQ& other) { total_weight += other.total_weight; - sigx += other.sigx; // update accumulators + sigx += other.sigx; // update accumulators sigy += other.sigy; sigxx += other.sigxx; sigxy += other.sigxy; sigyy += other.sigyy; } - /********************************************************************** * LLSQ::remove * * Delete an element from the acculuator. **********************************************************************/ -void LLSQ::remove(double x, double y) { // delete an element - if (total_weight <= 0.0) // illegal +void LLSQ::remove(double x, double y) { // delete an element + if (total_weight <= 0.0) // illegal EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr); - total_weight--; // count elements - sigx -= x; // update accumulators + total_weight--; // count elements + sigx -= x; // update accumulators sigy -= y; sigxx -= x * x; sigxy -= x * y; sigyy -= y * y; } - /********************************************************************** * LLSQ::m * @@ -104,47 +101,44 @@ double LLSQ::m() const { // get gradient if (x_var != 0.0) return covar / x_var; else - return 0.0; // too little + return 0.0; // too little } - /********************************************************************** * LLSQ::c * * Return the constant of the line fit. **********************************************************************/ -double LLSQ::c(double m) const { // get constant +double LLSQ::c(double m) const { // get constant if (total_weight > 0.0) return (sigy - m * sigx) / total_weight; else - return 0; // too little + return 0; // too little } - /********************************************************************** * LLSQ::rms * * Return the rms error of the fit. **********************************************************************/ -double LLSQ::rms(double m, double c) const { // get error - double error; // total error +double LLSQ::rms(double m, double c) const { // get error + double error; // total error if (total_weight > 0) { - error = sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * - (total_weight * c - 2 * sigy); + error = sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + + c * (total_weight * c - 2 * sigy); if (error >= 0) error = sqrt(error / total_weight); // sqrt of mean else error = 0; } else { - error = 0; // too little + error = 0; // too little } return error; } - /********************************************************************** * LLSQ::pearson * @@ -152,19 +146,19 @@ double LLSQ::rms(double m, double c) const { // get error **********************************************************************/ double LLSQ::pearson() const { // get correlation - double r = 0.0; // Correlation is 0 if insufficent data. + double r = 0.0; // Correlation is 0 if insufficent data. double covar = covariance(); if (covar != 0.0) { - double var_product = x_variance() * y_variance(); - if (var_product > 0.0) - r = covar / sqrt(var_product); + double var_product = x_variance() * y_variance(); + if (var_product > 0.0) r = covar / sqrt(var_product); } return r; } // Returns the x,y means as an FCOORD. -FCOORD LLSQ::mean_point() const { +FCOORD +LLSQ::mean_point() const { if (total_weight > 0.0) { return FCOORD(sigx / total_weight, sigy / total_weight); } else { @@ -193,11 +187,10 @@ FCOORD LLSQ::mean_point() const { // = v * N * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] / N * v' // = v * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] * v' // = code below -double LLSQ::rms_orth(const FCOORD &dir) const { +double LLSQ::rms_orth(const FCOORD& dir) const { FCOORD v = !dir; v.normalise(); - return sqrt(v.x() * v.x() * x_variance() + - 2 * v.x() * v.y() * covariance() + + return sqrt(v.x() * v.x() * x_variance() + 2 * v.x() * v.y() * covariance() + v.y() * v.y() * y_variance()); } @@ -249,7 +242,8 @@ double LLSQ::rms_orth(const FCOORD &dir) const { // that is still a much more complex derivation. It seems Pearson had already // found this simple solution in 1901. // http://books.google.com/books?id=WXwvAQAAIAAJ&pg=PA559 -FCOORD LLSQ::vector_fit() const { +FCOORD +LLSQ::vector_fit() const { double x_var = x_variance(); double y_var = y_variance(); double covar = covariance(); diff --git a/src/ccstruct/linlsq.h b/src/ccstruct/linlsq.h index 00c4e4175b..02b48c044c 100644 --- a/src/ccstruct/linlsq.h +++ b/src/ccstruct/linlsq.h @@ -20,12 +20,12 @@ #ifndef TESSERACT_CCSTRUCT_LINLSQ_H_ #define TESSERACT_CCSTRUCT_LINLSQ_H_ -#include "points.h" #include "params.h" +#include "points.h" class LLSQ { public: - LLSQ() { // constructor + LLSQ() { // constructor clear(); // set to zeros } void clear(); // initialize @@ -42,17 +42,17 @@ class LLSQ { return static_cast(total_weight + 0.5); } - double m() const; // get gradient - double c(double m) const; // get constant - double rms(double m, double c) const; // get error - double pearson() const; // get correlation coefficient. + double m() const; // get gradient + double c(double m) const; // get constant + double rms(double m, double c) const; // get error + double pearson() const; // get correlation coefficient. // Returns the x,y means as an FCOORD. FCOORD mean_point() const; // Returns the average sum of squared perpendicular error from a line // through mean_point() in the direction dir. - double rms_orth(const FCOORD &dir) const; + double rms_orth(const FCOORD& dir) const; // Returns the direction of the fitted line as a unit vector, using the // least mean squared perpendicular distance. The line runs through the @@ -90,15 +90,14 @@ class LLSQ { } private: - double total_weight; // no of elements or sum of weights. - double sigx; // sum of x - double sigy; // sum of y - double sigxx; // sum x squared - double sigxy; // sum of xy - double sigyy; // sum y squared + double total_weight; // no of elements or sum of weights. + double sigx; // sum of x + double sigy; // sum of y + double sigxx; // sum x squared + double sigxy; // sum of xy + double sigyy; // sum y squared }; - // Returns the median value of the vector, given that the values are // circular, with the given modulus. Values may be signed or unsigned, // eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi). @@ -108,7 +107,8 @@ class LLSQ { // the wrap-around point. // Cannot be a member of GenericVector, as it makes heavy used of LLSQ. // T must be an integer or float/double type. -template T MedianOfCircularValues(T modulus, GenericVector* v) { +template +T MedianOfCircularValues(T modulus, GenericVector* v) { LLSQ stats; T halfrange = static_cast(modulus / 2); int num_elements = v->size(); @@ -130,5 +130,4 @@ template T MedianOfCircularValues(T modulus, GenericVector* v) { return (*v)[median_index]; } - #endif // TESSERACT_CCSTRUCT_LINLSQ_H_ diff --git a/src/ccstruct/matrix.cpp b/src/ccstruct/matrix.cpp index 28598745f4..c0fafd57ed 100644 --- a/src/ccstruct/matrix.cpp +++ b/src/ccstruct/matrix.cpp @@ -38,8 +38,7 @@ bool MATRIX::Classified(int col, int row, int wildcard_id) const { BLOB_CHOICE_IT b_it(get(col, row)); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOB_CHOICE* choice = b_it.data(); - if (choice->IsClassified()) - return true; + if (choice->IsClassified()) return true; } return false; } @@ -109,19 +108,19 @@ MATRIX* MATRIX::DeepCopy() const { } // Print the best guesses out of the match rating matrix. -void MATRIX::print(const UNICHARSET &unicharset) const { +void MATRIX::print(const UNICHARSET& unicharset) const { tprintf("Ratings Matrix (top 3 choices)\n"); int dim = dimension(); int band_width = bandwidth(); int row, col; for (col = 0; col < dim; ++col) { for (row = col; row < dim && row < col + band_width; ++row) { - BLOB_CHOICE_LIST *rating = this->get(col, row); + BLOB_CHOICE_LIST* rating = this->get(col, row); if (rating == NOT_CLASSIFIED) continue; BLOB_CHOICE_IT b_it(rating); tprintf("col=%d row=%d ", col, row); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - tprintf("%s rat=%g cert=%g " , + tprintf("%s rat=%g cert=%g ", unicharset.id_to_unichar(b_it.data()->unichar_id()), b_it.data()->rating(), b_it.data()->certainty()); } @@ -139,13 +138,12 @@ void MATRIX::print(const UNICHARSET &unicharset) const { tprintf(" \t"); continue; } - BLOB_CHOICE_LIST *rating = this->get(col, row); + BLOB_CHOICE_LIST* rating = this->get(col, row); if (rating != NOT_CLASSIFIED) { BLOB_CHOICE_IT b_it(rating); int counter = 0; for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - tprintf("%s ", - unicharset.id_to_unichar(b_it.data()->unichar_id())); + tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id())); ++counter; if (counter == 3) break; } diff --git a/src/ccstruct/matrix.h b/src/ccstruct/matrix.h index 4fd4339cda..2b9178fda0 100644 --- a/src/ccstruct/matrix.h +++ b/src/ccstruct/matrix.h @@ -52,27 +52,31 @@ class GENERIC_2D_ARRAY { // member will be routed to the base class implementation. Subclasses can // either pass the memory in, or allocate after by calling Resize(). GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array) - : empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) { + : empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) { size_allocated_ = dim1 * dim2; } // Original constructor for a full rectangular matrix DOES allocate memory // and initialize it to empty. GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty) - : empty_(empty), dim1_(dim1), dim2_(dim2) { + : empty_(empty), dim1_(dim1), dim2_(dim2) { int new_size = dim1 * dim2; array_ = new T[new_size]; size_allocated_ = new_size; - for (int i = 0; i < size_allocated_; ++i) - array_[i] = empty_; + for (int i = 0; i < size_allocated_; ++i) array_[i] = empty_; } // Default constructor for array allocation. Use Resize to set the size. GENERIC_2D_ARRAY() - : array_(nullptr), empty_(static_cast(0)), dim1_(0), dim2_(0), - size_allocated_(0) { - } + : array_(nullptr), + empty_(static_cast(0)), + dim1_(0), + dim2_(0), + size_allocated_(0) {} GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY& src) - : array_(nullptr), empty_(static_cast(0)), dim1_(0), dim2_(0), - size_allocated_(0) { + : array_(nullptr), + empty_(static_cast(0)), + dim1_(0), + dim2_(0), + size_allocated_(0) { *this = src; } virtual ~GENERIC_2D_ARRAY() { delete[] array_; } @@ -89,7 +93,7 @@ class GENERIC_2D_ARRAY { void ResizeNoInit(int size1, int size2, int pad = 0) { int new_size = size1 * size2 + pad; if (new_size > size_allocated_) { - delete [] array_; + delete[] array_; array_ = new T[new_size]; size_allocated_ = new_size; } @@ -133,8 +137,7 @@ class GENERIC_2D_ARRAY { // Sets all the elements of the array to the empty value. void Clear() { int total_size = num_elements(); - for (int i = 0; i < total_size; ++i) - array_[i] = empty_; + for (int i = 0; i < total_size; ++i) array_[i] = empty_; } // Writes to the given file. Returns false in case of error. @@ -164,8 +167,7 @@ class GENERIC_2D_ARRAY { int size = num_elements(); if (fread(array_, sizeof(*array_), size, fp) != size) return false; if (swap) { - for (int i = 0; i < size; ++i) - ReverseN(&array_[i], sizeof(array_[i])); + for (int i = 0; i < size; ++i) ReverseN(&array_[i], sizeof(array_[i])); } return true; } @@ -225,12 +227,8 @@ class GENERIC_2D_ARRAY { } // Get the item at a specified location from the matrix. - T get(ICOORD pos) const { - return array_[this->index(pos.x(), pos.y())]; - } - T get(int column, int row) const { - return array_[this->index(column, row)]; - } + T get(ICOORD pos) const { return array_[this->index(pos.x(), pos.y())]; } + T get(int column, int row) const { return array_[this->index(column, row)]; } // Return a reference to the element at the specified location. const T& operator()(int column, int row) const { return array_[this->index(column, row)]; @@ -240,9 +238,7 @@ class GENERIC_2D_ARRAY { } // Allow access using array[column][row]. NOTE that the indices are // in the same left-to-right order as the () indexing. - T* operator[](int column) { - return &array_[this->index(column, 0)]; - } + T* operator[](int column) { return &array_[this->index(column, 0)]; } const T* operator[](int column) const { return &array_[this->index(column, 0)]; } @@ -306,8 +302,7 @@ class GENERIC_2D_ARRAY { int size = num_elements(); for (int i = 0; i < size; ++i) { const T& value = array_[i]; - if (value < rangemin || rangemax < value) - return false; + if (value < rangemin || rangemax < value) return false; } return true; } @@ -456,8 +451,7 @@ class GENERIC_2D_ARRAY { int size = num_elements(); for (int i = 0; i < size; ++i) { T matrix_cell = array_[i]; - if (matrix_cell != empty_) - delete matrix_cell; + if (matrix_cell != empty_) delete matrix_cell; } } @@ -520,8 +514,7 @@ class BandTriMatrix : public GENERIC_2D_ARRAY { // Initialize all the elements of the array to empty instead of assuming // that a default constructor can be used. BandTriMatrix(int dim1, int dim2, const T& empty) - : GENERIC_2D_ARRAY(dim1, dim2, empty) { - } + : GENERIC_2D_ARRAY(dim1, dim2, empty) {} // The default destructor will do. // Provide the dimensions of this matrix. @@ -553,8 +546,8 @@ class BandTriMatrix : public GENERIC_2D_ARRAY { if (col < this->dim1_ && j < this->dim2_) { new_array[new_index] = this->get(col, col + j); } else if (col >= this->dim1_ && j < array2->dim2_) { - new_array[new_index] = array2->get(col - this->dim1_, - col - this->dim1_ + j); + new_array[new_index] = + array2->get(col - this->dim1_, col - this->dim1_ + j); array2->put(col - this->dim1_, col - this->dim1_ + j, nullptr); } else { new_array[new_index] = this->empty_; @@ -568,10 +561,11 @@ class BandTriMatrix : public GENERIC_2D_ARRAY { } }; -class MATRIX : public BandTriMatrix { +class MATRIX : public BandTriMatrix { public: MATRIX(int dimension, int bandwidth) - : BandTriMatrix(dimension, bandwidth, NOT_CLASSIFIED) {} + : BandTriMatrix(dimension, bandwidth, NOT_CLASSIFIED) { + } // Returns true if there are any real classification results. bool Classified(int col, int row, int wildcard_id) const; @@ -593,22 +587,22 @@ class MATRIX : public BandTriMatrix { MATRIX* DeepCopy() const; // Print a shortened version of the contents of the matrix. - void print(const UNICHARSET &unicharset) const; + void print(const UNICHARSET& unicharset) const; }; struct MATRIX_COORD { - static void Delete(void *arg) { - MATRIX_COORD *c = static_cast(arg); + static void Delete(void* arg) { + MATRIX_COORD* c = static_cast(arg); delete c; } // Default constructor required by GenericHeap. MATRIX_COORD() : col(0), row(0) {} - MATRIX_COORD(int c, int r): col(c), row(r) {} + MATRIX_COORD(int c, int r) : col(c), row(r) {} ~MATRIX_COORD() {} - bool Valid(const MATRIX &m) const { - return 0 <= col && col < m.dimension() && - col <= row && row < col + m.bandwidth() && row < m.dimension(); + bool Valid(const MATRIX& m) const { + return 0 <= col && col < m.dimension() && col <= row && + row < col + m.bandwidth() && row < m.dimension(); } // Remaps the col,row pair to split the blob at the given (ind,ind) diagonal diff --git a/src/ccstruct/mod128.cpp b/src/ccstruct/mod128.cpp index 3275fd1242..03e40baf6b 100644 --- a/src/ccstruct/mod128.cpp +++ b/src/ccstruct/mod128.cpp @@ -17,44 +17,33 @@ * **********************************************************************/ -#include "mod128.h" +#include "mod128.h" const int16_t idirtab[] = { - 1000, 0, 998, 49, 995, 98, 989, 146, - 980, 195, 970, 242, 956, 290, 941, 336, - 923, 382, 903, 427, 881, 471, 857, 514, - 831, 555, 803, 595, 773, 634, 740, 671, - 707, 707, 671, 740, 634, 773, 595, 803, - 555, 831, 514, 857, 471, 881, 427, 903, - 382, 923, 336, 941, 290, 956, 242, 970, - 195, 980, 146, 989, 98, 995, 49, 998, - 0, 1000, -49, 998, -98, 995, -146, 989, - -195, 980, -242, 970, -290, 956, -336, 941, - -382, 923, -427, 903, -471, 881, -514, 857, - -555, 831, -595, 803, -634, 773, -671, 740, - -707, 707, -740, 671, -773, 634, -803, 595, - -831, 555, -857, 514, -881, 471, -903, 427, - -923, 382, -941, 336, -956, 290, -970, 242, - -980, 195, -989, 146, -995, 98, -998, 49, - -1000, 0, -998, -49, -995, -98, -989, -146, - -980, -195, -970, -242, -956, -290, -941, -336, - -923, -382, -903, -427, -881, -471, -857, -514, - -831, -555, -803, -595, -773, -634, -740, -671, - -707, -707, -671, -740, -634, -773, -595, -803, - -555, -831, -514, -857, -471, -881, -427, -903, - -382, -923, -336, -941, -290, -956, -242, -970, - -195, -980, -146, -989, -98, -995, -49, -998, - 0, -1000, 49, -998, 98, -995, 146, -989, - 195, -980, 242, -970, 290, -956, 336, -941, - 382, -923, 427, -903, 471, -881, 514, -857, - 555, -831, 595, -803, 634, -773, 671, -740, - 707, -707, 740, -671, 773, -634, 803, -595, - 831, -555, 857, -514, 881, -471, 903, -427, - 923, -382, 941, -336, 956, -290, 970, -242, - 980, -195, 989, -146, 995, -98, 998, -49 -}; + 1000, 0, 998, 49, 995, 98, 989, 146, 980, 195, 970, 242, + 956, 290, 941, 336, 923, 382, 903, 427, 881, 471, 857, 514, + 831, 555, 803, 595, 773, 634, 740, 671, 707, 707, 671, 740, + 634, 773, 595, 803, 555, 831, 514, 857, 471, 881, 427, 903, + 382, 923, 336, 941, 290, 956, 242, 970, 195, 980, 146, 989, + 98, 995, 49, 998, 0, 1000, -49, 998, -98, 995, -146, 989, + -195, 980, -242, 970, -290, 956, -336, 941, -382, 923, -427, 903, + -471, 881, -514, 857, -555, 831, -595, 803, -634, 773, -671, 740, + -707, 707, -740, 671, -773, 634, -803, 595, -831, 555, -857, 514, + -881, 471, -903, 427, -923, 382, -941, 336, -956, 290, -970, 242, + -980, 195, -989, 146, -995, 98, -998, 49, -1000, 0, -998, -49, + -995, -98, -989, -146, -980, -195, -970, -242, -956, -290, -941, -336, + -923, -382, -903, -427, -881, -471, -857, -514, -831, -555, -803, -595, + -773, -634, -740, -671, -707, -707, -671, -740, -634, -773, -595, -803, + -555, -831, -514, -857, -471, -881, -427, -903, -382, -923, -336, -941, + -290, -956, -242, -970, -195, -980, -146, -989, -98, -995, -49, -998, + 0, -1000, 49, -998, 98, -995, 146, -989, 195, -980, 242, -970, + 290, -956, 336, -941, 382, -923, 427, -903, 471, -881, 514, -857, + 555, -831, 595, -803, 634, -773, 671, -740, 707, -707, 740, -671, + 773, -634, 803, -595, 831, -555, 857, -514, 881, -471, 903, -427, + 923, -382, 941, -336, 956, -290, 970, -242, 980, -195, 989, -146, + 995, -98, 998, -49}; -const ICOORD *dirtab = (ICOORD *) idirtab; +const ICOORD* dirtab = (ICOORD*)idirtab; /********************************************************************** * DIR128::DIR128 @@ -62,14 +51,14 @@ const ICOORD *dirtab = (ICOORD *) idirtab; * Quantize the direction of an FCOORD to make a DIR128. **********************************************************************/ -DIR128::DIR128( //from fcoord - const FCOORD fc //vector to quantize - ) { - int high, low, current; //binary search +DIR128::DIR128( // from fcoord + const FCOORD fc // vector to quantize +) { + int high, low, current; // binary search low = 0; - if (fc.y () == 0) { - if (fc.x () >= 0) + if (fc.y() == 0) { + if (fc.x() >= 0) dir = 0; else dir = MODULUS / 2; @@ -82,7 +71,6 @@ DIR128::DIR128( //from fcoord low = current; else high = current; - } - while (high - low > 1); + } while (high - low > 1); dir = low; } diff --git a/src/ccstruct/mod128.h b/src/ccstruct/mod128.h index 7afa1453b2..f32e2b31e0 100644 --- a/src/ccstruct/mod128.h +++ b/src/ccstruct/mod128.h @@ -17,67 +17,64 @@ * **********************************************************************/ -#ifndef MOD128_H -#define MOD128_H +#ifndef MOD128_H +#define MOD128_H -#include "points.h" +#include "points.h" -#define MODULUS 128 /*range of directions */ -#define DIRBITS 7 //no of bits used -#define DIRSCALE 1000 //length of vector +#define MODULUS 128 /*range of directions */ +#define DIRBITS 7 // no of bits used +#define DIRSCALE 1000 // length of vector -class DLLSYM DIR128 -{ - public: - DIR128() = default; +class DLLSYM DIR128 { + public: + DIR128() = default; - DIR128( //constructor - int16_t value) { //value to assign - value %= MODULUS; //modulo arithmetic - if (value < 0) - value += MODULUS; //done properly - dir = (int8_t) value; - } - DIR128(const FCOORD fc); //quantize vector + DIR128( // constructor + int16_t value) { // value to assign + value %= MODULUS; // modulo arithmetic + if (value < 0) value += MODULUS; // done properly + dir = (int8_t)value; + } + DIR128(const FCOORD fc); // quantize vector - DIR128 & operator= ( //assign of int16_t - int16_t value) { //value to assign - value %= MODULUS; //modulo arithmetic - if (value < 0) - value += MODULUS; //done properly - dir = (int8_t) value; - return *this; - } - int8_t operator- ( //subtraction - const DIR128 & minus) const//for signed result - { - //result - int16_t result = dir - minus.dir; + DIR128& operator=( // assign of int16_t + int16_t value) { // value to assign + value %= MODULUS; // modulo arithmetic + if (value < 0) value += MODULUS; // done properly + dir = (int8_t)value; + return *this; + } + int8_t operator-( // subtraction + const DIR128& minus) const // for signed result + { + // result + int16_t result = dir - minus.dir; - if (result > MODULUS / 2) - result -= MODULUS; //get in range - else if (result < -MODULUS / 2) - result += MODULUS; - return (int8_t) result; - } - DIR128 operator+ ( //addition - const DIR128 & add) const //of itself - { - DIR128 result; //sum + if (result > MODULUS / 2) + result -= MODULUS; // get in range + else if (result < -MODULUS / 2) + result += MODULUS; + return (int8_t)result; + } + DIR128 operator+( // addition + const DIR128& add) const // of itself + { + DIR128 result; // sum - result = dir + add.dir; //let = do the work - return result; - } - DIR128 & operator+= ( //same as + - const DIR128 & add) { - *this = dir + add.dir; //let = do the work - return *this; - } - int8_t get_dir() const { //access function - return dir; - } + result = dir + add.dir; // let = do the work + return result; + } + DIR128& operator+=( // same as + + const DIR128& add) { + *this = dir + add.dir; // let = do the work + return *this; + } + int8_t get_dir() const { // access function + return dir; + } - private: - int8_t dir; //a direction + private: + int8_t dir; // a direction }; #endif diff --git a/src/ccstruct/normalis.cpp b/src/ccstruct/normalis.cpp index 5afcf9a85b..10c58f99cc 100644 --- a/src/ccstruct/normalis.cpp +++ b/src/ccstruct/normalis.cpp @@ -34,17 +34,14 @@ const int kSloppyTolerance = 4; // Final tolerance in pixels added to the computed xheight range. const float kFinalPixelTolerance = 0.125f; -DENORM::DENORM() { - Init(); -} +DENORM::DENORM() { Init(); } -DENORM::DENORM(const DENORM &src) { +DENORM::DENORM(const DENORM& src) { rotation_ = nullptr; *this = src; } - -DENORM & DENORM::operator=(const DENORM & src) { +DENORM& DENORM::operator=(const DENORM& src) { Clear(); inverse_ = src.inverse_; predecessor_ = src.predecessor_; @@ -63,9 +60,7 @@ DENORM & DENORM::operator=(const DENORM & src) { return *this; } -DENORM::~DENORM() { - Clear(); -} +DENORM::~DENORM() { Clear(); } // Initializes the denorm for a transformation. For details see the large // comment in normalis.h. @@ -92,11 +87,9 @@ DENORM::~DENORM() { // // final_xshift: The x component of the final translation. // final_yshift: The y component of the final translation. -void DENORM::SetupNormalization(const BLOCK* block, - const FCOORD* rotation, - const DENORM* predecessor, - float x_origin, float y_origin, - float x_scale, float y_scale, +void DENORM::SetupNormalization(const BLOCK* block, const FCOORD* rotation, + const DENORM* predecessor, float x_origin, + float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift) { Clear(); block_ = block; @@ -153,9 +146,8 @@ void DENORM::SetupNormalization(const BLOCK* block, // pre-initialized to be the same size as box. Each element will contain the // minimum of x and y run-length as shown above. static void ComputeRunlengthImage( - const TBOX& box, - const GenericVector >& x_coords, - const GenericVector >& y_coords, + const TBOX& box, const GenericVector>& x_coords, + const GenericVector>& y_coords, GENERIC_2D_ARRAY* minruns) { int width = box.width(); int height = box.height(); @@ -187,15 +179,13 @@ static void ComputeRunlengthImage( int x_edge = ClipToRange(x_coords[iy][i], 0, width); int gap = x_edge - x; while (x < x_edge) { - if (gap < (*minruns)(x, iy)) - (*minruns)(x, iy) = gap; + if (gap < (*minruns)(x, iy)) (*minruns)(x, iy) = gap; ++x; } } int gap = width - x; while (x < width) { - if (gap < (*minruns)(x, iy)) - (*minruns)(x, iy) = gap; + if (gap < (*minruns)(x, iy)) (*minruns)(x, iy) = gap; ++x; } } @@ -264,11 +254,11 @@ static void ComputeEdgeDensityProfiles(const TBOX& box, // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. // The second-level vectors must all be sorted in ascending order. // See comments on the helper functions above for more details. -void DENORM::SetupNonLinear( - const DENORM* predecessor, const TBOX& box, float target_width, - float target_height, float final_xshift, float final_yshift, - const GenericVector >& x_coords, - const GenericVector >& y_coords) { +void DENORM::SetupNonLinear(const DENORM* predecessor, const TBOX& box, + float target_width, float target_height, + float final_xshift, float final_yshift, + const GenericVector>& x_coords, + const GenericVector>& y_coords) { Clear(); predecessor_ = predecessor; // x_map_ and y_map_ store a mapping from input x and y coordinate to output @@ -312,15 +302,14 @@ void DENORM::LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const { void DENORM::LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const { FCOORD translated(pt.x() - x_origin_, pt.y() - y_origin_); if (x_map_ != nullptr && y_map_ != nullptr) { - int x = ClipToRange(IntCastRounded(translated.x()), 0, x_map_->size()-1); + int x = ClipToRange(IntCastRounded(translated.x()), 0, x_map_->size() - 1); translated.set_x((*x_map_)[x]); - int y = ClipToRange(IntCastRounded(translated.y()), 0, y_map_->size()-1); + int y = ClipToRange(IntCastRounded(translated.y()), 0, y_map_->size() - 1); translated.set_y((*y_map_)[y]); } else { translated.set_x(translated.x() * x_scale_); translated.set_y(translated.y() * y_scale_); - if (rotation_ != nullptr) - translated.rotate(*rotation_); + if (rotation_ != nullptr) translated.rotate(*rotation_); } transformed->set_x(translated.x() + final_xshift_); transformed->set_y(translated.y() + final_yshift_); @@ -412,10 +401,8 @@ void DENORM::LocalNormBlob(TBLOB* blob) const { TBOX blob_box = blob->bounding_box(); ICOORD translation(-IntCastRounded(x_origin_), -IntCastRounded(y_origin_)); blob->Move(translation); - if (y_scale_ != 1.0f) - blob->Scale(y_scale_); - if (rotation_ != nullptr) - blob->Rotate(*rotation_); + if (y_scale_ != 1.0f) blob->Scale(y_scale_); + if (rotation_ != nullptr) blob->Rotate(*rotation_); translation.set_x(IntCastRounded(final_xshift_)); translation.set_y(IntCastRounded(final_yshift_)); blob->Move(translation); @@ -426,15 +413,14 @@ void DENORM::LocalNormBlob(TBLOB* blob) const { // initial crude x-height estimate (such as word size) and this denoting the // transformation that was used. void DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset, - const TBOX& bbox, - float* min_xht, float* max_xht, float* yshift) const { + const TBOX& bbox, float* min_xht, float* max_xht, + float* yshift) const { // Default return -- accept anything. *yshift = 0.0f; *min_xht = 0.0f; *max_xht = MAX_FLOAT32; - if (!unicharset.top_bottom_useful()) - return; + if (!unicharset.top_bottom_useful()) return; // Clip the top and bottom to the limit of normalized feature space. int top = ClipToRange(bbox.top(), 0, kBlnCellHeight - 1); @@ -447,8 +433,8 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset, tolerance = y_scale() * kSloppyTolerance; int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, - &min_top, &max_top); + unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, + &max_top); // Calculate the scale factor we'll use to get to image y-pixels double midx = (bbox.left() + bbox.right()) / 2.0; @@ -504,14 +490,13 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset, // Prints the content of the DENORM for debug purposes. void DENORM::Print() const { if (pix_ != nullptr) { - tprintf("Pix dimensions %d x %d x %d\n", - pixGetWidth(pix_), pixGetHeight(pix_), pixGetDepth(pix_)); + tprintf("Pix dimensions %d x %d x %d\n", pixGetWidth(pix_), + pixGetHeight(pix_), pixGetDepth(pix_)); } - if (inverse_) - tprintf("Inverse\n"); + if (inverse_) tprintf("Inverse\n"); if (block_ && block_->re_rotation().x() != 1.0f) { - tprintf("Block rotation %g, %g\n", - block_->re_rotation().x(), block_->re_rotation().y()); + tprintf("Block rotation %g, %g\n", block_->re_rotation().x(), + block_->re_rotation().y()); } tprintf("Input Origin = (%g, %g)\n", x_origin_, y_origin_); if (x_map_ != nullptr && y_map_ != nullptr) { @@ -536,7 +521,6 @@ void DENORM::Print() const { } } - // ============== Private Code ====================== // Free allocated memory and clear pointers. diff --git a/src/ccstruct/normalis.h b/src/ccstruct/normalis.h index 20e5011490..6cf6dad34f 100644 --- a/src/ccstruct/normalis.h +++ b/src/ccstruct/normalis.h @@ -17,8 +17,8 @@ * **********************************************************************/ -#ifndef NORMALIS_H -#define NORMALIS_H +#ifndef NORMALIS_H +#define NORMALIS_H #include #include "genericvector.h" @@ -29,7 +29,7 @@ const int kBlnXHeight = 128; // x-height for baseline normalization. const int kBlnBaselineOffset = 64; // offset for baseline normalization. struct Pix; -class ROW; // Forward decl +class ROW; // Forward decl class BLOCK; class FCOORD; struct TBLOB; @@ -54,7 +54,7 @@ class DENORM { DENORM(); // Copying a DENORM is allowed. - DENORM(const DENORM &); + DENORM(const DENORM&); DENORM& operator=(const DENORM&); ~DENORM(); @@ -173,11 +173,9 @@ class DENORM { // 1.0f, // 1.0f, // 0, 0); - void SetupNormalization(const BLOCK* block, - const FCOORD* rotation, - const DENORM* predecessor, - float x_origin, float y_origin, - float x_scale, float y_scale, + void SetupNormalization(const BLOCK* block, const FCOORD* rotation, + const DENORM* predecessor, float x_origin, + float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift); // Sets up the DENORM to execute a non-linear transformation based on @@ -197,8 +195,8 @@ class DENORM { void SetupNonLinear(const DENORM* predecessor, const TBOX& box, float target_width, float target_height, float final_xshift, float final_yshift, - const GenericVector >& x_coords, - const GenericVector >& y_coords); + const GenericVector>& x_coords, + const GenericVector>& y_coords); // Transforms the given coords one step forward to normalized space, without // using any block rotation or predecessor. @@ -206,9 +204,9 @@ class DENORM { void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const; // Transforms the given coords forward to normalized space using the // full transformation sequence defined by the block rotation, the - // predecessors, deepest first, and finally this. If first_norm is not nullptr, - // then the first and deepest transformation used is first_norm, ending - // with this, and the block rotation will not be applied. + // predecessors, deepest first, and finally this. If first_norm is not + // nullptr, then the first and deepest transformation used is first_norm, + // ending with this, and the block rotation will not be applied. void NormTransform(const DENORM* first_norm, const TPOINT& pt, TPOINT* transformed) const; void NormTransform(const DENORM* first_norm, const FCOORD& pt, @@ -237,47 +235,26 @@ class DENORM { // size) and this denoting the transformation that was used. // Also returns the amount the character must have shifted up or down. void XHeightRange(int unichar_id, const UNICHARSET& unicharset, - const TBOX& bbox, - float* min_xht, - float* max_xht, + const TBOX& bbox, float* min_xht, float* max_xht, float* yshift) const; // Prints the content of the DENORM for debug purposes. void Print() const; - Pix* pix() const { - return pix_; - } - void set_pix(Pix* pix) { - pix_ = pix; - } - bool inverse() const { - return inverse_; - } - void set_inverse(bool value) { - inverse_ = value; - } + Pix* pix() const { return pix_; } + void set_pix(Pix* pix) { pix_ = pix; } + bool inverse() const { return inverse_; } + void set_inverse(bool value) { inverse_ = value; } const DENORM* RootDenorm() const { - if (predecessor_ != nullptr) - return predecessor_->RootDenorm(); + if (predecessor_ != nullptr) return predecessor_->RootDenorm(); return this; } - const DENORM* predecessor() const { - return predecessor_; - } + const DENORM* predecessor() const { return predecessor_; } // Accessors - perhaps should not be needed. - float x_scale() const { - return x_scale_; - } - float y_scale() const { - return y_scale_; - } - const BLOCK* block() const { - return block_; - } - void set_block(const BLOCK* block) { - block_ = block; - } + float x_scale() const { return x_scale_; } + float y_scale() const { return y_scale_; } + const BLOCK* block() const { return block_; } + void set_block(const BLOCK* block) { block_ = block; } private: // Free allocated memory and clear pointers. diff --git a/src/ccstruct/ocrblock.cpp b/src/ccstruct/ocrblock.cpp index 7fbaf1e496..0febf50be6 100644 --- a/src/ccstruct/ocrblock.cpp +++ b/src/ccstruct/ocrblock.cpp @@ -24,26 +24,27 @@ #include "stepblob.h" #include "tprintf.h" -#define BLOCK_LABEL_HEIGHT 150 //char height of block id +#define BLOCK_LABEL_HEIGHT 150 // char height of block id -ELISTIZE (BLOCK) +ELISTIZE(BLOCK) /** * BLOCK::BLOCK * * Constructor for a simple rectangular block. */ -BLOCK::BLOCK(const char *name, //< filename - BOOL8 prop, //< proportional - int16_t kern, //< kerning - int16_t space, //< spacing - int16_t xmin, //< bottom left - int16_t ymin, int16_t xmax, //< top right +BLOCK::BLOCK(const char* name, //< filename + BOOL8 prop, //< proportional + int16_t kern, //< kerning + int16_t space, //< spacing + int16_t xmin, //< bottom left + int16_t ymin, + int16_t xmax, //< top right int16_t ymax) - : pdblk(xmin, ymin, xmax, ymax), - filename(name), - re_rotation_(1.0f, 0.0f), - classify_rotation_(1.0f, 0.0f), - skew_(1.0f, 0.0f) { + : pdblk(xmin, ymin, xmax, ymax), + filename(name), + re_rotation_(1.0f, 0.0f), + classify_rotation_(1.0f, 0.0f), + skew_(1.0f, 0.0f) { ICOORDELT_IT left_it = &pdblk.leftside; ICOORDELT_IT right_it = &pdblk.rightside; @@ -51,16 +52,16 @@ BLOCK::BLOCK(const char *name, //< filename right_to_left_ = false; kerning = kern; spacing = space; - font_class = -1; //not assigned + font_class = -1; // not assigned cell_over_xheight_ = 2.0f; pdblk.hand_poly = nullptr; - left_it.set_to_list (&pdblk.leftside); - right_it.set_to_list (&pdblk.rightside); - //make default box - left_it.add_to_end (new ICOORDELT (xmin, ymin)); - left_it.add_to_end (new ICOORDELT (xmin, ymax)); - right_it.add_to_end (new ICOORDELT (xmax, ymin)); - right_it.add_to_end (new ICOORDELT (xmax, ymax)); + left_it.set_to_list(&pdblk.leftside); + right_it.set_to_list(&pdblk.rightside); + // make default box + left_it.add_to_end(new ICOORDELT(xmin, ymin)); + left_it.add_to_end(new ICOORDELT(xmin, ymax)); + right_it.add_to_end(new ICOORDELT(xmax, ymin)); + right_it.add_to_end(new ICOORDELT(xmax, ymax)); } /** @@ -70,13 +71,11 @@ BLOCK::BLOCK(const char *name, //< filename */ int decreasing_top_order( // - const void *row1, - const void *row2) { - return (*(ROW **) row2)->bounding_box ().top () - - (*(ROW **) row1)->bounding_box ().top (); + const void* row1, const void* row2) { + return (*(ROW**)row2)->bounding_box().top() - + (*(ROW**)row1)->bounding_box().top(); } - /** * BLOCK::rotate * @@ -119,10 +118,9 @@ void BLOCK::reflect_polygon_in_y_axis() { void BLOCK::sort_rows() { // order on "top" ROW_IT row_it(&rows); - row_it.sort (decreasing_top_order); + row_it.sort(decreasing_top_order); } - /** * BLOCK::compress * @@ -131,36 +129,35 @@ void BLOCK::sort_rows() { // order on "top" */ void BLOCK::compress() { // squash it up - #define ROW_SPACING 5 +#define ROW_SPACING 5 ROW_IT row_it(&rows); - ROW *row; - ICOORD row_spacing (0, ROW_SPACING); + ROW* row; + ICOORD row_spacing(0, ROW_SPACING); ICOORDELT_IT icoordelt_it; sort_rows(); - pdblk.box = TBOX (pdblk.box.topleft (), pdblk.box.topleft ()); - pdblk.box.move_bottom_edge (ROW_SPACING); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - row->move (pdblk.box.botleft () - row_spacing - - row->bounding_box ().topleft ()); - pdblk.box += row->bounding_box (); + pdblk.box = TBOX(pdblk.box.topleft(), pdblk.box.topleft()); + pdblk.box.move_bottom_edge(ROW_SPACING); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + row->move(pdblk.box.botleft() - row_spacing - + row->bounding_box().topleft()); + pdblk.box += row->bounding_box(); } - pdblk.leftside.clear (); - icoordelt_it.set_to_list (&pdblk.leftside); - icoordelt_it.add_to_end (new ICOORDELT (pdblk.box.left (), pdblk.box.bottom ())); - icoordelt_it.add_to_end (new ICOORDELT (pdblk.box.left (), pdblk.box.top ())); - pdblk.rightside.clear (); - icoordelt_it.set_to_list (&pdblk.rightside); - icoordelt_it.add_to_end (new ICOORDELT (pdblk.box.right (), pdblk.box.bottom ())); - icoordelt_it.add_to_end (new ICOORDELT (pdblk.box.right (), pdblk.box.top ())); + pdblk.leftside.clear(); + icoordelt_it.set_to_list(&pdblk.leftside); + icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.bottom())); + icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.top())); + pdblk.rightside.clear(); + icoordelt_it.set_to_list(&pdblk.rightside); + icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.bottom())); + icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.top())); } - /** * BLOCK::check_pitch * @@ -173,50 +170,48 @@ void BLOCK::check_pitch() { // check prop pitch = -1; } - /** * BLOCK::compress * * Compress and move in a single operation. */ -void BLOCK::compress( // squash it up - const ICOORD vec // and move - ) { - pdblk.box.move (vec); +void BLOCK::compress( // squash it up + const ICOORD vec // and move +) { + pdblk.box.move(vec); compress(); } - /** * BLOCK::print * * Print the info on a block */ -void BLOCK::print( //print list of sides - FILE*, //< file to print on - bool dump //< print full detail +void BLOCK::print( // print list of sides + FILE*, //< file to print on + bool dump //< print full detail ) { - ICOORDELT_IT it = &pdblk.leftside; //iterator + ICOORDELT_IT it = &pdblk.leftside; // iterator - pdblk.box.print (); - tprintf ("Proportional= %s\n", proportional ? "TRUE" : "FALSE"); - tprintf ("Kerning= %d\n", kerning); - tprintf ("Spacing= %d\n", spacing); - tprintf ("Fixed_pitch=%d\n", pitch); - tprintf ("Filename= %s\n", filename.string ()); + pdblk.box.print(); + tprintf("Proportional= %s\n", proportional ? "TRUE" : "FALSE"); + tprintf("Kerning= %d\n", kerning); + tprintf("Spacing= %d\n", spacing); + tprintf("Fixed_pitch=%d\n", pitch); + tprintf("Filename= %s\n", filename.string()); if (dump) { - tprintf ("Left side coords are:\n"); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); - tprintf ("\n"); - tprintf ("Right side coords are:\n"); - it.set_to_list (&pdblk.rightside); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); - tprintf ("\n"); + tprintf("Left side coords are:\n"); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) + tprintf("(%d,%d) ", it.data()->x(), it.data()->y()); + tprintf("\n"); + tprintf("Right side coords are:\n"); + it.set_to_list(&pdblk.rightside); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) + tprintf("(%d,%d) ", it.data()->x(), it.data()->y()); + tprintf("\n"); } } @@ -226,17 +221,16 @@ void BLOCK::print( //print list of sides * Assignment - duplicate the block structure, but with an EMPTY row list. */ -BLOCK & BLOCK::operator= ( //assignment -const BLOCK & source //from this +BLOCK& BLOCK::operator=( // assignment + const BLOCK& source // from this ) { - this->ELIST_LINK::operator= (source); + this->ELIST_LINK::operator=(source); pdblk = source.pdblk; proportional = source.proportional; kerning = source.kerning; spacing = source.spacing; - filename = source.filename; //STRINGs assign ok - if (!rows.empty ()) - rows.clear (); + filename = source.filename; // STRINGs assign ok + if (!rows.empty()) rows.clear(); re_rotation_ = source.re_rotation_; classify_rotation_ = source.classify_rotation_; skew_ = source.skew_; @@ -253,11 +247,10 @@ const BLOCK & source //from this // margin - return value, the distance from x,y to the left margin of the // block containing it. // If all segments were to the right of x, we return false and 0. -bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) { +bool LeftMargin(ICOORDELT_LIST* segments, int x, int* margin) { bool found = false; *margin = 0; - if (segments->empty()) - return found; + if (segments->empty()) return found; ICOORDELT_IT seg_it(segments); for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { int cur_margin = x - seg_it.data()->x(); @@ -283,11 +276,10 @@ bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) { // margin - return value, the distance from x,y to the right margin of the // block containing it. // If all segments were to the left of x, we return false and 0. -bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) { +bool RightMargin(ICOORDELT_LIST* segments, int x, int* margin) { bool found = false; *margin = 0; - if (segments->empty()) - return found; + if (segments->empty()) return found; ICOORDELT_IT seg_it(segments); for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x; @@ -338,7 +330,7 @@ void BLOCK::compute_row_margins() { // If Layout analysis was not called, default to this. POLY_BLOCK rect_block(pdblk.bounding_box(), PT_FLOWING_TEXT); - POLY_BLOCK *pblock = &rect_block; + POLY_BLOCK* pblock = &rect_block; if (pdblk.poly_block() != nullptr) { pblock = pdblk.poly_block(); } @@ -346,12 +338,12 @@ void BLOCK::compute_row_margins() { // Step One: Determine if there is a drop-cap. // TODO(eger): Fix up drop cap code for RTL languages. ROW_IT r_it(row_list()); - ROW *first_row = r_it.data(); - ROW *second_row = r_it.data_relative(1); + ROW* first_row = r_it.data(); + ROW* second_row = r_it.data_relative(1); // initialize the bottom of a fictitious drop cap far above the first line. - int drop_cap_bottom = first_row->bounding_box().top() + - first_row->bounding_box().height(); + int drop_cap_bottom = + first_row->bounding_box().top() + first_row->bounding_box().height(); int drop_cap_right = first_row->bounding_box().left(); int mid_second_line = second_row->bounding_box().top() - second_row->bounding_box().height() / 2; @@ -364,10 +356,8 @@ void BLOCK::compute_row_margins() { if (bbox.bottom() <= mid_second_line) { // we found a real drop cap first_row->set_has_drop_cap(true); - if (drop_cap_bottom > bbox.bottom()) - drop_cap_bottom = bbox.bottom(); - if (drop_cap_right < bbox.right()) - drop_cap_right = bbox.right(); + if (drop_cap_bottom > bbox.bottom()) drop_cap_bottom = bbox.bottom(); + if (drop_cap_right < bbox.right()) drop_cap_right = bbox.right(); } } } @@ -377,7 +367,7 @@ void BLOCK::compute_row_margins() { PB_LINE_IT lines(pblock); r_it.set_to_list(row_list()); for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { - ROW *row = r_it.data(); + ROW* row = r_it.data(); TBOX row_box = row->bounding_box(); int left_y = row->base_line(row_box.left()) + row->x_height(); int left_margin; @@ -387,10 +377,8 @@ void BLOCK::compute_row_margins() { if (row_box.top() >= drop_cap_bottom) { int drop_cap_distance = row_box.left() - row->space() - drop_cap_right; - if (drop_cap_distance < 0) - drop_cap_distance = 0; - if (drop_cap_distance < left_margin) - left_margin = drop_cap_distance; + if (drop_cap_distance < 0) drop_cap_distance = 0; + if (drop_cap_distance < left_margin) left_margin = drop_cap_distance; } int right_y = row->base_line(row_box.right()) + row->x_height(); @@ -485,7 +473,8 @@ void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list, BLOCK_IT block_it(block_list); for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { BLOCK* block = block_it.data(); - if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) + if (block->pdblk.poly_block() != nullptr && + !block->pdblk.poly_block()->IsText()) continue; // Don't touch non-text blocks. // Iterate over all rows in the block. ROW_IT row_it(block->row_list()); @@ -497,8 +486,8 @@ void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list, WERD_IT new_words_it(&new_words); for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { WERD* werd = werd_it.extract(); - WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs, - not_found_blobs); + WERD* new_werd = + werd->ConstructWerdWithNewBlobs(new_blobs, not_found_blobs); if (new_werd) { // Insert this new werd into the actual row's werd-list. Remove the // existing one. diff --git a/src/ccstruct/ocrblock.h b/src/ccstruct/ocrblock.h index d07fe3b46c..9aea35a435 100644 --- a/src/ccstruct/ocrblock.h +++ b/src/ccstruct/ocrblock.h @@ -17,38 +17,38 @@ * **********************************************************************/ -#ifndef OCRBLOCK_H -#define OCRBLOCK_H +#ifndef OCRBLOCK_H +#define OCRBLOCK_H -#include "ocrpara.h" -#include "ocrrow.h" -#include "pdblock.h" +#include "ocrpara.h" +#include "ocrrow.h" +#include "pdblock.h" -class BLOCK; //forward decl +class BLOCK; // forward decl -ELISTIZEH (BLOCK) -class BLOCK:public ELIST_LINK -//page block +ELISTIZEH(BLOCK) +class BLOCK : public ELIST_LINK +// page block { - friend class BLOCK_RECT_IT; //block iterator + friend class BLOCK_RECT_IT; // block iterator public: BLOCK() - : re_rotation_(1.0f, 0.0f), - classify_rotation_(1.0f, 0.0f), - skew_(1.0f, 0.0f) { + : re_rotation_(1.0f, 0.0f), + classify_rotation_(1.0f, 0.0f), + skew_(1.0f, 0.0f) { right_to_left_ = false; pdblk.hand_poly = nullptr; } - BLOCK(const char *name, //< filename + BLOCK(const char* name, //< filename BOOL8 prop, //< proportional - int16_t kern, //< kerning - int16_t space, //< spacing - int16_t xmin, //< bottom left + int16_t kern, //< kerning + int16_t space, //< spacing + int16_t xmin, //< bottom left int16_t ymin, - int16_t xmax, //< top right + int16_t xmax, //< top right int16_t ymax); - ~BLOCK () = default; + ~BLOCK() = default; /** * set space size etc. @@ -57,103 +57,60 @@ class BLOCK:public ELIST_LINK * @param space inter word size * @param ch_pitch pitch if fixed */ - void set_stats(BOOL8 prop, - int16_t kern, - int16_t space, - int16_t ch_pitch) { + void set_stats(BOOL8 prop, int16_t kern, int16_t space, int16_t ch_pitch) { proportional = prop; - kerning = (int8_t) kern; + kerning = (int8_t)kern; spacing = space; pitch = ch_pitch; } /// set char size - void set_xheight(int32_t height) { - xheight = height; - } + void set_xheight(int32_t height) { xheight = height; } /// set font class - void set_font_class(int16_t font) { - font_class = font; - } + void set_font_class(int16_t font) { font_class = font; } /// return proportional - BOOL8 prop() const { - return proportional; - } - bool right_to_left() const { - return right_to_left_; - } - void set_right_to_left(bool value) { - right_to_left_ = value; - } + BOOL8 prop() const { return proportional; } + bool right_to_left() const { return right_to_left_; } + void set_right_to_left(bool value) { right_to_left_ = value; } /// return pitch - int32_t fixed_pitch() const { - return pitch; - } + int32_t fixed_pitch() const { return pitch; } /// return kerning - int16_t kern() const { - return kerning; - } + int16_t kern() const { return kerning; } /// return font class - int16_t font() const { - return font_class; - } + int16_t font() const { return font_class; } /// return spacing - int16_t space() const { - return spacing; - } + int16_t space() const { return spacing; } /// return filename - const char *name() const { - return filename.string (); - } + const char* name() const { return filename.string(); } /// return xheight - int32_t x_height() const { - return xheight; - } - float cell_over_xheight() const { - return cell_over_xheight_; - } - void set_cell_over_xheight(float ratio) { - cell_over_xheight_ = ratio; - } + int32_t x_height() const { return xheight; } + float cell_over_xheight() const { return cell_over_xheight_; } + void set_cell_over_xheight(float ratio) { cell_over_xheight_ = ratio; } /// get rows - ROW_LIST *row_list() { - return &rows; - } + ROW_LIST* row_list() { return &rows; } // Compute the margins between the edges of each row and this block's // polyblock, and store the results in the rows. void compute_row_margins(); // get paragraphs - PARA_LIST *para_list() { - return ¶s_; - } + PARA_LIST* para_list() { return ¶s_; } /// get blobs - C_BLOB_LIST *blob_list() { - return &c_blobs; - } - C_BLOB_LIST *reject_blobs() { - return &rej_blobs; - } + C_BLOB_LIST* blob_list() { return &c_blobs; } + C_BLOB_LIST* reject_blobs() { return &rej_blobs; } FCOORD re_rotation() const { - return re_rotation_; // How to transform coords back to image. - } - void set_re_rotation(const FCOORD& rotation) { - re_rotation_ = rotation; + return re_rotation_; // How to transform coords back to image. } + void set_re_rotation(const FCOORD& rotation) { re_rotation_ = rotation; } FCOORD classify_rotation() const { - return classify_rotation_; // Apply this before classifying. + return classify_rotation_; // Apply this before classifying. } void set_classify_rotation(const FCOORD& rotation) { classify_rotation_ = rotation; } FCOORD skew() const { - return skew_; // Direction of true horizontal. - } - void set_skew(const FCOORD& skew) { - skew_ = skew; - } - const ICOORD& median_size() const { - return median_size_; + return skew_; // Direction of true horizontal. } + void set_skew(const FCOORD& skew) { skew_ = skew; } + const ICOORD& median_size() const { return median_size_; } void set_median_size(int x, int y) { median_size_.set_x(x); median_size_.set_y(y); @@ -188,30 +145,30 @@ class BLOCK:public ELIST_LINK /// dump whole table void print(FILE* fp, bool dump); - BLOCK& operator=(const BLOCK & source); - PDBLK pdblk; //< Page Description Block + BLOCK& operator=(const BLOCK& source); + PDBLK pdblk; //< Page Description Block private: - BOOL8 proportional; //< proportional - bool right_to_left_; //< major script is right to left. - int8_t kerning; //< inter blob gap - int16_t spacing; //< inter word gap - int16_t pitch; //< pitch of non-props - int16_t font_class; //< correct font class - int32_t xheight; //< height of chars - float cell_over_xheight_; //< Ratio of cell height to xheight. - STRING filename; //< name of block - ROW_LIST rows; //< rows in block - PARA_LIST paras_; //< paragraphs of block - C_BLOB_LIST c_blobs; //< before textord - C_BLOB_LIST rej_blobs; //< duff stuff - FCOORD re_rotation_; //< How to transform coords back to image. - FCOORD classify_rotation_; //< Apply this before classifying. - FCOORD skew_; //< Direction of true horizontal. - ICOORD median_size_; //< Median size of blobs. + BOOL8 proportional; //< proportional + bool right_to_left_; //< major script is right to left. + int8_t kerning; //< inter blob gap + int16_t spacing; //< inter word gap + int16_t pitch; //< pitch of non-props + int16_t font_class; //< correct font class + int32_t xheight; //< height of chars + float cell_over_xheight_; //< Ratio of cell height to xheight. + STRING filename; //< name of block + ROW_LIST rows; //< rows in block + PARA_LIST paras_; //< paragraphs of block + C_BLOB_LIST c_blobs; //< before textord + C_BLOB_LIST rej_blobs; //< duff stuff + FCOORD re_rotation_; //< How to transform coords back to image. + FCOORD classify_rotation_; //< Apply this before classifying. + FCOORD skew_; //< Direction of true horizontal. + ICOORD median_size_; //< Median size of blobs. }; -int decreasing_top_order(const void *row1, const void *row2); +int decreasing_top_order(const void* row1, const void* row2); // A function to print segmentation stats for the given block list. void PrintSegmentationStats(BLOCK_LIST* block_list); diff --git a/src/ccstruct/ocrpara.cpp b/src/ccstruct/ocrpara.cpp index 022d25c841..75e34c179f 100644 --- a/src/ccstruct/ocrpara.cpp +++ b/src/ccstruct/ocrpara.cpp @@ -19,14 +19,14 @@ #include -#include "ocrpara.h" #include "host.h" // For NearlyEqual() +#include "ocrpara.h" ELISTIZE(PARA) +using tesseract::JUSTIFICATION_CENTER; using tesseract::JUSTIFICATION_LEFT; using tesseract::JUSTIFICATION_RIGHT; -using tesseract::JUSTIFICATION_CENTER; using tesseract::JUSTIFICATION_UNKNOWN; static STRING ParagraphJustificationToString( @@ -43,8 +43,8 @@ static STRING ParagraphJustificationToString( } } -bool ParagraphModel::ValidFirstLine(int lmargin, int lindent, - int rindent, int rmargin) const { +bool ParagraphModel::ValidFirstLine(int lmargin, int lindent, int rindent, + int rmargin) const { switch (justification_) { case JUSTIFICATION_LEFT: return NearlyEqual(lmargin + lindent, margin_ + first_indent_, @@ -60,15 +60,13 @@ bool ParagraphModel::ValidFirstLine(int lmargin, int lindent, } } -bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, - int rindent, int rmargin) const { +bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, int rindent, + int rmargin) const { switch (justification_) { case JUSTIFICATION_LEFT: - return NearlyEqual(lmargin + lindent, margin_ + body_indent_, - tolerance_); + return NearlyEqual(lmargin + lindent, margin_ + body_indent_, tolerance_); case JUSTIFICATION_RIGHT: - return NearlyEqual(rmargin + rindent, margin_ + body_indent_, - tolerance_); + return NearlyEqual(rmargin + rindent, margin_ + body_indent_, tolerance_); case JUSTIFICATION_CENTER: return NearlyEqual(lindent, rindent, tolerance_ * 2); default: @@ -77,22 +75,22 @@ bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, } } -bool ParagraphModel::Comparable(const ParagraphModel &other) const { - if (justification_ != other.justification_) - return false; +bool ParagraphModel::Comparable(const ParagraphModel& other) const { + if (justification_ != other.justification_) return false; if (justification_ == JUSTIFICATION_CENTER || justification_ == JUSTIFICATION_UNKNOWN) return true; int tolerance = (tolerance_ + other.tolerance_) / 4; return NearlyEqual(margin_ + first_indent_, other.margin_ + other.first_indent_, tolerance) && - NearlyEqual(margin_ + body_indent_, - other.margin_ + other.body_indent_, tolerance); + NearlyEqual(margin_ + body_indent_, other.margin_ + other.body_indent_, + tolerance); } -STRING ParagraphModel::ToString() const { +STRING +ParagraphModel::ToString() const { char buffer[200]; - const STRING &alignment = ParagraphJustificationToString(justification_); + const STRING& alignment = ParagraphJustificationToString(justification_); snprintf(buffer, sizeof(buffer), "margin: %d, first_indent: %d, body_indent: %d, alignment: %s", margin_, first_indent_, body_indent_, alignment.string()); diff --git a/src/ccstruct/ocrpara.h b/src/ccstruct/ocrpara.h index cbe052177e..2f5e6dbb75 100644 --- a/src/ccstruct/ocrpara.h +++ b/src/ccstruct/ocrpara.h @@ -20,20 +20,23 @@ #ifndef TESSERACT_CCSTRUCT_OCRPARA_H_ #define TESSERACT_CCSTRUCT_OCRPARA_H_ -#include "publictypes.h" #include "elst.h" +#include "publictypes.h" #include "strngs.h" class ParagraphModel; struct PARA : public ELIST_LINK { public: - PARA() : model(nullptr), is_list_item(false), - is_very_first_or_continuation(false), has_drop_cap(false) {} + PARA() + : model(nullptr), + is_list_item(false), + is_very_first_or_continuation(false), + has_drop_cap(false) {} // We do not own the model, we just reference it. // model may be nullptr if there is not a good model for this paragraph. - const ParagraphModel *model; + const ParagraphModel* model; bool is_list_item; @@ -113,11 +116,8 @@ ELISTIZEH(PARA) // +--------------------------------+ class ParagraphModel { public: - ParagraphModel(tesseract::ParagraphJustification justification, - int margin, - int first_indent, - int body_indent, - int tolerance) + ParagraphModel(tesseract::ParagraphJustification justification, int margin, + int first_indent, int body_indent, int tolerance) : justification_(justification), margin_(margin), first_indent_(first_indent), @@ -125,8 +125,7 @@ class ParagraphModel { tolerance_(tolerance) { // Make one of {first_indent, body_indent} is 0. int added_margin = first_indent; - if (body_indent < added_margin) - added_margin = body_indent; + if (body_indent < added_margin) added_margin = body_indent; margin_ += added_margin; first_indent_ -= added_margin; body_indent_ -= added_margin; @@ -134,10 +133,10 @@ class ParagraphModel { ParagraphModel() : justification_(tesseract::JUSTIFICATION_UNKNOWN), - margin_(0), - first_indent_(0), - body_indent_(0), - tolerance_(0) { } + margin_(0), + first_indent_(0), + body_indent_(0), + tolerance_(0) {} // ValidFirstLine() and ValidBodyLine() take arguments describing a text line // in a block of text which we are trying to model: @@ -171,12 +170,12 @@ class ParagraphModel { bool is_flush() const { return (justification_ == tesseract::JUSTIFICATION_LEFT || justification_ == tesseract::JUSTIFICATION_RIGHT) && - abs(first_indent_ - body_indent_) <= tolerance_; + abs(first_indent_ - body_indent_) <= tolerance_; } // Return whether this model is likely to agree with the other model on most // paragraphs they are marked. - bool Comparable(const ParagraphModel &other) const; + bool Comparable(const ParagraphModel& other) const; STRING ToString() const; diff --git a/src/ccstruct/ocrrow.cpp b/src/ccstruct/ocrrow.cpp index 4458749266..762fb011e8 100644 --- a/src/ccstruct/ocrrow.cpp +++ b/src/ccstruct/ocrrow.cpp @@ -17,34 +17,33 @@ * **********************************************************************/ -#include "ocrrow.h" -#include "blobbox.h" +#include "ocrrow.h" +#include "blobbox.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif -ELISTIZE (ROW) +ELISTIZE(ROW) /********************************************************************** * ROW::ROW * * Constructor to build a ROW. Only the stats stuff are given here. * The words are added directly. **********************************************************************/ -ROW::ROW ( //constructor -int32_t spline_size, //no of segments -int32_t * xstarts, //segment boundaries -double *coeffs, //coefficients -float x_height, //line height -float ascenders, //ascender size -float descenders, //descender drop -int16_t kern, //char gap -int16_t space //word gap -) - : baseline(spline_size, xstarts, coeffs), - para_(nullptr) { - kerning = kern; //just store stuff +ROW::ROW( // constructor + int32_t spline_size, // no of segments + int32_t* xstarts, // segment boundaries + double* coeffs, // coefficients + float x_height, // line height + float ascenders, // ascender size + float descenders, // descender drop + int16_t kern, // char gap + int16_t space // word gap + ) + : baseline(spline_size, xstarts, coeffs), para_(nullptr) { + kerning = kern; // just store stuff spacing = space; xheight = x_height; ascrise = ascenders; @@ -55,7 +54,6 @@ int16_t space //word gap rmargin_ = 0; } - /********************************************************************** * ROW::ROW * @@ -63,12 +61,13 @@ int16_t space //word gap * The words are added directly. **********************************************************************/ -ROW::ROW( //constructor - TO_ROW *to_row, //source row - int16_t kern, //char gap - int16_t space //word gap - ) : para_(nullptr) { - kerning = kern; //just store stuff +ROW::ROW( // constructor + TO_ROW* to_row, // source row + int16_t kern, // char gap + int16_t space // word gap + ) + : para_(nullptr) { + kerning = kern; // just store stuff spacing = space; xheight = to_row->xheight; bodysize = to_row->body_size; @@ -85,7 +84,7 @@ ROW::ROW( //constructor TBOX ROW::restricted_bounding_box(bool upper_dots, bool lower_dots) const { TBOX box; // This is a read-only iteration of the words in the row. - WERD_IT it(const_cast(&words)); + WERD_IT it(const_cast(&words)); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { box += it.data()->restricted_bounding_box(upper_dots, lower_dots); } @@ -98,75 +97,73 @@ TBOX ROW::restricted_bounding_box(bool upper_dots, bool lower_dots) const { * Set the bounding box correctly **********************************************************************/ -void ROW::recalc_bounding_box() { //recalculate BB - WERD *word; //current word - WERD_IT it = &words; //words of ROW - int16_t left; //of word - int16_t prev_left; //old left - - if (!it.empty ()) { - word = it.data (); - prev_left = word->bounding_box ().left (); - it.forward (); - while (!it.at_first ()) { - word = it.data (); - left = word->bounding_box ().left (); +void ROW::recalc_bounding_box() { // recalculate BB + WERD* word; // current word + WERD_IT it = &words; // words of ROW + int16_t left; // of word + int16_t prev_left; // old left + + if (!it.empty()) { + word = it.data(); + prev_left = word->bounding_box().left(); + it.forward(); + while (!it.at_first()) { + word = it.data(); + left = word->bounding_box().left(); if (left < prev_left) { - it.move_to_first (); - //words in BB order - it.sort (word_comparator); + it.move_to_first(); + // words in BB order + it.sort(word_comparator); break; } prev_left = left; - it.forward (); + it.forward(); } } - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - word = it.data (); - if (it.at_first ()) - word->set_flag (W_BOL, TRUE); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + word = it.data(); + if (it.at_first()) + word->set_flag(W_BOL, TRUE); else - //not start of line - word->set_flag (W_BOL, FALSE); - if (it.at_last ()) - word->set_flag (W_EOL, TRUE); + // not start of line + word->set_flag(W_BOL, FALSE); + if (it.at_last()) + word->set_flag(W_EOL, TRUE); else - //not end of line - word->set_flag (W_EOL, FALSE); - //extend BB as reqd - bound_box += word->bounding_box (); + // not end of line + word->set_flag(W_EOL, FALSE); + // extend BB as reqd + bound_box += word->bounding_box(); } } - /********************************************************************** * ROW::move * * Reposition row by vector **********************************************************************/ -void ROW::move( // reposition row - const ICOORD vec // by vector - ) { +void ROW::move( // reposition row + const ICOORD vec // by vector +) { WERD_IT it(&words); // word iterator - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - it.data ()->move (vec); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) + it.data()->move(vec); - bound_box.move (vec); - baseline.move (vec); + bound_box.move(vec); + baseline.move(vec); } - /********************************************************************** * ROW::print * * Display members **********************************************************************/ -void ROW::print( //print - FILE *fp //file to print on - ) { +void ROW::print( // print + FILE* fp // file to print on +) { tprintf("Kerning= %d\n", kerning); tprintf("Spacing= %d\n", spacing); bound_box.print(); @@ -177,7 +174,6 @@ void ROW::print( //print tprintf("lmargin= %d, rmargin= %d\n", lmargin_, rmargin_); } - /********************************************************************** * ROW::plot * @@ -185,16 +181,16 @@ void ROW::print( //print **********************************************************************/ #ifndef GRAPHICS_DISABLED -void ROW::plot( //draw it - ScrollView* window, //window to draw in - ScrollView::Color colour //colour to draw in - ) { - WERD *word; //current word - WERD_IT it = &words; //words of ROW - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - word = it.data (); - word->plot (window, colour); //all in one colour +void ROW::plot( // draw it + ScrollView* window, // window to draw in + ScrollView::Color colour // colour to draw in +) { + WERD* word; // current word + WERD_IT it = &words; // words of ROW + + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + word = it.data(); + word->plot(window, colour); // all in one colour } } @@ -204,15 +200,15 @@ void ROW::plot( //draw it * Draw the ROW in rainbow colours. **********************************************************************/ -void ROW::plot( //draw it - ScrollView* window //window to draw in - ) { - WERD *word; //current word - WERD_IT it = &words; //words of ROW +void ROW::plot( // draw it + ScrollView* window // window to draw in +) { + WERD* word; // current word + WERD_IT it = &words; // words of ROW - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - word = it.data (); - word->plot (window); //in rainbow colours + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + word = it.data(); + word->plot(window); // in rainbow colours } } #endif // GRAPHICS_DISABLED @@ -223,17 +219,16 @@ void ROW::plot( //draw it * Assign rows by duplicating the row structure but NOT the WERDLIST **********************************************************************/ -ROW & ROW::operator= (const ROW & source) { - this->ELIST_LINK::operator= (source); +ROW& ROW::operator=(const ROW& source) { + this->ELIST_LINK::operator=(source); kerning = source.kerning; spacing = source.spacing; xheight = source.xheight; bodysize = source.bodysize; ascrise = source.ascrise; descdrop = source.descdrop; - if (!words.empty ()) - words.clear (); - baseline = source.baseline; //QSPLINES must do = + if (!words.empty()) words.clear(); + baseline = source.baseline; // QSPLINES must do = bound_box = source.bound_box; has_drop_cap_ = source.has_drop_cap_; lmargin_ = source.lmargin_; diff --git a/src/ccstruct/ocrrow.h b/src/ccstruct/ocrrow.h index a72ae2e61e..6743e23d23 100644 --- a/src/ccstruct/ocrrow.h +++ b/src/ccstruct/ocrrow.h @@ -17,8 +17,8 @@ * **********************************************************************/ -#ifndef OCRROW_H -#define OCRROW_H +#ifndef OCRROW_H +#define OCRROW_H #include @@ -29,136 +29,119 @@ class TO_ROW; struct PARA; -class ROW:public ELIST_LINK -{ - friend void tweak_row_baseline(ROW *, double, double); - public: - ROW() = default; - ROW( //constructor - int32_t spline_size, //no of segments - int32_t *xstarts, //segment boundaries - double *coeffs, //coefficients //ascender size - float x_height, - float ascenders, - float descenders, //descender size - int16_t kern, //char gap - int16_t space); //word gap - ROW( //constructor - TO_ROW *row, //textord row - int16_t kern, //char gap - int16_t space); //word gap - - WERD_LIST *word_list() { //get words - return &words; - } - - float base_line( //compute baseline - float xpos) const { //at the position - //get spline value - return (float) baseline.y (xpos); - } - float x_height() const { //return x height - return xheight; - } - void set_x_height(float new_xheight) { // set x height - xheight = new_xheight; - } - int32_t kern() const { //return kerning - return kerning; - } - float body_size() const { //return body size - return bodysize; - } - void set_body_size(float new_size) { // set body size - bodysize = new_size; - } - int32_t space() const { //return spacing - return spacing; - } - float ascenders() const { //return size - return ascrise; - } - float descenders() const { //return size - return descdrop; - } - TBOX bounding_box() const { //return bounding box - return bound_box; - } - // Returns the bounding box including the desired combination of upper and - // lower noise/diacritic elements. - TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const; - - void set_lmargin(int16_t lmargin) { - lmargin_ = lmargin; - } - void set_rmargin(int16_t rmargin) { - rmargin_ = rmargin; - } - int16_t lmargin() const { - return lmargin_; - } - int16_t rmargin() const { - return rmargin_; - } - - void set_has_drop_cap(bool has) { - has_drop_cap_ = has; - } - bool has_drop_cap() const { - return has_drop_cap_; - } - - void set_para(PARA *p) { - para_ = p; - } - PARA *para() const { - return para_; - } - - void recalc_bounding_box(); //recalculate BB - - void move( // reposition row - const ICOORD vec); // by vector - - void print( //print - FILE *fp); //file to print on - - #ifndef GRAPHICS_DISABLED - void plot( //draw one - ScrollView* window, //window to draw in - ScrollView::Color colour); //uniform colour - void plot( //draw one - ScrollView* window); //in rainbow colours - - void plot_baseline( //draw the baseline - ScrollView* window, //window to draw in - ScrollView::Color colour) { //colour to draw - //draw it - baseline.plot (window, colour); - } - #endif // GRAPHICS_DISABLED - ROW& operator= (const ROW & source); - - private: - int32_t kerning; //inter char gap - int32_t spacing; //inter word gap - TBOX bound_box; //bounding box - float xheight; //height of line - float ascrise; //size of ascenders - float descdrop; //-size of descenders - float bodysize; //CJK character size. (equals to - //xheight+ascrise by default) - WERD_LIST words; //words - QSPLINE baseline; //baseline spline - - // These get set after blocks have been determined. - bool has_drop_cap_; - int16_t lmargin_; // Distance to left polyblock margin. - int16_t rmargin_; // Distance to right polyblock margin. - - // This gets set during paragraph analysis. - PARA *para_; // Paragraph of which this row is part. +class ROW : public ELIST_LINK { + friend void tweak_row_baseline(ROW*, double, double); + + public: + ROW() = default; + ROW( // constructor + int32_t spline_size, // no of segments + int32_t* xstarts, // segment boundaries + double* coeffs, // coefficients //ascender size + float x_height, float ascenders, + float descenders, // descender size + int16_t kern, // char gap + int16_t space); // word gap + ROW( // constructor + TO_ROW* row, // textord row + int16_t kern, // char gap + int16_t space); // word gap + + WERD_LIST* word_list() { // get words + return &words; + } + + float base_line( // compute baseline + float xpos) const { // at the position + // get spline value + return (float)baseline.y(xpos); + } + float x_height() const { // return x height + return xheight; + } + void set_x_height(float new_xheight) { // set x height + xheight = new_xheight; + } + int32_t kern() const { // return kerning + return kerning; + } + float body_size() const { // return body size + return bodysize; + } + void set_body_size(float new_size) { // set body size + bodysize = new_size; + } + int32_t space() const { // return spacing + return spacing; + } + float ascenders() const { // return size + return ascrise; + } + float descenders() const { // return size + return descdrop; + } + TBOX bounding_box() const { // return bounding box + return bound_box; + } + // Returns the bounding box including the desired combination of upper and + // lower noise/diacritic elements. + TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const; + + void set_lmargin(int16_t lmargin) { lmargin_ = lmargin; } + void set_rmargin(int16_t rmargin) { rmargin_ = rmargin; } + int16_t lmargin() const { return lmargin_; } + int16_t rmargin() const { return rmargin_; } + + void set_has_drop_cap(bool has) { has_drop_cap_ = has; } + bool has_drop_cap() const { return has_drop_cap_; } + + void set_para(PARA* p) { para_ = p; } + PARA* para() const { return para_; } + + void recalc_bounding_box(); // recalculate BB + + void move( // reposition row + const ICOORD vec); // by vector + + void print( // print + FILE* fp); // file to print on + +#ifndef GRAPHICS_DISABLED + void plot( // draw one + ScrollView* window, // window to draw in + ScrollView::Color colour); // uniform colour + void plot( // draw one + ScrollView* window); // in rainbow colours + + void plot_baseline( // draw the baseline + ScrollView* window, // window to draw in + ScrollView::Color colour) { // colour to draw + // draw it + baseline.plot(window, colour); + } +#endif // GRAPHICS_DISABLED + ROW& operator=(const ROW& source); + + private: + int32_t kerning; // inter char gap + int32_t spacing; // inter word gap + TBOX bound_box; // bounding box + float xheight; // height of line + float ascrise; // size of ascenders + float descdrop; //-size of descenders + float bodysize; // CJK character size. (equals to + // xheight+ascrise by default) + WERD_LIST words; // words + QSPLINE baseline; // baseline spline + + // These get set after blocks have been determined. + bool has_drop_cap_; + int16_t lmargin_; // Distance to left polyblock margin. + int16_t rmargin_; // Distance to right polyblock margin. + + // This gets set during paragraph analysis. + PARA* para_; // Paragraph of which this row is part. }; -ELISTIZEH (ROW) +ELISTIZEH(ROW) #endif diff --git a/src/ccstruct/otsuthr.cpp b/src/ccstruct/otsuthr.cpp index 76b2e125e3..1734b41e6c 100644 --- a/src/ccstruct/otsuthr.cpp +++ b/src/ccstruct/otsuthr.cpp @@ -24,7 +24,6 @@ #include "helpers.h" #include "openclwrapper.h" - namespace tesseract { // Computes the Otsu threshold(s) for the given image rectangle, making one @@ -66,14 +65,14 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, for (int ch = 0; ch < num_channels; ++ch) { (*thresholds)[ch] = -1; (*hi_values)[ch] = -1; - int *histogram = &histogramAllChannels[kHistogramSize * ch]; + int* histogram = &histogramAllChannels[kHistogramSize * ch]; int H; int best_omega_0; int best_t = OtsuStats(histogram, &H, &best_omega_0); if (best_omega_0 == 0 || best_omega_0 == H) { - // This channel is empty. - continue; - } + // This channel is empty. + continue; + } // To be a convincing foreground we must have a small fraction of H // or to be a convincing background we must have a large fraction of H. // In between we assume this channel contains no thresholding information. @@ -107,9 +106,9 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, int best_omega_0; int best_t = OtsuStats(histogram, &H, &best_omega_0); if (best_omega_0 == 0 || best_omega_0 == H) { - // This channel is empty. - continue; - } + // This channel is empty. + continue; + } // To be a convincing foreground we must have a small fraction of H // or to be a convincing background we must have a large fraction of H. // In between we assume this channel contains no thresholding information. @@ -148,9 +147,8 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, // single channel. Each channel is always one byte per pixel. // Histogram is always a kHistogramSize(256) element array to count // occurrences of each pixel value. -void HistogramRect(Pix* src_pix, int channel, - int left, int top, int width, int height, - int* histogram) { +void HistogramRect(Pix* src_pix, int channel, int left, int top, int width, + int height, int* histogram) { PERF_COUNT_START("HistogramRect") int num_channels = pixGetDepth(src_pix) / 8; channel = ClipToRange(channel, 0, num_channels - 1); @@ -191,11 +189,9 @@ int OtsuStats(const int* histogram, int* H_out, int* omega0_out) { for (int t = 0; t < kHistogramSize - 1; ++t) { omega_0 += histogram[t]; mu_t += t * static_cast(histogram[t]); - if (omega_0 == 0) - continue; + if (omega_0 == 0) continue; omega_1 = H - omega_0; - if (omega_1 == 0) - break; + if (omega_1 == 0) break; mu_0 = mu_t / omega_0; mu_1 = (mu_T - mu_t) / omega_1; double sig_sq_B = mu_1 - mu_0; diff --git a/src/ccstruct/otsuthr.h b/src/ccstruct/otsuthr.h index dd35d23f31..04d52001bc 100644 --- a/src/ccstruct/otsuthr.h +++ b/src/ccstruct/otsuthr.h @@ -42,9 +42,8 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, // single channel. Each channel is always one byte per pixel. // Histogram is always a kHistogramSize(256) element array to count // occurrences of each pixel value. -void HistogramRect(Pix* src_pix, int channel, - int left, int top, int width, int height, - int* histogram); +void HistogramRect(Pix* src_pix, int channel, int left, int top, int width, + int height, int* histogram); // Computes the Otsu threshold(s) for the given histogram. // Also returns H = total count in histogram, and diff --git a/src/ccstruct/pageres.cpp b/src/ccstruct/pageres.cpp index 3db3b01c69..fae4eae62b 100644 --- a/src/ccstruct/pageres.cpp +++ b/src/ccstruct/pageres.cpp @@ -21,18 +21,21 @@ ** limitations under the License. * **********************************************************************/ -#include +#include #ifdef __UNIX__ -#include +#include #endif -#include "blamer.h" -#include "pageres.h" -#include "blobs.h" +#include "blamer.h" +#include "blobs.h" +#include "pageres.h" -ELISTIZE (BLOCK_RES) -CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES) +ELISTIZE(BLOCK_RES) +CLISTIZE(BLOCK_RES) +ELISTIZE(ROW_RES) +ELISTIZE(WERD_RES) -// Gain factor for computing thresholds that determine the ambiguity of a word. +// Gain factor for computing thresholds that determine the ambiguity of a +// word. static const double kStopperAmbiguityThresholdGain = 8.0; // Constant offset for computing thresholds that determine the ambiguity of a // word. @@ -52,7 +55,7 @@ const double kMaxWordGapRatio = 2.0; // TODO(rays) This is horrible. Replace with an enhance params training model. static double StopperAmbigThreshold(double f1, double f2) { return (f2 - f1) * kStopperAmbiguityThresholdGain - - kStopperAmbiguityThresholdOffset; + kStopperAmbiguityThresholdOffset; } /************************************************************************* @@ -60,17 +63,14 @@ static double StopperAmbigThreshold(double f1, double f2) { * * Constructor for page results *************************************************************************/ -PAGE_RES::PAGE_RES( - bool merge_similar_words, - BLOCK_LIST *the_block_list, - WERD_CHOICE **prev_word_best_choice_ptr) { +PAGE_RES::PAGE_RES(bool merge_similar_words, BLOCK_LIST* the_block_list, + WERD_CHOICE** prev_word_best_choice_ptr) { Init(); BLOCK_IT block_it(the_block_list); BLOCK_RES_IT block_res_it(&block_res_list); - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block_res_it.add_to_end(new BLOCK_RES(merge_similar_words, - block_it.data())); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block_res_it.add_to_end( + new BLOCK_RES(merge_similar_words, block_it.data())); } prev_word_best_choice = prev_word_best_choice_ptr; } @@ -81,13 +81,13 @@ PAGE_RES::PAGE_RES( * Constructor for BLOCK results *************************************************************************/ -BLOCK_RES::BLOCK_RES(bool merge_similar_words, BLOCK *the_block) { - ROW_IT row_it (the_block->row_list ()); +BLOCK_RES::BLOCK_RES(bool merge_similar_words, BLOCK* the_block) { + ROW_IT row_it(the_block->row_list()); ROW_RES_IT row_res_it(&row_res_list); char_count = 0; rej_count = 0; - font_class = -1; //not assigned + font_class = -1; // not assigned x_height = -1.0; font_assigned = false; bold = false; @@ -107,11 +107,11 @@ BLOCK_RES::BLOCK_RES(bool merge_similar_words, BLOCK *the_block) { * Constructor for ROW results *************************************************************************/ -ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) { +ROW_RES::ROW_RES(bool merge_similar_words, ROW* the_row) { WERD_IT word_it(the_row->word_list()); WERD_RES_IT word_res_it(&word_res_list); - WERD_RES *combo = nullptr; // current combination of fuzzies - WERD *copy_word; + WERD_RES* combo = nullptr; // current combination of fuzzies + WERD* copy_word; char_count = 0; rej_count = 0; @@ -120,8 +120,8 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) { row = the_row; bool add_next_word = false; TBOX union_box; - float line_height = the_row->x_height() + the_row->ascenders() - - the_row->descenders(); + float line_height = + the_row->x_height() + the_row->ascenders() - the_row->descenders(); for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { WERD_RES* word_res = new WERD_RES(word_it.data()); word_res->x_height = the_row->x_height(); @@ -133,7 +133,7 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) { } else if (merge_similar_words) { union_box = word_res->word->bounding_box(); add_next_word = !word_res->word->flag(W_REP_CHAR) && - union_box.height() <= line_height * kMaxWordSizeRatio; + union_box.height() <= line_height * kMaxWordSizeRatio; word_res->odd_size = !add_next_word; } WERD* next_word = word_it.data_relative(1); @@ -174,15 +174,14 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) { } } - -WERD_RES& WERD_RES::operator=(const WERD_RES & source) { +WERD_RES& WERD_RES::operator=(const WERD_RES& source) { this->ELIST_LINK::operator=(source); Clear(); if (source.combination) { word = new WERD; - *word = *(source.word); // deep copy + *word = *(source.word); // deep copy } else { - word = source.word; // pt to same word + word = source.word; // pt to same word } if (source.bln_boxes != nullptr) bln_boxes = new tesseract::BoxWord(*source.bln_boxes); @@ -206,7 +205,7 @@ WERD_RES& WERD_RES::operator=(const WERD_RES & source) { WERD_CHOICE_IT wc_it(const_cast(&source.best_choices)); WERD_CHOICE_IT wc_dest_it(&best_choices); for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) { - const WERD_CHOICE *choice = wc_it.data(); + const WERD_CHOICE* choice = wc_it.data(); wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice)); } if (!wc_dest_it.empty()) { @@ -231,7 +230,7 @@ WERD_RES& WERD_RES::operator=(const WERD_RES & source) { part_of_combo = source.part_of_combo; CopySimpleFields(source); if (source.blamer_bundle != nullptr) { - blamer_bundle = new BlamerBundle(*(source.blamer_bundle)); + blamer_bundle = new BlamerBundle(*(source.blamer_bundle)); } return *this; } @@ -293,12 +292,10 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES& source) { // Returns false if the word is empty and sets up fake results. bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in, tesseract::Tesseract* tess, Pix* pix, - int norm_mode, - const TBOX* norm_box, - bool numeric_mode, - bool use_body_size, - bool allow_detailed_fx, - ROW *row, const BLOCK* block) { + int norm_mode, const TBOX* norm_box, + bool numeric_mode, bool use_body_size, + bool allow_detailed_fx, ROW* row, + const BLOCK* block) { tesseract::OcrEngineMode norm_mode_hint = static_cast(norm_mode); tesseract = tess; @@ -315,8 +312,10 @@ bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in, ClearResults(); SetupWordScript(unicharset_in); chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word); - float word_xheight = use_body_size && row != nullptr && row->body_size() > 0.0f - ? row->body_size() : x_height; + float word_xheight = + use_body_size && row != nullptr && row->body_size() > 0.0f + ? row->body_size() + : x_height; chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE), word_xheight, baseline_shift, numeric_mode, norm_mode_hint, norm_box, &denorm); @@ -332,7 +331,7 @@ bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in, // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty // accumulators from a made chopped word. We presume the fields are already // empty. -void WERD_RES::SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in) { +void WERD_RES::SetupBasicsFromChoppedWord(const UNICHARSET& unicharset_in) { bln_boxes = tesseract::BoxWord::CopyFromNormalized(chopped_word); start_seam_list(chopped_word, &seam_array); SetupBlobWidthsAndGaps(); @@ -361,7 +360,7 @@ void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) { fake_choices[blob_id++] = new BLOB_CHOICE; } FakeClassifyWord(blob_count, fake_choices); - delete [] fake_choices; + delete[] fake_choices; } else { WERD_CHOICE* word = new WERD_CHOICE(&unicharset_in); word->make_bad(); @@ -394,12 +393,12 @@ void WERD_RES::SetupBlobWidthsAndGaps() { blob_gaps.truncate(0); int num_blobs = chopped_word->NumBlobs(); for (int b = 0; b < num_blobs; ++b) { - TBLOB *blob = chopped_word->blobs[b]; + TBLOB* blob = chopped_word->blobs[b]; TBOX box = blob->bounding_box(); blob_widths.push_back(box.width()); if (b + 1 < num_blobs) { - blob_gaps.push_back( - chopped_word->blobs[b + 1]->bounding_box().left() - box.right()); + blob_gaps.push_back(chopped_word->blobs[b + 1]->bounding_box().left() - + box.right()); } } } @@ -415,8 +414,7 @@ void WERD_RES::InsertSeam(int blob_number, SEAM* seam) { // Expand the ratings matrix. ratings = ratings->ConsumeAndMakeBigger(blob_number); // Fix all the segmentation states. - if (raw_choice != nullptr) - raw_choice->UpdateStateForSplit(blob_number); + if (raw_choice != nullptr) raw_choice->UpdateStateForSplit(blob_number); WERD_CHOICE_IT wc_it(&best_choices); for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) { WERD_CHOICE* choice = wc_it.data(); @@ -433,8 +431,7 @@ bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan(float threshold) const { WERD_CHOICE_IT wc_it(const_cast(&best_choices)); for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) { WERD_CHOICE* choice = wc_it.data(); - if (choice->adjust_factor() <= threshold) - return false; + if (choice->adjust_factor() <= threshold) return false; } return true; } @@ -470,11 +467,10 @@ bool WERD_RES::StatesAllValid() { // Prints a list of words found if debug is true or the word result matches // the word_to_debug. void WERD_RES::DebugWordChoices(bool debug, const char* word_to_debug) { - if (debug || - (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr && - best_choice->unichar_string() == STRING(word_to_debug))) { - if (raw_choice != nullptr) - raw_choice->print("\nBest Raw Choice"); + if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' && + best_choice != nullptr && + best_choice->unichar_string() == STRING(word_to_debug))) { + if (raw_choice != nullptr) raw_choice->print("\nBest Raw Choice"); WERD_CHOICE_IT it(&best_choices); int index = 0; @@ -489,8 +485,8 @@ void WERD_RES::DebugWordChoices(bool debug, const char* word_to_debug) { // Prints the top choice along with the accepted/done flags. void WERD_RES::DebugTopChoice(const char* msg) const { - tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", - tess_accepted, tess_would_adapt, done); + tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted, + tess_would_adapt, done); if (best_choice == nullptr) tprintf("\n"); else @@ -503,11 +499,9 @@ void WERD_RES::DebugTopChoice(const char* msg) const { // re-ranker, in place of this heuristic that is based on the previous // adjustment factor. void WERD_RES::FilterWordChoices(int debug_level) { - if (best_choice == nullptr || best_choices.singleton()) - return; + if (best_choice == nullptr || best_choices.singleton()) return; - if (debug_level >= 2) - best_choice->print("\nFiltering against best choice"); + if (debug_level >= 2) best_choice->print("\nFiltering against best choice"); WERD_CHOICE_IT it(&best_choices); int index = 0; for (it.forward(); !it.at_first(); it.forward(), ++index) { @@ -551,8 +545,7 @@ void WERD_RES::FilterWordChoices(int debug_level) { } void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, - float min_rating, - float max_rating, + float min_rating, float max_rating, float rating_margin, float* thresholds) { int chunk = 0; @@ -569,8 +562,7 @@ void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, ++raw_blob; end_raw_chunk += raw_choice->state(raw_blob); } - if (best_choice->unichar_id(i) != - raw_choice->unichar_id(raw_blob)) { + if (best_choice->unichar_id(i) != raw_choice->unichar_id(raw_blob)) { avg_rating += raw_choice->certainty(raw_blob); ++num_error_chunks; } @@ -584,10 +576,8 @@ void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, *thresholds = max_rating; } - if (*thresholds > max_rating) - *thresholds = max_rating; - if (*thresholds < min_rating) - *thresholds = min_rating; + if (*thresholds > max_rating) *thresholds = max_rating; + if (*thresholds < min_rating) *thresholds = min_rating; } } @@ -617,9 +607,8 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, // results according to the order in which words are found, which is an // undesirable behavior. It would be better to keep all the choices and // prune them later when more information is available. - float max_certainty_delta = - StopperAmbigThreshold(best_choice->adjust_factor(), - word_choice->adjust_factor()); + float max_certainty_delta = StopperAmbigThreshold( + best_choice->adjust_factor(), word_choice->adjust_factor()); if (max_certainty_delta > -kStopperAmbiguityThresholdOffset) max_certainty_delta = -kStopperAmbiguityThresholdOffset; if (word_choice->certainty() - best_choice->certainty() < @@ -627,11 +616,12 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, if (debug) { STRING bad_string; word_choice->string_and_lengths(&bad_string, nullptr); - tprintf("Discarding choice \"%s\" with an overly low certainty" - " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n", - bad_string.string(), word_choice->certainty(), - best_choice->certainty(), - max_certainty_delta + best_choice->certainty()); + tprintf( + "Discarding choice \"%s\" with an overly low certainty" + " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n", + bad_string.string(), word_choice->certainty(), + best_choice->certainty(), + max_certainty_delta + best_choice->certainty()); } delete word_choice; return false; @@ -670,8 +660,7 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, } } else { ++num_choices; - if (num_choices > max_num_choices) - delete it.extract(); + if (num_choices > max_num_choices) delete it.extract(); } it.forward(); } while (!it.at_first()); @@ -679,8 +668,7 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, if (!inserted && num_choices < max_num_choices) { it.add_to_end(word_choice); inserted = true; - if (num_choices == 0) - best_choice = word_choice; // This is the new best. + if (num_choices == 0) best_choice = word_choice; // This is the new best. } if (debug) { if (inserted) @@ -696,10 +684,10 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, return true; } - // Simple helper moves the ownership of the pointer data from src to dest, // first deleting anything in dest, and nulling out src afterwards. -template static void MovePointerData(T** dest, T**src) { +template +static void MovePointerData(T** dest, T** src) { delete *dest; *dest = *src; *src = nullptr; @@ -723,15 +711,13 @@ int WERD_RES::GetBlobsWidth(int start_blob, int last_blob) { int result = 0; for (int b = start_blob; b <= last_blob; ++b) { result += blob_widths[b]; - if (b < last_blob) - result += blob_gaps[b]; + if (b < last_blob) result += blob_gaps[b]; } return result; } // Returns the width of a gap between the specified blob and the next one. int WERD_RES::GetBlobsGap(int blob_index) { - if (blob_index < 0 || blob_index >= blob_gaps.size()) - return 0; + if (blob_index < 0 || blob_index >= blob_gaps.size()) return 0; return blob_gaps[blob_index]; } @@ -801,8 +787,7 @@ void WERD_RES::RebuildBestState() { ASSERT_HOST(best_choice != nullptr); delete rebuild_word; rebuild_word = new TWERD; - if (seam_array.empty()) - start_seam_list(chopped_word, &seam_array); + if (seam_array.empty()) start_seam_list(chopped_word, &seam_array); best_state.truncate(0); int start = 0; for (int i = 0; i < best_choice->length(); ++i) { @@ -934,10 +919,10 @@ bool WERD_RES::ConditionalBlobMerge( bool modified = false; for (int i = 0; i + 1 < best_choice->length(); ++i) { UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i), - best_choice->unichar_id(i+1)); + best_choice->unichar_id(i + 1)); if (new_id != INVALID_UNICHAR_ID && - (box_cb == nullptr || box_cb->Run(box_word->BlobBox(i), - box_word->BlobBox(i + 1)))) { + (box_cb == nullptr || + box_cb->Run(box_word->BlobBox(i), box_word->BlobBox(i + 1)))) { // Raw choice should not be fixed. best_choice->set_unichar_id(new_id, i); modified = true; @@ -982,24 +967,21 @@ void WERD_RES::MergeAdjacentBlobs(int index) { // Return true if the next character in the string (given the UTF8 length in // bytes) is a quote character. static int is_simple_quote(const char* signed_str, int length) { - const unsigned char* str = - reinterpret_cast(signed_str); + const unsigned char* str = reinterpret_cast(signed_str); // Standard 1 byte quotes. return (length == 1 && (*str == '\'' || *str == '`')) || - // UTF-8 3 bytes curved quotes. - (length == 3 && ((*str == 0xe2 && - *(str + 1) == 0x80 && - *(str + 2) == 0x98) || - (*str == 0xe2 && - *(str + 1) == 0x80 && - *(str + 2) == 0x99))); + // UTF-8 3 bytes curved quotes. + (length == 3 && + ((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) || + (*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99))); } // Callback helper for fix_quotes returns a double quote if both // arguments are quote, otherwise INVALID_UNICHAR_ID. -UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) { - const char *ch = uch_set->id_to_unichar(id1); - const char *next_ch = uch_set->id_to_unichar(id2); +UNICHAR_ID +WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) { + const char* ch = uch_set->id_to_unichar(id1); + const char* next_ch = uch_set->id_to_unichar(id2); if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch))) return uch_set->unichar_to_id("\""); @@ -1012,18 +994,18 @@ void WERD_RES::fix_quotes() { !uch_set->get_enabled(uch_set->unichar_to_id("\""))) return; // Don't create it if it is disallowed. - ConditionalBlobMerge( - NewPermanentTessCallback(this, &WERD_RES::BothQuotes), - nullptr); + ConditionalBlobMerge(NewPermanentTessCallback(this, &WERD_RES::BothQuotes), + nullptr); } // Callback helper for fix_hyphens returns UNICHAR_ID of - if both // arguments are hyphen, otherwise INVALID_UNICHAR_ID. -UNICHAR_ID WERD_RES::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) { - const char *ch = uch_set->id_to_unichar(id1); - const char *next_ch = uch_set->id_to_unichar(id2); - if (strlen(ch) == 1 && strlen(next_ch) == 1 && - (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~')) +UNICHAR_ID +WERD_RES::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) { + const char* ch = uch_set->id_to_unichar(id1); + const char* next_ch = uch_set->id_to_unichar(id2); + if (strlen(ch) == 1 && strlen(next_ch) == 1 && (*ch == '-' || *ch == '~') && + (*next_ch == '-' || *next_ch == '~')) return uch_set->unichar_to_id("-"); return INVALID_UNICHAR_ID; } @@ -1048,7 +1030,8 @@ void WERD_RES::fix_hyphens() { // Callback helper for merge_tess_fails returns a space if both // arguments are space, otherwise INVALID_UNICHAR_ID. -UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) { +UNICHAR_ID +WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) { if (id1 == id2 && id1 == uch_set->unichar_to_id(" ")) return id1; else @@ -1058,7 +1041,7 @@ UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) { // Change pairs of tess failures to a single one void WERD_RES::merge_tess_fails() { if (ConditionalBlobMerge( - NewPermanentTessCallback(this, &WERD_RES::BothSpaces), nullptr)) { + NewPermanentTessCallback(this, &WERD_RES::BothSpaces), nullptr)) { int len = best_choice->length(); ASSERT_HOST(reject_map.length() == len); ASSERT_HOST(box_word->length() == len); @@ -1078,10 +1061,7 @@ bool WERD_RES::PiecesAllNatural(int start, int count) const { return true; } - -WERD_RES::~WERD_RES () { - Clear(); -} +WERD_RES::~WERD_RES() { Clear(); } void WERD_RES::InitNonPointers() { tess_failed = false; @@ -1177,23 +1157,20 @@ void WERD_RES::ClearRatings() { } } - -bool PAGE_RES_IT::operator ==(const PAGE_RES_IT &other) const { - return word_res == other.word_res && - row_res == other.row_res && - block_res == other.block_res; +bool PAGE_RES_IT::operator==(const PAGE_RES_IT& other) const { + return word_res == other.word_res && row_res == other.row_res && + block_res == other.block_res; } -int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const { +int PAGE_RES_IT::cmp(const PAGE_RES_IT& other) const { ASSERT_HOST(page_res == other.page_res); if (other.block_res == nullptr) { // other points to the end of the page. - if (block_res == nullptr) - return 0; + if (block_res == nullptr) return 0; return -1; } if (block_res == nullptr) { - return 1; // we point to the end of the page. + return 1; // we point to the end of the page. } if (block_res == other.block_res) { if (other.row_res == nullptr || row_res == nullptr) { @@ -1235,8 +1212,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const { // We point to different blocks. BLOCK_RES_IT block_res_it(&page_res->block_res_list); - for (block_res_it.mark_cycle_pt(); - !block_res_it.cycled_list(); block_res_it.forward()) { + for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list(); + block_res_it.forward()) { if (block_res_it.data() == block_res) { return -1; } else if (block_res_it.data() == other.block_res) { @@ -1262,8 +1239,7 @@ WERD_RES* PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES& clone_res, WERD_RES_IT wr_it(&row()->word_res_list); for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { WERD_RES* word = wr_it.data(); - if (word == word_res) - break; + if (word == word_res) break; } ASSERT_HOST(!wr_it.cycled_list()); wr_it.add_before_then_move(new_res); @@ -1294,8 +1270,7 @@ static void ComputeBlobEnds(const WERD_RES& word, C_BLOB_LIST* next_word_blobs, // boundaries between them. int blob_end = INT32_MAX; if (!blob_it.at_first() || next_word_blobs != nullptr) { - if (blob_it.at_first()) - blob_it.set_to_list(next_word_blobs); + if (blob_it.at_first()) blob_it.set_to_list(next_word_blobs); blob_end = (blob_box.right() + blob_it.data()->bounding_box().left()) / 2; } blob_ends->push_back(blob_end); @@ -1330,8 +1305,7 @@ void PAGE_RES_IT::ReplaceCurrentWord( if (!input_word->combination) { for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { WERD* word = w_it.data(); - if (word == input_word->word) - break; + if (word == input_word->word) break; } // w_it is now set to the input_word's word. ASSERT_HOST(!w_it.cycled_list()); @@ -1340,8 +1314,7 @@ void PAGE_RES_IT::ReplaceCurrentWord( WERD_RES_IT wr_it(&row()->word_res_list); for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { WERD_RES* word = wr_it.data(); - if (word == input_word) - break; + if (word == input_word) break; } ASSERT_HOST(!wr_it.cycled_list()); // Since we only have an estimate of the bounds between blobs, use the blob @@ -1383,8 +1356,7 @@ void PAGE_RES_IT::ReplaceCurrentWord( // to clip. if (i > 0 && blob_box.left() < blob_ends[i - 1]) blob_box.set_left(blob_ends[i - 1]); - if (blob_box.right() > end_x) - blob_box.set_right(end_x); + if (blob_box.right() > end_x) blob_box.set_right(end_x); box_word->InsertBox(i, blob_box); } // Fix empty boxes. If a very joined blob sits over multiple characters, @@ -1426,8 +1398,7 @@ void PAGE_RES_IT::ReplaceCurrentWord( // Delete the current word, which has been replaced. We could just call // DeleteCurrentWord, but that would iterate both lists again, and we know // we are already in the right place. - if (!input_word->combination) - delete w_it.extract(); + if (!input_word->combination) delete w_it.extract(); delete wr_it.extract(); ResetWordIterator(); } @@ -1493,7 +1464,7 @@ void PAGE_RES_IT::MakeCurrentWordFuzzy() { * Set things up at the start of the page *************************************************************************/ -WERD_RES *PAGE_RES_IT::start_page(bool empty_ok) { +WERD_RES* PAGE_RES_IT::start_page(bool empty_ok) { block_res_it.set_to_list(&page_res->block_res_list); block_res_it.mark_cycle_pt(); prev_block_res = nullptr; @@ -1548,17 +1519,16 @@ void PAGE_RES_IT::ResetWordIterator() { * * Find the next word on the page. If empty_ok is true, then non-text blocks * and text blocks with no text are visited as if they contain a single - * imaginary word in a single imaginary row. (word() and row() both return nullptr - * in such a block and the return value is nullptr.) - * If empty_ok is false, the old behaviour is maintained. Each real word - * is visited and empty and non-text blocks and rows are skipped. - * new_block is used to initialize the iterators for a new block. - * The iterator maintains pointers to block, row and word for the previous, - * current and next words. These are correct, regardless of block/row - * boundaries. nullptr values denote start and end of the page. + * imaginary word in a single imaginary row. (word() and row() both return + *nullptr in such a block and the return value is nullptr.) If empty_ok is + *false, the old behaviour is maintained. Each real word is visited and empty + *and non-text blocks and rows are skipped. new_block is used to initialize the + *iterators for a new block. The iterator maintains pointers to block, row and + *word for the previous, current and next words. These are correct, regardless + *of block/row boundaries. nullptr values denote start and end of the page. *************************************************************************/ -WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) { +WERD_RES* PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) { bool new_row = false; prev_block_res = block_res; @@ -1606,11 +1576,12 @@ WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) { block_res_it.forward(); new_block = true; } - foundword: +foundword: // Update prev_word_best_choice pointer. if (page_res != nullptr && page_res->prev_word_best_choice != nullptr) { - *page_res->prev_word_best_choice = - (new_block || prev_word_res == nullptr) ? nullptr : prev_word_res->best_choice; + *page_res->prev_word_best_choice = (new_block || prev_word_res == nullptr) + ? nullptr + : prev_word_res->best_choice; } return word_res; } @@ -1620,8 +1591,8 @@ WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) { * * Move to the beginning (leftmost word) of the current row. *************************************************************************/ -WERD_RES *PAGE_RES_IT::restart_row() { - ROW_RES *row = this->row(); +WERD_RES* PAGE_RES_IT::restart_row() { + ROW_RES* row = this->row(); if (!row) return nullptr; for (restart_page(); this->row() != row; forward()) { // pass @@ -1635,7 +1606,7 @@ WERD_RES *PAGE_RES_IT::restart_row() { * Move to the beginning of the next paragraph, allowing empty blocks. *************************************************************************/ -WERD_RES *PAGE_RES_IT::forward_paragraph() { +WERD_RES* PAGE_RES_IT::forward_paragraph() { while (block_res == next_block_res && (next_row_res != nullptr && next_row_res->row != nullptr && row_res->row->para() == next_row_res->row->para())) { @@ -1650,7 +1621,7 @@ WERD_RES *PAGE_RES_IT::forward_paragraph() { * Move to the beginning of the next block, allowing empty blocks. *************************************************************************/ -WERD_RES *PAGE_RES_IT::forward_block() { +WERD_RES* PAGE_RES_IT::forward_block() { while (block_res == next_block_res) { internal_forward(false, true); } @@ -1661,12 +1632,12 @@ void PAGE_RES_IT::rej_stat_word() { int16_t chars_in_word; int16_t rejects_in_word = 0; - chars_in_word = word_res->reject_map.length (); + chars_in_word = word_res->reject_map.length(); page_res->char_count += chars_in_word; block_res->char_count += chars_in_word; row_res->char_count += chars_in_word; - rejects_in_word = word_res->reject_map.reject_count (); + rejects_in_word = word_res->reject_map.reject_count(); page_res->rej_count += rejects_in_word; block_res->rej_count += rejects_in_word; diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 98e2e4e571..bf06223a4f 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -16,8 +16,8 @@ ** limitations under the License. * **********************************************************************/ -#ifndef PAGERES_H -#define PAGERES_H +#ifndef PAGERES_H +#define PAGERES_H #include "blamer.h" #include "blobs.h" @@ -36,26 +36,24 @@ namespace tesseract { struct FontInfo; class Tesseract; -} +} // namespace tesseract using tesseract::FontInfo; /* Forward declarations */ class BLOCK_RES; -ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES) -class -ROW_RES; +ELISTIZEH(BLOCK_RES) CLISTIZEH(BLOCK_RES) class ROW_RES; -ELISTIZEH (ROW_RES) +ELISTIZEH(ROW_RES) class WERD_RES; -ELISTIZEH (WERD_RES) +ELISTIZEH(WERD_RES) /************************************************************************* * PAGE_RES - Page results *************************************************************************/ -class PAGE_RES { // page result +class PAGE_RES { // page result public: int32_t char_count; int32_t rej_count; @@ -63,7 +61,7 @@ class PAGE_RES { // page result bool rejected; // Updated every time PAGE_RES_IT iterating on this PAGE_RES moves to // the next word. This pointer is not owned by PAGE_RES class. - WERD_CHOICE **prev_word_best_choice; + WERD_CHOICE** prev_word_best_choice; // Sums of blame reasons computed by the blamer. GenericVector blame_reasons; // Debug information about all the misadaptions on this page. @@ -83,45 +81,45 @@ class PAGE_RES { // page result PAGE_RES() { Init(); } // empty constructor PAGE_RES(bool merge_similar_words, - BLOCK_LIST *block_list, // real blocks - WERD_CHOICE **prev_word_best_choice_ptr); + BLOCK_LIST* block_list, // real blocks + WERD_CHOICE** prev_word_best_choice_ptr); - ~PAGE_RES () = default; + ~PAGE_RES() = default; }; /************************************************************************* * BLOCK_RES - Block results *************************************************************************/ -class BLOCK_RES:public ELIST_LINK { +class BLOCK_RES : public ELIST_LINK { public: - BLOCK * block; // real block - int32_t char_count; // chars in block - int32_t rej_count; // rejected chars - int16_t font_class; // + BLOCK* block; // real block + int32_t char_count; // chars in block + int32_t rej_count; // rejected chars + int16_t font_class; // int16_t row_count; float x_height; - bool font_assigned; // block already + bool font_assigned; // block already // processed - bool bold; // all bold - bool italic; // all italic + bool bold; // all bold + bool italic; // all italic ROW_RES_LIST row_res_list; BLOCK_RES() = default; - BLOCK_RES(bool merge_similar_words, BLOCK *the_block); // real block + BLOCK_RES(bool merge_similar_words, BLOCK* the_block); // real block - ~BLOCK_RES () = default; + ~BLOCK_RES() = default; }; /************************************************************************* * ROW_RES - Row results *************************************************************************/ -class ROW_RES:public ELIST_LINK { +class ROW_RES : public ELIST_LINK { public: - ROW * row; // real row + ROW* row; // real row int32_t char_count; // chars in block int32_t rej_count; // rejected chars int32_t whole_word_rej_count; // rejs in total rej wds @@ -129,7 +127,7 @@ class ROW_RES:public ELIST_LINK { ROW_RES() = default; - ROW_RES(bool merge_similar_words, ROW *the_row); // real row + ROW_RES(bool merge_similar_words, ROW* the_row); // real row ~ROW_RES() = default; }; @@ -137,13 +135,7 @@ class ROW_RES:public ELIST_LINK { /************************************************************************* * WERD_RES - Word results *************************************************************************/ -enum CRUNCH_MODE -{ - CR_NONE, - CR_KEEP_SPACE, - CR_LOOSE_SPACE, - CR_DELETE -}; +enum CRUNCH_MODE { CR_NONE, CR_KEEP_SPACE, CR_LOOSE_SPACE, CR_DELETE }; // WERD_RES is a collection of publicly accessible members that gathers // information about a word result. @@ -167,7 +159,7 @@ class WERD_RES : public ELIST_LINK { // The word is the input C_BLOBs in the rotated pixel space. // word is NOT owned by the WERD_RES unless combination is true. // All the other word pointers ARE owned by the WERD_RES. - WERD* word; // Input C_BLOB word. + WERD* word; // Input C_BLOB word. // -------------SETUP BY SetupFor*Recognition---READONLY-INPUT------------ @@ -182,7 +174,7 @@ class WERD_RES : public ELIST_LINK { // The denorm provides the transformation to get back to the rotated image // coords from the chopped_word/rebuild_word BLN coords, but each blob also // has its own denorm. - DENORM denorm; // For use on chopped_word. + DENORM denorm; // For use on chopped_word. // Unicharset used by the classifier output in best_choice and raw_choice. const UNICHARSET* uch_set; // For converting back to utf8. @@ -193,7 +185,7 @@ class WERD_RES : public ELIST_LINK { // The chopped_word is also in BLN space, and represents the fully chopped // character fragments that make up the word. // The length of chopped_word matches length of seam_array + 1 (if set). - TWERD* chopped_word; // BLN chopped fragments output. + TWERD* chopped_word; // BLN chopped fragments output. // Vector of SEAM* holding chopping points matching chopped_word. GenericVector seam_array; // Widths of blobs in chopped_word. @@ -207,22 +199,22 @@ class WERD_RES : public ELIST_LINK { // in chopped_word. The state_ members of best_choice, raw_choice and // best_choices all correspond to this ratings matrix and allow extraction // of the blob choices for any given WERD_CHOICE. - MATRIX* ratings; // Owned pointer. + MATRIX* ratings; // Owned pointer. // Pointer to the first WERD_CHOICE in best_choices. This is the result that // will be output from Tesseract. Note that this is now a borrowed pointer // and should NOT be deleted. - WERD_CHOICE* best_choice; // Borrowed pointer. + WERD_CHOICE* best_choice; // Borrowed pointer. // The best raw_choice found during segmentation search. Differs from the // best_choice by being the best result according to just the character // classifier, not taking any language model information into account. // Unlike best_choice, the pointer IS owned by this WERD_RES. - WERD_CHOICE* raw_choice; // Owned pointer. + WERD_CHOICE* raw_choice; // Owned pointer. // Alternative results found during chopping/segmentation search stages. // Note that being an ELIST, best_choices owns the WERD_CHOICEs. WERD_CHOICE_LIST best_choices; // Truth bounding boxes, text and incorrect choice reason. - BlamerBundle *blamer_bundle; + BlamerBundle* blamer_bundle; // --------------OUTPUT FROM RECOGNITION------------------------------- // --------------Not all fields are necessarily set.------------------- @@ -236,13 +228,13 @@ class WERD_RES : public ELIST_LINK { // The rebuild_word is also in BLN space, but represents the final best // segmentation of the word. Its length is therefore the same as box_word. - TWERD* rebuild_word; // BLN best segmented word. + TWERD* rebuild_word; // BLN best segmented word. // The box_word is in the original image coordinate space. It is the // bounding boxes of the rebuild_word, after denormalization. // The length of box_word matches rebuild_word, best_state (if set) and // correct_text (if set), as well as best_choice and represents the // number of classified units in the output. - tesseract::BoxWord* box_word; // Denormalized output boxes. + tesseract::BoxWord* box_word; // Denormalized output boxes. // The best_state stores the relationship between chopped_word and // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i] // adjacent blobs in chopped_word. The seams in seam_array are hidden @@ -262,8 +254,8 @@ class WERD_RES : public ELIST_LINK { // Less-well documented members. // TODO(rays) Add more documentation here. - WERD_CHOICE *ep_choice; // ep text TODO(rays) delete this. - REJMAP reject_map; // best_choice rejects + WERD_CHOICE* ep_choice; // ep text TODO(rays) delete this. + REJMAP reject_map; // best_choice rejects bool tess_failed; /* If tess_failed is TRUE, one of the following tests failed when Tess @@ -272,24 +264,24 @@ class WERD_RES : public ELIST_LINK { - The best_choice string contained ALL blanks; - The best_choice string was zero length */ - bool tess_accepted; // Tess thinks its ok? - bool tess_would_adapt; // Tess would adapt? - bool done; // ready for output? - bool small_caps; // word appears to be small caps - bool odd_size; // word is bigger than line or leader dots. + bool tess_accepted; // Tess thinks its ok? + bool tess_would_adapt; // Tess would adapt? + bool done; // ready for output? + bool small_caps; // word appears to be small caps + bool odd_size; // word is bigger than line or leader dots. int8_t italic; int8_t bold; // The fontinfos are pointers to data owned by the classifier. const FontInfo* fontinfo; const FontInfo* fontinfo2; - int8_t fontinfo_id_count; // number of votes - int8_t fontinfo_id2_count; // number of votes + int8_t fontinfo_id_count; // number of votes + int8_t fontinfo_id2_count; // number of votes bool guessed_x_ht; bool guessed_caps_ht; CRUNCH_MODE unlv_crunch_mode; - float x_height; // post match estimate - float caps_height; // post match estimate - float baseline_shift; // post match estimate. + float x_height; // post match estimate + float caps_height; // post match estimate + float baseline_shift; // post match estimate. // Certainty score for the spaces either side of this word (LSTM mode). // MIN this value with the actual word certainty. float space_certainty; @@ -310,15 +302,15 @@ class WERD_RES : public ELIST_LINK { Combination words are FOLLOWED by the sequence of part_of_combo words which they combine. */ - bool combination; //of two fuzzy gap wds - bool part_of_combo; //part of a combo - bool reject_spaces; //Reject spacing? + bool combination; // of two fuzzy gap wds + bool part_of_combo; // part of a combo + bool reject_spaces; // Reject spacing? WERD_RES() { InitNonPointers(); InitPointers(); } - WERD_RES(WERD *the_word) { + WERD_RES(WERD* the_word) { InitNonPointers(); InitPointers(); word = the_word; @@ -327,7 +319,7 @@ class WERD_RES : public ELIST_LINK { // To get that use deep_copy below. WERD_RES(const WERD_RES& source) : ELIST_LINK(source) { InitPointers(); - *this = source; // see operator= + *this = source; // see operator= } ~WERD_RES(); @@ -351,8 +343,7 @@ class WERD_RES : public ELIST_LINK { } // Returns the UTF-8 string for the given blob index in the raw_choice word. const char* RawUTF8(int blob_index) const { - if (blob_index < 0 || blob_index >= raw_choice->length()) - return nullptr; + if (blob_index < 0 || blob_index >= raw_choice->length()) return nullptr; UNICHAR_ID id = raw_choice->unichar_id(blob_index); if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID) return nullptr; @@ -360,22 +351,21 @@ class WERD_RES : public ELIST_LINK { } UNICHARSET::Direction SymbolDirection(int blob_index) const { - if (best_choice == nullptr || - blob_index >= best_choice->length() || + if (best_choice == nullptr || blob_index >= best_choice->length() || blob_index < 0) return UNICHARSET::U_OTHER_NEUTRAL; return uch_set->get_direction(best_choice->unichar_id(blob_index)); } bool AnyRtlCharsInWord() const { - if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) + if (uch_set == nullptr || best_choice == nullptr || + best_choice->length() < 1) return false; for (int id = 0; id < best_choice->length(); id++) { int unichar_id = best_choice->unichar_id(id); if (unichar_id < 0 || unichar_id >= uch_set->size()) continue; // Ignore illegal chars. - UNICHARSET::Direction dir = - uch_set->get_direction(unichar_id); + UNICHARSET::Direction dir = uch_set->get_direction(unichar_id); if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC || dir == UNICHARSET::U_ARABIC_NUMBER) @@ -385,15 +375,15 @@ class WERD_RES : public ELIST_LINK { } bool AnyLtrCharsInWord() const { - if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) + if (uch_set == nullptr || best_choice == nullptr || + best_choice->length() < 1) return false; for (int id = 0; id < best_choice->length(); id++) { int unichar_id = best_choice->unichar_id(id); if (unichar_id < 0 || unichar_id >= uch_set->size()) continue; // Ignore illegal chars. UNICHARSET::Direction dir = uch_set->get_direction(unichar_id); - if (dir == UNICHARSET::U_LEFT_TO_RIGHT) - return true; + if (dir == UNICHARSET::U_LEFT_TO_RIGHT) return true; } return false; } @@ -414,7 +404,7 @@ class WERD_RES : public ELIST_LINK { // Deep copies everything except the ratings MATRIX. // To get that use deep_copy below. - WERD_RES& operator=(const WERD_RES& source); //from this + WERD_RES& operator=(const WERD_RES& source); // from this void CopySimpleFields(const WERD_RES& source); @@ -441,15 +431,15 @@ class WERD_RES : public ELIST_LINK { // Returns false if the word is empty and sets up fake results. bool SetupForRecognition(const UNICHARSET& unicharset_in, tesseract::Tesseract* tesseract, Pix* pix, - int norm_mode, - const TBOX* norm_box, bool numeric_mode, - bool use_body_size, bool allow_detailed_fx, - ROW *row, const BLOCK* block); + int norm_mode, const TBOX* norm_box, + bool numeric_mode, bool use_body_size, + bool allow_detailed_fx, ROW* row, + const BLOCK* block); // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty // accumulators from a made chopped word. We presume the fields are already // empty. - void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in); + void SetupBasicsFromChoppedWord(const UNICHARSET& unicharset_in); // Sets up the members used in recognition for an empty recognition result: // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. @@ -507,10 +497,8 @@ class WERD_RES : public ELIST_LINK { // min_rating limits how tight to make a template. // max_rating limits how loose to make a template. // rating_margin denotes the amount of margin to put in template. - void ComputeAdaptionThresholds(float certainty_scale, - float min_rating, - float max_rating, - float rating_margin, + void ComputeAdaptionThresholds(float certainty_scale, float min_rating, + float max_rating, float rating_margin, float* thresholds); // Saves a copy of the word_choice if it has the best unadjusted rating. @@ -629,14 +617,13 @@ class WERD_RES : public ELIST_LINK { WERD_RES* result = new WERD_RES(*src); // That didn't copy the ratings, but we want a copy if there is one to // begin with. - if (src->ratings != nullptr) - result->ratings = src->ratings->DeepCopy(); + if (src->ratings != nullptr) result->ratings = src->ratings->DeepCopy(); return result; } // Copy blobs from word_res onto this word (eliminating spaces between). // Since this may be called bidirectionally OR both the BOL and EOL flags. - void copy_on(WERD_RES *word_res) { //from this word + void copy_on(WERD_RES* word_res) { // from this word word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL)); word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL)); word->copy_on(word_res->word); @@ -653,36 +640,36 @@ class WERD_RES : public ELIST_LINK { class PAGE_RES_IT { public: - PAGE_RES * page_res; // page being iterated + PAGE_RES* page_res; // page being iterated PAGE_RES_IT() = default; - PAGE_RES_IT(PAGE_RES *the_page_res) { // page result + PAGE_RES_IT(PAGE_RES* the_page_res) { // page result page_res = the_page_res; restart_page(); // ready to scan } // Do two PAGE_RES_ITs point at the same word? // This is much cheaper than cmp(). - bool operator ==(const PAGE_RES_IT &other) const; + bool operator==(const PAGE_RES_IT& other) const; - bool operator !=(const PAGE_RES_IT &other) const {return !(*this == other); } + bool operator!=(const PAGE_RES_IT& other) const { return !(*this == other); } // Given another PAGE_RES_IT to the same page, // this before other: -1 // this equal to other: 0 // this later than other: 1 - int cmp(const PAGE_RES_IT &other) const; + int cmp(const PAGE_RES_IT& other) const; - WERD_RES *restart_page() { + WERD_RES* restart_page() { return start_page(false); // Skip empty blocks. } - WERD_RES *restart_page_with_empties() { + WERD_RES* restart_page_with_empties() { return start_page(true); // Allow empty blocks. } - WERD_RES *start_page(bool empty_ok); + WERD_RES* start_page(bool empty_ok); - WERD_RES *restart_row(); + WERD_RES* restart_row(); // ============ Methods that mutate the underling structures =========== // Note that these methods will potentially invalidate other PAGE_RES_ITs @@ -707,63 +694,61 @@ class PAGE_RES_IT { // corresponding part of combo if required. void MakeCurrentWordFuzzy(); - WERD_RES *forward() { // Get next word. + WERD_RES* forward() { // Get next word. return internal_forward(false, false); } // Move forward, but allow empty blocks to show as single nullptr words. - WERD_RES *forward_with_empties() { - return internal_forward(false, true); - } + WERD_RES* forward_with_empties() { return internal_forward(false, true); } - WERD_RES *forward_paragraph(); // get first word in next non-empty paragraph - WERD_RES *forward_block(); // get first word in next non-empty block + WERD_RES* forward_paragraph(); // get first word in next non-empty paragraph + WERD_RES* forward_block(); // get first word in next non-empty block - WERD_RES *prev_word() const { // previous word + WERD_RES* prev_word() const { // previous word return prev_word_res; } - ROW_RES *prev_row() const { // row of prev word + ROW_RES* prev_row() const { // row of prev word return prev_row_res; } - BLOCK_RES *prev_block() const { // block of prev word + BLOCK_RES* prev_block() const { // block of prev word return prev_block_res; } - WERD_RES *word() const { // current word + WERD_RES* word() const { // current word return word_res; } - ROW_RES *row() const { // row of current word + ROW_RES* row() const { // row of current word return row_res; } - BLOCK_RES *block() const { // block of cur. word + BLOCK_RES* block() const { // block of cur. word return block_res; } - WERD_RES *next_word() const { // next word + WERD_RES* next_word() const { // next word return next_word_res; } - ROW_RES *next_row() const { // row of next word + ROW_RES* next_row() const { // row of next word return next_row_res; } - BLOCK_RES *next_block() const { // block of next word + BLOCK_RES* next_block() const { // block of next word return next_block_res; } void rej_stat_word(); // for page/block/row void ResetWordIterator(); private: - WERD_RES *internal_forward(bool new_block, bool empty_ok); + WERD_RES* internal_forward(bool new_block, bool empty_ok); - WERD_RES * prev_word_res; // previous word - ROW_RES *prev_row_res; // row of prev word - BLOCK_RES *prev_block_res; // block of prev word + WERD_RES* prev_word_res; // previous word + ROW_RES* prev_row_res; // row of prev word + BLOCK_RES* prev_block_res; // block of prev word - WERD_RES *word_res; // current word - ROW_RES *row_res; // row of current word - BLOCK_RES *block_res; // block of cur. word + WERD_RES* word_res; // current word + ROW_RES* row_res; // row of current word + BLOCK_RES* block_res; // block of cur. word - WERD_RES *next_word_res; // next word - ROW_RES *next_row_res; // row of next word - BLOCK_RES *next_block_res; // block of next word + WERD_RES* next_word_res; // next word + ROW_RES* next_row_res; // row of next word + BLOCK_RES* next_block_res; // block of next word - BLOCK_RES_IT block_res_it; // iterators + BLOCK_RES_IT block_res_it; // iterators ROW_RES_IT row_res_it; WERD_RES_IT word_res_it; }; diff --git a/src/ccstruct/params_training_featdef.cpp b/src/ccstruct/params_training_featdef.cpp index 8944202ee2..0a7da4a27c 100644 --- a/src/ccstruct/params_training_featdef.cpp +++ b/src/ccstruct/params_training_featdef.cpp @@ -23,16 +23,13 @@ namespace tesseract { -int ParamsTrainingFeatureByName(const char *name) { - if (name == nullptr) - return -1; +int ParamsTrainingFeatureByName(const char* name) { + if (name == nullptr) return -1; int array_size = sizeof(kParamsTrainingFeatureTypeName) / - sizeof(kParamsTrainingFeatureTypeName[0]); + sizeof(kParamsTrainingFeatureTypeName[0]); for (int i = 0; i < array_size; i++) { - if (kParamsTrainingFeatureTypeName[i] == nullptr) - continue; - if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0) - return i; + if (kParamsTrainingFeatureTypeName[i] == nullptr) continue; + if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0) return i; } return -1; } diff --git a/src/ccstruct/params_training_featdef.h b/src/ccstruct/params_training_featdef.h index 1cd5076e34..e9d0ec7ec1 100644 --- a/src/ccstruct/params_training_featdef.h +++ b/src/ccstruct/params_training_featdef.h @@ -38,78 +38,76 @@ static const int kMaxMediumWordUnichars = 6; // kParamsTrainingFeatureTypeName enum kParamsTrainingFeatureType { // Digits - PTRAIN_DIGITS_SHORT, // 0 - PTRAIN_DIGITS_MED, // 1 - PTRAIN_DIGITS_LONG, // 2 - // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM) - PTRAIN_NUM_SHORT, // 3 - PTRAIN_NUM_MED, // 4 - PTRAIN_NUM_LONG, // 5 - // Document word (DOC_DAWG_PERM) - PTRAIN_DOC_SHORT, // 6 - PTRAIN_DOC_MED, // 7 - PTRAIN_DOC_LONG, // 8 - // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM) - PTRAIN_DICT_SHORT, // 9 - PTRAIN_DICT_MED, // 10 - PTRAIN_DICT_LONG, // 11 - // Frequent word (FREQ_DAWG_PERM) - PTRAIN_FREQ_SHORT, // 12 - PTRAIN_FREQ_MED, // 13 - PTRAIN_FREQ_LONG, // 14 - PTRAIN_SHAPE_COST_PER_CHAR, // 15 - PTRAIN_NGRAM_COST_PER_CHAR, // 16 - PTRAIN_NUM_BAD_PUNC, // 17 - PTRAIN_NUM_BAD_CASE, // 18 - PTRAIN_XHEIGHT_CONSISTENCY, // 19 - PTRAIN_NUM_BAD_CHAR_TYPE, // 20 - PTRAIN_NUM_BAD_SPACING, // 21 - PTRAIN_NUM_BAD_FONT, // 22 - PTRAIN_RATING_PER_CHAR, // 23 + PTRAIN_DIGITS_SHORT, // 0 + PTRAIN_DIGITS_MED, // 1 + PTRAIN_DIGITS_LONG, // 2 + // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM) + PTRAIN_NUM_SHORT, // 3 + PTRAIN_NUM_MED, // 4 + PTRAIN_NUM_LONG, // 5 + // Document word (DOC_DAWG_PERM) + PTRAIN_DOC_SHORT, // 6 + PTRAIN_DOC_MED, // 7 + PTRAIN_DOC_LONG, // 8 + // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM) + PTRAIN_DICT_SHORT, // 9 + PTRAIN_DICT_MED, // 10 + PTRAIN_DICT_LONG, // 11 + // Frequent word (FREQ_DAWG_PERM) + PTRAIN_FREQ_SHORT, // 12 + PTRAIN_FREQ_MED, // 13 + PTRAIN_FREQ_LONG, // 14 + PTRAIN_SHAPE_COST_PER_CHAR, // 15 + PTRAIN_NGRAM_COST_PER_CHAR, // 16 + PTRAIN_NUM_BAD_PUNC, // 17 + PTRAIN_NUM_BAD_CASE, // 18 + PTRAIN_XHEIGHT_CONSISTENCY, // 19 + PTRAIN_NUM_BAD_CHAR_TYPE, // 20 + PTRAIN_NUM_BAD_SPACING, // 21 + PTRAIN_NUM_BAD_FONT, // 22 + PTRAIN_RATING_PER_CHAR, // 23 PTRAIN_NUM_FEATURE_TYPES }; -static const char * const kParamsTrainingFeatureTypeName[] = { - "PTRAIN_DIGITS_SHORT", // 0 - "PTRAIN_DIGITS_MED", // 1 - "PTRAIN_DIGITS_LONG", // 2 - "PTRAIN_NUM_SHORT", // 3 - "PTRAIN_NUM_MED", // 4 - "PTRAIN_NUM_LONG", // 5 - "PTRAIN_DOC_SHORT", // 6 - "PTRAIN_DOC_MED", // 7 - "PTRAIN_DOC_LONG", // 8 - "PTRAIN_DICT_SHORT", // 9 - "PTRAIN_DICT_MED", // 10 - "PTRAIN_DICT_LONG", // 11 - "PTRAIN_FREQ_SHORT", // 12 - "PTRAIN_FREQ_MED", // 13 - "PTRAIN_FREQ_LONG", // 14 - "PTRAIN_SHAPE_COST_PER_CHAR", // 15 - "PTRAIN_NGRAM_COST_PER_CHAR", // 16 - "PTRAIN_NUM_BAD_PUNC", // 17 - "PTRAIN_NUM_BAD_CASE", // 18 - "PTRAIN_XHEIGHT_CONSISTENCY", // 19 - "PTRAIN_NUM_BAD_CHAR_TYPE", // 20 - "PTRAIN_NUM_BAD_SPACING", // 21 - "PTRAIN_NUM_BAD_FONT", // 22 - "PTRAIN_RATING_PER_CHAR", // 23 +static const char* const kParamsTrainingFeatureTypeName[] = { + "PTRAIN_DIGITS_SHORT", // 0 + "PTRAIN_DIGITS_MED", // 1 + "PTRAIN_DIGITS_LONG", // 2 + "PTRAIN_NUM_SHORT", // 3 + "PTRAIN_NUM_MED", // 4 + "PTRAIN_NUM_LONG", // 5 + "PTRAIN_DOC_SHORT", // 6 + "PTRAIN_DOC_MED", // 7 + "PTRAIN_DOC_LONG", // 8 + "PTRAIN_DICT_SHORT", // 9 + "PTRAIN_DICT_MED", // 10 + "PTRAIN_DICT_LONG", // 11 + "PTRAIN_FREQ_SHORT", // 12 + "PTRAIN_FREQ_MED", // 13 + "PTRAIN_FREQ_LONG", // 14 + "PTRAIN_SHAPE_COST_PER_CHAR", // 15 + "PTRAIN_NGRAM_COST_PER_CHAR", // 16 + "PTRAIN_NUM_BAD_PUNC", // 17 + "PTRAIN_NUM_BAD_CASE", // 18 + "PTRAIN_XHEIGHT_CONSISTENCY", // 19 + "PTRAIN_NUM_BAD_CHAR_TYPE", // 20 + "PTRAIN_NUM_BAD_SPACING", // 21 + "PTRAIN_NUM_BAD_FONT", // 22 + "PTRAIN_RATING_PER_CHAR", // 23 }; // Returns the index of the given feature (by name), // or -1 meaning the feature is unknown. -int ParamsTrainingFeatureByName(const char *name); - +int ParamsTrainingFeatureByName(const char* name); // Entry with features extracted from a single OCR hypothesis for a word. struct ParamsTrainingHypothesis { ParamsTrainingHypothesis() : cost(0.0) { memset(features, 0, sizeof(float) * PTRAIN_NUM_FEATURE_TYPES); } - ParamsTrainingHypothesis(const ParamsTrainingHypothesis &other) { - memcpy(features, other.features, - sizeof(float) * PTRAIN_NUM_FEATURE_TYPES); + ParamsTrainingHypothesis(const ParamsTrainingHypothesis& other) { + memcpy(features, other.features, sizeof(float) * PTRAIN_NUM_FEATURE_TYPES); str = other.str; cost = other.cost; } @@ -134,8 +132,8 @@ class ParamsTrainingBundle { } // Adds a new ParamsTrainingHypothesis to the current hypothesis list // and returns the reference to the newly added entry. - ParamsTrainingHypothesis &AddHypothesis( - const ParamsTrainingHypothesis &other) { + ParamsTrainingHypothesis& AddHypothesis( + const ParamsTrainingHypothesis& other) { if (hyp_list_vec.empty()) StartHypothesisList(); hyp_list_vec.back().push_back(ParamsTrainingHypothesis(other)); return hyp_list_vec.back().back(); diff --git a/src/ccstruct/pdblock.cpp b/src/ccstruct/pdblock.cpp index ad9db418fe..e15e9a6390 100644 --- a/src/ccstruct/pdblock.cpp +++ b/src/ccstruct/pdblock.cpp @@ -28,45 +28,46 @@ #include "config_auto.h" #endif -#define BLOCK_LABEL_HEIGHT 150 //char height of block id +#define BLOCK_LABEL_HEIGHT 150 // char height of block id -CLISTIZE (PDBLK) +CLISTIZE(PDBLK) /********************************************************************** * PDBLK::PDBLK * * Constructor for a simple rectangular block. **********************************************************************/ -PDBLK::PDBLK ( //rectangular block -int16_t xmin, //bottom left -int16_t ymin, int16_t xmax, //top right -int16_t ymax): box (ICOORD (xmin, ymin), ICOORD (xmax, ymax)) { - //boundaries +PDBLK::PDBLK( // rectangular block + int16_t xmin, // bottom left + int16_t ymin, + int16_t xmax, // top right + int16_t ymax) + : box(ICOORD(xmin, ymin), ICOORD(xmax, ymax)) { + // boundaries ICOORDELT_IT left_it = &leftside; ICOORDELT_IT right_it = &rightside; hand_poly = nullptr; - left_it.set_to_list (&leftside); - right_it.set_to_list (&rightside); - //make default box - left_it.add_to_end (new ICOORDELT (xmin, ymin)); - left_it.add_to_end (new ICOORDELT (xmin, ymax)); - right_it.add_to_end (new ICOORDELT (xmax, ymin)); - right_it.add_to_end (new ICOORDELT (xmax, ymax)); + left_it.set_to_list(&leftside); + right_it.set_to_list(&rightside); + // make default box + left_it.add_to_end(new ICOORDELT(xmin, ymin)); + left_it.add_to_end(new ICOORDELT(xmin, ymax)); + right_it.add_to_end(new ICOORDELT(xmax, ymin)); + right_it.add_to_end(new ICOORDELT(xmax, ymax)); index_ = 0; } - /********************************************************************** * PDBLK::set_sides * * Sets left and right vertex lists **********************************************************************/ -void PDBLK::set_sides( //set vertex lists - ICOORDELT_LIST *left, //left vertices - ICOORDELT_LIST *right //right vertices - ) { - //boundaries +void PDBLK::set_sides( // set vertex lists + ICOORDELT_LIST* left, // left vertices + ICOORDELT_LIST* right // right vertices +) { + // boundaries ICOORDELT_IT left_it = &leftside; ICOORDELT_IT right_it = &rightside; @@ -84,44 +85,41 @@ void PDBLK::set_sides( //set vertex lists * Return TRUE if the given point is within the block. **********************************************************************/ -bool PDBLK::contains( //test containment - ICOORD pt //point to test +bool PDBLK::contains( // test containment + ICOORD pt // point to test ) { - BLOCK_RECT_IT it = this; //rectangle iterator - ICOORD bleft, tright; //corners of rectangle + BLOCK_RECT_IT it = this; // rectangle iterator + ICOORD bleft, tright; // corners of rectangle for (it.start_block(); !it.cycled_rects(); it.forward()) { - //get rectangle - it.bounding_box (bleft, tright); - //inside rect - if (pt.x() >= bleft.x() && pt.x() <= tright.x() - && pt.y() >= bleft.y() && pt.y() <= tright.y()) - return true; //is inside + // get rectangle + it.bounding_box(bleft, tright); + // inside rect + if (pt.x() >= bleft.x() && pt.x() <= tright.x() && pt.y() >= bleft.y() && + pt.y() <= tright.y()) + return true; // is inside } - return false; //not inside + return false; // not inside } - /********************************************************************** * PDBLK::move * * Reposition block **********************************************************************/ -void PDBLK::move( // reposition block - const ICOORD vec // by vector - ) { +void PDBLK::move( // reposition block + const ICOORD vec // by vector +) { ICOORDELT_IT it(&leftside); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - *(it.data ()) += vec; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) *(it.data()) += vec; - it.set_to_list (&rightside); + it.set_to_list(&rightside); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - *(it.data ()) += vec; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) *(it.data()) += vec; - box.move (vec); + box.move(vec); } // Returns a binary Pix mask with a 1 pixel for every pixel within the @@ -139,7 +137,7 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) { image_block.rotate(rerotation); // Block outline is a polygon, so use a PB_LINE_IT to get the // rasterized interior. (Runs of interior pixels on a line.) - PB_LINE_IT *lines = new PB_LINE_IT(&image_block); + PB_LINE_IT* lines = new PB_LINE_IT(&image_block); for (int y = box.bottom(); y < box.top(); ++y) { const std::unique_ptr segments( lines->get_line(y)); @@ -160,14 +158,13 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) { delete lines; } else { // Just fill the whole block as there is only a bounding box. - pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(), - PIX_SET, nullptr, 0, 0); + pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(), PIX_SET, + nullptr, 0, 0); } if (mask_box != nullptr) *mask_box = rotated_box; return pix; } - /********************************************************************** * PDBLK::plot * @@ -175,56 +172,55 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) { **********************************************************************/ #ifndef GRAPHICS_DISABLED -void PDBLK::plot( //draw outline - ScrollView* window, //window to draw in - int32_t serial, //serial number - ScrollView::Color colour //colour to draw in - ) { - ICOORD startpt; //start of outline - ICOORD endpt; //end of outline - ICOORD prevpt; //previous point - ICOORDELT_IT it = &leftside; //iterator - - //set the colour +void PDBLK::plot( // draw outline + ScrollView* window, // window to draw in + int32_t serial, // serial number + ScrollView::Color colour // colour to draw in +) { + ICOORD startpt; // start of outline + ICOORD endpt; // end of outline + ICOORD prevpt; // previous point + ICOORDELT_IT it = &leftside; // iterator + + // set the colour window->Pen(colour); window->TextAttributes("Times", BLOCK_LABEL_HEIGHT, false, false, false); if (hand_poly != nullptr) { hand_poly->plot(window, serial); - } else if (!leftside.empty ()) { - startpt = *(it.data ()); //bottom left corner + } else if (!leftside.empty()) { + startpt = *(it.data()); // bottom left corner // tprintf("Block %d bottom left is (%d,%d)\n", // serial,startpt.x(),startpt.y()); char temp_buff[34]; - #if defined(__UNIX__) || defined(MINGW) +#if defined(__UNIX__) || defined(MINGW) snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, serial); - #else - ultoa (serial, temp_buff, 10); - #endif - window->Text(startpt.x (), startpt.y (), temp_buff); +#else + ultoa(serial, temp_buff, 10); +#endif + window->Text(startpt.x(), startpt.y(), temp_buff); - window->SetCursor(startpt.x (), startpt.y ()); + window->SetCursor(startpt.x(), startpt.y()); do { - prevpt = *(it.data ()); //previous point - it.forward (); //move to next point - //draw round corner - window->DrawTo(prevpt.x (), it.data ()->y ()); - window->DrawTo(it.data ()->x (), it.data ()->y ()); - } - while (!it.at_last ()); //until end of list - endpt = *(it.data ()); //end point - - //other side of boundary - window->SetCursor(startpt.x (), startpt.y ()); - it.set_to_list (&rightside); + prevpt = *(it.data()); // previous point + it.forward(); // move to next point + // draw round corner + window->DrawTo(prevpt.x(), it.data()->y()); + window->DrawTo(it.data()->x(), it.data()->y()); + } while (!it.at_last()); // until end of list + endpt = *(it.data()); // end point + + // other side of boundary + window->SetCursor(startpt.x(), startpt.y()); + it.set_to_list(&rightside); prevpt = startpt; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - //draw round corner - window->DrawTo(prevpt.x (), it.data ()->y ()); - window->DrawTo(it.data ()->x (), it.data ()->y ()); - prevpt = *(it.data ()); //previous point + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + // draw round corner + window->DrawTo(prevpt.x(), it.data()->y()); + window->DrawTo(it.data()->x(), it.data()->y()); + prevpt = *(it.data()); // previous point } - //close boundary + // close boundary window->DrawTo(endpt.x(), endpt.y()); } } @@ -236,144 +232,135 @@ void PDBLK::plot( //draw outline * Assignment - duplicate the block structure, but with an EMPTY row list. **********************************************************************/ -PDBLK & PDBLK::operator= ( //assignment -const PDBLK & source //from this +PDBLK& PDBLK::operator=( // assignment + const PDBLK& source // from this ) { // this->ELIST_LINK::operator=(source); - if (!leftside.empty ()) - leftside.clear (); - if (!rightside.empty ()) - rightside.clear (); + if (!leftside.empty()) leftside.clear(); + if (!rightside.empty()) rightside.clear(); leftside.deep_copy(&source.leftside, &ICOORDELT::deep_copy); rightside.deep_copy(&source.rightside, &ICOORDELT::deep_copy); box = source.box; return *this; } - /********************************************************************** * BLOCK_RECT_IT::BLOCK_RECT_IT * * Construct a block rectangle iterator. **********************************************************************/ -BLOCK_RECT_IT::BLOCK_RECT_IT ( -//iterate rectangles -PDBLK * blkptr //from block -):left_it (&blkptr->leftside), right_it (&blkptr->rightside) { - block = blkptr; //remember block - //non empty list - if (!blkptr->leftside.empty ()) { - start_block(); //ready for iteration +BLOCK_RECT_IT::BLOCK_RECT_IT( + // iterate rectangles + PDBLK* blkptr // from block + ) + : left_it(&blkptr->leftside), right_it(&blkptr->rightside) { + block = blkptr; // remember block + // non empty list + if (!blkptr->leftside.empty()) { + start_block(); // ready for iteration } } - /********************************************************************** * BLOCK_RECT_IT::set_to_block * * Start a new block. **********************************************************************/ -void BLOCK_RECT_IT::set_to_block( //start (new) block - PDBLK *blkptr) { //block to start - block = blkptr; //remember block - //set iterators - left_it.set_to_list (&blkptr->leftside); - right_it.set_to_list (&blkptr->rightside); - if (!blkptr->leftside.empty ()) - start_block(); //ready for iteration +void BLOCK_RECT_IT::set_to_block( // start (new) block + PDBLK* blkptr) { // block to start + block = blkptr; // remember block + // set iterators + left_it.set_to_list(&blkptr->leftside); + right_it.set_to_list(&blkptr->rightside); + if (!blkptr->leftside.empty()) start_block(); // ready for iteration } - /********************************************************************** * BLOCK_RECT_IT::start_block * * Restart a block. **********************************************************************/ -void BLOCK_RECT_IT::start_block() { //start (new) block - left_it.move_to_first (); - right_it.move_to_first (); - left_it.mark_cycle_pt (); - right_it.mark_cycle_pt (); - ymin = left_it.data ()->y (); //bottom of first box - ymax = left_it.data_relative (1)->y (); - if (right_it.data_relative (1)->y () < ymax) - //smallest step - ymax = right_it.data_relative (1)->y (); +void BLOCK_RECT_IT::start_block() { // start (new) block + left_it.move_to_first(); + right_it.move_to_first(); + left_it.mark_cycle_pt(); + right_it.mark_cycle_pt(); + ymin = left_it.data()->y(); // bottom of first box + ymax = left_it.data_relative(1)->y(); + if (right_it.data_relative(1)->y() < ymax) + // smallest step + ymax = right_it.data_relative(1)->y(); } - /********************************************************************** * BLOCK_RECT_IT::forward * * Move to the next rectangle in the block. **********************************************************************/ -void BLOCK_RECT_IT::forward() { //next rectangle - if (!left_it.empty ()) { //non-empty list - if (left_it.data_relative (1)->y () == ymax) - left_it.forward (); //move to meet top - if (right_it.data_relative (1)->y () == ymax) - right_it.forward (); - //last is special - if (left_it.at_last () || right_it.at_last ()) { - left_it.move_to_first (); //restart - right_it.move_to_first (); - //now at bottom - ymin = left_it.data ()->y (); - } - else { - ymin = ymax; //new bottom +void BLOCK_RECT_IT::forward() { // next rectangle + if (!left_it.empty()) { // non-empty list + if (left_it.data_relative(1)->y() == ymax) + left_it.forward(); // move to meet top + if (right_it.data_relative(1)->y() == ymax) right_it.forward(); + // last is special + if (left_it.at_last() || right_it.at_last()) { + left_it.move_to_first(); // restart + right_it.move_to_first(); + // now at bottom + ymin = left_it.data()->y(); + } else { + ymin = ymax; // new bottom } - //next point - ymax = left_it.data_relative (1)->y (); - if (right_it.data_relative (1)->y () < ymax) - //least step forward - ymax = right_it.data_relative (1)->y (); + // next point + ymax = left_it.data_relative(1)->y(); + if (right_it.data_relative(1)->y() < ymax) + // least step forward + ymax = right_it.data_relative(1)->y(); } } - /********************************************************************** * BLOCK_LINE_IT::get_line * * Get the the start and width of a line in the block. **********************************************************************/ -int16_t BLOCK_LINE_IT::get_line( //get a line - int16_t y, //line to get - int16_t &xext //output extent - ) { - ICOORD bleft; //bounding box - ICOORD tright; //of block & rect +int16_t BLOCK_LINE_IT::get_line( // get a line + int16_t y, // line to get + int16_t& xext // output extent +) { + ICOORD bleft; // bounding box + ICOORD tright; // of block & rect - //get block box - block->bounding_box (bleft, tright); - if (y < bleft.y () || y >= tright.y ()) { + // get block box + block->bounding_box(bleft, tright); + if (y < bleft.y() || y >= tright.y()) { // block->print(stderr,FALSE); - BADBLOCKLINE.error ("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y); + BADBLOCKLINE.error("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y); } - //get rectangle box - rect_it.bounding_box (bleft, tright); - //inside rectangle - if (y >= bleft.y () && y < tright.y ()) { - //width of line - xext = tright.x () - bleft.x (); - return bleft.x (); //start of line + // get rectangle box + rect_it.bounding_box(bleft, tright); + // inside rectangle + if (y >= bleft.y() && y < tright.y()) { + // width of line + xext = tright.x() - bleft.x(); + return bleft.x(); // start of line } - for (rect_it.start_block (); !rect_it.cycled_rects (); rect_it.forward ()) { - //get rectangle box - rect_it.bounding_box (bleft, tright); - //inside rectangle - if (y >= bleft.y () && y < tright.y ()) { - //width of line - xext = tright.x () - bleft.x (); - return bleft.x (); //start of line + for (rect_it.start_block(); !rect_it.cycled_rects(); rect_it.forward()) { + // get rectangle box + rect_it.bounding_box(bleft, tright); + // inside rectangle + if (y >= bleft.y() && y < tright.y()) { + // width of line + xext = tright.x() - bleft.x(); + return bleft.x(); // start of line } } - LOSTBLOCKLINE.error ("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y); - return 0; //dummy to stop warning + LOSTBLOCKLINE.error("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y); + return 0; // dummy to stop warning } diff --git a/src/ccstruct/pdblock.h b/src/ccstruct/pdblock.h index 86b1601d0c..b203fe9c9e 100644 --- a/src/ccstruct/pdblock.h +++ b/src/ccstruct/pdblock.h @@ -17,21 +17,21 @@ * **********************************************************************/ -#ifndef PDBLOCK_H -#define PDBLOCK_H +#ifndef PDBLOCK_H +#define PDBLOCK_H -#include "clst.h" -#include "strngs.h" -#include "polyblk.h" +#include "clst.h" +#include "polyblk.h" +#include "strngs.h" -class DLLSYM PDBLK; //forward decl +class DLLSYM PDBLK; // forward decl struct Pix; -CLISTIZEH (PDBLK) -///page block +CLISTIZEH(PDBLK) +/// page block class PDBLK { - friend class BLOCK_RECT_IT; //< block iterator - friend class BLOCK; //< Page Block + friend class BLOCK_RECT_IT; //< block iterator + friend class BLOCK; //< Page Block public: /// empty constructor @@ -48,22 +48,22 @@ class PDBLK { /// set vertex lists ///@param left list of left vertices ///@param right list of right vertices - void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right); + void set_sides(ICOORDELT_LIST* left, ICOORDELT_LIST* right); /// destructor ~PDBLK() { delete hand_poly; } - POLY_BLOCK *poly_block() const { return hand_poly; } + POLY_BLOCK* poly_block() const { return hand_poly; } /// set the poly block - void set_poly_block(POLY_BLOCK *blk) { hand_poly = blk; } + void set_poly_block(POLY_BLOCK* blk) { hand_poly = blk; } /// get box - void bounding_box(ICOORD &bottom_left, // bottom left - ICOORD &top_right) const { // topright + void bounding_box(ICOORD& bottom_left, // bottom left + ICOORD& top_right) const { // topright bottom_left = box.botleft(); top_right = box.topright(); } /// get real box - const TBOX &bounding_box() const { return box; } + const TBOX& bounding_box() const { return box; } int index() const { return index_; } void set_index(int value) { index_ = value; } @@ -78,99 +78,93 @@ class PDBLK { // block. Rotates the coordinate system by rerotation prior to rendering. // If not nullptr, mask_box is filled with the position box of the returned // mask image. - Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box); + Pix* render_mask(const FCOORD& rerotation, TBOX* mask_box); #ifndef GRAPHICS_DISABLED /// draw histogram ///@param window window to draw in ///@param serial serial number ///@param colour colour to draw in - void plot(ScrollView *window, int32_t serial, ScrollView::Color colour); + void plot(ScrollView* window, int32_t serial, ScrollView::Color colour); #endif // GRAPHICS_DISABLED /// assignment ///@param source from this - PDBLK &operator=(const PDBLK &source); + PDBLK& operator=(const PDBLK& source); protected: - POLY_BLOCK *hand_poly; //< weird as well + POLY_BLOCK* hand_poly; //< weird as well ICOORDELT_LIST leftside; //< left side vertices ICOORDELT_LIST rightside; //< right side vertices TBOX box; //< bounding box int index_; //< Serial number of this block. }; -class DLLSYM BLOCK_RECT_IT //rectangle iterator +class DLLSYM BLOCK_RECT_IT // rectangle iterator { - public: - ///constructor - ///@param blkptr block to iterate - BLOCK_RECT_IT(PDBLK *blkptr); - - ///start (new) block - void set_to_block ( - PDBLK * blkptr); //block to iterate - - ///start iteration - void start_block(); - - ///next rectangle - void forward(); - - ///test end - bool cycled_rects() { - return left_it.cycled_list() && right_it.cycled_list(); - } - - ///current rectangle - ///@param bleft bottom left - ///@param tright top right - void bounding_box(ICOORD &bleft, - ICOORD &tright) { - //bottom left - bleft = ICOORD (left_it.data ()->x (), ymin); - //top right - tright = ICOORD (right_it.data ()->x (), ymax); - } - - private: - int16_t ymin; //< bottom of rectangle - int16_t ymax; //< top of rectangle - PDBLK *block; //< block to iterate - ICOORDELT_IT left_it; //< boundary iterators - ICOORDELT_IT right_it; + public: + /// constructor + ///@param blkptr block to iterate + BLOCK_RECT_IT(PDBLK* blkptr); + + /// start (new) block + void set_to_block(PDBLK* blkptr); // block to iterate + + /// start iteration + void start_block(); + + /// next rectangle + void forward(); + + /// test end + bool cycled_rects() { + return left_it.cycled_list() && right_it.cycled_list(); + } + + /// current rectangle + ///@param bleft bottom left + ///@param tright top right + void bounding_box(ICOORD& bleft, ICOORD& tright) { + // bottom left + bleft = ICOORD(left_it.data()->x(), ymin); + // top right + tright = ICOORD(right_it.data()->x(), ymax); + } + + private: + int16_t ymin; //< bottom of rectangle + int16_t ymax; //< top of rectangle + PDBLK* block; //< block to iterate + ICOORDELT_IT left_it; //< boundary iterators + ICOORDELT_IT right_it; }; -///rectangle iterator -class DLLSYM BLOCK_LINE_IT -{ - public: - ///constructor - ///@param blkptr from block - BLOCK_LINE_IT (PDBLK * blkptr) - :rect_it (blkptr) { - block = blkptr; //remember block - } - - ///start (new) block - ///@param blkptr block to start - void set_to_block (PDBLK * blkptr) { - block = blkptr; //remember block - //set iterator - rect_it.set_to_block (blkptr); - } - - ///get a line - ///@param y line to get - ///@param xext output extent - int16_t get_line(int16_t y, - int16_t &xext); - - private: - PDBLK * block; //< block to iterate - BLOCK_RECT_IT rect_it; //< rectangle iterator +/// rectangle iterator +class DLLSYM BLOCK_LINE_IT { + public: + /// constructor + ///@param blkptr from block + BLOCK_LINE_IT(PDBLK* blkptr) : rect_it(blkptr) { + block = blkptr; // remember block + } + + /// start (new) block + ///@param blkptr block to start + void set_to_block(PDBLK* blkptr) { + block = blkptr; // remember block + // set iterator + rect_it.set_to_block(blkptr); + } + + /// get a line + ///@param y line to get + ///@param xext output extent + int16_t get_line(int16_t y, int16_t& xext); + + private: + PDBLK* block; //< block to iterate + BLOCK_RECT_IT rect_it; //< rectangle iterator }; -int decreasing_top_order(const void *row1, - const void *row2); +int decreasing_top_order(const void* row1, const void* row2); #endif diff --git a/src/ccstruct/points.cpp b/src/ccstruct/points.cpp index 3efd4f9c73..f7ff923439 100644 --- a/src/ccstruct/points.cpp +++ b/src/ccstruct/points.cpp @@ -21,16 +21,16 @@ #define _USE_MATH_DEFINES #endif // _MSC_VER -#include +#include "points.h" #include +#include #include "helpers.h" #include "ndminx.h" #include "serialis.h" -#include "points.h" -ELISTIZE (ICOORDELT) //turn to list -bool FCOORD::normalise() { //Convert to unit vec - float len = length (); +ELISTIZE(ICOORDELT) // turn to list +bool FCOORD::normalise() { // Convert to unit vec + float len = length(); if (len < 0.0000000001) { return false; @@ -45,8 +45,7 @@ void ICOORD::set_with_shrink(int x, int y) { // Fit the vector into an ICOORD, which is 16 bit. int factor = 1; int max_extent = std::max(abs(x), abs(y)); - if (max_extent > INT16_MAX) - factor = max_extent / INT16_MAX + 1; + if (max_extent > INT16_MAX) factor = max_extent / INT16_MAX + 1; xcoord = x / factor; ycoord = y / factor; } @@ -84,8 +83,8 @@ bool ICOORD::DeSerialize(bool swap, FILE* fp) { // and then add minor to the accumulator. When the accumulator >= major // subtract major and step a minor step. -void ICOORD::setup_render(ICOORD* major_step, ICOORD* minor_step, - int* major, int* minor) const { +void ICOORD::setup_render(ICOORD* major_step, ICOORD* minor_step, int* major, + int* minor) const { int abs_x = abs(xcoord); int abs_y = abs(ycoord); if (abs_x >= abs_y) { @@ -109,9 +108,7 @@ void ICOORD::setup_render(ICOORD* major_step, ICOORD* minor_step, // Returns the standard feature direction corresponding to this. // See binary_angle_plus_pi below for a description of the direction. -uint8_t FCOORD::to_direction() const { - return binary_angle_plus_pi(angle()); -} +uint8_t FCOORD::to_direction() const { return binary_angle_plus_pi(angle()); } // Sets this with a unit vector in the given standard feature direction. void FCOORD::from_direction(uint8_t direction) { double radians = angle_from_direction(direction); @@ -134,8 +131,9 @@ double FCOORD::angle_from_direction(uint8_t direction) { // Returns the point on the given line nearest to this, ie the point such // that the vector point->this is perpendicular to the line. // The line is defined as a line_point and a dir_vector for its direction. -FCOORD FCOORD::nearest_pt_on_line(const FCOORD& line_point, - const FCOORD& dir_vector) const { +FCOORD +FCOORD::nearest_pt_on_line(const FCOORD& line_point, + const FCOORD& dir_vector) const { FCOORD point_vector(*this - line_point); // The dot product (%) is |dir_vector||point_vector|cos theta, so dividing by // the square of the length of dir_vector gives us the fraction of dir_vector diff --git a/src/ccstruct/points.h b/src/ccstruct/points.h index a434ebdd11..57e4c45614 100644 --- a/src/ccstruct/points.h +++ b/src/ccstruct/points.h @@ -17,304 +17,277 @@ * **********************************************************************/ -#ifndef POINTS_H -#define POINTS_H +#ifndef POINTS_H +#define POINTS_H -#include -#include -#include "elst.h" +#include +#include +#include "elst.h" class FCOORD; -///integer coordinate -class ICOORD -{ +/// integer coordinate +class ICOORD { friend class FCOORD; - public: - ///empty constructor - ICOORD() { - xcoord = ycoord = 0; //default zero - } - ///constructor - ///@param xin x value - ///@param yin y value - ICOORD(int16_t xin, - int16_t yin) { - xcoord = xin; - ycoord = yin; - } - ///destructor - ~ICOORD () = default; - - ///access function - int16_t x() const { - return xcoord; - } - ///access_function - int16_t y() const { - return ycoord; - } - - ///rewrite function - void set_x(int16_t xin) { - xcoord = xin; //write new value - } - ///rewrite function - void set_y(int16_t yin) { //value to set - ycoord = yin; - } - - /// Set from the given x,y, shrinking the vector to fit if needed. - void set_with_shrink(int x, int y); - - ///find sq length - float sqlength() const { - return (float) (xcoord * xcoord + ycoord * ycoord); - } - - ///find length - float length() const { - return (float) sqrt (sqlength ()); - } - - ///sq dist between pts - float pt_to_pt_sqdist(const ICOORD &pt) const { - ICOORD gap; - - gap.xcoord = xcoord - pt.xcoord; - gap.ycoord = ycoord - pt.ycoord; - return gap.sqlength (); - } - - ///Distance between pts - float pt_to_pt_dist(const ICOORD &pt) const { - return (float) sqrt (pt_to_pt_sqdist (pt)); - } - - ///find angle - float angle() const { - return (float) atan2 ((double) ycoord, (double) xcoord); - } - - ///test equality - bool operator== (const ICOORD & other) const { - return xcoord == other.xcoord && ycoord == other.ycoord; - } - ///test inequality - bool operator!= (const ICOORD & other) const { - return xcoord != other.xcoord || ycoord != other.ycoord; - } - ///rotate 90 deg anti - friend ICOORD operator! (const ICOORD &); - ///unary minus - friend ICOORD operator- (const ICOORD &); - ///add - friend ICOORD operator+ (const ICOORD &, const ICOORD &); - ///add - friend ICOORD & operator+= (ICOORD &, const ICOORD &); - ///subtract - friend ICOORD operator- (const ICOORD &, const ICOORD &); - ///subtract - friend ICOORD & operator-= (ICOORD &, const ICOORD &); - ///scalar product - friend int32_t operator% (const ICOORD &, const ICOORD &); - ///cross product - friend int32_t operator *(const ICOORD &, - const ICOORD &); - ///multiply - friend ICOORD operator *(const ICOORD &, - int16_t); - ///multiply - friend ICOORD operator *(int16_t, - const ICOORD &); - ///multiply - friend ICOORD & operator*= (ICOORD &, int16_t); - ///divide - friend ICOORD operator/ (const ICOORD &, int16_t); - ///divide - friend ICOORD & operator/= (ICOORD &, int16_t); - ///rotate - ///@param vec by vector - void rotate(const FCOORD& vec); - - /// Setup for iterating over the pixels in a vector by the well-known - /// Bresenham rendering algorithm. - /// Starting with major/2 in the accumulator, on each step move by - /// major_step, and then add minor to the accumulator. When - /// accumulator >= major subtract major and also move by minor_step. - void setup_render(ICOORD* major_step, ICOORD* minor_step, - int* major, int* minor) const; - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - protected: - int16_t xcoord; //< x value - int16_t ycoord; //< y value + public: + /// empty constructor + ICOORD() { + xcoord = ycoord = 0; // default zero + } + /// constructor + ///@param xin x value + ///@param yin y value + ICOORD(int16_t xin, int16_t yin) { + xcoord = xin; + ycoord = yin; + } + /// destructor + ~ICOORD() = default; + + /// access function + int16_t x() const { return xcoord; } + /// access_function + int16_t y() const { return ycoord; } + + /// rewrite function + void set_x(int16_t xin) { + xcoord = xin; // write new value + } + /// rewrite function + void set_y(int16_t yin) { // value to set + ycoord = yin; + } + + /// Set from the given x,y, shrinking the vector to fit if needed. + void set_with_shrink(int x, int y); + + /// find sq length + float sqlength() const { return (float)(xcoord * xcoord + ycoord * ycoord); } + + /// find length + float length() const { return (float)sqrt(sqlength()); } + + /// sq dist between pts + float pt_to_pt_sqdist(const ICOORD& pt) const { + ICOORD gap; + + gap.xcoord = xcoord - pt.xcoord; + gap.ycoord = ycoord - pt.ycoord; + return gap.sqlength(); + } + + /// Distance between pts + float pt_to_pt_dist(const ICOORD& pt) const { + return (float)sqrt(pt_to_pt_sqdist(pt)); + } + + /// find angle + float angle() const { return (float)atan2((double)ycoord, (double)xcoord); } + + /// test equality + bool operator==(const ICOORD& other) const { + return xcoord == other.xcoord && ycoord == other.ycoord; + } + /// test inequality + bool operator!=(const ICOORD& other) const { + return xcoord != other.xcoord || ycoord != other.ycoord; + } + /// rotate 90 deg anti + friend ICOORD operator!(const ICOORD&); + /// unary minus + friend ICOORD operator-(const ICOORD&); + /// add + friend ICOORD operator+(const ICOORD&, const ICOORD&); + /// add + friend ICOORD& operator+=(ICOORD&, const ICOORD&); + /// subtract + friend ICOORD operator-(const ICOORD&, const ICOORD&); + /// subtract + friend ICOORD& operator-=(ICOORD&, const ICOORD&); + /// scalar product + friend int32_t operator%(const ICOORD&, const ICOORD&); + /// cross product + friend int32_t operator*(const ICOORD&, const ICOORD&); + /// multiply + friend ICOORD operator*(const ICOORD&, int16_t); + /// multiply + friend ICOORD operator*(int16_t, const ICOORD&); + /// multiply + friend ICOORD& operator*=(ICOORD&, int16_t); + /// divide + friend ICOORD operator/(const ICOORD&, int16_t); + /// divide + friend ICOORD& operator/=(ICOORD&, int16_t); + /// rotate + ///@param vec by vector + void rotate(const FCOORD& vec); + + /// Setup for iterating over the pixels in a vector by the well-known + /// Bresenham rendering algorithm. + /// Starting with major/2 in the accumulator, on each step move by + /// major_step, and then add minor to the accumulator. When + /// accumulator >= major subtract major and also move by minor_step. + void setup_render(ICOORD* major_step, ICOORD* minor_step, int* major, + int* minor) const; + + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + bool DeSerialize(bool swap, FILE* fp); + + protected: + int16_t xcoord; //< x value + int16_t ycoord; //< y value }; -class DLLSYM ICOORDELT:public ELIST_LINK, public ICOORD - //embedded coord list +class DLLSYM ICOORDELT : public ELIST_LINK, + public ICOORD +// embedded coord list { - public: - ///empty constructor - ICOORDELT() = default; - ///constructor from ICOORD - ICOORDELT (ICOORD icoord):ICOORD (icoord) { - } - ///constructor - ///@param xin x value - ///@param yin y value - ICOORDELT(int16_t xin, - int16_t yin) { - xcoord = xin; - ycoord = yin; - } - - static ICOORDELT* deep_copy(const ICOORDELT* src) { - ICOORDELT* elt = new ICOORDELT; - *elt = *src; - return elt; - } - + public: + /// empty constructor + ICOORDELT() = default; + /// constructor from ICOORD + ICOORDELT(ICOORD icoord) : ICOORD(icoord) {} + /// constructor + ///@param xin x value + ///@param yin y value + ICOORDELT(int16_t xin, int16_t yin) { + xcoord = xin; + ycoord = yin; + } + + static ICOORDELT* deep_copy(const ICOORDELT* src) { + ICOORDELT* elt = new ICOORDELT; + *elt = *src; + return elt; + } }; -ELISTIZEH (ICOORDELT) -class DLLSYM FCOORD -{ - public: - ///empty constructor - FCOORD() = default; - ///constructor - ///@param xvalue x value - ///@param yvalue y value - FCOORD(float xvalue, - float yvalue) { - xcoord = xvalue; //set coords - ycoord = yvalue; - } - FCOORD( //make from ICOORD - ICOORD icoord) { //coords to set - xcoord = icoord.xcoord; - ycoord = icoord.ycoord; - } - - float x() const { //get coords - return xcoord; - } - float y() const { - return ycoord; - } - ///rewrite function - void set_x(float xin) { - xcoord = xin; //write new value - } - ///rewrite function - void set_y(float yin) { //value to set - ycoord = yin; - } - - ///find sq length - float sqlength() const { - return xcoord * xcoord + ycoord * ycoord; - } - - ///find length - float length() const { - return (float) sqrt (sqlength ()); - } - - ///sq dist between pts - float pt_to_pt_sqdist(const FCOORD &pt) const { - FCOORD gap; - - gap.xcoord = xcoord - pt.xcoord; - gap.ycoord = ycoord - pt.ycoord; - return gap.sqlength (); - } - - ///Distance between pts - float pt_to_pt_dist(const FCOORD &pt) const { - return (float) sqrt (pt_to_pt_sqdist (pt)); - } - - ///find angle - float angle() const { - return (float) atan2 (ycoord, xcoord); - } - // Returns the standard feature direction corresponding to this. - // See binary_angle_plus_pi below for a description of the direction. - uint8_t to_direction() const; - // Sets this with a unit vector in the given standard feature direction. - void from_direction(uint8_t direction); - - // Converts an angle in radians (from ICOORD::angle or FCOORD::angle) to a - // standard feature direction as an unsigned angle in 256ths of a circle - // measured anticlockwise from (-1, 0). - static uint8_t binary_angle_plus_pi(double angle); - // Inverse of binary_angle_plus_pi returns an angle in radians for the - // given standard feature direction. - static double angle_from_direction(uint8_t direction); - // Returns the point on the given line nearest to this, ie the point such - // that the vector point->this is perpendicular to the line. - // The line is defined as a line_point and a dir_vector for its direction. - // dir_vector need not be a unit vector. - FCOORD nearest_pt_on_line(const FCOORD& line_point, - const FCOORD& dir_vector) const; - - ///Convert to unit vec - bool normalise(); - - ///test equality - bool operator== (const FCOORD & other) { - return xcoord == other.xcoord && ycoord == other.ycoord; - } - ///test inequality - bool operator!= (const FCOORD & other) { - return xcoord != other.xcoord || ycoord != other.ycoord; - } - ///rotate 90 deg anti - friend FCOORD operator! (const FCOORD &); - ///unary minus - friend FCOORD operator- (const FCOORD &); - ///add - friend FCOORD operator+ (const FCOORD &, const FCOORD &); - ///add - friend FCOORD & operator+= (FCOORD &, const FCOORD &); - ///subtract - friend FCOORD operator- (const FCOORD &, const FCOORD &); - ///subtract - friend FCOORD & operator-= (FCOORD &, const FCOORD &); - ///scalar product - friend float operator% (const FCOORD &, const FCOORD &); - ///cross product - friend float operator *(const FCOORD &, const FCOORD &); - ///multiply - friend FCOORD operator *(const FCOORD &, float); - ///multiply - friend FCOORD operator *(float, const FCOORD &); - - ///multiply - friend FCOORD & operator*= (FCOORD &, float); - ///divide - friend FCOORD operator/ (const FCOORD &, float); - ///rotate - ///@param vec by vector - void rotate(const FCOORD vec); - // unrotate - undo a rotate(vec) - // @param vec by vector - void unrotate(const FCOORD &vec); - ///divide - friend FCOORD & operator/= (FCOORD &, float); - - private: - float xcoord; //2 floating coords - float ycoord; +ELISTIZEH(ICOORDELT) +class DLLSYM FCOORD { + public: + /// empty constructor + FCOORD() = default; + /// constructor + ///@param xvalue x value + ///@param yvalue y value + FCOORD(float xvalue, float yvalue) { + xcoord = xvalue; // set coords + ycoord = yvalue; + } + FCOORD( // make from ICOORD + ICOORD icoord) { // coords to set + xcoord = icoord.xcoord; + ycoord = icoord.ycoord; + } + + float x() const { // get coords + return xcoord; + } + float y() const { return ycoord; } + /// rewrite function + void set_x(float xin) { + xcoord = xin; // write new value + } + /// rewrite function + void set_y(float yin) { // value to set + ycoord = yin; + } + + /// find sq length + float sqlength() const { return xcoord * xcoord + ycoord * ycoord; } + + /// find length + float length() const { return (float)sqrt(sqlength()); } + + /// sq dist between pts + float pt_to_pt_sqdist(const FCOORD& pt) const { + FCOORD gap; + + gap.xcoord = xcoord - pt.xcoord; + gap.ycoord = ycoord - pt.ycoord; + return gap.sqlength(); + } + + /// Distance between pts + float pt_to_pt_dist(const FCOORD& pt) const { + return (float)sqrt(pt_to_pt_sqdist(pt)); + } + + /// find angle + float angle() const { return (float)atan2(ycoord, xcoord); } + // Returns the standard feature direction corresponding to this. + // See binary_angle_plus_pi below for a description of the direction. + uint8_t to_direction() const; + // Sets this with a unit vector in the given standard feature direction. + void from_direction(uint8_t direction); + + // Converts an angle in radians (from ICOORD::angle or FCOORD::angle) to a + // standard feature direction as an unsigned angle in 256ths of a circle + // measured anticlockwise from (-1, 0). + static uint8_t binary_angle_plus_pi(double angle); + // Inverse of binary_angle_plus_pi returns an angle in radians for the + // given standard feature direction. + static double angle_from_direction(uint8_t direction); + // Returns the point on the given line nearest to this, ie the point such + // that the vector point->this is perpendicular to the line. + // The line is defined as a line_point and a dir_vector for its direction. + // dir_vector need not be a unit vector. + FCOORD nearest_pt_on_line(const FCOORD& line_point, + const FCOORD& dir_vector) const; + + /// Convert to unit vec + bool normalise(); + + /// test equality + bool operator==(const FCOORD& other) { + return xcoord == other.xcoord && ycoord == other.ycoord; + } + /// test inequality + bool operator!=(const FCOORD& other) { + return xcoord != other.xcoord || ycoord != other.ycoord; + } + /// rotate 90 deg anti + friend FCOORD operator!(const FCOORD&); + /// unary minus + friend FCOORD operator-(const FCOORD&); + /// add + friend FCOORD operator+(const FCOORD&, const FCOORD&); + /// add + friend FCOORD& operator+=(FCOORD&, const FCOORD&); + /// subtract + friend FCOORD operator-(const FCOORD&, const FCOORD&); + /// subtract + friend FCOORD& operator-=(FCOORD&, const FCOORD&); + /// scalar product + friend float operator%(const FCOORD&, const FCOORD&); + /// cross product + friend float operator*(const FCOORD&, const FCOORD&); + /// multiply + friend FCOORD operator*(const FCOORD&, float); + /// multiply + friend FCOORD operator*(float, const FCOORD&); + + /// multiply + friend FCOORD& operator*=(FCOORD&, float); + /// divide + friend FCOORD operator/(const FCOORD&, float); + /// rotate + ///@param vec by vector + void rotate(const FCOORD vec); + // unrotate - undo a rotate(vec) + // @param vec by vector + void unrotate(const FCOORD& vec); + /// divide + friend FCOORD& operator/=(FCOORD&, float); + + private: + float xcoord; // 2 floating coords + float ycoord; }; -#include "ipoints.h" /*do inline funcs */ +#include "ipoints.h" /*do inline funcs */ #endif diff --git a/src/ccstruct/polyaprx.cpp b/src/ccstruct/polyaprx.cpp index 371a32cdba..f2b564458e 100644 --- a/src/ccstruct/polyaprx.cpp +++ b/src/ccstruct/polyaprx.cpp @@ -17,14 +17,14 @@ * **********************************************************************/ -#include +#include #ifdef __UNIX__ -#include +#include #endif -#define FASTEDGELENGTH 256 -#include "polyaprx.h" -#include "params.h" -#include "tprintf.h" +#define FASTEDGELENGTH 256 +#include "params.h" +#include "polyaprx.h" +#include "tprintf.h" #define EXTERN @@ -32,21 +32,20 @@ EXTERN BOOL_VAR(poly_debug, FALSE, "Debug old poly"); EXTERN BOOL_VAR(poly_wide_objects_better, TRUE, "More accurate approx on wide things"); -#define FIXED 4 /*OUTLINE point is fixed */ +#define FIXED 4 /*OUTLINE point is fixed */ -#define RUNLENGTH 1 /*length of run */ +#define RUNLENGTH 1 /*length of run */ -#define DIR 2 /*direction of run */ +#define DIR 2 /*direction of run */ -#define FLAGS 0 +#define FLAGS 0 -#define fixed_dist 20 //really an int_variable -#define approx_dist 15 //really an int_variable +#define fixed_dist 20 // really an int_variable +#define approx_dist 15 // really an int_variable const int par1 = 4500 / (approx_dist * approx_dist); const int par2 = 6750 / (approx_dist * approx_dist); - /********************************************************************** * tesspoly_outline * @@ -56,10 +55,9 @@ const int par2 = 6750 / (approx_dist * approx_dist); * feature extraction that does not use the polygonal approximation. **********************************************************************/ - TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) { - TBOX loop_box; // bounding box - int32_t area; // loop area + TBOX loop_box; // bounding box + int32_t area; // loop area EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path EDGEPT* edgepts = stack_edgepts; @@ -95,74 +93,69 @@ TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) { } prev_result = new_pt; edgept = edgept->next; - } - while (edgept != startpt); + } while (edgept != startpt); prev_result->next = result; result->prev = prev_result; - if (edgepts != stack_edgepts) - delete [] edgepts; + if (edgepts != stack_edgepts) delete[] edgepts; return TESSLINE::BuildFromOutlineList(result); } - /********************************************************************** * edgesteps_to_edgepts * * Convert a C_OUTLINE to EDGEPTs. **********************************************************************/ -EDGEPT * -edgesteps_to_edgepts ( //convert outline -C_OUTLINE * c_outline, //input -EDGEPT edgepts[] //output is array +EDGEPT* edgesteps_to_edgepts( // convert outline + C_OUTLINE* c_outline, // input + EDGEPT edgepts[] // output is array ) { - int32_t length; //steps in path - ICOORD pos; //current coords - int32_t stepindex; //current step - int32_t stepinc; //increment - int32_t epindex; //current EDGEPT - int32_t count; //repeated steps - ICOORD vec; //for this 8 step + int32_t length; // steps in path + ICOORD pos; // current coords + int32_t stepindex; // current step + int32_t stepinc; // increment + int32_t epindex; // current EDGEPT + int32_t count; // repeated steps + ICOORD vec; // for this 8 step ICOORD prev_vec; - int8_t epdir; //of this step - DIR128 prevdir; //prvious dir - DIR128 dir; //of this step + int8_t epdir; // of this step + DIR128 prevdir; // prvious dir + DIR128 dir; // of this step - pos = c_outline->start_pos (); //start of loop - length = c_outline->pathlength (); + pos = c_outline->start_pos(); // start of loop + length = c_outline->pathlength(); stepindex = 0; epindex = 0; prevdir = -1; count = 0; int prev_stepindex = 0; do { - dir = c_outline->step_dir (stepindex); - vec = c_outline->step (stepindex); - if (stepindex < length - 1 - && c_outline->step_dir (stepindex + 1) - dir == -32) { + dir = c_outline->step_dir(stepindex); + vec = c_outline->step(stepindex); + if (stepindex < length - 1 && + c_outline->step_dir(stepindex + 1) - dir == -32) { dir += 128 - 16; - vec += c_outline->step (stepindex + 1); + vec += c_outline->step(stepindex + 1); stepinc = 2; - } - else + } else stepinc = 1; if (count == 0) { prevdir = dir; prev_vec = vec; } - if (prevdir.get_dir () != dir.get_dir ()) { - edgepts[epindex].pos.x = pos.x (); - edgepts[epindex].pos.y = pos.y (); + if (prevdir.get_dir() != dir.get_dir()) { + edgepts[epindex].pos.x = pos.x(); + edgepts[epindex].pos.y = pos.y(); prev_vec *= count; - edgepts[epindex].vec.x = prev_vec.x (); - edgepts[epindex].vec.y = prev_vec.y (); + edgepts[epindex].vec.x = prev_vec.x(); + edgepts[epindex].vec.y = prev_vec.y(); pos += prev_vec; edgepts[epindex].flags[RUNLENGTH] = count; edgepts[epindex].prev = &edgepts[epindex - 1]; edgepts[epindex].flags[FLAGS] = 0; edgepts[epindex].next = &edgepts[epindex + 1]; prevdir += 64; - epdir = (DIR128) 0 - prevdir; + epdir = (DIR128)0 - prevdir; epdir >>= 4; epdir &= 7; edgepts[epindex].flags[DIR] = epdir; @@ -174,17 +167,15 @@ EDGEPT edgepts[] //output is array prev_vec = vec; count = 1; prev_stepindex = stepindex; - } - else + } else count++; stepindex += stepinc; - } - while (stepindex < length); - edgepts[epindex].pos.x = pos.x (); - edgepts[epindex].pos.y = pos.y (); + } while (stepindex < length); + edgepts[epindex].pos.x = pos.x(); + edgepts[epindex].pos.y = pos.y(); prev_vec *= count; - edgepts[epindex].vec.x = prev_vec.x (); - edgepts[epindex].vec.y = prev_vec.y (); + edgepts[epindex].vec.x = prev_vec.x(); + edgepts[epindex].vec.y = prev_vec.y(); pos += prev_vec; edgepts[epindex].flags[RUNLENGTH] = count; edgepts[epindex].flags[FLAGS] = 0; @@ -194,390 +185,357 @@ EDGEPT edgepts[] //output is array edgepts[epindex].prev = &edgepts[epindex - 1]; edgepts[epindex].next = &edgepts[0]; prevdir += 64; - epdir = (DIR128) 0 - prevdir; + epdir = (DIR128)0 - prevdir; epdir >>= 4; epdir &= 7; edgepts[epindex].flags[DIR] = epdir; edgepts[0].prev = &edgepts[epindex]; - ASSERT_HOST (pos.x () == c_outline->start_pos ().x () - && pos.y () == c_outline->start_pos ().y ()); + ASSERT_HOST(pos.x() == c_outline->start_pos().x() && + pos.y() == c_outline->start_pos().y()); return &edgepts[0]; } - /********************************************************************** *fix2(start,area) fixes points on the outline according to a trial method* **********************************************************************/ -//#pragma OPT_LEVEL 1 /*stop compiler bugs*/ - -void fix2( //polygonal approx - EDGEPT *start, /*loop to approimate */ - int area) { - EDGEPT *edgept; /*current point */ - EDGEPT *edgept1; - EDGEPT *loopstart; /*modified start of loop */ - EDGEPT *linestart; /*start of line segment */ - int dir1, dir2; /*directions of line */ - int sum1, sum2; /*lengths in dir1,dir2 */ - int stopped; /*completed flag */ - int fixed_count; //no of fixed points +//#pragma OPT_LEVEL 1 /*stop compiler bugs*/ + +void fix2( // polygonal approx + EDGEPT* start, /*loop to approimate */ + int area) { + EDGEPT* edgept; /*current point */ + EDGEPT* edgept1; + EDGEPT* loopstart; /*modified start of loop */ + EDGEPT* linestart; /*start of line segment */ + int dir1, dir2; /*directions of line */ + int sum1, sum2; /*lengths in dir1,dir2 */ + int stopped; /*completed flag */ + int fixed_count; // no of fixed points int d01, d12, d23, gapmin; TPOINT d01vec, d12vec, d23vec; EDGEPT *edgefix, *startfix; EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3; - edgept = start; /*start of loop */ - while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3 - && (dir1 = - (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2 - && dir1 != 6) - edgept = edgept->next; /*find suitable start */ - loopstart = edgept; /*remember start */ + edgept = start; /*start of loop */ + while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3 && + (dir1 = (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != + 2 && + dir1 != 6) + edgept = edgept->next; /*find suitable start */ + loopstart = edgept; /*remember start */ stopped = 0; /*not finished yet */ edgept->flags[FLAGS] |= FIXED; /*fix it */ do { - linestart = edgept; /*possible start of line */ - dir1 = edgept->flags[DIR]; /*first direction */ - /*length of dir1 */ + linestart = edgept; /*possible start of line */ + dir1 = edgept->flags[DIR]; /*first direction */ + /*length of dir1 */ sum1 = edgept->flags[RUNLENGTH]; edgept = edgept->next; - dir2 = edgept->flags[DIR]; /*2nd direction */ - /*length in dir2 */ + dir2 = edgept->flags[DIR]; /*2nd direction */ + /*length in dir2 */ sum2 = edgept->flags[RUNLENGTH]; if (((dir1 - dir2 + 1) & 7) < 3) { while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) { - edgept = edgept->next; /*look at next */ - if (edgept->flags[DIR] == dir1) - /*sum lengths */ + edgept = edgept->next; /*look at next */ + if (edgept->flags[DIR] == dir1) /*sum lengths */ sum1 += edgept->flags[RUNLENGTH]; else sum2 += edgept->flags[RUNLENGTH]; } - if (edgept == loopstart) - stopped = 1; /*finished */ - if (sum2 + sum1 > 2 - && linestart->prev->flags[DIR] == dir2 - && (linestart->prev->flags[RUNLENGTH] > - linestart->flags[RUNLENGTH] || sum2 > sum1)) { - /*start is back one */ + if (edgept == loopstart) stopped = 1; /*finished */ + if (sum2 + sum1 > 2 && linestart->prev->flags[DIR] == dir2 && + (linestart->prev->flags[RUNLENGTH] > linestart->flags[RUNLENGTH] || + sum2 > sum1)) { + /*start is back one */ linestart = linestart->prev; linestart->flags[FLAGS] |= FIXED; } - if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3 - || (edgept->flags[DIR] == dir1 && sum1 >= sum2) - || ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH] - || (edgept->flags[DIR] == dir2 && sum2 >= sum1)) - && linestart->next != edgept)) + if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3 || + (edgept->flags[DIR] == dir1 && sum1 >= sum2) || + ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH] || + (edgept->flags[DIR] == dir2 && sum2 >= sum1)) && + linestart->next != edgept)) edgept = edgept->next; } - /*sharp bend */ + /*sharp bend */ edgept->flags[FLAGS] |= FIXED; } - /*do whole loop */ + /*do whole loop */ while (edgept != loopstart && !stopped); edgept = start; do { - if (((edgept->flags[RUNLENGTH] >= 8) && - (edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) || - ((edgept->flags[RUNLENGTH] >= 8) && - ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) { + if (((edgept->flags[RUNLENGTH] >= 8) && (edgept->flags[DIR] != 2) && + (edgept->flags[DIR] != 6)) || + ((edgept->flags[RUNLENGTH] >= 8) && + ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) { edgept->flags[FLAGS] |= FIXED; edgept1 = edgept->next; edgept1->flags[FLAGS] |= FIXED; } edgept = edgept->next; - } - while (edgept != start); + } while (edgept != start); edgept = start; do { - /*single fixed step */ - if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1 - /*and neighours free */ - && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0 - /*same pair of dirs */ - && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR] - && ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) { - /*unfix it */ + /*single fixed step */ + if (edgept->flags[FLAGS] & FIXED && + edgept->flags[RUNLENGTH] == 1 + /*and neighours free */ + && edgept->next->flags[FLAGS] & FIXED && + (edgept->prev->flags[FLAGS] & FIXED) == 0 + /*same pair of dirs */ + && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && + edgept->prev->flags[DIR] == edgept->next->flags[DIR] && + edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR] && + ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) { + /*unfix it */ edgept->flags[FLAGS] &= ~FIXED; edgept->next->flags[FLAGS] &= ~FIXED; } - edgept = edgept->next; /*do all points */ - } - while (edgept != start); /*until finished */ + edgept = edgept->next; /*do all points */ + } while (edgept != start); /*until finished */ stopped = 0; - if (area < 450) - area = 450; + if (area < 450) area = 450; gapmin = area * fixed_dist * fixed_dist / 44000; edgept = start; fixed_count = 0; do { - if (edgept->flags[FLAGS] & FIXED) - fixed_count++; - edgept = edgept->next; - } - while (edgept != start); - while ((edgept->flags[FLAGS] & FIXED) == 0) + if (edgept->flags[FLAGS] & FIXED) fixed_count++; edgept = edgept->next; + } while (edgept != start); + while ((edgept->flags[FLAGS] & FIXED) == 0) edgept = edgept->next; edgefix0 = edgept; edgept = edgept->next; - while ((edgept->flags[FLAGS] & FIXED) == 0) - edgept = edgept->next; + while ((edgept->flags[FLAGS] & FIXED) == 0) edgept = edgept->next; edgefix1 = edgept; edgept = edgept->next; - while ((edgept->flags[FLAGS] & FIXED) == 0) - edgept = edgept->next; + while ((edgept->flags[FLAGS] & FIXED) == 0) edgept = edgept->next; edgefix2 = edgept; edgept = edgept->next; - while ((edgept->flags[FLAGS] & FIXED) == 0) - edgept = edgept->next; + while ((edgept->flags[FLAGS] & FIXED) == 0) edgept = edgept->next; edgefix3 = edgept; startfix = edgefix2; do { - if (fixed_count <= 3) - break; //already too few - point_diff (d12vec, edgefix1->pos, edgefix2->pos); - d12 = LENGTH (d12vec); + if (fixed_count <= 3) break; // already too few + point_diff(d12vec, edgefix1->pos, edgefix2->pos); + d12 = LENGTH(d12vec); // TODO(rays) investigate this change: // Only unfix a point if it is part of a low-curvature section // of outline and the total angle change of the outlines is // less than 90 degrees, ie the scalar product is positive. // if (d12 <= gapmin && SCALAR(edgefix0->vec, edgefix2->vec) > 0) { if (d12 <= gapmin) { - point_diff (d01vec, edgefix0->pos, edgefix1->pos); - d01 = LENGTH (d01vec); - point_diff (d23vec, edgefix2->pos, edgefix3->pos); - d23 = LENGTH (d23vec); + point_diff(d01vec, edgefix0->pos, edgefix1->pos); + d01 = LENGTH(d01vec); + point_diff(d23vec, edgefix2->pos, edgefix3->pos); + d23 = LENGTH(d23vec); if (d01 > d23) { edgefix2->flags[FLAGS] &= ~FIXED; fixed_count--; - } - else { + } else { edgefix1->flags[FLAGS] &= ~FIXED; fixed_count--; edgefix1 = edgefix2; } - } - else { + } else { edgefix0 = edgefix1; edgefix1 = edgefix2; } edgefix2 = edgefix3; edgept = edgept->next; while ((edgept->flags[FLAGS] & FIXED) == 0) { - if (edgept == startfix) - stopped = 1; + if (edgept == startfix) stopped = 1; edgept = edgept->next; } edgefix3 = edgept; edgefix = edgefix2; - } - while ((edgefix != startfix) && (!stopped)); + } while ((edgefix != startfix) && (!stopped)); } - -//#pragma OPT_LEVEL 2 /*stop compiler bugs*/ +//#pragma OPT_LEVEL 2 /*stop compiler bugs*/ /********************************************************************** *poly2(startpt,area,path) applies a second approximation to the outline *using the points which have been fixed by the first approximation* **********************************************************************/ -EDGEPT *poly2( //second poly - EDGEPT *startpt, /*start of loop */ - int area /*area of blob box */ - ) { - EDGEPT *edgept; /*current outline point */ - EDGEPT *loopstart; /*starting point */ - EDGEPT *linestart; /*start of line */ - int edgesum; /*correction count */ +EDGEPT* poly2( // second poly + EDGEPT* startpt, /*start of loop */ + int area /*area of blob box */ +) { + EDGEPT* edgept; /*current outline point */ + EDGEPT* loopstart; /*starting point */ + EDGEPT* linestart; /*start of line */ + int edgesum; /*correction count */ - if (area < 1200) - area = 1200; /*minimum value */ + if (area < 1200) area = 1200; /*minimum value */ - loopstart = nullptr; /*not found it yet */ - edgept = startpt; /*start of loop */ + loopstart = nullptr; /*not found it yet */ + edgept = startpt; /*start of loop */ do { - /*current point fixed */ + /*current point fixed */ if (edgept->flags[FLAGS] & FIXED - /*and next not */ - && (edgept->next->flags[FLAGS] & FIXED) == 0) { - loopstart = edgept; /*start of repoly */ + /*and next not */ + && (edgept->next->flags[FLAGS] & FIXED) == 0) { + loopstart = edgept; /*start of repoly */ break; } - edgept = edgept->next; /*next point */ - } - while (edgept != startpt); /*until found or finished */ + edgept = edgept->next; /*next point */ + } while (edgept != startpt); /*until found or finished */ if (loopstart == nullptr && (startpt->flags[FLAGS] & FIXED) == 0) { - /*fixed start of loop */ + /*fixed start of loop */ startpt->flags[FLAGS] |= FIXED; - loopstart = startpt; /*or start of loop */ + loopstart = startpt; /*or start of loop */ } if (loopstart) { do { - edgept = loopstart; /*first to do */ + edgept = loopstart; /*first to do */ do { linestart = edgept; - edgesum = 0; /*sum of lengths */ + edgesum = 0; /*sum of lengths */ do { - /*sum lengths */ + /*sum lengths */ edgesum += edgept->flags[RUNLENGTH]; edgept = edgept->next; /*move on */ - } - while ((edgept->flags[FLAGS] & FIXED) == 0 - && edgept != loopstart && edgesum < 126); + } while ((edgept->flags[FLAGS] & FIXED) == 0 && edgept != loopstart && + edgesum < 126); if (poly_debug) - tprintf - ("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", - linestart->pos.x, linestart->pos.y, linestart->flags[DIR], - linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x, - edgept->pos.y); - /*reapproximate */ + tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", + linestart->pos.x, linestart->pos.y, linestart->flags[DIR], + linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x, + edgept->pos.y); + /*reapproximate */ cutline(linestart, edgept, area); - while ((edgept->next->flags[FLAGS] & FIXED) - && edgept != loopstart) + while ((edgept->next->flags[FLAGS] & FIXED) && edgept != loopstart) edgept = edgept->next; /*look for next non-fixed */ } - /*do all the loop */ + /*do all the loop */ while (edgept != loopstart); edgesum = 0; do { - if (edgept->flags[FLAGS] & FIXED) - edgesum++; + if (edgept->flags[FLAGS] & FIXED) edgesum++; edgept = edgept->next; } - //count fixed pts + // count fixed pts while (edgept != loopstart); - if (edgesum < 3) - area /= 2; //must have 3 pts - } - while (edgesum < 3); + if (edgesum < 3) area /= 2; // must have 3 pts + } while (edgesum < 3); do { linestart = edgept; do { edgept = edgept->next; - } - while ((edgept->flags[FLAGS] & FIXED) == 0); + } while ((edgept->flags[FLAGS] & FIXED) == 0); linestart->next = edgept; edgept->prev = linestart; linestart->vec.x = edgept->pos.x - linestart->pos.x; linestart->vec.y = edgept->pos.y - linestart->pos.y; - } - while (edgept != loopstart); - } - else - edgept = startpt; /*start of loop */ + } while (edgept != loopstart); + } else + edgept = startpt; /*start of loop */ - loopstart = edgept; /*new start */ - return loopstart; /*correct exit */ + loopstart = edgept; /*new start */ + return loopstart; /*correct exit */ } - /********************************************************************** *cutline(first,last,area) straightens out a line by partitioning *and joining the ends by a straight line* **********************************************************************/ -void cutline( //recursive refine - EDGEPT *first, /*ends of line */ - EDGEPT *last, - int area /*area of object */ - ) { - EDGEPT *edge; /*current edge */ - TPOINT vecsum; /*vector sum */ - int vlen; /*approx length of vecsum */ - TPOINT vec; /*accumulated vector */ - EDGEPT *maxpoint; /*worst point */ - int maxperp; /*max deviation */ - int perp; /*perp distance */ - int ptcount; /*no of points */ - int squaresum; /*sum of perps */ - - edge = first; /*start of line */ - if (edge->next == last) - return; /*simple line */ - - /*vector sum */ +void cutline( // recursive refine + EDGEPT* first, /*ends of line */ + EDGEPT* last, int area /*area of object */ +) { + EDGEPT* edge; /*current edge */ + TPOINT vecsum; /*vector sum */ + int vlen; /*approx length of vecsum */ + TPOINT vec; /*accumulated vector */ + EDGEPT* maxpoint; /*worst point */ + int maxperp; /*max deviation */ + int perp; /*perp distance */ + int ptcount; /*no of points */ + int squaresum; /*sum of perps */ + + edge = first; /*start of line */ + if (edge->next == last) return; /*simple line */ + + /*vector sum */ vecsum.x = last->pos.x - edge->pos.x; vecsum.y = last->pos.y - edge->pos.y; if (vecsum.x == 0 && vecsum.y == 0) { - /*special case */ + /*special case */ vecsum.x = -edge->prev->vec.x; vecsum.y = -edge->prev->vec.y; } - /*absolute value */ + /*absolute value */ vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x; if (vecsum.y > vlen) - vlen = vecsum.y; /*maximum */ + vlen = vecsum.y; /*maximum */ else if (-vecsum.y > vlen) - vlen = -vecsum.y; /*absolute value */ + vlen = -vecsum.y; /*absolute value */ - vec.x = edge->vec.x; /*accumulated vector */ + vec.x = edge->vec.x; /*accumulated vector */ vec.y = edge->vec.y; - maxperp = 0; /*none yet */ + maxperp = 0; /*none yet */ squaresum = ptcount = 0; - edge = edge->next; /*move to actual point */ - maxpoint = edge; /*in case there isn't one */ + edge = edge->next; /*move to actual point */ + maxpoint = edge; /*in case there isn't one */ do { - perp = CROSS (vec, vecsum); /*get perp distance */ + perp = CROSS(vec, vecsum); /*get perp distance */ if (perp != 0) { - perp *= perp; /*squared deviation */ + perp *= perp; /*squared deviation */ } - squaresum += perp; /*sum squares */ - ptcount++; /*count points */ - if (poly_debug) - tprintf ("Cutline:Final perp=%d\n", perp); + squaresum += perp; /*sum squares */ + ptcount++; /*count points */ + if (poly_debug) tprintf("Cutline:Final perp=%d\n", perp); if (perp > maxperp) { maxperp = perp; - maxpoint = edge; /*find greatest deviation */ + maxpoint = edge; /*find greatest deviation */ } - vec.x += edge->vec.x; /*accumulate vectors */ + vec.x += edge->vec.x; /*accumulate vectors */ vec.y += edge->vec.y; edge = edge->next; - } - while (edge != last); /*test all line */ + } while (edge != last); /*test all line */ - perp = LENGTH (vecsum); - ASSERT_HOST (perp != 0); + perp = LENGTH(vecsum); + ASSERT_HOST(perp != 0); if (maxperp < 256 * INT16_MAX) { maxperp <<= 8; - maxperp /= perp; /*true max perp */ - } - else { + maxperp /= perp; /*true max perp */ + } else { maxperp /= perp; - maxperp <<= 8; /*avoid overflow */ + maxperp <<= 8; /*avoid overflow */ } - if (squaresum < 256 * INT16_MAX) - /*mean squared perp */ + if (squaresum < 256 * INT16_MAX) /*mean squared perp */ perp = (squaresum << 8) / (perp * ptcount); else - /*avoid overflow */ + /*avoid overflow */ perp = (squaresum / perp << 8) / ptcount; if (poly_debug) - tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", - area, maxperp / 256.0, maxperp * 200.0 / area, - perp / 256.0, perp * 300.0 / area); + tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area, + maxperp / 256.0, maxperp * 200.0 / area, perp / 256.0, + perp * 300.0 / area); if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) { maxpoint->flags[FLAGS] |= FIXED; - /*partitions */ + /*partitions */ cutline(first, maxpoint, area); cutline(maxpoint, last, area); } diff --git a/src/ccstruct/polyaprx.h b/src/ccstruct/polyaprx.h index 45e3c755af..308c03d96d 100644 --- a/src/ccstruct/polyaprx.h +++ b/src/ccstruct/polyaprx.h @@ -17,28 +17,27 @@ * **********************************************************************/ -#ifndef POLYAPRX_H -#define POLYAPRX_H +#ifndef POLYAPRX_H +#define POLYAPRX_H -#include "blobs.h" -#include "coutln.h" +#include "blobs.h" +#include "coutln.h" // convert a chain-coded input to the old OUTLINE approximation -TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline); -EDGEPT *edgesteps_to_edgepts ( //convert outline -C_OUTLINE * c_outline, //input -EDGEPT edgepts[] //output is array +TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline); +EDGEPT* edgesteps_to_edgepts( // convert outline + C_OUTLINE* c_outline, // input + EDGEPT edgepts[] // output is array +); +void fix2( // polygonal approx + EDGEPT* start, /*loop to approimate */ + int area); +EDGEPT* poly2( // second poly + EDGEPT* startpt, /*start of loop */ + int area /*area of blob box */ +); +void cutline( // recursive refine + EDGEPT* first, /*ends of line */ + EDGEPT* last, int area /*area of object */ ); -void fix2( //polygonal approx - EDGEPT *start, /*loop to approimate */ - int area); -EDGEPT *poly2( //second poly - EDGEPT *startpt, /*start of loop */ - int area /*area of blob box */ - ); -void cutline( //recursive refine - EDGEPT *first, /*ends of line */ - EDGEPT *last, - int area /*area of object */ - ); #endif diff --git a/src/ccstruct/polyblk.cpp b/src/ccstruct/polyblk.cpp index 67021c0c35..505f32524b 100644 --- a/src/ccstruct/polyblk.cpp +++ b/src/ccstruct/polyblk.cpp @@ -32,9 +32,9 @@ #define PBLOCK_LABEL_SIZE 150 #define INTERSECTING INT16_MAX -int lessthan(const void *first, const void *second); +int lessthan(const void* first, const void* second); -POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType t) { +POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST* points, PolyBlockType t) { ICOORDELT_IT v = &vertices; vertices.clear(); @@ -63,35 +63,30 @@ POLY_BLOCK::POLY_BLOCK(const TBOX& box, PolyBlockType t) { * Compute the bounding box from the outline points. */ -void POLY_BLOCK::compute_bb() { //constructor - ICOORD ibl, itr; //integer bb - ICOORD botleft; //bounding box +void POLY_BLOCK::compute_bb() { // constructor + ICOORD ibl, itr; // integer bb + ICOORD botleft; // bounding box ICOORD topright; - ICOORD pos; //current pos; - ICOORDELT_IT pts = &vertices; //iterator + ICOORD pos; // current pos; + ICOORDELT_IT pts = &vertices; // iterator - botleft = *pts.data (); + botleft = *pts.data(); topright = botleft; do { - pos = *pts.data (); - if (pos.x () < botleft.x ()) - //get bounding box - botleft = ICOORD (pos.x (), botleft.y ()); - if (pos.y () < botleft.y ()) - botleft = ICOORD (botleft.x (), pos.y ()); - if (pos.x () > topright.x ()) - topright = ICOORD (pos.x (), topright.y ()); - if (pos.y () > topright.y ()) - topright = ICOORD (topright.x (), pos.y ()); - pts.forward (); - } - while (!pts.at_first ()); - ibl = ICOORD (botleft.x (), botleft.y ()); - itr = ICOORD (topright.x (), topright.y ()); - box = TBOX (ibl, itr); + pos = *pts.data(); + if (pos.x() < botleft.x()) + // get bounding box + botleft = ICOORD(pos.x(), botleft.y()); + if (pos.y() < botleft.y()) botleft = ICOORD(botleft.x(), pos.y()); + if (pos.x() > topright.x()) topright = ICOORD(pos.x(), topright.y()); + if (pos.y() > topright.y()) topright = ICOORD(topright.x(), pos.y()); + pts.forward(); + } while (!pts.at_first()); + ibl = ICOORD(botleft.x(), botleft.y()); + itr = ICOORD(topright.x(), topright.y()); + box = TBOX(ibl, itr); } - /** * @name POLY_BLOCK::winding_number * @@ -99,83 +94,74 @@ void POLY_BLOCK::compute_bb() { //constructor * @param point point to wind around */ -int16_t POLY_BLOCK::winding_number(const ICOORD &point) { - int16_t count; //winding count - ICOORD pt; //current point - ICOORD vec; //point to current point - ICOORD vvec; //current point to next point - int32_t cross; //cross product - ICOORDELT_IT it = &vertices; //iterator +int16_t POLY_BLOCK::winding_number(const ICOORD& point) { + int16_t count; // winding count + ICOORD pt; // current point + ICOORD vec; // point to current point + ICOORD vvec; // current point to next point + int32_t cross; // cross product + ICOORDELT_IT it = &vertices; // iterator count = 0; do { - pt = *it.data (); + pt = *it.data(); vec = pt - point; - vvec = *it.data_relative (1) - pt; - //crossing the line - if (vec.y () <= 0 && vec.y () + vvec.y () > 0) { - cross = vec * vvec; //cross product + vvec = *it.data_relative(1) - pt; + // crossing the line + if (vec.y() <= 0 && vec.y() + vvec.y() > 0) { + cross = vec * vvec; // cross product if (cross > 0) - count++; //crossing right half + count++; // crossing right half else if (cross == 0) - return INTERSECTING; //going through point - } - else if (vec.y () > 0 && vec.y () + vvec.y () <= 0) { + return INTERSECTING; // going through point + } else if (vec.y() > 0 && vec.y() + vvec.y() <= 0) { cross = vec * vvec; if (cross < 0) - count--; //crossing back + count--; // crossing back else if (cross == 0) - return INTERSECTING; //illegal - } - else if (vec.y () == 0 && vec.x () == 0) + return INTERSECTING; // illegal + } else if (vec.y() == 0 && vec.x() == 0) return INTERSECTING; - it.forward (); - } - while (!it.at_first ()); - return count; //winding number + it.forward(); + } while (!it.at_first()); + return count; // winding number } - /// @return true if other is inside this. -bool POLY_BLOCK::contains(POLY_BLOCK *other) { - int16_t count; // winding count - ICOORDELT_IT it = &vertices; // iterator +bool POLY_BLOCK::contains(POLY_BLOCK* other) { + int16_t count; // winding count + ICOORDELT_IT it = &vertices; // iterator ICOORD vertex; - if (!box.overlap (*(other->bounding_box ()))) - return false; // can't be contained + if (!box.overlap(*(other->bounding_box()))) + return false; // can't be contained /* check that no vertex of this is inside other */ do { - vertex = *it.data (); - // get winding number - count = other->winding_number (vertex); + vertex = *it.data(); + // get winding number + count = other->winding_number(vertex); if (count != INTERSECTING) - if (count != 0) - return false; - it.forward (); - } - while (!it.at_first ()); + if (count != 0) return false; + it.forward(); + } while (!it.at_first()); /* check that all vertices of other are inside this */ - //switch lists - it.set_to_list (other->points ()); + // switch lists + it.set_to_list(other->points()); do { - vertex = *it.data (); - //try other way round - count = winding_number (vertex); + vertex = *it.data(); + // try other way round + count = winding_number(vertex); if (count != INTERSECTING) - if (count == 0) - return false; - it.forward (); - } - while (!it.at_first ()); + if (count == 0) return false; + it.forward(); + } while (!it.at_first()); return true; } - /** * @name POLY_BLOCK::rotate * @@ -184,20 +170,19 @@ bool POLY_BLOCK::contains(POLY_BLOCK *other) { */ void POLY_BLOCK::rotate(FCOORD rotation) { - FCOORD pos; //current pos; - ICOORDELT *pt; //current point - ICOORDELT_IT pts = &vertices; //iterator + FCOORD pos; // current pos; + ICOORDELT* pt; // current point + ICOORDELT_IT pts = &vertices; // iterator do { - pt = pts.data (); - pos.set_x (pt->x ()); - pos.set_y (pt->y ()); - pos.rotate (rotation); - pt->set_x ((int16_t) (floor (pos.x () + 0.5))); - pt->set_y ((int16_t) (floor (pos.y () + 0.5))); - pts.forward (); - } - while (!pts.at_first ()); + pt = pts.data(); + pos.set_x(pt->x()); + pos.set_y(pt->y()); + pos.rotate(rotation); + pt->set_x((int16_t)(floor(pos.x() + 0.5))); + pt->set_y((int16_t)(floor(pos.y() + 0.5))); + pts.forward(); + } while (!pts.at_first()); compute_bb(); } @@ -208,19 +193,17 @@ void POLY_BLOCK::rotate(FCOORD rotation) { */ void POLY_BLOCK::reflect_in_y_axis() { - ICOORDELT *pt; // current point + ICOORDELT* pt; // current point ICOORDELT_IT pts = &vertices; // Iterator. do { pt = pts.data(); pt->set_x(-pt->x()); pts.forward(); - } - while (!pts.at_first()); + } while (!pts.at_first()); compute_bb(); } - /** * POLY_BLOCK::move * @@ -229,69 +212,66 @@ void POLY_BLOCK::reflect_in_y_axis() { */ void POLY_BLOCK::move(ICOORD shift) { - ICOORDELT *pt; //current point - ICOORDELT_IT pts = &vertices; //iterator + ICOORDELT* pt; // current point + ICOORDELT_IT pts = &vertices; // iterator do { - pt = pts.data (); + pt = pts.data(); *pt += shift; - pts.forward (); - } - while (!pts.at_first ()); + pts.forward(); + } while (!pts.at_first()); compute_bb(); } - #ifndef GRAPHICS_DISABLED void POLY_BLOCK::plot(ScrollView* window, int32_t num) { ICOORDELT_IT v = &vertices; window->Pen(ColorForPolyBlockType(type)); - v.move_to_first (); + v.move_to_first(); if (num > 0) { window->TextAttributes("Times", 80, false, false, false); char temp_buff[34]; - #if defined(__UNIX__) || defined(MINGW) +#if defined(__UNIX__) || defined(MINGW) snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, num); - #else - ltoa (num, temp_buff, 10); - #endif - window->Text(v.data ()->x (), v.data ()->y (), temp_buff); +#else + ltoa(num, temp_buff, 10); +#endif + window->Text(v.data()->x(), v.data()->y(), temp_buff); } - window->SetCursor(v.data ()->x (), v.data ()->y ()); - for (v.mark_cycle_pt (); !v.cycled_list (); v.forward ()) { - window->DrawTo(v.data ()->x (), v.data ()->y ()); - } - v.move_to_first (); - window->DrawTo(v.data ()->x (), v.data ()->y ()); + window->SetCursor(v.data()->x(), v.data()->y()); + for (v.mark_cycle_pt(); !v.cycled_list(); v.forward()) { + window->DrawTo(v.data()->x(), v.data()->y()); + } + v.move_to_first(); + window->DrawTo(v.data()->x(), v.data()->y()); } - void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) { int16_t y; int16_t width; - PB_LINE_IT *lines; + PB_LINE_IT* lines; ICOORDELT_IT s_it; - lines = new PB_LINE_IT (this); + lines = new PB_LINE_IT(this); window->Pen(colour); - for (y = this->bounding_box ()->bottom (); - y <= this->bounding_box ()->top (); y++) { + for (y = this->bounding_box()->bottom(); y <= this->bounding_box()->top(); + y++) { const std::unique_ptr segments( lines->get_line(y)); - if (!segments->empty ()) { + if (!segments->empty()) { s_it.set_to_list(segments.get()); - for (s_it.mark_cycle_pt (); !s_it.cycled_list (); s_it.forward ()) { + for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) { // Note different use of ICOORDELT, x coord is x coord of pixel // at the start of line segment, y coord is length of line segment // Last pixel is start pixel + length. - width = s_it.data ()->y (); - window->SetCursor(s_it.data ()->x (), y); - window->DrawTo(s_it.data ()->x () + (float) width, y); + width = s_it.data()->y(); + window->SetCursor(s_it.data()->x(), y); + window->DrawTo(s_it.data()->x() + (float)width, y); } } } @@ -300,94 +280,85 @@ void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) { } #endif - /// @return true if the polygons of other and this overlap. -bool POLY_BLOCK::overlap(POLY_BLOCK *other) { - int16_t count; // winding count - ICOORDELT_IT it = &vertices; // iterator +bool POLY_BLOCK::overlap(POLY_BLOCK* other) { + int16_t count; // winding count + ICOORDELT_IT it = &vertices; // iterator ICOORD vertex; if (!box.overlap(*(other->bounding_box()))) - return false; // can't be any overlap. + return false; // can't be any overlap. /* see if a vertex of this is inside other */ do { - vertex = *it.data (); - // get winding number - count = other->winding_number (vertex); + vertex = *it.data(); + // get winding number + count = other->winding_number(vertex); if (count != INTERSECTING) - if (count != 0) - return true; - it.forward (); - } - while (!it.at_first ()); + if (count != 0) return true; + it.forward(); + } while (!it.at_first()); /* see if a vertex of other is inside this */ - // switch lists - it.set_to_list (other->points ()); + // switch lists + it.set_to_list(other->points()); do { vertex = *it.data(); - // try other way round - count = winding_number (vertex); + // try other way round + count = winding_number(vertex); if (count != INTERSECTING) - if (count != 0) - return true; - it.forward (); - } - while (!it.at_first ()); + if (count != 0) return true; + it.forward(); + } while (!it.at_first()); return false; } - -ICOORDELT_LIST *PB_LINE_IT::get_line(int16_t y) { +ICOORDELT_LIST* PB_LINE_IT::get_line(int16_t y) { ICOORDELT_IT v, r; - ICOORDELT_LIST *result; + ICOORDELT_LIST* result; ICOORDELT *x, *current, *previous; float fy, fx; - fy = (float) (y + 0.5); - result = new ICOORDELT_LIST (); - r.set_to_list (result); - v.set_to_list (block->points ()); - - for (v.mark_cycle_pt (); !v.cycled_list (); v.forward ()) { - if (((v.data_relative (-1)->y () > y) && (v.data ()->y () <= y)) - || ((v.data_relative (-1)->y () <= y) && (v.data ()->y () > y))) { - previous = v.data_relative (-1); - current = v.data (); - fx = (float) (0.5 + previous->x () + - (current->x () - previous->x ()) * (fy - - previous->y ()) / - (current->y () - previous->y ())); - x = new ICOORDELT ((int16_t) fx, 0); - r.add_to_end (x); + fy = (float)(y + 0.5); + result = new ICOORDELT_LIST(); + r.set_to_list(result); + v.set_to_list(block->points()); + + for (v.mark_cycle_pt(); !v.cycled_list(); v.forward()) { + if (((v.data_relative(-1)->y() > y) && (v.data()->y() <= y)) || + ((v.data_relative(-1)->y() <= y) && (v.data()->y() > y))) { + previous = v.data_relative(-1); + current = v.data(); + fx = (float)(0.5 + previous->x() + + (current->x() - previous->x()) * (fy - previous->y()) / + (current->y() - previous->y())); + x = new ICOORDELT((int16_t)fx, 0); + r.add_to_end(x); } } - if (!r.empty ()) { - r.sort (lessthan); - for (r.mark_cycle_pt (); !r.cycled_list (); r.forward ()) - x = r.data (); - for (r.mark_cycle_pt (); !r.cycled_list (); r.forward ()) { - r.data ()->set_y (r.data_relative (1)->x () - r.data ()->x ()); - r.forward (); - delete (r.extract ()); + if (!r.empty()) { + r.sort(lessthan); + for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) x = r.data(); + for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) { + r.data()->set_y(r.data_relative(1)->x() - r.data()->x()); + r.forward(); + delete (r.extract()); } } return result; } +int lessthan(const void* first, const void* second) { + ICOORDELT* p1 = (*(ICOORDELT**)first); + ICOORDELT* p2 = (*(ICOORDELT**)second); -int lessthan(const void *first, const void *second) { - ICOORDELT *p1 = (*(ICOORDELT **) first); - ICOORDELT *p2 = (*(ICOORDELT **) second); - - if (p1->x () < p2->x ()) + if (p1->x() < p2->x()) return (-1); - else if (p1->x () > p2->x ()) + else if (p1->x() > p2->x()) return (1); else return (0); @@ -398,21 +369,23 @@ int lessthan(const void *first, const void *second) { ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) { // Keep kPBColors in sync with PolyBlockType. const ScrollView::Color kPBColors[PT_COUNT] = { - ScrollView::WHITE, // Type is not yet known. Keep as the 1st element. - ScrollView::BLUE, // Text that lives inside a column. - ScrollView::CYAN, // Text that spans more than one column. - ScrollView::MEDIUM_BLUE, // Text that is in a cross-column pull-out region. - ScrollView::AQUAMARINE, // Partition belonging to an equation region. - ScrollView::SKY_BLUE, // Partition belonging to an inline equation region. - ScrollView::MAGENTA, // Partition belonging to a table region. - ScrollView::GREEN, // Text-line runs vertically. - ScrollView::LIGHT_BLUE, // Text that belongs to an image. - ScrollView::RED, // Image that lives inside a column. - ScrollView::YELLOW, // Image that spans more than one column. - ScrollView::ORANGE, // Image in a cross-column pull-out region. - ScrollView::BROWN, // Horizontal Line. - ScrollView::DARK_GREEN, // Vertical Line. - ScrollView::GREY // Lies outside of any column. + ScrollView::WHITE, // Type is not yet known. Keep as the 1st element. + ScrollView::BLUE, // Text that lives inside a column. + ScrollView::CYAN, // Text that spans more than one column. + ScrollView::MEDIUM_BLUE, // Text that is in a cross-column pull-out + // region. + ScrollView::AQUAMARINE, // Partition belonging to an equation region. + ScrollView::SKY_BLUE, // Partition belonging to an inline equation + // region. + ScrollView::MAGENTA, // Partition belonging to a table region. + ScrollView::GREEN, // Text-line runs vertically. + ScrollView::LIGHT_BLUE, // Text that belongs to an image. + ScrollView::RED, // Image that lives inside a column. + ScrollView::YELLOW, // Image that spans more than one column. + ScrollView::ORANGE, // Image in a cross-column pull-out region. + ScrollView::BROWN, // Horizontal Line. + ScrollView::DARK_GREEN, // Vertical Line. + ScrollView::GREY // Lies outside of any column. }; if (type >= 0 && type < PT_COUNT) { return kPBColors[type]; diff --git a/src/ccstruct/polyblk.h b/src/ccstruct/polyblk.h index 598f4b0046..a26a395139 100644 --- a/src/ccstruct/polyblk.h +++ b/src/ccstruct/polyblk.h @@ -16,12 +16,12 @@ ** limitations under the License. * **********************************************************************/ -#ifndef POLYBLK_H -#define POLYBLK_H +#ifndef POLYBLK_H +#define POLYBLK_H -#include "publictypes.h" #include "elst.h" #include "points.h" +#include "publictypes.h" #include "rect.h" #include "scrollview.h" @@ -30,26 +30,22 @@ class DLLSYM POLY_BLOCK { POLY_BLOCK() = default; // Initialize from box coordinates. POLY_BLOCK(const TBOX& box, PolyBlockType type); - POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType type); - ~POLY_BLOCK () = default; + POLY_BLOCK(ICOORDELT_LIST* points, PolyBlockType type); + ~POLY_BLOCK() = default; - TBOX *bounding_box() { // access function + TBOX* bounding_box() { // access function return &box; } - ICOORDELT_LIST *points() { // access function + ICOORDELT_LIST* points() { // access function return &vertices; } void compute_bb(); - PolyBlockType isA() const { - return type; - } + PolyBlockType isA() const { return type; } - bool IsText() const { - return PTIsTextType(type); - } + bool IsText() const { return PTIsTextType(type); } // Rotate about the origin by the given rotation. (Analogous to // multiplying by a complex number. @@ -61,51 +57,47 @@ class DLLSYM POLY_BLOCK { void plot(ScrollView* window, int32_t num); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED void fill(ScrollView* window, ScrollView::Color colour); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED // Returns true if other is inside this. - bool contains(POLY_BLOCK *other); + bool contains(POLY_BLOCK* other); // Returns true if the polygons of other and this overlap. - bool overlap(POLY_BLOCK *other); + bool overlap(POLY_BLOCK* other); // Returns the winding number of this around the test_pt. // Positive for anticlockwise, negative for clockwise, and zero for // test_pt outside this. - int16_t winding_number(const ICOORD &test_pt); + int16_t winding_number(const ICOORD& test_pt); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED // Static utility functions to handle the PolyBlockType. // Returns a color to draw the given type. static ScrollView::Color ColorForPolyBlockType(PolyBlockType type); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED private: - ICOORDELT_LIST vertices; // vertices - TBOX box; // bounding box - PolyBlockType type; // Type of this region. + ICOORDELT_LIST vertices; // vertices + TBOX box; // bounding box + PolyBlockType type; // Type of this region. }; // Class to iterate the scanlines of a polygon. class DLLSYM PB_LINE_IT { public: - PB_LINE_IT(POLY_BLOCK *blkptr) { - block = blkptr; - } + PB_LINE_IT(POLY_BLOCK* blkptr) { block = blkptr; } - void set_to_block(POLY_BLOCK * blkptr) { - block = blkptr; - } + void set_to_block(POLY_BLOCK* blkptr) { block = blkptr; } // Returns a list of runs of pixels for the given y coord. // Each element of the returned list is the start (x) and extent(y) of // a run inside the region. // Delete the returned list after use. - ICOORDELT_LIST *get_line(int16_t y); + ICOORDELT_LIST* get_line(int16_t y); private: - POLY_BLOCK * block; + POLY_BLOCK* block; }; #endif diff --git a/src/ccstruct/publictypes.cpp b/src/ccstruct/publictypes.cpp index 47a1800944..25d9a97c9b 100644 --- a/src/ccstruct/publictypes.cpp +++ b/src/ccstruct/publictypes.cpp @@ -21,20 +21,20 @@ /** String name for each block type. Keep in sync with PolyBlockType. */ const char* kPolyBlockNames[] = { - "Unknown", - "Flowing Text", - "Heading Text", - "Pullout Text", - "Equation", - "Inline Equation", - "Table", - "Vertical Text", - "Caption Text", - "Flowing Image", - "Heading Image", - "Pullout Image", - "Horizontal Line", - "Vertical Line", - "Noise", - "" // End marker for testing that sizes match. + "Unknown", + "Flowing Text", + "Heading Text", + "Pullout Text", + "Equation", + "Inline Equation", + "Table", + "Vertical Text", + "Caption Text", + "Flowing Image", + "Heading Image", + "Pullout Image", + "Horizontal Line", + "Vertical Line", + "Noise", + "" // End marker for testing that sizes match. }; diff --git a/src/ccstruct/publictypes.h b/src/ccstruct/publictypes.h index 060424219d..3db652cac0 100644 --- a/src/ccstruct/publictypes.h +++ b/src/ccstruct/publictypes.h @@ -49,23 +49,23 @@ constexpr int kResolutionEstimationFactor = 10; * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions * below, as well as kPolyBlockNames in publictypes.cpp. * Used extensively by ColPartition, and POLY_BLOCK. -*/ + */ enum PolyBlockType { - PT_UNKNOWN, // Type is not yet known. Keep as the first element. - PT_FLOWING_TEXT, // Text that lives inside a column. - PT_HEADING_TEXT, // Text that spans more than one column. - PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region. - PT_EQUATION, // Partition belonging to an equation region. + PT_UNKNOWN, // Type is not yet known. Keep as the first element. + PT_FLOWING_TEXT, // Text that lives inside a column. + PT_HEADING_TEXT, // Text that spans more than one column. + PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region. + PT_EQUATION, // Partition belonging to an equation region. PT_INLINE_EQUATION, // Partition has inline equation. - PT_TABLE, // Partition belonging to a table region. - PT_VERTICAL_TEXT, // Text-line runs vertically. - PT_CAPTION_TEXT, // Text that belongs to an image. - PT_FLOWING_IMAGE, // Image that lives inside a column. - PT_HEADING_IMAGE, // Image that spans more than one column. - PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region. - PT_HORZ_LINE, // Horizontal Line. - PT_VERT_LINE, // Vertical Line. - PT_NOISE, // Lies outside of any column. + PT_TABLE, // Partition belonging to a table region. + PT_VERTICAL_TEXT, // Text-line runs vertically. + PT_CAPTION_TEXT, // Text that belongs to an image. + PT_FLOWING_IMAGE, // Image that lives inside a column. + PT_HEADING_IMAGE, // Image that spans more than one column. + PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region. + PT_HORZ_LINE, // Horizontal Line. + PT_VERT_LINE, // Vertical Line. + PT_NOISE, // Lies outside of any column. PT_COUNT }; @@ -131,7 +131,7 @@ enum Orientation { * * For English text, the writing direction is left-to-right. For the * Chinese text in the above example, the writing direction is top-to-bottom. -*/ + */ enum WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0, WRITING_DIRECTION_RIGHT_TO_LEFT = 1, @@ -148,7 +148,7 @@ enum WritingDirection { * * Note that only some combinations make sense. For example, * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM -*/ + */ enum TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, @@ -159,7 +159,7 @@ enum TextlineOrder { * Possible modes for page layout analysis. These *must* be kept in order * of decreasing amount of layout analysis to be done, except for OSD_ONLY, * so that the inequality test macros below work. -*/ + */ enum PageSegMode { PSM_OSD_ONLY, ///< Orientation and script detection only. PSM_AUTO_OSD, ///< Automatic page segmentation with orientation and @@ -169,17 +169,17 @@ enum PageSegMode { PSM_SINGLE_COLUMN, ///< Assume a single column of text of variable sizes. PSM_SINGLE_BLOCK_VERT_TEXT, ///< Assume a single uniform block of vertically ///< aligned text. - PSM_SINGLE_BLOCK, ///< Assume a single uniform block of text. (Default.) - PSM_SINGLE_LINE, ///< Treat the image as a single text line. - PSM_SINGLE_WORD, ///< Treat the image as a single word. - PSM_CIRCLE_WORD, ///< Treat the image as a single word in a circle. - PSM_SINGLE_CHAR, ///< Treat the image as a single character. - PSM_SPARSE_TEXT, ///< Find as much text as possible in no particular order. + PSM_SINGLE_BLOCK, ///< Assume a single uniform block of text. (Default.) + PSM_SINGLE_LINE, ///< Treat the image as a single text line. + PSM_SINGLE_WORD, ///< Treat the image as a single word. + PSM_CIRCLE_WORD, ///< Treat the image as a single word in a circle. + PSM_SINGLE_CHAR, ///< Treat the image as a single character. + PSM_SPARSE_TEXT, ///< Find as much text as possible in no particular order. PSM_SPARSE_TEXT_OSD, ///< Sparse text with orientation and script det. - PSM_RAW_LINE, ///< Treat the image as a single text line, bypassing - ///< hacks that are Tesseract-specific. + PSM_RAW_LINE, ///< Treat the image as a single text line, bypassing + ///< hacks that are Tesseract-specific. - PSM_COUNT ///< Number of enum entries. + PSM_COUNT ///< Number of enum entries. }; /** @@ -187,7 +187,7 @@ enum PageSegMode { * layout analysis are enabled. * *Depend critically on the order of elements of PageSegMode.* * NOTE that arg is an int for compatibility with INT_PARAM. -*/ + */ inline bool PSM_OSD_ENABLED(int pageseg_mode) { return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD; } @@ -208,14 +208,14 @@ inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) { } inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) { return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) || - pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD; + pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD; } /** * enum of the elements of the page hierarchy, used in ResultIterator * to provide functions that operate on each level without having to * have 5x as many functions. -*/ + */ enum PageIteratorLevel { RIL_BLOCK, // Block of text/image/separator line. RIL_PARA, // Paragraph within a block. @@ -264,7 +264,7 @@ enum ParagraphJustification { * appropriate changes to all the enums mirroring it (e.g. OCREngine in * cityblock/workflow/detection/detection_storage.proto). Such enums will * mention the connection to OcrEngineMode in the comments. -*/ + */ enum OcrEngineMode { OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated OEM_LSTM_ONLY, // Run just the LSTM line recognizer. @@ -278,7 +278,7 @@ enum OcrEngineMode { // command-line configs, or if not specified // in any of the above should be set to the // default OEM_TESSERACT_ONLY. - OEM_COUNT // Number of OEMs + OEM_COUNT // Number of OEMs }; } // namespace tesseract. diff --git a/src/ccstruct/quadlsq.cpp b/src/ccstruct/quadlsq.cpp index 5b9d632fa8..276a4230f0 100644 --- a/src/ccstruct/quadlsq.cpp +++ b/src/ccstruct/quadlsq.cpp @@ -17,9 +17,9 @@ * **********************************************************************/ -#include -#include #include "quadlsq.h" +#include +#include #include "tprintf.h" // Minimum variance in least squares before backing off to a lower degree. @@ -35,8 +35,8 @@ void QLSQ::clear() { // initialize a = 0.0; b = 0.0; c = 0.0; - n = 0; // No elements. - sigx = 0.0; // Zero accumulators. + n = 0; // No elements. + sigx = 0.0; // Zero accumulators. sigy = 0.0; sigxx = 0.0; sigxy = 0.0; @@ -46,7 +46,6 @@ void QLSQ::clear() { // initialize sigxxxx = 0.0; } - /********************************************************************** * QLSQ::add * @@ -54,8 +53,8 @@ void QLSQ::clear() { // initialize **********************************************************************/ void QLSQ::add(double x, double y) { - n++; // Count elements. - sigx += x; // Update accumulators. + n++; // Count elements. + sigx += x; // Update accumulators. sigy += y; sigxx += x * x; sigxy += x * y; @@ -65,7 +64,6 @@ void QLSQ::add(double x, double y) { sigxxxx += static_cast(x) * x * x * x; } - /********************************************************************** * QLSQ::remove * @@ -77,8 +75,8 @@ void QLSQ::remove(double x, double y) { tprintf("Can't remove an element from an empty QLSQ accumulator!\n"); return; } - n--; // Count elements. - sigx -= x; // Update accumulators. + n--; // Count elements. + sigx -= x; // Update accumulators. sigy -= y; sigxx -= x * x; sigxy -= x * y; @@ -88,7 +86,6 @@ void QLSQ::remove(double x, double y) { sigxxxx -= static_cast(x) * x * x * x; } - /********************************************************************** * QLSQ::fit * @@ -99,7 +96,7 @@ void QLSQ::remove(double x, double y) { void QLSQ::fit(int degree) { long double x_variance = static_cast(sigxx) * n - - static_cast(sigx) * sigx; + static_cast(sigx) * sigx; // Note: for computational efficiency, we do not normalize the variance, // covariance and cube variance here as they are in the same order in both @@ -115,19 +112,19 @@ void QLSQ::fit(int degree) { } return; } - long double top96 = 0.0; // Accurate top. - long double bottom96 = 0.0; // Accurate bottom. + long double top96 = 0.0; // Accurate top. + long double bottom96 = 0.0; // Accurate bottom. long double cubevar = sigxxx * n - static_cast(sigxx) * sigx; long double covariance = static_cast(sigxy) * n - - static_cast(sigx) * sigy; + static_cast(sigx) * sigy; if (n >= 4 && degree >= 2) { top96 = cubevar * covariance; top96 += x_variance * (static_cast(sigxx) * sigy - sigxxy * n); bottom96 = cubevar * cubevar; - bottom96 -= x_variance * - (sigxxxx * n - static_cast(sigxx) * sigxx); + bottom96 -= + x_variance * (sigxxxx * n - static_cast(sigxx) * sigxx); } if (bottom96 >= kMinVariance * n * n * n * n) { // Denominators looking good diff --git a/src/ccstruct/quadlsq.h b/src/ccstruct/quadlsq.h index 1d4e70e8e4..d2c721c4e7 100644 --- a/src/ccstruct/quadlsq.h +++ b/src/ccstruct/quadlsq.h @@ -17,51 +17,50 @@ * **********************************************************************/ -#ifndef QUADLSQ_H -#define QUADLSQ_H +#ifndef QUADLSQ_H +#define QUADLSQ_H -#include "points.h" +#include "points.h" -class QLSQ -{ - public: - QLSQ() { //constructor - clear(); //set to zeros - } - void clear(); //initialize +class QLSQ { + public: + QLSQ() { // constructor + clear(); // set to zeros + } + void clear(); // initialize - void add( //add element - double x, //coords to add - double y); - void remove( //delete element - double x, //coords to delete - double y); - int32_t count() { //no of elements - return n; - } + void add( // add element + double x, // coords to add + double y); + void remove( // delete element + double x, // coords to delete + double y); + int32_t count() { // no of elements + return n; + } - void fit( //fit the given - int degree); //return actual - double get_a() { //get x squard - return a; - } - double get_b() { //get x squard - return b; - } - double get_c() { //get x squard - return c; - } + void fit( // fit the given + int degree); // return actual + double get_a() { // get x squard + return a; + } + double get_b() { // get x squard + return b; + } + double get_c() { // get x squard + return c; + } - private: - int32_t n; //no of elements - double a, b, c; //result - double sigx; //sum of x - double sigy; //sum of y - double sigxx; //sum x squared - double sigxy; //sum of xy - double sigyy; //sum y squared - long double sigxxx; //sum x cubed - long double sigxxy; //sum xsquared y - long double sigxxxx; //sum x fourth + private: + int32_t n; // no of elements + double a, b, c; // result + double sigx; // sum of x + double sigy; // sum of y + double sigxx; // sum x squared + double sigxy; // sum of xy + double sigyy; // sum y squared + long double sigxxx; // sum x cubed + long double sigxxy; // sum xsquared y + long double sigxxxx; // sum x fourth }; #endif diff --git a/src/ccstruct/quadratc.h b/src/ccstruct/quadratc.h index ec79b0fd1c..714c4d3321 100644 --- a/src/ccstruct/quadratc.h +++ b/src/ccstruct/quadratc.h @@ -17,46 +17,44 @@ * **********************************************************************/ -#ifndef QUADRATC_H -#define QUADRATC_H +#ifndef QUADRATC_H +#define QUADRATC_H -#include "points.h" +#include "points.h" -class QUAD_COEFFS -{ - public: - QUAD_COEFFS() = default; - QUAD_COEFFS( //constructor - double xsq, //coefficients - float x, - float constant) { - a = xsq; - b = x; - c = constant; - } +class QUAD_COEFFS { + public: + QUAD_COEFFS() = default; + QUAD_COEFFS( // constructor + double xsq, // coefficients + float x, float constant) { + a = xsq; + b = x; + c = constant; + } - float y( //evaluate - float x) const { //at x - return (float) ((a * x + b) * x + c); - } + float y( // evaluate + float x) const { // at x + return (float)((a * x + b) * x + c); + } - void move( // reposition word - ICOORD vec) { // by vector - /************************************************************ - y - q = a (x - p)^2 + b (x - p) + c - y - q = ax^2 - 2apx + ap^2 + bx - bp + c - y = ax^2 + (b - 2ap)x + (c - bp + ap^2 + q) - ************************************************************/ - int16_t p = vec.x (); - int16_t q = vec.y (); + void move( // reposition word + ICOORD vec) { // by vector + /************************************************************ + y - q = a (x - p)^2 + b (x - p) + c + y - q = ax^2 - 2apx + ap^2 + bx - bp + c + y = ax^2 + (b - 2ap)x + (c - bp + ap^2 + q) + ************************************************************/ + int16_t p = vec.x(); + int16_t q = vec.y(); - c = (float) (c - b * p + a * p * p + q); - b = (float) (b - 2 * a * p); - } + c = (float)(c - b * p + a * p * p + q); + b = (float)(b - 2 * a * p); + } - double a; //x squared - float b; //x - float c; //constant - private: + double a; // x squared + float b; // x + float c; // constant + private: }; #endif diff --git a/src/ccstruct/quspline.cpp b/src/ccstruct/quspline.cpp index 20e7db9f1b..370aabd40d 100644 --- a/src/ccstruct/quspline.cpp +++ b/src/ccstruct/quspline.cpp @@ -17,17 +17,17 @@ * **********************************************************************/ +#include "quspline.h" #include "allheaders.h" #include "memry.h" #include "quadlsq.h" -#include "quspline.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif -#define QSPLINE_PRECISION 16 //no of steps to draw +#define QSPLINE_PRECISION 16 // no of steps to draw /********************************************************************** * QSPLINE::QSPLINE @@ -35,120 +35,116 @@ * Constructor to build a QSPLINE given the components used in the old code. **********************************************************************/ -QSPLINE::QSPLINE( //constructor - int32_t count, //no of segments - int32_t *xstarts, //start coords - double *coeffs //coefficients - ) { - int32_t index; //segment index +QSPLINE::QSPLINE( // constructor + int32_t count, // no of segments + int32_t* xstarts, // start coords + double* coeffs // coefficients +) { + int32_t index; // segment index - //get memory - xcoords = (int32_t *) alloc_mem ((count + 1) * sizeof (int32_t)); - quadratics = (QUAD_COEFFS *) alloc_mem (count * sizeof (QUAD_COEFFS)); + // get memory + xcoords = (int32_t*)alloc_mem((count + 1) * sizeof(int32_t)); + quadratics = (QUAD_COEFFS*)alloc_mem(count * sizeof(QUAD_COEFFS)); segments = count; for (index = 0; index < segments; index++) { - //copy them + // copy them xcoords[index] = xstarts[index]; - quadratics[index] = QUAD_COEFFS (coeffs[index * 3], - coeffs[index * 3 + 1], - coeffs[index * 3 + 2]); + quadratics[index] = QUAD_COEFFS(coeffs[index * 3], coeffs[index * 3 + 1], + coeffs[index * 3 + 2]); } - //right edge + // right edge xcoords[index] = xstarts[index]; } - /********************************************************************** * QSPLINE::QSPLINE * * Constructor to build a QSPLINE by appproximation of points. **********************************************************************/ -QSPLINE::QSPLINE ( //constructor -int xstarts[], //spline boundaries -int segcount, //no of segments -int xpts[], //points to fit -int ypts[], int pointcount, //no of pts -int degree //fit required +QSPLINE::QSPLINE( // constructor + int xstarts[], // spline boundaries + int segcount, // no of segments + int xpts[], // points to fit + int ypts[], + int pointcount, // no of pts + int degree // fit required ) { - int pointindex; /*no along text line */ - int segment; /*segment no */ - int32_t *ptcounts; //no in each segment - QLSQ qlsq; /*accumulator */ + int pointindex; /*no along text line */ + int segment; /*segment no */ + int32_t* ptcounts; // no in each segment + QLSQ qlsq; /*accumulator */ segments = segcount; - xcoords = (int32_t *) alloc_mem ((segcount + 1) * sizeof (int32_t)); - ptcounts = (int32_t *) alloc_mem ((segcount + 1) * sizeof (int32_t)); - quadratics = (QUAD_COEFFS *) alloc_mem (segcount * sizeof (QUAD_COEFFS)); - memmove (xcoords, xstarts, (segcount + 1) * sizeof (int32_t)); - ptcounts[0] = 0; /*none in any yet */ + xcoords = (int32_t*)alloc_mem((segcount + 1) * sizeof(int32_t)); + ptcounts = (int32_t*)alloc_mem((segcount + 1) * sizeof(int32_t)); + quadratics = (QUAD_COEFFS*)alloc_mem(segcount * sizeof(QUAD_COEFFS)); + memmove(xcoords, xstarts, (segcount + 1) * sizeof(int32_t)); + ptcounts[0] = 0; /*none in any yet */ for (segment = 0, pointindex = 0; pointindex < pointcount; pointindex++) { while (segment < segcount && xpts[pointindex] >= xstarts[segment]) { - segment++; /*try next segment */ - /*cumulative counts */ + segment++; /*try next segment */ + /*cumulative counts */ ptcounts[segment] = ptcounts[segment - 1]; } - ptcounts[segment]++; /*no in previous partition */ + ptcounts[segment]++; /*no in previous partition */ } while (segment < segcount) { segment++; - /*zero the rest */ + /*zero the rest */ ptcounts[segment] = ptcounts[segment - 1]; } for (segment = 0; segment < segcount; segment++) { - qlsq.clear (); - /*first blob */ + qlsq.clear(); + /*first blob */ pointindex = ptcounts[segment]; - if (pointindex > 0 - && xpts[pointindex] != xpts[pointindex - 1] - && xpts[pointindex] != xstarts[segment]) - qlsq.add (xstarts[segment], - ypts[pointindex - 1] - + (ypts[pointindex] - ypts[pointindex - 1]) - * (xstarts[segment] - xpts[pointindex - 1]) - / (xpts[pointindex] - xpts[pointindex - 1])); + if (pointindex > 0 && xpts[pointindex] != xpts[pointindex - 1] && + xpts[pointindex] != xstarts[segment]) + qlsq.add( + xstarts[segment], + ypts[pointindex - 1] + (ypts[pointindex] - ypts[pointindex - 1]) * + (xstarts[segment] - xpts[pointindex - 1]) / + (xpts[pointindex] - xpts[pointindex - 1])); for (; pointindex < ptcounts[segment + 1]; pointindex++) { - qlsq.add (xpts[pointindex], ypts[pointindex]); + qlsq.add(xpts[pointindex], ypts[pointindex]); } - if (pointindex > 0 && pointindex < pointcount - && xpts[pointindex] != xstarts[segment + 1]) - qlsq.add (xstarts[segment + 1], - ypts[pointindex - 1] - + (ypts[pointindex] - ypts[pointindex - 1]) - * (xstarts[segment + 1] - xpts[pointindex - 1]) - / (xpts[pointindex] - xpts[pointindex - 1])); - qlsq.fit (degree); - quadratics[segment].a = qlsq.get_a (); - quadratics[segment].b = qlsq.get_b (); - quadratics[segment].c = qlsq.get_c (); + if (pointindex > 0 && pointindex < pointcount && + xpts[pointindex] != xstarts[segment + 1]) + qlsq.add(xstarts[segment + 1], + ypts[pointindex - 1] + + (ypts[pointindex] - ypts[pointindex - 1]) * + (xstarts[segment + 1] - xpts[pointindex - 1]) / + (xpts[pointindex] - xpts[pointindex - 1])); + qlsq.fit(degree); + quadratics[segment].a = qlsq.get_a(); + quadratics[segment].b = qlsq.get_b(); + quadratics[segment].c = qlsq.get_c(); } free_mem(ptcounts); } - /********************************************************************** * QSPLINE::QSPLINE * * Constructor to build a QSPLINE from another. **********************************************************************/ -QSPLINE::QSPLINE( //constructor - const QSPLINE &src) { +QSPLINE::QSPLINE( // constructor + const QSPLINE& src) { segments = 0; xcoords = nullptr; quadratics = nullptr; *this = src; } - /********************************************************************** * QSPLINE::~QSPLINE * * Destroy a QSPLINE. **********************************************************************/ -QSPLINE::~QSPLINE ( //constructor +QSPLINE::~QSPLINE( // constructor ) { if (xcoords != nullptr) { free_mem(xcoords); @@ -160,116 +156,107 @@ QSPLINE::~QSPLINE ( //constructor } } - /********************************************************************** * QSPLINE::operator= * * Copy a QSPLINE **********************************************************************/ -QSPLINE & QSPLINE::operator= ( //assignment -const QSPLINE & source) { - if (xcoords != nullptr) - free_mem(xcoords); - if (quadratics != nullptr) - free_mem(quadratics); +QSPLINE& QSPLINE::operator=( // assignment + const QSPLINE& source) { + if (xcoords != nullptr) free_mem(xcoords); + if (quadratics != nullptr) free_mem(quadratics); segments = source.segments; - xcoords = (int32_t *) alloc_mem ((segments + 1) * sizeof (int32_t)); - quadratics = (QUAD_COEFFS *) alloc_mem (segments * sizeof (QUAD_COEFFS)); - memmove (xcoords, source.xcoords, (segments + 1) * sizeof (int32_t)); - memmove (quadratics, source.quadratics, segments * sizeof (QUAD_COEFFS)); + xcoords = (int32_t*)alloc_mem((segments + 1) * sizeof(int32_t)); + quadratics = (QUAD_COEFFS*)alloc_mem(segments * sizeof(QUAD_COEFFS)); + memmove(xcoords, source.xcoords, (segments + 1) * sizeof(int32_t)); + memmove(quadratics, source.quadratics, segments * sizeof(QUAD_COEFFS)); return *this; } - /********************************************************************** * QSPLINE::step * * Return the total of the step functions between the given coords. **********************************************************************/ -double QSPLINE::step( //find step functions - double x1, //between coords - double x2) { - int index1, index2; //indices of coords - double total; /*total steps */ +double QSPLINE::step( // find step functions + double x1, // between coords + double x2) { + int index1, index2; // indices of coords + double total; /*total steps */ - index1 = spline_index (x1); - index2 = spline_index (x2); + index1 = spline_index(x1); + index2 = spline_index(x2); total = 0; while (index1 < index2) { - total += - (double) quadratics[index1 + 1].y ((float) xcoords[index1 + 1]); - total -= (double) quadratics[index1].y ((float) xcoords[index1 + 1]); - index1++; /*next segment */ + total += (double)quadratics[index1 + 1].y((float)xcoords[index1 + 1]); + total -= (double)quadratics[index1].y((float)xcoords[index1 + 1]); + index1++; /*next segment */ } - return total; /*total steps */ + return total; /*total steps */ } - /********************************************************************** * QSPLINE::y * * Return the y value at the given x value. **********************************************************************/ -double QSPLINE::y( //evaluate - double x //coord to evaluate at - ) const { - int32_t index; //segment index +double QSPLINE::y( // evaluate + double x // coord to evaluate at + ) const { + int32_t index; // segment index - index = spline_index (x); - return quadratics[index].y (x);//in correct segment + index = spline_index(x); + return quadratics[index].y(x); // in correct segment } - /********************************************************************** * QSPLINE::spline_index * * Return the index to the largest xcoord not greater than x. **********************************************************************/ -int32_t QSPLINE::spline_index( //evaluate - double x //coord to evaluate at - ) const { - int32_t index; //segment index - int32_t bottom; //bottom of range - int32_t top; //top of range +int32_t QSPLINE::spline_index( // evaluate + double x // coord to evaluate at + ) const { + int32_t index; // segment index + int32_t bottom; // bottom of range + int32_t top; // top of range bottom = 0; top = segments; while (top - bottom > 1) { - index = (top + bottom) / 2; //centre of range + index = (top + bottom) / 2; // centre of range if (x >= xcoords[index]) - bottom = index; //new min + bottom = index; // new min else - top = index; //new max + top = index; // new max } return bottom; } - /********************************************************************** * QSPLINE::move * * Reposition spline by vector **********************************************************************/ -void QSPLINE::move( // reposition spline - ICOORD vec // by vector - ) { - int32_t segment; //index of segment - int16_t x_shift = vec.x (); +void QSPLINE::move( // reposition spline + ICOORD vec // by vector +) { + int32_t segment; // index of segment + int16_t x_shift = vec.x(); for (segment = 0; segment < segments; segment++) { xcoords[segment] += x_shift; - quadratics[segment].move (vec); + quadratics[segment].move(vec); } xcoords[segment] += x_shift; } - /********************************************************************** * QSPLINE::overlap * @@ -277,18 +264,20 @@ void QSPLINE::move( // reposition spline * than the bounds of this. **********************************************************************/ -bool QSPLINE::overlap( //test overlap - QSPLINE* spline2, //2 cannot be smaller - double fraction //by more than this +bool QSPLINE::overlap( // test overlap + QSPLINE* spline2, // 2 cannot be smaller + double fraction // by more than this ) { int leftlimit = xcoords[1]; /*common left limit */ int rightlimit = xcoords[segments - 1]; /*common right limit */ - /*or too non-overlap */ - return !(spline2->segments < 3 || spline2->xcoords[1] > leftlimit + fraction * (rightlimit - leftlimit) || - spline2->xcoords[spline2->segments - 1] < rightlimit - fraction * (rightlimit - leftlimit)); + /*or too non-overlap */ + return !(spline2->segments < 3 || + spline2->xcoords[1] > + leftlimit + fraction * (rightlimit - leftlimit) || + spline2->xcoords[spline2->segments - 1] < + rightlimit - fraction * (rightlimit - leftlimit)); } - /********************************************************************** * extrapolate_spline * @@ -296,33 +285,29 @@ bool QSPLINE::overlap( //test overlap * quadratic has at either end. **********************************************************************/ -void QSPLINE::extrapolate( //linear extrapolation - double gradient, //gradient to use - int xmin, //new left edge - int xmax //new right edge - ) { - int segment; /*current segment of spline */ - int dest_segment; //dest index - int *xstarts; //new boundaries - QUAD_COEFFS *quads; //new ones - int increment; //in size +void QSPLINE::extrapolate( // linear extrapolation + double gradient, // gradient to use + int xmin, // new left edge + int xmax // new right edge +) { + int segment; /*current segment of spline */ + int dest_segment; // dest index + int* xstarts; // new boundaries + QUAD_COEFFS* quads; // new ones + int increment; // in size increment = xmin < xcoords[0] ? 1 : 0; - if (xmax > xcoords[segments]) - increment++; - if (increment == 0) - return; - xstarts = (int *) alloc_mem ((segments + 1 + increment) * sizeof (int)); - quads = - (QUAD_COEFFS *) alloc_mem ((segments + increment) * sizeof (QUAD_COEFFS)); + if (xmax > xcoords[segments]) increment++; + if (increment == 0) return; + xstarts = (int*)alloc_mem((segments + 1 + increment) * sizeof(int)); + quads = (QUAD_COEFFS*)alloc_mem((segments + increment) * sizeof(QUAD_COEFFS)); if (xmin < xcoords[0]) { xstarts[0] = xmin; quads[0].a = 0; quads[0].b = gradient; - quads[0].c = y (xcoords[0]) - quads[0].b * xcoords[0]; + quads[0].c = y(xcoords[0]) - quads[0].b * xcoords[0]; dest_segment = 1; - } - else + } else dest_segment = 0; for (segment = 0; segment < segments; segment++) { xstarts[dest_segment] = xcoords[segment]; @@ -333,19 +318,18 @@ void QSPLINE::extrapolate( //linear extrapolation if (xmax > xcoords[segments]) { quads[dest_segment].a = 0; quads[dest_segment].b = gradient; - quads[dest_segment].c = y (xcoords[segments]) - - quads[dest_segment].b * xcoords[segments]; + quads[dest_segment].c = + y(xcoords[segments]) - quads[dest_segment].b * xcoords[segments]; dest_segment++; xstarts[dest_segment] = xmax + 1; } segments = dest_segment; free_mem(xcoords); free_mem(quadratics); - xcoords = (int32_t *) xstarts; + xcoords = (int32_t*)xstarts; quadratics = quads; } - /********************************************************************** * QSPLINE::plot * @@ -353,48 +337,47 @@ void QSPLINE::extrapolate( //linear extrapolation **********************************************************************/ #ifndef GRAPHICS_DISABLED -void QSPLINE::plot( //draw it - ScrollView* window, //window to draw in - ScrollView::Color colour //colour to draw in - ) const { - int32_t segment; //index of segment - int16_t step; //index of poly piece - double increment; //x increment - double x; //x coord +void QSPLINE::plot( // draw it + ScrollView* window, // window to draw in + ScrollView::Color colour // colour to draw in + ) const { + int32_t segment; // index of segment + int16_t step; // index of poly piece + double increment; // x increment + double x; // x coord window->Pen(colour); for (segment = 0; segment < segments; segment++) { increment = - (double) (xcoords[segment + 1] - - xcoords[segment]) / QSPLINE_PRECISION; + (double)(xcoords[segment + 1] - xcoords[segment]) / QSPLINE_PRECISION; x = xcoords[segment]; for (step = 0; step <= QSPLINE_PRECISION; step++) { if (segment == 0 && step == 0) - window->SetCursor(x, quadratics[segment].y (x)); + window->SetCursor(x, quadratics[segment].y(x)); else - window->DrawTo(x, quadratics[segment].y (x)); + window->DrawTo(x, quadratics[segment].y(x)); x += increment; } } } #endif -void QSPLINE::plot(Pix *pix) const { +void QSPLINE::plot(Pix* pix) const { if (pix == nullptr) { return; } - int32_t segment; // Index of segment - int16_t step; // Index of poly piece + int32_t segment; // Index of segment + int16_t step; // Index of poly piece double increment; // x increment - double x; // x coord + double x; // x coord double height = static_cast(pixGetHeight(pix)); Pta* points = ptaCreate(QSPLINE_PRECISION * segments); const int kLineWidth = 5; for (segment = 0; segment < segments; segment++) { - increment = static_cast((xcoords[segment + 1] - - xcoords[segment])) / QSPLINE_PRECISION; + increment = static_cast((xcoords[segment + 1] - xcoords[segment])) / + QSPLINE_PRECISION; x = xcoords[segment]; for (step = 0; step <= QSPLINE_PRECISION; step++) { double y = height - quadratics[segment].y(x); diff --git a/src/ccstruct/quspline.h b/src/ccstruct/quspline.h index b2d71c4360..26d65f540e 100644 --- a/src/ccstruct/quspline.h +++ b/src/ccstruct/quspline.h @@ -17,83 +17,77 @@ * **********************************************************************/ -#ifndef QUSPLINE_H -#define QUSPLINE_H +#ifndef QUSPLINE_H +#define QUSPLINE_H -#include "quadratc.h" -#include "serialis.h" -#include "memry.h" -#include "rect.h" +#include "memry.h" +#include "quadratc.h" +#include "rect.h" +#include "serialis.h" class ROW; struct Pix; -class QSPLINE -{ - friend void make_first_baseline(TBOX *, - int, - int *, - int *, - QSPLINE *, - QSPLINE *, +class QSPLINE { + friend void make_first_baseline(TBOX*, int, int*, int*, QSPLINE*, QSPLINE*, float); - friend void make_holed_baseline(TBOX *, int, QSPLINE *, QSPLINE *, float); - friend void tweak_row_baseline(ROW *, double, double); - public: - QSPLINE() { //empty constructor - segments = 0; - xcoords = nullptr; //everything empty - quadratics = nullptr; - } - QSPLINE( //copy constructor - const QSPLINE &src); - QSPLINE( //constructor - int32_t count, //number of segments - int32_t *xstarts, //segment starts - double *coeffs); //coefficients - ~QSPLINE (); //destructor - QSPLINE ( //least squares fit - int xstarts[], //spline boundaries - int segcount, //no of segments - int xcoords[], //points to fit - int ycoords[], int blobcount,//no of coords - int degree); //function + friend void make_holed_baseline(TBOX*, int, QSPLINE*, QSPLINE*, float); + friend void tweak_row_baseline(ROW*, double, double); - double step( //step change - double x1, //between coords - double x2); - double y( //evaluate - double x) const; //at x + public: + QSPLINE() { // empty constructor + segments = 0; + xcoords = nullptr; // everything empty + quadratics = nullptr; + } + QSPLINE( // copy constructor + const QSPLINE& src); + QSPLINE( // constructor + int32_t count, // number of segments + int32_t* xstarts, // segment starts + double* coeffs); // coefficients + ~QSPLINE(); // destructor + QSPLINE( // least squares fit + int xstarts[], // spline boundaries + int segcount, // no of segments + int xcoords[], // points to fit + int ycoords[], + int blobcount, // no of coords + int degree); // function - void move( // reposition spline - ICOORD vec); // by vector - bool overlap( //test overlap - QSPLINE* spline2, //2 cannot be smaller - double fraction); //by more than this - void extrapolate( //linear extrapolation - double gradient, //gradient to use - int left, //new left edge - int right); //new right edge + double step( // step change + double x1, // between coords + double x2); + double y( // evaluate + double x) const; // at x + + void move( // reposition spline + ICOORD vec); // by vector + bool overlap( // test overlap + QSPLINE* spline2, // 2 cannot be smaller + double fraction); // by more than this + void extrapolate( // linear extrapolation + double gradient, // gradient to use + int left, // new left edge + int right); // new right edge #ifndef GRAPHICS_DISABLED - void plot( //draw it - ScrollView* window, //in window - ScrollView::Color colour) const; //in colour + void plot( // draw it + ScrollView* window, // in window + ScrollView::Color colour) const; // in colour #endif - // Paint the baseline over pix. If pix has depth of 32, then the line will - // be painted in red. Otherwise it will be painted in black. - void plot(Pix* pix) const; - - QSPLINE & operator= ( - const QSPLINE & source); //from this + // Paint the baseline over pix. If pix has depth of 32, then the line will + // be painted in red. Otherwise it will be painted in black. + void plot(Pix* pix) const; - private: + QSPLINE& operator=(const QSPLINE& source); // from this - int32_t spline_index( //binary search - double x) const; //for x - int32_t segments; //no of segments - int32_t *xcoords; //no of coords - QUAD_COEFFS *quadratics; //spline pieces + private: + int32_t spline_index( // binary search + double x) const; // for x + int32_t segments; // no of segments + int32_t* xcoords; // no of coords + QUAD_COEFFS* quadratics; // spline pieces }; #endif diff --git a/src/ccstruct/ratngs.cpp b/src/ccstruct/ratngs.cpp index 2012d57944..ac63ed27c3 100644 --- a/src/ccstruct/ratngs.cpp +++ b/src/ccstruct/ratngs.cpp @@ -17,7 +17,6 @@ * **********************************************************************/ - #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif @@ -68,7 +67,7 @@ static const char kPermuterTypeUserDawgPerm[] = "User Dictionary"; static const char kPermuterTypeFreqDawgPerm[] = "Frequent Words Dictionary"; static const char kPermuterTypeCompoundPerm[] = "Compound"; -static const char * const kPermuterTypeNames[] = { +static const char* const kPermuterTypeNames[] = { kPermuterTypeNoPerm, // 0 kPermuterTypePuncPerm, // 1 kPermuterTypeTopPerm, // 2 @@ -89,14 +88,14 @@ static const char * const kPermuterTypeNames[] = { * * Constructor to build a BLOB_CHOICE from a char, rating and certainty. */ -BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id - float src_rating, // rating - float src_cert, // certainty - int src_script_id, // script - float min_xheight, // min xheight allowed - float max_xheight, // max xheight by this char - float yshift, // yshift out of position - BlobChoiceClassifier c) { // adapted match or other +BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id + float src_rating, // rating + float src_cert, // certainty + int src_script_id, // script + float min_xheight, // min xheight allowed + float max_xheight, // max xheight by this char + float yshift, // yshift out of position + BlobChoiceClassifier c) { // adapted match or other unichar_id_ = src_unichar_id; rating_ = src_rating; certainty_ = src_cert; @@ -114,7 +113,7 @@ BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id * * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE. */ -BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) : ELIST_LINK(other) { +BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE& other) : ELIST_LINK(other) { unichar_id_ = other.unichar_id(); rating_ = other.rating(); certainty_ = other.certainty(); @@ -136,22 +135,22 @@ bool BLOB_CHOICE::PosAndSizeAgree(const BLOB_CHOICE& other, float x_height, double baseline_diff = fabs(yshift() - other.yshift()); if (baseline_diff > kMaxBaselineDrift * x_height) { if (debug) { - tprintf("Baseline diff %g for %d v %d\n", - baseline_diff, unichar_id_, other.unichar_id_); + tprintf("Baseline diff %g for %d v %d\n", baseline_diff, unichar_id_, + other.unichar_id_); } return false; } double this_range = max_xheight() - min_xheight(); double other_range = other.max_xheight() - other.min_xheight(); - double denominator = ClipToRange(std::min(this_range, other_range), - 1.0, kMaxOverlapDenominator * x_height); + double denominator = ClipToRange(std::min(this_range, other_range), 1.0, + kMaxOverlapDenominator * x_height); double overlap = std::min(max_xheight(), other.max_xheight()) - - std::max(min_xheight(), other.min_xheight()); + std::max(min_xheight(), other.min_xheight()); overlap /= denominator; if (debug) { tprintf("PosAndSize for %d v %d: bl diff = %g, ranges %g, %g / %g ->%g\n", - unichar_id_, other.unichar_id_, baseline_diff, - this_range, other_range, denominator, overlap); + unichar_id_, other.unichar_id_, baseline_diff, this_range, + other_range, denominator, overlap); } return overlap >= kMinXHeightMatch; @@ -159,8 +158,7 @@ bool BLOB_CHOICE::PosAndSizeAgree(const BLOB_CHOICE& other, float x_height, // Helper to find the BLOB_CHOICE in the bc_list that matches the given // unichar_id, or nullptr if there is no match. -BLOB_CHOICE* FindMatchingChoice(UNICHAR_ID char_id, - BLOB_CHOICE_LIST* bc_list) { +BLOB_CHOICE* FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST* bc_list) { // Find the corresponding best BLOB_CHOICE. BLOB_CHOICE_IT choice_it(bc_list); for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); @@ -173,18 +171,22 @@ BLOB_CHOICE* FindMatchingChoice(UNICHAR_ID char_id, return nullptr; } -const char *WERD_CHOICE::permuter_name(uint8_t permuter) { +const char* WERD_CHOICE::permuter_name(uint8_t permuter) { return kPermuterTypeNames[permuter]; } namespace tesseract { -const char *ScriptPosToString(enum ScriptPos script_pos) { +const char* ScriptPosToString(enum ScriptPos script_pos) { switch (script_pos) { - case SP_NORMAL: return "NORM"; - case SP_SUBSCRIPT: return "SUB"; - case SP_SUPERSCRIPT: return "SUPER"; - case SP_DROPCAP: return "DROPC"; + case SP_NORMAL: + return "NORM"; + case SP_SUBSCRIPT: + return "SUB"; + case SP_SUPERSCRIPT: + return "SUPER"; + case SP_DROPCAP: + return "DROPC"; } return "SP_UNKNOWN"; } @@ -197,9 +199,8 @@ const char *ScriptPosToString(enum ScriptPos script_pos) { * Constructor to build a WERD_CHOICE from the given string. * The function assumes that src_string is not nullptr. */ -WERD_CHOICE::WERD_CHOICE(const char *src_string, - const UNICHARSET &unicharset) - : unicharset_(&unicharset){ +WERD_CHOICE::WERD_CHOICE(const char* src_string, const UNICHARSET& unicharset) + : unicharset_(&unicharset) { GenericVector encoding; GenericVector lengths; std::string cleaned = unicharset.CleanupString(src_string); @@ -224,22 +225,20 @@ WERD_CHOICE::WERD_CHOICE(const char *src_string, * src_lengths argument could be nullptr, in which case the unichars * in src_string are assumed to all be of length 1. */ -void WERD_CHOICE::init(const char *src_string, - const char *src_lengths, - float src_rating, - float src_certainty, +void WERD_CHOICE::init(const char* src_string, const char* src_lengths, + float src_rating, float src_certainty, uint8_t src_permuter) { int src_string_len = strlen(src_string); if (src_string_len == 0) { this->init(8); } else { - this->init(src_lengths ? strlen(src_lengths): src_string_len); + this->init(src_lengths ? strlen(src_lengths) : src_string_len); length_ = reserved_; int offset = 0; for (int i = 0; i < length_; ++i) { int unichar_length = src_lengths ? src_lengths[i] : 1; unichar_ids_[i] = - unicharset_->unichar_to_id(src_string+offset, unichar_length); + unicharset_->unichar_to_id(src_string + offset, unichar_length); state_[i] = 1; certainties_[i] = src_certainty; offset += unichar_length; @@ -262,7 +261,7 @@ WERD_CHOICE::~WERD_CHOICE() { delete[] certainties_; } -const char *WERD_CHOICE::permuter_name() const { +const char* WERD_CHOICE::permuter_name() const { return kPermuterTypeNames[permuter_]; } @@ -281,10 +280,10 @@ BLOB_CHOICE_LIST* WERD_CHOICE::blob_choices(int index, MATRIX* ratings) const { // Returns the MATRIX_COORD corresponding to the location in the ratings // MATRIX for the given index into the word. -MATRIX_COORD WERD_CHOICE::MatrixCoord(int index) const { +MATRIX_COORD +WERD_CHOICE::MatrixCoord(int index) const { int col = 0; - for (int i = 0; i < index; ++i) - col += state_[i]; + for (int i = 0; i < index; ++i) col += state_[i]; int row = col + state_[index] - 1; return MATRIX_COORD(col, row); } @@ -299,7 +298,6 @@ void WERD_CHOICE::set_blob_choice(int index, int blob_count, certainties_[index] = blob_choice->certainty(); } - /** * contains_unichar_id * @@ -347,11 +345,12 @@ void WERD_CHOICE::remove_unichar_ids(int start, int num) { void WERD_CHOICE::reverse_and_mirror_unichar_ids() { for (int i = 0; i < length_ / 2; ++i) { UNICHAR_ID tmp_id = unichar_ids_[i]; - unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_-1-i]); - unichar_ids_[length_-1-i] = unicharset_->get_mirror(tmp_id); + unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_ - 1 - i]); + unichar_ids_[length_ - 1 - i] = unicharset_->get_mirror(tmp_id); } if (length_ % 2 != 0) { - unichar_ids_[length_/2] = unicharset_->get_mirror(unichar_ids_[length_/2]); + unichar_ids_[length_ / 2] = + unicharset_->get_mirror(unichar_ids_[length_ / 2]); } } @@ -362,30 +361,27 @@ void WERD_CHOICE::reverse_and_mirror_unichar_ids() { * enclose the core portion of this word -- the part after stripping * punctuation from the left and right. */ -void WERD_CHOICE::punct_stripped(int *start, int *end) const { +void WERD_CHOICE::punct_stripped(int* start, int* end) const { *start = 0; *end = length() - 1; while (*start < length() && unicharset()->get_ispunctuation(unichar_id(*start))) { (*start)++; } - while (*end > -1 && - unicharset()->get_ispunctuation(unichar_id(*end))) { + while (*end > -1 && unicharset()->get_ispunctuation(unichar_id(*end))) { (*end)--; } (*end)++; } -void WERD_CHOICE::GetNonSuperscriptSpan(int *pstart, int *pend) const { +void WERD_CHOICE::GetNonSuperscriptSpan(int* pstart, int* pend) const { int end = length(); - while (end > 0 && - unicharset_->get_isdigit(unichar_ids_[end - 1]) && + while (end > 0 && unicharset_->get_isdigit(unichar_ids_[end - 1]) && BlobPosition(end - 1) == tesseract::SP_SUPERSCRIPT) { end--; } int start = 0; - while (start < end && - unicharset_->get_isdigit(unichar_ids_[start]) && + while (start < end && unicharset_->get_isdigit(unichar_ids_[start]) && BlobPosition(start) == tesseract::SP_SUPERSCRIPT) { start++; } @@ -393,14 +389,17 @@ void WERD_CHOICE::GetNonSuperscriptSpan(int *pstart, int *pend) const { *pend = end; } -WERD_CHOICE WERD_CHOICE::shallow_copy(int start, int end) const { +WERD_CHOICE +WERD_CHOICE::shallow_copy(int start, int end) const { ASSERT_HOST(start >= 0 && start <= length_); ASSERT_HOST(end >= 0 && end <= length_); - if (end < start) { end = start; } + if (end < start) { + end = start; + } WERD_CHOICE retval(unicharset_, end - start); for (int i = start; i < end; i++) { - retval.append_unichar_id_space_allocated( - unichar_ids_[i], state_[i], 0.0f, certainties_[i]); + retval.append_unichar_id_space_allocated(unichar_ids_[i], state_[i], 0.0f, + certainties_[i]); } return retval; } @@ -428,12 +427,12 @@ bool WERD_CHOICE::has_rtl_unichar_id() const { * Populates the given word_str with unichars from unichar_ids and * and word_lengths_str with the corresponding unichar lengths. */ -void WERD_CHOICE::string_and_lengths(STRING *word_str, - STRING *word_lengths_str) const { +void WERD_CHOICE::string_and_lengths(STRING* word_str, + STRING* word_lengths_str) const { *word_str = ""; if (word_lengths_str != nullptr) *word_lengths_str = ""; for (int i = 0; i < length_; ++i) { - const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]); + const char* ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]); *word_str += ch; if (word_lengths_str != nullptr) { *word_lengths_str += strlen(ch); @@ -447,14 +446,13 @@ void WERD_CHOICE::string_and_lengths(STRING *word_str, * Make sure there is enough space in the word for the new unichar id * and call append_unichar_id_space_allocated(). */ -void WERD_CHOICE::append_unichar_id( - UNICHAR_ID unichar_id, int blob_count, - float rating, float certainty) { +void WERD_CHOICE::append_unichar_id(UNICHAR_ID unichar_id, int blob_count, + float rating, float certainty) { if (length_ == reserved_) { this->double_the_size(); } - this->append_unichar_id_space_allocated(unichar_id, blob_count, - rating, certainty); + this->append_unichar_id_space_allocated(unichar_id, blob_count, rating, + certainty); } /** @@ -464,12 +462,12 @@ void WERD_CHOICE::append_unichar_id( * The ratings are added and the confidence is the min. * If the permuters are NOT the same the permuter is set to COMPOUND_PERM */ -WERD_CHOICE & WERD_CHOICE::operator+= (const WERD_CHOICE & second) { +WERD_CHOICE& WERD_CHOICE::operator+=(const WERD_CHOICE& second) { ASSERT_HOST(unicharset_ == second.unicharset_); while (reserved_ < length_ + second.length()) { this->double_the_size(); } - const UNICHAR_ID *other_unichar_ids = second.unichar_ids(); + const UNICHAR_ID* other_unichar_ids = second.unichar_ids(); for (int i = 0; i < second.length(); ++i) { unichar_ids_[length_ + i] = other_unichar_ids[i]; state_[length_ + i] = second.state_[i]; @@ -479,21 +477,18 @@ WERD_CHOICE & WERD_CHOICE::operator+= (const WERD_CHOICE & second) { length_ += second.length(); if (second.adjust_factor_ > adjust_factor_) adjust_factor_ = second.adjust_factor_; - rating_ += second.rating(); // add ratings - if (second.certainty() < certainty_) // take min + rating_ += second.rating(); // add ratings + if (second.certainty() < certainty_) // take min certainty_ = second.certainty(); - if (second.dangerous_ambig_found_) - dangerous_ambig_found_ = true; + if (second.dangerous_ambig_found_) dangerous_ambig_found_ = true; if (permuter_ == NO_PERM) { permuter_ = second.permuter(); - } else if (second.permuter() != NO_PERM && - second.permuter() != permuter_) { + } else if (second.permuter() != NO_PERM && second.permuter() != permuter_) { permuter_ = COMPOUND_PERM; } return *this; } - /** * WERD_CHOICE::operator= * @@ -506,7 +501,7 @@ WERD_CHOICE& WERD_CHOICE::operator=(const WERD_CHOICE& source) { } unicharset_ = source.unicharset_; - const UNICHAR_ID *other_unichar_ids = source.unichar_ids(); + const UNICHAR_ID* other_unichar_ids = source.unichar_ids(); for (int i = 0; i < source.length(); ++i) { unichar_ids_[i] = other_unichar_ids[i]; state_[i] = source.state_[i]; @@ -536,8 +531,7 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word) { int debug = 0; // Initialize to normal. - for (int i = 0; i < length_; ++i) - script_pos_[i] = tesseract::SP_NORMAL; + for (int i = 0; i < length_; ++i) script_pos_[i] = tesseract::SP_NORMAL; if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) { return; } @@ -553,14 +547,14 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word) { int uni_id = unichar_id(blob_index); TBOX blob_box = tblob->bounding_box(); if (state_ != nullptr) { - for (int i = 1; i < state_[blob_index]; ++i) { + for (int i = 1; i < state_[blob_index]; ++i) { ++chunk_index; tblob = word->blobs[chunk_index]; blob_box += tblob->bounding_box(); } } - script_pos_[blob_index] = ScriptPositionOf(false, *unicharset_, blob_box, - uni_id); + script_pos_[blob_index] = + ScriptPositionOf(false, *unicharset_, blob_box, uni_id); if (small_caps && script_pos_[blob_index] != tesseract::SP_DROPCAP) { script_pos_[blob_index] = tesseract::SP_NORMAL; } @@ -571,9 +565,10 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word) { if (position_counts[tesseract::SP_SUBSCRIPT] > 0.75 * length_ || position_counts[tesseract::SP_SUPERSCRIPT] > 0.75 * length_) { if (debug >= 2) { - tprintf("Most characters of %s are subscript or superscript.\n" - "That seems wrong, so I'll assume we got the baseline wrong\n", - unichar_string().string()); + tprintf( + "Most characters of %s are subscript or superscript.\n" + "That seems wrong, so I'll assume we got the baseline wrong\n", + unichar_string().string()); } for (int i = 0; i < length_; i++) { ScriptPos sp = script_pos_[i]; @@ -604,15 +599,14 @@ void WERD_CHOICE::SetScriptPositions(const tesseract::ScriptPos* positions, int length) { ASSERT_HOST(length == length_); if (positions != script_pos_) { - delete [] script_pos_; + delete[] script_pos_; script_pos_ = new ScriptPos[length]; memcpy(script_pos_, positions, sizeof(positions[0]) * length); } } // Sets all the script_pos_ positions to the given position. void WERD_CHOICE::SetAllScriptPositions(tesseract::ScriptPos position) { - for (int i = 0; i < length_; ++i) - script_pos_[i] = position; + for (int i = 0; i < length_; ++i) script_pos_[i] = position; } /* static */ @@ -624,9 +618,8 @@ ScriptPos WERD_CHOICE::ScriptPositionOf(bool print_debug, int top = blob_box.top(); int bottom = blob_box.bottom(); int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(unichar_id, - &min_bottom, &max_bottom, - &min_top, &max_top); + unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, + &max_top); int sub_thresh_top = min_top - kMinSubscriptOffset; int sub_thresh_bot = kBlnBaselineOffset - kMinSubscriptOffset; @@ -640,15 +633,14 @@ ScriptPos WERD_CHOICE::ScriptPositionOf(bool print_debug, } if (print_debug) { - const char *pos = ScriptPosToString(retval); - tprintf("%s Character %s[bot:%d top: %d] " - "bot_range[%d,%d] top_range[%d, %d] " - "sub_thresh[bot:%d top:%d] sup_thresh_bot %d\n", - pos, unicharset.id_to_unichar(unichar_id), - bottom, top, - min_bottom, max_bottom, min_top, max_top, - sub_thresh_bot, sub_thresh_top, - sup_thresh_bot); + const char* pos = ScriptPosToString(retval); + tprintf( + "%s Character %s[bot:%d top: %d] " + "bot_range[%d,%d] top_range[%d, %d] " + "sub_thresh[bot:%d top:%d] sup_thresh_bot %d\n", + pos, unicharset.id_to_unichar(unichar_id), bottom, top, min_bottom, + max_bottom, min_top, max_top, sub_thresh_bot, sub_thresh_top, + sup_thresh_bot); } return retval; } @@ -656,7 +648,7 @@ ScriptPos WERD_CHOICE::ScriptPositionOf(bool print_debug, // Returns the script-id (eg Han) of the dominant script in the word. int WERD_CHOICE::GetTopScriptID() const { int max_script = unicharset_->get_script_table_size(); - int *sid = new int[max_script]; + int* sid = new int[max_script]; int x; for (x = 0; x < max_script; x++) sid[x] = 0; for (x = 0; x < length_; ++x) { @@ -679,8 +671,7 @@ int WERD_CHOICE::GetTopScriptID() const { int max_sid = 0; for (x = 1; x < max_script; x++) if (sid[x] >= sid[max_sid]) max_sid = x; - if (sid[max_sid] < length_ / 2) - max_sid = unicharset_->null_sid(); + if (sid[max_sid] < length_ / 2) max_sid = unicharset_->null_sid(); delete[] sid; return max_sid; } @@ -711,14 +702,14 @@ int WERD_CHOICE::TotalOfStates() const { * * Print WERD_CHOICE to stdout. */ -void WERD_CHOICE::print(const char *msg) const { +void WERD_CHOICE::print(const char* msg) const { tprintf("%s : ", msg); for (int i = 0; i < length_; ++i) { tprintf("%s", unicharset_->id_to_unichar(unichar_ids_[i])); } - tprintf(" : R=%g, C=%g, F=%g, Perm=%d, xht=[%g,%g], ambig=%d\n", - rating_, certainty_, adjust_factor_, permuter_, - min_x_height_, max_x_height_, dangerous_ambig_found_); + tprintf(" : R=%g, C=%g, F=%g, Perm=%d, xht=[%g,%g], ambig=%d\n", rating_, + certainty_, adjust_factor_, permuter_, min_x_height_, max_x_height_, + dangerous_ambig_found_); tprintf("pos"); for (int i = 0; i < length_; ++i) { tprintf("\t%s", ScriptPosToString(script_pos_[i])); @@ -739,10 +730,9 @@ void WERD_CHOICE::print(const char *msg) const { } // Prints the segmentation state with an introductory message. -void WERD_CHOICE::print_state(const char *msg) const { +void WERD_CHOICE::print_state(const char* msg) const { tprintf("%s", msg); - for (int i = 0; i < length_; ++i) - tprintf(" %d", state_[i]); + for (int i = 0; i < length_; ++i) tprintf(" %d", state_[i]); tprintf("\n"); } @@ -752,7 +742,7 @@ void WERD_CHOICE::DisplaySegmentation(TWERD* word) { #ifndef GRAPHICS_DISABLED // Number of different colors to draw with. const int kNumColors = 6; - static ScrollView *segm_window = nullptr; + static ScrollView* segm_window = nullptr; // Check the state against the static prev_drawn_state. static GenericVector prev_drawn_state; bool already_done = prev_drawn_state.size() == length_; @@ -767,8 +757,8 @@ void WERD_CHOICE::DisplaySegmentation(TWERD* word) { // Create the window if needed. if (segm_window == nullptr) { - segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, - 2000.0, 256.0, true); + segm_window = + new ScrollView("Segmentation", 5, 10, 500, 256, 2000.0, 256.0, true); } else { segm_window->Clear(); } @@ -784,17 +774,16 @@ void WERD_CHOICE::DisplaySegmentation(TWERD* word) { blob->plot(segm_window, color, color); } } - segm_window->ZoomToRectangle(bbox.left(), bbox.top(), - bbox.right(), bbox.bottom()); + segm_window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), + bbox.bottom()); segm_window->Update(); window_wait(segm_window); #endif } - -bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, - const WERD_CHOICE &word2) { - const UNICHARSET *uchset = word1.unicharset(); +bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE& word1, + const WERD_CHOICE& word2) { + const UNICHARSET* uchset = word1.unicharset(); if (word2.unicharset() != uchset) return false; int w1start, w1end; word1.punct_stripped(&w1start, &w1end); @@ -804,7 +793,7 @@ bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, for (int i = 0; i < w1end - w1start; i++) { if (uchset->to_lower(word1.unichar_id(w1start + i)) != uchset->to_lower(word2.unichar_id(w2start + i))) { - return false; + return false; } } return true; @@ -820,9 +809,8 @@ bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, * @param current_unicharset unicharset that can be used * for id-to-unichar conversion */ -void print_ratings_list(const char *msg, - BLOB_CHOICE_LIST *ratings, - const UNICHARSET ¤t_unicharset) { +void print_ratings_list(const char* msg, BLOB_CHOICE_LIST* ratings, + const UNICHARSET& current_unicharset) { if (ratings->length() == 0) { tprintf("%s:\n", msg); return; diff --git a/src/ccstruct/ratngs.h b/src/ccstruct/ratngs.h index fce3245594..3940031fe1 100644 --- a/src/ccstruct/ratngs.h +++ b/src/ccstruct/ratngs.h @@ -17,8 +17,8 @@ * **********************************************************************/ -#ifndef RATNGS_H -#define RATNGS_H +#ifndef RATNGS_H +#define RATNGS_H #include @@ -45,173 +45,130 @@ enum BlobChoiceClassifier { BCC_FAKE, // From some other process. }; -class BLOB_CHOICE: public ELIST_LINK -{ - public: - BLOB_CHOICE() { - unichar_id_ = UNICHAR_SPACE; - fontinfo_id_ = -1; - fontinfo_id2_ = -1; - rating_ = 10.0; - certainty_ = -1.0; - script_id_ = -1; - xgap_before_ = 0; - xgap_after_ = 0; - min_xheight_ = 0.0f; - max_xheight_ = 0.0f; - yshift_ = 0.0f; - classifier_ = BCC_FAKE; - } - BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id - float src_rating, // rating - float src_cert, // certainty - int script_id, // script - float min_xheight, // min xheight in image pixel units - float max_xheight, // max xheight allowed by this char - float yshift, // the larger of y shift (top or bottom) - BlobChoiceClassifier c); // adapted match or other - BLOB_CHOICE(const BLOB_CHOICE &other); - ~BLOB_CHOICE() = default; - - UNICHAR_ID unichar_id() const { - return unichar_id_; - } - float rating() const { - return rating_; - } - float certainty() const { - return certainty_; - } - int16_t fontinfo_id() const { - return fontinfo_id_; - } - int16_t fontinfo_id2() const { - return fontinfo_id2_; - } - const GenericVector& fonts() const { - return fonts_; - } - void set_fonts(const GenericVector& fonts) { - fonts_ = fonts; - int score1 = 0, score2 = 0; - fontinfo_id_ = -1; - fontinfo_id2_ = -1; - for (int f = 0; f < fonts_.size(); ++f) { - if (fonts_[f].score > score1) { - score2 = score1; - fontinfo_id2_ = fontinfo_id_; - score1 = fonts_[f].score; - fontinfo_id_ = fonts_[f].fontinfo_id; - } else if (fonts_[f].score > score2) { - score2 = fonts_[f].score; - fontinfo_id2_ = fonts_[f].fontinfo_id; - } +class BLOB_CHOICE : public ELIST_LINK { + public: + BLOB_CHOICE() { + unichar_id_ = UNICHAR_SPACE; + fontinfo_id_ = -1; + fontinfo_id2_ = -1; + rating_ = 10.0; + certainty_ = -1.0; + script_id_ = -1; + xgap_before_ = 0; + xgap_after_ = 0; + min_xheight_ = 0.0f; + max_xheight_ = 0.0f; + yshift_ = 0.0f; + classifier_ = BCC_FAKE; + } + BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id + float src_rating, // rating + float src_cert, // certainty + int script_id, // script + float min_xheight, // min xheight in image pixel units + float max_xheight, // max xheight allowed by this char + float yshift, // the larger of y shift (top or bottom) + BlobChoiceClassifier c); // adapted match or other + BLOB_CHOICE(const BLOB_CHOICE& other); + ~BLOB_CHOICE() = default; + + UNICHAR_ID unichar_id() const { return unichar_id_; } + float rating() const { return rating_; } + float certainty() const { return certainty_; } + int16_t fontinfo_id() const { return fontinfo_id_; } + int16_t fontinfo_id2() const { return fontinfo_id2_; } + const GenericVector& fonts() const { return fonts_; } + void set_fonts(const GenericVector& fonts) { + fonts_ = fonts; + int score1 = 0, score2 = 0; + fontinfo_id_ = -1; + fontinfo_id2_ = -1; + for (int f = 0; f < fonts_.size(); ++f) { + if (fonts_[f].score > score1) { + score2 = score1; + fontinfo_id2_ = fontinfo_id_; + score1 = fonts_[f].score; + fontinfo_id_ = fonts_[f].fontinfo_id; + } else if (fonts_[f].score > score2) { + score2 = fonts_[f].score; + fontinfo_id2_ = fonts_[f].fontinfo_id; } } - int script_id() const { - return script_id_; - } - const MATRIX_COORD& matrix_cell() { - return matrix_cell_; - } - int16_t xgap_before() const { - return xgap_before_; - } - int16_t xgap_after() const { - return xgap_after_; - } - float min_xheight() const { - return min_xheight_; - } - float max_xheight() const { - return max_xheight_; - } - float yshift() const { - return yshift_; - } - BlobChoiceClassifier classifier() const { - return classifier_; - } - bool IsAdapted() const { - return classifier_ == BCC_ADAPTED_CLASSIFIER; - } - bool IsClassified() const { - return classifier_ == BCC_STATIC_CLASSIFIER || - classifier_ == BCC_ADAPTED_CLASSIFIER || - classifier_ == BCC_SPECKLE_CLASSIFIER; - } - - void set_unichar_id(UNICHAR_ID newunichar_id) { - unichar_id_ = newunichar_id; - } - void set_rating(float newrat) { - rating_ = newrat; - } - void set_certainty(float newrat) { - certainty_ = newrat; - } - void set_script(int newscript_id) { - script_id_ = newscript_id; - } - void set_matrix_cell(int col, int row) { - matrix_cell_.col = col; - matrix_cell_.row = row; - } - void set_xgap_before(int16_t gap) { - xgap_before_ = gap; - } - void set_xgap_after(int16_t gap) { - xgap_after_ = gap; - } - void set_classifier(BlobChoiceClassifier classifier) { - classifier_ = classifier; - } - static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) { - BLOB_CHOICE* choice = new BLOB_CHOICE; - *choice = *src; - return choice; - } - // Returns true if *this and other agree on the baseline and x-height - // to within some tolerance based on a given estimate of the x-height. - bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height, - bool debug) const; - - void print(const UNICHARSET *unicharset) const { - tprintf("r%.2f c%.2f x[%g,%g]: %d %s", - rating_, certainty_, - min_xheight_, max_xheight_, unichar_id_, - (unicharset == nullptr) ? "" : - unicharset->debug_str(unichar_id_).string()); - } - void print_full() const { - print(nullptr); - tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", - script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_); - } - // Sort function for sorting BLOB_CHOICEs in increasing order of rating. - static int SortByRating(const void *p1, const void *p2) { - const BLOB_CHOICE *bc1 = *static_cast(p1); - const BLOB_CHOICE *bc2 = *static_cast(p2); - return (bc1->rating_ < bc2->rating_) ? -1 : 1; - } + } + int script_id() const { return script_id_; } + const MATRIX_COORD& matrix_cell() { return matrix_cell_; } + int16_t xgap_before() const { return xgap_before_; } + int16_t xgap_after() const { return xgap_after_; } + float min_xheight() const { return min_xheight_; } + float max_xheight() const { return max_xheight_; } + float yshift() const { return yshift_; } + BlobChoiceClassifier classifier() const { return classifier_; } + bool IsAdapted() const { return classifier_ == BCC_ADAPTED_CLASSIFIER; } + bool IsClassified() const { + return classifier_ == BCC_STATIC_CLASSIFIER || + classifier_ == BCC_ADAPTED_CLASSIFIER || + classifier_ == BCC_SPECKLE_CLASSIFIER; + } + + void set_unichar_id(UNICHAR_ID newunichar_id) { unichar_id_ = newunichar_id; } + void set_rating(float newrat) { rating_ = newrat; } + void set_certainty(float newrat) { certainty_ = newrat; } + void set_script(int newscript_id) { script_id_ = newscript_id; } + void set_matrix_cell(int col, int row) { + matrix_cell_.col = col; + matrix_cell_.row = row; + } + void set_xgap_before(int16_t gap) { xgap_before_ = gap; } + void set_xgap_after(int16_t gap) { xgap_after_ = gap; } + void set_classifier(BlobChoiceClassifier classifier) { + classifier_ = classifier; + } + static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) { + BLOB_CHOICE* choice = new BLOB_CHOICE; + *choice = *src; + return choice; + } + // Returns true if *this and other agree on the baseline and x-height + // to within some tolerance based on a given estimate of the x-height. + bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height, + bool debug) const; + + void print(const UNICHARSET* unicharset) const { + tprintf("r%.2f c%.2f x[%g,%g]: %d %s", rating_, certainty_, min_xheight_, + max_xheight_, unichar_id_, + (unicharset == nullptr) + ? "" + : unicharset->debug_str(unichar_id_).string()); + } + void print_full() const { + print(nullptr); + tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", + script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_); + } + // Sort function for sorting BLOB_CHOICEs in increasing order of rating. + static int SortByRating(const void* p1, const void* p2) { + const BLOB_CHOICE* bc1 = *static_cast(p1); + const BLOB_CHOICE* bc2 = *static_cast(p2); + return (bc1->rating_ < bc2->rating_) ? -1 : 1; + } private: - UNICHAR_ID unichar_id_; // unichar id + UNICHAR_ID unichar_id_; // unichar id // Fonts and scores. Allowed to be empty. GenericVector fonts_; - int16_t fontinfo_id_; // char font information - int16_t fontinfo_id2_; // 2nd choice font information + int16_t fontinfo_id_; // char font information + int16_t fontinfo_id2_; // 2nd choice font information // Rating is the classifier distance weighted by the length of the outline // in the blob. In terms of probability, classifier distance is -klog p such // that the resulting distance is in the range [0, 1] and then // rating = w (-k log p) where w is the weight for the length of the outline. // Sums of ratings may be compared meaningfully for words of different // segmentation. - float rating_; // size related + float rating_; // size related // Certainty is a number in [-20, 0] indicating the classifier certainty // of the choice. In terms of probability, certainty = 20 (k log p) where // k is defined as above to normalize -klog p to the range [0, 1]. - float certainty_; // absolute + float certainty_; // absolute int script_id_; // Holds the position of this choice in the ratings matrix. // Used to location position in the matrix during path backtracking. @@ -232,7 +189,7 @@ ELISTIZEH(BLOB_CHOICE) // Return the BLOB_CHOICE in bc_list matching a given unichar_id, // or nullptr if there is no match. -BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list); +BLOB_CHOICE* FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST* bc_list); // Permuter codes used in WERD_CHOICEs. enum PermuterType { @@ -255,94 +212,64 @@ enum PermuterType { namespace tesseract { // ScriptPos tells whether a character is subscript, superscript or normal. -enum ScriptPos { - SP_NORMAL, - SP_SUBSCRIPT, - SP_SUPERSCRIPT, - SP_DROPCAP -}; +enum ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP }; -const char *ScriptPosToString(tesseract::ScriptPos script_pos); +const char* ScriptPosToString(tesseract::ScriptPos script_pos); } // namespace tesseract. class WERD_CHOICE : public ELIST_LINK { public: static const float kBadRating; - static const char *permuter_name(uint8_t permuter); - - WERD_CHOICE(const UNICHARSET *unicharset) - : unicharset_(unicharset) { this->init(8); } - WERD_CHOICE(const UNICHARSET *unicharset, int reserved) - : unicharset_(unicharset) { this->init(reserved); } - WERD_CHOICE(const char *src_string, - const char *src_lengths, - float src_rating, - float src_certainty, - uint8_t src_permuter, - const UNICHARSET &unicharset) - : unicharset_(&unicharset) { - this->init(src_string, src_lengths, src_rating, - src_certainty, src_permuter); + static const char* permuter_name(uint8_t permuter); + + WERD_CHOICE(const UNICHARSET* unicharset) : unicharset_(unicharset) { + this->init(8); + } + WERD_CHOICE(const UNICHARSET* unicharset, int reserved) + : unicharset_(unicharset) { + this->init(reserved); } - WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset); - WERD_CHOICE(const WERD_CHOICE &word) + WERD_CHOICE(const char* src_string, const char* src_lengths, float src_rating, + float src_certainty, uint8_t src_permuter, + const UNICHARSET& unicharset) + : unicharset_(&unicharset) { + this->init(src_string, src_lengths, src_rating, src_certainty, + src_permuter); + } + WERD_CHOICE(const char* src_string, const UNICHARSET& unicharset); + WERD_CHOICE(const WERD_CHOICE& word) : ELIST_LINK(word), unicharset_(word.unicharset_) { this->init(word.length()); this->operator=(word); } ~WERD_CHOICE(); - const UNICHARSET *unicharset() const { - return unicharset_; - } - inline int length() const { - return length_; - } - float adjust_factor() const { - return adjust_factor_; - } - void set_adjust_factor(float factor) { - adjust_factor_ = factor; - } - inline const UNICHAR_ID *unichar_ids() const { - return unichar_ids_; - } + const UNICHARSET* unicharset() const { return unicharset_; } + inline int length() const { return length_; } + float adjust_factor() const { return adjust_factor_; } + void set_adjust_factor(float factor) { adjust_factor_ = factor; } + inline const UNICHAR_ID* unichar_ids() const { return unichar_ids_; } inline UNICHAR_ID unichar_id(int index) const { assert(index < length_); return unichar_ids_[index]; } - inline int state(int index) const { - return state_[index]; - } + inline int state(int index) const { return state_[index]; } tesseract::ScriptPos BlobPosition(int index) const { - if (index < 0 || index >= length_) - return tesseract::SP_NORMAL; + if (index < 0 || index >= length_) return tesseract::SP_NORMAL; return script_pos_[index]; } - inline float rating() const { - return rating_; - } - inline float certainty() const { - return certainty_; - } - inline float certainty(int index) const { - return certainties_[index]; - } - inline float min_x_height() const { - return min_x_height_; - } - inline float max_x_height() const { - return max_x_height_; - } + inline float rating() const { return rating_; } + inline float certainty() const { return certainty_; } + inline float certainty(int index) const { return certainties_[index]; } + inline float min_x_height() const { return min_x_height_; } + inline float max_x_height() const { return max_x_height_; } inline void set_x_heights(float min_height, float max_height) { min_x_height_ = min_height; max_x_height_ = max_height; } - inline uint8_t permuter() const { - return permuter_; - } - const char *permuter_name() const; + inline uint8_t permuter() const { return permuter_; } + const char* permuter_name() const; // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word, // taken from the appropriate cell in the ratings MATRIX. // Borrowed pointer, so do not delete. @@ -356,21 +283,13 @@ class WERD_CHOICE : public ELIST_LINK { assert(index < length_); unichar_ids_[index] = unichar_id; } - bool dangerous_ambig_found() const { - return dangerous_ambig_found_; - } + bool dangerous_ambig_found() const { return dangerous_ambig_found_; } void set_dangerous_ambig_found_(bool value) { dangerous_ambig_found_ = value; } - inline void set_rating(float new_val) { - rating_ = new_val; - } - inline void set_certainty(float new_val) { - certainty_ = new_val; - } - inline void set_permuter(uint8_t perm) { - permuter_ = perm; - } + inline void set_rating(float new_val) { rating_ = new_val; } + inline void set_certainty(float new_val) { certainty_ = new_val; } + inline void set_permuter(uint8_t perm) { permuter_ = perm; } // Note: this function should only be used if all the fields // are populated manually with set_* functions (rather than // (copy)constructors and append_* functions). @@ -386,10 +305,9 @@ class WERD_CHOICE : public ELIST_LINK { reserved_, unichar_ids_); script_pos_ = GenericVector::double_the_size_memcpy( reserved_, script_pos_); - state_ = GenericVector::double_the_size_memcpy( - reserved_, state_); - certainties_ = GenericVector::double_the_size_memcpy( - reserved_, certainties_); + state_ = GenericVector::double_the_size_memcpy(reserved_, state_); + certainties_ = + GenericVector::double_the_size_memcpy(reserved_, certainties_); reserved_ *= 2; } else { unichar_ids_ = new UNICHAR_ID[1]; @@ -431,9 +349,8 @@ class WERD_CHOICE : public ELIST_LINK { /// The function assumes that src_string is not nullptr. /// src_lengths argument could be nullptr, in which case the unichars /// in src_string are assumed to all be of length 1. - void init(const char *src_string, const char *src_lengths, - float src_rating, float src_certainty, - uint8_t src_permuter); + void init(const char* src_string, const char* src_lengths, float src_rating, + float src_certainty, uint8_t src_permuter); /// Set the fields in this choice to be default (bad) values. inline void make_bad() { @@ -445,17 +362,17 @@ class WERD_CHOICE : public ELIST_LINK { /// This function assumes that there is enough space reserved /// in the WERD_CHOICE for adding another unichar. /// This is an efficient alternative to append_unichar_id(). - inline void append_unichar_id_space_allocated( - UNICHAR_ID unichar_id, int blob_count, - float rating, float certainty) { + inline void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, + int blob_count, float rating, + float certainty) { assert(reserved_ > length_); length_++; - this->set_unichar_id(unichar_id, blob_count, - rating, certainty, length_-1); + this->set_unichar_id(unichar_id, blob_count, rating, certainty, + length_ - 1); } - void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, - float rating, float certainty); + void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, + float certainty); inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index) { @@ -486,18 +403,18 @@ class WERD_CHOICE : public ELIST_LINK { // Returns the half-open interval of unichar_id indices [start, end) which // enclose the core portion of this word -- the part after stripping // punctuation from the left and right. - void punct_stripped(int *start_core, int *end_core) const; + void punct_stripped(int* start_core, int* end_core) const; // Returns the indices [start, end) containing the core of the word, stripped // of any superscript digits on either side. (i.e., the non-footnote part // of the word). There is no guarantee that the output range is non-empty. - void GetNonSuperscriptSpan(int *start, int *end) const; + void GetNonSuperscriptSpan(int* start, int* end) const; // Return a copy of this WERD_CHOICE with the choices [start, end). // The result is useful only for checking against a dictionary. WERD_CHOICE shallow_copy(int start, int end) const; - void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const; + void string_and_lengths(STRING* word_str, STRING* word_lengths_str) const; const STRING debug_string() const { STRING word_str; for (int i = 0; i < length_; ++i) { @@ -528,20 +445,18 @@ class WERD_CHOICE : public ELIST_LINK { return unichars_in_script_order_ = in_script_order; } - bool unichars_in_script_order() const { - return unichars_in_script_order_; - } + bool unichars_in_script_order() const { return unichars_in_script_order_; } // Returns a UTF-8 string equivalent to the current choice // of UNICHAR IDs. - const STRING &unichar_string() const { + const STRING& unichar_string() const { this->string_and_lengths(&unichar_string_, &unichar_lengths_); return unichar_string_; } // Returns the lengths, one byte each, representing the number of bytes // required in the unichar_string for each UNICHAR_ID. - const STRING &unichar_lengths() const { + const STRING& unichar_lengths() const { this->string_and_lengths(&unichar_string_, &unichar_lengths_); return unichar_lengths_; } @@ -574,21 +489,21 @@ class WERD_CHOICE : public ELIST_LINK { int TotalOfStates() const; void print() const { this->print(""); } - void print(const char *msg) const; + void print(const char* msg) const; // Prints the segmentation state with an introductory message. - void print_state(const char *msg) const; + void print_state(const char* msg) const; // Displays the segmentation state of *this (if not the same as the last // one displayed) and waits for a click in the window. void DisplaySegmentation(TWERD* word); - WERD_CHOICE& operator+= ( // concatanate - const WERD_CHOICE & second);// second on first + WERD_CHOICE& operator+=( // concatanate + const WERD_CHOICE& second); // second on first - WERD_CHOICE& operator= (const WERD_CHOICE& source); + WERD_CHOICE& operator=(const WERD_CHOICE& source); private: - const UNICHARSET *unicharset_; + const UNICHARSET* unicharset_; // TODO(rays) Perhaps replace the multiple arrays with an array of structs? // unichar_ids_ is an array of classifier "results" that make up a word. // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position @@ -604,22 +519,22 @@ class WERD_CHOICE : public ELIST_LINK { // been moved to a lower level, augmenting the ratings matrix with the // combined fragments, and allowing the language-model/segmentation-search // to deal with only the combined unichar_ids. - UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word + UNICHAR_ID* unichar_ids_; // unichar ids that represent the text of the word tesseract::ScriptPos* script_pos_; // Normal/Sub/Superscript of each unichar. - int* state_; // Number of blobs in each unichar. - float* certainties_; // Certainty of each unichar. - int reserved_; // size of the above arrays - int length_; // word length + int* state_; // Number of blobs in each unichar. + float* certainties_; // Certainty of each unichar. + int reserved_; // size of the above arrays + int length_; // word length // Factor that was used to adjust the rating. float adjust_factor_; // Rating is the sum of the ratings of the individual blobs in the word. - float rating_; // size related + float rating_; // size related // certainty is the min (worst) certainty of the individual blobs in the word. - float certainty_; // absolute + float certainty_; // absolute // xheight computed from the result, or 0 if inconsistent. float min_x_height_; float max_x_height_; - uint8_t permuter_; // permuter code + uint8_t permuter_; // permuter code // Normally, the ratings_ matrix represents the recognition results in order // from left-to-right. However, some engines (say Cube) may return @@ -637,19 +552,19 @@ class WERD_CHOICE : public ELIST_LINK { // Make WERD_CHOICE listable. ELISTIZEH(WERD_CHOICE) -using BLOB_CHOICE_LIST_VECTOR = GenericVector; +using BLOB_CHOICE_LIST_VECTOR = GenericVector; // Utilities for comparing WERD_CHOICEs -bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, - const WERD_CHOICE &word2); +bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE& word1, + const WERD_CHOICE& word2); // Utilities for debug printing. void print_ratings_list( - const char *msg, // intro message - BLOB_CHOICE_LIST *ratings, // list of results - const UNICHARSET ¤t_unicharset // unicharset that can be used + const char* msg, // intro message + BLOB_CHOICE_LIST* ratings, // list of results + const UNICHARSET& current_unicharset // unicharset that can be used // for id-to-unichar conversion - ); +); #endif diff --git a/src/ccstruct/rect.cpp b/src/ccstruct/rect.cpp index 72773c226b..84a1d2b890 100644 --- a/src/ccstruct/rect.cpp +++ b/src/ccstruct/rect.cpp @@ -17,7 +17,7 @@ * **********************************************************************/ -#include "rect.h" +#include "rect.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -32,23 +32,20 @@ TBOX::TBOX( // constructor const ICOORD pt1, // one corner const ICOORD pt2 // the other corner - ) { - if (pt1.x () <= pt2.x ()) { - if (pt1.y () <= pt2.y ()) { +) { + if (pt1.x() <= pt2.x()) { + if (pt1.y() <= pt2.y()) { bot_left = pt1; top_right = pt2; + } else { + bot_left = ICOORD(pt1.x(), pt2.y()); + top_right = ICOORD(pt2.x(), pt1.y()); } - else { - bot_left = ICOORD (pt1.x (), pt2.y ()); - top_right = ICOORD (pt2.x (), pt1.y ()); - } - } - else { - if (pt1.y () <= pt2.y ()) { - bot_left = ICOORD (pt2.x (), pt1.y ()); - top_right = ICOORD (pt1.x (), pt2.y ()); - } - else { + } else { + if (pt1.y() <= pt2.y()) { + bot_left = ICOORD(pt2.x(), pt1.y()); + top_right = ICOORD(pt1.x(), pt2.y()); + } else { bot_left = pt2; top_right = pt1; } @@ -61,10 +58,9 @@ TBOX::TBOX( // constructor * order. **********************************************************************/ -TBOX::TBOX( //constructor +TBOX::TBOX( // constructor int16_t left, int16_t bottom, int16_t right, int16_t top) - : bot_left(left, bottom), top_right(right, top) { -} + : bot_left(left, bottom), top_right(right, top) {} // rotate_large constructs the containing bounding box of all 4 // corners after rotating them. It therefore guarantees that all @@ -84,87 +80,84 @@ void TBOX::rotate_large(const FCOORD& vec) { * **********************************************************************/ -TBOX TBOX::intersection( //shared area box - const TBOX &box) const { +TBOX TBOX::intersection( // shared area box + const TBOX& box) const { int16_t left; int16_t bottom; int16_t right; int16_t top; - if (overlap (box)) { - if (box.bot_left.x () > bot_left.x ()) - left = box.bot_left.x (); + if (overlap(box)) { + if (box.bot_left.x() > bot_left.x()) + left = box.bot_left.x(); else - left = bot_left.x (); + left = bot_left.x(); - if (box.top_right.x () < top_right.x ()) - right = box.top_right.x (); + if (box.top_right.x() < top_right.x()) + right = box.top_right.x(); else - right = top_right.x (); + right = top_right.x(); - if (box.bot_left.y () > bot_left.y ()) - bottom = box.bot_left.y (); + if (box.bot_left.y() > bot_left.y()) + bottom = box.bot_left.y(); else - bottom = bot_left.y (); + bottom = bot_left.y(); - if (box.top_right.y () < top_right.y ()) - top = box.top_right.y (); + if (box.top_right.y() < top_right.y()) + top = box.top_right.y(); else - top = top_right.y (); - } - else { + top = top_right.y(); + } else { left = INT16_MAX; bottom = INT16_MAX; top = -INT16_MAX; right = -INT16_MAX; } - return TBOX (left, bottom, right, top); + return TBOX(left, bottom, right, top); } - /********************************************************************** * TBOX::bounding_union() Build the smallest box containing both boxes * **********************************************************************/ -TBOX TBOX::bounding_union( //box enclosing both - const TBOX &box) const { - ICOORD bl; //bottom left - ICOORD tr; //top right +TBOX TBOX::bounding_union( // box enclosing both + const TBOX& box) const { + ICOORD bl; // bottom left + ICOORD tr; // top right - if (box.bot_left.x () < bot_left.x ()) - bl.set_x (box.bot_left.x ()); + if (box.bot_left.x() < bot_left.x()) + bl.set_x(box.bot_left.x()); else - bl.set_x (bot_left.x ()); + bl.set_x(bot_left.x()); - if (box.top_right.x () > top_right.x ()) - tr.set_x (box.top_right.x ()); + if (box.top_right.x() > top_right.x()) + tr.set_x(box.top_right.x()); else - tr.set_x (top_right.x ()); + tr.set_x(top_right.x()); - if (box.bot_left.y () < bot_left.y ()) - bl.set_y (box.bot_left.y ()); + if (box.bot_left.y() < bot_left.y()) + bl.set_y(box.bot_left.y()); else - bl.set_y (bot_left.y ()); + bl.set_y(bot_left.y()); - if (box.top_right.y () > top_right.y ()) - tr.set_y (box.top_right.y ()); + if (box.top_right.y() > top_right.y()) + tr.set_y(box.top_right.y()); else - tr.set_y (top_right.y ()); - return TBOX (bl, tr); + tr.set_y(top_right.y()); + return TBOX(bl, tr); } - /********************************************************************** * TBOX::plot() Paint a box using specified settings * **********************************************************************/ #ifndef GRAPHICS_DISABLED -void TBOX::plot( //paint box - ScrollView* fd, //where to paint - ScrollView::Color fill_colour, //colour for inside - ScrollView::Color border_colour //colour for border - ) const { +void TBOX::plot( // paint box + ScrollView* fd, // where to paint + ScrollView::Color fill_colour, // colour for inside + ScrollView::Color border_colour // colour for border + ) const { fd->Brush(fill_colour); fd->Pen(border_colour); plot(fd); @@ -172,7 +165,7 @@ void TBOX::plot( //paint box #endif // Appends the bounding box as (%d,%d)->(%d,%d) to a STRING. -void TBOX::print_to_str(STRING *str) const { +void TBOX::print_to_str(STRING* str) const { // "(%d,%d)->(%d,%d)", left(), bottom(), right(), top() str->add_str_int("(", left()); str->add_str_int(",", bottom()); @@ -201,26 +194,22 @@ bool TBOX::DeSerialize(bool swap, FILE* fp) { * Extend one box to include the other (In place union) **********************************************************************/ -DLLSYM TBOX & -operator+= ( //bounding bounding bx -TBOX & op1, //operands -const TBOX & op2) { - if (op2.bot_left.x () < op1.bot_left.x ()) - op1.bot_left.set_x (op2.bot_left.x ()); +DLLSYM TBOX& operator+=( // bounding bounding bx + TBOX& op1, // operands + const TBOX& op2) { + if (op2.bot_left.x() < op1.bot_left.x()) op1.bot_left.set_x(op2.bot_left.x()); - if (op2.top_right.x () > op1.top_right.x ()) - op1.top_right.set_x (op2.top_right.x ()); + if (op2.top_right.x() > op1.top_right.x()) + op1.top_right.set_x(op2.top_right.x()); - if (op2.bot_left.y () < op1.bot_left.y ()) - op1.bot_left.set_y (op2.bot_left.y ()); + if (op2.bot_left.y() < op1.bot_left.y()) op1.bot_left.set_y(op2.bot_left.y()); - if (op2.top_right.y () > op1.top_right.y ()) - op1.top_right.set_y (op2.top_right.y ()); + if (op2.top_right.y() > op1.top_right.y()) + op1.top_right.set_y(op2.top_right.y()); return op1; } - /********************************************************************** * operator&= * @@ -228,34 +217,33 @@ const TBOX & op2) { **********************************************************************/ TBOX& operator&=(TBOX& op1, const TBOX& op2) { - if (op1.overlap (op2)) { - if (op2.bot_left.x () > op1.bot_left.x ()) - op1.bot_left.set_x (op2.bot_left.x ()); - - if (op2.top_right.x () < op1.top_right.x ()) - op1.top_right.set_x (op2.top_right.x ()); - - if (op2.bot_left.y () > op1.bot_left.y ()) - op1.bot_left.set_y (op2.bot_left.y ()); - - if (op2.top_right.y () < op1.top_right.y ()) - op1.top_right.set_y (op2.top_right.y ()); - } - else { - op1.bot_left.set_x (INT16_MAX); - op1.bot_left.set_y (INT16_MAX); - op1.top_right.set_x (-INT16_MAX); - op1.top_right.set_y (-INT16_MAX); + if (op1.overlap(op2)) { + if (op2.bot_left.x() > op1.bot_left.x()) + op1.bot_left.set_x(op2.bot_left.x()); + + if (op2.top_right.x() < op1.top_right.x()) + op1.top_right.set_x(op2.top_right.x()); + + if (op2.bot_left.y() > op1.bot_left.y()) + op1.bot_left.set_y(op2.bot_left.y()); + + if (op2.top_right.y() < op1.top_right.y()) + op1.top_right.set_y(op2.top_right.y()); + } else { + op1.bot_left.set_x(INT16_MAX); + op1.bot_left.set_y(INT16_MAX); + op1.top_right.set_x(-INT16_MAX); + op1.top_right.set_y(-INT16_MAX); } return op1; } -bool TBOX::x_almost_equal(const TBOX &box, int tolerance) const { +bool TBOX::x_almost_equal(const TBOX& box, int tolerance) const { return (abs(left() - box.left()) <= tolerance && - abs(right() - box.right()) <= tolerance); + abs(right() - box.right()) <= tolerance); } -bool TBOX::almost_equal(const TBOX &box, int tolerance) const { +bool TBOX::almost_equal(const TBOX& box, int tolerance) const { return (abs(left() - box.left()) <= tolerance && abs(right() - box.right()) <= tolerance && abs(top() - box.top()) <= tolerance && diff --git a/src/ccstruct/rect.h b/src/ccstruct/rect.h index 3e46f4ef58..020347a1e3 100644 --- a/src/ccstruct/rect.h +++ b/src/ccstruct/rect.h @@ -17,289 +17,277 @@ * **********************************************************************/ -#ifndef RECT_H -#define RECT_H +#ifndef RECT_H +#define RECT_H #include -#include "points.h" #include "ndminx.h" +#include "points.h" #include "scrollview.h" #include "strngs.h" #include "tprintf.h" -class DLLSYM TBOX { // bounding box - public: - TBOX (): // empty constructor making a null box - bot_left (INT16_MAX, INT16_MAX), top_right (-INT16_MAX, -INT16_MAX) { - } - - TBOX( // constructor - const ICOORD pt1, // one corner - const ICOORD pt2); // the other corner - - TBOX( // constructor - int16_t left, int16_t bottom, int16_t right, int16_t top); - - TBOX( // box around FCOORD - const FCOORD pt); - - bool null_box() const { // Is box null - return ((left () >= right ()) || (top () <= bottom ())); - } - - bool operator==(const TBOX& other) const { - return bot_left == other.bot_left && top_right == other.top_right; - } - - int16_t top() const { // coord of top - return top_right.y (); - } - void set_top(int y) { - top_right.set_y(y); - } - - int16_t bottom() const { // coord of bottom - return bot_left.y (); - } - void set_bottom(int y) { - bot_left.set_y(y); - } - - int16_t left() const { // coord of left - return bot_left.x (); - } - void set_left(int x) { - bot_left.set_x(x); - } - - int16_t right() const { // coord of right - return top_right.x (); - } - void set_right(int x) { - top_right.set_x(x); - } - int x_middle() const { - return (bot_left.x() + top_right.x()) / 2; - } - int y_middle() const { - return (bot_left.y() + top_right.y()) / 2; - } - - const ICOORD &botleft() const { // access function - return bot_left; - } - - ICOORD botright() const { // ~ access function - return ICOORD (top_right.x (), bot_left.y ()); - } - - ICOORD topleft() const { // ~ access function - return ICOORD (bot_left.x (), top_right.y ()); - } - - const ICOORD &topright() const { // access function - return top_right; - } - - int16_t height() const { // how high is it? - if (!null_box ()) - return top_right.y () - bot_left.y (); - else - return 0; - } - - int16_t width() const { // how high is it? - if (!null_box ()) - return top_right.x () - bot_left.x (); - else - return 0; - } - - int32_t area() const { // what is the area? - if (!null_box ()) - return width () * height (); - else - return 0; - } - - // Pads the box on either side by the supplied x,y pad amounts. - // NO checks for exceeding any bounds like 0 or an image size. - void pad(int xpad, int ypad) { - ICOORD pad(xpad, ypad); - bot_left -= pad; - top_right += pad; - } - - void move_bottom_edge( // move one edge - const int16_t y) { // by +/- y - bot_left += ICOORD (0, y); - } - - void move_left_edge( // move one edge - const int16_t x) { // by +/- x - bot_left += ICOORD (x, 0); - } - - void move_right_edge( // move one edge - const int16_t x) { // by +/- x - top_right += ICOORD (x, 0); - } - - void move_top_edge( // move one edge - const int16_t y) { // by +/- y - top_right += ICOORD (0, y); - } - - void move( // move box - const ICOORD vec) { // by vector - bot_left += vec; - top_right += vec; - } - - void move( // move box - const FCOORD vec) { // by float vector - bot_left.set_x ((int16_t) floor (bot_left.x () + vec.x ())); - // round left - bot_left.set_y ((int16_t) floor (bot_left.y () + vec.y ())); - // round down - top_right.set_x ((int16_t) ceil (top_right.x () + vec.x ())); - // round right - top_right.set_y ((int16_t) ceil (top_right.y () + vec.y ())); - // round up - } - - void scale( // scale box - const float f) { // by multiplier - bot_left.set_x ((int16_t) floor (bot_left.x () * f)); // round left - bot_left.set_y ((int16_t) floor (bot_left.y () * f)); // round down - top_right.set_x ((int16_t) ceil (top_right.x () * f)); // round right - top_right.set_y ((int16_t) ceil (top_right.y () * f)); // round up - } - void scale( // scale box - const FCOORD vec) { // by float vector - bot_left.set_x ((int16_t) floor (bot_left.x () * vec.x ())); - bot_left.set_y ((int16_t) floor (bot_left.y () * vec.y ())); - top_right.set_x ((int16_t) ceil (top_right.x () * vec.x ())); - top_right.set_y ((int16_t) ceil (top_right.y () * vec.y ())); - } - - // rotate doesn't enlarge the box - it just rotates the bottom-left - // and top-right corners. Use rotate_large if you want to guarantee - // that all content is contained within the rotated box. - void rotate(const FCOORD& vec) { // by vector - bot_left.rotate (vec); - top_right.rotate (vec); - *this = TBOX (bot_left, top_right); - } - // rotate_large constructs the containing bounding box of all 4 - // corners after rotating them. It therefore guarantees that all - // original content is contained within, but also slightly enlarges the box. - void rotate_large(const FCOORD& vec); - - bool contains( // is pt inside box - const FCOORD pt) const; - - bool contains( // is box inside box - const TBOX &box) const; - - bool overlap( // do boxes overlap - const TBOX &box) const; - - bool major_overlap( // do boxes overlap more than half - const TBOX &box) const; - - // Do boxes overlap on x axis. - bool x_overlap(const TBOX &box) const; - - // Return the horizontal gap between the boxes. If the boxes - // overlap horizontally then the return value is negative, indicating - // the amount of the overlap. - int x_gap(const TBOX& box) const { - return std::max(bot_left.x(), box.bot_left.x()) - - std::min(top_right.x(), box.top_right.x()); - } - - // Return the vertical gap between the boxes. If the boxes - // overlap vertically then the return value is negative, indicating - // the amount of the overlap. - int y_gap(const TBOX& box) const { - return std::max(bot_left.y(), box.bot_left.y()) - - std::min(top_right.y(), box.top_right.y()); - } - - // Do boxes overlap on x axis by more than - // half of the width of the narrower box. - bool major_x_overlap(const TBOX &box) const; - - // Do boxes overlap on y axis. - bool y_overlap(const TBOX &box) const; - - // Do boxes overlap on y axis by more than - // half of the height of the shorter box. - bool major_y_overlap(const TBOX &box) const; - - // fraction of current box's area covered by other - double overlap_fraction(const TBOX &box) const; - - // fraction of the current box's projected area covered by the other's - double x_overlap_fraction(const TBOX& box) const; - - // fraction of the current box's projected area covered by the other's - double y_overlap_fraction(const TBOX& box) const; - - // Returns true if the boxes are almost equal on x axis. - bool x_almost_equal(const TBOX &box, int tolerance) const; - - // Returns true if the boxes are almost equal - bool almost_equal(const TBOX &box, int tolerance) const; - - TBOX intersection( // shared area box - const TBOX &box) const; - - TBOX bounding_union( // box enclosing both - const TBOX &box) const; - - // Sets the box boundaries to the given coordinates. - void set_to_given_coords(int x_min, int y_min, int x_max, int y_max) { - bot_left.set_x(x_min); - bot_left.set_y(y_min); - top_right.set_x(x_max); - top_right.set_y(y_max); - } - - void print() const { // print - tprintf("Bounding box=(%d,%d)->(%d,%d)\n", - left(), bottom(), right(), top()); - } - // Appends the bounding box as (%d,%d)->(%d,%d) to a STRING. - void print_to_str(STRING *str) const; +class DLLSYM TBOX { // bounding box + public: + TBOX() + : // empty constructor making a null box + bot_left(INT16_MAX, INT16_MAX), + top_right(-INT16_MAX, -INT16_MAX) {} + + TBOX( // constructor + const ICOORD pt1, // one corner + const ICOORD pt2); // the other corner + + TBOX( // constructor + int16_t left, int16_t bottom, int16_t right, int16_t top); + + TBOX( // box around FCOORD + const FCOORD pt); + + bool null_box() const { // Is box null + return ((left() >= right()) || (top() <= bottom())); + } + + bool operator==(const TBOX& other) const { + return bot_left == other.bot_left && top_right == other.top_right; + } + + int16_t top() const { // coord of top + return top_right.y(); + } + void set_top(int y) { top_right.set_y(y); } + + int16_t bottom() const { // coord of bottom + return bot_left.y(); + } + void set_bottom(int y) { bot_left.set_y(y); } + + int16_t left() const { // coord of left + return bot_left.x(); + } + void set_left(int x) { bot_left.set_x(x); } + + int16_t right() const { // coord of right + return top_right.x(); + } + void set_right(int x) { top_right.set_x(x); } + int x_middle() const { return (bot_left.x() + top_right.x()) / 2; } + int y_middle() const { return (bot_left.y() + top_right.y()) / 2; } + + const ICOORD& botleft() const { // access function + return bot_left; + } + + ICOORD botright() const { // ~ access function + return ICOORD(top_right.x(), bot_left.y()); + } + + ICOORD topleft() const { // ~ access function + return ICOORD(bot_left.x(), top_right.y()); + } + + const ICOORD& topright() const { // access function + return top_right; + } + + int16_t height() const { // how high is it? + if (!null_box()) + return top_right.y() - bot_left.y(); + else + return 0; + } + + int16_t width() const { // how high is it? + if (!null_box()) + return top_right.x() - bot_left.x(); + else + return 0; + } + + int32_t area() const { // what is the area? + if (!null_box()) + return width() * height(); + else + return 0; + } + + // Pads the box on either side by the supplied x,y pad amounts. + // NO checks for exceeding any bounds like 0 or an image size. + void pad(int xpad, int ypad) { + ICOORD pad(xpad, ypad); + bot_left -= pad; + top_right += pad; + } + + void move_bottom_edge( // move one edge + const int16_t y) { // by +/- y + bot_left += ICOORD(0, y); + } + + void move_left_edge( // move one edge + const int16_t x) { // by +/- x + bot_left += ICOORD(x, 0); + } + + void move_right_edge( // move one edge + const int16_t x) { // by +/- x + top_right += ICOORD(x, 0); + } + + void move_top_edge( // move one edge + const int16_t y) { // by +/- y + top_right += ICOORD(0, y); + } + + void move( // move box + const ICOORD vec) { // by vector + bot_left += vec; + top_right += vec; + } + + void move( // move box + const FCOORD vec) { // by float vector + bot_left.set_x((int16_t)floor(bot_left.x() + vec.x())); + // round left + bot_left.set_y((int16_t)floor(bot_left.y() + vec.y())); + // round down + top_right.set_x((int16_t)ceil(top_right.x() + vec.x())); + // round right + top_right.set_y((int16_t)ceil(top_right.y() + vec.y())); + // round up + } + + void scale( // scale box + const float f) { // by multiplier + bot_left.set_x((int16_t)floor(bot_left.x() * f)); // round left + bot_left.set_y((int16_t)floor(bot_left.y() * f)); // round down + top_right.set_x((int16_t)ceil(top_right.x() * f)); // round right + top_right.set_y((int16_t)ceil(top_right.y() * f)); // round up + } + void scale( // scale box + const FCOORD vec) { // by float vector + bot_left.set_x((int16_t)floor(bot_left.x() * vec.x())); + bot_left.set_y((int16_t)floor(bot_left.y() * vec.y())); + top_right.set_x((int16_t)ceil(top_right.x() * vec.x())); + top_right.set_y((int16_t)ceil(top_right.y() * vec.y())); + } + + // rotate doesn't enlarge the box - it just rotates the bottom-left + // and top-right corners. Use rotate_large if you want to guarantee + // that all content is contained within the rotated box. + void rotate(const FCOORD& vec) { // by vector + bot_left.rotate(vec); + top_right.rotate(vec); + *this = TBOX(bot_left, top_right); + } + // rotate_large constructs the containing bounding box of all 4 + // corners after rotating them. It therefore guarantees that all + // original content is contained within, but also slightly enlarges the box. + void rotate_large(const FCOORD& vec); + + bool contains( // is pt inside box + const FCOORD pt) const; + + bool contains( // is box inside box + const TBOX& box) const; + + bool overlap( // do boxes overlap + const TBOX& box) const; + + bool major_overlap( // do boxes overlap more than half + const TBOX& box) const; + + // Do boxes overlap on x axis. + bool x_overlap(const TBOX& box) const; + + // Return the horizontal gap between the boxes. If the boxes + // overlap horizontally then the return value is negative, indicating + // the amount of the overlap. + int x_gap(const TBOX& box) const { + return std::max(bot_left.x(), box.bot_left.x()) - + std::min(top_right.x(), box.top_right.x()); + } + + // Return the vertical gap between the boxes. If the boxes + // overlap vertically then the return value is negative, indicating + // the amount of the overlap. + int y_gap(const TBOX& box) const { + return std::max(bot_left.y(), box.bot_left.y()) - + std::min(top_right.y(), box.top_right.y()); + } + + // Do boxes overlap on x axis by more than + // half of the width of the narrower box. + bool major_x_overlap(const TBOX& box) const; + + // Do boxes overlap on y axis. + bool y_overlap(const TBOX& box) const; + + // Do boxes overlap on y axis by more than + // half of the height of the shorter box. + bool major_y_overlap(const TBOX& box) const; + + // fraction of current box's area covered by other + double overlap_fraction(const TBOX& box) const; + + // fraction of the current box's projected area covered by the other's + double x_overlap_fraction(const TBOX& box) const; + + // fraction of the current box's projected area covered by the other's + double y_overlap_fraction(const TBOX& box) const; + + // Returns true if the boxes are almost equal on x axis. + bool x_almost_equal(const TBOX& box, int tolerance) const; + + // Returns true if the boxes are almost equal + bool almost_equal(const TBOX& box, int tolerance) const; + + TBOX intersection( // shared area box + const TBOX& box) const; + + TBOX bounding_union( // box enclosing both + const TBOX& box) const; + + // Sets the box boundaries to the given coordinates. + void set_to_given_coords(int x_min, int y_min, int x_max, int y_max) { + bot_left.set_x(x_min); + bot_left.set_y(y_min); + top_right.set_x(x_max); + top_right.set_y(y_max); + } + + void print() const { // print + tprintf("Bounding box=(%d,%d)->(%d,%d)\n", left(), bottom(), right(), + top()); + } + // Appends the bounding box as (%d,%d)->(%d,%d) to a STRING. + void print_to_str(STRING* str) const; #ifndef GRAPHICS_DISABLED - void plot( // use current settings - ScrollView* fd) const { // where to paint - fd->Rectangle(bot_left.x (), bot_left.y (), top_right.x (), - top_right.y ()); - } - - void plot( // paint box - ScrollView* fd, // where to paint - ScrollView::Color fill_colour, // colour for inside - ScrollView::Color border_colour) const; // colour for border + void plot( // use current settings + ScrollView* fd) const { // where to paint + fd->Rectangle(bot_left.x(), bot_left.y(), top_right.x(), top_right.y()); + } + + void plot( // paint box + ScrollView* fd, // where to paint + ScrollView::Color fill_colour, // colour for inside + ScrollView::Color border_colour) const; // colour for border #endif - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - - friend TBOX& operator+=(TBOX&, const TBOX&); - // in place union - friend TBOX& operator&=(TBOX&, const TBOX&); - // in place intersection - - private: - ICOORD bot_left; // bottom left corner - ICOORD top_right; // top right corner + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + bool DeSerialize(bool swap, FILE* fp); + + friend TBOX& operator+=(TBOX&, const TBOX&); + // in place union + friend TBOX& operator&=(TBOX&, const TBOX&); + // in place intersection + + private: + ICOORD bot_left; // bottom left corner + ICOORD top_right; // top right corner }; /********************************************************************** @@ -309,45 +297,41 @@ class DLLSYM TBOX { // bounding box inline TBOX::TBOX( // constructor const FCOORD pt // floating centre - ) { - bot_left = ICOORD ((int16_t) floor (pt.x ()), (int16_t) floor (pt.y ())); - top_right = ICOORD ((int16_t) ceil (pt.x ()), (int16_t) ceil (pt.y ())); +) { + bot_left = ICOORD((int16_t)floor(pt.x()), (int16_t)floor(pt.y())); + top_right = ICOORD((int16_t)ceil(pt.x()), (int16_t)ceil(pt.y())); } - /********************************************************************** * TBOX::contains() Is point within box * **********************************************************************/ inline bool TBOX::contains(const FCOORD pt) const { - return ((pt.x () >= bot_left.x ()) && - (pt.x () <= top_right.x ()) && - (pt.y () >= bot_left.y ()) && (pt.y () <= top_right.y ())); + return ((pt.x() >= bot_left.x()) && (pt.x() <= top_right.x()) && + (pt.y() >= bot_left.y()) && (pt.y() <= top_right.y())); } - /********************************************************************** * TBOX::contains() Is box within box * **********************************************************************/ -inline bool TBOX::contains(const TBOX &box) const { - return (contains (box.bot_left) && contains (box.top_right)); +inline bool TBOX::contains(const TBOX& box) const { + return (contains(box.bot_left) && contains(box.top_right)); } - /********************************************************************** * TBOX::overlap() Do two boxes overlap? * **********************************************************************/ inline bool TBOX::overlap( // do boxes overlap - const TBOX &box) const { - return ((box.bot_left.x () <= top_right.x ()) && - (box.top_right.x () >= bot_left.x ()) && - (box.bot_left.y () <= top_right.y ()) && - (box.top_right.y () >= bot_left.y ())); + const TBOX& box) const { + return ((box.bot_left.x() <= top_right.x()) && + (box.top_right.x() >= bot_left.x()) && + (box.bot_left.y() <= top_right.y()) && + (box.top_right.y() >= bot_left.y())); } /********************************************************************** @@ -356,17 +340,15 @@ inline bool TBOX::overlap( // do boxes overlap **********************************************************************/ inline bool TBOX::major_overlap( // Do boxes overlap more that half. - const TBOX &box) const { + const TBOX& box) const { int overlap = std::min(box.top_right.x(), top_right.x()); overlap -= std::max(box.bot_left.x(), bot_left.x()); overlap += overlap; - if (overlap < std::min(box.width(), width())) - return false; + if (overlap < std::min(box.width(), width())) return false; overlap = std::min(box.top_right.y(), top_right.y()); overlap -= std::max(box.bot_left.y(), bot_left.y()); overlap += overlap; - if (overlap < std::min(box.height(), height())) - return false; + if (overlap < std::min(box.height(), height())) return false; return true; } @@ -375,7 +357,7 @@ inline bool TBOX::major_overlap( // Do boxes overlap more that half. * **********************************************************************/ -inline double TBOX::overlap_fraction(const TBOX &box) const { +inline double TBOX::overlap_fraction(const TBOX& box) const { double fraction = 0.0; if (this->area()) { fraction = this->intersection(box).area() * 1.0 / this->area(); @@ -388,9 +370,9 @@ inline double TBOX::overlap_fraction(const TBOX &box) const { * **********************************************************************/ -inline bool TBOX::x_overlap(const TBOX &box) const { +inline bool TBOX::x_overlap(const TBOX& box) const { return ((box.bot_left.x() <= top_right.x()) && - (box.top_right.x() >= bot_left.x())); + (box.top_right.x() >= bot_left.x())); } /********************************************************************** @@ -399,7 +381,7 @@ inline bool TBOX::x_overlap(const TBOX &box) const { * **********************************************************************/ -inline bool TBOX::major_x_overlap(const TBOX &box) const { +inline bool TBOX::major_x_overlap(const TBOX& box) const { int16_t overlap = box.width(); if (this->left() > box.left()) { overlap -= this->left() - box.left(); @@ -415,9 +397,9 @@ inline bool TBOX::major_x_overlap(const TBOX &box) const { * **********************************************************************/ -inline bool TBOX::y_overlap(const TBOX &box) const { +inline bool TBOX::y_overlap(const TBOX& box) const { return ((box.bot_left.y() <= top_right.y()) && - (box.top_right.y() >= bot_left.y())); + (box.top_right.y() >= bot_left.y())); } /********************************************************************** @@ -426,7 +408,7 @@ inline bool TBOX::y_overlap(const TBOX &box) const { * **********************************************************************/ -inline bool TBOX::major_y_overlap(const TBOX &box) const { +inline bool TBOX::major_y_overlap(const TBOX& box) const { int16_t overlap = box.height(); if (this->bottom() > box.bottom()) { overlap -= this->bottom() - box.bottom(); diff --git a/src/ccstruct/rejctmap.cpp b/src/ccstruct/rejctmap.cpp index 427faa5277..a809f0a35a 100644 --- a/src/ccstruct/rejctmap.cpp +++ b/src/ccstruct/rejctmap.cpp @@ -17,254 +17,204 @@ * **********************************************************************/ -#include "host.h" -#include "rejctmap.h" -#include "params.h" +#include "rejctmap.h" +#include "host.h" +#include "params.h" -bool REJ::perm_rejected() { //Is char perm reject? - return (flag (R_TESS_FAILURE) || - flag (R_SMALL_XHT) || - flag (R_EDGE_CHAR) || - flag (R_1IL_CONFLICT) || - flag (R_POSTNN_1IL) || - flag (R_REJ_CBLOB) || - flag (R_BAD_REPETITION) || flag (R_MM_REJECT)); +bool REJ::perm_rejected() { // Is char perm reject? + return (flag(R_TESS_FAILURE) || flag(R_SMALL_XHT) || flag(R_EDGE_CHAR) || + flag(R_1IL_CONFLICT) || flag(R_POSTNN_1IL) || flag(R_REJ_CBLOB) || + flag(R_BAD_REPETITION) || flag(R_MM_REJECT)); } - bool REJ::rej_before_nn_accept() { - return flag (R_POOR_MATCH) || - flag (R_NOT_TESS_ACCEPTED) || - flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER); + return flag(R_POOR_MATCH) || flag(R_NOT_TESS_ACCEPTED) || + flag(R_CONTAINS_BLANKS) || flag(R_BAD_PERMUTER); } - bool REJ::rej_between_nn_and_mm() { - return flag (R_HYPHEN) || - flag (R_DUBIOUS) || - flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP); -} - - -bool REJ::rej_between_mm_and_quality_accept() { - return flag (R_BAD_QUALITY); + return flag(R_HYPHEN) || flag(R_DUBIOUS) || flag(R_NO_ALPHANUMS) || + flag(R_MOSTLY_REJ) || flag(R_XHT_FIXUP); } +bool REJ::rej_between_mm_and_quality_accept() { return flag(R_BAD_QUALITY); } bool REJ::rej_between_quality_and_minimal_rej_accept() { - return flag (R_DOC_REJ) || - flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ); + return flag(R_DOC_REJ) || flag(R_BLOCK_REJ) || flag(R_ROW_REJ) || + flag(R_UNLV_REJ); } - bool REJ::rej_before_mm_accept() { - return rej_between_nn_and_mm () || - (rej_before_nn_accept () && - !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT)); + return rej_between_nn_and_mm() || + (rej_before_nn_accept() && !flag(R_NN_ACCEPT) && + !flag(R_HYPHEN_ACCEPT)); } - bool REJ::rej_before_quality_accept() { - return rej_between_mm_and_quality_accept () || - (!flag (R_MM_ACCEPT) && rej_before_mm_accept ()); + return rej_between_mm_and_quality_accept() || + (!flag(R_MM_ACCEPT) && rej_before_mm_accept()); } - -bool REJ::rejected() { //Is char rejected? - if (flag (R_MINIMAL_REJ_ACCEPT)) +bool REJ::rejected() { // Is char rejected? + if (flag(R_MINIMAL_REJ_ACCEPT)) return false; else - return (perm_rejected () || - rej_between_quality_and_minimal_rej_accept () || - (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ())); + return (perm_rejected() || rej_between_quality_and_minimal_rej_accept() || + (!flag(R_QUALITY_ACCEPT) && rej_before_quality_accept())); } - -bool REJ::accept_if_good_quality() { //potential rej? - return (rejected () && - !perm_rejected () && - flag (R_BAD_PERMUTER) && - !flag (R_POOR_MATCH) && - !flag (R_NOT_TESS_ACCEPTED) && - !flag (R_CONTAINS_BLANKS) && - (!rej_between_nn_and_mm () && - !rej_between_mm_and_quality_accept () && - !rej_between_quality_and_minimal_rej_accept ())); +bool REJ::accept_if_good_quality() { // potential rej? + return (rejected() && !perm_rejected() && flag(R_BAD_PERMUTER) && + !flag(R_POOR_MATCH) && !flag(R_NOT_TESS_ACCEPTED) && + !flag(R_CONTAINS_BLANKS) && + (!rej_between_nn_and_mm() && !rej_between_mm_and_quality_accept() && + !rej_between_quality_and_minimal_rej_accept())); } - -void REJ::setrej_tess_failure() { //Tess generated blank +void REJ::setrej_tess_failure() { // Tess generated blank set_flag(R_TESS_FAILURE); } - -void REJ::setrej_small_xht() { //Small xht char/wd +void REJ::setrej_small_xht() { // Small xht char/wd set_flag(R_SMALL_XHT); } - -void REJ::setrej_edge_char() { //Close to image edge +void REJ::setrej_edge_char() { // Close to image edge set_flag(R_EDGE_CHAR); } - -void REJ::setrej_1Il_conflict() { //Initial reject map +void REJ::setrej_1Il_conflict() { // Initial reject map set_flag(R_1IL_CONFLICT); } - -void REJ::setrej_postNN_1Il() { //1Il after NN +void REJ::setrej_postNN_1Il() { // 1Il after NN set_flag(R_POSTNN_1IL); } - -void REJ::setrej_rej_cblob() { //Insert duff blob +void REJ::setrej_rej_cblob() { // Insert duff blob set_flag(R_REJ_CBLOB); } - -void REJ::setrej_mm_reject() { //Matrix matcher +void REJ::setrej_mm_reject() { // Matrix matcher set_flag(R_MM_REJECT); } - -void REJ::setrej_bad_repetition() { //Odd repeated char +void REJ::setrej_bad_repetition() { // Odd repeated char set_flag(R_BAD_REPETITION); } - -void REJ::setrej_poor_match() { //Failed Rays heuristic +void REJ::setrej_poor_match() { // Failed Rays heuristic set_flag(R_POOR_MATCH); } - void REJ::setrej_not_tess_accepted() { - //TEMP reject_word + // TEMP reject_word set_flag(R_NOT_TESS_ACCEPTED); } - void REJ::setrej_contains_blanks() { - //TEMP reject_word + // TEMP reject_word set_flag(R_CONTAINS_BLANKS); } - -void REJ::setrej_bad_permuter() { //POTENTIAL reject_word +void REJ::setrej_bad_permuter() { // POTENTIAL reject_word set_flag(R_BAD_PERMUTER); } - -void REJ::setrej_hyphen() { //PostNN dubious hyphen or . +void REJ::setrej_hyphen() { // PostNN dubious hyphen or . set_flag(R_HYPHEN); } - -void REJ::setrej_dubious() { //PostNN dubious limit +void REJ::setrej_dubious() { // PostNN dubious limit set_flag(R_DUBIOUS); } - -void REJ::setrej_no_alphanums() { //TEMP reject_word +void REJ::setrej_no_alphanums() { // TEMP reject_word set_flag(R_NO_ALPHANUMS); } - -void REJ::setrej_mostly_rej() { //TEMP reject_word +void REJ::setrej_mostly_rej() { // TEMP reject_word set_flag(R_MOSTLY_REJ); } - -void REJ::setrej_xht_fixup() { //xht fixup +void REJ::setrej_xht_fixup() { // xht fixup set_flag(R_XHT_FIXUP); } - -void REJ::setrej_bad_quality() { //TEMP reject_word +void REJ::setrej_bad_quality() { // TEMP reject_word set_flag(R_BAD_QUALITY); } - -void REJ::setrej_doc_rej() { //TEMP reject_word +void REJ::setrej_doc_rej() { // TEMP reject_word set_flag(R_DOC_REJ); } - -void REJ::setrej_block_rej() { //TEMP reject_word +void REJ::setrej_block_rej() { // TEMP reject_word set_flag(R_BLOCK_REJ); } - -void REJ::setrej_row_rej() { //TEMP reject_word +void REJ::setrej_row_rej() { // TEMP reject_word set_flag(R_ROW_REJ); } - -void REJ::setrej_unlv_rej() { //TEMP reject_word +void REJ::setrej_unlv_rej() { // TEMP reject_word set_flag(R_UNLV_REJ); } - -void REJ::setrej_hyphen_accept() { //NN Flipped a char +void REJ::setrej_hyphen_accept() { // NN Flipped a char set_flag(R_HYPHEN_ACCEPT); } - -void REJ::setrej_nn_accept() { //NN Flipped a char +void REJ::setrej_nn_accept() { // NN Flipped a char set_flag(R_NN_ACCEPT); } - -void REJ::setrej_mm_accept() { //Matrix matcher +void REJ::setrej_mm_accept() { // Matrix matcher set_flag(R_MM_ACCEPT); } - -void REJ::setrej_quality_accept() { //Quality flip a char +void REJ::setrej_quality_accept() { // Quality flip a char set_flag(R_QUALITY_ACCEPT); } - void REJ::setrej_minimal_rej_accept() { - //Accept all except blank + // Accept all except blank set_flag(R_MINIMAL_REJ_ACCEPT); } - -void REJ::full_print(FILE *fp) { - fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F"); - fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F"); - fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F"); - fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F"); - fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F"); - fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F"); - fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F"); - fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F"); - fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F"); - fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n", - flag (R_NOT_TESS_ACCEPTED) ? "T" : "F"); - fprintf (fp, "R_CONTAINS_BLANKS: %s\n", - flag (R_CONTAINS_BLANKS) ? "T" : "F"); - fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F"); - fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F"); - fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F"); - fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F"); - fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F"); - fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F"); - fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F"); - fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F"); - fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F"); - fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F"); - fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F"); - fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F"); - fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F"); - fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F"); - fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F"); - fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n", - flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); -} - -REJMAP &REJMAP::operator=(const REJMAP &source) { +void REJ::full_print(FILE* fp) { + fprintf(fp, "R_TESS_FAILURE: %s\n", flag(R_TESS_FAILURE) ? "T" : "F"); + fprintf(fp, "R_SMALL_XHT: %s\n", flag(R_SMALL_XHT) ? "T" : "F"); + fprintf(fp, "R_EDGE_CHAR: %s\n", flag(R_EDGE_CHAR) ? "T" : "F"); + fprintf(fp, "R_1IL_CONFLICT: %s\n", flag(R_1IL_CONFLICT) ? "T" : "F"); + fprintf(fp, "R_POSTNN_1IL: %s\n", flag(R_POSTNN_1IL) ? "T" : "F"); + fprintf(fp, "R_REJ_CBLOB: %s\n", flag(R_REJ_CBLOB) ? "T" : "F"); + fprintf(fp, "R_MM_REJECT: %s\n", flag(R_MM_REJECT) ? "T" : "F"); + fprintf(fp, "R_BAD_REPETITION: %s\n", flag(R_BAD_REPETITION) ? "T" : "F"); + fprintf(fp, "R_POOR_MATCH: %s\n", flag(R_POOR_MATCH) ? "T" : "F"); + fprintf(fp, "R_NOT_TESS_ACCEPTED: %s\n", + flag(R_NOT_TESS_ACCEPTED) ? "T" : "F"); + fprintf(fp, "R_CONTAINS_BLANKS: %s\n", flag(R_CONTAINS_BLANKS) ? "T" : "F"); + fprintf(fp, "R_BAD_PERMUTER: %s\n", flag(R_BAD_PERMUTER) ? "T" : "F"); + fprintf(fp, "R_HYPHEN: %s\n", flag(R_HYPHEN) ? "T" : "F"); + fprintf(fp, "R_DUBIOUS: %s\n", flag(R_DUBIOUS) ? "T" : "F"); + fprintf(fp, "R_NO_ALPHANUMS: %s\n", flag(R_NO_ALPHANUMS) ? "T" : "F"); + fprintf(fp, "R_MOSTLY_REJ: %s\n", flag(R_MOSTLY_REJ) ? "T" : "F"); + fprintf(fp, "R_XHT_FIXUP: %s\n", flag(R_XHT_FIXUP) ? "T" : "F"); + fprintf(fp, "R_BAD_QUALITY: %s\n", flag(R_BAD_QUALITY) ? "T" : "F"); + fprintf(fp, "R_DOC_REJ: %s\n", flag(R_DOC_REJ) ? "T" : "F"); + fprintf(fp, "R_BLOCK_REJ: %s\n", flag(R_BLOCK_REJ) ? "T" : "F"); + fprintf(fp, "R_ROW_REJ: %s\n", flag(R_ROW_REJ) ? "T" : "F"); + fprintf(fp, "R_UNLV_REJ: %s\n", flag(R_UNLV_REJ) ? "T" : "F"); + fprintf(fp, "R_HYPHEN_ACCEPT: %s\n", flag(R_HYPHEN_ACCEPT) ? "T" : "F"); + fprintf(fp, "R_NN_ACCEPT: %s\n", flag(R_NN_ACCEPT) ? "T" : "F"); + fprintf(fp, "R_MM_ACCEPT: %s\n", flag(R_MM_ACCEPT) ? "T" : "F"); + fprintf(fp, "R_QUALITY_ACCEPT: %s\n", flag(R_QUALITY_ACCEPT) ? "T" : "F"); + fprintf(fp, "R_MINIMAL_REJ_ACCEPT: %s\n", + flag(R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); +} + +REJMAP& REJMAP::operator=(const REJMAP& source) { initialise(source.len); for (int i = 0; i < len; i++) { ptr[i] = source.ptr[i]; @@ -277,90 +227,78 @@ void REJMAP::initialise(int16_t length) { len = length; } - -int16_t REJMAP::accept_count() { //How many accepted? +int16_t REJMAP::accept_count() { // How many accepted? int i; int16_t count = 0; for (i = 0; i < len; i++) { - if (ptr[i].accepted ()) - count++; + if (ptr[i].accepted()) count++; } return count; } - -bool REJMAP::recoverable_rejects() { //Any non perm rejs? +bool REJMAP::recoverable_rejects() { // Any non perm rejs? for (int i = 0; i < len; i++) { - if (ptr[i].recoverable ()) - return true; + if (ptr[i].recoverable()) return true; } return false; } - -bool REJMAP::quality_recoverable_rejects() { //Any potential rejs? +bool REJMAP::quality_recoverable_rejects() { // Any potential rejs? for (int i = 0; i < len; i++) { - if (ptr[i].accept_if_good_quality ()) - return true; + if (ptr[i].accept_if_good_quality()) return true; } return false; } - -void REJMAP::remove_pos( //Cut out an element - int16_t pos //element to remove - ) { - ASSERT_HOST (pos >= 0); - ASSERT_HOST (pos < len); - ASSERT_HOST (len > 0); +void REJMAP::remove_pos( // Cut out an element + int16_t pos // element to remove +) { + ASSERT_HOST(pos >= 0); + ASSERT_HOST(pos < len); + ASSERT_HOST(len > 0); len--; for (; pos < len; pos++) ptr[pos] = ptr[pos + 1]; } - -void REJMAP::print(FILE *fp) { +void REJMAP::print(FILE* fp) { int i; char buff[512]; for (i = 0; i < len; i++) { - buff[i] = ptr[i].display_char (); + buff[i] = ptr[i].display_char(); } buff[i] = '\0'; - fprintf (fp, "\"%s\"", buff); + fprintf(fp, "\"%s\"", buff); } - -void REJMAP::full_print(FILE *fp) { +void REJMAP::full_print(FILE* fp) { int i; for (i = 0; i < len; i++) { - ptr[i].full_print (fp); - fprintf (fp, "\n"); + ptr[i].full_print(fp); + fprintf(fp, "\n"); } } - -void REJMAP::rej_word_small_xht() { //Reject whole word +void REJMAP::rej_word_small_xht() { // Reject whole word int i; for (i = 0; i < len; i++) { - ptr[i].setrej_small_xht (); + ptr[i].setrej_small_xht(); } } - -void REJMAP::rej_word_tess_failure() { //Reject whole word +void REJMAP::rej_word_tess_failure() { // Reject whole word int i; for (i = 0; i < len; i++) { - ptr[i].setrej_tess_failure (); + ptr[i].setrej_tess_failure(); } } - -void REJMAP::rej_word_not_tess_accepted() { //Reject whole word +void REJMAP::rej_word_not_tess_accepted() { // Reject whole word int i; for (i = 0; i < len; i++) { @@ -368,8 +306,7 @@ void REJMAP::rej_word_not_tess_accepted() { //Reject whole word } } - -void REJMAP::rej_word_contains_blanks() { //Reject whole word +void REJMAP::rej_word_contains_blanks() { // Reject whole word int i; for (i = 0; i < len; i++) { @@ -377,17 +314,15 @@ void REJMAP::rej_word_contains_blanks() { //Reject whole word } } - -void REJMAP::rej_word_bad_permuter() { //Reject whole word +void REJMAP::rej_word_bad_permuter() { // Reject whole word int i; for (i = 0; i < len; i++) { - if (ptr[i].accepted()) ptr[i].setrej_bad_permuter (); + if (ptr[i].accepted()) ptr[i].setrej_bad_permuter(); } } - -void REJMAP::rej_word_xht_fixup() { //Reject whole word +void REJMAP::rej_word_xht_fixup() { // Reject whole word int i; for (i = 0; i < len; i++) { @@ -395,8 +330,7 @@ void REJMAP::rej_word_xht_fixup() { //Reject whole word } } - -void REJMAP::rej_word_no_alphanums() { //Reject whole word +void REJMAP::rej_word_no_alphanums() { // Reject whole word int i; for (i = 0; i < len; i++) { @@ -404,8 +338,7 @@ void REJMAP::rej_word_no_alphanums() { //Reject whole word } } - -void REJMAP::rej_word_mostly_rej() { //Reject whole word +void REJMAP::rej_word_mostly_rej() { // Reject whole word int i; for (i = 0; i < len; i++) { @@ -413,8 +346,7 @@ void REJMAP::rej_word_mostly_rej() { //Reject whole word } } - -void REJMAP::rej_word_bad_quality() { //Reject whole word +void REJMAP::rej_word_bad_quality() { // Reject whole word int i; for (i = 0; i < len; i++) { @@ -422,8 +354,7 @@ void REJMAP::rej_word_bad_quality() { //Reject whole word } } - -void REJMAP::rej_word_doc_rej() { //Reject whole word +void REJMAP::rej_word_doc_rej() { // Reject whole word int i; for (i = 0; i < len; i++) { @@ -431,8 +362,7 @@ void REJMAP::rej_word_doc_rej() { //Reject whole word } } - -void REJMAP::rej_word_block_rej() { //Reject whole word +void REJMAP::rej_word_block_rej() { // Reject whole word int i; for (i = 0; i < len; i++) { @@ -440,8 +370,7 @@ void REJMAP::rej_word_block_rej() { //Reject whole word } } - -void REJMAP::rej_word_row_rej() { //Reject whole word +void REJMAP::rej_word_row_rej() { // Reject whole word int i; for (i = 0; i < len; i++) { diff --git a/src/ccstruct/rejctmap.h b/src/ccstruct/rejctmap.h index 39bd9e5477..c79aa3d291 100644 --- a/src/ccstruct/rejctmap.h +++ b/src/ccstruct/rejctmap.h @@ -38,15 +38,15 @@ IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!! **********************************************************************/ -#ifndef REJCTMAP_H -#define REJCTMAP_H +#ifndef REJCTMAP_H +#define REJCTMAP_H #ifdef __UNIX__ -#include +#include #endif #include -#include "bits16.h" -#include "params.h" +#include "bits16.h" +#include "params.h" enum REJ_FLAGS { /* Reject modes which are NEVER overridden */ @@ -91,21 +91,20 @@ enum REJ_FLAGS { /* REJECT MAP VALUES */ -#define MAP_ACCEPT '1' -#define MAP_REJECT_PERM '0' -#define MAP_REJECT_TEMP '2' -#define MAP_REJECT_POTENTIAL '3' +#define MAP_ACCEPT '1' +#define MAP_REJECT_PERM '0' +#define MAP_REJECT_TEMP '2' +#define MAP_REJECT_POTENTIAL '3' -class REJ -{ +class REJ { BITS16 flags1; BITS16 flags2; void set_flag(REJ_FLAGS rej_flag) { if (rej_flag < 16) - flags1.turn_on_bit (rej_flag); + flags1.turn_on_bit(rej_flag); else - flags2.turn_on_bit (rej_flag - 16); + flags2.turn_on_bit(rej_flag - 16); } bool rej_before_nn_accept(); @@ -115,151 +114,148 @@ class REJ bool rej_before_mm_accept(); bool rej_before_quality_accept(); - public: - REJ() = default; - - REJ( //classwise copy - const REJ &source) { - flags1 = source.flags1; - flags2 = source.flags2; - } - - REJ & operator= ( //assign REJ - const REJ & source) { //from this - flags1 = source.flags1; - flags2 = source.flags2; - return *this; - } - - bool flag(REJ_FLAGS rej_flag) { - if (rej_flag < 16) - return flags1.bit (rej_flag); - else - return flags2.bit (rej_flag - 16); - } - - char display_char() { - if (perm_rejected ()) - return MAP_REJECT_PERM; - else if (accept_if_good_quality ()) - return MAP_REJECT_POTENTIAL; - else if (rejected ()) - return MAP_REJECT_TEMP; - else - return MAP_ACCEPT; - } - - bool perm_rejected(); //Is char perm reject? - - bool rejected(); //Is char rejected? - - bool accepted() { //Is char accepted? - return !rejected (); - } - - //potential rej? - bool accept_if_good_quality(); - - bool recoverable() { - return (rejected () && !perm_rejected ()); - } - - void setrej_tess_failure(); //Tess generated blank - void setrej_small_xht(); //Small xht char/wd - void setrej_edge_char(); //Close to image edge - void setrej_1Il_conflict(); //Initial reject map - void setrej_postNN_1Il(); //1Il after NN - void setrej_rej_cblob(); //Insert duff blob - void setrej_mm_reject(); //Matrix matcher - //Odd repeated char - void setrej_bad_repetition(); - void setrej_poor_match(); //Failed Rays heuristic - //TEMP reject_word - void setrej_not_tess_accepted(); - //TEMP reject_word - void setrej_contains_blanks(); - void setrej_bad_permuter(); //POTENTIAL reject_word - void setrej_hyphen(); //PostNN dubious hyph or . - void setrej_dubious(); //PostNN dubious limit - void setrej_no_alphanums(); //TEMP reject_word - void setrej_mostly_rej(); //TEMP reject_word - void setrej_xht_fixup(); //xht fixup - void setrej_bad_quality(); //TEMP reject_word - void setrej_doc_rej(); //TEMP reject_word - void setrej_block_rej(); //TEMP reject_word - void setrej_row_rej(); //TEMP reject_word - void setrej_unlv_rej(); //TEMP reject_word - void setrej_nn_accept(); //NN Flipped a char - void setrej_hyphen_accept(); //Good aspect ratio - void setrej_mm_accept(); //Matrix matcher - //Quality flip a char - void setrej_quality_accept(); - //Accept all except blank - void setrej_minimal_rej_accept(); - - void full_print(FILE *fp); + public: + REJ() = default; + + REJ( // classwise copy + const REJ& source) { + flags1 = source.flags1; + flags2 = source.flags2; + } + + REJ& operator=( // assign REJ + const REJ& source) { // from this + flags1 = source.flags1; + flags2 = source.flags2; + return *this; + } + + bool flag(REJ_FLAGS rej_flag) { + if (rej_flag < 16) + return flags1.bit(rej_flag); + else + return flags2.bit(rej_flag - 16); + } + + char display_char() { + if (perm_rejected()) + return MAP_REJECT_PERM; + else if (accept_if_good_quality()) + return MAP_REJECT_POTENTIAL; + else if (rejected()) + return MAP_REJECT_TEMP; + else + return MAP_ACCEPT; + } + + bool perm_rejected(); // Is char perm reject? + + bool rejected(); // Is char rejected? + + bool accepted() { // Is char accepted? + return !rejected(); + } + + // potential rej? + bool accept_if_good_quality(); + + bool recoverable() { return (rejected() && !perm_rejected()); } + + void setrej_tess_failure(); // Tess generated blank + void setrej_small_xht(); // Small xht char/wd + void setrej_edge_char(); // Close to image edge + void setrej_1Il_conflict(); // Initial reject map + void setrej_postNN_1Il(); // 1Il after NN + void setrej_rej_cblob(); // Insert duff blob + void setrej_mm_reject(); // Matrix matcher + // Odd repeated char + void setrej_bad_repetition(); + void setrej_poor_match(); // Failed Rays heuristic + // TEMP reject_word + void setrej_not_tess_accepted(); + // TEMP reject_word + void setrej_contains_blanks(); + void setrej_bad_permuter(); // POTENTIAL reject_word + void setrej_hyphen(); // PostNN dubious hyph or . + void setrej_dubious(); // PostNN dubious limit + void setrej_no_alphanums(); // TEMP reject_word + void setrej_mostly_rej(); // TEMP reject_word + void setrej_xht_fixup(); // xht fixup + void setrej_bad_quality(); // TEMP reject_word + void setrej_doc_rej(); // TEMP reject_word + void setrej_block_rej(); // TEMP reject_word + void setrej_row_rej(); // TEMP reject_word + void setrej_unlv_rej(); // TEMP reject_word + void setrej_nn_accept(); // NN Flipped a char + void setrej_hyphen_accept(); // Good aspect ratio + void setrej_mm_accept(); // Matrix matcher + // Quality flip a char + void setrej_quality_accept(); + // Accept all except blank + void setrej_minimal_rej_accept(); + + void full_print(FILE* fp); }; -class REJMAP -{ +class REJMAP { std::unique_ptr ptr; // ptr to the chars - int16_t len; //Number of chars + int16_t len; // Number of chars public: REJMAP() : len(0) {} - REJMAP(const REJMAP &rejmap) { *this = rejmap; } + REJMAP(const REJMAP& rejmap) { *this = rejmap; } - REJMAP &operator=(const REJMAP &source); + REJMAP& operator=(const REJMAP& source); // Sets up the ptr array to length, whatever it was before. void initialise(int16_t length); - REJ &operator[]( // access function - int16_t index) const // map index + REJ& operator[]( // access function + int16_t index) const // map index { ASSERT_HOST(index < len); return ptr[index]; // no bounds checks - } - - int32_t length() const { //map length - return len; - } - - int16_t accept_count(); //How many accepted? - - int16_t reject_count() { //How many rejects? - return len - accept_count (); - } - - void remove_pos( //Cut out an element - int16_t pos); //element to remove - - void print(FILE *fp); - - void full_print(FILE *fp); - - bool recoverable_rejects(); //Any non perm rejs? - - bool quality_recoverable_rejects(); - //Any potential rejs? - - void rej_word_small_xht(); //Reject whole word - //Reject whole word - void rej_word_tess_failure(); - void rej_word_not_tess_accepted(); - //Reject whole word - //Reject whole word - void rej_word_contains_blanks(); - //Reject whole word - void rej_word_bad_permuter(); - void rej_word_xht_fixup(); //Reject whole word - //Reject whole word - void rej_word_no_alphanums(); - void rej_word_mostly_rej(); //Reject whole word - void rej_word_bad_quality(); //Reject whole word - void rej_word_doc_rej(); //Reject whole word - void rej_word_block_rej(); //Reject whole word - void rej_word_row_rej(); //Reject whole word + } + + int32_t length() const { // map length + return len; + } + + int16_t accept_count(); // How many accepted? + + int16_t reject_count() { // How many rejects? + return len - accept_count(); + } + + void remove_pos( // Cut out an element + int16_t pos); // element to remove + + void print(FILE* fp); + + void full_print(FILE* fp); + + bool recoverable_rejects(); // Any non perm rejs? + + bool quality_recoverable_rejects(); + // Any potential rejs? + + void rej_word_small_xht(); // Reject whole word + // Reject whole word + void rej_word_tess_failure(); + void rej_word_not_tess_accepted(); + // Reject whole word + // Reject whole word + void rej_word_contains_blanks(); + // Reject whole word + void rej_word_bad_permuter(); + void rej_word_xht_fixup(); // Reject whole word + // Reject whole word + void rej_word_no_alphanums(); + void rej_word_mostly_rej(); // Reject whole word + void rej_word_bad_quality(); // Reject whole word + void rej_word_doc_rej(); // Reject whole word + void rej_word_block_rej(); // Reject whole word + void rej_word_row_rej(); // Reject whole word }; #endif diff --git a/src/ccstruct/seam.cpp b/src/ccstruct/seam.cpp index 5a2f1b34cc..9f1a35e6f3 100644 --- a/src/ccstruct/seam.cpp +++ b/src/ccstruct/seam.cpp @@ -216,11 +216,10 @@ void SEAM::BreakPieces(const GenericVector& seams, void SEAM::JoinPieces(const GenericVector& seams, const GenericVector& blobs, int first, int last) { TESSLINE* outline = blobs[first]->outlines; - if (!outline) - return; + if (!outline) return; for (int x = first; x < last; ++x) { - SEAM *seam = seams[x]; + SEAM* seam = seams[x]; if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide(); while (outline->next) outline = outline->next; outline->next = blobs[x + 1]->outlines; @@ -250,9 +249,9 @@ float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, splits_[s].SplitOutline(); } float full_priority = - priority_ + - splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, - center_knob, width_change_knob); + priority_ + splits_[0].FullPriority(xmin, xmax, overlap_knob, + centered_maxwidth, center_knob, + width_change_knob); for (int s = num_splits_ - 1; s >= 1; --s) { splits_[s].UnsplitOutlines(); } diff --git a/src/ccstruct/seam.h b/src/ccstruct/seam.h index 7179bd8a88..333e81001d 100644 --- a/src/ccstruct/seam.h +++ b/src/ccstruct/seam.h @@ -39,7 +39,7 @@ /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ -using PRIORITY = float; /* PRIORITY */ +using PRIORITY = float; /* PRIORITY */ class SEAM { public: diff --git a/src/ccstruct/split.cpp b/src/ccstruct/split.cpp index 4d72a7ea53..4dab8c5f63 100644 --- a/src/ccstruct/split.cpp +++ b/src/ccstruct/split.cpp @@ -30,8 +30,8 @@ #include "config_auto.h" #endif -#include "split.h" #include "coutln.h" +#include "split.h" #include "tprintf.h" #include @@ -52,9 +52,10 @@ BOOL_VAR(wordrec_display_splits, 0, "Display splits"); // Returns the bounding box of all the points in the split. TBOX SPLIT::bounding_box() const { - return TBOX( - std::min(point1->pos.x, point2->pos.x), std::min(point1->pos.y, point2->pos.y), - std::max(point1->pos.x, point2->pos.x), std::max(point1->pos.y, point2->pos.y)); + return TBOX(std::min(point1->pos.x, point2->pos.x), + std::min(point1->pos.y, point2->pos.y), + std::max(point1->pos.x, point2->pos.x), + std::max(point1->pos.y, point2->pos.y)); } // Hides the SPLIT so the outlines appear not to be cut by it. @@ -111,10 +112,12 @@ float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, } // grade_center_of_blob. if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) { - grade += std::min(static_cast(kCenterGradeCap), center_knob * abs(width1 - width2)); + grade += std::min(static_cast(kCenterGradeCap), + center_knob * abs(width1 - width2)); } // grade_width_change. - float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2)); + float width_change_grade = + 20 - (max_right - min_left - std::max(width1, width2)); if (width_change_grade > 0.0f) grade += width_change_grade * width_change_knob; return grade; @@ -146,8 +149,8 @@ bool SPLIT::IsLittleChunk(int min_points, int min_area) const { * * Create an EDGEPT and hook it into an existing list of edge points. **********************************************************************/ -EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) { - EDGEPT *this_edgept; +EDGEPT* make_edgept(int x, int y, EDGEPT* next, EDGEPT* prev) { + EDGEPT* this_edgept; /* Create point */ this_edgept = new EDGEPT; this_edgept->pos.x = x; @@ -207,9 +210,9 @@ EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) { * * Remove a given EDGEPT from its list and delete it. **********************************************************************/ -void remove_edgept(EDGEPT *point) { - EDGEPT *prev = point->prev; - EDGEPT *next = point->next; +void remove_edgept(EDGEPT* point) { + EDGEPT* prev = point->prev; + EDGEPT* next = point->next; // Add point's steps onto prev's steps if they are from the same outline. if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) { prev->step_count += point->step_count; diff --git a/src/ccstruct/split.h b/src/ccstruct/split.h index 440786e83a..c88f3cd80b 100644 --- a/src/ccstruct/split.h +++ b/src/ccstruct/split.h @@ -100,8 +100,8 @@ struct SPLIT { // Removes the split that was put between these two points. void UnsplitOutlines() const; - EDGEPT *point1; - EDGEPT *point2; + EDGEPT* point1; + EDGEPT* point2; }; /*---------------------------------------------------------------------- @@ -113,8 +113,8 @@ extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits"); /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev); +EDGEPT* make_edgept(int x, int y, EDGEPT* next, EDGEPT* prev); -void remove_edgept(EDGEPT *point); +void remove_edgept(EDGEPT* point); #endif diff --git a/src/ccstruct/statistc.cpp b/src/ccstruct/statistc.cpp index b825d57be7..12e1f1a7c0 100644 --- a/src/ccstruct/statistc.cpp +++ b/src/ccstruct/statistc.cpp @@ -22,13 +22,13 @@ #include "config_auto.h" #endif -#include "statistc.h" -#include -#include -#include -#include "helpers.h" -#include "scrollview.h" -#include "tprintf.h" +#include +#include +#include +#include "helpers.h" +#include "scrollview.h" +#include "statistc.h" +#include "tprintf.h" using tesseract::KDPairInc; @@ -42,7 +42,7 @@ STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) { min_bucket_value = 0; max_bucket_value_plus_1 = 1; } - rangemin_ = min_bucket_value; // setup + rangemin_ = min_bucket_value; // setup rangemax_ = max_bucket_value_plus_1; buckets_ = new int32_t[rangemax_ - rangemin_]; clear(); @@ -59,17 +59,18 @@ STATS::STATS() { * * Alter the range on an existing stats element. **********************************************************************/ -bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) { +bool STATS::set_range(int32_t min_bucket_value, + int32_t max_bucket_value_plus_1) { if (max_bucket_value_plus_1 <= min_bucket_value) { return false; } if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) { - delete [] buckets_; + delete[] buckets_; buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value]; } - rangemin_ = min_bucket_value; // setup + rangemin_ = min_bucket_value; // setup rangemax_ = max_bucket_value_plus_1; - clear(); // zero it + clear(); // zero it return true; } @@ -102,7 +103,7 @@ void STATS::add(int32_t value, int32_t count) { } value = ClipToRange(value, rangemin_, rangemax_ - 1); buckets_[value - rangemin_] += count; - total_count_ += count; // keep count of total + total_count_ += count; // keep count of total } /********************************************************************** @@ -114,15 +115,15 @@ int32_t STATS::mode() const { // get mode of samples if (buckets_ == nullptr) { return rangemin_; } - int32_t max = buckets_[0]; // max cell count - int32_t maxindex = 0; // index of max + int32_t max = buckets_[0]; // max cell count + int32_t maxindex = 0; // index of max for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) { if (buckets_[index] > max) { - max = buckets_[index]; // find biggest + max = buckets_[index]; // find biggest maxindex = index; } } - return maxindex + rangemin_; // index of biggest + return maxindex + rangemin_; // index of biggest } /********************************************************************** @@ -130,7 +131,7 @@ int32_t STATS::mode() const { // get mode of samples * * Find the mean of a stats class. **********************************************************************/ -double STATS::mean() const { //get mean of samples +double STATS::mean() const { // get mean of samples if (buckets_ == nullptr || total_count_ <= 0) { return static_cast(rangemin_); } @@ -146,7 +147,7 @@ double STATS::mean() const { //get mean of samples * * Find the standard deviation of a stats class. **********************************************************************/ -double STATS::sd() const { //standard deviation +double STATS::sd() const { // standard deviation if (buckets_ == nullptr || total_count_ <= 0) { return 0.0; } @@ -158,8 +159,7 @@ double STATS::sd() const { //standard deviation } double variance = static_cast(sum) / total_count_; variance = sqsum / total_count_ - variance * variance; - if (variance > 0.0) - return sqrt(variance); + if (variance > 0.0) return sqrt(variance); return 0.0; } @@ -186,11 +186,12 @@ double STATS::ile(double frac) const { int sum = 0; int index = 0; for (index = 0; index < rangemax_ - rangemin_ && sum < target; - sum += buckets_[index++]); + sum += buckets_[index++]) + ; if (index > 0) { ASSERT_HOST(buckets_[index - 1] > 0); return rangemin_ + index - - static_cast(sum - target) / buckets_[index - 1]; + static_cast(sum - target) / buckets_[index - 1]; } else { return static_cast(rangemin_); } @@ -206,7 +207,8 @@ int32_t STATS::min_bucket() const { // Find min return rangemin_; } int32_t min = 0; - for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++); + for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) + ; return rangemin_ + min; } @@ -221,7 +223,8 @@ int32_t STATS::max_bucket() const { // Find max return rangemin_; } int32_t max; - for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--); + for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) + ; return rangemin_ + max; } @@ -234,7 +237,7 @@ int32_t STATS::max_bucket() const { // Find max * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway * between 6 and 13 = 9.5 **********************************************************************/ -double STATS::median() const { //get median +double STATS::median() const { // get median if (buckets_ == nullptr) { return static_cast(rangemin_); } @@ -244,9 +247,11 @@ double STATS::median() const { //get median int32_t min_pile; int32_t max_pile; /* Find preceding non zero pile */ - for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--); + for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--) + ; /* Find following non zero pile */ - for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++); + for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++) + ; median = (min_pile + max_pile) / 2.0; } return median; @@ -262,14 +267,14 @@ bool STATS::local_min(int32_t x) const { return false; } x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_; - if (buckets_[x] == 0) - return true; - int32_t index; // table index - for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index); - if (index >= 0 && buckets_[index] < buckets_[x]) - return false; - for (index = x + 1; index < rangemax_ - rangemin_ && - buckets_[index] == buckets_[x]; ++index); + if (buckets_[x] == 0) return true; + int32_t index; // table index + for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index) + ; + if (index >= 0 && buckets_[index] < buckets_[x]) return false; + for (index = x + 1; + index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) + ; if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) return false; else @@ -291,7 +296,7 @@ void STATS::smooth(int32_t factor) { STATS result(rangemin_, rangemax_); int entrycount = rangemax_ - rangemin_; for (int entry = 0; entry < entrycount; entry++) { - //centre weight + // centre weight int count = buckets_[entry] * factor; for (int offset = 1; offset < factor; offset++) { if (entry - offset >= 0) @@ -315,46 +320,45 @@ void STATS::smooth(int32_t factor) { * The return value is the current number of clusters. **********************************************************************/ -int32_t STATS::cluster(float lower, // thresholds - float upper, - float multiple, // distance threshold - int32_t max_clusters, // max no to make - STATS *clusters) { // array of clusters - bool new_cluster; // added one - float *centres; // cluster centres - int32_t entry; // bucket index - int32_t cluster; // cluster index - int32_t best_cluster; // one to assign to - int32_t new_centre = 0; // residual mode - int32_t new_mode; // pile count of new_centre - int32_t count; // pile to place - float dist; // from cluster - float min_dist; // from best_cluster - int32_t cluster_count; // no of clusters - - if (buckets_ == nullptr || max_clusters < 1) - return 0; +int32_t STATS::cluster(float lower, // thresholds + float upper, + float multiple, // distance threshold + int32_t max_clusters, // max no to make + STATS* clusters) { // array of clusters + bool new_cluster; // added one + float* centres; // cluster centres + int32_t entry; // bucket index + int32_t cluster; // cluster index + int32_t best_cluster; // one to assign to + int32_t new_centre = 0; // residual mode + int32_t new_mode; // pile count of new_centre + int32_t count; // pile to place + float dist; // from cluster + float min_dist; // from best_cluster + int32_t cluster_count; // no of clusters + + if (buckets_ == nullptr || max_clusters < 1) return 0; centres = new float[max_clusters + 1]; - for (cluster_count = 1; cluster_count <= max_clusters - && clusters[cluster_count].buckets_ != nullptr - && clusters[cluster_count].total_count_ > 0; + for (cluster_count = 1; cluster_count <= max_clusters && + clusters[cluster_count].buckets_ != nullptr && + clusters[cluster_count].total_count_ > 0; cluster_count++) { centres[cluster_count] = - static_cast(clusters[cluster_count].ile(0.5)); + static_cast(clusters[cluster_count].ile(0.5)); new_centre = clusters[cluster_count].mode(); - for (entry = new_centre - 1; centres[cluster_count] - entry < lower - && entry >= rangemin_ - && pile_count(entry) <= pile_count(entry + 1); + for (entry = new_centre - 1; + centres[cluster_count] - entry < lower && entry >= rangemin_ && + pile_count(entry) <= pile_count(entry + 1); entry--) { count = pile_count(entry) - clusters[0].pile_count(entry); if (count > 0) { clusters[cluster_count].add(entry, count); - clusters[0].add (entry, count); + clusters[0].add(entry, count); } } - for (entry = new_centre + 1; entry - centres[cluster_count] < lower - && entry < rangemax_ - && pile_count(entry) <= pile_count(entry - 1); + for (entry = new_centre + 1; + entry - centres[cluster_count] < lower && entry < rangemax_ && + pile_count(entry) <= pile_count(entry - 1); entry++) { count = pile_count(entry) - clusters[0].pile_count(entry); if (count > 0) { @@ -373,24 +377,23 @@ int32_t STATS::cluster(float lower, // thresholds new_mode = 0; for (entry = 0; entry < rangemax_ - rangemin_; entry++) { count = buckets_[entry] - clusters[0].buckets_[entry]; - //remaining pile - if (count > 0) { //any to handle + // remaining pile + if (count > 0) { // any to handle min_dist = static_cast(INT32_MAX); best_cluster = 0; for (cluster = 1; cluster <= cluster_count; cluster++) { dist = entry + rangemin_ - centres[cluster]; - //find distance - if (dist < 0) - dist = -dist; + // find distance + if (dist < 0) dist = -dist; if (dist < min_dist) { - min_dist = dist; //find least + min_dist = dist; // find least best_cluster = cluster; } } - if (min_dist > upper //far enough for new - && (best_cluster == 0 - || entry + rangemin_ > centres[best_cluster] * multiple - || entry + rangemin_ < centres[best_cluster] / multiple)) { + if (min_dist > upper // far enough for new + && (best_cluster == 0 || + entry + rangemin_ > centres[best_cluster] * multiple || + entry + rangemin_ < centres[best_cluster] / multiple)) { if (count > new_mode) { new_mode = count; new_centre = entry + rangemin_; @@ -398,40 +401,42 @@ int32_t STATS::cluster(float lower, // thresholds } } } - // need new and room + // need new and room if (new_mode > 0 && cluster_count < max_clusters) { cluster_count++; new_cluster = true; if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) { - delete [] centres; + delete[] centres; return 0; } centres[cluster_count] = static_cast(new_centre); clusters[cluster_count].add(new_centre, new_mode); clusters[0].add(new_centre, new_mode); - for (entry = new_centre - 1; centres[cluster_count] - entry < lower - && entry >= rangemin_ - && pile_count (entry) <= pile_count(entry + 1); entry--) { + for (entry = new_centre - 1; + centres[cluster_count] - entry < lower && entry >= rangemin_ && + pile_count(entry) <= pile_count(entry + 1); + entry--) { count = pile_count(entry) - clusters[0].pile_count(entry); if (count > 0) { clusters[cluster_count].add(entry, count); clusters[0].add(entry, count); } } - for (entry = new_centre + 1; entry - centres[cluster_count] < lower - && entry < rangemax_ - && pile_count (entry) <= pile_count(entry - 1); entry++) { + for (entry = new_centre + 1; + entry - centres[cluster_count] < lower && entry < rangemax_ && + pile_count(entry) <= pile_count(entry - 1); + entry++) { count = pile_count(entry) - clusters[0].pile_count(entry); if (count > 0) { clusters[cluster_count].add(entry, count); - clusters[0].add (entry, count); + clusters[0].add(entry, count); } } centres[cluster_count] = - static_cast(clusters[cluster_count].ile(0.5)); + static_cast(clusters[cluster_count].ile(0.5)); } } while (new_cluster && cluster_count < max_clusters); - delete [] centres; + delete[] centres; return cluster_count; } @@ -465,7 +470,7 @@ static bool GatherPeak(int index, const int* src_buckets, int* used_buckets, // more useful than decreasing total count. // Returns the actual number of modes found. int STATS::top_n_modes(int max_modes, - GenericVector >* modes) const { + GenericVector>* modes) const { if (max_modes <= 0) return 0; int src_count = rangemax_ - rangemin_; // Used copies the counts in buckets_ as they get used. @@ -494,20 +499,19 @@ int STATS::top_n_modes(int max_modes, int total_count = max_count; int prev_pile = max_count; for (int offset = 1; max_index + offset < src_count; ++offset) { - if (!GatherPeak(max_index + offset, buckets_, used.buckets_, - &prev_pile, &total_count, &total_value)) + if (!GatherPeak(max_index + offset, buckets_, used.buckets_, &prev_pile, + &total_count, &total_value)) break; } prev_pile = buckets_[max_index]; for (int offset = 1; max_index - offset >= 0; ++offset) { - if (!GatherPeak(max_index - offset, buckets_, used.buckets_, - &prev_pile, &total_count, &total_value)) + if (!GatherPeak(max_index - offset, buckets_, used.buckets_, &prev_pile, + &total_count, &total_value)) break; } if (total_count > least_count || modes->size() < max_modes) { // We definitely want this mode, so if we have enough discard the least. - if (modes->size() == max_modes) - modes->truncate(max_modes - 1); + if (modes->size() == max_modes) modes->truncate(max_modes - 1); int target_index = 0; // Linear search for the target insertion point. while (target_index < modes->size() && @@ -540,16 +544,13 @@ void STATS::print() const { for (int index = min; index <= max; index++) { if (buckets_[index] != 0) { tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]); - if (++num_printed % 8 == 0) - tprintf ("\n"); + if (++num_printed % 8 == 0) tprintf("\n"); } } - tprintf ("\n"); + tprintf("\n"); print_summary(); } - - /********************************************************************** * STATS::print_summary * @@ -572,7 +573,6 @@ void STATS::print_summary() const { tprintf("SD= %.2f\n", sd()); } - /********************************************************************** * STATS::plot * @@ -583,23 +583,22 @@ void STATS::print_summary() const { void STATS::plot(ScrollView* window, // to draw in float xorigin, // bottom left float yorigin, - float xscale, // one x unit - float yscale, // one y unit - ScrollView::Color colour) const { // colour to draw in + float xscale, // one x unit + float yscale, // one y unit + ScrollView::Color colour) const { // colour to draw in if (buckets_ == nullptr) { return; } window->Pen(colour); for (int index = 0; index < rangemax_ - rangemin_; index++) { - window->Rectangle( xorigin + xscale * index, yorigin, - xorigin + xscale * (index + 1), - yorigin + yscale * buckets_[index]); + window->Rectangle(xorigin + xscale * index, yorigin, + xorigin + xscale * (index + 1), + yorigin + yscale * buckets_[index]); } } #endif - /********************************************************************** * STATS::plotline * @@ -610,8 +609,8 @@ void STATS::plot(ScrollView* window, // to draw in void STATS::plotline(ScrollView* window, // to draw in float xorigin, // bottom left float yorigin, - float xscale, // one x unit - float yscale, // one y unit + float xscale, // one x unit + float yscale, // one y unit ScrollView::Color colour) const { // colour to draw in if (buckets_ == nullptr) { return; @@ -625,7 +624,6 @@ void STATS::plotline(ScrollView* window, // to draw in } #endif - /********************************************************************** * choose_nth_item * @@ -633,32 +631,29 @@ void STATS::plotline(ScrollView* window, // to draw in * if the members were sorted, without actually sorting. **********************************************************************/ -int32_t choose_nth_item(int32_t index, float *array, int32_t count) { - int32_t next_sample; // next one to do - int32_t next_lesser; // space for new - int32_t prev_greater; // last one saved - int32_t equal_count; // no of equal ones - float pivot; // proposed median - float sample; // current sample +int32_t choose_nth_item(int32_t index, float* array, int32_t count) { + int32_t next_sample; // next one to do + int32_t next_lesser; // space for new + int32_t prev_greater; // last one saved + int32_t equal_count; // no of equal ones + float pivot; // proposed median + float sample; // current sample - if (count <= 1) - return 0; + if (count <= 1) return 0; if (count == 2) { if (array[0] < array[1]) { return index >= 1 ? 1 : 0; - } - else { + } else { return index >= 1 ? 0 : 1; } - } - else { + } else { if (index < 0) - index = 0; // ensure legal + index = 0; // ensure legal else if (index >= count) index = count - 1; - equal_count = (int32_t) (rand() % count); + equal_count = (int32_t)(rand() % count); pivot = array[equal_count]; - // fill gap + // fill gap array[equal_count] = array[0]; next_lesser = 0; prev_greater = count; @@ -666,17 +661,15 @@ int32_t choose_nth_item(int32_t index, float *array, int32_t count) { for (next_sample = 1; next_sample < prev_greater;) { sample = array[next_sample]; if (sample < pivot) { - // shuffle + // shuffle array[next_lesser++] = sample; next_sample++; - } - else if (sample > pivot) { + } else if (sample > pivot) { prev_greater--; - // juggle + // juggle array[next_sample] = array[prev_greater]; array[prev_greater] = sample; - } - else { + } else { equal_count++; next_sample++; } @@ -684,13 +677,13 @@ int32_t choose_nth_item(int32_t index, float *array, int32_t count) { for (next_sample = next_lesser; next_sample < prev_greater;) array[next_sample++] = pivot; if (index < next_lesser) - return choose_nth_item (index, array, next_lesser); + return choose_nth_item(index, array, next_lesser); else if (index < prev_greater) - return next_lesser; // in equal bracket + return next_lesser; // in equal bracket else - return choose_nth_item (index - prev_greater, - array + prev_greater, - count - prev_greater) + prev_greater; + return choose_nth_item(index - prev_greater, array + prev_greater, + count - prev_greater) + + prev_greater; } } @@ -700,60 +693,55 @@ int32_t choose_nth_item(int32_t index, float *array, int32_t count) { * Returns the index of what would be the nth item in the array * if the members were sorted, without actually sorting. **********************************************************************/ -int32_t choose_nth_item(int32_t index, void *array, int32_t count, size_t size, - int (*compar)(const void*, const void*)) { - int result; // of compar - int32_t next_sample; // next one to do - int32_t next_lesser; // space for new - int32_t prev_greater; // last one saved - int32_t equal_count; // no of equal ones - int32_t pivot; // proposed median - - if (count <= 1) - return 0; +int32_t choose_nth_item(int32_t index, void* array, int32_t count, size_t size, + int (*compar)(const void*, const void*)) { + int result; // of compar + int32_t next_sample; // next one to do + int32_t next_lesser; // space for new + int32_t prev_greater; // last one saved + int32_t equal_count; // no of equal ones + int32_t pivot; // proposed median + + if (count <= 1) return 0; if (count == 2) { - if (compar (array, (char *) array + size) < 0) { + if (compar(array, (char*)array + size) < 0) { return index >= 1 ? 1 : 0; - } - else { + } else { return index >= 1 ? 0 : 1; } } if (index < 0) - index = 0; // ensure legal + index = 0; // ensure legal else if (index >= count) index = count - 1; - pivot = (int32_t) (rand () % count); - swap_entries (array, size, pivot, 0); + pivot = (int32_t)(rand() % count); + swap_entries(array, size, pivot, 0); next_lesser = 0; prev_greater = count; equal_count = 1; for (next_sample = 1; next_sample < prev_greater;) { - result = - compar ((char *) array + size * next_sample, - (char *) array + size * next_lesser); + result = compar((char*)array + size * next_sample, + (char*)array + size * next_lesser); if (result < 0) { - swap_entries (array, size, next_lesser++, next_sample++); + swap_entries(array, size, next_lesser++, next_sample++); // shuffle - } - else if (result > 0) { + } else if (result > 0) { prev_greater--; swap_entries(array, size, prev_greater, next_sample); - } - else { + } else { equal_count++; next_sample++; } } if (index < next_lesser) - return choose_nth_item (index, array, next_lesser, size, compar); + return choose_nth_item(index, array, next_lesser, size, compar); else if (index < prev_greater) - return next_lesser; // in equal bracket + return next_lesser; // in equal bracket else - return choose_nth_item (index - prev_greater, - (char *) array + size * prev_greater, - count - prev_greater, size, - compar) + prev_greater; + return choose_nth_item(index - prev_greater, + (char*)array + size * prev_greater, + count - prev_greater, size, compar) + + prev_greater; } /********************************************************************** @@ -761,20 +749,20 @@ int32_t choose_nth_item(int32_t index, void *array, int32_t count, size_t size, * * Swap 2 entries of arbitrary size in-place in a table. **********************************************************************/ -void swap_entries(void *array, // array of entries - size_t size, // size of entry +void swap_entries(void* array, // array of entries + size_t size, // size of entry int32_t index1, // entries to swap int32_t index2) { char tmp; - char *ptr1; // to entries - char *ptr2; - size_t count; // of bytes + char* ptr1; // to entries + char* ptr2; + size_t count; // of bytes - ptr1 = static_cast(array) + index1 * size; - ptr2 = static_cast(array) + index2 * size; + ptr1 = static_cast(array) + index1 * size; + ptr2 = static_cast(array) + index2 * size; for (count = 0; count < size; count++) { tmp = *ptr1; *ptr1++ = *ptr2; - *ptr2++ = tmp; // tedious! + *ptr2++ = tmp; // tedious! } } diff --git a/src/ccstruct/statistc.h b/src/ccstruct/statistc.h index 5bae704bfc..e4ce9d5a81 100644 --- a/src/ccstruct/statistc.h +++ b/src/ccstruct/statistc.h @@ -25,8 +25,8 @@ #include "kdpair.h" #include "scrollview.h" -template class GenericVector; - +template +class GenericVector; // Simple histogram-based statistics for integer values in a known // range, such that the range is small compared to the number of samples. @@ -57,8 +57,8 @@ class STATS { // "Accessors" return various statistics on the data. int32_t mode() const; // get mode of samples - double mean() const; // get mean of samples - double sd() const; // standard deviation + double mean() const; // get mean of samples + double sd() const; // standard deviation // Returns the fractile value such that frac fraction (in [0,1]) of samples // has a value less than the return value. double ile(double frac) const; @@ -75,16 +75,14 @@ class STATS { // between 6 and 13 = 9.5 double median() const; // get median of samples // Returns the count of the given value. - int32_t pile_count(int32_t value ) const { - if (value <= rangemin_) - return buckets_[0]; - if (value >= rangemax_ - 1) - return buckets_[rangemax_ - rangemin_ - 1]; + int32_t pile_count(int32_t value) const { + if (value <= rangemin_) return buckets_[0]; + if (value >= rangemax_ - 1) return buckets_[rangemax_ - rangemin_ - 1]; return buckets_[value - rangemin_]; } // Returns the total count of all buckets. int32_t get_total() const { - return total_count_; // total of all piles + return total_count_; // total of all piles } // Returns true if x is a local min. bool local_min(int32_t x) const; @@ -100,69 +98,69 @@ class STATS { // max_clusters+1 in size as cluster 0 is used to indicate which samples // have been used. // The return value is the current number of clusters. - int32_t cluster(float lower, // thresholds - float upper, - float multiple, // distance threshold - int32_t max_clusters, // max no to make - STATS *clusters); // array of clusters - -// Finds (at most) the top max_modes modes, well actually the whole peak around -// each mode, returning them in the given modes vector as a pair in order of decreasing total count. -// Since the mean is the key and the count the data in the pair, a single call -// to sort on the output will re-sort by increasing mean of peak if that is -// more useful than decreasing total count. -// Returns the actual number of modes found. - int top_n_modes( - int max_modes, - GenericVector >* modes) const; + int32_t cluster(float lower, // thresholds + float upper, + float multiple, // distance threshold + int32_t max_clusters, // max no to make + STATS* clusters); // array of clusters + + // Finds (at most) the top max_modes modes, well actually the whole peak + // around each mode, returning them in the given modes vector as a pair in order of decreasing total count. Since + // the mean is the key and the count the data in the pair, a single call to + // sort on the output will re-sort by increasing mean of peak if that is more + // useful than decreasing total count. Returns the actual number of modes + // found. + int top_n_modes(int max_modes, + GenericVector>* modes) const; // Prints a summary and table of the histogram. void print() const; // Prints summary stats only of the histogram. void print_summary() const; - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED // Draws the histogram as a series of rectangles. - void plot(ScrollView* window, // window to draw in - float xorigin, // origin of histo - float yorigin, // gram - float xscale, // size of one unit - float yscale, // size of one uint + void plot(ScrollView* window, // window to draw in + float xorigin, // origin of histo + float yorigin, // gram + float xscale, // size of one unit + float yscale, // size of one uint ScrollView::Color colour) const; // colour to draw in // Draws a line graph of the histogram. - void plotline(ScrollView* window, // window to draw in - float xorigin, // origin of histo - float yorigin, // gram - float xscale, // size of one unit - float yscale, // size of one uint + void plotline(ScrollView* window, // window to draw in + float xorigin, // origin of histo + float yorigin, // gram + float xscale, // size of one unit + float yscale, // size of one uint ScrollView::Color colour) const; // colour to draw in - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED private: - int32_t rangemin_; // min of range + int32_t rangemin_; // min of range // rangemax_ is not well named as it is really one past the max. - int32_t rangemax_; // max of range - int32_t total_count_; // no of samples - int32_t* buckets_; // array of cells + int32_t rangemax_; // max of range + int32_t total_count_; // no of samples + int32_t* buckets_; // array of cells }; // Returns the nth ordered item from the array, as if they were // ordered, but without ordering them, in linear time. // The array does get shuffled! int32_t choose_nth_item(int32_t index, // index to choose - float *array, // array of items - int32_t count); // no of items + float* array, // array of items + int32_t count); // no of items // Generic version uses a defined comparator (with qsort semantics). -int32_t choose_nth_item(int32_t index, // index to choose - void *array, // array of items - int32_t count, // no of items - size_t size, // element size - int (*compar)(const void*, const void*)); // comparator +int32_t choose_nth_item(int32_t index, // index to choose + void* array, // array of items + int32_t count, // no of items + size_t size, // element size + int (*compar)(const void*, + const void*)); // comparator // Swaps 2 entries in an array in-place. -void swap_entries(void *array, // array of entries - size_t size, // size of entry +void swap_entries(void* array, // array of entries + size_t size, // size of entry int32_t index1, // entries to swap int32_t index2); diff --git a/src/ccstruct/stepblob.cpp b/src/ccstruct/stepblob.cpp index c58d56903d..b8ddcd6291 100644 --- a/src/ccstruct/stepblob.cpp +++ b/src/ccstruct/stepblob.cpp @@ -28,62 +28,59 @@ // Max perimeter to width ratio for a baseline position above box bottom. const double kMaxPerimeterWidthRatio = 8.0; -ELISTIZE (C_BLOB) +ELISTIZE(C_BLOB) /********************************************************************** * position_outline * * Position the outline in the given list at the relevant place * according to its nesting. **********************************************************************/ -static void position_outline( //put in place - C_OUTLINE *outline, //thing to place - C_OUTLINE_LIST *destlist //desstination list - ) { - C_OUTLINE *dest_outline; //outline from dest list - C_OUTLINE_IT it = destlist; //iterator - //iterator on children - C_OUTLINE_IT child_it = outline->child (); - - if (!it.empty ()) { +static void position_outline( // put in place + C_OUTLINE* outline, // thing to place + C_OUTLINE_LIST* destlist // desstination list +) { + C_OUTLINE* dest_outline; // outline from dest list + C_OUTLINE_IT it = destlist; // iterator + // iterator on children + C_OUTLINE_IT child_it = outline->child(); + + if (!it.empty()) { do { - dest_outline = it.data (); //get destination - //encloses dest + dest_outline = it.data(); // get destination + // encloses dest if (*dest_outline < *outline) { - //take off list - dest_outline = it.extract (); - //put this in place - it.add_after_then_move (outline); - //make it a child - child_it.add_to_end (dest_outline); - while (!it.at_last ()) { - it.forward (); //do rest of list - //check for other children - dest_outline = it.data (); + // take off list + dest_outline = it.extract(); + // put this in place + it.add_after_then_move(outline); + // make it a child + child_it.add_to_end(dest_outline); + while (!it.at_last()) { + it.forward(); // do rest of list + // check for other children + dest_outline = it.data(); if (*dest_outline < *outline) { - //take off list - dest_outline = it.extract (); - child_it.add_to_end (dest_outline); - //make it a child - if (it.empty ()) - break; + // take off list + dest_outline = it.extract(); + child_it.add_to_end(dest_outline); + // make it a child + if (it.empty()) break; } } - return; //finished + return; // finished } - //enclosed by dest + // enclosed by dest else if (*outline < *dest_outline) { - position_outline (outline, dest_outline->child ()); - //place in child list - return; //finished + position_outline(outline, dest_outline->child()); + // place in child list + return; // finished } - it.forward (); - } - while (!it.at_first ()); + it.forward(); + } while (!it.at_first()); } - it.add_to_end (outline); //at outer level + it.add_to_end(outline); // at outer level } - /********************************************************************** * plot_outline_list * @@ -92,29 +89,27 @@ static void position_outline( //put in place **********************************************************************/ #ifndef GRAPHICS_DISABLED -static void plot_outline_list( //draw outlines - C_OUTLINE_LIST *list, //outline to draw - ScrollView* window, //window to draw in - ScrollView::Color colour, //colour to use - ScrollView::Color child_colour //colour of children - ) { - C_OUTLINE *outline; //current outline - C_OUTLINE_IT it = list; //iterator - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - //draw it - outline->plot (window, colour); - if (!outline->child ()->empty ()) - plot_outline_list (outline->child (), window, - child_colour, child_colour); +static void plot_outline_list( // draw outlines + C_OUTLINE_LIST* list, // outline to draw + ScrollView* window, // window to draw in + ScrollView::Color colour, // colour to use + ScrollView::Color child_colour // colour of children +) { + C_OUTLINE* outline; // current outline + C_OUTLINE_IT it = list; // iterator + + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + outline = it.data(); + // draw it + outline->plot(window, colour); + if (!outline->child()->empty()) + plot_outline_list(outline->child(), window, child_colour, child_colour); } } // Draws the outlines in the given colour, and child_colour, normalized // using the given denorm, making use of sub-pixel accurate information // if available. -static void plot_normed_outline_list(const DENORM& denorm, - C_OUTLINE_LIST *list, +static void plot_normed_outline_list(const DENORM& denorm, C_OUTLINE_LIST* list, ScrollView::Color colour, ScrollView::Color child_colour, ScrollView* window) { @@ -129,26 +124,23 @@ static void plot_normed_outline_list(const DENORM& denorm, } #endif - /********************************************************************** * reverse_outline_list * * Reverse a list of outlines and their children. **********************************************************************/ -static void reverse_outline_list(C_OUTLINE_LIST *list) { - C_OUTLINE_IT it = list; // iterator +static void reverse_outline_list(C_OUTLINE_LIST* list) { + C_OUTLINE_IT it = list; // iterator for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { C_OUTLINE* outline = it.data(); - outline->reverse(); // reverse it + outline->reverse(); // reverse it outline->set_flag(COUT_INVERSE, TRUE); - if (!outline->child()->empty()) - reverse_outline_list(outline->child()); + if (!outline->child()->empty()) reverse_outline_list(outline->child()); } } - /********************************************************************** * C_BLOB::C_BLOB * @@ -157,7 +149,7 @@ static void reverse_outline_list(C_OUTLINE_LIST *list) { * The C_OUTLINEs are nested correctly in the blob. **********************************************************************/ -C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) { +C_BLOB::C_BLOB(C_OUTLINE_LIST* outline_list) { for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) { C_OUTLINE* outline = ol_it.extract(); // Position this outline in appropriate position in the hierarchy. @@ -232,7 +224,6 @@ void C_BLOB::CheckInverseFlagAndDirection() { } } - // Build and return a fake blob containing a single fake outline with no // steps. C_BLOB* C_BLOB::FakeBlob(const TBOX& box) { @@ -248,34 +239,33 @@ C_BLOB* C_BLOB::FakeBlob(const TBOX& box) { **********************************************************************/ TBOX C_BLOB::bounding_box() const { // bounding box - C_OUTLINE *outline; // current outline + C_OUTLINE* outline; // current outline // This is a read-only iteration of the outlines. C_OUTLINE_IT it = const_cast(&outlines); - TBOX box; // bounding box + TBOX box; // bounding box - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - box += outline->bounding_box (); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + outline = it.data(); + box += outline->bounding_box(); } return box; } - /********************************************************************** * C_BLOB::area * * Return the area of the blob. **********************************************************************/ -int32_t C_BLOB::area() { //area - C_OUTLINE *outline; //current outline - C_OUTLINE_IT it = &outlines; //outlines of blob - int32_t total; //total area +int32_t C_BLOB::area() { // area + C_OUTLINE* outline; // current outline + C_OUTLINE_IT it = &outlines; // outlines of blob + int32_t total; // total area total = 0; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - total += outline->area (); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + outline = it.data(); + total += outline->area(); } return total; } @@ -287,9 +277,9 @@ int32_t C_BLOB::area() { //area **********************************************************************/ int32_t C_BLOB::perimeter() { - C_OUTLINE *outline; // current outline - C_OUTLINE_IT it = &outlines; // outlines of blob - int32_t total; // total perimeter + C_OUTLINE* outline; // current outline + C_OUTLINE_IT it = &outlines; // outlines of blob + int32_t total; // total perimeter total = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { @@ -299,27 +289,25 @@ int32_t C_BLOB::perimeter() { return total; } - /********************************************************************** * C_BLOB::outer_area * * Return the area of the blob. **********************************************************************/ -int32_t C_BLOB::outer_area() { //area - C_OUTLINE *outline; //current outline - C_OUTLINE_IT it = &outlines; //outlines of blob - int32_t total; //total area +int32_t C_BLOB::outer_area() { // area + C_OUTLINE* outline; // current outline + C_OUTLINE_IT it = &outlines; // outlines of blob + int32_t total; // total area total = 0; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - total += outline->outer_area (); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + outline = it.data(); + total += outline->outer_area(); } return total; } - /********************************************************************** * C_BLOB::count_transitions * @@ -327,35 +315,34 @@ int32_t C_BLOB::outer_area() { //area * Chlid outlines are not counted. **********************************************************************/ -int32_t C_BLOB::count_transitions( //area - int32_t threshold //on size - ) { - C_OUTLINE *outline; //current outline - C_OUTLINE_IT it = &outlines; //outlines of blob - int32_t total; //total area +int32_t C_BLOB::count_transitions( // area + int32_t threshold // on size +) { + C_OUTLINE* outline; // current outline + C_OUTLINE_IT it = &outlines; // outlines of blob + int32_t total; // total area total = 0; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - total += outline->count_transitions (threshold); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + outline = it.data(); + total += outline->count_transitions(threshold); } return total; } - /********************************************************************** * C_BLOB::move * * Move C_BLOB by vector **********************************************************************/ -void C_BLOB::move( // reposition blob - const ICOORD vec // by vector - ) { +void C_BLOB::move( // reposition blob + const ICOORD vec // by vector +) { C_OUTLINE_IT it(&outlines); // iterator - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - it.data ()->move (vec); // move each outline + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) + it.data()->move(vec); // move each outline } // Static helper for C_BLOB::rotate to allow recursion of child outlines. @@ -391,7 +378,7 @@ void C_BLOB::rotate(const FCOORD& rotation) { // Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the // outline list and its children. static void ComputeEdgeOffsetsOutlineList(int threshold, Pix* pix, - C_OUTLINE_LIST *list) { + C_OUTLINE_LIST* list) { C_OUTLINE_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { C_OUTLINE* outline = it.data(); @@ -439,16 +426,14 @@ int16_t C_BLOB::EstimateBaselinePosition() { C_OUTLINE* outline = it.data(); ICOORD pos = outline->start_pos(); for (int s = 0; s < outline->pathlength(); ++s) { - if (pos.y() < y_mins[pos.x() - left]) - y_mins[pos.x() - left] = pos.y(); + if (pos.y() < y_mins[pos.x() - left]) y_mins[pos.x() - left] = pos.y(); pos += outline->step(s); } } // Find the total extent of the bottom or bottom + 1. int bottom_extent = 0; for (int x = 0; x <= width; ++x) { - if (y_mins[x] == bottom || y_mins[x] == bottom + 1) - ++bottom_extent; + if (y_mins[x] == bottom || y_mins[x] == bottom + 1) ++bottom_extent; } // Find the lowest run longer than the bottom extent that is not the bottom. int best_min = box.top(); @@ -464,9 +449,9 @@ int16_t C_BLOB::EstimateBaselinePosition() { // Possible contender. int total_run = run; // Find extent of current value or +1 to the right of x. - while (x + total_run <= width && - (y_mins[x + total_run] == y_at_x || - y_mins[x + total_run] == y_at_x + 1)) ++total_run; + while (x + total_run <= width && (y_mins[x + total_run] == y_at_x || + y_mins[x + total_run] == y_at_x + 1)) + ++total_run; // At least one end has to be higher so it is not a local max. if (prev_prev_y > y_at_x + 1 || x + total_run > width || y_mins[x + total_run] > y_at_x + 1) { @@ -485,8 +470,8 @@ int16_t C_BLOB::EstimateBaselinePosition() { return best_min == box.top() ? bottom : best_min; } -static void render_outline_list(C_OUTLINE_LIST *list, - int left, int top, Pix* pix) { +static void render_outline_list(C_OUTLINE_LIST* list, int left, int top, + Pix* pix) { C_OUTLINE_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { C_OUTLINE* outline = it.data(); @@ -496,8 +481,8 @@ static void render_outline_list(C_OUTLINE_LIST *list, } } -static void render_outline_list_outline(C_OUTLINE_LIST *list, - int left, int top, Pix* pix) { +static void render_outline_list_outline(C_OUTLINE_LIST* list, int left, int top, + Pix* pix) { C_OUTLINE_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { C_OUTLINE* outline = it.data(); @@ -537,10 +522,8 @@ void C_BLOB::plot(ScrollView* window, // window to draw in // Draws the blob in the given colour, and child_colour, normalized // using the given denorm, making use of sub-pixel accurate information // if available. -void C_BLOB::plot_normed(const DENORM& denorm, - ScrollView::Color blob_colour, - ScrollView::Color child_colour, - ScrollView* window) { +void C_BLOB::plot_normed(const DENORM& denorm, ScrollView::Color blob_colour, + ScrollView::Color child_colour, ScrollView* window) { plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour, window); } diff --git a/src/ccstruct/stepblob.h b/src/ccstruct/stepblob.h index be823f883b..a0a4719e3b 100644 --- a/src/ccstruct/stepblob.h +++ b/src/ccstruct/stepblob.h @@ -17,114 +17,108 @@ * **********************************************************************/ -#ifndef STEPBLOB_H -#define STEPBLOB_H +#ifndef STEPBLOB_H +#define STEPBLOB_H -#include "coutln.h" -#include "rect.h" +#include "coutln.h" +#include "rect.h" class C_BLOB; struct Pix; ELISTIZEH(C_BLOB) -class C_BLOB:public ELIST_LINK -{ - public: - C_BLOB() = default; - explicit C_BLOB(C_OUTLINE_LIST *outline_list); - // Simpler constructor to build a blob from a single outline that has - // already been fully initialized. - explicit C_BLOB(C_OUTLINE* outline); - - // Builds a set of one or more blobs from a list of outlines. - // Input: one outline on outline_list contains all the others, but the - // nesting and order are undefined. - // If good_blob is true, the blob is added to good_blobs_it, unless - // an illegal (generation-skipping) parent-child relationship is found. - // If so, the parent blob goes to bad_blobs_it, and the immediate children - // are promoted to the top level, recursively being sent to good_blobs_it. - // If good_blob is false, all created blobs will go to the bad_blobs_it. - // Output: outline_list is empty. One or more blobs are added to - // good_blobs_it and/or bad_blobs_it. - static void ConstructBlobsFromOutlines(bool good_blob, - C_OUTLINE_LIST* outline_list, - C_BLOB_IT* good_blobs_it, - C_BLOB_IT* bad_blobs_it); - - // Sets the COUT_INVERSE flag appropriately on the outlines and their - // children recursively, reversing the outlines if needed so that - // everything has an anticlockwise top-level. - void CheckInverseFlagAndDirection(); - - // Build and return a fake blob containing a single fake outline with no - // steps. - static C_BLOB* FakeBlob(const TBOX& box); - - C_OUTLINE_LIST *out_list() { //get outline list - return &outlines; - } - - TBOX bounding_box() const; // compute bounding box - int32_t area(); //compute area - int32_t perimeter(); // Total perimeter of outlines and 1st level children. - int32_t outer_area(); //compute area - int32_t count_transitions( //count maxima - int32_t threshold); //size threshold - - void move(const ICOORD vec); // repostion blob by vector - void rotate(const FCOORD& rotation); // Rotate by given vector. - - // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale - // if the supplied pix is 8-bit or the binary edges if nullptr. - void ComputeEdgeOffsets(int threshold, Pix* pix); - - // Estimates and returns the baseline position based on the shape of the - // outlines. - int16_t EstimateBaselinePosition(); - - // Returns a Pix rendering of the blob. pixDestroy after use. - Pix* render(); - // Returns a Pix rendering of the outline of the blob. (no fill). - // pixDestroy after use. - Pix* render_outline(); - - #ifndef GRAPHICS_DISABLED - void plot( //draw one - ScrollView* window, //window to draw in - ScrollView::Color blob_colour, //for outer bits - ScrollView::Color child_colour); //for holes - // Draws the blob in the given colour, and child_colour, normalized - // using the given denorm, making use of sub-pixel accurate information - // if available. - void plot_normed(const DENORM& denorm, - ScrollView::Color blob_colour, - ScrollView::Color child_colour, - ScrollView* window); - #endif // GRAPHICS_DISABLED - - C_BLOB& operator= (const C_BLOB & source) { - if (!outlines.empty ()) - outlines.clear(); - outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy); - return *this; - } - - static C_BLOB* deep_copy(const C_BLOB* src) { - C_BLOB* blob = new C_BLOB; - *blob = *src; - return blob; - } - - static int SortByXMiddle(const void *v1, const void *v2) { - const C_BLOB* blob1 = *static_cast(v1); - const C_BLOB* blob2 = *static_cast(v2); - return blob1->bounding_box().x_middle() - - blob2->bounding_box().x_middle(); - } - - - private: - C_OUTLINE_LIST outlines; //master elements +class C_BLOB : public ELIST_LINK { + public: + C_BLOB() = default; + explicit C_BLOB(C_OUTLINE_LIST* outline_list); + // Simpler constructor to build a blob from a single outline that has + // already been fully initialized. + explicit C_BLOB(C_OUTLINE* outline); + + // Builds a set of one or more blobs from a list of outlines. + // Input: one outline on outline_list contains all the others, but the + // nesting and order are undefined. + // If good_blob is true, the blob is added to good_blobs_it, unless + // an illegal (generation-skipping) parent-child relationship is found. + // If so, the parent blob goes to bad_blobs_it, and the immediate children + // are promoted to the top level, recursively being sent to good_blobs_it. + // If good_blob is false, all created blobs will go to the bad_blobs_it. + // Output: outline_list is empty. One or more blobs are added to + // good_blobs_it and/or bad_blobs_it. + static void ConstructBlobsFromOutlines(bool good_blob, + C_OUTLINE_LIST* outline_list, + C_BLOB_IT* good_blobs_it, + C_BLOB_IT* bad_blobs_it); + + // Sets the COUT_INVERSE flag appropriately on the outlines and their + // children recursively, reversing the outlines if needed so that + // everything has an anticlockwise top-level. + void CheckInverseFlagAndDirection(); + + // Build and return a fake blob containing a single fake outline with no + // steps. + static C_BLOB* FakeBlob(const TBOX& box); + + C_OUTLINE_LIST* out_list() { // get outline list + return &outlines; + } + + TBOX bounding_box() const; // compute bounding box + int32_t area(); // compute area + int32_t perimeter(); // Total perimeter of outlines and 1st level children. + int32_t outer_area(); // compute area + int32_t count_transitions( // count maxima + int32_t threshold); // size threshold + + void move(const ICOORD vec); // repostion blob by vector + void rotate(const FCOORD& rotation); // Rotate by given vector. + + // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale + // if the supplied pix is 8-bit or the binary edges if nullptr. + void ComputeEdgeOffsets(int threshold, Pix* pix); + + // Estimates and returns the baseline position based on the shape of the + // outlines. + int16_t EstimateBaselinePosition(); + + // Returns a Pix rendering of the blob. pixDestroy after use. + Pix* render(); + // Returns a Pix rendering of the outline of the blob. (no fill). + // pixDestroy after use. + Pix* render_outline(); + +#ifndef GRAPHICS_DISABLED + void plot( // draw one + ScrollView* window, // window to draw in + ScrollView::Color blob_colour, // for outer bits + ScrollView::Color child_colour); // for holes + // Draws the blob in the given colour, and child_colour, normalized + // using the given denorm, making use of sub-pixel accurate information + // if available. + void plot_normed(const DENORM& denorm, ScrollView::Color blob_colour, + ScrollView::Color child_colour, ScrollView* window); +#endif // GRAPHICS_DISABLED + + C_BLOB& operator=(const C_BLOB& source) { + if (!outlines.empty()) outlines.clear(); + outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy); + return *this; + } + + static C_BLOB* deep_copy(const C_BLOB* src) { + C_BLOB* blob = new C_BLOB; + *blob = *src; + return blob; + } + + static int SortByXMiddle(const void* v1, const void* v2) { + const C_BLOB* blob1 = *static_cast(v1); + const C_BLOB* blob2 = *static_cast(v2); + return blob1->bounding_box().x_middle() - blob2->bounding_box().x_middle(); + } + + private: + C_OUTLINE_LIST outlines; // master elements }; #endif diff --git a/src/ccstruct/vecfuncs.cpp b/src/ccstruct/vecfuncs.cpp index 86add656dd..7d4428a489 100644 --- a/src/ccstruct/vecfuncs.cpp +++ b/src/ccstruct/vecfuncs.cpp @@ -40,23 +40,21 @@ * * Show if the line is going in the positive or negative X direction. **********************************************************************/ -int direction(EDGEPT *point) { - int dir; /** direction to return **/ - EDGEPT *prev; /** prev point **/ - EDGEPT *next; /** next point **/ +int direction(EDGEPT* point) { + int dir; /** direction to return **/ + EDGEPT* prev; /** prev point **/ + EDGEPT* next; /** next point **/ dir = 0; prev = point->prev; next = point->next; - if (((prev->pos.x <= point->pos.x) && - (point->pos.x < next->pos.x)) || - ((prev->pos.x < point->pos.x) && (point->pos.x <= next->pos.x))) + if (((prev->pos.x <= point->pos.x) && (point->pos.x < next->pos.x)) || + ((prev->pos.x < point->pos.x) && (point->pos.x <= next->pos.x))) dir = 1; - if (((prev->pos.x >= point->pos.x) && - (point->pos.x > next->pos.x)) || - ((prev->pos.x > point->pos.x) && (point->pos.x >= next->pos.x))) + if (((prev->pos.x >= point->pos.x) && (point->pos.x > next->pos.x)) || + ((prev->pos.x > point->pos.x) && (point->pos.x >= next->pos.x))) dir = -1; return dir; diff --git a/src/ccstruct/vecfuncs.h b/src/ccstruct/vecfuncs.h index 733e231c78..1e997bd28f 100644 --- a/src/ccstruct/vecfuncs.h +++ b/src/ccstruct/vecfuncs.h @@ -39,9 +39,7 @@ struct EDGEPT; * into point (p). **********************************************************************/ -#define point_diff(p,p1,p2) \ -((p).x = (p1).x - (p2).x, \ - (p).y = (p1).y - (p2).y) +#define point_diff(p, p1, p2) ((p).x = (p1).x - (p2).x, (p).y = (p1).y - (p2).y) /********************************************************************** * CROSS @@ -49,8 +47,7 @@ struct EDGEPT; * cross product **********************************************************************/ -#define CROSS(a,b) \ -((a).x * (b).y - (a).y * (b).x) +#define CROSS(a, b) ((a).x * (b).y - (a).y * (b).x) /********************************************************************** * SCALAR @@ -58,8 +55,7 @@ struct EDGEPT; * scalar vector product **********************************************************************/ -#define SCALAR(a,b) \ -((a).x * (b).x + (a).y * (b).y) +#define SCALAR(a, b) ((a).x * (b).x + (a).y * (b).y) /********************************************************************** * LENGTH @@ -67,12 +63,11 @@ struct EDGEPT; * length of vector **********************************************************************/ -#define LENGTH(a) \ -((a).x * (a).x + (a).y * (a).y) +#define LENGTH(a) ((a).x * (a).x + (a).y * (a).y) /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -int direction(EDGEPT *point); +int direction(EDGEPT* point); #endif diff --git a/src/ccstruct/werd.cpp b/src/ccstruct/werd.cpp index f0a35071c9..29b352dfb5 100644 --- a/src/ccstruct/werd.cpp +++ b/src/ccstruct/werd.cpp @@ -17,19 +17,19 @@ * **********************************************************************/ +#include "werd.h" #include "blckerr.h" #include "helpers.h" #include "linlsq.h" -#include "werd.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif -#define FIRST_COLOUR ScrollView::RED //< first rainbow colour -#define LAST_COLOUR ScrollView::AQUAMARINE //< last rainbow colour -#define CHILD_COLOUR ScrollView::BROWN //< colour of children +#define FIRST_COLOUR ScrollView::RED //< first rainbow colour +#define LAST_COLOUR ScrollView::AQUAMARINE //< last rainbow colour +#define CHILD_COLOUR ScrollView::BROWN //< colour of children const ERRCODE CANT_SCALE_EDGESTEPS = "Attempted to scale an edgestep format word"; @@ -45,11 +45,8 @@ ELIST2IZE(WERD) * blank_count blanks in front of the word * text correct text, outlives this WERD */ -WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text) - : blanks(blank_count), - flags(0), - script_id_(0), - correct(text) { +WERD::WERD(C_BLOB_LIST* blob_list, uint8_t blank_count, const char* text) + : blanks(blank_count), flags(0), script_id_(0), correct(text) { C_BLOB_IT start_it = &cblobs; C_BLOB_IT rej_cblob_it = &rej_cblobs; C_OUTLINE_IT c_outline_it; @@ -73,8 +70,7 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text) with the concencus onto the reject list. */ start_it.set_to_list(&cblobs); - if (start_it.empty()) - return; + if (start_it.empty()) return; for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) { bool reject_blob = false; bool blob_inverted; @@ -82,8 +78,7 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text) c_outline_it.set_to_list(start_it.data()->out_list()); blob_inverted = c_outline_it.data()->flag(COUT_INVERSE); for (c_outline_it.mark_cycle_pt(); - !c_outline_it.cycled_list() && !reject_blob; - c_outline_it.forward()) { + !c_outline_it.cycled_list() && !reject_blob; c_outline_it.forward()) { reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted; } if (reject_blob) { @@ -99,8 +94,7 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text) flags.set_bit(W_INVERSE, (inverted_vote > non_inverted_vote)); start_it.set_to_list(&cblobs); - if (start_it.empty()) - return; + if (start_it.empty()) return; for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) { c_outline_it.set_to_list(start_it.data()->out_list()); if (c_outline_it.data()->flag(COUT_INVERSE) != flags.bit(W_INVERSE)) @@ -108,7 +102,6 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text) } } - /** * WERD::WERD * @@ -116,18 +109,17 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text) * The C_BLOBs are not copied so the source list is emptied. */ -WERD::WERD(C_BLOB_LIST * blob_list, //< In word order - WERD * clone) //< Source of flags - : flags(clone->flags), - script_id_(clone->script_id_), - correct(clone->correct) { +WERD::WERD(C_BLOB_LIST* blob_list, //< In word order + WERD* clone) //< Source of flags + : flags(clone->flags), + script_id_(clone->script_id_), + correct(clone->correct) { C_BLOB_IT start_it = blob_list; // iterator C_BLOB_IT end_it = blob_list; // another - while (!end_it.at_last ()) - end_it.forward (); //move to last - ((C_BLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it); - //move to our list + while (!end_it.at_last()) end_it.forward(); // move to last + ((C_BLOB_LIST*)(&cblobs))->assign_to_sublist(&start_it, &end_it); + // move to our list blanks = clone->blanks; // fprintf(stderr,"Wrong constructor!!!!\n"); } @@ -224,7 +216,6 @@ void WERD::join_on(WERD* other) { } } - /** * WERD::copy_on * @@ -286,7 +277,6 @@ void WERD::print() { tprintf("Script = %d\n", script_id_); } - /** * WERD::plot * @@ -294,7 +284,7 @@ void WERD::print() { */ #ifndef GRAPHICS_DISABLED -void WERD::plot(ScrollView *window, ScrollView::Color colour) { +void WERD::plot(ScrollView* window, ScrollView::Color colour) { C_BLOB_IT it = &cblobs; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { it.data()->plot(window, colour, colour); @@ -305,8 +295,7 @@ void WERD::plot(ScrollView *window, ScrollView::Color colour) { // Get the next color in the (looping) rainbow. ScrollView::Color WERD::NextColor(ScrollView::Color colour) { ScrollView::Color next = static_cast(colour + 1); - if (next >= LAST_COLOUR || next < FIRST_COLOUR) - next = FIRST_COLOUR; + if (next >= LAST_COLOUR || next < FIRST_COLOUR) next = FIRST_COLOUR; return next; } @@ -326,15 +315,13 @@ void WERD::plot(ScrollView* window) { plot_rej_blobs(window); } - /** * WERD::plot_rej_blobs * * Draw the WERD rejected blobs in window - ALWAYS GREY */ - -void WERD::plot_rej_blobs(ScrollView *window) { +void WERD::plot_rej_blobs(ScrollView* window) { C_BLOB_IT it = &rej_cblobs; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { it.data()->plot(window, ScrollView::GREY, ScrollView::GREY); @@ -342,15 +329,14 @@ void WERD::plot_rej_blobs(ScrollView *window) { } #endif // GRAPHICS_DISABLED - /** * WERD::shallow_copy() * * Make a shallow copy of a word */ -WERD *WERD::shallow_copy() { - WERD *new_word = new WERD; +WERD* WERD::shallow_copy() { + WERD* new_word = new WERD; new_word->blanks = blanks; new_word->flags = flags; @@ -359,31 +345,27 @@ WERD *WERD::shallow_copy() { return new_word; } - /** * WERD::operator= * * Assign a word, DEEP copying the blob list */ -WERD & WERD::operator= (const WERD & source) { - this->ELIST2_LINK::operator= (source); +WERD& WERD::operator=(const WERD& source) { + this->ELIST2_LINK::operator=(source); blanks = source.blanks; flags = source.flags; script_id_ = source.script_id_; dummy = source.dummy; correct = source.correct; - if (!cblobs.empty()) - cblobs.clear(); + if (!cblobs.empty()) cblobs.clear(); cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy); - if (!rej_cblobs.empty()) - rej_cblobs.clear(); + if (!rej_cblobs.empty()) rej_cblobs.clear(); rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy); return *this; } - /** * word_comparator() * @@ -391,9 +373,9 @@ WERD & WERD::operator= (const WERD & source) { * order of left edge. */ -int word_comparator(const void *word1p, const void *word2p) { - WERD *word1 = *(WERD **)word1p; - WERD *word2 = *(WERD **)word2p; +int word_comparator(const void* word1p, const void* word2p) { + WERD* word1 = *(WERD**)word1p; + WERD* word2 = *(WERD**)word2p; return word1->bounding_box().left() - word2->bounding_box().left(); } @@ -476,7 +458,7 @@ WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs, TBOX a_blob_box = a_blob->bounding_box(); if ((not_found_box.major_overlap(a_blob_box) || a_blob_box.major_overlap(not_found_box)) && - not_found_box.y_overlap_fraction(a_blob_box) > 0.8) { + not_found_box.y_overlap_fraction(a_blob_box) > 0.8) { // Already taken care of. delete not_found_it.extract(); break; diff --git a/src/ccstruct/werd.h b/src/ccstruct/werd.h index 0393f93943..18476108c6 100644 --- a/src/ccstruct/werd.h +++ b/src/ccstruct/werd.h @@ -17,182 +17,174 @@ * **********************************************************************/ -#ifndef WERD_H -#define WERD_H - -#include "params.h" -#include "bits16.h" -#include "elst2.h" -#include "strngs.h" -#include "blckerr.h" -#include "stepblob.h" - -enum WERD_FLAGS -{ - W_SEGMENTED, //< correctly segmented - W_ITALIC, //< italic text - W_BOLD, //< bold text - W_BOL, //< start of line - W_EOL, //< end of line - W_NORMALIZED, //< flags - W_SCRIPT_HAS_XHEIGHT, //< x-height concept makes sense. - W_SCRIPT_IS_LATIN, //< Special case latin for y. splitting. - W_DONT_CHOP, //< fixed pitch chopped - W_REP_CHAR, //< repeated character - W_FUZZY_SP, //< fuzzy space - W_FUZZY_NON, //< fuzzy nonspace - W_INVERSE //< white on black +#ifndef WERD_H +#define WERD_H + +#include "bits16.h" +#include "blckerr.h" +#include "elst2.h" +#include "params.h" +#include "stepblob.h" +#include "strngs.h" + +enum WERD_FLAGS { + W_SEGMENTED, //< correctly segmented + W_ITALIC, //< italic text + W_BOLD, //< bold text + W_BOL, //< start of line + W_EOL, //< end of line + W_NORMALIZED, //< flags + W_SCRIPT_HAS_XHEIGHT, //< x-height concept makes sense. + W_SCRIPT_IS_LATIN, //< Special case latin for y. splitting. + W_DONT_CHOP, //< fixed pitch chopped + W_REP_CHAR, //< repeated character + W_FUZZY_SP, //< fuzzy space + W_FUZZY_NON, //< fuzzy nonspace + W_INVERSE //< white on black }; -enum DISPLAY_FLAGS -{ +enum DISPLAY_FLAGS { /* Display flags bit number allocations */ - DF_BOX, //< Bounding box - DF_TEXT, //< Correct ascii - DF_POLYGONAL, //< Polyg approx - DF_EDGE_STEP, //< Edge steps - DF_BN_POLYGONAL, //< BL normalisd polyapx - DF_BLAMER //< Blamer information + DF_BOX, //< Bounding box + DF_TEXT, //< Correct ascii + DF_POLYGONAL, //< Polyg approx + DF_EDGE_STEP, //< Edge steps + DF_BN_POLYGONAL, //< BL normalisd polyapx + DF_BLAMER //< Blamer information }; -class ROW; //forward decl +class ROW; // forward decl class WERD : public ELIST2_LINK { - public: - WERD() = default; - // WERD constructed with: - // blob_list - blobs of the word (we take this list's contents) - // blanks - number of blanks before the word - // text - correct text (outlives WERD) - WERD(C_BLOB_LIST *blob_list, uint8_t blanks, const char *text); - - // WERD constructed from: - // blob_list - blobs in the word - // clone - werd to clone flags, etc from. - WERD(C_BLOB_LIST *blob_list, WERD *clone); - - // Construct a WERD from a single_blob and clone the flags from this. - // W_BOL and W_EOL flags are set according to the given values. - WERD* ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob); - - ~WERD() = default; - - // assignment - WERD & operator= (const WERD &source); - - // This method returns a new werd constructed using the blobs in the input - // all_blobs list, which correspond to the blobs in this werd object. The - // blobs used to construct the new word are consumed and removed from the - // input all_blobs list. - // Returns nullptr if the word couldn't be constructed. - // Returns original blobs for which no matches were found in the output list - // orphan_blobs (appends). - WERD *ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, - C_BLOB_LIST *orphan_blobs); - - // Accessors for reject / DUFF blobs in various formats - C_BLOB_LIST *rej_cblob_list() { // compact format - return &rej_cblobs; - } - - // Accessors for good blobs in various formats. - C_BLOB_LIST *cblob_list() { // get compact blobs - return &cblobs; - } - - uint8_t space() { // access function - return blanks; - } - void set_blanks(uint8_t new_blanks) { - blanks = new_blanks; - } - int script_id() const { - return script_id_; - } - void set_script_id(int id) { - script_id_ = id; - } - - // Returns the (default) bounding box including all the dots. - TBOX bounding_box() const; // compute bounding box - // Returns the bounding box including the desired combination of upper and - // lower noise/diacritic elements. - TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const; - // Returns the bounding box of only the good blobs. - TBOX true_bounding_box() const; - - const char *text() const { return correct.string(); } - void set_text(const char *new_text) { correct = new_text; } - - bool flag(WERD_FLAGS mask) const { return flags.bit(mask); } - void set_flag(WERD_FLAGS mask, bool value) { flags.set_bit(mask, value); } - - bool display_flag(uint8_t flag) const { return disp_flags.bit(flag); } - void set_display_flag(uint8_t flag, bool value) { - disp_flags.set_bit(flag, value); - } - - WERD *shallow_copy(); // shallow copy word - - // reposition word by vector - void move(const ICOORD vec); - - // join other's blobs onto this werd, emptying out other. - void join_on(WERD* other); - - // copy other's blobs onto this word, leaving other intact. - void copy_on(WERD* other); - - // tprintf word metadata (but not blob innards) - void print(); - - #ifndef GRAPHICS_DISABLED - // plot word on window in a uniform colour - void plot(ScrollView *window, ScrollView::Color colour); - - // Get the next color in the (looping) rainbow. - static ScrollView::Color NextColor(ScrollView::Color colour); - - // plot word on window in a rainbow of colours - void plot(ScrollView *window); - - // plot rejected blobs in a rainbow of colours - void plot_rej_blobs(ScrollView *window); - #endif // GRAPHICS_DISABLED - - // Removes noise from the word by moving small outlines to the rej_cblobs - // list, based on the size_threshold. - void CleanNoise(float size_threshold); - - // Extracts all the noise outlines and stuffs the pointers into the given - // vector of outlines. Afterwards, the outlines vector owns the pointers. - void GetNoiseOutlines(GenericVector *outlines); - // Adds the selected outlines to the indcated real blobs, and puts the rest - // back in rej_cblobs where they came from. Where the target_blobs entry is - // nullptr, a run of wanted outlines is put into a single new blob. - // Ownership of the outlines is transferred back to the word. (Hence - // GenericVector and not PointerVector.) - // Returns true if any new blob was added to the start of the word, which - // suggests that it might need joining to the word before it, and likewise - // sets make_next_word_fuzzy true if any new blob was added to the end. - bool AddSelectedOutlines(const GenericVector &wanted, - const GenericVector &target_blobs, - const GenericVector &outlines, - bool *make_next_word_fuzzy); + public: + WERD() = default; + // WERD constructed with: + // blob_list - blobs of the word (we take this list's contents) + // blanks - number of blanks before the word + // text - correct text (outlives WERD) + WERD(C_BLOB_LIST* blob_list, uint8_t blanks, const char* text); + + // WERD constructed from: + // blob_list - blobs in the word + // clone - werd to clone flags, etc from. + WERD(C_BLOB_LIST* blob_list, WERD* clone); + + // Construct a WERD from a single_blob and clone the flags from this. + // W_BOL and W_EOL flags are set according to the given values. + WERD* ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob); + + ~WERD() = default; + + // assignment + WERD& operator=(const WERD& source); + + // This method returns a new werd constructed using the blobs in the input + // all_blobs list, which correspond to the blobs in this werd object. The + // blobs used to construct the new word are consumed and removed from the + // input all_blobs list. + // Returns nullptr if the word couldn't be constructed. + // Returns original blobs for which no matches were found in the output list + // orphan_blobs (appends). + WERD* ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs, + C_BLOB_LIST* orphan_blobs); + + // Accessors for reject / DUFF blobs in various formats + C_BLOB_LIST* rej_cblob_list() { // compact format + return &rej_cblobs; + } + + // Accessors for good blobs in various formats. + C_BLOB_LIST* cblob_list() { // get compact blobs + return &cblobs; + } + + uint8_t space() { // access function + return blanks; + } + void set_blanks(uint8_t new_blanks) { blanks = new_blanks; } + int script_id() const { return script_id_; } + void set_script_id(int id) { script_id_ = id; } + + // Returns the (default) bounding box including all the dots. + TBOX bounding_box() const; // compute bounding box + // Returns the bounding box including the desired combination of upper and + // lower noise/diacritic elements. + TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const; + // Returns the bounding box of only the good blobs. + TBOX true_bounding_box() const; + + const char* text() const { return correct.string(); } + void set_text(const char* new_text) { correct = new_text; } + + bool flag(WERD_FLAGS mask) const { return flags.bit(mask); } + void set_flag(WERD_FLAGS mask, bool value) { flags.set_bit(mask, value); } + + bool display_flag(uint8_t flag) const { return disp_flags.bit(flag); } + void set_display_flag(uint8_t flag, bool value) { + disp_flags.set_bit(flag, value); + } + + WERD* shallow_copy(); // shallow copy word + + // reposition word by vector + void move(const ICOORD vec); + + // join other's blobs onto this werd, emptying out other. + void join_on(WERD* other); + + // copy other's blobs onto this word, leaving other intact. + void copy_on(WERD* other); + + // tprintf word metadata (but not blob innards) + void print(); + +#ifndef GRAPHICS_DISABLED + // plot word on window in a uniform colour + void plot(ScrollView* window, ScrollView::Color colour); + + // Get the next color in the (looping) rainbow. + static ScrollView::Color NextColor(ScrollView::Color colour); + + // plot word on window in a rainbow of colours + void plot(ScrollView* window); + + // plot rejected blobs in a rainbow of colours + void plot_rej_blobs(ScrollView* window); +#endif // GRAPHICS_DISABLED + + // Removes noise from the word by moving small outlines to the rej_cblobs + // list, based on the size_threshold. + void CleanNoise(float size_threshold); + + // Extracts all the noise outlines and stuffs the pointers into the given + // vector of outlines. Afterwards, the outlines vector owns the pointers. + void GetNoiseOutlines(GenericVector* outlines); + // Adds the selected outlines to the indcated real blobs, and puts the rest + // back in rej_cblobs where they came from. Where the target_blobs entry is + // nullptr, a run of wanted outlines is put into a single new blob. + // Ownership of the outlines is transferred back to the word. (Hence + // GenericVector and not PointerVector.) + // Returns true if any new blob was added to the start of the word, which + // suggests that it might need joining to the word before it, and likewise + // sets make_next_word_fuzzy true if any new blob was added to the end. + bool AddSelectedOutlines(const GenericVector& wanted, + const GenericVector& target_blobs, + const GenericVector& outlines, + bool* make_next_word_fuzzy); private: - uint8_t blanks; // no of blanks - uint8_t dummy; // padding - BITS16 flags; // flags about word - BITS16 disp_flags; // display flags - int16_t script_id_; // From unicharset. - STRING correct; // correct text - C_BLOB_LIST cblobs; // compacted blobs - C_BLOB_LIST rej_cblobs; // DUFF blobs + uint8_t blanks; // no of blanks + uint8_t dummy; // padding + BITS16 flags; // flags about word + BITS16 disp_flags; // display flags + int16_t script_id_; // From unicharset. + STRING correct; // correct text + C_BLOB_LIST cblobs; // compacted blobs + C_BLOB_LIST rej_cblobs; // DUFF blobs }; -ELIST2IZEH (WERD) -#include "ocrrow.h" // placed here due to +ELIST2IZEH(WERD) +#include "ocrrow.h" // placed here due to // compare words by increasing order of left edge, suitable for qsort(3) -int word_comparator(const void *word1p, const void *word2p); +int word_comparator(const void* word1p, const void* word2p); #endif diff --git a/src/ccutil/ambigs.cpp b/src/ccutil/ambigs.cpp index 402064aeff..7e01dacaea 100644 --- a/src/ccutil/ambigs.cpp +++ b/src/ccutil/ambigs.cpp @@ -68,12 +68,11 @@ void UnicharAmbigs::LoadUniversal(const UNICHARSET& encoder_set, } void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, - TFile *ambig_file, - int debug_level, + TFile* ambig_file, int debug_level, bool use_ambigs_for_adaption, - UNICHARSET *unicharset) { + UNICHARSET* unicharset) { int i, j; - UnicharIdVector *adaption_ambigs_entry; + UnicharIdVector* adaption_ambigs_entry; if (debug_level) tprintf("Reading ambiguities\n"); int test_ambig_part_size; @@ -81,7 +80,7 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, // The space for buffer is allocated on the heap to avoid // GCC frame size warning. const int kBufferSize = 10 + 2 * kMaxAmbigStringSize; - char *buffer = new char[kBufferSize]; + char* buffer = new char[kBufferSize]; char replacement_string[kMaxAmbigStringSize]; UNICHAR_ID test_unichar_ids[MAX_AMBIG_SIZE + 1]; int line_num = 0; @@ -92,7 +91,7 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, ASSERT_HOST(ambig_file->FGets(buffer, kBufferSize) != nullptr && strlen(buffer) > 0); if (*buffer == 'v') { - version = static_cast(strtol(buffer+1, nullptr, 10)); + version = static_cast(strtol(buffer + 1, nullptr, 10)); ++line_num; } else { ambig_file->Rewind(); @@ -101,24 +100,25 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, chomp_string(buffer); if (debug_level > 2) tprintf("read line %s\n", buffer); ++line_num; - if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set, - buffer, &test_ambig_part_size, test_unichar_ids, - &replacement_ambig_part_size, - replacement_string, &type)) continue; + if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set, buffer, + &test_ambig_part_size, test_unichar_ids, + &replacement_ambig_part_size, replacement_string, + &type)) + continue; // Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST. - AmbigSpec *ambig_spec = new AmbigSpec(); - if (!InsertIntoTable((type == REPLACE_AMBIG) ? replace_ambigs_ - : dang_ambigs_, - test_ambig_part_size, test_unichar_ids, - replacement_ambig_part_size, replacement_string, type, - ambig_spec, unicharset)) + AmbigSpec* ambig_spec = new AmbigSpec(); + if (!InsertIntoTable( + (type == REPLACE_AMBIG) ? replace_ambigs_ : dang_ambigs_, + test_ambig_part_size, test_unichar_ids, replacement_ambig_part_size, + replacement_string, type, ambig_spec, unicharset)) continue; // Update one_to_one_definite_ambigs_. - if (test_ambig_part_size == 1 && - replacement_ambig_part_size == 1 && type == DEFINITE_AMBIG) { + if (test_ambig_part_size == 1 && replacement_ambig_part_size == 1 && + type == DEFINITE_AMBIG) { if (one_to_one_definite_ambigs_[test_unichar_ids[0]] == nullptr) { - one_to_one_definite_ambigs_[test_unichar_ids[0]] = new UnicharIdVector(); + one_to_one_definite_ambigs_[test_unichar_ids[0]] = + new UnicharIdVector(); } one_to_one_definite_ambigs_[test_unichar_ids[0]]->push_back( ambig_spec->correct_ngram_id); @@ -141,7 +141,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, // Add the new unichar id to adaption_ambigs_entry (only if the // vector does not already contain it) keeping it in sorted order. for (j = 0; j < adaption_ambigs_entry->size() && - (*adaption_ambigs_entry)[j] > id_to_insert; ++j); + (*adaption_ambigs_entry)[j] > id_to_insert; + ++j) + ; if (j < adaption_ambigs_entry->size()) { if ((*adaption_ambigs_entry)[j] != id_to_insert) { adaption_ambigs_entry->insert(id_to_insert, j); @@ -174,10 +176,10 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, // Print what was read from the input file. if (debug_level > 1) { for (int tbl = 0; tbl < 2; ++tbl) { - const UnicharAmbigsVector &print_table = - (tbl == 0) ? replace_ambigs_ : dang_ambigs_; + const UnicharAmbigsVector& print_table = + (tbl == 0) ? replace_ambigs_ : dang_ambigs_; for (i = 0; i < print_table.size(); ++i) { - AmbigSpec_LIST *lst = print_table[i]; + AmbigSpec_LIST* lst = print_table[i]; if (lst == nullptr) continue; if (!lst->empty()) { tprintf("%s Ambiguities for %s:\n", @@ -186,18 +188,19 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, } AmbigSpec_IT lst_it(lst); for (lst_it.mark_cycle_pt(); !lst_it.cycled_list(); lst_it.forward()) { - AmbigSpec *ambig_spec = lst_it.data(); + AmbigSpec* ambig_spec = lst_it.data(); tprintf("wrong_ngram:"); UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, *unicharset); tprintf("correct_fragments:"); - UnicharIdArrayUtils::print(ambig_spec->correct_fragments, *unicharset); + UnicharIdArrayUtils::print(ambig_spec->correct_fragments, + *unicharset); } } } if (use_ambigs_for_adaption) { for (int vec_id = 0; vec_id < 2; ++vec_id) { - const GenericVector &vec = (vec_id == 0) ? - ambigs_for_adaption_ : reverse_ambigs_for_adaption_; + const GenericVector& vec = + (vec_id == 0) ? ambigs_for_adaption_ : reverse_ambigs_for_adaption_; for (i = 0; i < vec.size(); ++i) { adaption_ambigs_entry = vec[i]; if (adaption_ambigs_entry != nullptr) { @@ -205,8 +208,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, (vec_id == 0) ? "" : "Reverse ", unicharset->debug_str(i).string()); for (j = 0; j < adaption_ambigs_entry->size(); ++j) { - tprintf("%s ", unicharset->debug_str( - (*adaption_ambigs_entry)[j]).string()); + tprintf( + "%s ", + unicharset->debug_str((*adaption_ambigs_entry)[j]).string()); } tprintf("\n"); } @@ -217,9 +221,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set, } bool UnicharAmbigs::ParseAmbiguityLine( - int line_num, int version, int debug_level, const UNICHARSET &unicharset, - char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids, - int *replacement_ambig_part_size, char *replacement_string, int *type) { + int line_num, int version, int debug_level, const UNICHARSET& unicharset, + char* buffer, int* test_ambig_part_size, UNICHAR_ID* test_unichar_ids, + int* replacement_ambig_part_size, char* replacement_string, int* type) { if (version > 1) { // Simpler format is just wrong-string correct-string type\n. STRING input(buffer); @@ -242,8 +246,7 @@ bool UnicharAmbigs::ParseAmbiguityLine( return false; } // Copy encoded string to output. - for (int i = 0; i < unichars.size(); ++i) - test_unichar_ids[i] = unichars[i]; + for (int i = 0; i < unichars.size(); ++i) test_unichar_ids[i] = unichars[i]; test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID; // Encode replacement-string to check validity. if (!unicharset.encode_string(fields[1].string(), true, &unichars, nullptr, @@ -264,8 +267,8 @@ bool UnicharAmbigs::ParseAmbiguityLine( return true; } int i; - char *token; - char *next_token; + char* token; + char* next_token; if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", test_ambig_part_size) || *test_ambig_part_size <= 0) { @@ -290,7 +293,7 @@ bool UnicharAmbigs::ParseAmbiguityLine( if (i != *test_ambig_part_size || !(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", replacement_ambig_part_size) || - *replacement_ambig_part_size <= 0) { + *replacement_ambig_part_size <= 0) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } @@ -331,20 +334,22 @@ bool UnicharAmbigs::ParseAmbiguityLine( return true; } -bool UnicharAmbigs::InsertIntoTable( - UnicharAmbigsVector &table, int test_ambig_part_size, - UNICHAR_ID *test_unichar_ids, int replacement_ambig_part_size, - const char *replacement_string, int type, - AmbigSpec *ambig_spec, UNICHARSET *unicharset) { +bool UnicharAmbigs::InsertIntoTable(UnicharAmbigsVector& table, + int test_ambig_part_size, + UNICHAR_ID* test_unichar_ids, + int replacement_ambig_part_size, + const char* replacement_string, int type, + AmbigSpec* ambig_spec, + UNICHARSET* unicharset) { ambig_spec->type = static_cast(type); if (test_ambig_part_size == 1 && replacement_ambig_part_size == 1 && unicharset->to_lower(test_unichar_ids[0]) == - unicharset->to_lower(unicharset->unichar_to_id(replacement_string))) { + unicharset->to_lower(unicharset->unichar_to_id(replacement_string))) { ambig_spec->type = CASE_AMBIG; } ambig_spec->wrong_ngram_size = - UnicharIdArrayUtils::copy(test_unichar_ids, ambig_spec->wrong_ngram); + UnicharIdArrayUtils::copy(test_unichar_ids, ambig_spec->wrong_ngram); // Since we need to maintain a constant number of unichar positions in // order to construct ambig_blob_choices vector in NoDangerousAmbig(), for @@ -358,8 +363,7 @@ bool UnicharAmbigs::InsertIntoTable( // Unicharset code assumes that the "base" ngram is inserted into // the unicharset before fragments of this ngram are inserted. unicharset->unichar_insert(replacement_string, OldUncleanUnichars::kTrue); - ambig_spec->correct_ngram_id = - unicharset->unichar_to_id(replacement_string); + ambig_spec->correct_ngram_id = unicharset->unichar_to_id(replacement_string); if (replacement_ambig_part_size > 1) { unicharset->set_isngram(ambig_spec->correct_ngram_id, true); } @@ -370,8 +374,8 @@ bool UnicharAmbigs::InsertIntoTable( if (test_ambig_part_size == 1) { unichar_id = ambig_spec->correct_ngram_id; } else { - STRING frag_str = CHAR_FRAGMENT::to_string( - replacement_string, i, test_ambig_part_size, false); + STRING frag_str = CHAR_FRAGMENT::to_string(replacement_string, i, + test_ambig_part_size, false); unicharset->unichar_insert(frag_str.string(), OldUncleanUnichars::kTrue); unichar_id = unicharset->unichar_to_id(frag_str.string()); } @@ -384,8 +388,8 @@ bool UnicharAmbigs::InsertIntoTable( if (table[test_unichar_ids[0]] == nullptr) { table[test_unichar_ids[0]] = new AmbigSpec_LIST(); } - if (table[test_unichar_ids[0]]->add_sorted( - AmbigSpec::compare_ambig_specs, true, ambig_spec)) + if (table[test_unichar_ids[0]]->add_sorted(AmbigSpec::compare_ambig_specs, + true, ambig_spec)) return true; delete ambig_spec; return false; diff --git a/src/ccutil/ambigs.h b/src/ccutil/ambigs.h index cedf4cfcd9..0383cc8170 100644 --- a/src/ccutil/ambigs.h +++ b/src/ccutil/ambigs.h @@ -22,31 +22,31 @@ #define TESSERACT_CCUTIL_AMBIGS_H_ #include "elst.h" +#include "genericvector.h" #include "tprintf.h" #include "unichar.h" #include "unicharset.h" -#include "genericvector.h" -#define MAX_AMBIG_SIZE 10 +#define MAX_AMBIG_SIZE 10 namespace tesseract { using UnicharIdVector = GenericVector; static const int kUnigramAmbigsBufferSize = 1000; -static const char kAmbigNgramSeparator[] = { ' ', '\0' }; +static const char kAmbigNgramSeparator[] = {' ', '\0'}; static const char kAmbigDelimiters[] = "\t "; static const char kIllegalMsg[] = - "Illegal ambiguity specification on line %d\n"; + "Illegal ambiguity specification on line %d\n"; static const char kIllegalUnicharMsg[] = - "Illegal unichar %s in ambiguity specification\n"; + "Illegal unichar %s in ambiguity specification\n"; enum AmbigType { - NOT_AMBIG, // the ngram pair is not ambiguous - REPLACE_AMBIG, // ocred ngram should always be substituted with correct - DEFINITE_AMBIG, // add correct ngram to the classifier results (1-1) - SIMILAR_AMBIG, // use pairwise classifier for ocred/correct pair (1-1) - CASE_AMBIG, // this is a case ambiguity (1-1) + NOT_AMBIG, // the ngram pair is not ambiguous + REPLACE_AMBIG, // ocred ngram should always be substituted with correct + DEFINITE_AMBIG, // add correct ngram to the classifier results (1-1) + SIMILAR_AMBIG, // use pairwise classifier for ocred/correct pair (1-1) + CASE_AMBIG, // this is a case ambiguity (1-1) AMBIG_TYPE_COUNT // number of enum entries }; @@ -59,7 +59,7 @@ class UnicharIdArrayUtils { // less than length of array2, if any array1[i] is less than array2[i]. // Returns 0 if the arrays are equal, 1 otherwise. // The function assumes that the arrays are terminated by INVALID_UNICHAR_ID. - static inline int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2) { + static inline int compare(const UNICHAR_ID* ptr1, const UNICHAR_ID* ptr2) { for (;;) { const UNICHAR_ID val1 = *ptr1++; const UNICHAR_ID val2 = *ptr2++; @@ -96,8 +96,8 @@ class UnicharIdArrayUtils { // Prints unichars corresponding to the unichar_ids in the given array. // The function assumes that array is terminated by INVALID_UNICHAR_ID. static inline void print(const UNICHAR_ID array[], - const UNICHARSET &unicharset) { - const UNICHAR_ID *ptr = array; + const UNICHARSET& unicharset) { + const UNICHAR_ID* ptr = array; if (*ptr == INVALID_UNICHAR_ID) tprintf("[Empty]"); while (*ptr != INVALID_UNICHAR_ID) { tprintf("%s ", unicharset.id_to_unichar(*ptr++)); @@ -119,9 +119,9 @@ class AmbigSpec : public ELIST_LINK { // Comparator function for sorting AmbigSpec_LISTs. The lists will // be sorted by their wrong_ngram arrays. Example of wrong_ngram vectors // in a a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1]. - static int compare_ambig_specs(const void *spec1, const void *spec2) { - const AmbigSpec *s1 = *static_cast(spec1); - const AmbigSpec *s2 = *static_cast(spec2); + static int compare_ambig_specs(const void* spec1, const void* spec2) { + const AmbigSpec* s1 = *static_cast(spec1); + const AmbigSpec* s2 = *static_cast(spec2); int result = UnicharIdArrayUtils::compare(s1->wrong_ngram, s2->wrong_ngram); if (result != 0) return result; return UnicharIdArrayUtils::compare(s1->correct_fragments, @@ -138,7 +138,7 @@ ELISTIZEH(AmbigSpec) // AMBIG_TABLE[i] stores a set of ambiguities whose // wrong ngram starts with unichar id i. -using UnicharAmbigsVector = GenericVector; +using UnicharAmbigsVector = GenericVector; class UnicharAmbigs { public: @@ -149,8 +149,8 @@ class UnicharAmbigs { one_to_one_definite_ambigs_.delete_data_pointers(); } - const UnicharAmbigsVector &dang_ambigs() const { return dang_ambigs_; } - const UnicharAmbigsVector &replace_ambigs() const { return replace_ambigs_; } + const UnicharAmbigsVector& dang_ambigs() const { return dang_ambigs_; } + const UnicharAmbigsVector& replace_ambigs() const { return replace_ambigs_; } // Initializes the ambigs by adding a nullptr pointer to each table. void InitUnicharAmbigs(const UNICHARSET& unicharset, @@ -171,12 +171,12 @@ class UnicharAmbigs { // unichar ids that are ambiguous to it. // encoder_set is used to encode the ambiguity strings, undisturbed by new // unichar_ids that may be created by adding the ambigs. - void LoadUnicharAmbigs(const UNICHARSET& encoder_set, - TFile *ambigs_file, int debug_level, - bool use_ambigs_for_adaption, UNICHARSET *unicharset); + void LoadUnicharAmbigs(const UNICHARSET& encoder_set, TFile* ambigs_file, + int debug_level, bool use_ambigs_for_adaption, + UNICHARSET* unicharset); // Returns definite 1-1 ambigs for the given unichar id. - inline const UnicharIdVector *OneToOneDefiniteAmbigs( + inline const UnicharIdVector* OneToOneDefiniteAmbigs( UNICHAR_ID unichar_id) const { if (one_to_one_definite_ambigs_.empty()) return nullptr; return one_to_one_definite_ambigs_[unichar_id]; @@ -187,8 +187,7 @@ class UnicharAmbigs { // in the 'wrong' part of the ambiguity. E.g. if DangAmbigs file consist of // m->rn,rn->m,m->iii, UnicharAmbigsForAdaption() called with unichar id of // m will return a pointer to a vector with unichar ids of r,n,i. - inline const UnicharIdVector *AmbigsForAdaption( - UNICHAR_ID unichar_id) const { + inline const UnicharIdVector* AmbigsForAdaption(UNICHAR_ID unichar_id) const { if (ambigs_for_adaption_.empty()) return nullptr; return ambigs_for_adaption_[unichar_id]; } @@ -196,7 +195,7 @@ class UnicharAmbigs { // Similar to the above, but return the vector of unichar ids for which // the given unichar_id is an ambiguity (appears in the 'wrong' part of // some ambiguity pair). - inline const UnicharIdVector *ReverseAmbigsForAdaption( + inline const UnicharIdVector* ReverseAmbigsForAdaption( UNICHAR_ID unichar_id) const { if (reverse_ambigs_for_adaption_.empty()) return nullptr; return reverse_ambigs_for_adaption_[unichar_id]; @@ -204,22 +203,22 @@ class UnicharAmbigs { private: bool ParseAmbiguityLine(int line_num, int version, int debug_level, - const UNICHARSET &unicharset, char *buffer, - int *test_ambig_part_size, - UNICHAR_ID *test_unichar_ids, - int *replacement_ambig_part_size, - char *replacement_string, int *type); - bool InsertIntoTable(UnicharAmbigsVector &table, - int test_ambig_part_size, UNICHAR_ID *test_unichar_ids, + const UNICHARSET& unicharset, char* buffer, + int* test_ambig_part_size, + UNICHAR_ID* test_unichar_ids, + int* replacement_ambig_part_size, + char* replacement_string, int* type); + bool InsertIntoTable(UnicharAmbigsVector& table, int test_ambig_part_size, + UNICHAR_ID* test_unichar_ids, int replacement_ambig_part_size, - const char *replacement_string, int type, - AmbigSpec *ambig_spec, UNICHARSET *unicharset); + const char* replacement_string, int type, + AmbigSpec* ambig_spec, UNICHARSET* unicharset); UnicharAmbigsVector dang_ambigs_; UnicharAmbigsVector replace_ambigs_; - GenericVector one_to_one_definite_ambigs_; - GenericVector ambigs_for_adaption_; - GenericVector reverse_ambigs_for_adaption_; + GenericVector one_to_one_definite_ambigs_; + GenericVector ambigs_for_adaption_; + GenericVector reverse_ambigs_for_adaption_; }; } // namespace tesseract diff --git a/src/ccutil/basedir.cpp b/src/ccutil/basedir.cpp index b7e194d21f..de6d9e9ca6 100644 --- a/src/ccutil/basedir.cpp +++ b/src/ccutil/basedir.cpp @@ -16,9 +16,9 @@ * **********************************************************************/ -#include "basedir.h" +#include "basedir.h" -#include +#include // Assuming that code_path is the name of some file in a desired directory, // returns the given code_path stripped back to the last slash, leaving @@ -26,7 +26,7 @@ // the input was the name of something in the current directory. // Useful for getting to the directory of argv[0], but does not search // any paths. -TESS_API void truncate_path(const char *code_path, STRING* trunc_path) { +TESS_API void truncate_path(const char* code_path, STRING* trunc_path) { int trunc_index = -1; if (code_path != nullptr) { const char* last_slash = strrchr(code_path, '/'); diff --git a/src/ccutil/basedir.h b/src/ccutil/basedir.h index 60d56ba66d..2ff8baa1be 100644 --- a/src/ccutil/basedir.h +++ b/src/ccutil/basedir.h @@ -17,15 +17,15 @@ * **********************************************************************/ -#ifndef BASEDIR_H -#define BASEDIR_H +#ifndef BASEDIR_H +#define BASEDIR_H -#include "platform.h" -#include "strngs.h" +#include "platform.h" +#include "strngs.h" // Returns the given code_path truncated to the last slash. // Useful for getting to the directory of argv[0], but does not search // any paths. -TESS_API void truncate_path(const char *code_path, STRING* trunc_path); +TESS_API void truncate_path(const char* code_path, STRING* trunc_path); #endif diff --git a/src/ccutil/bits16.h b/src/ccutil/bits16.h index 74eb551b26..0c823ab3ec 100644 --- a/src/ccutil/bits16.h +++ b/src/ccutil/bits16.h @@ -17,46 +17,41 @@ * **********************************************************************/ -#ifndef BITS16_H -#define BITS16_H +#ifndef BITS16_H +#define BITS16_H #include "host.h" -class DLLSYM BITS16 -{ - public: - uint16_t val; +class DLLSYM BITS16 { + public: + uint16_t val; - BITS16() { - val = 0; - } // constructor + BITS16() { val = 0; } // constructor - BITS16(uint16_t init) { - val = init; - } + BITS16(uint16_t init) { val = init; } - void turn_on_bit( // flip specified bit - uint8_t bit_num) { // bit to flip 0..7 - val = val | 01 << bit_num; - } + void turn_on_bit( // flip specified bit + uint8_t bit_num) { // bit to flip 0..7 + val = val | 01 << bit_num; + } + + void turn_off_bit( // flip specified bit + uint8_t bit_num) { // bit to flip 0..7 + val = val & ~(01 << bit_num); + } - void turn_off_bit( // flip specified bit - uint8_t bit_num) { // bit to flip 0..7 + void set_bit( // flip specified bit + uint8_t bit_num, // bit to flip 0..7 + bool value) { // value to flip to + if (value) + val = val | 01 << bit_num; + else val = val & ~(01 << bit_num); - } - - void set_bit( // flip specified bit - uint8_t bit_num, // bit to flip 0..7 - bool value) { // value to flip to - if (value) - val = val | 01 << bit_num; - else - val = val & ~(01 << bit_num); - } - - bool bit( // access bit - uint8_t bit_num) const { // bit to access - return (val >> bit_num) & 01; - } + } + + bool bit( // access bit + uint8_t bit_num) const { // bit to access + return (val >> bit_num) & 01; + } }; #endif diff --git a/src/ccutil/bitvector.cpp b/src/ccutil/bitvector.cpp index 072de9dfa1..dcc0339ea8 100644 --- a/src/ccutil/bitvector.cpp +++ b/src/ccutil/bitvector.cpp @@ -32,81 +32,57 @@ namespace tesseract { // For zero, the table has 255, but since it is a special case, most code // that uses this table will check for zero before looking up lsb_index_. const uint8_t BitVector::lsb_index_[256] = { - 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; + 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; // Fast lookup table to get the residual bits after zeroing the first (lowest) // set bit in a byte. const uint8_t BitVector::lsb_eroded_[256] = { - 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, - 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, - 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, - 0x10, 0x18, 0x18, 0x1a, 0x18, 0x1c, 0x1c, 0x1e, - 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, - 0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, - 0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, - 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, - 0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, - 0x40, 0x48, 0x48, 0x4a, 0x48, 0x4c, 0x4c, 0x4e, - 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, - 0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, - 0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, - 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, - 0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, - 0x70, 0x78, 0x78, 0x7a, 0x78, 0x7c, 0x7c, 0x7e, - 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, - 0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, - 0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, - 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, - 0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, - 0xa0, 0xa8, 0xa8, 0xaa, 0xa8, 0xac, 0xac, 0xae, - 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, - 0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, - 0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, - 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, - 0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, - 0xd0, 0xd8, 0xd8, 0xda, 0xd8, 0xdc, 0xdc, 0xde, - 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, - 0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, - 0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, - 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe -}; + 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, + 0x08, 0x0c, 0x0c, 0x0e, 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, + 0x10, 0x18, 0x18, 0x1a, 0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, + 0x20, 0x24, 0x24, 0x26, 0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, + 0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, + 0x38, 0x3c, 0x3c, 0x3e, 0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, + 0x40, 0x48, 0x48, 0x4a, 0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, + 0x50, 0x54, 0x54, 0x56, 0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, + 0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, + 0x68, 0x6c, 0x6c, 0x6e, 0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, + 0x70, 0x78, 0x78, 0x7a, 0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, + 0x80, 0x84, 0x84, 0x86, 0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, + 0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, + 0x98, 0x9c, 0x9c, 0x9e, 0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, + 0xa0, 0xa8, 0xa8, 0xaa, 0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, + 0xb0, 0xb4, 0xb4, 0xb6, 0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, + 0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, + 0xc8, 0xcc, 0xcc, 0xce, 0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, + 0xd0, 0xd8, 0xd8, 0xda, 0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, + 0xe0, 0xe4, 0xe4, 0xe6, 0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, + 0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, + 0xf8, 0xfc, 0xfc, 0xfe}; // Fast lookup table to give the number of set bits in a byte. const int BitVector::hamming_table_[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; BitVector::BitVector() : bit_size_(0), array_(nullptr) {} @@ -126,9 +102,7 @@ BitVector& BitVector::operator=(const BitVector& src) { return *this; } -BitVector::~BitVector() { - delete [] array_; -} +BitVector::~BitVector() { delete[] array_; } // Initializes the array to length * false. void BitVector::Init(int length) { @@ -141,7 +115,7 @@ bool BitVector::Serialize(FILE* fp) const { if (fwrite(&bit_size_, sizeof(bit_size_), 1, fp) != 1) return false; int wordlen = WordLength(); if (static_cast(fwrite(array_, sizeof(*array_), wordlen, fp)) != wordlen) - return false; + return false; return true; } @@ -156,20 +130,15 @@ bool BitVector::DeSerialize(bool swap, FILE* fp) { Alloc(new_bit_size); int wordlen = WordLength(); if (static_cast(fread(array_, sizeof(*array_), wordlen, fp)) != wordlen) - return false; + return false; if (swap) { - for (int i = 0; i < wordlen; ++i) - ReverseN(&array_[i], sizeof(array_[i])); + for (int i = 0; i < wordlen; ++i) ReverseN(&array_[i], sizeof(array_[i])); } return true; } -void BitVector::SetAllFalse() { - memset(array_, 0, ByteLength()); -} -void BitVector::SetAllTrue() { - memset(array_, ~0, ByteLength()); -} +void BitVector::SetAllFalse() { memset(array_, 0, ByteLength()); } +void BitVector::SetAllTrue() { memset(array_, ~0, ByteLength()); } // Returns the index of the next set bit after the given index. // Useful for quickly iterating through the set bits in a sparse vector. @@ -187,8 +156,7 @@ int BitVector::NextSetBit(int prev_bit) const { if (bit_index + 8 > next_bit && byte != 0) { while (bit_index + lsb_index_[byte] < next_bit && byte != 0) byte = lsb_eroded_[byte]; - if (byte != 0) - return bit_index + lsb_index_[byte]; + if (byte != 0) return bit_index + lsb_index_[byte]; } word >>= 8; bit_index += 8; @@ -228,20 +196,16 @@ int BitVector::NumSetBits() const { // sensible if they aren't the same size, but they should be really. void BitVector::operator|=(const BitVector& other) { int length = std::min(WordLength(), other.WordLength()); - for (int w = 0; w < length; ++w) - array_[w] |= other.array_[w]; + for (int w = 0; w < length; ++w) array_[w] |= other.array_[w]; } void BitVector::operator&=(const BitVector& other) { int length = std::min(WordLength(), other.WordLength()); - for (int w = 0; w < length; ++w) - array_[w] &= other.array_[w]; - for (int w = WordLength() - 1; w >= length; --w) - array_[w] = 0; + for (int w = 0; w < length; ++w) array_[w] &= other.array_[w]; + for (int w = WordLength() - 1; w >= length; --w) array_[w] = 0; } void BitVector::operator^=(const BitVector& other) { int length = std::min(WordLength(), other.WordLength()); - for (int w = 0; w < length; ++w) - array_[w] ^= other.array_[w]; + for (int w = 0; w < length; ++w) array_[w] ^= other.array_[w]; } // Set subtraction *this = v1 - v2. void BitVector::SetSubtract(const BitVector& v1, const BitVector& v2) { @@ -249,8 +213,7 @@ void BitVector::SetSubtract(const BitVector& v1, const BitVector& v2) { int length = std::min(v1.WordLength(), v2.WordLength()); for (int w = 0; w < length; ++w) array_[w] = v1.array_[w] ^ (v1.array_[w] & v2.array_[w]); - for (int w = WordLength() - 1; w >= length; --w) - array_[w] = v1.array_[w]; + for (int w = WordLength() - 1; w >= length; --w) array_[w] = v1.array_[w]; } // Allocates memory for a vector of the given length. @@ -260,10 +223,9 @@ void BitVector::Alloc(int length) { bit_size_ = length; int new_wordlength = WordLength(); if (new_wordlength != initial_wordlength) { - delete [] array_; + delete[] array_; array_ = new uint32_t[new_wordlength]; } } - } // namespace tesseract. diff --git a/src/ccutil/bitvector.h b/src/ccutil/bitvector.h index 9f105145a1..afc7a07a40 100644 --- a/src/ccutil/bitvector.h +++ b/src/ccutil/bitvector.h @@ -53,9 +53,7 @@ class BitVector { void Init(int length); // Returns the number of bits that are accessible in the vector. - int size() const { - return bit_size_; - } + int size() const { return bit_size_; } // Writes to the given file. Returns false in case of error. bool Serialize(FILE* fp) const; @@ -69,12 +67,8 @@ class BitVector { // Accessors to set/reset/get bits. // The range of index is [0, size()-1]. // There is debug-only bounds checking. - void SetBit(int index) { - array_[WordIndex(index)] |= BitMask(index); - } - void ResetBit(int index) { - array_[WordIndex(index)] &= ~BitMask(index); - } + void SetBit(int index) { array_[WordIndex(index)] |= BitMask(index); } + void ResetBit(int index) { array_[WordIndex(index)] &= ~BitMask(index); } void SetValue(int index, bool value) { if (value) SetBit(index); @@ -114,18 +108,12 @@ class BitVector { return index / kBitFactor; } // Returns a mask to select the appropriate bit for the given index. - uint32_t BitMask(int index) const { - return 1 << (index & (kBitFactor - 1)); - } + uint32_t BitMask(int index) const { return 1 << (index & (kBitFactor - 1)); } // Returns the number of array elements needed to represent the current // bit_size_. - int WordLength() const { - return (bit_size_ + kBitFactor - 1) / kBitFactor; - } + int WordLength() const { return (bit_size_ + kBitFactor - 1) / kBitFactor; } // Returns the number of bytes consumed by the array_. - int ByteLength() const { - return WordLength() * sizeof(*array_); - } + int ByteLength() const { return WordLength() * sizeof(*array_); } // Number of bits in this BitVector. int32_t bit_size_; diff --git a/src/ccutil/ccutil.cpp b/src/ccutil/ccutil.cpp index e33bda47be..5347677391 100644 --- a/src/ccutil/ccutil.cpp +++ b/src/ccutil/ccutil.cpp @@ -13,18 +13,22 @@ #include "ccutil.h" namespace tesseract { -CCUtil::CCUtil() : - params_(), +CCUtil::CCUtil() + : params_(), #ifdef _WIN32 - STRING_INIT_MEMBER(tessedit_module_name, WINDLLNAME, - "Module colocated with tessdata dir", ¶ms_), + STRING_INIT_MEMBER(tessedit_module_name, WINDLLNAME, + "Module colocated with tessdata dir", ¶ms_), #endif - INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", + INT_INIT_MEMBER(ambigs_debug_level, 0, + "Debug level for unichar ambiguities", ¶ms_), + BOOL_MEMBER(use_definite_ambigs_for_classifier, 0, + "Use definite" + " ambiguities when running character classifier", ¶ms_), - BOOL_MEMBER(use_definite_ambigs_for_classifier, 0, "Use definite" - " ambiguities when running character classifier", ¶ms_), - BOOL_MEMBER(use_ambigs_for_adaption, 0, "Use ambigs for deciding" - " whether to adapt to a character", ¶ms_) { + BOOL_MEMBER(use_ambigs_for_adaption, 0, + "Use ambigs for deciding" + " whether to adapt to a character", + ¶ms_) { } CCUtilMutex::CCUtilMutex() { @@ -52,4 +56,4 @@ void CCUtilMutex::Unlock() { } CCUtilMutex tprintfMutex; // should remain global -} // namespace tesseract +} // namespace tesseract diff --git a/src/ccutil/ccutil.h b/src/ccutil/ccutil.h index fd1b9f83d6..43365adc02 100644 --- a/src/ccutil/ccutil.h +++ b/src/ccutil/ccutil.h @@ -21,8 +21,8 @@ #include "ambigs.h" #include "errcode.h" -#include "strngs.h" #include "params.h" +#include "strngs.h" #include "unicharset.h" #ifndef _WIN32 @@ -39,6 +39,7 @@ class CCUtilMutex { void Lock(); void Unlock(); + private: #ifdef _WIN32 HANDLE mutex_; @@ -47,7 +48,6 @@ class CCUtilMutex { #endif }; - class CCUtil { public: CCUtil(); @@ -55,11 +55,10 @@ class CCUtil { public: // Read the arguments and set up the data path. - void main_setup( - const char *argv0, // program name - const char *basename // name of image - ); - ParamsVectors *params() { return ¶ms_; } + void main_setup(const char* argv0, // program name + const char* basename // name of image + ); + ParamsVectors* params() { return ¶ms_; } STRING datadir; // dir for data files STRING imagebasename; // name of image @@ -74,13 +73,13 @@ class CCUtil { ParamsVectors params_; public: - // Member parameters. - // These have to be declared and initialized after params_ member, since - // params_ should be initialized before parameters are added to it. - #ifdef _WIN32 +// Member parameters. +// These have to be declared and initialized after params_ member, since +// params_ should be initialized before parameters are added to it. +#ifdef _WIN32 STRING_VAR_H(tessedit_module_name, WINDLLNAME, "Module colocated with tessdata dir"); - #endif +#endif INT_VAR_H(ambigs_debug_level, 0, "Debug level for unichar ambiguities"); BOOL_VAR_H(use_definite_ambigs_for_classifier, 0, "Use definite ambiguities when running character classifier"); diff --git a/src/ccutil/clst.cpp b/src/ccutil/clst.cpp index 92e392b07a..01db48890b 100644 --- a/src/ccutil/clst.cpp +++ b/src/ccutil/clst.cpp @@ -17,8 +17,8 @@ * **********************************************************************/ -#include #include "clst.h" +#include /*********************************************************************** * MEMBER FUNCTIONS OF CLASS: CLIST @@ -37,20 +37,19 @@ * the consequential memory overhead. **********************************************************************/ -void -CLIST::internal_deep_clear ( //destroy all links -void (*zapper) (void *)) { //ptr to zapper functn - CLIST_LINK *ptr; - CLIST_LINK *next; +void CLIST::internal_deep_clear( // destroy all links + void (*zapper)(void*)) { // ptr to zapper functn + CLIST_LINK* ptr; + CLIST_LINK* next; - if (!empty ()) { - ptr = last->next; //set to first - last->next = nullptr; //break circle - last = nullptr; //set list empty + if (!empty()) { + ptr = last->next; // set to first + last->next = nullptr; // break circle + last = nullptr; // set list empty while (ptr) { next = ptr->next; - zapper (ptr->data); - delete(ptr); + zapper(ptr->data); + delete (ptr); ptr = next; } } @@ -65,17 +64,17 @@ void (*zapper) (void *)) { //ptr to zapper functn * **********************************************************************/ -void CLIST::shallow_clear() { //destroy all links - CLIST_LINK *ptr; - CLIST_LINK *next; +void CLIST::shallow_clear() { // destroy all links + CLIST_LINK* ptr; + CLIST_LINK* next; - if (!empty ()) { - ptr = last->next; //set to first - last->next = nullptr; //break circle - last = nullptr; //set list empty + if (!empty()) { + ptr = last->next; // set to first + last->next = nullptr; // break circle + last = nullptr; // set list empty while (ptr) { next = ptr->next; - delete(ptr); + delete (ptr); ptr = next; } } @@ -94,16 +93,15 @@ void CLIST::shallow_clear() { //destroy all links * end point is always the end_it position. **********************************************************************/ -void CLIST::assign_to_sublist( //to this list - CLIST_ITERATOR *start_it, //from list start - CLIST_ITERATOR *end_it) { //from list end +void CLIST::assign_to_sublist( // to this list + CLIST_ITERATOR* start_it, // from list start + CLIST_ITERATOR* end_it) { // from list end const ERRCODE LIST_NOT_EMPTY = - "Destination list must be empty before extracting a sublist"; + "Destination list must be empty before extracting a sublist"; - if (!empty ()) - LIST_NOT_EMPTY.error ("CLIST.assign_to_sublist", ABORT, nullptr); + if (!empty()) LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT, nullptr); - last = start_it->extract_sublist (end_it); + last = start_it->extract_sublist(end_it); } /*********************************************************************** @@ -112,12 +110,11 @@ void CLIST::assign_to_sublist( //to this list * Return count of elements on list **********************************************************************/ -int32_t CLIST::length() const { //count elements +int32_t CLIST::length() const { // count elements CLIST_ITERATOR it(const_cast(this)); int32_t count = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) - count++; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) count++; return count; } @@ -127,34 +124,33 @@ int32_t CLIST::length() const { //count elements * Sort elements on list **********************************************************************/ -void -CLIST::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { +void CLIST::sort( // sort elements + int comparator( // comparison routine + const void*, const void*)) { CLIST_ITERATOR it(this); int32_t count; - void **base; //ptr array to sort - void **current; + void** base; // ptr array to sort + void** current; int32_t i; /* Allocate an array of pointers, one per list element */ - count = length (); - base = (void **) malloc (count * sizeof (void *)); + count = length(); + base = (void**)malloc(count * sizeof(void*)); /* Extract all elements, putting the pointers in the array */ current = base; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - *current = it.extract (); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + *current = it.extract(); current++; } /* Sort the pointer array */ - qsort ((char *) base, count, sizeof (*base), comparator); + qsort((char*)base, count, sizeof(*base), comparator); /* Rebuild the list from the sorted pointers */ current = base; for (i = 0; i < count; i++) { - it.add_to_end (*current); + it.add_to_end(*current); current++; } free(base); @@ -167,8 +163,8 @@ const void *, const void *)) { // Time is linear to add pre-sorted items to an empty list. // If unique, then don't add duplicate entries. // Returns true if the element was added to the list. -bool CLIST::add_sorted(int comparator(const void*, const void*), - bool unique, void* new_data) { +bool CLIST::add_sorted(int comparator(const void*, const void*), bool unique, + void* new_data) { // Check for adding at the end. if (last == nullptr || comparator(&last->data, &new_data) < 0) { CLIST_LINK* new_element = new CLIST_LINK; @@ -186,10 +182,8 @@ bool CLIST::add_sorted(int comparator(const void*, const void*), CLIST_ITERATOR it(this); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { void* data = it.data(); - if (data == new_data && unique) - return false; - if (comparator(&data, &new_data) > 0) - break; + if (data == new_data && unique) return false; + if (comparator(&data, &new_data) > 0) break; } if (it.cycled_list()) it.add_to_end(new_data); @@ -205,8 +199,7 @@ bool CLIST::add_sorted(int comparator(const void*, const void*), // the set difference minuend - subtrahend to this, being the elements // of minuend that do not compare equal to anything in subtrahend. // If unique is true, any duplicates in minuend are also eliminated. -void CLIST::set_subtract(int comparator(const void*, const void*), - bool unique, +void CLIST::set_subtract(int comparator(const void*, const void*), bool unique, CLIST* minuend, CLIST* subtrahend) { shallow_clear(); CLIST_ITERATOR m_it(minuend); @@ -218,8 +211,7 @@ void CLIST::set_subtract(int comparator(const void*, const void*), void* subtra = nullptr; if (!s_it.empty()) { subtra = s_it.data(); - while (!s_it.at_last() && - comparator(&subtra, &minu) < 0) { + while (!s_it.at_last() && comparator(&subtra, &minu) < 0) { s_it.forward(); subtra = s_it.data(); } @@ -229,7 +221,6 @@ void CLIST::set_subtract(int comparator(const void*, const void*), } } - /*********************************************************************** * MEMBER FUNCTIONS OF CLASS: CLIST_ITERATOR * ========================================= @@ -242,33 +233,29 @@ void CLIST::set_subtract(int comparator(const void*, const void*), * REMEMBER: ALL LISTS ARE CIRCULAR. **********************************************************************/ -void *CLIST_ITERATOR::forward() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::forward", ABORT, nullptr); - #endif - if (list->empty ()) - return nullptr; +void* CLIST_ITERATOR::forward() { +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::forward", ABORT, nullptr); +#endif + if (list->empty()) return nullptr; - if (current) { //not removed so - //set previous + if (current) { // not removed so + // set previous prev = current; started_cycling = TRUE; // In case next is deleted by another iterator, get next from current. current = current->next; } else { - if (ex_current_was_cycle_pt) - cycle_pt = next; + if (ex_current_was_cycle_pt) cycle_pt = next; current = next; } - #ifndef NDEBUG - if (!current) - NULL_DATA.error ("CLIST_ITERATOR::forward", ABORT, nullptr); +#ifndef NDEBUG + if (!current) NULL_DATA.error("CLIST_ITERATOR::forward", ABORT, nullptr); if (!next) - NULL_NEXT.error ("CLIST_ITERATOR::forward", ABORT, - "This is: %p Current is: %p", this, current); - #endif + NULL_NEXT.error("CLIST_ITERATOR::forward", ABORT, + "This is: %p Current is: %p", this, current); +#endif next = current->next; return current->data; @@ -282,29 +269,27 @@ void *CLIST_ITERATOR::forward() { * (This function can't be INLINEd because it contains a loop) **********************************************************************/ -void *CLIST_ITERATOR::data_relative( //get data + or - ... - int8_t offset) { //offset from current - CLIST_LINK *ptr; +void* CLIST_ITERATOR::data_relative( // get data + or - ... + int8_t offset) { // offset from current + CLIST_LINK* ptr; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::data_relative", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("CLIST_ITERATOR::data_relative", ABORT, nullptr); +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::data_relative", ABORT, nullptr); + if (list->empty()) + EMPTY_LIST.error("CLIST_ITERATOR::data_relative", ABORT, nullptr); if (offset < -1) - BAD_PARAMETER.error ("CLIST_ITERATOR::data_relative", ABORT, - "offset < -l"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::data_relative", ABORT, "offset < -l"); +#endif if (offset == -1) ptr = prev; else - for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next); + for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) + ; - #ifndef NDEBUG - if (!ptr) - NULL_DATA.error ("CLIST_ITERATOR::data_relative", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!ptr) NULL_DATA.error("CLIST_ITERATOR::data_relative", ABORT, nullptr); +#endif return ptr->data; } @@ -317,14 +302,12 @@ void *CLIST_ITERATOR::data_relative( //get data + or - ... * (This function can't be INLINEd because it contains a loop) **********************************************************************/ -void *CLIST_ITERATOR::move_to_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::move_to_last", ABORT, nullptr); - #endif +void* CLIST_ITERATOR::move_to_last() { +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::move_to_last", ABORT, nullptr); +#endif - while (current != list->last) - forward(); + while (current != list->last) forward(); if (current == nullptr) return nullptr; @@ -342,58 +325,53 @@ void *CLIST_ITERATOR::move_to_last() { * (This function hasn't been in-lined because its a bit big!) **********************************************************************/ -void CLIST_ITERATOR::exchange( //positions of 2 links - CLIST_ITERATOR *other_it) { //other iterator +void CLIST_ITERATOR::exchange( // positions of 2 links + CLIST_ITERATOR* other_it) { // other iterator const ERRCODE DONT_EXCHANGE_DELETED = - "Can't exchange deleted elements of lists"; + "Can't exchange deleted elements of lists"; - CLIST_LINK *old_current; + CLIST_LINK* old_current; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::exchange", ABORT, nullptr); +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::exchange", ABORT, nullptr); if (!other_it) - BAD_PARAMETER.error ("CLIST_ITERATOR::exchange", ABORT, "other_it nullptr"); + BAD_PARAMETER.error("CLIST_ITERATOR::exchange", ABORT, "other_it nullptr"); if (!(other_it->list)) - NO_LIST.error ("CLIST_ITERATOR::exchange", ABORT, "other_it"); - #endif + NO_LIST.error("CLIST_ITERATOR::exchange", ABORT, "other_it"); +#endif /* Do nothing if either list is empty or if both iterators reference the same link */ - if ((list->empty ()) || - (other_it->list->empty ()) || (current == other_it->current)) + if ((list->empty()) || (other_it->list->empty()) || + (current == other_it->current)) return; /* Error if either current element is deleted */ if (!current || !other_it->current) - DONT_EXCHANGE_DELETED.error ("CLIST_ITERATOR.exchange", ABORT, nullptr); + DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT, nullptr); /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements (other before this); non-doubleton adjacent elements (this before other); non-adjacent elements. */ - //adjacent links - if ((next == other_it->current) || - (other_it->next == current)) { - //doubleton list - if ((next == other_it->current) && - (other_it->next == current)) { + // adjacent links + if ((next == other_it->current) || (other_it->next == current)) { + // doubleton list + if ((next == other_it->current) && (other_it->next == current)) { prev = next = current; other_it->prev = other_it->next = other_it->current; - } - else { //non-doubleton with - //adjacent links - //other before this + } else { // non-doubleton with + // adjacent links + // other before this if (other_it->next == current) { other_it->prev->next = current; other_it->current->next = next; current->next = other_it->current; other_it->next = other_it->current; prev = current; - } - else { //this before other + } else { // this before other prev->next = other_it->current; current->next = other_it->next; other_it->current->next = current; @@ -401,8 +379,7 @@ void CLIST_ITERATOR::exchange( //positions of 2 link other_it->prev = other_it->current; } } - } - else { //no overlap + } else { // no overlap prev->next = other_it->current; current->next = other_it->next; other_it->prev->next = current; @@ -412,15 +389,11 @@ void CLIST_ITERATOR::exchange( //positions of 2 link /* update end of list pointer when necessary (remember that the 2 iterators may iterate over different lists!) */ - if (list->last == current) - list->last = other_it->current; - if (other_it->list->last == other_it->current) - other_it->list->last = current; + if (list->last == current) list->last = other_it->current; + if (other_it->list->last == other_it->current) other_it->list->last = current; - if (current == cycle_pt) - cycle_pt = other_it->cycle_pt; - if (other_it->current == other_it->cycle_pt) - other_it->cycle_pt = cycle_pt; + if (current == cycle_pt) cycle_pt = other_it->cycle_pt; + if (other_it->current == other_it->cycle_pt) other_it->cycle_pt = cycle_pt; /* The actual exchange - in all cases*/ @@ -439,68 +412,64 @@ void CLIST_ITERATOR::exchange( //positions of 2 link * (Can't inline this function because it contains a loop) **********************************************************************/ -CLIST_LINK *CLIST_ITERATOR::extract_sublist( //from this current - CLIST_ITERATOR *other_it) { //to other current +CLIST_LINK* CLIST_ITERATOR::extract_sublist( // from this current + CLIST_ITERATOR* other_it) { // to other current CLIST_ITERATOR temp_it = *this; - CLIST_LINK *end_of_new_list; + CLIST_LINK* end_of_new_list; const ERRCODE BAD_SUBLIST = "Can't find sublist end point in original list"; - #ifndef NDEBUG +#ifndef NDEBUG const ERRCODE BAD_EXTRACTION_PTS = - "Can't extract sublist from points on different lists"; + "Can't extract sublist from points on different lists"; const ERRCODE DONT_EXTRACT_DELETED = - "Can't extract a sublist marked by deleted points"; + "Can't extract a sublist marked by deleted points"; if (!other_it) - BAD_PARAMETER.error ("CLIST_ITERATOR::extract_sublist", ABORT, - "other_it nullptr"); - if (!list) - NO_LIST.error ("CLIST_ITERATOR::extract_sublist", ABORT, nullptr); + BAD_PARAMETER.error("CLIST_ITERATOR::extract_sublist", ABORT, + "other_it nullptr"); + if (!list) NO_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT, nullptr); if (list != other_it->list) - BAD_EXTRACTION_PTS.error ("CLIST_ITERATOR.extract_sublist", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("CLIST_ITERATOR::extract_sublist", ABORT, nullptr); + BAD_EXTRACTION_PTS.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr); + if (list->empty()) + EMPTY_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT, nullptr); if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error ("CLIST_ITERATOR.extract_sublist", ABORT, - nullptr); - #endif + DONT_EXTRACT_DELETED.error("CLIST_ITERATOR.extract_sublist", ABORT, + nullptr); +#endif ex_current_was_last = other_it->ex_current_was_last = false; ex_current_was_cycle_pt = false; other_it->ex_current_was_cycle_pt = false; - temp_it.mark_cycle_pt (); - do { //walk sublist - if (temp_it.cycled_list()) // can't find end pt - BAD_SUBLIST.error ("CLIST_ITERATOR.extract_sublist", ABORT, nullptr); + temp_it.mark_cycle_pt(); + do { // walk sublist + if (temp_it.cycled_list()) // can't find end pt + BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr); - if (temp_it.at_last ()) { + if (temp_it.at_last()) { list->last = prev; ex_current_was_last = other_it->ex_current_was_last = true; } - if (temp_it.current == cycle_pt) - ex_current_was_cycle_pt = true; + if (temp_it.current == cycle_pt) ex_current_was_cycle_pt = true; if (temp_it.current == other_it->cycle_pt) other_it->ex_current_was_cycle_pt = true; - temp_it.forward (); - } - while (temp_it.prev != other_it->current); + temp_it.forward(); + } while (temp_it.prev != other_it->current); - //circularise sublist + // circularise sublist other_it->current->next = current; end_of_new_list = other_it->current; - //sublist = whole list + // sublist = whole list if (prev == other_it->current) { list->last = nullptr; prev = current = next = nullptr; other_it->prev = other_it->current = other_it->next = nullptr; - } - else { + } else { prev->next = other_it->next; current = other_it->current = nullptr; next = other_it->next; diff --git a/src/ccutil/clst.h b/src/ccutil/clst.h index c87e9e78bb..7415e7151a 100644 --- a/src/ccutil/clst.h +++ b/src/ccutil/clst.h @@ -22,8 +22,8 @@ #include #include "host.h" -#include "serialis.h" #include "lsterr.h" +#include "serialis.h" class CLIST_ITERATOR; @@ -37,28 +37,27 @@ class CLIST_ITERATOR; * walks the list. **********************************************************************/ -class DLLSYM CLIST_LINK -{ +class DLLSYM CLIST_LINK { friend class CLIST_ITERATOR; friend class CLIST; - CLIST_LINK *next; - void *data; + CLIST_LINK* next; + void* data; - public: - CLIST_LINK() { //constructor - data = next = nullptr; - } + public: + CLIST_LINK() { // constructor + data = next = nullptr; + } - CLIST_LINK( // copy constructor - const CLIST_LINK &) { // don't copy link - data = next = nullptr; - } + CLIST_LINK( // copy constructor + const CLIST_LINK&) { // don't copy link + data = next = nullptr; + } - void operator=( // don't copy links - const CLIST_LINK &) { - data = next = nullptr; - } + void operator=( // don't copy links + const CLIST_LINK&) { + data = next = nullptr; + } }; /********************************************************************** @@ -67,72 +66,70 @@ class DLLSYM CLIST_LINK * Generic list class for singly linked CONS cell lists **********************************************************************/ -class DLLSYM CLIST -{ +class DLLSYM CLIST { friend class CLIST_ITERATOR; - CLIST_LINK *last; //End of list + CLIST_LINK* last; // End of list //(Points to head) - CLIST_LINK *First() { // return first + CLIST_LINK* First() { // return first return last != nullptr ? last->next : nullptr; } - public: - CLIST() { //constructor - last = nullptr; - } - - ~CLIST () { //destructor - shallow_clear(); - } + public: + CLIST() { // constructor + last = nullptr; + } - void internal_deep_clear ( //destroy all links - void (*zapper) (void *)); //ptr to zapper functn + ~CLIST() { // destructor + shallow_clear(); + } - void shallow_clear(); // clear list but don't - // delete data elements + void internal_deep_clear( // destroy all links + void (*zapper)(void*)); // ptr to zapper functn - bool empty() const { //is list empty? - return !last; - } + void shallow_clear(); // clear list but don't + // delete data elements - bool singleton() const { - return last != nullptr ? (last == last->next) : false; - } + bool empty() const { // is list empty? + return !last; + } - void shallow_copy( //dangerous!! - CLIST *from_list) { //beware destructors!! - last = from_list->last; - } + bool singleton() const { + return last != nullptr ? (last == last->next) : false; + } - void assign_to_sublist( //to this list - CLIST_ITERATOR *start_it, //from list start - CLIST_ITERATOR *end_it); //from list end - - int32_t length() const; //# elements in list - - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); - - // Assuming list has been sorted already, insert new_data to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. Time is O(1) to add to beginning or end. - // Time is linear to add pre-sorted items to an empty list. - // If unique, then don't add duplicate entries. - // Returns true if the element was added to the list. - bool add_sorted(int comparator(const void*, const void*), - bool unique, void* new_data); - - // Assuming that the minuend and subtrahend are already sorted with - // the same comparison function, shallow clears this and then copies - // the set difference minuend - subtrahend to this, being the elements - // of minuend that do not compare equal to anything in subtrahend. - // If unique is true, any duplicates in minuend are also eliminated. - void set_subtract(int comparator(const void*, const void*), bool unique, - CLIST* minuend, CLIST* subtrahend); + void shallow_copy( // dangerous!! + CLIST* from_list) { // beware destructors!! + last = from_list->last; + } + void assign_to_sublist( // to this list + CLIST_ITERATOR* start_it, // from list start + CLIST_ITERATOR* end_it); // from list end + + int32_t length() const; //# elements in list + + void sort( // sort elements + int comparator( // comparison routine + const void*, const void*)); + + // Assuming list has been sorted already, insert new_data to + // keep the list sorted according to the same comparison function. + // Comparison function is the same as used by sort, i.e. uses double + // indirection. Time is O(1) to add to beginning or end. + // Time is linear to add pre-sorted items to an empty list. + // If unique, then don't add duplicate entries. + // Returns true if the element was added to the list. + bool add_sorted(int comparator(const void*, const void*), bool unique, + void* new_data); + + // Assuming that the minuend and subtrahend are already sorted with + // the same comparison function, shallow clears this and then copies + // the set difference minuend - subtrahend to this, being the elements + // of minuend that do not compare equal to anything in subtrahend. + // If unique is true, any duplicates in minuend are also eliminated. + void set_subtract(int comparator(const void*, const void*), bool unique, + CLIST* minuend, CLIST* subtrahend); }; /*********************************************************************** @@ -142,108 +139,103 @@ class DLLSYM CLIST *links **********************************************************************/ -class DLLSYM CLIST_ITERATOR -{ - friend void CLIST::assign_to_sublist(CLIST_ITERATOR *, CLIST_ITERATOR *); - - CLIST *list; //List being iterated - CLIST_LINK *prev; //prev element - CLIST_LINK *current; //current element - CLIST_LINK *next; //next element - bool ex_current_was_last; //current extracted - //was end of list - bool ex_current_was_cycle_pt; //current extracted - //was cycle point - CLIST_LINK *cycle_pt; //point we are cycling - //the list to. - bool started_cycling; //Have we moved off - //the start? - - CLIST_LINK *extract_sublist( //from this current... - CLIST_ITERATOR *other_it); //to other current - - public: - CLIST_ITERATOR() { //constructor - list = nullptr; - } //unassigned list - - CLIST_ITERATOR( //constructor - CLIST *list_to_iterate); - - void set_to_list( //change list - CLIST *list_to_iterate); - - void add_after_then_move( //add after current & - void *new_data); //move to new - - void add_after_stay_put( //add after current & - void *new_data); //stay at current - - void add_before_then_move( //add before current & - void *new_data); //move to new - - void add_before_stay_put( //add before current & - void *new_data); //stay at current - - void add_list_after( //add a list & - CLIST *list_to_add); //stay at current - - void add_list_before( //add a list & - CLIST *list_to_add); //move to it 1st item - - void *data() { //get current data - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::data", ABORT, nullptr); - if (!current) - NULL_DATA.error ("CLIST_ITERATOR::data", ABORT, nullptr); - #endif - return current->data; - } +class DLLSYM CLIST_ITERATOR { + friend void CLIST::assign_to_sublist(CLIST_ITERATOR*, CLIST_ITERATOR*); - void *data_relative( //get data + or - ... - int8_t offset); //offset from current + CLIST* list; // List being iterated + CLIST_LINK* prev; // prev element + CLIST_LINK* current; // current element + CLIST_LINK* next; // next element + bool ex_current_was_last; // current extracted + // was end of list + bool ex_current_was_cycle_pt; // current extracted + // was cycle point + CLIST_LINK* cycle_pt; // point we are cycling + // the list to. + bool started_cycling; // Have we moved off + // the start? - void *forward(); //move to next element + CLIST_LINK* extract_sublist( // from this current... + CLIST_ITERATOR* other_it); // to other current - void *extract(); //remove from list + public: + CLIST_ITERATOR() { // constructor + list = nullptr; + } // unassigned list - void *move_to_first(); //go to start of list + CLIST_ITERATOR( // constructor + CLIST* list_to_iterate); - void *move_to_last(); //go to end of list + void set_to_list( // change list + CLIST* list_to_iterate); - void mark_cycle_pt(); //remember current + void add_after_then_move( // add after current & + void* new_data); // move to new - bool empty() { //is list empty? - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::empty", ABORT, nullptr); - #endif - return list->empty (); - } + void add_after_stay_put( // add after current & + void* new_data); // stay at current - bool current_extracted() { //current extracted? - return !current; - } + void add_before_then_move( // add before current & + void* new_data); // move to new + + void add_before_stay_put( // add before current & + void* new_data); // stay at current + + void add_list_after( // add a list & + CLIST* list_to_add); // stay at current - bool at_first(); //Current is first? + void add_list_before( // add a list & + CLIST* list_to_add); // move to it 1st item - bool at_last(); //Current is last? + void* data() { // get current data +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::data", ABORT, nullptr); + if (!current) NULL_DATA.error("CLIST_ITERATOR::data", ABORT, nullptr); +#endif + return current->data; + } + + void* data_relative( // get data + or - ... + int8_t offset); // offset from current + + void* forward(); // move to next element + + void* extract(); // remove from list + + void* move_to_first(); // go to start of list + + void* move_to_last(); // go to end of list + + void mark_cycle_pt(); // remember current + + bool empty() { // is list empty? +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::empty", ABORT, nullptr); +#endif + return list->empty(); + } + + bool current_extracted() { // current extracted? + return !current; + } + + bool at_first(); // Current is first? - bool cycled_list(); //Completed a cycle? + bool at_last(); // Current is last? - void add_to_end( // add at end & - void *new_data); // don't move + bool cycled_list(); // Completed a cycle? - void exchange( //positions of 2 links - CLIST_ITERATOR *other_it); //other iterator + void add_to_end( // add at end & + void* new_data); // don't move - int32_t length(); //# elements in list + void exchange( // positions of 2 links + CLIST_ITERATOR* other_it); // other iterator - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); + int32_t length(); //# elements in list + void sort( // sort elements + int comparator( // comparison routine + const void*, const void*)); }; /*********************************************************************** @@ -253,19 +245,19 @@ class DLLSYM CLIST_ITERATOR * over. **********************************************************************/ -inline void CLIST_ITERATOR::set_to_list( //change list - CLIST *list_to_iterate) { - #ifndef NDEBUG +inline void CLIST_ITERATOR::set_to_list( // change list + CLIST* list_to_iterate) { +#ifndef NDEBUG if (!list_to_iterate) - BAD_PARAMETER.error ("CLIST_ITERATOR::set_to_list", ABORT, - "list_to_iterate is nullptr"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::set_to_list", ABORT, + "list_to_iterate is nullptr"); +#endif list = list_to_iterate; prev = list->last; - current = list->First (); + current = list->First(); next = current != nullptr ? current->next : nullptr; - cycle_pt = nullptr; //await explicit set + cycle_pt = nullptr; // await explicit set started_cycling = false; ex_current_was_last = false; ex_current_was_cycle_pt = false; @@ -277,7 +269,7 @@ inline void CLIST_ITERATOR::set_to_list( //change list * CONSTRUCTOR - set iterator to specified list; **********************************************************************/ -inline CLIST_ITERATOR::CLIST_ITERATOR(CLIST *list_to_iterate) { +inline CLIST_ITERATOR::CLIST_ITERATOR(CLIST* list_to_iterate) { set_to_list(list_to_iterate); } @@ -289,40 +281,35 @@ inline CLIST_ITERATOR::CLIST_ITERATOR(CLIST *list_to_iterate) { **********************************************************************/ inline void CLIST_ITERATOR::add_after_then_move( // element to add - void *new_data) { - CLIST_LINK *new_element; + void* new_data) { + CLIST_LINK* new_element; - #ifndef NDEBUG +#ifndef NDEBUG if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_after_then_move", ABORT, nullptr); + NO_LIST.error("CLIST_ITERATOR::add_after_then_move", ABORT, nullptr); if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_after_then_move", ABORT, - "new_data is nullptr"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::add_after_then_move", ABORT, + "new_data is nullptr"); +#endif new_element = new CLIST_LINK; new_element->data = new_data; - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; list->last = new_element; prev = next = new_element; - } - else { + } else { new_element->next = next; - if (current) { //not extracted + if (current) { // not extracted current->next = new_element; prev = current; - if (current == list->last) - list->last = new_element; - } - else { //current extracted + if (current == list->last) list->last = new_element; + } else { // current extracted prev->next = new_element; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; + if (ex_current_was_last) list->last = new_element; + if (ex_current_was_cycle_pt) cycle_pt = new_element; } } current = new_element; @@ -336,38 +323,34 @@ inline void CLIST_ITERATOR::add_after_then_move( // element to add **********************************************************************/ inline void CLIST_ITERATOR::add_after_stay_put( // element to add - void *new_data) { - CLIST_LINK *new_element; + void* new_data) { + CLIST_LINK* new_element; - #ifndef NDEBUG +#ifndef NDEBUG if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_after_stay_put", ABORT, nullptr); + NO_LIST.error("CLIST_ITERATOR::add_after_stay_put", ABORT, nullptr); if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_after_stay_put", ABORT, - "new_data is nullptr"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::add_after_stay_put", ABORT, + "new_data is nullptr"); +#endif new_element = new CLIST_LINK; new_element->data = new_data; - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; list->last = new_element; prev = next = new_element; ex_current_was_last = false; current = nullptr; - } - else { + } else { new_element->next = next; - if (current) { //not extracted + if (current) { // not extracted current->next = new_element; - if (prev == current) - prev = new_element; - if (current == list->last) - list->last = new_element; - } - else { //current extracted + if (prev == current) prev = new_element; + if (current == list->last) list->last = new_element; + } else { // current extracted prev->next = new_element; if (ex_current_was_last) { list->last = new_element; @@ -386,37 +369,33 @@ inline void CLIST_ITERATOR::add_after_stay_put( // element to add **********************************************************************/ inline void CLIST_ITERATOR::add_before_then_move( // element to add - void *new_data) { - CLIST_LINK *new_element; + void* new_data) { + CLIST_LINK* new_element; - #ifndef NDEBUG +#ifndef NDEBUG if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_before_then_move", ABORT, nullptr); + NO_LIST.error("CLIST_ITERATOR::add_before_then_move", ABORT, nullptr); if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_before_then_move", ABORT, - "new_data is nullptr"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::add_before_then_move", ABORT, + "new_data is nullptr"); +#endif new_element = new CLIST_LINK; new_element->data = new_data; - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; list->last = new_element; prev = next = new_element; - } - else { + } else { prev->next = new_element; - if (current) { //not extracted + if (current) { // not extracted new_element->next = current; next = current; - } - else { //current extracted + } else { // current extracted new_element->next = next; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; + if (ex_current_was_last) list->last = new_element; + if (ex_current_was_cycle_pt) cycle_pt = new_element; } } current = new_element; @@ -430,38 +409,34 @@ inline void CLIST_ITERATOR::add_before_then_move( // element to add **********************************************************************/ inline void CLIST_ITERATOR::add_before_stay_put( // element to add - void *new_data) { - CLIST_LINK *new_element; + void* new_data) { + CLIST_LINK* new_element; - #ifndef NDEBUG +#ifndef NDEBUG if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_before_stay_put", ABORT, nullptr); + NO_LIST.error("CLIST_ITERATOR::add_before_stay_put", ABORT, nullptr); if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_before_stay_put", ABORT, - "new_data is nullptr"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::add_before_stay_put", ABORT, + "new_data is nullptr"); +#endif new_element = new CLIST_LINK; new_element->data = new_data; - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; list->last = new_element; prev = next = new_element; ex_current_was_last = true; current = nullptr; - } - else { + } else { prev->next = new_element; - if (current) { //not extracted + if (current) { // not extracted new_element->next = current; - if (next == current) - next = new_element; - } - else { //current extracted + if (next == current) next = new_element; + } else { // current extracted new_element->next = next; - if (ex_current_was_last) - list->last = new_element; + if (ex_current_was_last) list->last = new_element; } prev = new_element; } @@ -475,33 +450,29 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add * iterator. **********************************************************************/ -inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_list_after", ABORT, nullptr); +inline void CLIST_ITERATOR::add_list_after(CLIST* list_to_add) { +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::add_list_after", ABORT, nullptr); if (!list_to_add) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_list_after", ABORT, - "list_to_add is nullptr"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::add_list_after", ABORT, + "list_to_add is nullptr"); +#endif - if (!list_to_add->empty ()) { - if (list->empty ()) { + if (!list_to_add->empty()) { + if (list->empty()) { list->last = list_to_add->last; prev = list->last; - next = list->First (); + next = list->First(); ex_current_was_last = true; current = nullptr; - } - else { - if (current) { //not extracted - current->next = list_to_add->First (); - if (current == list->last) - list->last = list_to_add->last; + } else { + if (current) { // not extracted + current->next = list_to_add->First(); + if (current == list->last) list->last = list_to_add->last; list_to_add->last->next = next; next = current->next; - } - else { //current extracted - prev->next = list_to_add->First (); + } else { // current extracted + prev->next = list_to_add->First(); if (ex_current_was_last) { list->last = list_to_add->last; ex_current_was_last = false; @@ -522,34 +493,29 @@ inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) { * iterator. **********************************************************************/ -inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_list_before", ABORT, nullptr); +inline void CLIST_ITERATOR::add_list_before(CLIST* list_to_add) { +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::add_list_before", ABORT, nullptr); if (!list_to_add) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_list_before", ABORT, - "list_to_add is nullptr"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::add_list_before", ABORT, + "list_to_add is nullptr"); +#endif - if (!list_to_add->empty ()) { - if (list->empty ()) { + if (!list_to_add->empty()) { + if (list->empty()) { list->last = list_to_add->last; prev = list->last; - current = list->First (); + current = list->First(); next = current->next; ex_current_was_last = false; - } - else { - prev->next = list_to_add->First (); - if (current) { //not extracted + } else { + prev->next = list_to_add->First(); + if (current) { // not extracted list_to_add->last->next = current; - } - else { //current extracted + } else { // current extracted list_to_add->last->next = next; - if (ex_current_was_last) - list->last = list_to_add->last; - if (ex_current_was_cycle_pt) - cycle_pt = prev->next; + if (ex_current_was_last) list->last = list_to_add->last; + if (ex_current_was_cycle_pt) cycle_pt = prev->next; } current = prev->next; next = current->next; @@ -567,23 +533,21 @@ inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) { * nullptr. If the data is to be deleted, this is the callers responsibility. **********************************************************************/ -inline void *CLIST_ITERATOR::extract() { - void *extracted_data; +inline void* CLIST_ITERATOR::extract() { + void* extracted_data; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::extract", ABORT, nullptr); - if (!current) //list empty or - //element extracted - NULL_CURRENT.error ("CLIST_ITERATOR::extract", - ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::extract", ABORT, nullptr); + if (!current) // list empty or + // element extracted + NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT, nullptr); +#endif if (list->singleton()) { // Special case where we do need to change the iterator. prev = next = list->last = nullptr; } else { - prev->next = next; //remove from list + prev->next = next; // remove from list if (current == list->last) { list->last = prev; @@ -595,7 +559,7 @@ inline void *CLIST_ITERATOR::extract() { // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. ex_current_was_cycle_pt = (current == cycle_pt); extracted_data = current->data; - delete(current); //destroy CONS cell + delete (current); // destroy CONS cell current = nullptr; return extracted_data; } @@ -607,13 +571,12 @@ inline void *CLIST_ITERATOR::extract() { * Return data just in case anyone wants it. **********************************************************************/ -inline void *CLIST_ITERATOR::move_to_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::move_to_first", ABORT, nullptr); - #endif +inline void* CLIST_ITERATOR::move_to_first() { +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::move_to_first", ABORT, nullptr); +#endif - current = list->First (); + current = list->First(); prev = list->last; next = current != nullptr ? current->next : nullptr; return current != nullptr ? current->data : nullptr; @@ -631,10 +594,9 @@ inline void *CLIST_ITERATOR::move_to_first() { **********************************************************************/ inline void CLIST_ITERATOR::mark_cycle_pt() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::mark_cycle_pt", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT, nullptr); +#endif if (current) cycle_pt = current; @@ -651,15 +613,15 @@ inline void CLIST_ITERATOR::mark_cycle_pt() { **********************************************************************/ inline bool CLIST_ITERATOR::at_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::at_first", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::at_first", ABORT, nullptr); +#endif - //we're at a deleted - return ((list->empty ()) || (current == list->First ()) || ((current == nullptr) && - (prev == list->last) && //NON-last pt between - !ex_current_was_last)); //first and last + // we're at a deleted + return ((list->empty()) || (current == list->First()) || + ((current == nullptr) && + (prev == list->last) && // NON-last pt between + !ex_current_was_last)); // first and last } /*********************************************************************** @@ -670,15 +632,15 @@ inline bool CLIST_ITERATOR::at_first() { **********************************************************************/ inline bool CLIST_ITERATOR::at_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::at_last", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::at_last", ABORT, nullptr); +#endif - //we're at a deleted - return ((list->empty ()) || (current == list->last) || ((current == nullptr) && - (prev == list->last) && //last point between - ex_current_was_last)); //first and last + // we're at a deleted + return ((list->empty()) || (current == list->last) || + ((current == nullptr) && + (prev == list->last) && // last point between + ex_current_was_last)); // first and last } /*********************************************************************** @@ -689,13 +651,11 @@ inline bool CLIST_ITERATOR::at_last() { **********************************************************************/ inline bool CLIST_ITERATOR::cycled_list() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::cycled_list", ABORT, nullptr); - #endif - - return ((list->empty ()) || ((current == cycle_pt) && started_cycling)); +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::cycled_list", ABORT, nullptr); +#endif + return ((list->empty()) || ((current == cycle_pt) && started_cycling)); } /*********************************************************************** @@ -706,12 +666,11 @@ inline bool CLIST_ITERATOR::cycled_list() { **********************************************************************/ inline int32_t CLIST_ITERATOR::length() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::length", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::length", ABORT, nullptr); +#endif - return list->length (); + return list->length(); } /*********************************************************************** @@ -721,16 +680,14 @@ inline int32_t CLIST_ITERATOR::length() { * **********************************************************************/ -inline void -CLIST_ITERATOR::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::sort", ABORT, nullptr); - #endif +inline void CLIST_ITERATOR::sort( // sort elements + int comparator( // comparison routine + const void*, const void*)) { +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::sort", ABORT, nullptr); +#endif - list->sort (comparator); + list->sort(comparator); move_to_first(); } @@ -745,26 +702,23 @@ const void *, const void *)) { **********************************************************************/ inline void CLIST_ITERATOR::add_to_end( // element to add - void *new_data) { - CLIST_LINK *new_element; + void* new_data) { + CLIST_LINK* new_element; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("CLIST_ITERATOR::add_to_end", ABORT, nullptr); +#ifndef NDEBUG + if (!list) NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT, nullptr); if (!new_data) - BAD_PARAMETER.error ("CLIST_ITERATOR::add_to_end", ABORT, - "new_data is nullptr"); - #endif + BAD_PARAMETER.error("CLIST_ITERATOR::add_to_end", ABORT, + "new_data is nullptr"); +#endif - if (this->at_last ()) { - this->add_after_stay_put (new_data); - } - else { - if (this->at_first ()) { - this->add_before_stay_put (new_data); + if (this->at_last()) { + this->add_after_stay_put(new_data); + } else { + if (this->at_first()) { + this->add_before_stay_put(new_data); list->last = prev; - } - else { //Iteratr is elsewhere + } else { // Iteratr is elsewhere new_element = new CLIST_LINK; new_element->data = new_data; @@ -775,14 +729,13 @@ inline void CLIST_ITERATOR::add_to_end( // element to add } } - /*********************************************************************** QUOTE_IT MACRO DEFINITION =========================== Replace with "". may be an arbitrary number of tokens ***********************************************************************/ -#define QUOTE_IT( parm ) #parm +#define QUOTE_IT(parm) #parm /*********************************************************************** CLISTIZE( CLASSNAME ) MACRO DEFINITION @@ -819,33 +772,33 @@ CLISTIZEH_C. #define CLISTIZEH_A(CLASSNAME) \ \ extern DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ - void *link); /*link to delete*/ \ + void* link); /*link to delete*/ \ \ - extern DLLSYM void \ - *CLASSNAME##_c1_copier( /*deep copy a link*/ \ - void *old_element); /*source link */ + extern DLLSYM void* \ + CLASSNAME##_c1_copier( /*deep copy a link*/ \ + void* old_element); /*source link */ #define CLISTIZEH_B(CLASSNAME) \ \ /*********************************************************************** \ - * CLASS - \ - *CLASSNAME##_CLIST \ - * \ - * List class for class \ - *CLASSNAME \ - * \ - **********************************************************************/ \ + * CLASS - \ + *CLASSNAME##_CLIST \ + * \ + * List class for class \ + *CLASSNAME \ + * \ + **********************************************************************/ \ \ class DLLSYM CLASSNAME##_CLIST : public CLIST { \ public: \ CLASSNAME##_CLIST() : CLIST() {} \ /* constructor */ \ \ - CLASSNAME##_CLIST( /* don't construct */ \ - const CLASSNAME##_CLIST &) /*by initial assign*/ \ + CLASSNAME##_CLIST( /* don't construct */ \ + const CLASSNAME##_CLIST&) /*by initial assign*/ \ { \ DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, \ - nullptr); \ + nullptr); \ } \ \ void deep_clear() /* delete elements */ \ @@ -854,48 +807,48 @@ CLISTIZEH_C. } \ \ void operator=(/* prevent assign */ \ - const CLASSNAME##_CLIST &) { \ - DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, nullptr); \ + const CLASSNAME##_CLIST&) { \ + DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, nullptr); \ } -#define CLISTIZEH_C(CLASSNAME) \ - } \ - ; \ - \ - /*********************************************************************** \ - * CLASS - CLASSNAME##_C_IT \ - * \ - * Iterator class for class CLASSNAME##_CLIST \ - * \ - * Note: We don't need to coerce pointers to member functions input \ - * parameters as these are automatically converted to the type of the base \ - * type. ("A ptr to a class may be converted to a pointer to a public base \ - * class of that class") \ - **********************************************************************/ \ - \ - class DLLSYM CLASSNAME##_C_IT : public CLIST_ITERATOR { \ - public: \ - CLASSNAME##_C_IT() : CLIST_ITERATOR() {} \ - \ - CLASSNAME##_C_IT(CLASSNAME##_CLIST *list) : CLIST_ITERATOR(list) {} \ - \ - CLASSNAME *data() { return (CLASSNAME *)CLIST_ITERATOR::data(); } \ - \ - CLASSNAME *data_relative(int8_t offset) { \ - return (CLASSNAME *)CLIST_ITERATOR::data_relative(offset); \ - } \ - \ - CLASSNAME *forward() { return (CLASSNAME *)CLIST_ITERATOR::forward(); } \ - \ - CLASSNAME *extract() { return (CLASSNAME *)CLIST_ITERATOR::extract(); } \ - \ - CLASSNAME *move_to_first() { \ - return (CLASSNAME *)CLIST_ITERATOR::move_to_first(); \ - } \ - \ - CLASSNAME *move_to_last() { \ - return (CLASSNAME *)CLIST_ITERATOR::move_to_last(); \ - } \ +#define CLISTIZEH_C(CLASSNAME) \ + } \ + ; \ + \ + /*********************************************************************** \ + * CLASS - CLASSNAME##_C_IT \ + * \ + * Iterator class for class CLASSNAME##_CLIST \ + * \ + * Note: We don't need to coerce pointers to member functions input \ + * parameters as these are automatically converted to the type of the base \ + * type. ("A ptr to a class may be converted to a pointer to a public base \ + * class of that class") \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_C_IT : public CLIST_ITERATOR { \ + public: \ + CLASSNAME##_C_IT() : CLIST_ITERATOR() {} \ + \ + CLASSNAME##_C_IT(CLASSNAME##_CLIST* list) : CLIST_ITERATOR(list) {} \ + \ + CLASSNAME* data() { return (CLASSNAME*)CLIST_ITERATOR::data(); } \ + \ + CLASSNAME* data_relative(int8_t offset) { \ + return (CLASSNAME*)CLIST_ITERATOR::data_relative(offset); \ + } \ + \ + CLASSNAME* forward() { return (CLASSNAME*)CLIST_ITERATOR::forward(); } \ + \ + CLASSNAME* extract() { return (CLASSNAME*)CLIST_ITERATOR::extract(); } \ + \ + CLASSNAME* move_to_first() { \ + return (CLASSNAME*)CLIST_ITERATOR::move_to_first(); \ + } \ + \ + CLASSNAME* move_to_last() { \ + return (CLASSNAME*)CLIST_ITERATOR::move_to_last(); \ + } \ }; #define CLISTIZEH(CLASSNAME) \ @@ -910,22 +863,22 @@ CLISTIZEH_C. CLISTIZE( CLASSNAME ) MACRO ***********************************************************************/ -#define CLISTIZE(CLASSNAME) \ - \ - /*********************************************************************** \ - * CLASSNAME##_c1_zapper \ - * \ - * A function which can delete a CLASSNAME element. This is passed to the \ - * generic deep_clear list member function so that when a list is cleared \ - *the \ - * elements on the list are properly destroyed from the base class, even \ - * though we don't use a virtual destructor function. \ - **********************************************************************/ \ - \ - DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ - void *link) /*link to delete*/ \ - { \ - delete (CLASSNAME *)link; \ +#define CLISTIZE(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASSNAME##_c1_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic deep_clear list member function so that when a list is cleared \ + *the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ + \ + DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ + void* link) /*link to delete*/ \ + { \ + delete (CLASSNAME*)link; \ } #endif diff --git a/src/ccutil/doubleptr.h b/src/ccutil/doubleptr.h index 3c59e21013..6da6cae961 100644 --- a/src/ccutil/doubleptr.h +++ b/src/ccutil/doubleptr.h @@ -78,9 +78,7 @@ class DoublePtr { } } // Returns the pointer to the other end of the double pointer. - DoublePtr* OtherEnd() const { - return other_end_; - } + DoublePtr* OtherEnd() const { return other_end_; } private: // Pointer to the other end of the link. It is always true that either diff --git a/src/ccutil/elst.cpp b/src/ccutil/elst.cpp index 3ce35f4b79..4b3db6a2ef 100644 --- a/src/ccutil/elst.cpp +++ b/src/ccutil/elst.cpp @@ -17,8 +17,8 @@ * **********************************************************************/ -#include #include "elst.h" +#include /*********************************************************************** * MEMBER FUNCTIONS OF CLASS: ELIST @@ -37,17 +37,16 @@ * the consequential memory overhead. **********************************************************************/ -void -ELIST::internal_clear ( //destroy all links -void (*zapper) (ELIST_LINK *)) { - //ptr to zapper functn - ELIST_LINK *ptr; - ELIST_LINK *next; - - if (!empty ()) { - ptr = last->next; //set to first - last->next = nullptr; //break circle - last = nullptr; //set list empty +void ELIST::internal_clear( // destroy all links + void (*zapper)(ELIST_LINK*)) { + // ptr to zapper functn + ELIST_LINK* ptr; + ELIST_LINK* next; + + if (!empty()) { + ptr = last->next; // set to first + last->next = nullptr; // break circle + last = nullptr; // set list empty while (ptr) { next = ptr->next; zapper(ptr); @@ -69,16 +68,15 @@ void (*zapper) (ELIST_LINK *)) { * end point is always the end_it position. **********************************************************************/ -void ELIST::assign_to_sublist( //to this list - ELIST_ITERATOR *start_it, //from list start - ELIST_ITERATOR *end_it) { //from list end +void ELIST::assign_to_sublist( // to this list + ELIST_ITERATOR* start_it, // from list start + ELIST_ITERATOR* end_it) { // from list end const ERRCODE LIST_NOT_EMPTY = - "Destination list must be empty before extracting a sublist"; + "Destination list must be empty before extracting a sublist"; - if (!empty ()) - LIST_NOT_EMPTY.error ("ELIST.assign_to_sublist", ABORT, nullptr); + if (!empty()) LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT, nullptr); - last = start_it->extract_sublist (end_it); + last = start_it->extract_sublist(end_it); } /*********************************************************************** @@ -91,8 +89,7 @@ int32_t ELIST::length() const { // count elements ELIST_ITERATOR it(const_cast(this)); int32_t count = 0; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - count++; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) count++; return count; } @@ -104,34 +101,33 @@ int32_t ELIST::length() const { // count elements * ( int (*)(const void *, const void *) **********************************************************************/ -void -ELIST::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { +void ELIST::sort( // sort elements + int comparator( // comparison routine + const void*, const void*)) { ELIST_ITERATOR it(this); int32_t count; - ELIST_LINK **base; //ptr array to sort - ELIST_LINK **current; + ELIST_LINK** base; // ptr array to sort + ELIST_LINK** current; int32_t i; /* Allocate an array of pointers, one per list element */ - count = length (); - base = (ELIST_LINK **) malloc (count * sizeof (ELIST_LINK *)); + count = length(); + base = (ELIST_LINK**)malloc(count * sizeof(ELIST_LINK*)); /* Extract all elements, putting the pointers in the array */ current = base; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - *current = it.extract (); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + *current = it.extract(); current++; } /* Sort the pointer array */ - qsort ((char *) base, count, sizeof (*base), comparator); + qsort((char*)base, count, sizeof(*base), comparator); /* Rebuild the list from the sorted pointers */ current = base; for (i = 0; i < count; i++) { - it.add_to_end (*current); + it.add_to_end(*current); current++; } free(base); @@ -147,9 +143,8 @@ const void *, const void *)) { // list) - new_link is not added to the list and the function returns the // pointer to the identical entry that already exists in the list // (otherwise the function returns new_link). -ELIST_LINK *ELIST::add_sorted_and_find( - int comparator(const void*, const void*), - bool unique, ELIST_LINK* new_link) { +ELIST_LINK* ELIST::add_sorted_and_find(int comparator(const void*, const void*), + bool unique, ELIST_LINK* new_link) { // Check for adding at the end. if (last == nullptr || comparator(&last, &new_link) < 0) { if (last == nullptr) { @@ -191,36 +186,32 @@ ELIST_LINK *ELIST::add_sorted_and_find( * REMEMBER: ALL LISTS ARE CIRCULAR. **********************************************************************/ -ELIST_LINK *ELIST_ITERATOR::forward() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::forward", ABORT, nullptr); - #endif - if (list->empty ()) - return nullptr; +ELIST_LINK* ELIST_ITERATOR::forward() { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::forward", ABORT, nullptr); +#endif + if (list->empty()) return nullptr; - if (current) { //not removed so - //set previous + if (current) { // not removed so + // set previous prev = current; started_cycling = TRUE; // In case next is deleted by another iterator, get next from current. current = current->next; } else { - if (ex_current_was_cycle_pt) - cycle_pt = next; + if (ex_current_was_cycle_pt) cycle_pt = next; current = next; } #ifndef NDEBUG - if (!current) - NULL_DATA.error ("ELIST_ITERATOR::forward", ABORT, nullptr); + if (!current) NULL_DATA.error("ELIST_ITERATOR::forward", ABORT, nullptr); #endif next = current->next; - #ifndef NDEBUG +#ifndef NDEBUG if (!next) - NULL_NEXT.error ("ELIST_ITERATOR::forward", ABORT, - "This is: %p Current is: %p", this, current); - #endif + NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT, + "This is: %p Current is: %p", this, current); +#endif return current; } @@ -232,29 +223,27 @@ ELIST_LINK *ELIST_ITERATOR::forward() { * (This function can't be INLINEd because it contains a loop) **********************************************************************/ -ELIST_LINK *ELIST_ITERATOR::data_relative( //get data + or - ... - int8_t offset) { //offset from current - ELIST_LINK *ptr; +ELIST_LINK* ELIST_ITERATOR::data_relative( // get data + or - ... + int8_t offset) { // offset from current + ELIST_LINK* ptr; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::data_relative", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("ELIST_ITERATOR::data_relative", ABORT, nullptr); +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT, nullptr); + if (list->empty()) + EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT, nullptr); if (offset < -1) - BAD_PARAMETER.error ("ELIST_ITERATOR::data_relative", ABORT, - "offset < -l"); - #endif + BAD_PARAMETER.error("ELIST_ITERATOR::data_relative", ABORT, "offset < -l"); +#endif if (offset == -1) ptr = prev; else - for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next); + for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) + ; - #ifndef NDEBUG - if (!ptr) - NULL_DATA.error ("ELIST_ITERATOR::data_relative", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!ptr) NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT, nullptr); +#endif return ptr; } @@ -267,14 +256,12 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( //get data + or - ... * (This function can't be INLINEd because it contains a loop) **********************************************************************/ -ELIST_LINK *ELIST_ITERATOR::move_to_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::move_to_last", ABORT, nullptr); - #endif +ELIST_LINK* ELIST_ITERATOR::move_to_last() { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT, nullptr); +#endif - while (current != list->last) - forward(); + while (current != list->last) forward(); return current; } @@ -289,58 +276,53 @@ ELIST_LINK *ELIST_ITERATOR::move_to_last() { * (This function hasn't been in-lined because its a bit big!) **********************************************************************/ -void ELIST_ITERATOR::exchange( //positions of 2 links - ELIST_ITERATOR *other_it) { //other iterator +void ELIST_ITERATOR::exchange( // positions of 2 links + ELIST_ITERATOR* other_it) { // other iterator const ERRCODE DONT_EXCHANGE_DELETED = - "Can't exchange deleted elements of lists"; + "Can't exchange deleted elements of lists"; - ELIST_LINK *old_current; + ELIST_LINK* old_current; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::exchange", ABORT, nullptr); +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::exchange", ABORT, nullptr); if (!other_it) - BAD_PARAMETER.error ("ELIST_ITERATOR::exchange", ABORT, "other_it nullptr"); + BAD_PARAMETER.error("ELIST_ITERATOR::exchange", ABORT, "other_it nullptr"); if (!(other_it->list)) - NO_LIST.error ("ELIST_ITERATOR::exchange", ABORT, "other_it"); - #endif + NO_LIST.error("ELIST_ITERATOR::exchange", ABORT, "other_it"); +#endif /* Do nothing if either list is empty or if both iterators reference the same link */ - if ((list->empty ()) || - (other_it->list->empty ()) || (current == other_it->current)) + if ((list->empty()) || (other_it->list->empty()) || + (current == other_it->current)) return; /* Error if either current element is deleted */ if (!current || !other_it->current) - DONT_EXCHANGE_DELETED.error ("ELIST_ITERATOR.exchange", ABORT, nullptr); + DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT, nullptr); /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements (other before this); non-doubleton adjacent elements (this before other); non-adjacent elements. */ - //adjacent links - if ((next == other_it->current) || - (other_it->next == current)) { - //doubleton list - if ((next == other_it->current) && - (other_it->next == current)) { + // adjacent links + if ((next == other_it->current) || (other_it->next == current)) { + // doubleton list + if ((next == other_it->current) && (other_it->next == current)) { prev = next = current; other_it->prev = other_it->next = other_it->current; - } - else { //non-doubleton with - //adjacent links - //other before this + } else { // non-doubleton with + // adjacent links + // other before this if (other_it->next == current) { other_it->prev->next = current; other_it->current->next = next; current->next = other_it->current; other_it->next = other_it->current; prev = current; - } - else { //this before other + } else { // this before other prev->next = other_it->current; current->next = other_it->next; other_it->current->next = current; @@ -348,8 +330,7 @@ void ELIST_ITERATOR::exchange( //positions of 2 link other_it->prev = other_it->current; } } - } - else { //no overlap + } else { // no overlap prev->next = other_it->current; current->next = other_it->next; other_it->prev->next = current; @@ -359,15 +340,11 @@ void ELIST_ITERATOR::exchange( //positions of 2 link /* update end of list pointer when necessary (remember that the 2 iterators may iterate over different lists!) */ - if (list->last == current) - list->last = other_it->current; - if (other_it->list->last == other_it->current) - other_it->list->last = current; + if (list->last == current) list->last = other_it->current; + if (other_it->list->last == other_it->current) other_it->list->last = current; - if (current == cycle_pt) - cycle_pt = other_it->cycle_pt; - if (other_it->current == other_it->cycle_pt) - other_it->cycle_pt = cycle_pt; + if (current == cycle_pt) cycle_pt = other_it->cycle_pt; + if (other_it->current == other_it->cycle_pt) other_it->cycle_pt = cycle_pt; /* The actual exchange - in all cases*/ @@ -386,70 +363,66 @@ void ELIST_ITERATOR::exchange( //positions of 2 link * (Can't inline this function because it contains a loop) **********************************************************************/ -ELIST_LINK *ELIST_ITERATOR::extract_sublist( //from this current - ELIST_ITERATOR *other_it) { //to other current - #ifndef NDEBUG +ELIST_LINK* ELIST_ITERATOR::extract_sublist( // from this current + ELIST_ITERATOR* other_it) { // to other current +#ifndef NDEBUG const ERRCODE BAD_EXTRACTION_PTS = - "Can't extract sublist from points on different lists"; + "Can't extract sublist from points on different lists"; const ERRCODE DONT_EXTRACT_DELETED = - "Can't extract a sublist marked by deleted points"; - #endif + "Can't extract a sublist marked by deleted points"; +#endif const ERRCODE BAD_SUBLIST = "Can't find sublist end point in original list"; ELIST_ITERATOR temp_it = *this; - ELIST_LINK *end_of_new_list; + ELIST_LINK* end_of_new_list; - #ifndef NDEBUG +#ifndef NDEBUG if (!other_it) - BAD_PARAMETER.error ("ELIST_ITERATOR::extract_sublist", ABORT, - "other_it nullptr"); - if (!list) - NO_LIST.error ("ELIST_ITERATOR::extract_sublist", ABORT, nullptr); + BAD_PARAMETER.error("ELIST_ITERATOR::extract_sublist", ABORT, + "other_it nullptr"); + if (!list) NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT, nullptr); if (list != other_it->list) - BAD_EXTRACTION_PTS.error ("ELIST_ITERATOR.extract_sublist", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("ELIST_ITERATOR::extract_sublist", ABORT, nullptr); + BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr); + if (list->empty()) + EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT, nullptr); if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error ("ELIST_ITERATOR.extract_sublist", ABORT, - nullptr); - #endif + DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT, + nullptr); +#endif ex_current_was_last = other_it->ex_current_was_last = FALSE; ex_current_was_cycle_pt = FALSE; other_it->ex_current_was_cycle_pt = FALSE; - temp_it.mark_cycle_pt (); - do { //walk sublist - if (temp_it.cycled_list()) // can't find end pt - BAD_SUBLIST.error ("ELIST_ITERATOR.extract_sublist", ABORT, nullptr); + temp_it.mark_cycle_pt(); + do { // walk sublist + if (temp_it.cycled_list()) // can't find end pt + BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr); - if (temp_it.at_last ()) { + if (temp_it.at_last()) { list->last = prev; ex_current_was_last = other_it->ex_current_was_last = TRUE; } - if (temp_it.current == cycle_pt) - ex_current_was_cycle_pt = TRUE; + if (temp_it.current == cycle_pt) ex_current_was_cycle_pt = TRUE; if (temp_it.current == other_it->cycle_pt) other_it->ex_current_was_cycle_pt = TRUE; - temp_it.forward (); - } - while (temp_it.prev != other_it->current); + temp_it.forward(); + } while (temp_it.prev != other_it->current); - //circularise sublist + // circularise sublist other_it->current->next = current; end_of_new_list = other_it->current; - //sublist = whole list + // sublist = whole list if (prev == other_it->current) { list->last = nullptr; prev = current = next = nullptr; other_it->prev = other_it->current = other_it->next = nullptr; - } - else { + } else { prev->next = other_it->next; current = other_it->current = nullptr; next = other_it->next; diff --git a/src/ccutil/elst.h b/src/ccutil/elst.h index 05846a2651..40a2507af7 100644 --- a/src/ccutil/elst.h +++ b/src/ccutil/elst.h @@ -22,8 +22,8 @@ #include #include "host.h" -#include "serialis.h" #include "lsterr.h" +#include "serialis.h" class ELIST_ITERATOR; @@ -74,34 +74,32 @@ lists. /********************************************************************** * CLASS - ELIST_LINK * - * Generic link class for singly linked lists with embedded links + * Generic link class for singly linked lists with + *embedded links * * Note: No destructor - elements are assumed to be destroyed EITHER after * they have been extracted from a list OR by the ELIST destructor which * walks the list. **********************************************************************/ -class DLLSYM ELIST_LINK -{ +class DLLSYM ELIST_LINK { friend class ELIST_ITERATOR; friend class ELIST; - ELIST_LINK *next; + ELIST_LINK* next; - public: - ELIST_LINK() { - next = nullptr; - } - //constructor + public: + ELIST_LINK() { next = nullptr; } + // constructor - ELIST_LINK(const ELIST_LINK &) { // don't copy link. - next = nullptr; - } + ELIST_LINK(const ELIST_LINK&) { // don't copy link. + next = nullptr; + } - void operator=( // don't copy links - const ELIST_LINK &) { - next = nullptr; - } + void operator=( // don't copy links + const ELIST_LINK&) { + next = nullptr; + } }; /********************************************************************** @@ -110,181 +108,173 @@ class DLLSYM ELIST_LINK * Generic list class for singly linked lists with embedded links **********************************************************************/ -class DLLSYM ELIST -{ +class DLLSYM ELIST { friend class ELIST_ITERATOR; - ELIST_LINK *last; //End of list + ELIST_LINK* last; // End of list //(Points to head) - ELIST_LINK *First() { // return first + ELIST_LINK* First() { // return first return last ? last->next : nullptr; } - public: - ELIST() { //constructor - last = nullptr; - } - - void internal_clear ( //destroy all links - //ptr to zapper functn - void (*zapper) (ELIST_LINK *)); + public: + ELIST() { // constructor + last = nullptr; + } - bool empty() const { //is list empty? - return !last; - } + void internal_clear( // destroy all links + // ptr to zapper functn + void (*zapper)(ELIST_LINK*)); - bool singleton() const { - return last ? (last == last->next) : false; - } + bool empty() const { // is list empty? + return !last; + } - void shallow_copy( //dangerous!! - ELIST *from_list) { //beware destructors!! - last = from_list->last; - } + bool singleton() const { return last ? (last == last->next) : false; } - //ptr to copier functn - void internal_deep_copy (ELIST_LINK * (*copier) (ELIST_LINK *), - const ELIST * list); //list being copied - - void assign_to_sublist( //to this list - ELIST_ITERATOR *start_it, //from list start - ELIST_ITERATOR *end_it); //from list end - - int32_t length() const; // # elements in list - - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); - - // Assuming list has been sorted already, insert new_link to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. Time is O(1) to add to beginning or end. - // Time is linear to add pre-sorted items to an empty list. - // If unique is set to true and comparator() returns 0 (an entry with the - // same information as the one contained in new_link is already in the - // list) - new_link is not added to the list and the function returns the - // pointer to the identical entry that already exists in the list - // (otherwise the function returns new_link). - ELIST_LINK *add_sorted_and_find(int comparator(const void*, const void*), - bool unique, ELIST_LINK* new_link); - - // Same as above, but returns true if the new entry was inserted, false - // if the identical entry already existed in the list. - bool add_sorted(int comparator(const void*, const void*), - bool unique, ELIST_LINK* new_link) { - return (add_sorted_and_find(comparator, unique, new_link) == new_link); - } + void shallow_copy( // dangerous!! + ELIST* from_list) { // beware destructors!! + last = from_list->last; + } + // ptr to copier functn + void internal_deep_copy(ELIST_LINK* (*copier)(ELIST_LINK*), + const ELIST* list); // list being copied + + void assign_to_sublist( // to this list + ELIST_ITERATOR* start_it, // from list start + ELIST_ITERATOR* end_it); // from list end + + int32_t length() const; // # elements in list + + void sort( // sort elements + int comparator( // comparison routine + const void*, const void*)); + + // Assuming list has been sorted already, insert new_link to + // keep the list sorted according to the same comparison function. + // Comparison function is the same as used by sort, i.e. uses double + // indirection. Time is O(1) to add to beginning or end. + // Time is linear to add pre-sorted items to an empty list. + // If unique is set to true and comparator() returns 0 (an entry with the + // same information as the one contained in new_link is already in the + // list) - new_link is not added to the list and the function returns the + // pointer to the identical entry that already exists in the list + // (otherwise the function returns new_link). + ELIST_LINK* add_sorted_and_find(int comparator(const void*, const void*), + bool unique, ELIST_LINK* new_link); + + // Same as above, but returns true if the new entry was inserted, false + // if the identical entry already existed in the list. + bool add_sorted(int comparator(const void*, const void*), bool unique, + ELIST_LINK* new_link) { + return (add_sorted_and_find(comparator, unique, new_link) == new_link); + } }; /*********************************************************************** * CLASS - ELIST_ITERATOR * - * Generic iterator class for singly linked lists with embedded links + * Generic iterator class for singly linked lists with + *embedded links **********************************************************************/ -class DLLSYM ELIST_ITERATOR -{ - friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *); - - ELIST *list; //List being iterated - ELIST_LINK *prev; //prev element - ELIST_LINK *current; //current element - ELIST_LINK *next; //next element - bool ex_current_was_last; //current extracted - //was end of list - bool ex_current_was_cycle_pt; //current extracted - //was cycle point - ELIST_LINK *cycle_pt; //point we are cycling - //the list to. - bool started_cycling; //Have we moved off - //the start? - - ELIST_LINK *extract_sublist( //from this current... - ELIST_ITERATOR *other_it); //to other current - - public: - ELIST_ITERATOR() { //constructor - list = nullptr; - } //unassigned list - - explicit ELIST_ITERATOR(ELIST *list_to_iterate); - - void set_to_list( //change list - ELIST *list_to_iterate); - - void add_after_then_move( //add after current & - ELIST_LINK *new_link); //move to new - - void add_after_stay_put( //add after current & - ELIST_LINK *new_link); //stay at current - - void add_before_then_move( //add before current & - ELIST_LINK *new_link); //move to new - - void add_before_stay_put( //add before current & - ELIST_LINK *new_link); //stay at current - - void add_list_after( //add a list & - ELIST *list_to_add); //stay at current - - void add_list_before( //add a list & - ELIST *list_to_add); //move to it 1st item - - ELIST_LINK *data() { //get current data - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::data", ABORT, nullptr); - if (!current) - NULL_DATA.error ("ELIST_ITERATOR::data", ABORT, nullptr); - #endif - return current; - } +class DLLSYM ELIST_ITERATOR { + friend void ELIST::assign_to_sublist(ELIST_ITERATOR*, ELIST_ITERATOR*); - ELIST_LINK *data_relative( //get data + or - ... - int8_t offset); //offset from current + ELIST* list; // List being iterated + ELIST_LINK* prev; // prev element + ELIST_LINK* current; // current element + ELIST_LINK* next; // next element + bool ex_current_was_last; // current extracted + // was end of list + bool ex_current_was_cycle_pt; // current extracted + // was cycle point + ELIST_LINK* cycle_pt; // point we are cycling + // the list to. + bool started_cycling; // Have we moved off + // the start? - ELIST_LINK *forward(); //move to next element + ELIST_LINK* extract_sublist( // from this current... + ELIST_ITERATOR* other_it); // to other current - ELIST_LINK *extract(); //remove from list + public: + ELIST_ITERATOR() { // constructor + list = nullptr; + } // unassigned list - ELIST_LINK *move_to_first(); //go to start of list + explicit ELIST_ITERATOR(ELIST* list_to_iterate); - ELIST_LINK *move_to_last(); //go to end of list + void set_to_list( // change list + ELIST* list_to_iterate); - void mark_cycle_pt(); //remember current + void add_after_then_move( // add after current & + ELIST_LINK* new_link); // move to new - bool empty() { //is list empty? - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::empty", ABORT, nullptr); - #endif - return list->empty (); - } + void add_after_stay_put( // add after current & + ELIST_LINK* new_link); // stay at current - bool current_extracted() { //current extracted? - return !current; - } + void add_before_then_move( // add before current & + ELIST_LINK* new_link); // move to new + + void add_before_stay_put( // add before current & + ELIST_LINK* new_link); // stay at current + + void add_list_after( // add a list & + ELIST* list_to_add); // stay at current + + void add_list_before( // add a list & + ELIST* list_to_add); // move to it 1st item + + ELIST_LINK* data() { // get current data +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::data", ABORT, nullptr); + if (!current) NULL_DATA.error("ELIST_ITERATOR::data", ABORT, nullptr); +#endif + return current; + } + + ELIST_LINK* data_relative( // get data + or - ... + int8_t offset); // offset from current + + ELIST_LINK* forward(); // move to next element + + ELIST_LINK* extract(); // remove from list + + ELIST_LINK* move_to_first(); // go to start of list + + ELIST_LINK* move_to_last(); // go to end of list + + void mark_cycle_pt(); // remember current + + bool empty() { // is list empty? +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::empty", ABORT, nullptr); +#endif + return list->empty(); + } - bool at_first(); //Current is first? + bool current_extracted() { // current extracted? + return !current; + } - bool at_last(); //Current is last? + bool at_first(); // Current is first? - bool cycled_list(); //Completed a cycle? + bool at_last(); // Current is last? - void add_to_end( // add at end & - ELIST_LINK *new_link); // don't move + bool cycled_list(); // Completed a cycle? - void exchange( //positions of 2 links - ELIST_ITERATOR *other_it); //other iterator + void add_to_end( // add at end & + ELIST_LINK* new_link); // don't move - int32_t length(); //# elements in list + void exchange( // positions of 2 links + ELIST_ITERATOR* other_it); // other iterator - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); + int32_t length(); //# elements in list + void sort( // sort elements + int comparator( // comparison routine + const void*, const void*)); }; /*********************************************************************** @@ -294,36 +284,34 @@ class DLLSYM ELIST_ITERATOR * over. **********************************************************************/ -inline void ELIST_ITERATOR::set_to_list( //change list - ELIST *list_to_iterate) { - #ifndef NDEBUG +inline void ELIST_ITERATOR::set_to_list( // change list + ELIST* list_to_iterate) { +#ifndef NDEBUG if (!list_to_iterate) - BAD_PARAMETER.error ("ELIST_ITERATOR::set_to_list", ABORT, - "list_to_iterate is nullptr"); - #endif + BAD_PARAMETER.error("ELIST_ITERATOR::set_to_list", ABORT, + "list_to_iterate is nullptr"); +#endif list = list_to_iterate; prev = list->last; - current = list->First (); + current = list->First(); next = current ? current->next : nullptr; - cycle_pt = nullptr; //await explicit set + cycle_pt = nullptr; // await explicit set started_cycling = FALSE; ex_current_was_last = FALSE; ex_current_was_cycle_pt = FALSE; } - /*********************************************************************** * ELIST_ITERATOR::ELIST_ITERATOR * * CONSTRUCTOR - set iterator to specified list; **********************************************************************/ -inline ELIST_ITERATOR::ELIST_ITERATOR(ELIST *list_to_iterate) { +inline ELIST_ITERATOR::ELIST_ITERATOR(ELIST* list_to_iterate) { set_to_list(list_to_iterate); } - /*********************************************************************** * ELIST_ITERATOR::add_after_then_move * @@ -332,43 +320,37 @@ inline ELIST_ITERATOR::ELIST_ITERATOR(ELIST *list_to_iterate) { **********************************************************************/ inline void ELIST_ITERATOR::add_after_then_move( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG + ELIST_LINK* new_element) { +#ifndef NDEBUG if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr); + NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_after_then_move", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST_ITERATOR::add_after_then_move", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr); +#endif - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; list->last = new_element; prev = next = new_element; - } - else { + } else { new_element->next = next; - if (current) { //not extracted + if (current) { // not extracted current->next = new_element; prev = current; - if (current == list->last) - list->last = new_element; - } - else { //current extracted + if (current == list->last) list->last = new_element; + } else { // current extracted prev->next = new_element; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; + if (ex_current_was_last) list->last = new_element; + if (ex_current_was_cycle_pt) cycle_pt = new_element; } } current = new_element; } - /*********************************************************************** * ELIST_ITERATOR::add_after_stay_put * @@ -377,35 +359,31 @@ inline void ELIST_ITERATOR::add_after_then_move( // element to add **********************************************************************/ inline void ELIST_ITERATOR::add_after_stay_put( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG + ELIST_LINK* new_element) { +#ifndef NDEBUG if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr); + NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_after_stay_put", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST_ITERATOR::add_after_stay_put", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr); +#endif - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; list->last = new_element; prev = next = new_element; ex_current_was_last = FALSE; current = nullptr; - } - else { + } else { new_element->next = next; - if (current) { //not extracted + if (current) { // not extracted current->next = new_element; - if (prev == current) - prev = new_element; - if (current == list->last) - list->last = new_element; - } - else { //current extracted + if (prev == current) prev = new_element; + if (current == list->last) list->last = new_element; + } else { // current extracted prev->next = new_element; if (ex_current_was_last) { list->last = new_element; @@ -416,7 +394,6 @@ inline void ELIST_ITERATOR::add_after_stay_put( // element to add } } - /*********************************************************************** * ELIST_ITERATOR::add_before_then_move * @@ -425,34 +402,30 @@ inline void ELIST_ITERATOR::add_after_stay_put( // element to add **********************************************************************/ inline void ELIST_ITERATOR::add_before_then_move( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG + ELIST_LINK* new_element) { +#ifndef NDEBUG if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr); + NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_before_then_move", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST_ITERATOR::add_before_then_move", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr); +#endif - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; list->last = new_element; prev = next = new_element; - } - else { + } else { prev->next = new_element; - if (current) { //not extracted + if (current) { // not extracted new_element->next = current; next = current; - } - else { //current extracted + } else { // current extracted new_element->next = next; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; + if (ex_current_was_last) list->last = new_element; + if (ex_current_was_cycle_pt) cycle_pt = new_element; } } current = new_element; @@ -466,35 +439,31 @@ inline void ELIST_ITERATOR::add_before_then_move( // element to add **********************************************************************/ inline void ELIST_ITERATOR::add_before_stay_put( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG + ELIST_LINK* new_element) { +#ifndef NDEBUG if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr); + NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_before_stay_put", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST_ITERATOR::add_before_stay_put", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr); +#endif - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; list->last = new_element; prev = next = new_element; ex_current_was_last = TRUE; current = nullptr; - } - else { + } else { prev->next = new_element; - if (current) { //not extracted + if (current) { // not extracted new_element->next = current; - if (next == current) - next = new_element; - } - else { //current extracted + if (next == current) next = new_element; + } else { // current extracted new_element->next = next; - if (ex_current_was_last) - list->last = new_element; + if (ex_current_was_last) list->last = new_element; } prev = new_element; } @@ -508,33 +477,29 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add * iterator. **********************************************************************/ -inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_list_after", ABORT, nullptr); +inline void ELIST_ITERATOR::add_list_after(ELIST* list_to_add) { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT, nullptr); if (!list_to_add) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_list_after", ABORT, - "list_to_add is nullptr"); - #endif + BAD_PARAMETER.error("ELIST_ITERATOR::add_list_after", ABORT, + "list_to_add is nullptr"); +#endif - if (!list_to_add->empty ()) { - if (list->empty ()) { + if (!list_to_add->empty()) { + if (list->empty()) { list->last = list_to_add->last; prev = list->last; - next = list->First (); + next = list->First(); ex_current_was_last = TRUE; current = nullptr; - } - else { - if (current) { //not extracted - current->next = list_to_add->First (); - if (current == list->last) - list->last = list_to_add->last; + } else { + if (current) { // not extracted + current->next = list_to_add->First(); + if (current == list->last) list->last = list_to_add->last; list_to_add->last->next = next; next = current->next; - } - else { //current extracted - prev->next = list_to_add->First (); + } else { // current extracted + prev->next = list_to_add->First(); if (ex_current_was_last) { list->last = list_to_add->last; ex_current_was_last = FALSE; @@ -547,7 +512,6 @@ inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) { } } - /*********************************************************************** * ELIST_ITERATOR::add_list_before * @@ -556,34 +520,29 @@ inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) { * iterator. **********************************************************************/ -inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_list_before", ABORT, nullptr); +inline void ELIST_ITERATOR::add_list_before(ELIST* list_to_add) { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT, nullptr); if (!list_to_add) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_list_before", ABORT, - "list_to_add is nullptr"); - #endif + BAD_PARAMETER.error("ELIST_ITERATOR::add_list_before", ABORT, + "list_to_add is nullptr"); +#endif - if (!list_to_add->empty ()) { - if (list->empty ()) { + if (!list_to_add->empty()) { + if (list->empty()) { list->last = list_to_add->last; prev = list->last; - current = list->First (); + current = list->First(); next = current->next; ex_current_was_last = FALSE; - } - else { - prev->next = list_to_add->First (); - if (current) { //not extracted + } else { + prev->next = list_to_add->First(); + if (current) { // not extracted list_to_add->last->next = current; - } - else { //current extracted + } else { // current extracted list_to_add->last->next = next; - if (ex_current_was_last) - list->last = list_to_add->last; - if (ex_current_was_cycle_pt) - cycle_pt = prev->next; + if (ex_current_was_last) list->last = list_to_add->last; + if (ex_current_was_cycle_pt) cycle_pt = prev->next; } current = prev->next; next = current->next; @@ -592,7 +551,6 @@ inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { } } - /*********************************************************************** * ELIST_ITERATOR::extract * @@ -602,23 +560,21 @@ inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { * is to be deleted, this is the callers responsibility. **********************************************************************/ -inline ELIST_LINK *ELIST_ITERATOR::extract() { - ELIST_LINK *extracted_link; +inline ELIST_LINK* ELIST_ITERATOR::extract() { + ELIST_LINK* extracted_link; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::extract", ABORT, nullptr); - if (!current) //list empty or - //element extracted - NULL_CURRENT.error ("ELIST_ITERATOR::extract", - ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::extract", ABORT, nullptr); + if (!current) // list empty or + // element extracted + NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT, nullptr); +#endif if (list->singleton()) { // Special case where we do need to change the iterator. prev = next = list->last = nullptr; } else { - prev->next = next; //remove from list + prev->next = next; // remove from list if (current == list->last) { list->last = prev; @@ -630,12 +586,11 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() { // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. ex_current_was_cycle_pt = (current == cycle_pt) ? TRUE : FALSE; extracted_link = current; - extracted_link->next = nullptr; //for safety + extracted_link->next = nullptr; // for safety current = nullptr; return extracted_link; } - /*********************************************************************** * ELIST_ITERATOR::move_to_first() * @@ -643,19 +598,17 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() { * Return data just in case anyone wants it. **********************************************************************/ -inline ELIST_LINK *ELIST_ITERATOR::move_to_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::move_to_first", ABORT, nullptr); - #endif +inline ELIST_LINK* ELIST_ITERATOR::move_to_first() { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT, nullptr); +#endif - current = list->First (); + current = list->First(); prev = list->last; next = current ? current->next : nullptr; return current; } - /*********************************************************************** * ELIST_ITERATOR::mark_cycle_pt() * @@ -668,10 +621,9 @@ inline ELIST_LINK *ELIST_ITERATOR::move_to_first() { **********************************************************************/ inline void ELIST_ITERATOR::mark_cycle_pt() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::mark_cycle_pt", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT, nullptr); +#endif if (current) cycle_pt = current; @@ -680,7 +632,6 @@ inline void ELIST_ITERATOR::mark_cycle_pt() { started_cycling = FALSE; } - /*********************************************************************** * ELIST_ITERATOR::at_first() * @@ -689,18 +640,17 @@ inline void ELIST_ITERATOR::mark_cycle_pt() { **********************************************************************/ inline bool ELIST_ITERATOR::at_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::at_first", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::at_first", ABORT, nullptr); +#endif - //we're at a deleted - return ((list->empty ()) || (current == list->First ()) || ((current == nullptr) && - (prev == list->last) && //NON-last pt between - !ex_current_was_last)); //first and last + // we're at a deleted + return ((list->empty()) || (current == list->First()) || + ((current == nullptr) && + (prev == list->last) && // NON-last pt between + !ex_current_was_last)); // first and last } - /*********************************************************************** * ELIST_ITERATOR::at_last() * @@ -709,18 +659,17 @@ inline bool ELIST_ITERATOR::at_first() { **********************************************************************/ inline bool ELIST_ITERATOR::at_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::at_last", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::at_last", ABORT, nullptr); +#endif - //we're at a deleted - return ((list->empty ()) || (current == list->last) || ((current == nullptr) && - (prev == list->last) && //last point between - ex_current_was_last)); //first and last + // we're at a deleted + return ((list->empty()) || (current == list->last) || + ((current == nullptr) && + (prev == list->last) && // last point between + ex_current_was_last)); // first and last } - /*********************************************************************** * ELIST_ITERATOR::cycled_list() * @@ -729,16 +678,13 @@ inline bool ELIST_ITERATOR::at_last() { **********************************************************************/ inline bool ELIST_ITERATOR::cycled_list() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::cycled_list", ABORT, nullptr); - #endif - - return ((list->empty ()) || ((current == cycle_pt) && started_cycling)); +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT, nullptr); +#endif + return ((list->empty()) || ((current == cycle_pt) && started_cycling)); } - /*********************************************************************** * ELIST_ITERATOR::length() * @@ -747,15 +693,13 @@ inline bool ELIST_ITERATOR::cycled_list() { **********************************************************************/ inline int32_t ELIST_ITERATOR::length() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::length", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::length", ABORT, nullptr); +#endif - return list->length (); + return list->length(); } - /*********************************************************************** * ELIST_ITERATOR::sort() * @@ -763,20 +707,17 @@ inline int32_t ELIST_ITERATOR::length() { * **********************************************************************/ -inline void -ELIST_ITERATOR::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::sort", ABORT, nullptr); - #endif +inline void ELIST_ITERATOR::sort( // sort elements + int comparator( // comparison routine + const void*, const void*)) { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::sort", ABORT, nullptr); +#endif - list->sort (comparator); + list->sort(comparator); move_to_first(); } - /*********************************************************************** * ELIST_ITERATOR::add_to_end * @@ -788,26 +729,23 @@ const void *, const void *)) { **********************************************************************/ inline void ELIST_ITERATOR::add_to_end( // element to add - ELIST_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST_ITERATOR::add_to_end", ABORT, nullptr); + ELIST_LINK* new_element) { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST_ITERATOR::add_to_end", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST_ITERATOR::add_to_end", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST_ITERATOR::add_to_end", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT, nullptr); +#endif - if (this->at_last ()) { - this->add_after_stay_put (new_element); - } - else { - if (this->at_first ()) { - this->add_before_stay_put (new_element); + if (this->at_last()) { + this->add_after_stay_put(new_element); + } else { + if (this->at_first()) { + this->add_before_stay_put(new_element); list->last = new_element; - } - else { //Iteratr is elsewhere + } else { // Iteratr is elsewhere new_element->next = list->last->next; list->last->next = new_element; list->last = new_element; @@ -815,7 +753,6 @@ inline void ELIST_ITERATOR::add_to_end( // element to add } } - /*********************************************************************** ******************** MACROS ************************************** ***********************************************************************/ @@ -826,7 +763,7 @@ inline void ELIST_ITERATOR::add_to_end( // element to add Replace with "". may be an arbitrary number of tokens ***********************************************************************/ -#define QUOTE_IT( parm ) #parm +#define QUOTE_IT(parm) #parm /*********************************************************************** ELISTIZE( CLASSNAME ) MACRO @@ -858,101 +795,100 @@ ELISTIZEH is a concatenation of 3 fragments ELISTIZEH_A, ELISTIZEH_B and ELISTIZEH_C. ***********************************************************************/ -#define ELISTIZEH_A(CLASSNAME) \ - \ -extern DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link); - -#define ELISTIZEH_B(CLASSNAME) \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_LIST \ -* \ -* List class for class CLASSNAME \ -* \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_LIST : public ELIST { \ - public: \ - CLASSNAME##_LIST():ELIST() {} \ - \ - void clear() { /* delete elements */\ - ELIST::internal_clear(&CLASSNAME##_zapper); \ - } \ - \ - ~CLASSNAME##_LIST() { \ - clear(); \ - } \ - \ - /* Become a deep copy of src_list*/ \ - void deep_copy(const CLASSNAME##_LIST* src_list, \ - CLASSNAME* (*copier)(const CLASSNAME*)); \ - \ -private: \ - /* Prevent assign and copy construction. */ \ - CLASSNAME##_LIST(const CLASSNAME##_LIST&) { \ - DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, nullptr);\ - } \ - void operator=(const CLASSNAME##_LIST&) { \ - DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, nullptr ); \ - } \ - -#define ELISTIZEH_C( CLASSNAME ) \ -}; \ - \ - \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_IT \ -* \ -* Iterator class for class CLASSNAME##_LIST \ -* \ -* Note: We don't need to coerce pointers to member functions input \ -* parameters as these are automatically converted to the type of the base \ -* type. ("A ptr to a class may be converted to a pointer to a public base \ -* class of that class") \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_IT : public ELIST_ITERATOR { \ - public: \ - CLASSNAME##_IT():ELIST_ITERATOR(){} \ - \ - /* TODO(rays) This constructor should be explicit, but that means changing \ - hundreds of incorrect initializations of iterators that use = over () */ \ - CLASSNAME##_IT(CLASSNAME##_LIST* list) : ELIST_ITERATOR(list) {} \ - \ - CLASSNAME* data() { \ - return reinterpret_cast(ELIST_ITERATOR::data()); \ - } \ - \ - CLASSNAME* data_relative(int8_t offset) { \ - return reinterpret_cast(ELIST_ITERATOR::data_relative(offset));\ - } \ - \ - CLASSNAME* forward() { \ - return reinterpret_cast(ELIST_ITERATOR::forward()); \ - } \ - \ - CLASSNAME* extract() { \ - return reinterpret_cast(ELIST_ITERATOR::extract()); \ - } \ - \ - CLASSNAME* move_to_first() { \ - return reinterpret_cast(ELIST_ITERATOR::move_to_first()); \ - } \ - \ - CLASSNAME* move_to_last() { \ - return reinterpret_cast(ELIST_ITERATOR::move_to_last()); \ - } \ -}; - -#define ELISTIZEH( CLASSNAME ) \ - \ -ELISTIZEH_A( CLASSNAME ) \ - \ -ELISTIZEH_B( CLASSNAME ) \ - \ -ELISTIZEH_C( CLASSNAME ) +#define ELISTIZEH_A(CLASSNAME) \ + \ + extern DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link); + +#define ELISTIZEH_B(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASS - CLASSNAME##_LIST \ + * \ + * List class for class CLASSNAME \ + * \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_LIST : public ELIST { \ + public: \ + CLASSNAME##_LIST() : ELIST() {} \ + \ + void clear() { /* delete elements */ \ + ELIST::internal_clear(&CLASSNAME##_zapper); \ + } \ + \ + ~CLASSNAME##_LIST() { clear(); } \ + \ + /* Become a deep copy of src_list*/ \ + void deep_copy(const CLASSNAME##_LIST* src_list, \ + CLASSNAME* (*copier)(const CLASSNAME*)); \ + \ + private: \ + /* Prevent assign and copy construction. */ \ + CLASSNAME##_LIST(const CLASSNAME##_LIST&) { \ + DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, \ + nullptr); \ + } \ + void operator=(const CLASSNAME##_LIST&) { \ + DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, nullptr); \ + } +#define ELISTIZEH_C(CLASSNAME) \ + } \ + ; \ + \ + /*********************************************************************** \ + * CLASS - CLASSNAME##_IT \ + * \ + * Iterator class for class CLASSNAME##_LIST \ + * \ + * Note: We don't need to coerce pointers to member functions input \ + * parameters as these are automatically converted to the type of the base \ + * type. ("A ptr to a class may be converted to a pointer to a public base \ + * class of that class") \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_IT : public ELIST_ITERATOR { \ + public: \ + CLASSNAME##_IT() : ELIST_ITERATOR() {} \ + \ + /* TODO(rays) This constructor should be explicit, but that means changing \ + hundreds of incorrect initializations of iterators that use = over () \ + */ \ + CLASSNAME##_IT(CLASSNAME##_LIST* list) : ELIST_ITERATOR(list) {} \ + \ + CLASSNAME* data() { \ + return reinterpret_cast(ELIST_ITERATOR::data()); \ + } \ + \ + CLASSNAME* data_relative(int8_t offset) { \ + return reinterpret_cast( \ + ELIST_ITERATOR::data_relative(offset)); \ + } \ + \ + CLASSNAME* forward() { \ + return reinterpret_cast(ELIST_ITERATOR::forward()); \ + } \ + \ + CLASSNAME* extract() { \ + return reinterpret_cast(ELIST_ITERATOR::extract()); \ + } \ + \ + CLASSNAME* move_to_first() { \ + return reinterpret_cast(ELIST_ITERATOR::move_to_first()); \ + } \ + \ + CLASSNAME* move_to_last() { \ + return reinterpret_cast(ELIST_ITERATOR::move_to_last()); \ + } \ + }; + +#define ELISTIZEH(CLASSNAME) \ + \ + ELISTIZEH_A(CLASSNAME) \ + \ + ELISTIZEH_B(CLASSNAME) \ + \ + ELISTIZEH_C(CLASSNAME) /*********************************************************************** ELISTIZE( CLASSNAME ) MACRO @@ -961,22 +897,22 @@ ELISTIZEH_C( CLASSNAME ) #define ELISTIZE(CLASSNAME) \ \ /*********************************************************************** \ - * CLASSNAME##_zapper \ - * \ - * A function which can delete a CLASSNAME element. This is passed to the \ - * generic clear list member function so that when a list is cleared the \ - * elements on the list are properly destroyed from the base class, even \ - * though we don't use a virtual destructor function. \ - **********************************************************************/ \ + * CLASSNAME##_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic clear list member function so that when a list is cleared the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ \ - DLLSYM void CLASSNAME##_zapper(ELIST_LINK *link) { \ - delete reinterpret_cast(link); \ + DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link) { \ + delete reinterpret_cast(link); \ } \ \ /* Become a deep copy of src_list*/ \ - void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list, \ - CLASSNAME *(*copier)(const CLASSNAME *)) { \ - CLASSNAME##_IT from_it(const_cast(src_list)); \ + void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST* src_list, \ + CLASSNAME* (*copier)(const CLASSNAME*)) { \ + CLASSNAME##_IT from_it(const_cast(src_list)); \ CLASSNAME##_IT to_it(this); \ \ for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ diff --git a/src/ccutil/elst2.cpp b/src/ccutil/elst2.cpp index 5d541eeda9..a185b8c987 100644 --- a/src/ccutil/elst2.cpp +++ b/src/ccutil/elst2.cpp @@ -17,9 +17,9 @@ * **********************************************************************/ +#include "elst2.h" #include #include "host.h" -#include "elst2.h" /*********************************************************************** * MEMBER FUNCTIONS OF CLASS: ELIST2 @@ -38,17 +38,16 @@ * the consequential memory overhead. **********************************************************************/ -void -ELIST2::internal_clear ( //destroy all links -void (*zapper) (ELIST2_LINK *)) { - //ptr to zapper functn - ELIST2_LINK *ptr; - ELIST2_LINK *next; - - if (!empty ()) { - ptr = last->next; //set to first - last->next = nullptr; //break circle - last = nullptr; //set list empty +void ELIST2::internal_clear( // destroy all links + void (*zapper)(ELIST2_LINK*)) { + // ptr to zapper functn + ELIST2_LINK* ptr; + ELIST2_LINK* next; + + if (!empty()) { + ptr = last->next; // set to first + last->next = nullptr; // break circle + last = nullptr; // set list empty while (ptr) { next = ptr->next; zapper(ptr); @@ -70,16 +69,16 @@ void (*zapper) (ELIST2_LINK *)) { * end point is always the end_it position. **********************************************************************/ -void ELIST2::assign_to_sublist( //to this list - ELIST2_ITERATOR *start_it, //from list start - ELIST2_ITERATOR *end_it) { //from list end +void ELIST2::assign_to_sublist( // to this list + ELIST2_ITERATOR* start_it, // from list start + ELIST2_ITERATOR* end_it) { // from list end const ERRCODE LIST_NOT_EMPTY = - "Destination list must be empty before extracting a sublist"; + "Destination list must be empty before extracting a sublist"; - if (!empty ()) - LIST_NOT_EMPTY.error ("ELIST2.assign_to_sublist", ABORT, nullptr); + if (!empty()) + LIST_NOT_EMPTY.error("ELIST2.assign_to_sublist", ABORT, nullptr); - last = start_it->extract_sublist (end_it); + last = start_it->extract_sublist(end_it); } /*********************************************************************** @@ -92,8 +91,7 @@ int32_t ELIST2::length() const { // count elements ELIST2_ITERATOR it(const_cast(this)); int32_t count = 0; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - count++; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) count++; return count; } @@ -105,34 +103,33 @@ int32_t ELIST2::length() const { // count elements * ( int (*)(const void *, const void *) **********************************************************************/ -void -ELIST2::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { +void ELIST2::sort( // sort elements + int comparator( // comparison routine + const void*, const void*)) { ELIST2_ITERATOR it(this); int32_t count; - ELIST2_LINK **base; //ptr array to sort - ELIST2_LINK **current; + ELIST2_LINK** base; // ptr array to sort + ELIST2_LINK** current; int32_t i; /* Allocate an array of pointers, one per list element */ - count = length (); - base = (ELIST2_LINK **) malloc (count * sizeof (ELIST2_LINK *)); + count = length(); + base = (ELIST2_LINK**)malloc(count * sizeof(ELIST2_LINK*)); /* Extract all elements, putting the pointers in the array */ current = base; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - *current = it.extract (); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + *current = it.extract(); current++; } /* Sort the pointer array */ - qsort ((char *) base, count, sizeof (*base), comparator); + qsort((char*)base, count, sizeof(*base), comparator); /* Rebuild the list from the sorted pointers */ current = base; for (i = 0; i < count; i++) { - it.add_to_end (*current); + it.add_to_end(*current); current++; } free(base); @@ -162,8 +159,7 @@ void ELIST2::add_sorted(int comparator(const void*, const void*), ELIST2_ITERATOR it(this); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ELIST2_LINK* link = it.data(); - if (comparator(&link, &new_link) > 0) - break; + if (comparator(&link, &new_link) > 0) break; } if (it.cycled_list()) it.add_to_end(new_link); @@ -184,38 +180,33 @@ void ELIST2::add_sorted(int comparator(const void*, const void*), * REMEMBER: ALL LISTS ARE CIRCULAR. **********************************************************************/ -ELIST2_LINK *ELIST2_ITERATOR::forward() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::forward", ABORT, nullptr); - #endif - if (list->empty ()) - return nullptr; +ELIST2_LINK* ELIST2_ITERATOR::forward() { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::forward", ABORT, nullptr); +#endif + if (list->empty()) return nullptr; - if (current) { //not removed so - //set previous + if (current) { // not removed so + // set previous prev = current; started_cycling = TRUE; // In case next is deleted by another iterator, get it from the current. current = current->next; - } - else { - if (ex_current_was_cycle_pt) - cycle_pt = next; + } else { + if (ex_current_was_cycle_pt) cycle_pt = next; current = next; } #ifndef NDEBUG - if (!current) - NULL_DATA.error ("ELIST2_ITERATOR::forward", ABORT, nullptr); + if (!current) NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT, nullptr); #endif next = current->next; #ifndef NDEBUG if (!next) - NULL_NEXT.error ("ELIST2_ITERATOR::forward", ABORT, - "This is: %p Current is: %p", this, current); + NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT, + "This is: %p Current is: %p", this, current); #endif return current; @@ -228,33 +219,29 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() { * REMEMBER: ALL LISTS ARE CIRCULAR. **********************************************************************/ -ELIST2_LINK *ELIST2_ITERATOR::backward() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::backward", ABORT, nullptr); - #endif - if (list->empty ()) - return nullptr; +ELIST2_LINK* ELIST2_ITERATOR::backward() { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::backward", ABORT, nullptr); +#endif + if (list->empty()) return nullptr; - if (current) { //not removed so - //set previous + if (current) { // not removed so + // set previous next = current; started_cycling = TRUE; // In case prev is deleted by another iterator, get it from current. current = current->prev; } else { - if (ex_current_was_cycle_pt) - cycle_pt = prev; + if (ex_current_was_cycle_pt) cycle_pt = prev; current = prev; } - #ifndef NDEBUG - if (!current) - NULL_DATA.error ("ELIST2_ITERATOR::backward", ABORT, nullptr); +#ifndef NDEBUG + if (!current) NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT, nullptr); if (!prev) - NULL_PREV.error ("ELIST2_ITERATOR::backward", ABORT, - "This is: %p Current is: %p", this, current); - #endif + NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT, + "This is: %p Current is: %p", this, current); +#endif prev = current->prev; return current; @@ -267,26 +254,26 @@ ELIST2_LINK *ELIST2_ITERATOR::backward() { * (This function can't be INLINEd because it contains a loop) **********************************************************************/ -ELIST2_LINK *ELIST2_ITERATOR::data_relative( //get data + or - .. - int8_t offset) { //offset from current - ELIST2_LINK *ptr; +ELIST2_LINK* ELIST2_ITERATOR::data_relative( // get data + or - .. + int8_t offset) { // offset from current + ELIST2_LINK* ptr; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::data_relative", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("ELIST2_ITERATOR::data_relative", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr); + if (list->empty()) + EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr); +#endif if (offset < 0) - for (ptr = current ? current : next; offset++ < 0; ptr = ptr->prev); + for (ptr = current ? current : next; offset++ < 0; ptr = ptr->prev) + ; else - for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next); + for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) + ; - #ifndef NDEBUG - if (!ptr) - NULL_DATA.error ("ELIST2_ITERATOR::data_relative", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!ptr) NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr); +#endif return ptr; } @@ -301,50 +288,46 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( //get data + or - .. * (This function hasn't been in-lined because its a bit big!) **********************************************************************/ -void ELIST2_ITERATOR::exchange( //positions of 2 links - ELIST2_ITERATOR *other_it) { //other iterator +void ELIST2_ITERATOR::exchange( // positions of 2 links + ELIST2_ITERATOR* other_it) { // other iterator const ERRCODE DONT_EXCHANGE_DELETED = - "Can't exchange deleted elements of lists"; + "Can't exchange deleted elements of lists"; - ELIST2_LINK *old_current; + ELIST2_LINK* old_current; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::exchange", ABORT, nullptr); +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT, nullptr); if (!other_it) - BAD_PARAMETER.error ("ELIST2_ITERATOR::exchange", ABORT, "other_it nullptr"); + BAD_PARAMETER.error("ELIST2_ITERATOR::exchange", ABORT, "other_it nullptr"); if (!(other_it->list)) - NO_LIST.error ("ELIST2_ITERATOR::exchange", ABORT, "other_it"); - #endif + NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT, "other_it"); +#endif /* Do nothing if either list is empty or if both iterators reference the same link */ - if ((list->empty ()) || - (other_it->list->empty ()) || (current == other_it->current)) + if ((list->empty()) || (other_it->list->empty()) || + (current == other_it->current)) return; /* Error if either current element is deleted */ if (!current || !other_it->current) - DONT_EXCHANGE_DELETED.error ("ELIST2_ITERATOR.exchange", ABORT, nullptr); + DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT, nullptr); /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements (other before this); non-doubleton adjacent elements (this before other); non-adjacent elements. */ - //adjacent links - if ((next == other_it->current) || - (other_it->next == current)) { - //doubleton list - if ((next == other_it->current) && - (other_it->next == current)) { + // adjacent links + if ((next == other_it->current) || (other_it->next == current)) { + // doubleton list + if ((next == other_it->current) && (other_it->next == current)) { prev = next = current; other_it->prev = other_it->next = other_it->current; - } - else { //non-doubleton with - //adjacent links - //other before this + } else { // non-doubleton with + // adjacent links + // other before this if (other_it->next == current) { other_it->prev->next = current; other_it->current->next = next; @@ -355,8 +338,7 @@ void ELIST2_ITERATOR::exchange( //positions of 2 li other_it->next = other_it->current; prev = current; - } - else { //this before other + } else { // this before other prev->next = other_it->current; current->next = other_it->next; current->prev = other_it->current; @@ -368,8 +350,7 @@ void ELIST2_ITERATOR::exchange( //positions of 2 li other_it->prev = other_it->current; } } - } - else { //no overlap + } else { // no overlap prev->next = other_it->current; current->next = other_it->next; current->prev = other_it->prev; @@ -383,15 +364,11 @@ void ELIST2_ITERATOR::exchange( //positions of 2 li /* update end of list pointer when necessary (remember that the 2 iterators may iterate over different lists!) */ - if (list->last == current) - list->last = other_it->current; - if (other_it->list->last == other_it->current) - other_it->list->last = current; + if (list->last == current) list->last = other_it->current; + if (other_it->list->last == other_it->current) other_it->list->last = current; - if (current == cycle_pt) - cycle_pt = other_it->cycle_pt; - if (other_it->current == other_it->cycle_pt) - other_it->cycle_pt = cycle_pt; + if (current == cycle_pt) cycle_pt = other_it->cycle_pt; + if (other_it->current == other_it->cycle_pt) other_it->cycle_pt = cycle_pt; /* The actual exchange - in all cases*/ @@ -410,73 +387,70 @@ void ELIST2_ITERATOR::exchange( //positions of 2 li * (Can't inline this function because it contains a loop) **********************************************************************/ -ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( //from this current - ELIST2_ITERATOR *other_it) { //to other current - #ifndef NDEBUG +ELIST2_LINK* ELIST2_ITERATOR::extract_sublist( // from this current + ELIST2_ITERATOR* other_it) { // to other current +#ifndef NDEBUG const ERRCODE BAD_EXTRACTION_PTS = - "Can't extract sublist from points on different lists"; + "Can't extract sublist from points on different lists"; const ERRCODE DONT_EXTRACT_DELETED = - "Can't extract a sublist marked by deleted points"; - #endif + "Can't extract a sublist marked by deleted points"; +#endif const ERRCODE BAD_SUBLIST = "Can't find sublist end point in original list"; ELIST2_ITERATOR temp_it = *this; - ELIST2_LINK *end_of_new_list; + ELIST2_LINK* end_of_new_list; - #ifndef NDEBUG +#ifndef NDEBUG if (!other_it) - BAD_PARAMETER.error ("ELIST2_ITERATOR::extract_sublist", ABORT, - "other_it nullptr"); - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr); + BAD_PARAMETER.error("ELIST2_ITERATOR::extract_sublist", ABORT, + "other_it nullptr"); + if (!list) NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr); if (list != other_it->list) - BAD_EXTRACTION_PTS.error ("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr); - if (list->empty ()) - EMPTY_LIST.error ("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr); + BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr); + if (list->empty()) + EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr); if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error ("ELIST2_ITERATOR.extract_sublist", ABORT, - nullptr); - #endif + DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT, + nullptr); +#endif ex_current_was_last = other_it->ex_current_was_last = false; ex_current_was_cycle_pt = false; other_it->ex_current_was_cycle_pt = false; - temp_it.mark_cycle_pt (); - do { //walk sublist - if (temp_it.cycled_list()) // can't find end pt - BAD_SUBLIST.error ("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr); + temp_it.mark_cycle_pt(); + do { // walk sublist + if (temp_it.cycled_list()) // can't find end pt + BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr); - if (temp_it.at_last ()) { + if (temp_it.at_last()) { list->last = prev; ex_current_was_last = other_it->ex_current_was_last = true; } - if (temp_it.current == cycle_pt) - ex_current_was_cycle_pt = true; + if (temp_it.current == cycle_pt) ex_current_was_cycle_pt = true; if (temp_it.current == other_it->cycle_pt) other_it->ex_current_was_cycle_pt = true; - temp_it.forward (); + temp_it.forward(); } - //do INCLUSIVE list + // do INCLUSIVE list while (temp_it.prev != other_it->current); - //circularise sublist + // circularise sublist other_it->current->next = current; - //circularise sublist + // circularise sublist current->prev = other_it->current; end_of_new_list = other_it->current; - //sublist = whole list + // sublist = whole list if (prev == other_it->current) { list->last = nullptr; prev = current = next = nullptr; other_it->prev = other_it->current = other_it->next = nullptr; - } - else { + } else { prev->next = other_it->next; other_it->next->prev = prev; diff --git a/src/ccutil/elst2.h b/src/ccutil/elst2.h index 9de3d19a1e..3010d1edf7 100644 --- a/src/ccutil/elst2.h +++ b/src/ccutil/elst2.h @@ -22,8 +22,8 @@ #include #include "host.h" -#include "serialis.h" #include "lsterr.h" +#include "serialis.h" class ELIST2_ITERATOR; @@ -55,28 +55,27 @@ i) The duplication in source does not affect the run time code size - the * walks the list. **********************************************************************/ -class DLLSYM ELIST2_LINK -{ +class DLLSYM ELIST2_LINK { friend class ELIST2_ITERATOR; friend class ELIST2; - ELIST2_LINK *prev; - ELIST2_LINK *next; + ELIST2_LINK* prev; + ELIST2_LINK* next; - public: - ELIST2_LINK() { //constructor - prev = next = nullptr; - } + public: + ELIST2_LINK() { // constructor + prev = next = nullptr; + } - ELIST2_LINK( // copy constructor - const ELIST2_LINK &) { // don't copy link - prev = next = nullptr; - } + ELIST2_LINK( // copy constructor + const ELIST2_LINK&) { // don't copy link + prev = next = nullptr; + } - void operator=( // don't copy links - const ELIST2_LINK &) { - prev = next = nullptr; - } + void operator=( // don't copy links + const ELIST2_LINK&) { + prev = next = nullptr; + } }; /********************************************************************** @@ -85,60 +84,56 @@ class DLLSYM ELIST2_LINK * Generic list class for doubly linked lists with embedded links **********************************************************************/ -class DLLSYM ELIST2 -{ +class DLLSYM ELIST2 { friend class ELIST2_ITERATOR; - ELIST2_LINK *last; //End of list + ELIST2_LINK* last; // End of list //(Points to head) - ELIST2_LINK *First() { // return first + ELIST2_LINK* First() { // return first return last ? last->next : nullptr; } - public: - ELIST2() { //constructor - last = nullptr; - } + public: + ELIST2() { // constructor + last = nullptr; + } - void internal_clear ( //destroy all links - void (*zapper) (ELIST2_LINK *)); - //ptr to zapper functn + void internal_clear( // destroy all links + void (*zapper)(ELIST2_LINK*)); + // ptr to zapper functn - bool empty() const { //is list empty? - return !last; - } + bool empty() const { // is list empty? + return !last; + } - bool singleton() const { - return last ? (last == last->next) : false; - } + bool singleton() const { return last ? (last == last->next) : false; } - void shallow_copy( //dangerous!! - ELIST2 *from_list) { //beware destructors!! - last = from_list->last; - } - - //ptr to copier functn - void internal_deep_copy (ELIST2_LINK * (*copier) (ELIST2_LINK *), - const ELIST2 * list); //list being copied + void shallow_copy( // dangerous!! + ELIST2* from_list) { // beware destructors!! + last = from_list->last; + } - void assign_to_sublist( //to this list - ELIST2_ITERATOR *start_it, //from list start - ELIST2_ITERATOR *end_it); //from list end + // ptr to copier functn + void internal_deep_copy(ELIST2_LINK* (*copier)(ELIST2_LINK*), + const ELIST2* list); // list being copied - int32_t length() const; // # elements in list + void assign_to_sublist( // to this list + ELIST2_ITERATOR* start_it, // from list start + ELIST2_ITERATOR* end_it); // from list end - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); + int32_t length() const; // # elements in list - // Assuming list has been sorted already, insert new_link to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. Time is O(1) to add to beginning or end. - // Time is linear to add pre-sorted items to an empty list. - void add_sorted(int comparator(const void*, const void*), - ELIST2_LINK* new_link); + void sort( // sort elements + int comparator( // comparison routine + const void*, const void*)); + // Assuming list has been sorted already, insert new_link to + // keep the list sorted according to the same comparison function. + // Comparison function is the same as used by sort, i.e. uses double + // indirection. Time is O(1) to add to beginning or end. + // Time is linear to add pre-sorted items to an empty list. + void add_sorted(int comparator(const void*, const void*), + ELIST2_LINK* new_link); }; /*********************************************************************** @@ -148,110 +143,106 @@ class DLLSYM ELIST2 *links **********************************************************************/ -class DLLSYM ELIST2_ITERATOR -{ - friend void ELIST2::assign_to_sublist(ELIST2_ITERATOR *, ELIST2_ITERATOR *); - - ELIST2 *list; //List being iterated - ELIST2_LINK *prev; //prev element - ELIST2_LINK *current; //current element - ELIST2_LINK *next; //next element - bool ex_current_was_last; //current extracted - //was end of list - bool ex_current_was_cycle_pt; //current extracted - //was cycle point - ELIST2_LINK *cycle_pt; //point we are cycling - //the list to. - bool started_cycling; //Have we moved off - //the start? - - ELIST2_LINK *extract_sublist( //from this current... - ELIST2_ITERATOR *other_it); //to other current - - public: - ELIST2_ITERATOR( //constructor - ELIST2 *list_to_iterate); - - void set_to_list( //change list - ELIST2 *list_to_iterate); - - void add_after_then_move( //add after current & - ELIST2_LINK *new_link); //move to new - - void add_after_stay_put( //add after current & - ELIST2_LINK *new_link); //stay at current - - void add_before_then_move( //add before current & - ELIST2_LINK *new_link); //move to new - - void add_before_stay_put( //add before current & - ELIST2_LINK *new_link); //stay at current - - void add_list_after( //add a list & - ELIST2 *list_to_add); //stay at current - - void add_list_before( //add a list & - ELIST2 *list_to_add); //move to it 1st item - - ELIST2_LINK *data() { //get current data - #ifndef NDEBUG - if (!current) - NULL_DATA.error ("ELIST2_ITERATOR::data", ABORT, nullptr); - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::data", ABORT, nullptr); - #endif - return current; - } +class DLLSYM ELIST2_ITERATOR { + friend void ELIST2::assign_to_sublist(ELIST2_ITERATOR*, ELIST2_ITERATOR*); - ELIST2_LINK *data_relative( //get data + or - ... - int8_t offset); //offset from current + ELIST2* list; // List being iterated + ELIST2_LINK* prev; // prev element + ELIST2_LINK* current; // current element + ELIST2_LINK* next; // next element + bool ex_current_was_last; // current extracted + // was end of list + bool ex_current_was_cycle_pt; // current extracted + // was cycle point + ELIST2_LINK* cycle_pt; // point we are cycling + // the list to. + bool started_cycling; // Have we moved off + // the start? - ELIST2_LINK *forward(); //move to next element + ELIST2_LINK* extract_sublist( // from this current... + ELIST2_ITERATOR* other_it); // to other current - ELIST2_LINK *backward(); //move to prev element + public: + ELIST2_ITERATOR( // constructor + ELIST2* list_to_iterate); - ELIST2_LINK *extract(); //remove from list + void set_to_list( // change list + ELIST2* list_to_iterate); - //go to start of list - ELIST2_LINK *move_to_first(); + void add_after_then_move( // add after current & + ELIST2_LINK* new_link); // move to new - ELIST2_LINK *move_to_last(); //go to end of list + void add_after_stay_put( // add after current & + ELIST2_LINK* new_link); // stay at current - void mark_cycle_pt(); //remember current + void add_before_then_move( // add before current & + ELIST2_LINK* new_link); // move to new - bool empty() { //is list empty? - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::empty", ABORT, nullptr); - #endif - return list->empty (); - } + void add_before_stay_put( // add before current & + ELIST2_LINK* new_link); // stay at current - bool current_extracted() { //current extracted? - return !current; - } + void add_list_after( // add a list & + ELIST2* list_to_add); // stay at current + + void add_list_before( // add a list & + ELIST2* list_to_add); // move to it 1st item + + ELIST2_LINK* data() { // get current data +#ifndef NDEBUG + if (!current) NULL_DATA.error("ELIST2_ITERATOR::data", ABORT, nullptr); + if (!list) NO_LIST.error("ELIST2_ITERATOR::data", ABORT, nullptr); +#endif + return current; + } + + ELIST2_LINK* data_relative( // get data + or - ... + int8_t offset); // offset from current + + ELIST2_LINK* forward(); // move to next element + + ELIST2_LINK* backward(); // move to prev element + + ELIST2_LINK* extract(); // remove from list - bool at_first(); //Current is first? + // go to start of list + ELIST2_LINK* move_to_first(); - bool at_last(); //Current is last? + ELIST2_LINK* move_to_last(); // go to end of list - bool cycled_list(); //Completed a cycle? + void mark_cycle_pt(); // remember current - void add_to_end( // add at end & - ELIST2_LINK *new_link); // don't move + bool empty() { // is list empty? +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::empty", ABORT, nullptr); +#endif + return list->empty(); + } + + bool current_extracted() { // current extracted? + return !current; + } + + bool at_first(); // Current is first? + + bool at_last(); // Current is last? + + bool cycled_list(); // Completed a cycle? - void exchange( //positions of 2 links - ELIST2_ITERATOR *other_it); //other iterator + void add_to_end( // add at end & + ELIST2_LINK* new_link); // don't move - int32_t length(); //# elements in list + void exchange( // positions of 2 links + ELIST2_ITERATOR* other_it); // other iterator - void sort ( //sort elements - int comparator ( //comparison routine - const void *, const void *)); + int32_t length(); //# elements in list - private: - // Don't use the following constructor. - ELIST2_ITERATOR(); + void sort( // sort elements + int comparator( // comparison routine + const void*, const void*)); + + private: + // Don't use the following constructor. + ELIST2_ITERATOR(); }; /*********************************************************************** @@ -261,19 +252,19 @@ class DLLSYM ELIST2_ITERATOR * over. **********************************************************************/ -inline void ELIST2_ITERATOR::set_to_list( //change list - ELIST2 *list_to_iterate) { - #ifndef NDEBUG +inline void ELIST2_ITERATOR::set_to_list( // change list + ELIST2* list_to_iterate) { +#ifndef NDEBUG if (!list_to_iterate) - BAD_PARAMETER.error ("ELIST2_ITERATOR::set_to_list", ABORT, - "list_to_iterate is nullptr"); - #endif + BAD_PARAMETER.error("ELIST2_ITERATOR::set_to_list", ABORT, + "list_to_iterate is nullptr"); +#endif list = list_to_iterate; prev = list->last; - current = list->First (); + current = list->First(); next = current ? current->next : nullptr; - cycle_pt = nullptr; //await explicit set + cycle_pt = nullptr; // await explicit set started_cycling = false; ex_current_was_last = false; ex_current_was_cycle_pt = false; @@ -285,7 +276,7 @@ inline void ELIST2_ITERATOR::set_to_list( //change list * CONSTRUCTOR - set iterator to specified list; **********************************************************************/ -inline ELIST2_ITERATOR::ELIST2_ITERATOR(ELIST2 *list_to_iterate) { +inline ELIST2_ITERATOR::ELIST2_ITERATOR(ELIST2* list_to_iterate) { set_to_list(list_to_iterate); } @@ -297,41 +288,36 @@ inline ELIST2_ITERATOR::ELIST2_ITERATOR(ELIST2 *list_to_iterate) { **********************************************************************/ inline void ELIST2_ITERATOR::add_after_then_move( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG + ELIST2_LINK* new_element) { +#ifndef NDEBUG if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr); + NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_after_then_move", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_then_move", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr); +#endif - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; new_element->prev = new_element; list->last = new_element; prev = next = new_element; - } - else { + } else { new_element->next = next; next->prev = new_element; - if (current) { //not extracted + if (current) { // not extracted new_element->prev = current; current->next = new_element; prev = current; - if (current == list->last) - list->last = new_element; - } - else { //current extracted + if (current == list->last) list->last = new_element; + } else { // current extracted new_element->prev = prev; prev->next = new_element; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; + if (ex_current_was_last) list->last = new_element; + if (ex_current_was_cycle_pt) cycle_pt = new_element; } } current = new_element; @@ -345,38 +331,34 @@ inline void ELIST2_ITERATOR::add_after_then_move( // element to add **********************************************************************/ inline void ELIST2_ITERATOR::add_after_stay_put( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG + ELIST2_LINK* new_element) { +#ifndef NDEBUG if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr); + NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_after_stay_put", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr); +#endif - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; new_element->prev = new_element; list->last = new_element; prev = next = new_element; ex_current_was_last = false; current = nullptr; - } - else { + } else { new_element->next = next; next->prev = new_element; - if (current) { //not extracted + if (current) { // not extracted new_element->prev = current; current->next = new_element; - if (prev == current) - prev = new_element; - if (current == list->last) - list->last = new_element; - } - else { //current extracted + if (prev == current) prev = new_element; + if (current == list->last) list->last = new_element; + } else { // current extracted new_element->prev = prev; prev->next = new_element; if (ex_current_was_last) { @@ -396,39 +378,35 @@ inline void ELIST2_ITERATOR::add_after_stay_put( // element to add **********************************************************************/ inline void ELIST2_ITERATOR::add_before_then_move( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG + ELIST2_LINK* new_element) { +#ifndef NDEBUG if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr); + NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_before_then_move", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_then_move", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr); +#endif - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; new_element->prev = new_element; list->last = new_element; prev = next = new_element; - } - else { + } else { prev->next = new_element; new_element->prev = prev; - if (current) { //not extracted + if (current) { // not extracted new_element->next = current; current->prev = new_element; next = current; - } - else { //current extracted + } else { // current extracted new_element->next = next; next->prev = new_element; - if (ex_current_was_last) - list->last = new_element; - if (ex_current_was_cycle_pt) - cycle_pt = new_element; + if (ex_current_was_last) list->last = new_element; + if (ex_current_was_cycle_pt) cycle_pt = new_element; } } current = new_element; @@ -442,40 +420,36 @@ inline void ELIST2_ITERATOR::add_before_then_move( // element to add **********************************************************************/ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG + ELIST2_LINK* new_element) { +#ifndef NDEBUG if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr); + NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_before_stay_put", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr); +#endif - if (list->empty ()) { + if (list->empty()) { new_element->next = new_element; new_element->prev = new_element; list->last = new_element; prev = next = new_element; ex_current_was_last = true; current = nullptr; - } - else { + } else { prev->next = new_element; new_element->prev = prev; - if (current) { //not extracted + if (current) { // not extracted new_element->next = current; current->prev = new_element; - if (next == current) - next = new_element; - } - else { //current extracted + if (next == current) next = new_element; + } else { // current extracted new_element->next = next; next->prev = new_element; - if (ex_current_was_last) - list->last = new_element; + if (ex_current_was_last) list->last = new_element; } prev = new_element; } @@ -489,35 +463,31 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add * iterator. **********************************************************************/ -inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_list_after", ABORT, nullptr); +inline void ELIST2_ITERATOR::add_list_after(ELIST2* list_to_add) { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT, nullptr); if (!list_to_add) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_list_after", ABORT, - "list_to_add is nullptr"); - #endif + BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_after", ABORT, + "list_to_add is nullptr"); +#endif - if (!list_to_add->empty ()) { - if (list->empty ()) { + if (!list_to_add->empty()) { + if (list->empty()) { list->last = list_to_add->last; prev = list->last; - next = list->First (); + next = list->First(); ex_current_was_last = true; current = nullptr; - } - else { - if (current) { //not extracted - current->next = list_to_add->First (); + } else { + if (current) { // not extracted + current->next = list_to_add->First(); current->next->prev = current; - if (current == list->last) - list->last = list_to_add->last; + if (current == list->last) list->last = list_to_add->last; list_to_add->last->next = next; next->prev = list_to_add->last; next = current->next; - } - else { //current extracted - prev->next = list_to_add->First (); + } else { // current extracted + prev->next = list_to_add->First(); prev->next->prev = prev; if (ex_current_was_last) { list->last = list_to_add->last; @@ -540,38 +510,33 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { * iterator. **********************************************************************/ -inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_list_before", ABORT, nullptr); +inline void ELIST2_ITERATOR::add_list_before(ELIST2* list_to_add) { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT, nullptr); if (!list_to_add) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_list_before", ABORT, - "list_to_add is nullptr"); - #endif + BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_before", ABORT, + "list_to_add is nullptr"); +#endif - if (!list_to_add->empty ()) { - if (list->empty ()) { + if (!list_to_add->empty()) { + if (list->empty()) { list->last = list_to_add->last; prev = list->last; - current = list->First (); + current = list->First(); next = current->next; ex_current_was_last = false; - } - else { - prev->next = list_to_add->First (); + } else { + prev->next = list_to_add->First(); prev->next->prev = prev; - if (current) { //not extracted + if (current) { // not extracted list_to_add->last->next = current; current->prev = list_to_add->last; - } - else { //current extracted + } else { // current extracted list_to_add->last->next = next; next->prev = list_to_add->last; - if (ex_current_was_last) - list->last = list_to_add->last; - if (ex_current_was_cycle_pt) - cycle_pt = prev->next; + if (ex_current_was_last) list->last = list_to_add->last; + if (ex_current_was_cycle_pt) cycle_pt = prev->next; } current = prev->next; next = current->next; @@ -589,23 +554,21 @@ inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) { * is to be deleted, this is the callers responsibility. **********************************************************************/ -inline ELIST2_LINK *ELIST2_ITERATOR::extract() { - ELIST2_LINK *extracted_link; +inline ELIST2_LINK* ELIST2_ITERATOR::extract() { + ELIST2_LINK* extracted_link; - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::extract", ABORT, nullptr); - if (!current) //list empty or - //element extracted - NULL_CURRENT.error ("ELIST2_ITERATOR::extract", - ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::extract", ABORT, nullptr); + if (!current) // list empty or + // element extracted + NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT, nullptr); +#endif if (list->singleton()) { // Special case where we do need to change the iterator. prev = next = list->last = nullptr; } else { - prev->next = next; //remove from list + prev->next = next; // remove from list next->prev = prev; if (current == list->last) { @@ -618,8 +581,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() { // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. ex_current_was_cycle_pt = (current == cycle_pt) ? true : false; extracted_link = current; - extracted_link->next = nullptr; //for safety - extracted_link->prev = nullptr; //for safety + extracted_link->next = nullptr; // for safety + extracted_link->prev = nullptr; // for safety current = nullptr; return extracted_link; } @@ -631,13 +594,12 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() { * Return data just in case anyone wants it. **********************************************************************/ -inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::move_to_first", ABORT, nullptr); - #endif +inline ELIST2_LINK* ELIST2_ITERATOR::move_to_first() { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT, nullptr); +#endif - current = list->First (); + current = list->First(); prev = list->last; next = current ? current->next : nullptr; return current; @@ -650,11 +612,10 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() { * Return data just in case anyone wants it. **********************************************************************/ -inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::move_to_last", ABORT, nullptr); - #endif +inline ELIST2_LINK* ELIST2_ITERATOR::move_to_last() { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT, nullptr); +#endif current = list->last; prev = current ? current->prev : nullptr; @@ -674,10 +635,9 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() { **********************************************************************/ inline void ELIST2_ITERATOR::mark_cycle_pt() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::mark_cycle_pt", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT, nullptr); +#endif if (current) cycle_pt = current; @@ -694,15 +654,15 @@ inline void ELIST2_ITERATOR::mark_cycle_pt() { **********************************************************************/ inline bool ELIST2_ITERATOR::at_first() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::at_first", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT, nullptr); +#endif - //we're at a deleted - return ((list->empty ()) || (current == list->First ()) || ((current == nullptr) && - (prev == list->last) && //NON-last pt between - !ex_current_was_last)); //first and last + // we're at a deleted + return ((list->empty()) || (current == list->First()) || + ((current == nullptr) && + (prev == list->last) && // NON-last pt between + !ex_current_was_last)); // first and last } /*********************************************************************** @@ -713,15 +673,15 @@ inline bool ELIST2_ITERATOR::at_first() { **********************************************************************/ inline bool ELIST2_ITERATOR::at_last() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::at_last", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT, nullptr); +#endif - //we're at a deleted - return ((list->empty ()) || (current == list->last) || ((current == nullptr) && - (prev == list->last) && //last point between - ex_current_was_last)); //first and last + // we're at a deleted + return ((list->empty()) || (current == list->last) || + ((current == nullptr) && + (prev == list->last) && // last point between + ex_current_was_last)); // first and last } /*********************************************************************** @@ -732,13 +692,11 @@ inline bool ELIST2_ITERATOR::at_last() { **********************************************************************/ inline bool ELIST2_ITERATOR::cycled_list() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::cycled_list", ABORT, nullptr); - #endif - - return ((list->empty ()) || ((current == cycle_pt) && started_cycling)); +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT, nullptr); +#endif + return ((list->empty()) || ((current == cycle_pt) && started_cycling)); } /*********************************************************************** @@ -749,12 +707,11 @@ inline bool ELIST2_ITERATOR::cycled_list() { **********************************************************************/ inline int32_t ELIST2_ITERATOR::length() { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::length", ABORT, nullptr); - #endif +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::length", ABORT, nullptr); +#endif - return list->length (); + return list->length(); } /*********************************************************************** @@ -764,16 +721,14 @@ inline int32_t ELIST2_ITERATOR::length() { * **********************************************************************/ -inline void -ELIST2_ITERATOR::sort ( //sort elements -int comparator ( //comparison routine -const void *, const void *)) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::sort", ABORT, nullptr); - #endif +inline void ELIST2_ITERATOR::sort( // sort elements + int comparator( // comparison routine + const void*, const void*)) { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::sort", ABORT, nullptr); +#endif - list->sort (comparator); + list->sort(comparator); move_to_first(); } @@ -788,26 +743,23 @@ const void *, const void *)) { **********************************************************************/ inline void ELIST2_ITERATOR::add_to_end( // element to add - ELIST2_LINK *new_element) { - #ifndef NDEBUG - if (!list) - NO_LIST.error ("ELIST2_ITERATOR::add_to_end", ABORT, nullptr); + ELIST2_LINK* new_element) { +#ifndef NDEBUG + if (!list) NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT, nullptr); if (!new_element) - BAD_PARAMETER.error ("ELIST2_ITERATOR::add_to_end", ABORT, - "new_element is nullptr"); + BAD_PARAMETER.error("ELIST2_ITERATOR::add_to_end", ABORT, + "new_element is nullptr"); if (new_element->next) - STILL_LINKED.error ("ELIST2_ITERATOR::add_to_end", ABORT, nullptr); - #endif + STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT, nullptr); +#endif - if (this->at_last ()) { - this->add_after_stay_put (new_element); - } - else { - if (this->at_first ()) { - this->add_before_stay_put (new_element); + if (this->at_last()) { + this->add_after_stay_put(new_element); + } else { + if (this->at_first()) { + this->add_before_stay_put(new_element); list->last = new_element; - } - else { //Iteratr is elsewhere + } else { // Iteratr is elsewhere new_element->next = list->last->next; new_element->prev = list->last; list->last->next->prev = new_element; @@ -817,14 +769,13 @@ inline void ELIST2_ITERATOR::add_to_end( // element to add } } - /*********************************************************************** QUOTE_IT MACRO DEFINITION =========================== Replace with "". may be an arbitrary number of tokens ***********************************************************************/ -#define QUOTE_IT( parm ) #parm +#define QUOTE_IT(parm) #parm /*********************************************************************** ELIST2IZE( CLASSNAME ) MACRO DEFINITION @@ -860,29 +811,29 @@ ELIST2IZEH_C. #define ELIST2IZEH_A(CLASSNAME) \ \ extern DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ - ELIST2_LINK *link); /*link to delete*/ + ELIST2_LINK* link); /*link to delete*/ #define ELIST2IZEH_B(CLASSNAME) \ \ /*********************************************************************** \ - * CLASS - \ - *CLASSNAME##_LIST \ - * \ - * List class for class \ - *CLASSNAME \ - * \ - **********************************************************************/ \ + * CLASS - \ + *CLASSNAME##_LIST \ + * \ + * List class for class \ + *CLASSNAME \ + * \ + **********************************************************************/ \ \ class DLLSYM CLASSNAME##_LIST : public ELIST2 { \ public: \ CLASSNAME##_LIST() : ELIST2() {} \ /* constructor */ \ \ - CLASSNAME##_LIST( /* don't construct */ \ - const CLASSNAME##_LIST &) /*by initial assign*/ \ + CLASSNAME##_LIST( /* don't construct */ \ + const CLASSNAME##_LIST&) /*by initial assign*/ \ { \ DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, \ - nullptr); \ + nullptr); \ } \ \ void clear() /* delete elements */ \ @@ -890,60 +841,58 @@ ELIST2IZEH_C. ELIST2::internal_clear(&CLASSNAME##_zapper); \ } \ \ - ~CLASSNAME##_LIST() /* destructor */ \ - { \ - clear(); \ - } \ + ~CLASSNAME##_LIST() /* destructor */ { clear(); } \ \ /* Become a deep copy of src_list*/ \ - void deep_copy(const CLASSNAME##_LIST *src_list, \ - CLASSNAME *(*copier)(const CLASSNAME *)); \ + void deep_copy(const CLASSNAME##_LIST* src_list, \ + CLASSNAME* (*copier)(const CLASSNAME*)); \ \ void operator=(/* prevent assign */ \ - const CLASSNAME##_LIST &) { \ - DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, nullptr); \ + const CLASSNAME##_LIST&) { \ + DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, nullptr); \ } -#define ELIST2IZEH_C(CLASSNAME) \ - } \ - ; \ - \ - /*********************************************************************** \ - * CLASS - CLASSNAME##_IT \ - * \ - * Iterator class for class CLASSNAME##_LIST \ - * \ - * Note: We don't need to coerce pointers to member functions input \ - * parameters as these are automatically converted to the type of the base \ - * type. ("A ptr to a class may be converted to a pointer to a public base \ - * class of that class") \ - **********************************************************************/ \ - \ - class DLLSYM CLASSNAME##_IT : public ELIST2_ITERATOR { \ - public: \ - CLASSNAME##_IT(CLASSNAME##_LIST *list) : ELIST2_ITERATOR(list) {} \ - \ - CLASSNAME *data() { return (CLASSNAME *)ELIST2_ITERATOR::data(); } \ - \ - CLASSNAME *data_relative(int8_t offset) { \ - return (CLASSNAME *)ELIST2_ITERATOR::data_relative(offset); \ - } \ - \ - CLASSNAME *forward() { return (CLASSNAME *)ELIST2_ITERATOR::forward(); } \ - \ - CLASSNAME *backward() { return (CLASSNAME *)ELIST2_ITERATOR::backward(); } \ - \ - CLASSNAME *extract() { return (CLASSNAME *)ELIST2_ITERATOR::extract(); } \ - \ - CLASSNAME *move_to_first() { \ - return (CLASSNAME *)ELIST2_ITERATOR::move_to_first(); \ - } \ - \ - CLASSNAME *move_to_last() { \ - return (CLASSNAME *)ELIST2_ITERATOR::move_to_last(); \ - } \ - private: \ - CLASSNAME##_IT(); \ +#define ELIST2IZEH_C(CLASSNAME) \ + } \ + ; \ + \ + /*********************************************************************** \ + * CLASS - CLASSNAME##_IT \ + * \ + * Iterator class for class CLASSNAME##_LIST \ + * \ + * Note: We don't need to coerce pointers to member functions input \ + * parameters as these are automatically converted to the type of the base \ + * type. ("A ptr to a class may be converted to a pointer to a public base \ + * class of that class") \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_IT : public ELIST2_ITERATOR { \ + public: \ + CLASSNAME##_IT(CLASSNAME##_LIST* list) : ELIST2_ITERATOR(list) {} \ + \ + CLASSNAME* data() { return (CLASSNAME*)ELIST2_ITERATOR::data(); } \ + \ + CLASSNAME* data_relative(int8_t offset) { \ + return (CLASSNAME*)ELIST2_ITERATOR::data_relative(offset); \ + } \ + \ + CLASSNAME* forward() { return (CLASSNAME*)ELIST2_ITERATOR::forward(); } \ + \ + CLASSNAME* backward() { return (CLASSNAME*)ELIST2_ITERATOR::backward(); } \ + \ + CLASSNAME* extract() { return (CLASSNAME*)ELIST2_ITERATOR::extract(); } \ + \ + CLASSNAME* move_to_first() { \ + return (CLASSNAME*)ELIST2_ITERATOR::move_to_first(); \ + } \ + \ + CLASSNAME* move_to_last() { \ + return (CLASSNAME*)ELIST2_ITERATOR::move_to_last(); \ + } \ + \ + private: \ + CLASSNAME##_IT(); \ }; #define ELIST2IZEH(CLASSNAME) \ @@ -961,24 +910,24 @@ ELIST2IZEH_C. #define ELIST2IZE(CLASSNAME) \ \ /*********************************************************************** \ - * CLASSNAME##_zapper \ - * \ - * A function which can delete a CLASSNAME element. This is passed to the \ - * generic clear list member function so that when a list is cleared the \ - * elements on the list are properly destroyed from the base class, even \ - * though we don't use a virtual destructor function. \ - **********************************************************************/ \ + * CLASSNAME##_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic clear list member function so that when a list is cleared the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ \ DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ - ELIST2_LINK *link) /*link to delete*/ \ + ELIST2_LINK* link) /*link to delete*/ \ { \ - delete (CLASSNAME *)link; \ + delete (CLASSNAME*)link; \ } \ \ /* Become a deep copy of src_list*/ \ - void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list, \ - CLASSNAME *(*copier)(const CLASSNAME *)) { \ - CLASSNAME##_IT from_it(const_cast(src_list)); \ + void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST* src_list, \ + CLASSNAME* (*copier)(const CLASSNAME*)) { \ + CLASSNAME##_IT from_it(const_cast(src_list)); \ CLASSNAME##_IT to_it(this); \ \ for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ diff --git a/src/ccutil/errcode.cpp b/src/ccutil/errcode.cpp index 09e6a0604d..e31eb3cd0a 100644 --- a/src/ccutil/errcode.cpp +++ b/src/ccutil/errcode.cpp @@ -17,18 +17,18 @@ * **********************************************************************/ -#include -#include -#include -#include +#include +#include +#include +#include #ifdef __UNIX__ -#include +#include #endif -#include "tprintf.h" -#include "errcode.h" +#include "errcode.h" +#include "tprintf.h" const ERRCODE BADERRACTION = "Illegal error action"; -#define MAX_MSG 1024 +#define MAX_MSG 1024 /********************************************************************** * error @@ -37,39 +37,39 @@ const ERRCODE BADERRACTION = "Illegal error action"; * Makes use of error messages and numbers in a common place. * **********************************************************************/ -void ERRCODE::error( // handle error -const char *caller, // name of caller -TessErrorLogCode action, // action to take -const char *format, ... // special message -) const { - va_list args; // variable args +void ERRCODE::error( // handle error + const char* caller, // name of caller + TessErrorLogCode action, // action to take + const char* format, + ... // special message + ) const { + va_list args; // variable args char msg[MAX_MSG]; - char *msgptr = msg; + char* msgptr = msg; if (caller != nullptr) - //name of caller - msgptr += sprintf (msgptr, "%s:", caller); - //actual message - msgptr += sprintf (msgptr, "Error:%s", message); + // name of caller + msgptr += sprintf(msgptr, "%s:", caller); + // actual message + msgptr += sprintf(msgptr, "Error:%s", message); if (format != nullptr) { - msgptr += sprintf (msgptr, ":"); - va_start(args, format); //variable list - #ifdef _WIN32 - //print remainder - msgptr += _vsnprintf (msgptr, MAX_MSG - 2 - (msgptr - msg), format, args); - msg[MAX_MSG - 2] = '\0'; //ensure termination - strcat (msg, "\n"); - #else - //print remainder - msgptr += vsprintf (msgptr, format, args); - //no specific - msgptr += sprintf (msgptr, "\n"); - #endif + msgptr += sprintf(msgptr, ":"); + va_start(args, format); // variable list +#ifdef _WIN32 + // print remainder + msgptr += _vsnprintf(msgptr, MAX_MSG - 2 - (msgptr - msg), format, args); + msg[MAX_MSG - 2] = '\0'; // ensure termination + strcat(msg, "\n"); +#else + // print remainder + msgptr += vsprintf(msgptr, format, args); + // no specific + msgptr += sprintf(msgptr, "\n"); +#endif va_end(args); - } - else - //no specific - msgptr += sprintf (msgptr, "\n"); + } else + // no specific + msgptr += sprintf(msgptr, "\n"); // %s is needed here so msg is printed correctly! fprintf(stderr, "%s", msg); @@ -78,14 +78,13 @@ const char *format, ... // special message switch (action) { case DBG: case TESSLOG: - return; //report only + return; // report only case TESSEXIT: - //err_exit(); + // err_exit(); case ABORT: // Create a deliberate segv as the stack trace is more useful that way. - if (!*p) - abort(); + if (!*p) abort(); default: - BADERRACTION.error ("error", ABORT, nullptr); + BADERRACTION.error("error", ABORT, nullptr); } } diff --git a/src/ccutil/errcode.h b/src/ccutil/errcode.h index 2f31a7b9ae..2668512002 100644 --- a/src/ccutil/errcode.h +++ b/src/ccutil/errcode.h @@ -17,74 +17,72 @@ * **********************************************************************/ -#ifndef ERRCODE_H -#define ERRCODE_H +#ifndef ERRCODE_H +#define ERRCODE_H -#include "host.h" +#include "host.h" /*Control parameters for error()*/ enum TessErrorLogCode { - DBG = -1, /*log without alert */ - TESSLOG = 0, /*alert user */ - TESSEXIT = 1, /*exit after erro */ - ABORT = 2 /*abort after error */ + DBG = -1, /*log without alert */ + TESSLOG = 0, /*alert user */ + TESSEXIT = 1, /*exit after erro */ + ABORT = 2 /*abort after error */ }; /* Explicit Error Abort codes */ -#define NO_ABORT_CODE 0 -#define LIST_ABORT 1 -#define MEMORY_ABORT 2 -#define FILE_ABORT 3 +#define NO_ABORT_CODE 0 +#define LIST_ABORT 1 +#define MEMORY_ABORT 2 +#define FILE_ABORT 3 /* Location of code at error codes Reserve 0..2 (status codes 0..23 for UNLV)*/ -#define LOC_UNUSED0 0 -#define LOC_UNUSED1 1 -#define LOC_UNUSED2 2 -#define LOC_INIT 3 -#define LOC_EDGE_PROG 4 +#define LOC_UNUSED0 0 +#define LOC_UNUSED1 1 +#define LOC_UNUSED2 2 +#define LOC_INIT 3 +#define LOC_EDGE_PROG 4 #define LOC_TEXT_ORD_ROWS 5 -#define LOC_TEXT_ORD_WORDS 6 -#define LOC_PASS1 7 -#define LOC_PASS2 8 +#define LOC_TEXT_ORD_WORDS 6 +#define LOC_PASS1 7 +#define LOC_PASS2 8 /* Reserve up to 8..13 for adding subloc 0/3 plus subsubloc 0/1/2 */ -#define LOC_FUZZY_SPACE 14 +#define LOC_FUZZY_SPACE 14 /* Reserve up to 14..20 for adding subloc 0/3 plus subsubloc 0/1/2 */ -#define LOC_MM_ADAPT 21 -#define LOC_DOC_BLK_REJ 22 +#define LOC_MM_ADAPT 21 +#define LOC_DOC_BLK_REJ 22 #define LOC_WRITE_RESULTS 23 -#define LOC_ADAPTIVE 24 +#define LOC_ADAPTIVE 24 /* DON'T DEFINE ANY LOCATION > 31 !!! */ /* Sub locatation determines whether pass2 was in normal mode or fix xht mode*/ -#define SUBLOC_NORM 0 -#define SUBLOC_FIX_XHT 3 +#define SUBLOC_NORM 0 +#define SUBLOC_FIX_XHT 3 /* Sub Sub locatation determines whether match_word_pass2 was in Tess matcher, NN matcher or somewhere else */ -#define SUBSUBLOC_OTHER 0 -#define SUBSUBLOC_TESS 1 -#define SUBSUBLOC_NN 2 +#define SUBSUBLOC_OTHER 0 +#define SUBSUBLOC_TESS 1 +#define SUBSUBLOC_NN 2 -class TESS_API ERRCODE { // error handler class - const char *message; // error message +class TESS_API ERRCODE { // error handler class + const char* message; // error message public: - void error( // error print function - const char *caller, // function location - TessErrorLogCode action, // action to take - const char *format, ... // fprintf format - ) const; - ERRCODE(const char *string) { - message = string; - } // initialize with string + void error( // error print function + const char* caller, // function location + TessErrorLogCode action, // action to take + const char* format, + ... // fprintf format + ) const; + ERRCODE(const char* string) { message = string; } // initialize with string }; const ERRCODE ASSERT_FAILED = "Assert failed"; -#define ASSERT_HOST(x) if (!(x)) \ - { \ - ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", \ - __FILE__, __LINE__); \ +#define ASSERT_HOST(x) \ + if (!(x)) { \ + ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", __FILE__, __LINE__); \ } #define ASSERT_HOST_MSG(x, ...) \ diff --git a/src/ccutil/fileerr.h b/src/ccutil/fileerr.h index d3b6993d21..2ee2ab1093 100644 --- a/src/ccutil/fileerr.h +++ b/src/ccutil/fileerr.h @@ -17,10 +17,10 @@ * **********************************************************************/ -#ifndef FILEERR_H -#define FILEERR_H +#ifndef FILEERR_H +#define FILEERR_H -#include "errcode.h" +#include "errcode.h" const ERRCODE CANTOPENFILE = "Can't open file"; const ERRCODE CANTCREATEFILE = "Can't create file"; diff --git a/src/ccutil/genericheap.h b/src/ccutil/genericheap.h index c78500d5e3..7e84bd4c11 100644 --- a/src/ccutil/genericheap.h +++ b/src/ccutil/genericheap.h @@ -60,33 +60,21 @@ class GenericHeap { GenericHeap() = default; // The initial size is only a GenericVector::reserve. It is not enforced as // the size limit of the heap. Caller must implement their own enforcement. - explicit GenericHeap(int initial_size) { - heap_.reserve(initial_size); - } + explicit GenericHeap(int initial_size) { heap_.reserve(initial_size); } // Simple accessors. - bool empty() const { - return heap_.empty(); - } - int size() const { - return heap_.size(); - } - int size_reserved() const { - return heap_.size_reserved(); - } + bool empty() const { return heap_.empty(); } + int size() const { return heap_.size(); } + int size_reserved() const { return heap_.size_reserved(); } void clear() { // Clear truncates to 0 to keep the number reserved in tact. heap_.truncate(0); } // Provides access to the underlying vector. // Caution! any changes that modify the keys will invalidate the heap! - GenericVector* heap() { - return &heap_; - } + GenericVector* heap() { return &heap_; } // Provides read-only access to an element of the underlying vector. - const Pair& get(int index) const { - return heap_[index]; - } + const Pair& get(int index) const { return heap_[index]; } // Add entry to the heap, keeping the smallest item at the top, by operator<. // Note that *entry is used as the source of operator=, but it is non-const @@ -105,9 +93,7 @@ class GenericHeap { } // Get the value of the top (smallest, defined by operator< ) element. - const Pair& PeekTop() const { - return heap_[0]; - } + const Pair& PeekTop() const { return heap_[0]; } // Get the value of the worst (largest, defined by operator< ) element. const Pair& PeekWorst() const { return heap_[IndexOfWorst()]; } @@ -117,10 +103,8 @@ class GenericHeap { // Time = O(log n). bool Pop(Pair* entry) { int new_size = heap_.size() - 1; - if (new_size < 0) - return false; // Already empty. - if (entry != nullptr) - *entry = heap_[0]; + if (new_size < 0) return false; // Already empty. + if (entry != nullptr) *entry = heap_[0]; if (new_size > 0) { // Sift the hole at the start of the heap_ downwards to match the last // element. @@ -141,8 +125,7 @@ class GenericHeap { int worst_index = IndexOfWorst(); if (worst_index < 0) return false; // It cannot be empty! // Extract the worst element from the heap, leaving a hole at worst_index. - if (entry != nullptr) - *entry = heap_[worst_index]; + if (entry != nullptr) *entry = heap_[worst_index]; int heap_size = heap_.size() - 1; if (heap_size > 0) { // Sift the hole upwards to match the last element of the heap_ @@ -207,8 +190,7 @@ class GenericHeap { int heap_size = heap_.size(); int child; while ((child = LeftChild(hole_index)) < heap_size) { - if (child + 1 < heap_size && heap_[child + 1] < heap_[child]) - ++child; + if (child + 1 < heap_size && heap_[child + 1] < heap_[child]) ++child; if (heap_[child] < pair) { heap_[hole_index] = heap_[child]; hole_index = child; @@ -221,12 +203,8 @@ class GenericHeap { // Functions to navigate the tree. Unlike the original implementation, we // store the root at index 0. - int ParentNode(int index) const { - return (index + 1) / 2 - 1; - } - int LeftChild(int index) const { - return index * 2 + 1; - } + int ParentNode(int index) const { return (index + 1) / 2 - 1; } + int LeftChild(int index) const { return index * 2 + 1; } private: GenericVector heap_; diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h index c7c574386a..88bb764aad 100644 --- a/src/ccutil/genericvector.h +++ b/src/ccutil/genericvector.h @@ -25,12 +25,12 @@ #include #include -#include "tesscallback.h" #include "errcode.h" #include "helpers.h" #include "ndminx.h" #include "serialis.h" #include "strngs.h" +#include "tesscallback.h" // Use PointerVector below in preference to GenericVector, as that // provides automatic deletion of pointers, [De]Serialize that works, and @@ -38,9 +38,7 @@ template class GenericVector { public: - GenericVector() { - init(kDefaultVectorSize); - } + GenericVector() { init(kDefaultVectorSize); } GenericVector(int size, T init_val) { init(size); init_to_size(size, init_val); @@ -51,8 +49,8 @@ class GenericVector { this->init(other.size()); this->operator+=(other); } - GenericVector &operator+=(const GenericVector& other); - GenericVector &operator=(const GenericVector& other); + GenericVector& operator+=(const GenericVector& other); + GenericVector& operator=(const GenericVector& other); ~GenericVector(); @@ -70,9 +68,7 @@ class GenericVector { } // Return the size used. - int size() const { - return size_used_; - } + int size() const { return size_used_; } // Workaround to avoid g++ -Wsign-compare warnings. size_t unsigned_size() const { static_assert(sizeof(size_used_) <= sizeof(size_t), @@ -80,23 +76,17 @@ class GenericVector { assert(0 <= size_used_); return static_cast(size_used_); } - int size_reserved() const { - return size_reserved_; - } + int size_reserved() const { return size_reserved_; } - int length() const { - return size_used_; - } + int length() const { return size_used_; } // Return true if empty. - bool empty() const { - return size_used_ == 0; - } + bool empty() const { return size_used_ == 0; } // Return the object from an index. - T &get(int index) const; - T &back() const; - T &operator[](int index) const; + T& get(int index) const; + T& back() const; + T& operator[](int index) const; // Returns the last object and removes it. T pop_back(); @@ -136,8 +126,7 @@ class GenericVector { // Truncates the array to the given size by removing the end. // If the current size is less, the array is not expanded. void truncate(int size) { - if (size < size_used_) - size_used_ = size; + if (size < size_used_) size_used_ = size; } // Add a callback to be called to delete the elements when the array took @@ -146,7 +135,7 @@ class GenericVector { // Add a callback to be called to compare the elements when needed (contains, // get_id, ...) - void set_compare_callback(TessResultCallback2* cb); + void set_compare_callback(TessResultCallback2* cb); // Clear the array, calling the clear callback function if any. // All the owned callbacks are also deleted. @@ -169,7 +158,7 @@ class GenericVector { // fread (and swapping)/fwrite. // Returns false on error or if the callback returns false. // DEPRECATED. Use [De]Serialize[Classes] instead. - bool write(FILE* f, TessResultCallback2* cb) const; + bool write(FILE* f, TessResultCallback2* cb) const; bool read(tesseract::TFile* f, TessResultCallback2* cb); // Writes a vector of simple types to the given file. Assumes that bitwise @@ -206,8 +195,8 @@ class GenericVector { // the pointed to the new larger array. // This function uses memcpy to copy the data, instead of invoking // operator=() for each element like double_the_size() does. - static T *double_the_size_memcpy(int current_size, T *data) { - T *data_new = new T[current_size * 2]; + static T* double_the_size_memcpy(int current_size, T* data) { + T* data_new = new T[current_size * 2]; memcpy(data_new, data, sizeof(T) * current_size); delete[] data; return data_new; @@ -239,8 +228,7 @@ class GenericVector { // Use binary_search to get the index of target, or its nearest candidate. bool bool_binary_search(const T& target) const { int index = binary_search(target); - if (index >= size_used_) - return false; + if (index >= size_used_) return false; return data_[index] == target; } // Searches the array (assuming sorted in ascending order, using sort()) for @@ -265,15 +253,13 @@ class GenericVector { // Compact the vector by deleting elements using operator!= on basic types. // The vector must be sorted. void compact_sorted() { - if (size_used_ == 0) - return; + if (size_used_ == 0) return; // First element is in no matter what, hence the i = 1. int last_write = 0; for (int i = 1; i < size_used_; ++i) { // Finds next unique item and writes it. - if (data_[last_write] != data_[i]) - data_[++last_write] = data_[i]; + if (data_[last_write] != data_[i]) data_[++last_write] = data_[i]; } // last_write is the index of a valid data cell, so add 1. size_used_ = last_write + 1; @@ -285,8 +271,7 @@ class GenericVector { int new_size = 0; int old_index = 0; // Until the callback returns true, the elements stay the same. - while (old_index < size_used_ && !delete_cb->Run(old_index++)) - ++new_size; + while (old_index < size_used_ && !delete_cb->Run(old_index++)) ++new_size; // Now just copy anything else that gets false from delete_cb. for (; old_index < size_used_; ++old_index) { if (!delete_cb->Run(old_index)) { @@ -311,7 +296,7 @@ class GenericVector { int choose_nth_item(int target_index) { // Make sure target_index is legal. if (target_index < 0) - target_index = 0; // ensure legal + target_index = 0; // ensure legal else if (target_index >= size_used_) target_index = size_used_ - 1; unsigned int seed = 1; @@ -330,8 +315,7 @@ class GenericVector { // Only uses operator< bool WithinBounds(const T& rangemin, const T& rangemax) const { for (int i = 0; i < size_used_; ++i) { - if (data_[i] < rangemin || rangemax < data_[i]) - return false; + if (data_[i] < rangemin || rangemax < data_[i]) return false; } return true; } @@ -347,12 +331,12 @@ class GenericVector { // vector are small enough that for efficiency it makes sense // to start with a larger initial size. static const int kDefaultVectorSize = 4; - int32_t size_used_; - int32_t size_reserved_; - T* data_; + int32_t size_used_; + int32_t size_reserved_; + T* data_; TessCallback1* clear_cb_; // Mutable because Run method is not const - mutable TessResultCallback2* compare_cb_; + mutable TessResultCallback2* compare_cb_; }; namespace tesseract { @@ -412,7 +396,7 @@ inline bool LoadFileLinesToStrings(const STRING& filename, } template -bool cmp_eq(T const & t1, T const & t2) { +bool cmp_eq(T const& t1, T const& t2) { return t1 == t2; } @@ -422,8 +406,8 @@ bool cmp_eq(T const & t1, T const & t2) { // return > 0 if t1 > t2 template int sort_cmp(const void* t1, const void* t2) { - const T* a = static_cast (t1); - const T* b = static_cast (t2); + const T* a = static_cast(t1); + const T* b = static_cast(t2); if (*a < *b) { return -1; } else if (*b < *a) { @@ -453,11 +437,11 @@ int sort_ptr_cmp(const void* t1, const void* t2) { // Subclass for a vector of pointers. Use in preference to GenericVector // as it provides automatic deletion and correct serialization, with the // corollary that all copy operations are deep copies of the pointed-to objects. -template +template class PointerVector : public GenericVector { public: - PointerVector() : GenericVector() { } - explicit PointerVector(int size) : GenericVector(size) { } + PointerVector() : GenericVector() {} + explicit PointerVector(int size) : GenericVector(size) {} ~PointerVector() { // Clear must be called here, even though it is called again by the base, // as the base will call the wrong clear. @@ -672,12 +656,10 @@ GenericVector::~GenericVector() { // copied. template void GenericVector::reserve(int size) { - if (size_reserved_ >= size || size <= 0) - return; + if (size_reserved_ >= size || size <= 0) return; if (size < kDefaultVectorSize) size = kDefaultVectorSize; T* new_array = new T[size]; - for (int i = 0; i < size_used_; ++i) - new_array[i] = data_[i]; + for (int i = 0; i < size_used_; ++i) new_array[i] = data_[i]; delete[] data_; data_ = new_array; size_reserved_ = size; @@ -687,8 +669,7 @@ template void GenericVector::double_the_size() { if (size_reserved_ == 0) { reserve(kDefaultVectorSize); - } - else { + } else { reserve(2 * size_reserved_); } } @@ -698,26 +679,24 @@ template void GenericVector::init_to_size(int size, T t) { reserve(size); size_used_ = size; - for (int i = 0; i < size; ++i) - data_[i] = t; + for (int i = 0; i < size; ++i) data_[i] = t; } - // Return the object from an index. template -T &GenericVector::get(int index) const { +T& GenericVector::get(int index) const { ASSERT_HOST(index >= 0 && index < size_used_); return data_[index]; } template -T &GenericVector::operator[](int index) const { +T& GenericVector::operator[](int index) const { assert(index >= 0 && index < size_used_); return data_[index]; } template -T &GenericVector::back() const { +T& GenericVector::back() const { ASSERT_HOST(size_used_ > 0); return data_[size_used_ - 1]; } @@ -741,10 +720,9 @@ void GenericVector::set(T t, int index) { template void GenericVector::insert(T t, int index) { ASSERT_HOST(index >= 0 && index <= size_used_); - if (size_reserved_ == size_used_) - double_the_size(); + if (size_reserved_ == size_used_) double_the_size(); for (int i = size_used_; i > index; --i) { - data_[i] = data_[i-1]; + data_[i] = data_[i - 1]; } data_[index] = t; size_used_++; @@ -756,7 +734,7 @@ template void GenericVector::remove(int index) { ASSERT_HOST(index >= 0 && index < size_used_); for (int i = index; i < size_used_ - 1; ++i) { - data_[i] = data_[i+1]; + data_[i] = data_[i + 1]; } size_used_--; } @@ -772,8 +750,7 @@ template int GenericVector::get_index(T object) const { for (int i = 0; i < size_used_; ++i) { ASSERT_HOST(compare_cb_ != nullptr); - if (compare_cb_->Run(object, data_[i])) - return i; + if (compare_cb_->Run(object, data_[i])) return i; } return -1; } @@ -788,8 +765,7 @@ bool GenericVector::contains(T object) const { template int GenericVector::push_back(T object) { int index = 0; - if (size_used_ == size_reserved_) - double_the_size(); + if (size_used_ == size_reserved_) double_the_size(); index = size_used_++; data_[index] = object; return index; @@ -798,18 +774,15 @@ int GenericVector::push_back(T object) { template int GenericVector::push_back_new(T object) { int index = get_index(object); - if (index >= 0) - return index; + if (index >= 0) return index; return push_back(object); } // Add an element in the array (front) template int GenericVector::push_front(T object) { - if (size_used_ == size_reserved_) - double_the_size(); - for (int i = size_used_; i > 0; --i) - data_[i] = data_[i-1]; + if (size_used_ == size_reserved_) double_the_size(); + for (int i = size_used_; i > 0; --i) data_[i] = data_[i - 1]; data_[0] = object; ++size_used_; return 0; @@ -821,7 +794,7 @@ void GenericVector::operator+=(T t) { } template -GenericVector &GenericVector::operator+=(const GenericVector& other) { +GenericVector& GenericVector::operator+=(const GenericVector& other) { this->reserve(size_used_ + other.size_used_); for (int i = 0; i < other.size(); ++i) { this->operator+=(other.data_[i]); @@ -830,7 +803,7 @@ GenericVector &GenericVector::operator+=(const GenericVector& other) { } template -GenericVector &GenericVector::operator=(const GenericVector& other) { +GenericVector& GenericVector::operator=(const GenericVector& other) { if (&other != this) { this->truncate(0); this->operator+=(other); @@ -849,7 +822,7 @@ void GenericVector::set_clear_callback(TessCallback1* cb) { // their ownership. template void GenericVector::set_compare_callback( - TessResultCallback2* cb) { + TessResultCallback2* cb) { compare_cb_ = cb; } @@ -858,8 +831,7 @@ template void GenericVector::clear() { if (size_reserved_ > 0) { if (clear_cb_ != nullptr) - for (int i = 0; i < size_used_; ++i) - clear_cb_->Run(data_[i]); + for (int i = 0; i < size_used_; ++i) clear_cb_->Run(data_[i]); delete[] data_; data_ = nullptr; size_used_ = 0; @@ -878,10 +850,9 @@ void GenericVector::delete_data_pointers() { } } - template bool GenericVector::write( - FILE* f, TessResultCallback2* cb) const { + FILE* f, TessResultCallback2* cb) const { if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false; if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false; if (cb != nullptr) { @@ -950,8 +921,7 @@ bool GenericVector::DeSerialize(bool swap, FILE* fp) { size_used_ = reserved; if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false; if (swap) { - for (int i = 0; i < size_used_; ++i) - ReverseN(&data_[i], sizeof(data_[i])); + for (int i = 0; i < size_used_; ++i) ReverseN(&data_[i], sizeof(data_[i])); } return true; } @@ -1069,8 +1039,7 @@ int GenericVector::choose_nth_item(int target_index, int start, int end, // Number of elements to process. int num_elements = end - start; // Trivial cases. - if (num_elements <= 1) - return start; + if (num_elements <= 1) return start; if (num_elements == 2) { if (data_[start] < data_[start + 1]) { return target_index > start ? start + 1 : start; @@ -1078,11 +1047,11 @@ int GenericVector::choose_nth_item(int target_index, int start, int end, return target_index > start ? start : start + 1; } } - // Place the pivot at start. - #ifndef rand_r // _MSC_VER, ANDROID +// Place the pivot at start. +#ifndef rand_r // _MSC_VER, ANDROID srand(*seed); - #define rand_r(seed) rand() - #endif // _MSC_VER +#define rand_r(seed) rand() +#endif // _MSC_VER int pivot = rand_r(seed) % num_elements + start; swap(pivot, start); // The invariant condition here is that items [start, next_lesser) are less @@ -1105,10 +1074,9 @@ int GenericVector::choose_nth_item(int target_index, int start, int end, if (target_index < next_lesser) return choose_nth_item(target_index, start, next_lesser, seed); else if (target_index < prev_greater) - return next_lesser; // In equal bracket. + return next_lesser; // In equal bracket. else return choose_nth_item(target_index, prev_greater, end, seed); } - #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_ diff --git a/src/ccutil/globaloc.cpp b/src/ccutil/globaloc.cpp index 62e92db484..bc4023cf8f 100644 --- a/src/ccutil/globaloc.cpp +++ b/src/ccutil/globaloc.cpp @@ -17,14 +17,14 @@ * **********************************************************************/ -#include +#include #ifdef __linux__ -#include // For SYS_gettid. -#include // For syscall itself. +#include // For SYS_gettid. +#include // For syscall itself. #endif -#include "allheaders.h" -#include "errcode.h" -#include "tprintf.h" +#include "allheaders.h" +#include "errcode.h" +#include "tprintf.h" // Size of thread-id array of pixes to keep in case of crash. const int kMaxNumThreadPixes = 32768; @@ -71,24 +71,16 @@ void signal_exit(int signal_code) { #endif } -void err_exit() { - ASSERT_HOST("Fatal error encountered!" == nullptr); -} - +void err_exit() { ASSERT_HOST("Fatal error encountered!" == nullptr); } void set_global_loc_code(int loc_code) { // global_loc_code = loc_code; - } - void set_global_subloc_code(int loc_code) { // global_subloc_code = loc_code; - } - void set_global_subsubloc_code(int loc_code) { // global_subsubloc_code = loc_code; - } diff --git a/src/ccutil/globaloc.h b/src/ccutil/globaloc.h index 1118fbc94b..e4e07f41e1 100644 --- a/src/ccutil/globaloc.h +++ b/src/ccutil/globaloc.h @@ -17,10 +17,10 @@ * **********************************************************************/ -#ifndef GLOBALOC_H -#define GLOBALOC_H +#ifndef GLOBALOC_H +#define GLOBALOC_H -#include "host.h" +#include "host.h" // Saves a clone of the given pix, and notes its resolution in thread-specific // data, so that the image can be written prior to a crash. diff --git a/src/ccutil/helpers.h b/src/ccutil/helpers.h index f27e29e966..dcf568e58a 100644 --- a/src/ccutil/helpers.h +++ b/src/ccutil/helpers.h @@ -43,9 +43,7 @@ class TRand { public: TRand() : seed_(1) {} // Sets the seed to the given value. - void set_seed(uint64_t seed) { - seed_ = seed; - } + void set_seed(uint64_t seed) { seed_ = seed; } // Sets the seed using a hash of a string. void set_seed(const std::string& str) { std::hash hasher; @@ -62,9 +60,7 @@ class TRand { return range * 2.0 * IntRand() / INT32_MAX - range; } // Returns a floating point value in the range [0, range]. - double UnsignedRand(double range) { - return range * IntRand() / INT32_MAX; - } + double UnsignedRand(double range) { return range * IntRand() / INT32_MAX; } private: // Steps the generator to the next value. @@ -80,7 +76,7 @@ class TRand { } // namespace tesseract // Remove newline (if any) at the end of the string. -inline void chomp_string(char *str) { +inline void chomp_string(char* str) { int last_index = static_cast(strlen(str)) - 1; while (last_index >= 0 && (str[last_index] == '\n' || str[last_index] == '\r')) { @@ -89,21 +85,22 @@ inline void chomp_string(char *str) { } // Advance the current pointer of the file if it points to a newline character. -inline void SkipNewline(FILE *file) { +inline void SkipNewline(FILE* file) { if (fgetc(file) != '\n') fseek(file, -1, SEEK_CUR); } // Swaps the two args pointed to by the pointers. // Operator= and copy constructor must work on T. -template inline void Swap(T* p1, T* p2) { +template +inline void Swap(T* p1, T* p2) { T tmp(*p2); *p2 = *p1; *p1 = tmp; } // qsort function to sort 2 floats. -inline int sort_floats(const void *arg1, const void *arg2) { - float diff = *((float *) arg1) - *((float *) arg2); +inline int sort_floats(const void* arg1, const void* arg2) { + float diff = *((float*)arg1) - *((float*)arg2); if (diff > 0) { return 1; } else if (diff < 0) { @@ -119,52 +116,42 @@ inline int RoundUp(int n, int block_size) { } // Clip a numeric value to the interval [lower_bound, upper_bound]. -template +template inline T ClipToRange(const T& x, const T& lower_bound, const T& upper_bound) { - if (x < lower_bound) - return lower_bound; - if (x > upper_bound) - return upper_bound; + if (x < lower_bound) return lower_bound; + if (x > upper_bound) return upper_bound; return x; } // Extend the range [lower_bound, upper_bound] to include x. -template +template inline void UpdateRange(const T1& x, T2* lower_bound, T2* upper_bound) { - if (x < *lower_bound) - *lower_bound = x; - if (x > *upper_bound) - *upper_bound = x; + if (x < *lower_bound) *lower_bound = x; + if (x > *upper_bound) *upper_bound = x; } // Decrease lower_bound to be <= x_lo AND increase upper_bound to be >= x_hi. -template -inline void UpdateRange(const T1& x_lo, const T1& x_hi, - T2* lower_bound, T2* upper_bound) { - if (x_lo < *lower_bound) - *lower_bound = x_lo; - if (x_hi > *upper_bound) - *upper_bound = x_hi; +template +inline void UpdateRange(const T1& x_lo, const T1& x_hi, T2* lower_bound, + T2* upper_bound) { + if (x_lo < *lower_bound) *lower_bound = x_lo; + if (x_hi > *upper_bound) *upper_bound = x_hi; } // Intersect the range [*lower2, *upper2] with the range [lower1, upper1], // putting the result back in [*lower2, *upper2]. // If non-intersecting ranges are given, we end up with *lower2 > *upper2. -template -inline void IntersectRange(const T& lower1, const T& upper1, - T* lower2, T* upper2) { - if (lower1 > *lower2) - *lower2 = lower1; - if (upper1 < *upper2) - *upper2 = upper1; +template +inline void IntersectRange(const T& lower1, const T& upper1, T* lower2, + T* upper2) { + if (lower1 > *lower2) *lower2 = lower1; + if (upper1 < *upper2) *upper2 = upper1; } // Proper modulo arithmetic operator. Returns a mod b that works for -ve a. // For any integer a and positive b, returns r : 0<=r bool NearlyEqual(T x, T y, T tolerance) { +template +bool NearlyEqual(T x, T y, T tolerance) { T diff = x - y; return diff <= tolerance && -diff <= tolerance; } diff --git a/src/ccutil/indexmapbidi.cpp b/src/ccutil/indexmapbidi.cpp index fe9e083e55..339a65396e 100644 --- a/src/ccutil/indexmapbidi.cpp +++ b/src/ccutil/indexmapbidi.cpp @@ -52,14 +52,12 @@ bool IndexMap::Serialize(FILE* fp) const { bool IndexMap::DeSerialize(bool swap, FILE* fp) { int32_t sparse_size; if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false; - if (swap) - ReverseN(&sparse_size, sizeof(sparse_size)); + if (swap) ReverseN(&sparse_size, sizeof(sparse_size)); sparse_size_ = sparse_size; if (!compact_map_.DeSerialize(swap, fp)) return false; return true; } - // Top-level init function in a single call to initialize a map to select // a single contiguous subrange [start, end) of the sparse space to be mapped // 1 to 1 to the compact space, with all other elements of the sparse space @@ -67,8 +65,7 @@ bool IndexMap::DeSerialize(bool swap, FILE* fp) { // No need to call Setup after this. void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) { Init(sparse_size, false); - for (int i = start; i < end; ++i) - SetMap(i, true); + for (int i = start; i < end; ++i) SetMap(i, true); Setup(); } @@ -79,8 +76,7 @@ void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) { void IndexMapBiDi::Init(int size, bool all_mapped) { sparse_map_.init_to_size(size, -1); if (all_mapped) { - for (int i = 0; i < size; ++i) - sparse_map_[i] = i; + for (int i = 0; i < size; ++i) sparse_map_[i] = i; } } @@ -156,15 +152,13 @@ void IndexMapBiDi::CompleteMerges() { for (int i = 0; i < sparse_map_.size(); ++i) { int compact_index = MasterCompactIndex(sparse_map_[i]); sparse_map_[i] = compact_index; - if (compact_index >= compact_size) - compact_size = compact_index + 1; + if (compact_index >= compact_size) compact_size = compact_index + 1; } // Re-generate the compact_map leaving holes for unused indices. compact_map_.init_to_size(compact_size, -1); for (int i = 0; i < sparse_map_.size(); ++i) { if (sparse_map_[i] >= 0) { - if (compact_map_[sparse_map_[i]] == -1) - compact_map_[sparse_map_[i]] = i; + if (compact_map_[sparse_map_[i]] == -1) compact_map_[sparse_map_[i]] = i; } } // Compact the compact_map, leaving tmp_compact_map saying where each diff --git a/src/ccutil/indexmapbidi.h b/src/ccutil/indexmapbidi.h index 398f3d1611..1fe20964ac 100644 --- a/src/ccutil/indexmapbidi.h +++ b/src/ccutil/indexmapbidi.h @@ -54,13 +54,9 @@ class IndexMap { return compact_map_[compact_index]; } // The size of the sparse space. - virtual int SparseSize() const { - return sparse_size_; - } + virtual int SparseSize() const { return sparse_size_; } // The size of the compact space. - int CompactSize() const { - return compact_map_.size(); - } + int CompactSize() const { return compact_map_.size(); } // Copy from the input. void CopyFrom(const IndexMap& src); @@ -139,9 +135,7 @@ class IndexMapBiDi : public IndexMap { return sparse_map_[sparse_index]; } // The size of the sparse space. - virtual int SparseSize() const { - return sparse_map_.size(); - } + virtual int SparseSize() const { return sparse_map_.size(); } // Copy from the input. void CopyFrom(const IndexMapBiDi& src); diff --git a/src/ccutil/kdpair.h b/src/ccutil/kdpair.h index 8a3052b7c0..40b56acab5 100644 --- a/src/ccutil/kdpair.h +++ b/src/ccutil/kdpair.h @@ -113,15 +113,9 @@ class KDPtrPair { } // Accessors. - const Key& key() const { - return key_; - } - void set_key(const Key& new_key) { - key_ = new_key; - } - const Data* data() const { - return data_; - } + const Key& key() const { return key_; } + void set_key(const Key& new_key) { key_ = new_key; } + const Data* data() const { return data_; } // Sets the data pointer, taking ownership of the data. void set_data(Data* new_data) { delete data_; @@ -148,9 +142,7 @@ struct KDPtrPairInc : public KDPtrPair { KDPtrPairInc() : KDPtrPair() {} KDPtrPairInc(Key k, Data* d) : KDPtrPair(k, d) {} KDPtrPairInc(KDPtrPairInc& src) : KDPtrPair(src) {} - void operator=(KDPtrPairInc& src) { - KDPtrPair::operator=(src); - } + void operator=(KDPtrPairInc& src) { KDPtrPair::operator=(src); } // Operator< facilitates sorting in increasing order. int operator<(const KDPtrPairInc& other) const { return this->key() < other.key(); @@ -165,9 +157,7 @@ struct KDPtrPairDec : public KDPtrPair { KDPtrPairDec() : KDPtrPair() {} KDPtrPairDec(Key k, Data* d) : KDPtrPair(k, d) {} KDPtrPairDec(KDPtrPairDec& src) : KDPtrPair(src) {} - void operator=(KDPtrPairDec& src) { - KDPtrPair::operator=(src); - } + void operator=(KDPtrPairDec& src) { KDPtrPair::operator=(src); } // Operator< facilitates sorting in decreasing order by using operator> on // the key values. int operator<(const KDPtrPairDec& other) const { diff --git a/src/ccutil/lsterr.h b/src/ccutil/lsterr.h index c585c3a41e..feb7e4a137 100644 --- a/src/ccutil/lsterr.h +++ b/src/ccutil/lsterr.h @@ -20,10 +20,9 @@ #ifndef TESSERACT_CCUTIL_LSTERR_H_ #define TESSERACT_CCUTIL_LSTERR_H_ -#include "errcode.h" //must be last include +#include "errcode.h" //must be last include -const ERRCODE DONT_CONSTRUCT_LIST_BY_COPY = -"Can't create a list by assignment"; +const ERRCODE DONT_CONSTRUCT_LIST_BY_COPY = "Can't create a list by assignment"; const ERRCODE DONT_ASSIGN_LISTS = "Can't assign to lists"; const ERRCODE SERIALISE_LINKS = "Attempted to (de)serialise a link element"; diff --git a/src/ccutil/mainblk.cpp b/src/ccutil/mainblk.cpp index 959603be45..1f7981e047 100644 --- a/src/ccutil/mainblk.cpp +++ b/src/ccutil/mainblk.cpp @@ -17,21 +17,21 @@ * **********************************************************************/ -#include "fileerr.h" +#include "fileerr.h" #ifdef __UNIX__ -#include -#include +#include +#include #else -#include +#include #endif -#include -#include "ccutil.h" +#include +#include "ccutil.h" -#define VARDIR "configs/" /**< variables files */ +#define VARDIR "configs/" /**< variables files */ #define EXTERN const ERRCODE NO_PATH = -"Warning:explicit path for executable will not be used for configs"; + "Warning:explicit path for executable will not be used for configs"; static const ERRCODE USAGE = "Usage"; namespace tesseract { @@ -46,14 +46,14 @@ namespace tesseract { * * @param argv0 - paths to the directory with language files and config files. * An actual value of argv0 is used if not nullptr, otherwise TESSDATA_PREFIX is - * used if not nullptr, next try to use compiled in -DTESSDATA_PREFIX. If previous - * is not successful - use current directory. + * used if not nullptr, next try to use compiled in -DTESSDATA_PREFIX. If + * previous is not successful - use current directory. * @param basename - name of image */ -void CCUtil::main_setup(const char *argv0, const char *basename) { - imagebasename = basename; /**< name of image */ +void CCUtil::main_setup(const char* argv0, const char* basename) { + imagebasename = basename; /**< name of image */ - char *tessdata_prefix = getenv("TESSDATA_PREFIX"); + char* tessdata_prefix = getenv("TESSDATA_PREFIX"); if (argv0 != nullptr && *argv0 != '\0') { /* Use tessdata prefix from the command line. */ @@ -69,8 +69,8 @@ void CCUtil::main_setup(const char *argv0, const char *basename) { char path[_MAX_PATH]; DWORD length = GetModuleFileName(nullptr, path, sizeof(path)); if (length > 0 && length < sizeof(path)) { - errno_t result = _splitpath_s(path, drive, sizeof(drive), - dir, sizeof(dir), nullptr, 0, nullptr, 0); + errno_t result = _splitpath_s(path, drive, sizeof(drive), dir, + sizeof(dir), nullptr, 0, nullptr, 0); if (result == ERANGE) { tprintf("Error: Path too long: %s\n", path); } @@ -97,7 +97,7 @@ void CCUtil::main_setup(const char *argv0, const char *basename) { } // check for missing directory separator - const char *lastchar = datadir.string(); + const char* lastchar = datadir.string(); lastchar += datadir.length() - 1; if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0)) datadir += "/"; diff --git a/src/ccutil/memry.cpp b/src/ccutil/memry.cpp index 1cab424aa7..227df78163 100644 --- a/src/ccutil/memry.cpp +++ b/src/ccutil/memry.cpp @@ -17,8 +17,8 @@ * **********************************************************************/ -#include "memry.h" -#include +#include "memry.h" +#include // With improvements in OS memory allocators, internal memory management // is no longer required, so all these functions now map to their malloc @@ -27,19 +27,13 @@ // TODO(rays) further cleanup by redirecting calls to new and creating proper // constructors. -char *alloc_string(int32_t count) { +char* alloc_string(int32_t count) { // Round up the amount allocated to a multiple of 4 return static_cast(malloc((count + 3) & ~3)); } -void free_string(char *string) { - free(string); -} +void free_string(char* string) { free(string); } -void *alloc_mem(int32_t count) { - return malloc(static_cast(count)); -} +void* alloc_mem(int32_t count) { return malloc(static_cast(count)); } -void free_mem(void *oldchunk) { - free(oldchunk); -} +void free_mem(void* oldchunk) { free(oldchunk); } diff --git a/src/ccutil/memry.h b/src/ccutil/memry.h index d917c8757c..64f640c974 100644 --- a/src/ccutil/memry.h +++ b/src/ccutil/memry.h @@ -17,19 +17,19 @@ * **********************************************************************/ -#ifndef MEMRY_H -#define MEMRY_H +#ifndef MEMRY_H +#define MEMRY_H -#include -#include "host.h" +#include +#include "host.h" // allocate string -extern char *alloc_string(int32_t count); +extern char* alloc_string(int32_t count); // free a string. -extern void free_string(char *string); +extern void free_string(char* string); // get some memory -extern void *alloc_mem(int32_t count); +extern void* alloc_mem(int32_t count); // free mem from alloc_mem -extern void free_mem(void *oldchunk); +extern void free_mem(void* oldchunk); #endif diff --git a/src/ccutil/ndminx.h b/src/ccutil/ndminx.h index 9eee76c028..844e9fa55e 100644 --- a/src/ccutil/ndminx.h +++ b/src/ccutil/ndminx.h @@ -17,15 +17,15 @@ * **********************************************************************/ -#ifndef NDMINX_H -#define NDMINX_H +#ifndef NDMINX_H +#define NDMINX_H #ifndef MAX -#define MAX(x,y) (((x) >= (y))?(x):(y)) +#define MAX(x, y) (((x) >= (y)) ? (x) : (y)) #endif #ifndef MIN -#define MIN(x,y) (((x) <= (y))?(x):(y)) +#define MIN(x, y) (((x) <= (y)) ? (x) : (y)) #endif #endif diff --git a/src/ccutil/nwmain.h b/src/ccutil/nwmain.h index 297da80802..8e2f4fc1fb 100644 --- a/src/ccutil/nwmain.h +++ b/src/ccutil/nwmain.h @@ -20,153 +20,144 @@ #ifndef RUNMAIN_H #define RUNMAIN_H -#include "host.h" -#include "params.h" +#include "host.h" +#include "params.h" -#define DECLARE_MAIN(ARGC,ARGV)\ -STRING_VAR(init_config_file,"config","Config file to read on startup");\ -REALLY_DECLARE_MAIN(ARGC,ARGV) +#define DECLARE_MAIN(ARGC, ARGV) \ + STRING_VAR(init_config_file, "config", "Config file to read on startup"); \ + REALLY_DECLARE_MAIN(ARGC, ARGV) -#define DECLARE_MAIN_CONFIG(ARGC,ARGV,NAME)\ -STRING_VAR(init_config_file,NAME,"Config file to read on startup");\ -REALLY_DECLARE_MAIN(ARGC,ARGV) +#define DECLARE_MAIN_CONFIG(ARGC, ARGV, NAME) \ + STRING_VAR(init_config_file, NAME, "Config file to read on startup"); \ + REALLY_DECLARE_MAIN(ARGC, ARGV) #ifndef __UNIX__ -#define REALLY_DECLARE_MAIN(ARGC,ARGV)\ -\ -/**********************************************************************\ -* parse_args\ -*\ -* Turn a list of args into a new list of args with each separate\ -* whitespace spaced string being an arg.\ -**********************************************************************/\ -\ -int32_t parse_args( /*refine arg list*/\ -int32_t argc, /*no of input args*/\ -char *argv[], /*input args*/\ -char *arglist[] /*output args*/\ -)\ -{\ - int32_t argcount; /*converted argc*/\ - char *testchar; /*char in option string*/\ - int32_t arg; /*current argument*/\ -\ - argcount=0; /*no of options*/\ - for (arg=0;argm_pszExeName);\ - argsin[1]=strdup(theapp->m_lpCmdLine);\ -/*allocate memory for the args. There can never be more than half*/\ -/*the total number of characters in the arguments.*/\ - argv=(char**)malloc(((strlen(argsin[0])+strlen(argsin[1]))/2+1)*sizeof(char*));\ -\ -/*now construct argv as it should be for C.*/\ - argc=parse_args(2,argsin,argv);\ -\ -/*call main(argc,argv) here*/\ - exit_code=real_main(argc,(const char **)argv);\ -\ -\ -/*now get rid of the main app window*/\ - if (theapp!=nullptr && theapp->m_pMainWnd!=nullptr)\ - PostMessage(theapp->m_pMainWnd->m_hWnd,WM_QUIT,0,0);\ - free(argsin[0]);\ - free(argsin[1]);\ - free(argv);\ - global_exit_code=exit_code;\ - return exit_code;\ -}\ -\ -int32_t real_main(int32_t ARGC,const char* ARGV[])\ +#define REALLY_DECLARE_MAIN(ARGC, ARGV) \ + \ + /********************************************************************** \ + * parse_args \ + * \ + * Turn a list of args into a new list of args with each separate \ + * whitespace spaced string being an arg. \ + **********************************************************************/ \ + \ + int32_t parse_args( /*refine arg list*/ \ + int32_t argc, /*no of input args*/ \ + char* argv[], /*input args*/ \ + char* arglist[] /*output args*/ \ + ) { \ + int32_t argcount; /*converted argc*/ \ + char* testchar; /*char in option string*/ \ + int32_t arg; /*current argument*/ \ + \ + argcount = 0; /*no of options*/ \ + for (arg = 0; arg < argc; arg++) { \ + testchar = argv[arg]; /*start of arg*/ \ + do { \ + while (*testchar && (*testchar == ' ' || *testchar == '"' || \ + *testchar == '\n' || *testchar == '\t')) \ + testchar++; /*skip white space*/ \ + if (*testchar) { \ + arglist[argcount++] = testchar; /*new arg*/ \ + do { \ + for (testchar++; \ + *testchar && *testchar != ' ' && *testchar != '"' && \ + *testchar != '\n' && *testchar != '\t'; \ + testchar++) \ + ; /*skip to white space*/ \ + } while (*testchar == '"' && testchar[1] != ' ' && \ + testchar[1] != '\0' && testchar[1] != '\n' && \ + testchar[1] != '\t'); \ + if (*testchar) *testchar++ = '\0'; /*turn to separate args*/ \ + } \ + } while (*testchar); \ + } \ + return argcount; /*new number of args*/ \ + } \ + \ + int32_t global_exit_code; \ + int32_t real_main(int32_t, const char**); \ + \ + int32_t run_main( /*the main thread*/ \ + CWinApp* theapp /*arguments*/ \ + ) { \ + char** argv; \ + char* argsin[2]; \ + int32_t argc; \ + int32_t exit_code; \ + \ + argsin[0] = strdup(theapp->m_pszExeName); \ + argsin[1] = strdup(theapp->m_lpCmdLine); \ + /*allocate memory for the args. There can never be more than half*/ \ + /*the total number of characters in the arguments.*/ \ + argv = (char**)malloc(((strlen(argsin[0]) + strlen(argsin[1])) / 2 + 1) * \ + sizeof(char*)); \ + \ + /*now construct argv as it should be for C.*/ \ + argc = parse_args(2, argsin, argv); \ + \ + /*call main(argc,argv) here*/ \ + exit_code = real_main(argc, (const char**)argv); \ + \ + /*now get rid of the main app window*/ \ + if (theapp != nullptr && theapp->m_pMainWnd != nullptr) \ + PostMessage(theapp->m_pMainWnd->m_hWnd, WM_QUIT, 0, 0); \ + free(argsin[0]); \ + free(argsin[1]); \ + free(argv); \ + global_exit_code = exit_code; \ + return exit_code; \ + } \ + \ + int32_t real_main(int32_t ARGC, const char* ARGV[]) #else -#define REALLY_DECLARE_MAIN(ARGC,ARGV)\ -\ -/**********************************************************************\ -* parse_args\ -*\ -* Turn a list of args into a new list of args with each separate\ -* whitespace spaced string being an arg.\ -**********************************************************************/\ -\ -int32_t parse_args( /*refine arg list*/\ -int32_t argc, /*no of input args*/\ -char *argv[], /*input args*/\ -char *arglist[] /*output args*/\ -)\ -{\ - int32_t argcount; /*converted argc*/\ - char *testchar; /*char in option string*/\ - int32_t arg; /*current argument*/\ -\ - argcount=0; /*no of options*/\ - for (arg=0;arg +template class ObjectCache { public: ObjectCache() = default; @@ -39,10 +39,10 @@ class ObjectCache { mu_.Lock(); for (int i = 0; i < cache_.size(); i++) { if (cache_[i].count > 0) { - tprintf("ObjectCache(%p)::~ObjectCache(): WARNING! LEAK! object %p " - "still has count %d (id %s)\n", - this, cache_[i].object, cache_[i].count, - cache_[i].id.string()); + tprintf( + "ObjectCache(%p)::~ObjectCache(): WARNING! LEAK! object %p " + "still has count %d (id %s)\n", + this, cache_[i].object, cache_[i].count, cache_[i].id.string()); } else { delete cache_[i].object; cache_[i].object = nullptr; @@ -57,9 +57,8 @@ class ObjectCache { // and return nullptr -- further attempts to load will fail (even // with a different loader) until DeleteUnusedObjects() is called. // We delete the given loader. - T *Get(STRING id, - TessResultCallback *loader) { - T *retval = nullptr; + T* Get(STRING id, TessResultCallback* loader) { + T* retval = nullptr; mu_.Lock(); for (int i = 0; i < cache_.size(); i++) { if (id == cache_[i].id) { @@ -73,7 +72,7 @@ class ObjectCache { } } cache_.push_back(ReferenceCount()); - ReferenceCount &rc = cache_.back(); + ReferenceCount& rc = cache_.back(); rc.id = id; retval = rc.object = loader->Run(); rc.count = (retval != nullptr) ? 1 : 0; @@ -83,7 +82,7 @@ class ObjectCache { // Decrement the count for t. // Return whether we knew about the given pointer. - bool Free(T *t) { + bool Free(T* t) { if (t == nullptr) return false; mu_.Lock(); for (int i = 0; i < cache_.size(); i++) { @@ -111,7 +110,7 @@ class ObjectCache { private: struct ReferenceCount { STRING id; // A unique ID to identify the object (think path on disk) - T *object; // A copy of the object in memory. Can be delete'd. + T* object; // A copy of the object in memory. Can be delete'd. int count; // A count of the number of active users of this object. }; @@ -121,5 +120,4 @@ class ObjectCache { } // namespace tesseract - #endif // TESSERACT_CCUTIL_OBJECT_CACHE_H_ diff --git a/src/ccutil/ocrclass.h b/src/ccutil/ocrclass.h index 790ae525d5..93c6908f65 100644 --- a/src/ccutil/ocrclass.h +++ b/src/ccutil/ocrclass.h @@ -24,28 +24,28 @@ * structure alignment up to 8. **********************************************************************/ -#ifndef CCUTIL_OCRCLASS_H_ -#define CCUTIL_OCRCLASS_H_ +#ifndef CCUTIL_OCRCLASS_H_ +#define CCUTIL_OCRCLASS_H_ #ifndef __GNUC__ #ifdef _WIN32 -#include "gettimeofday.h" +#include "gettimeofday.h" #endif #else -#include +#include #endif -#include -#include "host.h" +#include +#include "host.h" /*Maximum lengths of various strings*/ -#define MAX_FONT_NAME 34 /*name of font */ -#define MAX_OCR_NAME 32 /*name of engine */ -#define MAX_OCR_VERSION 17 /*version code of engine */ +#define MAX_FONT_NAME 34 /*name of font */ +#define MAX_OCR_NAME 32 /*name of engine */ +#define MAX_OCR_VERSION 17 /*version code of engine */ /*pitch set definitions are identical to RTF*/ -#define PITCH_DEF 0 /*default */ -#define PITCH_FIXED 1 /*fixed pitch */ -#define PITCH_VAR 2 /*variable pitch */ +#define PITCH_DEF 0 /*default */ +#define PITCH_FIXED 1 /*fixed pitch */ +#define PITCH_VAR 2 /*variable pitch */ /********************************************************************** * EANYCODE_CHAR @@ -71,26 +71,26 @@ * version. **********************************************************************/ -typedef struct { /*single character */ -// It should be noted that the format for char_code for version 2.0 and beyond -// is UTF8 which means that ASCII characters will come out as one structure but -// other characters will be returned in two or more instances of this structure -// with a single byte of the UTF8 code in each, but each will have the same -// bounding box. Programs which want to handle languagues with different -// characters sets will need to handle extended characters appropriately, but -// *all* code needs to be prepared to receive UTF8 coded characters for -// characters such as bullet and fancy quotes. - uint16_t char_code; /*character itself */ - int16_t left; /*of char (-1) */ - int16_t right; /*of char (-1) */ - int16_t top; /*of char (-1) */ - int16_t bottom; /*of char (-1) */ - int16_t font_index; /*what font (0) */ - uint8_t confidence; /*0=perfect, 100=reject (0/100) */ - uint8_t point_size; /*of char, 72=i inch, (10) */ - int8_t blanks; /*no of spaces before this char (1) */ - uint8_t formatting; /*char formatting (0) */ -} EANYCODE_CHAR; /*single character */ +typedef struct { /*single character */ + // It should be noted that the format for char_code for version 2.0 and beyond + // is UTF8 which means that ASCII characters will come out as one structure + // but other characters will be returned in two or more instances of this + // structure with a single byte of the UTF8 code in each, but each will have + // the same bounding box. Programs which want to handle languagues with + // different characters sets will need to handle extended characters + // appropriately, but *all* code needs to be prepared to receive UTF8 coded + // characters for characters such as bullet and fancy quotes. + uint16_t char_code; /*character itself */ + int16_t left; /*of char (-1) */ + int16_t right; /*of char (-1) */ + int16_t top; /*of char (-1) */ + int16_t bottom; /*of char (-1) */ + int16_t font_index; /*what font (0) */ + uint8_t confidence; /*0=perfect, 100=reject (0/100) */ + uint8_t point_size; /*of char, 72=i inch, (10) */ + int8_t blanks; /*no of spaces before this char (1) */ + uint8_t formatting; /*char formatting (0) */ +} EANYCODE_CHAR; /*single character */ /********************************************************************** * ETEXT_DESC @@ -112,16 +112,16 @@ typedef bool (*CANCEL_FUNC)(void* cancel_this, int words); typedef bool (*PROGRESS_FUNC)(int progress, int left, int right, int top, int bottom); -class ETEXT_DESC { // output header +class ETEXT_DESC { // output header public: int16_t count; /// chars in this buffer(0) int16_t progress; /// percent complete increasing (0-100) /** Progress monitor covers word recognition and it does not cover layout - * analysis. - * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */ - int8_t more_to_come; /// true if not last - volatile int8_t ocr_alive; /// ocr sets to 1, HP 0 - int8_t err_code; /// for errcode use + * analysis. + * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */ + int8_t more_to_come; /// true if not last + volatile int8_t ocr_alive; /// ocr sets to 1, HP 0 + int8_t err_code; /// for errcode use CANCEL_FUNC cancel; /// returns true to cancel PROGRESS_FUNC progress_callback; /// called whenever progress increases void* cancel_this; /// this or other data for cancel @@ -147,7 +147,7 @@ class ETEXT_DESC { // output header gettimeofday(&end_time, nullptr); int32_t deadline_secs = deadline_msecs / 1000; end_time.tv_sec += deadline_secs; - end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000; + end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000; if (end_time.tv_usec > 1000000) { end_time.tv_usec -= 1000000; ++end_time.tv_sec; @@ -159,8 +159,8 @@ class ETEXT_DESC { // output header if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false; struct timeval now; gettimeofday(&now, nullptr); - return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec && - now.tv_usec > end_time.tv_usec)); + return (now.tv_sec > end_time.tv_sec || + (now.tv_sec == end_time.tv_sec && now.tv_usec > end_time.tv_usec)); } }; diff --git a/src/ccutil/params.cpp b/src/ccutil/params.cpp index 1692577ee8..977cc3ca01 100644 --- a/src/ccutil/params.cpp +++ b/src/ccutil/params.cpp @@ -17,30 +17,29 @@ * **********************************************************************/ -#include -#include -#include +#include +#include +#include -#include "genericvector.h" -#include "scanutils.h" -#include "tprintf.h" -#include "params.h" +#include "genericvector.h" +#include "params.h" +#include "scanutils.h" +#include "tprintf.h" -#define PLUS '+' //flag states -#define MINUS '-' -#define EQUAL '=' +#define PLUS '+' // flag states +#define MINUS '-' +#define EQUAL '=' -tesseract::ParamsVectors *GlobalParams() { +tesseract::ParamsVectors* GlobalParams() { static tesseract::ParamsVectors global_params = tesseract::ParamsVectors(); return &global_params; } namespace tesseract { -bool ParamUtils::ReadParamsFile(const char *file, - SetParamConstraint constraint, - ParamsVectors *member_params) { - int16_t nameoffset; // offset for real name +bool ParamUtils::ReadParamsFile(const char* file, SetParamConstraint constraint, + ParamsVectors* member_params) { + int16_t nameoffset; // offset for real name if (*file == PLUS) { nameoffset = 1; @@ -58,28 +57,29 @@ bool ParamUtils::ReadParamsFile(const char *file, return ReadParamsFromFp(constraint, &fp, member_params); } -bool ParamUtils::ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, - ParamsVectors *member_params) { - char line[MAX_PATH]; // input line - bool anyerr = false; // true if any error - bool foundit; // found parameter - char *valptr; // value field +bool ParamUtils::ReadParamsFromFp(SetParamConstraint constraint, TFile* fp, + ParamsVectors* member_params) { + char line[MAX_PATH]; // input line + bool anyerr = false; // true if any error + bool foundit; // found parameter + char* valptr; // value field while (fp->FGets(line, MAX_PATH) != nullptr) { if (line[0] != '\r' && line[0] != '\n' && line[0] != '#') { chomp_string(line); // remove newline for (valptr = line; *valptr && *valptr != ' ' && *valptr != '\t'; - valptr++); - if (*valptr) { // found blank - *valptr = '\0'; // make name a string + valptr++) + ; + if (*valptr) { // found blank + *valptr = '\0'; // make name a string do - valptr++; // find end of blanks + valptr++; // find end of blanks while (*valptr == ' ' || *valptr == '\t'); } foundit = SetParam(line, valptr, constraint, member_params); if (!foundit) { - anyerr = true; // had an error + anyerr = true; // had an error tprintf("read_params_file: parameter not found: %s\n", line); exit(1); } @@ -88,62 +88,62 @@ bool ParamUtils::ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, return anyerr; } -bool ParamUtils::SetParam(const char *name, const char* value, +bool ParamUtils::SetParam(const char* name, const char* value, SetParamConstraint constraint, - ParamsVectors *member_params) { + ParamsVectors* member_params) { // Look for the parameter among string parameters. - StringParam *sp = FindParam(name, GlobalParams()->string_params, + StringParam* sp = FindParam(name, GlobalParams()->string_params, member_params->string_params); if (sp != nullptr && sp->constraint_ok(constraint)) sp->set_value(value); if (*value == '\0') return (sp != nullptr); // Look for the parameter among int parameters. int intval; - IntParam *ip = FindParam(name, GlobalParams()->int_params, + IntParam* ip = FindParam(name, GlobalParams()->int_params, member_params->int_params); if (ip && ip->constraint_ok(constraint) && sscanf(value, "%d", &intval) == 1) ip->set_value(intval); // Look for the parameter among bool parameters. - BoolParam *bp = FindParam(name, GlobalParams()->bool_params, + BoolParam* bp = FindParam(name, GlobalParams()->bool_params, member_params->bool_params); if (bp != nullptr && bp->constraint_ok(constraint)) { - if (*value == 'T' || *value == 't' || - *value == 'Y' || *value == 'y' || *value == '1') { + if (*value == 'T' || *value == 't' || *value == 'Y' || *value == 'y' || + *value == '1') { bp->set_value(true); - } else if (*value == 'F' || *value == 'f' || - *value == 'N' || *value == 'n' || *value == '0') { + } else if (*value == 'F' || *value == 'f' || *value == 'N' || + *value == 'n' || *value == '0') { bp->set_value(false); } } // Look for the parameter among double parameters. double doubleval; - DoubleParam *dp = FindParam(name, GlobalParams()->double_params, + DoubleParam* dp = FindParam(name, GlobalParams()->double_params, member_params->double_params); if (dp != nullptr && dp->constraint_ok(constraint)) { #ifdef EMBEDDED - doubleval = strtofloat(value); + doubleval = strtofloat(value); #else - if (sscanf(value, "%lf", &doubleval) == 1) + if (sscanf(value, "%lf", &doubleval) == 1) #endif - dp->set_value(doubleval); + dp->set_value(doubleval); } return (sp || ip || bp || dp); } -bool ParamUtils::GetParamAsString(const char *name, +bool ParamUtils::GetParamAsString(const char* name, const ParamsVectors* member_params, - STRING *value) { + STRING* value) { // Look for the parameter among string parameters. - StringParam *sp = FindParam(name, GlobalParams()->string_params, + StringParam* sp = FindParam(name, GlobalParams()->string_params, member_params->string_params); if (sp) { *value = sp->string(); return true; } // Look for the parameter among int parameters. - IntParam *ip = FindParam(name, GlobalParams()->int_params, + IntParam* ip = FindParam(name, GlobalParams()->int_params, member_params->int_params); if (ip) { char buf[128]; @@ -152,14 +152,14 @@ bool ParamUtils::GetParamAsString(const char *name, return true; } // Look for the parameter among bool parameters. - BoolParam *bp = FindParam(name, GlobalParams()->bool_params, + BoolParam* bp = FindParam(name, GlobalParams()->bool_params, member_params->bool_params); if (bp != nullptr) { - *value = BOOL8(*bp) ? "1": "0"; + *value = BOOL8(*bp) ? "1" : "0"; return true; } // Look for the parameter among double parameters. - DoubleParam *dp = FindParam(name, GlobalParams()->double_params, + DoubleParam* dp = FindParam(name, GlobalParams()->double_params, member_params->double_params); if (dp != nullptr) { char buf[128]; @@ -170,11 +170,11 @@ bool ParamUtils::GetParamAsString(const char *name, return false; } -void ParamUtils::PrintParams(FILE *fp, const ParamsVectors *member_params) { +void ParamUtils::PrintParams(FILE* fp, const ParamsVectors* member_params) { int v, i; int num_iterations = (member_params == nullptr) ? 1 : 2; for (v = 0; v < num_iterations; ++v) { - const ParamsVectors *vec = (v == 0) ? GlobalParams() : member_params; + const ParamsVectors* vec = (v == 0) ? GlobalParams() : member_params; for (i = 0; i < vec->int_params.size(); ++i) { fprintf(fp, "%s\t%d\t%s\n", vec->int_params[i]->name_str(), (int32_t)(*vec->int_params[i]), vec->int_params[i]->info_str()); @@ -185,11 +185,13 @@ void ParamUtils::PrintParams(FILE *fp, const ParamsVectors *member_params) { } for (int i = 0; i < vec->string_params.size(); ++i) { fprintf(fp, "%s\t%s\t%s\n", vec->string_params[i]->name_str(), - vec->string_params[i]->string(), vec->string_params[i]->info_str()); + vec->string_params[i]->string(), + vec->string_params[i]->info_str()); } for (int i = 0; i < vec->double_params.size(); ++i) { fprintf(fp, "%s\t%g\t%s\n", vec->double_params[i]->name_str(), - (double)(*vec->double_params[i]), vec->double_params[i]->info_str()); + (double)(*vec->double_params[i]), + vec->double_params[i]->info_str()); } } } @@ -199,7 +201,7 @@ void ParamUtils::ResetToDefaults(ParamsVectors* member_params) { int v, i; int num_iterations = (member_params == nullptr) ? 1 : 2; for (v = 0; v < num_iterations; ++v) { - ParamsVectors *vec = (v == 0) ? GlobalParams() : member_params; + ParamsVectors* vec = (v == 0) ? GlobalParams() : member_params; for (i = 0; i < vec->int_params.size(); ++i) { vec->int_params[i]->ResetToDefault(); } diff --git a/src/ccutil/params.h b/src/ccutil/params.h index 6c5db3e850..5ad1f102b0 100644 --- a/src/ccutil/params.h +++ b/src/ccutil/params.h @@ -17,13 +17,13 @@ * **********************************************************************/ -#ifndef PARAMS_H -#define PARAMS_H +#ifndef PARAMS_H +#define PARAMS_H -#include +#include -#include "genericvector.h" -#include "strngs.h" +#include "genericvector.h" +#include "strngs.h" namespace tesseract { @@ -41,10 +41,10 @@ enum SetParamConstraint { }; struct ParamsVectors { - GenericVector int_params; - GenericVector bool_params; - GenericVector string_params; - GenericVector double_params; + GenericVector int_params; + GenericVector bool_params; + GenericVector string_params; + GenericVector double_params; }; // Utility functions for working with Tesseract parameters. @@ -55,27 +55,25 @@ class ParamUtils { // ORed or ANDed with any current values. // Blank lines and lines beginning # are ignored. // Values may have any whitespace after the name and are the rest of line. - static bool ReadParamsFile( - const char *file, // filename to read - SetParamConstraint constraint, - ParamsVectors *member_params); + static bool ReadParamsFile(const char* file, // filename to read + SetParamConstraint constraint, + ParamsVectors* member_params); // Read parameters from the given file pointer. - static bool ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, - ParamsVectors *member_params); + static bool ReadParamsFromFp(SetParamConstraint constraint, TFile* fp, + ParamsVectors* member_params); // Set a parameters to have the given value. - static bool SetParam(const char *name, const char* value, + static bool SetParam(const char* name, const char* value, SetParamConstraint constraint, - ParamsVectors *member_params); + ParamsVectors* member_params); // Returns the pointer to the parameter with the given name (of the // appropriate type) if it was found in the vector obtained from // GlobalParams() or in the given member_params. - template - static T *FindParam(const char *name, - const GenericVector &global_vec, - const GenericVector &member_vec) { + template + static T* FindParam(const char* name, const GenericVector& global_vec, + const GenericVector& member_vec) { int i; for (i = 0; i < global_vec.size(); ++i) { if (strcmp(global_vec[i]->name_str(), name) == 0) return global_vec[i]; @@ -86,8 +84,8 @@ class ParamUtils { return nullptr; } // Removes the given pointer to the param from the given vector. - template - static void RemoveParam(T *param_ptr, GenericVector *vec) { + template + static void RemoveParam(T* param_ptr, GenericVector* vec) { for (int i = 0; i < vec->size(); ++i) { if ((*vec)[i] == param_ptr) { vec->remove(i); @@ -97,12 +95,12 @@ class ParamUtils { } // Fetches the value of the named param as a STRING. Returns false if not // found. - static bool GetParamAsString(const char *name, + static bool GetParamAsString(const char* name, const ParamsVectors* member_params, - STRING *value); + STRING* value); // Print parameters to the given file. - static void PrintParams(FILE *fp, const ParamsVectors *member_params); + static void PrintParams(FILE* fp, const ParamsVectors* member_params); // Resets all parameters back to default values; static void ResetToDefaults(ParamsVectors* member_params); @@ -113,36 +111,36 @@ class Param { public: ~Param() = default; - const char *name_str() const { return name_; } - const char *info_str() const { return info_; } + const char* name_str() const { return name_; } + const char* info_str() const { return info_; } bool is_init() const { return init_; } bool is_debug() const { return debug_; } bool constraint_ok(SetParamConstraint constraint) const { - return (constraint == SET_PARAM_CONSTRAINT_NONE || - (constraint == SET_PARAM_CONSTRAINT_DEBUG_ONLY && - this->is_debug()) || - (constraint == SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY && - !this->is_debug()) || - (constraint == SET_PARAM_CONSTRAINT_NON_INIT_ONLY && - !this->is_init())); + return ( + constraint == SET_PARAM_CONSTRAINT_NONE || + (constraint == SET_PARAM_CONSTRAINT_DEBUG_ONLY && this->is_debug()) || + (constraint == SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY && + !this->is_debug()) || + (constraint == SET_PARAM_CONSTRAINT_NON_INIT_ONLY && !this->is_init())); } protected: - Param(const char *name, const char *comment, bool init) : - name_(name), info_(comment), init_(init) { + Param(const char* name, const char* comment, bool init) + : name_(name), info_(comment), init_(init) { debug_ = (strstr(name, "debug") != nullptr) || (strstr(name, "display")); } - const char *name_; // name of this parameter - const char *info_; // for menus - bool init_; // needs to be set before init + const char* name_; // name of this parameter + const char* info_; // for menus + bool init_; // needs to be set before init bool debug_; }; class IntParam : public Param { - public: - IntParam(int32_t value, const char *name, const char *comment, bool init, - ParamsVectors *vec) : Param(name, comment, init) { + public: + IntParam(int32_t value, const char* name, const char* comment, bool init, + ParamsVectors* vec) + : Param(name, comment, init) { value_ = value; default_ = value; params_vec_ = &(vec->int_params); @@ -152,21 +150,20 @@ class IntParam : public Param { operator int32_t() const { return value_; } void operator=(int32_t value) { value_ = value; } void set_value(int32_t value) { value_ = value; } - void ResetToDefault() { - value_ = default_; - } + void ResetToDefault() { value_ = default_; } private: int32_t value_; int32_t default_; // Pointer to the vector that contains this param (not owened by this class). - GenericVector *params_vec_; + GenericVector* params_vec_; }; class BoolParam : public Param { public: - BoolParam(bool value, const char *name, const char *comment, bool init, - ParamsVectors *vec) : Param(name, comment, init) { + BoolParam(bool value, const char* name, const char* comment, bool init, + ParamsVectors* vec) + : Param(name, comment, init) { value_ = value; default_ = value; params_vec_ = &(vec->bool_params); @@ -176,50 +173,47 @@ class BoolParam : public Param { operator BOOL8() const { return value_; } void operator=(BOOL8 value) { value_ = value; } void set_value(BOOL8 value) { value_ = value; } - void ResetToDefault() { - value_ = default_; - } + void ResetToDefault() { value_ = default_; } private: BOOL8 value_; BOOL8 default_; // Pointer to the vector that contains this param (not owned by this class). - GenericVector *params_vec_; + GenericVector* params_vec_; }; class StringParam : public Param { public: - StringParam(const char *value, const char *name, - const char *comment, bool init, - ParamsVectors *vec) : Param(name, comment, init) { + StringParam(const char* value, const char* name, const char* comment, + bool init, ParamsVectors* vec) + : Param(name, comment, init) { value_ = value; default_ = value; params_vec_ = &(vec->string_params); vec->string_params.push_back(this); } ~StringParam() { ParamUtils::RemoveParam(this, params_vec_); } - operator STRING &() { return value_; } - const char *string() const { return value_.string(); } - const char *c_str() const { return value_.string(); } + operator STRING&() { return value_; } + const char* string() const { return value_.string(); } + const char* c_str() const { return value_.string(); } bool empty() { return value_.length() <= 0; } bool operator==(const STRING& other) { return value_ == other; } void operator=(const STRING& value) { value_ = value; } void set_value(const STRING& value) { value_ = value; } - void ResetToDefault() { - value_ = default_; - } + void ResetToDefault() { value_ = default_; } private: STRING value_; STRING default_; // Pointer to the vector that contains this param (not owened by this class). - GenericVector *params_vec_; + GenericVector* params_vec_; }; class DoubleParam : public Param { public: - DoubleParam(double value, const char *name, const char *comment, - bool init, ParamsVectors *vec) : Param(name, comment, init) { + DoubleParam(double value, const char* name, const char* comment, bool init, + ParamsVectors* vec) + : Param(name, comment, init) { value_ = value; default_ = value; params_vec_ = &(vec->double_params); @@ -229,15 +223,13 @@ class DoubleParam : public Param { operator double() const { return value_; } void operator=(double value) { value_ = value; } void set_value(double value) { value_ = value; } - void ResetToDefault() { - value_ = default_; - } + void ResetToDefault() { value_ = default_; } private: double value_; double default_; // Pointer to the vector that contains this param (not owned by this class). - GenericVector *params_vec_; + GenericVector* params_vec_; }; } // namespace tesseract @@ -251,7 +243,7 @@ class DoubleParam : public Param { // // TODO(daria): remove GlobalParams() when all global Tesseract // parameters are converted to members. -tesseract::ParamsVectors *GlobalParams(); +tesseract::ParamsVectors* GlobalParams(); /************************************************************************* * Note on defining parameters. @@ -261,64 +253,60 @@ tesseract::ParamsVectors *GlobalParams(); * (there is no such guarantee for parameters defined with the other macros). *************************************************************************/ -#define INT_VAR_H(name,val,comment)\ - tesseract::IntParam name +#define INT_VAR_H(name, val, comment) tesseract::IntParam name -#define BOOL_VAR_H(name,val,comment)\ - tesseract::BoolParam name +#define BOOL_VAR_H(name, val, comment) tesseract::BoolParam name -#define STRING_VAR_H(name,val,comment)\ - tesseract::StringParam name +#define STRING_VAR_H(name, val, comment) tesseract::StringParam name -#define double_VAR_H(name,val,comment)\ - tesseract::DoubleParam name +#define double_VAR_H(name, val, comment) tesseract::DoubleParam name -#define INT_VAR(name,val,comment)\ - tesseract::IntParam name(val,#name,comment,false,GlobalParams()) +#define INT_VAR(name, val, comment) \ + tesseract::IntParam name(val, #name, comment, false, GlobalParams()) -#define BOOL_VAR(name,val,comment)\ - tesseract::BoolParam name(val,#name,comment,false,GlobalParams()) +#define BOOL_VAR(name, val, comment) \ + tesseract::BoolParam name(val, #name, comment, false, GlobalParams()) -#define STRING_VAR(name,val,comment)\ - tesseract::StringParam name(val,#name,comment,false,GlobalParams()) +#define STRING_VAR(name, val, comment) \ + tesseract::StringParam name(val, #name, comment, false, GlobalParams()) -#define double_VAR(name,val,comment)\ - tesseract::DoubleParam name(val,#name,comment,false,GlobalParams()) +#define double_VAR(name, val, comment) \ + tesseract::DoubleParam name(val, #name, comment, false, GlobalParams()) -#define INT_INIT_VAR(name,val,comment)\ - tesseract::IntParam name(val,#name,comment,true,GlobalParams()) +#define INT_INIT_VAR(name, val, comment) \ + tesseract::IntParam name(val, #name, comment, true, GlobalParams()) -#define BOOL_INIT_VAR(name,val,comment)\ - tesseract::BoolParam name(val,#name,comment,true,GlobalParams()) +#define BOOL_INIT_VAR(name, val, comment) \ + tesseract::BoolParam name(val, #name, comment, true, GlobalParams()) -#define STRING_INIT_VAR(name,val,comment)\ - tesseract::StringParam name(val,#name,comment,true,GlobalParams()) +#define STRING_INIT_VAR(name, val, comment) \ + tesseract::StringParam name(val, #name, comment, true, GlobalParams()) -#define double_INIT_VAR(name,val,comment)\ - tesseract::DoubleParam name(val,#name,comment,true,GlobalParams()) +#define double_INIT_VAR(name, val, comment) \ + tesseract::DoubleParam name(val, #name, comment, true, GlobalParams()) -#define INT_MEMBER(name, val, comment, vec)\ +#define INT_MEMBER(name, val, comment, vec) \ name(val, #name, comment, false, vec) -#define BOOL_MEMBER(name, val, comment, vec)\ +#define BOOL_MEMBER(name, val, comment, vec) \ name(val, #name, comment, false, vec) -#define STRING_MEMBER(name, val, comment, vec)\ +#define STRING_MEMBER(name, val, comment, vec) \ name(val, #name, comment, false, vec) -#define double_MEMBER(name, val, comment, vec)\ +#define double_MEMBER(name, val, comment, vec) \ name(val, #name, comment, false, vec) -#define INT_INIT_MEMBER(name, val, comment, vec)\ +#define INT_INIT_MEMBER(name, val, comment, vec) \ name(val, #name, comment, true, vec) -#define BOOL_INIT_MEMBER(name, val, comment, vec)\ +#define BOOL_INIT_MEMBER(name, val, comment, vec) \ name(val, #name, comment, true, vec) -#define STRING_INIT_MEMBER(name, val, comment, vec)\ +#define STRING_INIT_MEMBER(name, val, comment, vec) \ name(val, #name, comment, true, vec) -#define double_INIT_MEMBER(name, val, comment, vec)\ +#define double_INIT_MEMBER(name, val, comment, vec) \ name(val, #name, comment, true, vec) #endif diff --git a/src/ccutil/platform.h b/src/ccutil/platform.h index c78f2e3331..7bf78584c5 100644 --- a/src/ccutil/platform.h +++ b/src/ccutil/platform.h @@ -32,7 +32,7 @@ #endif #ifdef __GNUC__ #define ultoa _ultoa -#endif /* __GNUC__ */ +#endif /* __GNUC__ */ #define SIGNED #if defined(_MSC_VER) #if (_MSC_VER < 1900) @@ -57,27 +57,27 @@ #endif #if defined(_WIN32) || defined(__CYGWIN__) - #if defined(TESS_EXPORTS) - #define TESS_API __declspec(dllexport) - #elif defined(TESS_IMPORTS) - #define TESS_API __declspec(dllimport) - #else - #define TESS_API - #endif - #define TESS_LOCAL +#if defined(TESS_EXPORTS) +#define TESS_API __declspec(dllexport) +#elif defined(TESS_IMPORTS) +#define TESS_API __declspec(dllimport) #else - #if __GNUC__ >= 4 - #if defined(TESS_EXPORTS) || defined(TESS_IMPORTS) - #define TESS_API __attribute__ ((visibility ("default"))) - #define TESS_LOCAL __attribute__ ((visibility ("hidden"))) - #else - #define TESS_API - #define TESS_LOCAL - #endif - #else - #define TESS_API - #define TESS_LOCAL - #endif +#define TESS_API +#endif +#define TESS_LOCAL +#else +#if __GNUC__ >= 4 +#if defined(TESS_EXPORTS) || defined(TESS_IMPORTS) +#define TESS_API __attribute__((visibility("default"))) +#define TESS_LOCAL __attribute__((visibility("hidden"))) +#else +#define TESS_API +#define TESS_LOCAL +#endif +#else +#define TESS_API +#define TESS_LOCAL +#endif #endif #endif // TESSERACT_CCUTIL_PLATFORM_H_ diff --git a/src/ccutil/qrsequence.h b/src/ccutil/qrsequence.h index e6fd9674ca..9bf27ca095 100644 --- a/src/ccutil/qrsequence.h +++ b/src/ccutil/qrsequence.h @@ -41,8 +41,7 @@ class QRSequenceGenerator { int GetVal() { const int kInvalidVal = -1; const int kMaxNaturalNumberValue = 1 << num_bits_; - if (next_num_ >= kMaxNaturalNumberValue) - return kInvalidVal; + if (next_num_ >= kMaxNaturalNumberValue) return kInvalidVal; int n = next_num_; while (next_num_ < kMaxNaturalNumberValue) { @@ -58,7 +57,7 @@ class QRSequenceGenerator { int GetBinaryReversedInteger(int in_val) const { int bit_pos = num_bits_; int out_val = 0; - while(bit_pos--) { + while (bit_pos--) { // Set the value of the last bit. out_val |= (in_val & 0x1); if (bit_pos > 0) { diff --git a/src/ccutil/scanutils.cpp b/src/ccutil/scanutils.cpp index f47546aed0..2180bf9526 100644 --- a/src/ccutil/scanutils.cpp +++ b/src/ccutil/scanutils.cpp @@ -23,35 +23,35 @@ #include "config_auto.h" #endif +#include +#include +#include #include +#include #include #include #include -#include -#include #include +#include #include -#include -#include -#include #include "scanutils.h" #include "tprintf.h" enum Flags { - FL_SPLAT = 0x01, // Drop the value, do not assign - FL_INV = 0x02, // Character-set with inverse - FL_WIDTH = 0x04, // Field width specified - FL_MINUS = 0x08, // Negative number + FL_SPLAT = 0x01, // Drop the value, do not assign + FL_INV = 0x02, // Character-set with inverse + FL_WIDTH = 0x04, // Field width specified + FL_MINUS = 0x08, // Negative number }; enum Ranks { RANK_CHAR = -2, - RANK_SHORT = -1, - RANK_INT = 0, + RANK_SHORT = -1, + RANK_INT = 0, RANK_LONG = 1, RANK_LONGLONG = 2, - RANK_PTR = std::numeric_limits::max() // Special value used for pointers + RANK_PTR = std::numeric_limits::max() // Special value used for pointers }; const enum Ranks kMinRank = RANK_CHAR; @@ -62,42 +62,37 @@ const enum Ranks kSizeTRank = RANK_LONG; const enum Ranks kPtrDiffRank = RANK_LONG; enum Bail { - BAIL_NONE = 0, // No error condition - BAIL_EOF, // Hit EOF - BAIL_ERR // Conversion mismatch + BAIL_NONE = 0, // No error condition + BAIL_EOF, // Hit EOF + BAIL_ERR // Conversion mismatch }; // Helper functions ------------------------------------------------------------ -inline size_t LongBit() { - return CHAR_BIT * sizeof(long); -} +inline size_t LongBit() { return CHAR_BIT * sizeof(long); } -static inline int -SkipSpace(FILE *s) { +static inline int SkipSpace(FILE* s) { int p; - while (isspace(p = fgetc(s))); + while (isspace(p = fgetc(s))) + ; ungetc(p, s); // Make sure next char is available for reading return p; } -static inline void -SetBit(unsigned long *bitmap, unsigned int bit) { - bitmap[bit/LongBit()] |= 1UL << (bit%LongBit()); +static inline void SetBit(unsigned long* bitmap, unsigned int bit) { + bitmap[bit / LongBit()] |= 1UL << (bit % LongBit()); } -static inline int -TestBit(unsigned long *bitmap, unsigned int bit) { - return static_cast(bitmap[bit/LongBit()] >> (bit%LongBit())) & 1; +static inline int TestBit(unsigned long* bitmap, unsigned int bit) { + return static_cast(bitmap[bit / LongBit()] >> (bit % LongBit())) & 1; } static inline int DigitValue(int ch, int base) { if (ch >= '0' && ch <= '9') { - if (base >= 10 || ch <= '7') - return ch-'0'; + if (base >= 10 || ch <= '7') return ch - '0'; } else if (ch >= 'A' && ch <= 'Z' && base == 16) { - return ch-'A'+10; + return ch - 'A' + 10; } else if (ch >= 'a' && ch <= 'z' && base == 16) { - return ch-'a'+10; + return ch - 'a' + 10; } return -1; } @@ -108,9 +103,9 @@ uintmax_t streamtoumax(FILE* s, int base) { uintmax_t v = 0; int d, c = 0; - for (c = fgetc(s); - isspace(static_cast(c)) && (c != EOF); - c = fgetc(s)) {} + for (c = fgetc(s); isspace(static_cast(c)) && (c != EOF); + c = fgetc(s)) { + } // Single optional + or - if (c == '-' || c == '+') { @@ -138,7 +133,7 @@ uintmax_t streamtoumax(FILE* s, int base) { // Actual number parsing for (; (c != EOF) && (d = DigitValue(c, base)) >= 0; c = fgetc(s)) - v = v*base + d; + v = v * base + d; ungetc(c, s); return minus ? -v : v; @@ -151,9 +146,9 @@ double streamtofloat(FILE* s) { int k = 1; int w = 0; - for (c = fgetc(s); - isspace(static_cast(c)) && (c != EOF); - c = fgetc(s)); + for (c = fgetc(s); isspace(static_cast(c)) && (c != EOF); + c = fgetc(s)) + ; // Single optional + or - if (c == '-' || c == '+') { @@ -162,16 +157,15 @@ double streamtofloat(FILE* s) { } // Actual number parsing - for (; c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) - v = v*10 + d; + for (; c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) v = v * 10 + d; if (c == '.') { for (c = fgetc(s); c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) { - w = w*10 + d; + w = w * 10 + d; k *= 10; } } - double f = static_cast(v) - + static_cast(w) / static_cast(k); + double f = + static_cast(v) + static_cast(w) / static_cast(k); if (c == 'e' || c == 'E') { c = fgetc(s); int expsign = 1; @@ -198,7 +192,7 @@ double strtofloat(const char* s) { int k = 1; int w = 0; - while(*s && isspace(static_cast(*s))) s++; + while (*s && isspace(static_cast(*s))) s++; // Single optional + or - if (*s == '-' || *s == '+') { @@ -207,26 +201,25 @@ double strtofloat(const char* s) { } // Actual number parsing - for (; *s && (d = DigitValue(*s, 10)) >= 0; s++) - v = v*10 + d; + for (; *s && (d = DigitValue(*s, 10)) >= 0; s++) v = v * 10 + d; if (*s == '.') { for (++s; *s && (d = DigitValue(*s, 10)) >= 0; s++) { - w = w*10 + d; + w = w * 10 + d; k *= 10; } } if (*s == 'e' || *s == 'E') tprintf("WARNING: Scientific Notation not supported!"); - double f = static_cast(v) - + static_cast(w) / static_cast(k); + double f = + static_cast(v) + static_cast(w) / static_cast(k); return minus ? -f : f; } -static int tvfscanf(FILE* stream, const char *format, va_list ap); +static int tvfscanf(FILE* stream, const char* format, va_list ap); -int tfscanf(FILE* stream, const char *format, ...) { +int tfscanf(FILE* stream, const char* format, ...) { va_list ap; int rv; @@ -239,7 +232,7 @@ int tfscanf(FILE* stream, const char *format, ...) { #ifdef EMBEDDED -int fscanf(FILE* stream, const char *format, ...) { +int fscanf(FILE* stream, const char* format, ...) { va_list ap; int rv; @@ -250,7 +243,7 @@ int fscanf(FILE* stream, const char *format, ...) { return rv; } -int vfscanf(FILE* stream, const char *format, ...) { +int vfscanf(FILE* stream, const char* format, ...) { va_list ap; int rv; @@ -262,30 +255,30 @@ int vfscanf(FILE* stream, const char *format, ...) { } #endif -static int tvfscanf(FILE* stream, const char *format, va_list ap) { - const char *p = format; +static int tvfscanf(FILE* stream, const char* format, va_list ap) { + const char* p = format; char ch; int q = 0; uintmax_t val = 0; - int rank = RANK_INT; // Default rank + int rank = RANK_INT; // Default rank unsigned int width = UINT_MAX; int base; int flags = 0; enum { - ST_NORMAL, // Ground state - ST_FLAGS, // Special flags - ST_WIDTH, // Field width - ST_MODIFIERS, // Length or conversion modifiers - ST_MATCH_INIT, // Initial state of %[ sequence - ST_MATCH, // Main state of %[ sequence - ST_MATCH_RANGE, // After - in a %[ sequence + ST_NORMAL, // Ground state + ST_FLAGS, // Special flags + ST_WIDTH, // Field width + ST_MODIFIERS, // Length or conversion modifiers + ST_MATCH_INIT, // Initial state of %[ sequence + ST_MATCH, // Main state of %[ sequence + ST_MATCH_RANGE, // After - in a %[ sequence } state = ST_NORMAL; - char *sarg = nullptr; // %s %c or %[ string argument + char* sarg = nullptr; // %s %c or %[ string argument enum Bail bail = BAIL_NONE; - int converted = 0; // Successful conversions - unsigned long matchmap[((1 << CHAR_BIT)+(CHAR_BIT * sizeof(long) - 1)) / - (CHAR_BIT * sizeof(long))]; - int matchinv = 0; // Is match map inverted? + int converted = 0; // Successful conversions + unsigned long matchmap[((1 << CHAR_BIT) + (CHAR_BIT * sizeof(long) - 1)) / + (CHAR_BIT * sizeof(long))]; + int matchinv = 0; // Is match map inverted? unsigned char range_start = 0; off_t start_off = ftell(stream); @@ -297,12 +290,13 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) { case ST_NORMAL: if (ch == '%') { state = ST_FLAGS; - flags = 0; rank = RANK_INT; width = UINT_MAX; + flags = 0; + rank = RANK_INT; + width = UINT_MAX; } else if (isspace(static_cast(ch))) { SkipSpace(stream); } else { - if (fgetc(stream) != ch) - bail = BAIL_ERR; // Match failure + if (fgetc(stream) != ch) bail = BAIL_ERR; // Match failure } break; @@ -310,218 +304,216 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) { if (ch == '*') { flags |= FL_SPLAT; } else if ('0' <= ch && ch <= '9') { - width = (ch-'0'); + width = (ch - '0'); state = ST_WIDTH; flags |= FL_WIDTH; } else { state = ST_MODIFIERS; - p--; // Process this character again + p--; // Process this character again } - break; + break; case ST_WIDTH: if (ch >= '0' && ch <= '9') { - width = width*10+(ch-'0'); + width = width * 10 + (ch - '0'); } else { state = ST_MODIFIERS; - p--; // Process this character again + p--; // Process this character again } - break; + break; case ST_MODIFIERS: switch (ch) { // Length modifiers - nonterminal sequences case 'h': - rank--; // Shorter rank - break; + rank--; // Shorter rank + break; case 'l': - rank++; // Longer rank - break; + rank++; // Longer rank + break; case 'j': rank = kIntMaxRank; - break; + break; case 'z': rank = kSizeTRank; - break; + break; case 't': rank = kPtrDiffRank; - break; + break; case 'L': case 'q': - rank = RANK_LONGLONG; // long double/long long - break; + rank = RANK_LONGLONG; // long double/long long + break; default: // Output modifiers - terminal sequences - state = ST_NORMAL; // Next state will be normal + state = ST_NORMAL; // Next state will be normal if (rank < kMinRank) // Canonicalize rank rank = kMinRank; else if (rank > kMaxRank) rank = kMaxRank; - switch (ch) { - case 'P': // Upper case pointer - case 'p': // Pointer - rank = RANK_PTR; - base = 0; - goto scan_int; - - case 'i': // Base-independent integer - base = 0; - goto scan_int; - - case 'd': // Decimal integer - base = 10; - goto scan_int; - - case 'o': // Octal integer - base = 8; - goto scan_int; - - case 'u': // Unsigned decimal integer - base = 10; - goto scan_int; - - case 'x': // Hexadecimal integer - case 'X': - base = 16; - goto scan_int; - - case 'n': // Number of characters consumed - val = ftell(stream) - start_off; - goto set_integer; - - scan_int: - q = SkipSpace(stream); - if ( q <= 0 ) { - bail = BAIL_EOF; - break; - } - val = streamtoumax(stream, base); - // fall through - - set_integer: - if (!(flags & FL_SPLAT)) { - converted++; - switch(rank) { - case RANK_CHAR: - *va_arg(ap, unsigned char *) - = static_cast(val); - break; - case RANK_SHORT: - *va_arg(ap, unsigned short *) - = static_cast(val); - break; - case RANK_INT: - *va_arg(ap, unsigned int *) - = static_cast(val); - break; - case RANK_LONG: - *va_arg(ap, unsigned long *) - = static_cast(val); - break; - case RANK_LONGLONG: - *va_arg(ap, unsigned long long *) - = static_cast(val); - break; - case RANK_PTR: - *va_arg(ap, void **) - = reinterpret_cast(static_cast(val)); + switch (ch) { + case 'P': // Upper case pointer + case 'p': // Pointer + rank = RANK_PTR; + base = 0; + goto scan_int; + + case 'i': // Base-independent integer + base = 0; + goto scan_int; + + case 'd': // Decimal integer + base = 10; + goto scan_int; + + case 'o': // Octal integer + base = 8; + goto scan_int; + + case 'u': // Unsigned decimal integer + base = 10; + goto scan_int; + + case 'x': // Hexadecimal integer + case 'X': + base = 16; + goto scan_int; + + case 'n': // Number of characters consumed + val = ftell(stream) - start_off; + goto set_integer; + + scan_int: + q = SkipSpace(stream); + if (q <= 0) { + bail = BAIL_EOF; break; } - } - break; + val = streamtoumax(stream, base); + // fall through - case 'f': // Preliminary float value parsing - case 'g': - case 'G': - case 'e': - case 'E': - q = SkipSpace(stream); - if (q <= 0) { - bail = BAIL_EOF; + set_integer: + if (!(flags & FL_SPLAT)) { + converted++; + switch (rank) { + case RANK_CHAR: + *va_arg(ap, unsigned char*) = + static_cast(val); + break; + case RANK_SHORT: + *va_arg(ap, unsigned short*) = + static_cast(val); + break; + case RANK_INT: + *va_arg(ap, unsigned int*) = + static_cast(val); + break; + case RANK_LONG: + *va_arg(ap, unsigned long*) = + static_cast(val); + break; + case RANK_LONGLONG: + *va_arg(ap, unsigned long long*) = + static_cast(val); + break; + case RANK_PTR: + *va_arg(ap, void**) = + reinterpret_cast(static_cast(val)); + break; + } + } break; - } - { - double fval = streamtofloat(stream); - if (!(flags & FL_SPLAT)) { - if (rank == RANK_INT) - *va_arg(ap, float *) = static_cast(fval); - else if (rank == RANK_LONG) - *va_arg(ap, double *) = static_cast(fval); - converted++; - } - } - break; - - case 'c': // Character - width = (flags & FL_WIDTH) ? width : 1; // Default width == 1 - sarg = va_arg(ap, char *); - while (width--) { - if ((q = fgetc(stream)) <= 0) { + case 'f': // Preliminary float value parsing + case 'g': + case 'G': + case 'e': + case 'E': + q = SkipSpace(stream); + if (q <= 0) { bail = BAIL_EOF; break; } - if (!(flags & FL_SPLAT)) { - *sarg++ = q; - converted++; + + { + double fval = streamtofloat(stream); + if (!(flags & FL_SPLAT)) { + if (rank == RANK_INT) + *va_arg(ap, float*) = static_cast(fval); + else if (rank == RANK_LONG) + *va_arg(ap, double*) = static_cast(fval); + converted++; + } } - } - break; + break; - case 's': // String - { - char *sp; - sp = sarg = va_arg(ap, char *); - while (width--) { - q = fgetc(stream); - if (isspace(static_cast(q)) || q <= 0) { - ungetc(q, stream); - break; + case 'c': // Character + width = (flags & FL_WIDTH) ? width : 1; // Default width == 1 + sarg = va_arg(ap, char*); + while (width--) { + if ((q = fgetc(stream)) <= 0) { + bail = BAIL_EOF; + break; + } + if (!(flags & FL_SPLAT)) { + *sarg++ = q; + converted++; + } } - if (!(flags & FL_SPLAT)) *sp = q; - sp++; - } - if (sarg == sp) { - bail = BAIL_EOF; - } else if (!(flags & FL_SPLAT)) { - *sp = '\0'; // Terminate output - converted++; - } else { - } - } - break; + break; - case '[': // Character range - sarg = va_arg(ap, char *); - state = ST_MATCH_INIT; - matchinv = 0; - memset(matchmap, 0, sizeof matchmap); - break; + case 's': // String + { + char* sp; + sp = sarg = va_arg(ap, char*); + while (width--) { + q = fgetc(stream); + if (isspace(static_cast(q)) || q <= 0) { + ungetc(q, stream); + break; + } + if (!(flags & FL_SPLAT)) *sp = q; + sp++; + } + if (sarg == sp) { + bail = BAIL_EOF; + } else if (!(flags & FL_SPLAT)) { + *sp = '\0'; // Terminate output + converted++; + } else { + } + } break; - case '%': // %% sequence - if (fgetc(stream) != '%' ) - bail = BAIL_ERR; - break; + case '[': // Character range + sarg = va_arg(ap, char*); + state = ST_MATCH_INIT; + matchinv = 0; + memset(matchmap, 0, sizeof matchmap); + break; - default: // Anything else - bail = BAIL_ERR; // Unknown sequence - break; - } + case '%': // %% sequence + if (fgetc(stream) != '%') bail = BAIL_ERR; + break; + + default: // Anything else + bail = BAIL_ERR; // Unknown sequence + break; + } } - break; + break; - case ST_MATCH_INIT: // Initial state for %[ match + case ST_MATCH_INIT: // Initial state for %[ match if (ch == '^' && !(flags & FL_INV)) { matchinv = 1; } else { SetBit(matchmap, static_cast(ch)); state = ST_MATCH; } - break; + break; - case ST_MATCH: // Main state for %[ match + case ST_MATCH: // Main state for %[ match if (ch == ']') { goto match_run; } else if (ch == '-') { @@ -530,26 +522,26 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) { } else { SetBit(matchmap, static_cast(ch)); } - break; + break; - case ST_MATCH_RANGE: // %[ match after - + case ST_MATCH_RANGE: // %[ match after - if (ch == ']') { SetBit(matchmap, static_cast('-')); goto match_run; } else { int i; - for (i = range_start ; i < (static_cast(ch)) ; i++) - SetBit(matchmap, i); + for (i = range_start; i < (static_cast(ch)); i++) + SetBit(matchmap, i); state = ST_MATCH; } - break; + break; - match_run: // Match expression finished + match_run: // Match expression finished char* oarg = sarg; while (width) { q = fgetc(stream); unsigned char qc = static_cast(q); - if (q <= 0 || !(TestBit(matchmap, qc)^matchinv)) { + if (q <= 0 || !(TestBit(matchmap, qc) ^ matchinv)) { ungetc(q, stream); break; } @@ -562,18 +554,17 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) { *sarg = '\0'; converted++; } - break; + break; } } - if (bail == BAIL_EOF && !converted) - converted = -1; // Return EOF (-1) + if (bail == BAIL_EOF && !converted) converted = -1; // Return EOF (-1) return converted; } #ifdef EMBEDDED -int creat(const char *pathname, mode_t mode) { +int creat(const char* pathname, mode_t mode) { return open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode); } diff --git a/src/ccutil/scanutils.h b/src/ccutil/scanutils.h index dc3dfe085a..82e6239245 100644 --- a/src/ccutil/scanutils.h +++ b/src/ccutil/scanutils.h @@ -19,10 +19,10 @@ #ifndef TESSERACT_CCUTIL_SCANUTILS_H_ #define TESSERACT_CCUTIL_SCANUTILS_H_ -#include +#include #include +#include #include -#include /** * fscanf variant to ensure correct reading regardless of locale. @@ -34,7 +34,7 @@ * @note Note that scientific floating-point notation is not supported. * */ -int tfscanf(FILE* stream, const char *format, ...); +int tfscanf(FILE* stream, const char* format, ...); #ifdef EMBEDDED @@ -46,16 +46,16 @@ uintmax_t streamtoumax(FILE* s, int base); // Parse a file stream according to the given format. See the fscanf manpage // for more information, as this function attempts to mimic its behavior. // Note that scientific loating-point notation is not supported. -int fscanf(FILE* stream, const char *format, ...); +int fscanf(FILE* stream, const char* format, ...); // Parse a file stream according to the given format. See the fscanf manpage // for more information, as this function attempts to mimic its behavior. // Note that scientific loating-point notation is not supported. -int vfscanf(FILE* stream, const char *format, va_list ap); +int vfscanf(FILE* stream, const char* format, va_list ap); // Create a file at the specified path. See the creat manpage for more // information, as this function attempts to mimic its behavior. -int creat(const char *pathname, mode_t mode); +int creat(const char* pathname, mode_t mode); // Convert the specified C-String to a float. Returns the first parsed float, // or 0.0 if no floating point value could be found. Note that scientific diff --git a/src/ccutil/serialis.cpp b/src/ccutil/serialis.cpp index ddba5fb270..15fd356b76 100644 --- a/src/ccutil/serialis.cpp +++ b/src/ccutil/serialis.cpp @@ -31,8 +31,7 @@ TFile::TFile() swap_(false) {} TFile::~TFile() { - if (data_is_owned_) - delete data_; + if (data_is_owned_) delete data_; } bool TFile::Open(const STRING& filename, FileReader reader) { @@ -70,11 +69,9 @@ bool TFile::Open(FILE* fp, int64_t end_offset) { return false; } if (end_offset < 0) { - if (fseek(fp, 0, SEEK_END)) - return false; + if (fseek(fp, 0, SEEK_END)) return false; end_offset = ftell(fp); - if (fseek(fp, current_pos, SEEK_SET)) - return false; + if (fseek(fp, current_pos, SEEK_SET)) return false; } int size = end_offset - current_pos; is_writing_ = false; @@ -156,10 +153,8 @@ int TFile::FWrite(const void* buffer, int size, int count) { const char* buf = static_cast(buffer); // This isn't very efficient, but memory is so fast compared to disk // that it is relatively unimportant, and very simple. - for (int i = 0; i < total; ++i) - data_->push_back(buf[i]); + for (int i = 0; i < total; ++i) data_->push_back(buf[i]); return count; } - } // namespace tesseract. diff --git a/src/ccutil/serialis.h b/src/ccutil/serialis.h index f7b63e36a7..2cd4d1c65d 100644 --- a/src/ccutil/serialis.h +++ b/src/ccutil/serialis.h @@ -20,12 +20,13 @@ #ifndef SERIALIS_H #define SERIALIS_H +#include #include #include -#include #include "host.h" -template class GenericVector; +template +class GenericVector; class STRING; /*********************************************************************** @@ -34,7 +35,7 @@ class STRING; Replace with "". may be an arbitrary number of tokens ***********************************************************************/ -#define QUOTE_IT( parm ) #parm +#define QUOTE_IT(parm) #parm namespace tesseract { diff --git a/src/ccutil/sorthelper.h b/src/ccutil/sorthelper.h index d5b67db27b..f5822dd6af 100644 --- a/src/ccutil/sorthelper.h +++ b/src/ccutil/sorthelper.h @@ -36,7 +36,8 @@ template class SortHelper { public: // Simple pair class to hold the values and counts. - template struct SortPair { + template + struct SortPair { PairT value; int count; }; @@ -56,9 +57,7 @@ class SortHelper { } // Constructor takes a hint of the array size, but it need not be accurate. - explicit SortHelper(int sizehint) { - counts_.reserve(sizehint); - } + explicit SortHelper(int sizehint) { counts_.reserve(sizehint); } // Add a value that may be a duplicate of an existing value. // Uses a linear search. @@ -82,27 +81,25 @@ class SortHelper { for (int i = 0; i < counts_.size(); ++i) { if (counts_[i].count > best_count) { best_count = counts_[i].count; - if (max_value != nullptr) - *max_value = counts_[i].value; + if (max_value != nullptr) *max_value = counts_[i].value; } } return best_count; } // Returns the data array sorted by decreasing frequency. - const GenericVector >& SortByCount() { + const GenericVector>& SortByCount() { counts_.sort(&SortPairsByCount); return counts_; } // Returns the data array sorted by decreasing value. - const GenericVector >& SortByValue() { + const GenericVector>& SortByValue() { counts_.sort(&SortPairsByValue); return counts_; } private: - GenericVector > counts_; + GenericVector> counts_; }; - #endif // TESSERACT_CCUTIL_SORTHELPER_H_. diff --git a/src/ccutil/stderr.h b/src/ccutil/stderr.h index b5e96fbe7c..8e374c92b9 100644 --- a/src/ccutil/stderr.h +++ b/src/ccutil/stderr.h @@ -17,10 +17,10 @@ * **********************************************************************/ -#ifndef STDERR_H -#define STDERR_H +#ifndef STDERR_H +#define STDERR_H -#include "errcode.h" +#include "errcode.h" const ERRCODE MEMORY_OUT = "Out of memory"; #endif diff --git a/src/ccutil/strngs.cpp b/src/ccutil/strngs.cpp index 045ff3ed6b..48b57e38fb 100644 --- a/src/ccutil/strngs.cpp +++ b/src/ccutil/strngs.cpp @@ -52,7 +52,7 @@ const int kMaxDoubleSize = 16; const int kMinCapacity = 16; char* STRING::AllocData(int used, int capacity) { - data_ = (STRING_HEADER *)alloc_string(capacity + sizeof(STRING_HEADER)); + data_ = (STRING_HEADER*)alloc_string(capacity + sizeof(STRING_HEADER)); // header is the metadata for this memory block STRING_HEADER* header = GetHeader(); @@ -61,16 +61,14 @@ char* STRING::AllocData(int used, int capacity) { return GetCStr(); } -void STRING::DiscardData() { - free_string((char *)data_); -} +void STRING::DiscardData() { free_string((char*)data_); } -// This is a private method; ensure FixHeader is called (or used_ is well defined) -// beforehand +// This is a private method; ensure FixHeader is called (or used_ is well +// defined) beforehand char* STRING::ensure_cstr(int32_t min_capacity) { STRING_HEADER* orig_header = GetHeader(); if (min_capacity <= orig_header->capacity_) - return ((char *)this->data_) + sizeof(STRING_HEADER); + return ((char*)this->data_) + sizeof(STRING_HEADER); // if we are going to grow bigger, than double our existing // size, but if that still is not big enough then keep the @@ -90,18 +88,16 @@ char* STRING::ensure_cstr(int32_t min_capacity) { data_ = new_header; assert(InvariantOk()); - return ((char *)data_) + sizeof(STRING_HEADER); + return ((char*)data_) + sizeof(STRING_HEADER); } // This is const, but is modifying a mutable field // this way it can be used on const or non-const instances. void STRING::FixHeader() const { const STRING_HEADER* header = GetHeader(); - if (header->used_ < 0) - header->used_ = strlen(GetCStr()) + 1; + if (header->used_ < 0) header->used_ = strlen(GetCStr()) + 1; } - STRING::STRING() { // Empty STRINGs contain just the "\0". memcpy(AllocData(1, kMinCapacity), "", 1); @@ -109,9 +105,9 @@ STRING::STRING() { STRING::STRING(const STRING& str) { str.FixHeader(); - const STRING_HEADER* str_header = str.GetHeader(); - const int str_used = str_header->used_; - char *this_cstr = AllocData(str_used, str_used); + const STRING_HEADER* str_header = str.GetHeader(); + const int str_used = str_header->used_; + char* this_cstr = AllocData(str_used, str_used); memcpy(this_cstr, str.GetCStr(), str_used); assert(InvariantOk()); } @@ -128,7 +124,7 @@ STRING::STRING(const char* cstr) { assert(InvariantOk()); } -STRING::STRING(const char *data, int length) { +STRING::STRING(const char* data, int length) { if (data == nullptr) { // Empty STRINGs contain just the "\0". memcpy(AllocData(1, kMinCapacity), "", 1); @@ -139,9 +135,7 @@ STRING::STRING(const char *data, int length) { } } -STRING::~STRING() { - DiscardData(); -} +STRING::~STRING() { DiscardData(); } // TODO(rays) Change all callers to use TFile and remove the old functions. // Writes to the given file. Returns false in case of error. @@ -163,8 +157,7 @@ bool STRING::Serialize(TFile* fp) const { bool STRING::DeSerialize(bool swap, FILE* fp) { int32_t len; if (fread(&len, sizeof(len), 1, fp) != 1) return false; - if (swap) - ReverseN(&len, sizeof(len)); + if (swap) ReverseN(&len, sizeof(len)); truncate_at(len); if (static_cast(fread(GetCStr(), 1, len, fp)) != len) return false; return true; @@ -187,7 +180,7 @@ bool STRING::SkipDeSerialize(tesseract::TFile* fp) { } bool STRING::contains(const char c) const { - return (c != '\0') && (strchr (GetCStr(), c) != nullptr); + return (c != '\0') && (strchr(GetCStr(), c) != nullptr); } int32_t STRING::length() const { @@ -197,8 +190,7 @@ int32_t STRING::length() const { const char* STRING::string() const { const STRING_HEADER* header = GetHeader(); - if (header->used_ == 0) - return nullptr; + if (header->used_ == 0) return nullptr; // mark header length unreliable because tesseract might // cast away the const and mutate the string directly. @@ -206,9 +198,7 @@ const char* STRING::string() const { return GetCStr(); } -const char* STRING::c_str() const { - return string(); -} +const char* STRING::c_str() const { return string(); } /****** * The STRING_IS_PROTECTED interface adds additional support to migrate @@ -218,24 +208,20 @@ const char* STRING::c_str() const { * Also makes the [] operator return a const so it is immutable */ #if STRING_IS_PROTECTED -const char& STRING::operator[](int32_t index) const { - return GetCStr()[index]; -} +const char& STRING::operator[](int32_t index) const { return GetCStr()[index]; } void STRING::insert_range(int32_t index, const char* str, int len) { // if index is outside current range, then also grow size of string // to accmodate the requested range. STRING_HEADER* this_header = GetHeader(); int used = this_header->used_; - if (index > used) - used = index; + if (index > used) used = index; char* this_cstr = ensure_cstr(used + len + 1); if (index < used) { // move existing string from index to '\0' inclusive. - memmove(this_cstr + index + len, - this_cstr + index, - this_header->used_ - index); + memmove(this_cstr + index + len, this_cstr + index, + this_header->used_ - index); } else if (len > 0) { // We are going to overwrite previous null terminator, so write the new one. this_cstr[this_header->used_ + len - 1] = '\0'; @@ -243,8 +229,7 @@ void STRING::insert_range(int32_t index, const char* str, int len) { // If the old header did not have the terminator, // then we need to account for it now that we've added it. // Otherwise it was already accounted for; we just moved it. - if (this_header->used_ == 0) - ++this_header->used_; + if (this_header->used_ == 0) ++this_header->used_; } // Write new string to index. @@ -259,7 +244,7 @@ void STRING::erase_range(int32_t index, int len) { char* this_cstr = GetCStr(); STRING_HEADER* this_header = GetHeader(); - memcpy(this_cstr+index, this_cstr+index+len, + memcpy(this_cstr + index, this_cstr + index + len, this_header->used_ - index - len); this_header->used_ -= len; assert(InvariantOk()); @@ -279,11 +264,11 @@ char& STRING::operator[](int32_t index) const { // Code is casting away this const and mutating the string, // so mark used_ as -1 to flag it unreliable. GetHeader()->used_ = -1; - return ((char *)GetCStr())[index]; + return ((char*)GetCStr())[index]; } #endif -void STRING::split(const char c, GenericVector *splited) { +void STRING::split(const char c, GenericVector* splited) { int start_index = 0; const int len = length(); for (int i = 0; i < len; i++) { @@ -308,10 +293,10 @@ bool STRING::operator==(const STRING& str) const { const STRING_HEADER* str_header = str.GetHeader(); const STRING_HEADER* this_header = GetHeader(); const int this_used = this_header->used_; - const int str_used = str_header->used_; + const int str_used = str_header->used_; - return (this_used == str_used) - && (memcmp(GetCStr(), str.GetCStr(), this_used) == 0); + return (this_used == str_used) && + (memcmp(GetCStr(), str.GetCStr(), this_used) == 0); } bool STRING::operator!=(const STRING& str) const { @@ -320,10 +305,10 @@ bool STRING::operator!=(const STRING& str) const { const STRING_HEADER* str_header = str.GetHeader(); const STRING_HEADER* this_header = GetHeader(); const int this_used = this_header->used_; - const int str_used = str_header->used_; + const int str_used = str_header->used_; - return (this_used != str_used) - || (memcmp(GetCStr(), str.GetCStr(), this_used) != 0); + return (this_used != str_used) || + (memcmp(GetCStr(), str.GetCStr(), this_used) != 0); } bool STRING::operator!=(const char* cstr) const { @@ -334,8 +319,8 @@ bool STRING::operator!=(const char* cstr) const { return this_header->used_ > 1; // either '\0' or nullptr else { const int32_t length = strlen(cstr) + 1; - return (this_header->used_ != length) - || (memcmp(GetCStr(), cstr, length) != 0); + return (this_header->used_ != length) || + (memcmp(GetCStr(), cstr, length) != 0); } } @@ -355,13 +340,13 @@ STRING& STRING::operator=(const STRING& str) { return *this; } -STRING & STRING::operator+=(const STRING& str) { +STRING& STRING::operator+=(const STRING& str) { FixHeader(); str.FixHeader(); const STRING_HEADER* str_header = str.GetHeader(); const char* str_cstr = str.GetCStr(); - const int str_used = str_header->used_; - const int this_used = GetHeader()->used_; + const int str_used = str_header->used_; + const int this_used = GetHeader()->used_; char* this_cstr = ensure_cstr(this_used + str_used); STRING_HEADER* this_header = GetHeader(); // after ensure for realloc @@ -379,8 +364,7 @@ STRING & STRING::operator+=(const STRING& str) { } void STRING::add_str_int(const char* str, int number) { - if (str != nullptr) - *this += str; + if (str != nullptr) *this += str; // Allow space for the maximum possible length of int64_t. char num_buffer[kMaxIntSize]; snprintf(num_buffer, kMaxIntSize - 1, "%d", number); @@ -389,8 +373,7 @@ void STRING::add_str_int(const char* str, int number) { } // Appends the given string and double (as a %.8g) to this. void STRING::add_str_double(const char* str, double number) { - if (str != nullptr) - *this += str; + if (str != nullptr) *this += str; // Allow space for the maximum possible length of %8g. char num_buffer[kMaxDoubleSize]; snprintf(num_buffer, kMaxDoubleSize - 1, "%.8g", number); @@ -398,7 +381,7 @@ void STRING::add_str_double(const char* str, double number) { *this += num_buffer; } -STRING & STRING::operator=(const char* cstr) { +STRING& STRING::operator=(const char* cstr) { STRING_HEADER* this_header = GetHeader(); if (cstr) { const int len = strlen(cstr) + 1; @@ -419,7 +402,7 @@ STRING & STRING::operator=(const char* cstr) { return *this; } -void STRING::assign(const char *cstr, int len) { +void STRING::assign(const char* cstr, int len) { STRING_HEADER* this_header = GetHeader(); this_header->used_ = 0; // don't bother copying data if need to realloc char* this_cstr = ensure_cstr(len + 1); // +1 for '\0' @@ -432,7 +415,8 @@ void STRING::assign(const char *cstr, int len) { assert(InvariantOk()); } -STRING STRING::operator+(const STRING& str) const { +STRING +STRING::operator+(const STRING& str) const { STRING result(*this); result += str; @@ -440,8 +424,8 @@ STRING STRING::operator+(const STRING& str) const { return result; } - -STRING STRING::operator+(const char ch) const { +STRING +STRING::operator+(const char ch) const { STRING result; FixHeader(); const STRING_HEADER* this_header = GetHeader(); @@ -452,7 +436,7 @@ STRING STRING::operator+(const char ch) const { // copies '\0' but we'll overwrite that memcpy(result_cstr, GetCStr(), this_used); - result_cstr[result_used] = ch; // overwrite old '\0' + result_cstr[result_used] = ch; // overwrite old '\0' result_cstr[result_used + 1] = '\0'; // append on '\0' ++result_header->used_; @@ -460,8 +444,7 @@ STRING STRING::operator+(const char ch) const { return result; } - -STRING& STRING::operator+=(const char *str) { +STRING& STRING::operator+=(const char* str) { if (!str || !*str) // empty string has no effect return *this; @@ -485,21 +468,18 @@ STRING& STRING::operator+=(const char *str) { return *this; } - STRING& STRING::operator+=(const char ch) { - if (ch == '\0') - return *this; + if (ch == '\0') return *this; FixHeader(); - int this_used = GetHeader()->used_; + int this_used = GetHeader()->used_; char* this_cstr = ensure_cstr(this_used + 1); STRING_HEADER* this_header = GetHeader(); - if (this_used > 0) - --this_used; // undo old empty null if there was one + if (this_used > 0) --this_used; // undo old empty null if there was one - this_cstr[this_used++] = ch; // append ch to end - this_cstr[this_used++] = '\0'; // append '\0' after ch + this_cstr[this_used++] = ch; // append ch to end + this_cstr[this_used++] = '\0'; // append '\0' after ch this_header->used_ = this_used; assert(InvariantOk()); diff --git a/src/ccutil/strngs.h b/src/ccutil/strngs.h index aa3a72c27d..2736b6cfe5 100644 --- a/src/ccutil/strngs.h +++ b/src/ccutil/strngs.h @@ -17,8 +17,8 @@ * **********************************************************************/ -#ifndef STRNGS_H -#define STRNGS_H +#ifndef STRNGS_H +#define STRNGS_H #include #include @@ -38,144 +38,141 @@ class TFile; // like length arrays and many places cast away the const on string() // to mutate the string. Turning this off means that internally we // cannot assume we know the strlen. -#define STRING_IS_PROTECTED 0 - -template class GenericVector; - -class TESS_API STRING -{ - public: - STRING(); - STRING(const STRING &string); - STRING(const char *string); - STRING(const char *data, int length); - ~STRING(); - - // Writes to the given file. Returns false in case of error. - bool Serialize(FILE* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, FILE* fp); - // Writes to the given file. Returns false in case of error. - bool Serialize(tesseract::TFile* fp) const; - // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(tesseract::TFile* fp); - // As DeSerialize, but only seeks past the data - hence a static method. - static bool SkipDeSerialize(tesseract::TFile* fp); - - bool contains(const char c) const; - int32_t length() const; - int32_t size() const { return length(); } - // Workaround to avoid g++ -Wsign-compare warnings. - uint32_t unsigned_size() const { - const int32_t len = length(); - assert(0 <= len); - return static_cast(len); - } - const char *string() const; - const char *c_str() const; - - inline char* strdup() const { - int32_t len = length() + 1; - return strncpy(new char[len], GetCStr(), len); - } +#define STRING_IS_PROTECTED 0 + +template +class GenericVector; + +class TESS_API STRING { + public: + STRING(); + STRING(const STRING& string); + STRING(const char* string); + STRING(const char* data, int length); + ~STRING(); + + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + bool DeSerialize(bool swap, FILE* fp); + // Writes to the given file. Returns false in case of error. + bool Serialize(tesseract::TFile* fp) const; + // Reads from the given file. Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + bool DeSerialize(tesseract::TFile* fp); + // As DeSerialize, but only seeks past the data - hence a static method. + static bool SkipDeSerialize(tesseract::TFile* fp); + + bool contains(const char c) const; + int32_t length() const; + int32_t size() const { return length(); } + // Workaround to avoid g++ -Wsign-compare warnings. + uint32_t unsigned_size() const { + const int32_t len = length(); + assert(0 <= len); + return static_cast(len); + } + const char* string() const; + const char* c_str() const; + + inline char* strdup() const { + int32_t len = length() + 1; + return strncpy(new char[len], GetCStr(), len); + } #if STRING_IS_PROTECTED - const char &operator[] (int32_t index) const; - // len is number of chars in s to insert starting at index in this string - void insert_range(int32_t index, const char*s, int len); - void erase_range(int32_t index, int len); + const char& operator[](int32_t index) const; + // len is number of chars in s to insert starting at index in this string + void insert_range(int32_t index, const char* s, int len); + void erase_range(int32_t index, int len); #else - char &operator[] (int32_t index) const; + char& operator[](int32_t index) const; #endif - void split(const char c, GenericVector *splited); - void truncate_at(int32_t index); - - bool operator== (const STRING & string) const; - bool operator!= (const STRING & string) const; - bool operator!= (const char *string) const; - - STRING & operator= (const char *string); - STRING & operator= (const STRING & string); - - STRING operator+ (const STRING & string) const; - STRING operator+ (const char ch) const; - - STRING & operator+= (const char *string); - STRING & operator+= (const STRING & string); - STRING & operator+= (const char ch); - - // Assignment for strings which are not null-terminated. - void assign(const char *cstr, int len); - - // Appends the given string and int (as a %d) to this. - // += cannot be used for ints as there as a char += operator that would - // be ambiguous, and ints usually need a string before or between them - // anyway. - void add_str_int(const char* str, int number); - // Appends the given string and double (as a %.8g) to this. - void add_str_double(const char* str, double number); - - // ensure capacity but keep pointer encapsulated - inline void ensure(int32_t min_capacity) { ensure_cstr(min_capacity); } - - private: - typedef struct STRING_HEADER { - // How much space was allocated in the string buffer for char data. - int capacity_; - - // used_ is how much of the capacity is currently being used, - // including a '\0' terminator. - // - // If used_ is 0 then string is nullptr (not even the '\0') - // else if used_ > 0 then it is strlen() + 1 (because it includes '\0') - // else strlen is >= 0 (not nullptr) but needs to be computed. - // this condition is set when encapsulation is violated because - // an API returned a mutable string. - // - // capacity_ - used_ = excess capacity that the string can grow - // without reallocating - mutable int used_; - } STRING_HEADER; - - // To preserve the behavior of the old serialization, we only have space - // for one pointer in this structure. So we are embedding a data structure - // at the start of the storage that will hold additional state variables, - // then storing the actual string contents immediately after. - STRING_HEADER* data_; - - // returns the header part of the storage - inline STRING_HEADER* GetHeader() { - return data_; - } - inline const STRING_HEADER* GetHeader() const { - return data_; - } - - // returns the string data part of storage - inline char* GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); } - - inline const char* GetCStr() const { - return ((const char *)data_) + sizeof(STRING_HEADER); - } - inline bool InvariantOk() const { + void split(const char c, GenericVector* splited); + void truncate_at(int32_t index); + + bool operator==(const STRING& string) const; + bool operator!=(const STRING& string) const; + bool operator!=(const char* string) const; + + STRING& operator=(const char* string); + STRING& operator=(const STRING& string); + + STRING operator+(const STRING& string) const; + STRING operator+(const char ch) const; + + STRING& operator+=(const char* string); + STRING& operator+=(const STRING& string); + STRING& operator+=(const char ch); + + // Assignment for strings which are not null-terminated. + void assign(const char* cstr, int len); + + // Appends the given string and int (as a %d) to this. + // += cannot be used for ints as there as a char += operator that would + // be ambiguous, and ints usually need a string before or between them + // anyway. + void add_str_int(const char* str, int number); + // Appends the given string and double (as a %.8g) to this. + void add_str_double(const char* str, double number); + + // ensure capacity but keep pointer encapsulated + inline void ensure(int32_t min_capacity) { ensure_cstr(min_capacity); } + + private: + typedef struct STRING_HEADER { + // How much space was allocated in the string buffer for char data. + int capacity_; + + // used_ is how much of the capacity is currently being used, + // including a '\0' terminator. + // + // If used_ is 0 then string is nullptr (not even the '\0') + // else if used_ > 0 then it is strlen() + 1 (because it includes '\0') + // else strlen is >= 0 (not nullptr) but needs to be computed. + // this condition is set when encapsulation is violated because + // an API returned a mutable string. + // + // capacity_ - used_ = excess capacity that the string can grow + // without reallocating + mutable int used_; + } STRING_HEADER; + + // To preserve the behavior of the old serialization, we only have space + // for one pointer in this structure. So we are embedding a data structure + // at the start of the storage that will hold additional state variables, + // then storing the actual string contents immediately after. + STRING_HEADER* data_; + + // returns the header part of the storage + inline STRING_HEADER* GetHeader() { return data_; } + inline const STRING_HEADER* GetHeader() const { return data_; } + + // returns the string data part of storage + inline char* GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); } + + inline const char* GetCStr() const { + return ((const char*)data_) + sizeof(STRING_HEADER); + } + inline bool InvariantOk() const { #if STRING_IS_PROTECTED - return (GetHeader()->used_ == 0) ? - (string() == nullptr) : (GetHeader()->used_ == (strlen(string()) + 1)); + return (GetHeader()->used_ == 0) + ? (string() == nullptr) + : (GetHeader()->used_ == (strlen(string()) + 1)); #else - return true; + return true; #endif - } + } - // Ensure string has requested capacity as optimization - // to avoid unnecessary reallocations. - // The return value is a cstr buffer with at least requested capacity - char* ensure_cstr(int32_t min_capacity); + // Ensure string has requested capacity as optimization + // to avoid unnecessary reallocations. + // The return value is a cstr buffer with at least requested capacity + char* ensure_cstr(int32_t min_capacity); - void FixHeader() const; // make used_ non-negative, even if const + void FixHeader() const; // make used_ non-negative, even if const - char* AllocData(int used, int capacity); - void DiscardData(); + char* AllocData(int used, int capacity); + void DiscardData(); }; #endif diff --git a/src/ccutil/tesscallback.h b/src/ccutil/tesscallback.h index 2bfd831706..f052a2fc0f 100644 --- a/src/ccutil/tesscallback.h +++ b/src/ccutil/tesscallback.h @@ -25,17 +25,16 @@ struct TessCallbackUtils_ { static void FailIsRepeatable(const char* name); }; - class TessClosure { public: - virtual ~TessClosure() { } + virtual ~TessClosure() {} virtual void Run() = 0; }; template class TessResultCallback { public: - virtual ~TessResultCallback() { } + virtual ~TessResultCallback() {} virtual R Run() = 0; }; @@ -50,11 +49,9 @@ class _ConstTessMemberResultCallback_0_0 : public TessResultCallback { MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_0( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _ConstTessMemberResultCallback_0_0(const T* object, + MemberSignature member) + : object_(object), member_(member) {} virtual R Run() { if (!del) { @@ -71,8 +68,7 @@ class _ConstTessMemberResultCallback_0_0 : public TessResultCallback { }; template -class _ConstTessMemberResultCallback_0_0 - : public TessClosure { +class _ConstTessMemberResultCallback_0_0 : public TessClosure { public: typedef TessClosure base; typedef void (T::*MemberSignature)() const; @@ -82,11 +78,9 @@ class _ConstTessMemberResultCallback_0_0 MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_0( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _ConstTessMemberResultCallback_0_0(const T* object, + MemberSignature member) + : object_(object), member_(member) {} virtual void Run() { if (!del) { @@ -102,21 +96,17 @@ class _ConstTessMemberResultCallback_0_0 #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_0::base* -NewTessCallback( - const T1* obj, R (T2::*member)() const) { - return new _ConstTessMemberResultCallback_0_0( - obj, member); +inline typename _ConstTessMemberResultCallback_0_0::base* +NewTessCallback(const T1* obj, R (T2::*member)() const) { + return new _ConstTessMemberResultCallback_0_0(obj, member); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_0::base* -NewPermanentTessCallback( - const T1* obj, R (T2::*member)() const) { - return new _ConstTessMemberResultCallback_0_0( - obj, member); +inline typename _ConstTessMemberResultCallback_0_0::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)() const) { + return new _ConstTessMemberResultCallback_0_0(obj, member); } #endif @@ -124,18 +114,15 @@ template class _TessMemberResultCallback_0_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)() ; + typedef R (T::*MemberSignature)(); private: T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_0( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _TessMemberResultCallback_0_0(T* object, MemberSignature member) + : object_(object), member_(member) {} virtual R Run() { if (!del) { @@ -152,22 +139,18 @@ class _TessMemberResultCallback_0_0 : public TessResultCallback { }; template -class _TessMemberResultCallback_0_0 - : public TessClosure { +class _TessMemberResultCallback_0_0 : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)() ; + typedef void (T::*MemberSignature)(); private: T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_0( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _TessMemberResultCallback_0_0(T* object, MemberSignature member) + : object_(object), member_(member) {} virtual void Run() { if (!del) { @@ -183,21 +166,17 @@ class _TessMemberResultCallback_0_0 #ifndef SWIG template -inline typename _TessMemberResultCallback_0_0::base* -NewTessCallback( - T1* obj, R (T2::*member)() ) { - return new _TessMemberResultCallback_0_0( - obj, member); +inline typename _TessMemberResultCallback_0_0::base* +NewTessCallback(T1* obj, R (T2::*member)()) { + return new _TessMemberResultCallback_0_0(obj, member); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_0_0::base* -NewPermanentTessCallback( - T1* obj, R (T2::*member)() ) { - return new _TessMemberResultCallback_0_0( - obj, member); +inline typename _TessMemberResultCallback_0_0::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)()) { + return new _TessMemberResultCallback_0_0(obj, member); } #endif @@ -211,10 +190,8 @@ class _TessFunctionResultCallback_0_0 : public TessResultCallback { FunctionSignature function_; public: - inline _TessFunctionResultCallback_0_0( - FunctionSignature function) - : function_(function) { - } + inline _TessFunctionResultCallback_0_0(FunctionSignature function) + : function_(function) {} virtual R Run() { if (!del) { @@ -231,8 +208,7 @@ class _TessFunctionResultCallback_0_0 : public TessResultCallback { }; template -class _TessFunctionResultCallback_0_0 - : public TessClosure { +class _TessFunctionResultCallback_0_0 : public TessClosure { public: typedef TessClosure base; typedef void (*FunctionSignature)(); @@ -241,10 +217,8 @@ class _TessFunctionResultCallback_0_0 FunctionSignature function_; public: - inline _TessFunctionResultCallback_0_0( - FunctionSignature function) - : function_(function) { - } + inline _TessFunctionResultCallback_0_0(FunctionSignature function) + : function_(function) {} virtual void Run() { if (!del) { @@ -259,23 +233,28 @@ class _TessFunctionResultCallback_0_0 }; template -inline typename _TessFunctionResultCallback_0_0::base* -NewTessCallback(R (*function)()) { - return new _TessFunctionResultCallback_0_0(function); +inline typename _TessFunctionResultCallback_0_0::base* NewTessCallback( + R (*function)()) { + return new _TessFunctionResultCallback_0_0(function); } template -inline typename _TessFunctionResultCallback_0_0::base* +inline typename _TessFunctionResultCallback_0_0::base* NewPermanentTessCallback(R (*function)()) { - return new _TessFunctionResultCallback_0_0(function); + return new _TessFunctionResultCallback_0_0(function); } - - // Specified by TR1 [4.7.2] Reference modifications. -template struct remove_reference; -template struct remove_reference { typedef T type; }; -template struct remove_reference { typedef T type; }; +template +struct remove_reference; +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; // Identity::type is a typedef of T. Useful for preventing the // compiler from inferring the type of an argument in templates. @@ -296,9 +275,9 @@ class _ConstTessMemberResultCallback_1_0 : public TessResultCallback { typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_0(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_0(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual R Run() { if (!del) { @@ -315,7 +294,8 @@ class _ConstTessMemberResultCallback_1_0 : public TessResultCallback { }; template -class _ConstTessMemberResultCallback_1_0 : public TessClosure { +class _ConstTessMemberResultCallback_1_0 + : public TessClosure { public: typedef TessClosure base; typedef void (T::*MemberSignature)(P1) const; @@ -326,9 +306,9 @@ class _ConstTessMemberResultCallback_1_0 : public TessClosure typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_0(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_0(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual void Run() { if (!del) { @@ -344,17 +324,21 @@ class _ConstTessMemberResultCallback_1_0 : public TessClosure #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_1_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_0(obj, member, p1); +inline typename _ConstTessMemberResultCallback_1_0::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_0(obj, member, + p1); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_1_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_0(obj, member, p1); +inline typename _ConstTessMemberResultCallback_1_0::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_0(obj, member, + p1); } #endif @@ -362,17 +346,16 @@ template class _TessMemberResultCallback_1_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1) ; + typedef R (T::*MemberSignature)(P1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_0( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_0(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual R Run() { if (!del) { @@ -392,17 +375,16 @@ template class _TessMemberResultCallback_1_0 : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1) ; + typedef void (T::*MemberSignature)(P1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_0( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_0(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual void Run() { if (!del) { @@ -418,17 +400,18 @@ class _TessMemberResultCallback_1_0 : public TessClosure { #ifndef SWIG template -inline typename _TessMemberResultCallback_1_0::base* -NewTessCallback( T1* obj, R (T2::*member)(P1) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_0(obj, member, p1); +inline typename _TessMemberResultCallback_1_0::base* +NewTessCallback(T1* obj, R (T2::*member)(P1), typename Identity::type p1) { + return new _TessMemberResultCallback_1_0(obj, member, p1); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_1_0::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_0(obj, member, p1); +inline typename _TessMemberResultCallback_1_0::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_0(obj, member, p1); } #endif @@ -444,7 +427,7 @@ class _TessFunctionResultCallback_1_0 : public TessResultCallback { public: inline _TessFunctionResultCallback_1_0(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} virtual R Run() { if (!del) { @@ -472,7 +455,7 @@ class _TessFunctionResultCallback_1_0 : public TessClosure { public: inline _TessFunctionResultCallback_1_0(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} virtual void Run() { if (!del) { @@ -487,22 +470,22 @@ class _TessFunctionResultCallback_1_0 : public TessClosure { }; template -inline typename _TessFunctionResultCallback_1_0::base* +inline typename _TessFunctionResultCallback_1_0::base* NewTessCallback(R (*function)(P1), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_0(function, p1); + return new _TessFunctionResultCallback_1_0(function, p1); } template -inline typename _TessFunctionResultCallback_1_0::base* +inline typename _TessFunctionResultCallback_1_0::base* NewPermanentTessCallback(R (*function)(P1), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_0(function, p1); + return new _TessFunctionResultCallback_1_0(function, p1); } template class _ConstTessMemberResultCallback_2_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2) const; + typedef R (T::*MemberSignature)(P1, P2) const; private: const T* object_; @@ -511,16 +494,17 @@ class _ConstTessMemberResultCallback_2_0 : public TessResultCallback { typename remove_reference::type p2_; public: - inline _ConstTessMemberResultCallback_2_0(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_0(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_); + R result = (object_->*member_)(p1_, p2_); return result; } else { - R result = (object_->*member_)(p1_,p2_); + R result = (object_->*member_)(p1_, p2_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -530,10 +514,11 @@ class _ConstTessMemberResultCallback_2_0 : public TessResultCallback { }; template -class _ConstTessMemberResultCallback_2_0 : public TessClosure { +class _ConstTessMemberResultCallback_2_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2) const; + typedef void (T::*MemberSignature)(P1, P2) const; private: const T* object_; @@ -542,16 +527,16 @@ class _ConstTessMemberResultCallback_2_0 : public TessClos typename remove_reference::type p2_; public: - inline _ConstTessMemberResultCallback_2_0(const T* object, MemberSignature member, P1 p1, P2 p2) - : - object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_0(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_); + (object_->*member_)(p1_, p2_); } else { - (object_->*member_)(p1_,p2_); + (object_->*member_)(p1_, p2_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -561,17 +546,23 @@ class _ConstTessMemberResultCallback_2_0 : public TessClos #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_2_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_0(obj, member, p1, p2); +inline typename _ConstTessMemberResultCallback_2_0::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_0( + obj, member, p1, p2); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_2_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_0(obj, member, p1, p2); +inline typename _ConstTessMemberResultCallback_2_0::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1, P2) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_0( + obj, member, p1, p2); } #endif @@ -579,25 +570,25 @@ template class _TessMemberResultCallback_2_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2) ; + typedef R (T::*MemberSignature)(P1, P2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_0( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_0(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_); + R result = (object_->*member_)(p1_, p2_); return result; } else { - R result = (object_->*member_)(p1_,p2_); + R result = (object_->*member_)(p1_, p2_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -610,25 +601,24 @@ template class _TessMemberResultCallback_2_0 : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2) ; + typedef void (T::*MemberSignature)(P1, P2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_0( T* object, MemberSignature member, P1 p1, P2 p2) - : - object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_0(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_); + (object_->*member_)(p1_, p2_); } else { - (object_->*member_)(p1_,p2_); + (object_->*member_)(p1_, p2_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -638,17 +628,23 @@ class _TessMemberResultCallback_2_0 : public TessClosure { #ifndef SWIG template -inline typename _TessMemberResultCallback_2_0::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_0(obj, member, p1, p2); +inline typename _TessMemberResultCallback_2_0::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_0(obj, member, p1, + p2); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_2_0::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_0(obj, member, p1, p2); +inline typename _TessMemberResultCallback_2_0::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_0(obj, member, + p1, p2); } #endif @@ -656,7 +652,7 @@ template class _TessFunctionResultCallback_2_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2); + typedef R (*FunctionSignature)(P1, P2); private: FunctionSignature function_; @@ -664,15 +660,16 @@ class _TessFunctionResultCallback_2_0 : public TessResultCallback { typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_0(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_0(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} virtual R Run() { if (!del) { - R result = (*function_)(p1_,p2_); + R result = (*function_)(p1_, p2_); return result; } else { - R result = (*function_)(p1_,p2_); + R result = (*function_)(p1_, p2_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -685,7 +682,7 @@ template class _TessFunctionResultCallback_2_0 : public TessClosure { public: typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2); + typedef void (*FunctionSignature)(P1, P2); private: FunctionSignature function_; @@ -693,15 +690,15 @@ class _TessFunctionResultCallback_2_0 : public TessClosure { typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_0(FunctionSignature function, P1 p1, P2 p2) - : - function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_0(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} virtual void Run() { if (!del) { - (*function_)(p1_,p2_); + (*function_)(p1_, p2_); } else { - (*function_)(p1_,p2_); + (*function_)(p1_, p2_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -710,22 +707,25 @@ class _TessFunctionResultCallback_2_0 : public TessClosure { }; template -inline typename _TessFunctionResultCallback_2_0::base* -NewTessCallback(R (*function)(P1,P2), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_0(function, p1, p2); +inline typename _TessFunctionResultCallback_2_0::base* +NewTessCallback(R (*function)(P1, P2), typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_0(function, p1, p2); } template -inline typename _TessFunctionResultCallback_2_0::base* -NewPermanentTessCallback(R (*function)(P1,P2), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_0(function, p1, p2); +inline typename _TessFunctionResultCallback_2_0::base* +NewPermanentTessCallback(R (*function)(P1, P2), typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_0(function, p1, + p2); } template class _ConstTessMemberResultCallback_3_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3) const; + typedef R (T::*MemberSignature)(P1, P2, P3) const; private: const T* object_; @@ -735,17 +735,17 @@ class _ConstTessMemberResultCallback_3_0 : public TessResultCallback { typename remove_reference::type p3_; public: - inline _ConstTessMemberResultCallback_3_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : - object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_0(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_); + R result = (object_->*member_)(p1_, p2_, p3_); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_); + R result = (object_->*member_)(p1_, p2_, p3_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -755,10 +755,11 @@ class _ConstTessMemberResultCallback_3_0 : public TessResultCallback { }; template -class _ConstTessMemberResultCallback_3_0 : public TessClosure { +class _ConstTessMemberResultCallback_3_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3) const; + typedef void (T::*MemberSignature)(P1, P2, P3) const; private: const T* object_; @@ -768,15 +769,16 @@ class _ConstTessMemberResultCallback_3_0 : public Tess typename remove_reference::type p3_; public: - inline _ConstTessMemberResultCallback_3_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_0(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_,p3_); + (object_->*member_)(p1_, p2_, p3_); } else { - (object_->*member_)(p1_,p2_,p3_); + (object_->*member_)(p1_, p2_, p3_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -786,17 +788,27 @@ class _ConstTessMemberResultCallback_3_0 : public Tess #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_3_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_0(obj, member, p1, p2, p3); +inline + typename _ConstTessMemberResultCallback_3_0::base* + NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_0( + obj, member, p1, p2, p3); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_3_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_0(obj, member, p1, p2, p3); +inline + typename _ConstTessMemberResultCallback_3_0::base* + NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_0( + obj, member, p1, p2, p3); } #endif @@ -804,26 +816,26 @@ template class _TessMemberResultCallback_3_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3) ; + typedef R (T::*MemberSignature)(P1, P2, P3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_0( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_0(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_); + R result = (object_->*member_)(p1_, p2_, p3_); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_); + R result = (object_->*member_)(p1_, p2_, p3_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -833,28 +845,29 @@ class _TessMemberResultCallback_3_0 : public TessResultCallback { }; template -class _TessMemberResultCallback_3_0 : public TessClosure { +class _TessMemberResultCallback_3_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3) ; + typedef void (T::*MemberSignature)(P1, P2, P3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_0( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_0(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_,p3_); + (object_->*member_)(p1_, p2_, p3_); } else { - (object_->*member_)(p1_,p2_,p3_); + (object_->*member_)(p1_, p2_, p3_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -864,17 +877,24 @@ class _TessMemberResultCallback_3_0 : public TessClosu #ifndef SWIG template -inline typename _TessMemberResultCallback_3_0::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_0(obj, member, p1, p2, p3); +inline typename _TessMemberResultCallback_3_0::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_0(obj, member, + p1, p2, p3); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_3_0::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_0(obj, member, p1, p2, p3); +inline typename _TessMemberResultCallback_3_0::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_0( + obj, member, p1, p2, p3); } #endif @@ -882,7 +902,7 @@ template class _TessFunctionResultCallback_3_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2,P3); + typedef R (*FunctionSignature)(P1, P2, P3); private: FunctionSignature function_; @@ -891,15 +911,16 @@ class _TessFunctionResultCallback_3_0 : public TessResultCallback { typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_0(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_0(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} virtual R Run() { if (!del) { - R result = (*function_)(p1_,p2_,p3_); + R result = (*function_)(p1_, p2_, p3_); return result; } else { - R result = (*function_)(p1_,p2_,p3_); + R result = (*function_)(p1_, p2_, p3_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -909,10 +930,11 @@ class _TessFunctionResultCallback_3_0 : public TessResultCallback { }; template -class _TessFunctionResultCallback_3_0 : public TessClosure { +class _TessFunctionResultCallback_3_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2,P3); + typedef void (*FunctionSignature)(P1, P2, P3); private: FunctionSignature function_; @@ -921,14 +943,15 @@ class _TessFunctionResultCallback_3_0 : public TessClosur typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_0(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_0(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} virtual void Run() { if (!del) { - (*function_)(p1_,p2_,p3_); + (*function_)(p1_, p2_, p3_); } else { - (*function_)(p1_,p2_,p3_); + (*function_)(p1_, p2_, p3_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -937,22 +960,29 @@ class _TessFunctionResultCallback_3_0 : public TessClosur }; template -inline typename _TessFunctionResultCallback_3_0::base* -NewTessCallback(R (*function)(P1,P2,P3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_0(function, p1, p2, p3); +inline typename _TessFunctionResultCallback_3_0::base* +NewTessCallback(R (*function)(P1, P2, P3), typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_0(function, p1, + p2, p3); } template -inline typename _TessFunctionResultCallback_3_0::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_0(function, p1, p2, p3); +inline typename _TessFunctionResultCallback_3_0::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_0(function, p1, + p2, p3); } template class _ConstTessMemberResultCallback_4_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4) const; + typedef R (T::*MemberSignature)(P1, P2, P3, P4) const; private: const T* object_; @@ -963,16 +993,17 @@ class _ConstTessMemberResultCallback_4_0 : public TessResultCallback { typename remove_reference::type p4_; public: - inline _ConstTessMemberResultCallback_4_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_0(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -982,10 +1013,11 @@ class _ConstTessMemberResultCallback_4_0 : public TessResultCallback { }; template -class _ConstTessMemberResultCallback_4_0 : public TessClosure { +class _ConstTessMemberResultCallback_4_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4) const; + typedef void (T::*MemberSignature)(P1, P2, P3, P4) const; private: const T* object_; @@ -996,15 +1028,16 @@ class _ConstTessMemberResultCallback_4_0 : public typename remove_reference::type p4_; public: - inline _ConstTessMemberResultCallback_4_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_0(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_); + (object_->*member_)(p1_, p2_, p3_, p4_); } else { - (object_->*member_)(p1_,p2_,p3_,p4_); + (object_->*member_)(p1_, p2_, p3_, p4_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1014,17 +1047,28 @@ class _ConstTessMemberResultCallback_4_0 : public #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_4_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_0(obj, member, p1, p2, p3, p4); +inline typename _ConstTessMemberResultCallback_4_0::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, P4) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_0( + obj, member, p1, p2, p3, p4); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_4_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_0(obj, member, p1, p2, p3, p4); +inline typename _ConstTessMemberResultCallback_4_0::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, P4) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_0( + obj, member, p1, p2, p3, p4); } #endif @@ -1032,10 +1076,10 @@ template class _TessMemberResultCallback_4_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4) ; + typedef R (T::*MemberSignature)(P1, P2, P3, P4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -1043,16 +1087,16 @@ class _TessMemberResultCallback_4_0 : public TessResultCallback { typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_0( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_0(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1062,13 +1106,14 @@ class _TessMemberResultCallback_4_0 : public TessResultCallback { }; template -class _TessMemberResultCallback_4_0 : public TessClosure { +class _TessMemberResultCallback_4_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4) ; + typedef void (T::*MemberSignature)(P1, P2, P3, P4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -1076,15 +1121,15 @@ class _TessMemberResultCallback_4_0 : public TessC typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_0( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_0(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_); + (object_->*member_)(p1_, p2_, p3_, p4_); } else { - (object_->*member_)(p1_,p2_,p3_,p4_); + (object_->*member_)(p1_, p2_, p3_, p4_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1094,17 +1139,29 @@ class _TessMemberResultCallback_4_0 : public TessC #ifndef SWIG template -inline typename _TessMemberResultCallback_4_0::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_0(obj, member, p1, p2, p3, p4); +inline + typename _TessMemberResultCallback_4_0::base* + NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_0( + obj, member, p1, p2, p3, p4); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_4_0::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_0(obj, member, p1, p2, p3, p4); +inline + typename _TessMemberResultCallback_4_0::base* + NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_0( + obj, member, p1, p2, p3, p4); } #endif @@ -1112,7 +1169,7 @@ template class _TessFunctionResultCallback_4_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2,P3,P4); + typedef R (*FunctionSignature)(P1, P2, P3, P4); private: FunctionSignature function_; @@ -1122,15 +1179,16 @@ class _TessFunctionResultCallback_4_0 : public TessResultCallback { typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_0(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual R Run() { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_); + R result = (*function_)(p1_, p2_, p3_, p4_); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_); + R result = (*function_)(p1_, p2_, p3_, p4_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -1140,10 +1198,11 @@ class _TessFunctionResultCallback_4_0 : public TessResultCallback { }; template -class _TessFunctionResultCallback_4_0 : public TessClosure { +class _TessFunctionResultCallback_4_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2,P3,P4); + typedef void (*FunctionSignature)(P1, P2, P3, P4); private: FunctionSignature function_; @@ -1153,14 +1212,15 @@ class _TessFunctionResultCallback_4_0 : public TessCl typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_0(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual void Run() { if (!del) { - (*function_)(p1_,p2_,p3_,p4_); + (*function_)(p1_, p2_, p3_, p4_); } else { - (*function_)(p1_,p2_,p3_,p4_); + (*function_)(p1_, p2_, p3_, p4_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -1169,22 +1229,31 @@ class _TessFunctionResultCallback_4_0 : public TessCl }; template -inline typename _TessFunctionResultCallback_4_0::base* -NewTessCallback(R (*function)(P1,P2,P3,P4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_0(function, p1, p2, p3, p4); +inline typename _TessFunctionResultCallback_4_0::base* +NewTessCallback(R (*function)(P1, P2, P3, P4), typename Identity::type p1, + typename Identity::type p2, typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_0( + function, p1, p2, p3, p4); } template -inline typename _TessFunctionResultCallback_4_0::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_0(function, p1, p2, p3, p4); -} - -template +inline typename _TessFunctionResultCallback_4_0::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_0( + function, p1, p2, p3, p4); +} + +template class _ConstTessMemberResultCallback_5_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5) const; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5) const; private: const T* object_; @@ -1196,16 +1265,23 @@ class _ConstTessMemberResultCallback_5_0 : public TessResultCallback { typename remove_reference::type p5_; public: - inline _ConstTessMemberResultCallback_5_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_0(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1215,10 +1291,11 @@ class _ConstTessMemberResultCallback_5_0 : public TessResultCallback { }; template -class _ConstTessMemberResultCallback_5_0 : public TessClosure { +class _ConstTessMemberResultCallback_5_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5) const; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5) const; private: const T* object_; @@ -1230,15 +1307,22 @@ class _ConstTessMemberResultCallback_5_0 : pub typename remove_reference::type p5_; public: - inline _ConstTessMemberResultCallback_5_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_0(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1247,29 +1331,47 @@ class _ConstTessMemberResultCallback_5_0 : pub }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_0::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, P4, P5) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, + p5); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_0::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, + p5); } #endif -template +template class _TessMemberResultCallback_5_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5) ; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -1278,16 +1380,22 @@ class _TessMemberResultCallback_5_0 : public TessResultCallback { typename remove_reference::type p5_; public: - inline _TessMemberResultCallback_5_0( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_0(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1297,13 +1405,14 @@ class _TessMemberResultCallback_5_0 : public TessResultCallback { }; template -class _TessMemberResultCallback_5_0 : public TessClosure { +class _TessMemberResultCallback_5_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5) ; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -1312,15 +1421,21 @@ class _TessMemberResultCallback_5_0 : public T typename remove_reference::type p5_; public: - inline _TessMemberResultCallback_5_0( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_0(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1329,18 +1444,32 @@ class _TessMemberResultCallback_5_0 : public T }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_0::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_0::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_0( + obj, member, p1, p2, p3, p4, p5); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_0::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_0(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_0::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_0( + obj, member, p1, p2, p3, p4, p5); } #endif @@ -1348,7 +1477,7 @@ template class _TessFunctionResultCallback_5_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5); + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5); private: FunctionSignature function_; @@ -1359,15 +1488,16 @@ class _TessFunctionResultCallback_5_0 : public TessResultCallback { typename remove_reference::type p5_; public: - inline _TessFunctionResultCallback_5_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_0(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} virtual R Run() { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -1377,10 +1507,11 @@ class _TessFunctionResultCallback_5_0 : public TessResultCallback { }; template -class _TessFunctionResultCallback_5_0 : public TessClosure { +class _TessFunctionResultCallback_5_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5); + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5); private: FunctionSignature function_; @@ -1391,14 +1522,15 @@ class _TessFunctionResultCallback_5_0 : public Te typename remove_reference::type p5_; public: - inline _TessFunctionResultCallback_5_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_0(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} virtual void Run() { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_); + (*function_)(p1_, p2_, p3_, p4_, p5_); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_); + (*function_)(p1_, p2_, p3_, p4_, p5_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -1407,22 +1539,37 @@ class _TessFunctionResultCallback_5_0 : public Te }; template -inline typename _TessFunctionResultCallback_5_0::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_0(function, p1, p2, p3, p4, p5); +inline + typename _TessFunctionResultCallback_5_0::base* + NewTessCallback(R (*function)(P1, P2, P3, P4, P5), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_0( + function, p1, p2, p3, p4, p5); } template -inline typename _TessFunctionResultCallback_5_0::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_0(function, p1, p2, p3, p4, p5); -} - -template +inline typename _TessFunctionResultCallback_5_0::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_0( + function, p1, p2, p3, p4, p5); +} + +template class _ConstTessMemberResultCallback_6_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6) const; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6) const; private: const T* object_; @@ -1435,16 +1582,24 @@ class _ConstTessMemberResultCallback_6_0 : public TessResultCallback { typename remove_reference::type p6_; public: - inline _ConstTessMemberResultCallback_6_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_0(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1453,11 +1608,13 @@ class _ConstTessMemberResultCallback_6_0 : public TessResultCallback { } }; -template -class _ConstTessMemberResultCallback_6_0 : public TessClosure { +template +class _ConstTessMemberResultCallback_6_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6) const; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6) const; private: const T* object_; @@ -1470,15 +1627,23 @@ class _ConstTessMemberResultCallback_6_0 : typename remove_reference::type p6_; public: - inline _ConstTessMemberResultCallback_6_0(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_0(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1487,29 +1652,46 @@ class _ConstTessMemberResultCallback_6_0 : }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_0::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_0::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, + p5, p6); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_0::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_0::base* +NewPermanentTessCallback( + const T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_0(obj, member, p1, p2, p3, + p4, p5, p6); } #endif -template +template class _TessMemberResultCallback_6_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6) ; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -1519,16 +1701,23 @@ class _TessMemberResultCallback_6_0 : public TessResultCallback { typename remove_reference::type p6_; public: - inline _TessMemberResultCallback_6_0( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_0(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual R Run() { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1537,14 +1726,16 @@ class _TessMemberResultCallback_6_0 : public TessResultCallback { } }; -template -class _TessMemberResultCallback_6_0 : public TessClosure { +template +class _TessMemberResultCallback_6_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6) ; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -1554,15 +1745,22 @@ class _TessMemberResultCallback_6_0 : publ typename remove_reference::type p6_; public: - inline _TessMemberResultCallback_6_0( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_0(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual void Run() { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1571,26 +1769,44 @@ class _TessMemberResultCallback_6_0 : publ }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_6_0::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_0::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessMemberResultCallback_6_0( + obj, member, p1, p2, p3, p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_6_0::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_0::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessMemberResultCallback_6_0(obj, member, p1, p2, p3, p4, p5, + p6); } #endif -template +template class _TessFunctionResultCallback_6_0 : public TessResultCallback { public: typedef TessResultCallback base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6); + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, P6); private: FunctionSignature function_; @@ -1602,15 +1818,22 @@ class _TessFunctionResultCallback_6_0 : public TessResultCallback { typename remove_reference::type p6_; public: - inline _TessFunctionResultCallback_6_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_0(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual R Run() { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -1620,10 +1843,11 @@ class _TessFunctionResultCallback_6_0 : public TessResultCallback { }; template -class _TessFunctionResultCallback_6_0 : public TessClosure { +class _TessFunctionResultCallback_6_0 + : public TessClosure { public: typedef TessClosure base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6); + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, P6); private: FunctionSignature function_; @@ -1635,14 +1859,21 @@ class _TessFunctionResultCallback_6_0 : publi typename remove_reference::type p6_; public: - inline _TessFunctionResultCallback_6_0(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_0(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual void Run() { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -1651,78 +1882,91 @@ class _TessFunctionResultCallback_6_0 : publi }; template -inline typename _TessFunctionResultCallback_6_0::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_0(function, p1, p2, p3, p4, p5, p6); +inline typename _TessFunctionResultCallback_6_0::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, P6), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_0( + function, p1, p2, p3, p4, p5, p6); } template -inline typename _TessFunctionResultCallback_6_0::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_0(function, p1, p2, p3, p4, p5, p6); +inline typename _TessFunctionResultCallback_6_0::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, P6), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_0( + function, p1, p2, p3, p4, p5, p6); } template class TessCallback1 { public: - virtual ~TessCallback1() { } + virtual ~TessCallback1() {} virtual void Run(A1) = 0; }; template class TessResultCallback1 { public: - virtual ~TessResultCallback1() { } + virtual ~TessResultCallback1() {} virtual R Run(A1) = 0; }; - -template +template class TessCallback2 { public: - virtual ~TessCallback2() { } - virtual void Run(A1,A2) = 0; + virtual ~TessCallback2() {} + virtual void Run(A1, A2) = 0; }; -template +template class TessResultCallback2 { public: - virtual ~TessResultCallback2() { } - virtual R Run(A1,A2) = 0; + virtual ~TessResultCallback2() {} + virtual R Run(A1, A2) = 0; }; -template +template class TessCallback3 { public: - virtual ~TessCallback3() { } - virtual void Run(A1,A2,A3) = 0; + virtual ~TessCallback3() {} + virtual void Run(A1, A2, A3) = 0; }; -template +template class TessResultCallback3 { public: - virtual ~TessResultCallback3() { } - virtual R Run(A1,A2,A3) = 0; + virtual ~TessResultCallback3() {} + virtual R Run(A1, A2, A3) = 0; }; -template +template class TessCallback4 { public: - virtual ~TessCallback4() { } - virtual void Run(A1,A2,A3,A4) = 0; + virtual ~TessCallback4() {} + virtual void Run(A1, A2, A3, A4) = 0; }; -template +template class TessResultCallback4 { public: - virtual ~TessResultCallback4() { } - virtual R Run(A1,A2,A3,A4) = 0; + virtual ~TessResultCallback4() {} + virtual R Run(A1, A2, A3, A4) = 0; }; template -class _ConstTessMemberResultCallback_0_1 : public TessResultCallback1 { +class _ConstTessMemberResultCallback_0_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; + typedef TessResultCallback1 base; typedef R (T::*MemberSignature)(A1) const; private: @@ -1730,11 +1974,9 @@ class _ConstTessMemberResultCallback_0_1 : public TessResultCallback1 { MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_1( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _ConstTessMemberResultCallback_0_1(const T* object, + MemberSignature member) + : object_(object), member_(member) {} virtual R Run(A1 a1) { if (!del) { @@ -1752,7 +1994,7 @@ class _ConstTessMemberResultCallback_0_1 : public TessResultCallback1 { template class _ConstTessMemberResultCallback_0_1 - : public TessCallback1 { + : public TessCallback1 { public: typedef TessCallback1 base; typedef void (T::*MemberSignature)(A1) const; @@ -1762,11 +2004,9 @@ class _ConstTessMemberResultCallback_0_1 MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_1( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _ConstTessMemberResultCallback_0_1(const T* object, + MemberSignature member) + : object_(object), member_(member) {} virtual void Run(A1 a1) { if (!del) { @@ -1782,40 +2022,33 @@ class _ConstTessMemberResultCallback_0_1 #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_1::base* -NewTessCallback( - const T1* obj, R (T2::*member)(A1) const) { - return new _ConstTessMemberResultCallback_0_1( - obj, member); +inline typename _ConstTessMemberResultCallback_0_1::base* +NewTessCallback(const T1* obj, R (T2::*member)(A1) const) { + return new _ConstTessMemberResultCallback_0_1(obj, member); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_1::base* -NewPermanentTessCallback( - const T1* obj, R (T2::*member)(A1) const) { - return new _ConstTessMemberResultCallback_0_1( - obj, member); +inline typename _ConstTessMemberResultCallback_0_1::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(A1) const) { + return new _ConstTessMemberResultCallback_0_1(obj, member); } #endif template -class _TessMemberResultCallback_0_1 : public TessResultCallback1 { +class _TessMemberResultCallback_0_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(A1) ; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(A1); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_1( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _TessMemberResultCallback_0_1(T* object, MemberSignature member) + : object_(object), member_(member) {} virtual R Run(A1 a1) { if (!del) { @@ -1833,21 +2066,18 @@ class _TessMemberResultCallback_0_1 : public TessResultCallback1 { template class _TessMemberResultCallback_0_1 - : public TessCallback1 { + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(A1) ; + typedef void (T::*MemberSignature)(A1); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_1( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _TessMemberResultCallback_0_1(T* object, MemberSignature member) + : object_(object), member_(member) {} virtual void Run(A1 a1) { if (!del) { @@ -1863,38 +2093,32 @@ class _TessMemberResultCallback_0_1 #ifndef SWIG template -inline typename _TessMemberResultCallback_0_1::base* -NewTessCallback( - T1* obj, R (T2::*member)(A1) ) { - return new _TessMemberResultCallback_0_1( - obj, member); +inline typename _TessMemberResultCallback_0_1::base* +NewTessCallback(T1* obj, R (T2::*member)(A1)) { + return new _TessMemberResultCallback_0_1(obj, member); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_0_1::base* -NewPermanentTessCallback( - T1* obj, R (T2::*member)(A1) ) { - return new _TessMemberResultCallback_0_1( - obj, member); +inline typename _TessMemberResultCallback_0_1::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(A1)) { + return new _TessMemberResultCallback_0_1(obj, member); } #endif template -class _TessFunctionResultCallback_0_1 : public TessResultCallback1 { +class _TessFunctionResultCallback_0_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; + typedef TessResultCallback1 base; typedef R (*FunctionSignature)(A1); private: FunctionSignature function_; public: - inline _TessFunctionResultCallback_0_1( - FunctionSignature function) - : function_(function) { - } + inline _TessFunctionResultCallback_0_1(FunctionSignature function) + : function_(function) {} virtual R Run(A1 a1) { if (!del) { @@ -1912,7 +2136,7 @@ class _TessFunctionResultCallback_0_1 : public TessResultCallback1 { template class _TessFunctionResultCallback_0_1 - : public TessCallback1 { + : public TessCallback1 { public: typedef TessCallback1 base; typedef void (*FunctionSignature)(A1); @@ -1921,10 +2145,8 @@ class _TessFunctionResultCallback_0_1 FunctionSignature function_; public: - inline _TessFunctionResultCallback_0_1( - FunctionSignature function) - : function_(function) { - } + inline _TessFunctionResultCallback_0_1(FunctionSignature function) + : function_(function) {} virtual void Run(A1 a1) { if (!del) { @@ -1939,22 +2161,22 @@ class _TessFunctionResultCallback_0_1 }; template -inline typename _TessFunctionResultCallback_0_1::base* +inline typename _TessFunctionResultCallback_0_1::base* NewTessCallback(R (*function)(A1)) { - return new _TessFunctionResultCallback_0_1(function); + return new _TessFunctionResultCallback_0_1(function); } template -inline typename _TessFunctionResultCallback_0_1::base* +inline typename _TessFunctionResultCallback_0_1::base* NewPermanentTessCallback(R (*function)(A1)) { - return new _TessFunctionResultCallback_0_1(function); + return new _TessFunctionResultCallback_0_1(function); } template -class _ConstTessMemberResultCallback_1_1 : public TessResultCallback1 { +class _ConstTessMemberResultCallback_1_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,A1) const; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, A1) const; private: const T* object_; @@ -1962,16 +2184,16 @@ class _ConstTessMemberResultCallback_1_1 : public TessResultCallback1 { typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_1(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_1(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,a1); + R result = (object_->*member_)(p1_, a1); return result; } else { - R result = (object_->*member_)(p1_,a1); + R result = (object_->*member_)(p1_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -1981,10 +2203,11 @@ class _ConstTessMemberResultCallback_1_1 : public TessResultCallback1 { }; template -class _ConstTessMemberResultCallback_1_1 : public TessCallback1 { +class _ConstTessMemberResultCallback_1_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,A1) const; + typedef void (T::*MemberSignature)(P1, A1) const; private: const T* object_; @@ -1992,15 +2215,15 @@ class _ConstTessMemberResultCallback_1_1 : public TessCall typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_1(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_1(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,a1); + (object_->*member_)(p1_, a1); } else { - (object_->*member_)(p1_,a1); + (object_->*member_)(p1_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2010,42 +2233,45 @@ class _ConstTessMemberResultCallback_1_1 : public TessCall #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_1_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,A1) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_1(obj, member, p1); +inline typename _ConstTessMemberResultCallback_1_1::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, A1) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_1( + obj, member, p1); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_1_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,A1) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_1(obj, member, p1); +inline typename _ConstTessMemberResultCallback_1_1::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1, A1) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_1( + obj, member, p1); } #endif template -class _TessMemberResultCallback_1_1 : public TessResultCallback1 { +class _TessMemberResultCallback_1_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,A1) ; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_1( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_1(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,a1); + R result = (object_->*member_)(p1_, a1); return result; } else { - R result = (object_->*member_)(p1_,a1); + R result = (object_->*member_)(p1_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2055,26 +2281,26 @@ class _TessMemberResultCallback_1_1 : public TessResultCallback1 { }; template -class _TessMemberResultCallback_1_1 : public TessCallback1 { +class _TessMemberResultCallback_1_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,A1) ; + typedef void (T::*MemberSignature)(P1, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_1( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_1(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,a1); + (object_->*member_)(p1_, a1); } else { - (object_->*member_)(p1_,a1); + (object_->*member_)(p1_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2084,25 +2310,29 @@ class _TessMemberResultCallback_1_1 : public TessCallback1 #ifndef SWIG template -inline typename _TessMemberResultCallback_1_1::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,A1) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_1(obj, member, p1); +inline typename _TessMemberResultCallback_1_1::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, A1), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_1(obj, member, + p1); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_1_1::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,A1) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_1(obj, member, p1); +inline typename _TessMemberResultCallback_1_1::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, A1), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_1(obj, member, + p1); } #endif template -class _TessFunctionResultCallback_1_1 : public TessResultCallback1 { +class _TessFunctionResultCallback_1_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,A1); + typedef TessResultCallback1 base; + typedef R (*FunctionSignature)(P1, A1); private: FunctionSignature function_; @@ -2110,14 +2340,14 @@ class _TessFunctionResultCallback_1_1 : public TessResultCallback1 { public: inline _TessFunctionResultCallback_1_1(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} virtual R Run(A1 a1) { if (!del) { - R result = (*function_)(p1_,a1); + R result = (*function_)(p1_, a1); return result; } else { - R result = (*function_)(p1_,a1); + R result = (*function_)(p1_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -2127,10 +2357,11 @@ class _TessFunctionResultCallback_1_1 : public TessResultCallback1 { }; template -class _TessFunctionResultCallback_1_1 : public TessCallback1 { +class _TessFunctionResultCallback_1_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,A1); + typedef void (*FunctionSignature)(P1, A1); private: FunctionSignature function_; @@ -2138,13 +2369,13 @@ class _TessFunctionResultCallback_1_1 : public TessCallback1< public: inline _TessFunctionResultCallback_1_1(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} virtual void Run(A1 a1) { if (!del) { - (*function_)(p1_,a1); + (*function_)(p1_, a1); } else { - (*function_)(p1_,a1); + (*function_)(p1_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -2153,22 +2384,23 @@ class _TessFunctionResultCallback_1_1 : public TessCallback1< }; template -inline typename _TessFunctionResultCallback_1_1::base* -NewTessCallback(R (*function)(P1,A1), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_1(function, p1); +inline typename _TessFunctionResultCallback_1_1::base* +NewTessCallback(R (*function)(P1, A1), typename Identity::type p1) { + return new _TessFunctionResultCallback_1_1(function, p1); } template -inline typename _TessFunctionResultCallback_1_1::base* -NewPermanentTessCallback(R (*function)(P1,A1), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_1(function, p1); +inline typename _TessFunctionResultCallback_1_1::base* +NewPermanentTessCallback(R (*function)(P1, A1), + typename Identity::type p1) { + return new _TessFunctionResultCallback_1_1(function, p1); } template -class _ConstTessMemberResultCallback_2_1 : public TessResultCallback1 { +class _ConstTessMemberResultCallback_2_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,A1) const; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, A1) const; private: const T* object_; @@ -2177,16 +2409,17 @@ class _ConstTessMemberResultCallback_2_1 : public TessResultCallback1 { typename remove_reference::type p2_; public: - inline _ConstTessMemberResultCallback_2_1(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_1(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1); + R result = (object_->*member_)(p1_, p2_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1); + R result = (object_->*member_)(p1_, p2_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2196,10 +2429,11 @@ class _ConstTessMemberResultCallback_2_1 : public TessResultCallback1 { }; template -class _ConstTessMemberResultCallback_2_1 : public TessCallback1 { +class _ConstTessMemberResultCallback_2_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,A1) const; + typedef void (T::*MemberSignature)(P1, P2, A1) const; private: const T* object_; @@ -2208,15 +2442,16 @@ class _ConstTessMemberResultCallback_2_1 : public Tess typename remove_reference::type p2_; public: - inline _ConstTessMemberResultCallback_2_1(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_1(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,a1); + (object_->*member_)(p1_, p2_, a1); } else { - (object_->*member_)(p1_,p2_,a1); + (object_->*member_)(p1_, p2_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2226,43 +2461,51 @@ class _ConstTessMemberResultCallback_2_1 : public Tess #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_2_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_1(obj, member, p1, p2); +inline + typename _ConstTessMemberResultCallback_2_1::base* + NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, A1) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_1( + obj, member, p1, p2); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_2_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_1(obj, member, p1, p2); +inline + typename _ConstTessMemberResultCallback_2_1::base* + NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1, P2, A1) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_1( + obj, member, p1, p2); } #endif template -class _TessMemberResultCallback_2_1 : public TessResultCallback1 { +class _TessMemberResultCallback_2_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,A1) ; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_1( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_1(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1); + R result = (object_->*member_)(p1_, p2_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1); + R result = (object_->*member_)(p1_, p2_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2272,27 +2515,28 @@ class _TessMemberResultCallback_2_1 : public TessResultCallback1 { }; template -class _TessMemberResultCallback_2_1 : public TessCallback1 { +class _TessMemberResultCallback_2_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,A1) ; + typedef void (T::*MemberSignature)(P1, P2, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_1( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_1(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,a1); + (object_->*member_)(p1_, p2_, a1); } else { - (object_->*member_)(p1_,p2_,a1); + (object_->*member_)(p1_, p2_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2302,25 +2546,31 @@ class _TessMemberResultCallback_2_1 : public TessCallb #ifndef SWIG template -inline typename _TessMemberResultCallback_2_1::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,A1) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_1(obj, member, p1, p2); +inline typename _TessMemberResultCallback_2_1::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, A1), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_1(obj, member, + p1, p2); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_2_1::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,A1) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_1(obj, member, p1, p2); +inline typename _TessMemberResultCallback_2_1::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, A1), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_1( + obj, member, p1, p2); } #endif template -class _TessFunctionResultCallback_2_1 : public TessResultCallback1 { +class _TessFunctionResultCallback_2_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,A1); + typedef TessResultCallback1 base; + typedef R (*FunctionSignature)(P1, P2, A1); private: FunctionSignature function_; @@ -2328,15 +2578,16 @@ class _TessFunctionResultCallback_2_1 : public TessResultCallback1 { typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_1(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_1(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} virtual R Run(A1 a1) { if (!del) { - R result = (*function_)(p1_,p2_,a1); + R result = (*function_)(p1_, p2_, a1); return result; } else { - R result = (*function_)(p1_,p2_,a1); + R result = (*function_)(p1_, p2_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -2346,10 +2597,11 @@ class _TessFunctionResultCallback_2_1 : public TessResultCallback1 { }; template -class _TessFunctionResultCallback_2_1 : public TessCallback1 { +class _TessFunctionResultCallback_2_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,A1); + typedef void (*FunctionSignature)(P1, P2, A1); private: FunctionSignature function_; @@ -2357,14 +2609,15 @@ class _TessFunctionResultCallback_2_1 : public TessCallba typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_1(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_1(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} virtual void Run(A1 a1) { if (!del) { - (*function_)(p1_,p2_,a1); + (*function_)(p1_, p2_, a1); } else { - (*function_)(p1_,p2_,a1); + (*function_)(p1_, p2_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -2373,22 +2626,27 @@ class _TessFunctionResultCallback_2_1 : public TessCallba }; template -inline typename _TessFunctionResultCallback_2_1::base* -NewTessCallback(R (*function)(P1,P2,A1), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_1(function, p1, p2); +inline typename _TessFunctionResultCallback_2_1::base* +NewTessCallback(R (*function)(P1, P2, A1), typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_1(function, p1, + p2); } template -inline typename _TessFunctionResultCallback_2_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_1(function, p1, p2); +inline typename _TessFunctionResultCallback_2_1::base* +NewPermanentTessCallback(R (*function)(P1, P2, A1), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_1(function, p1, + p2); } template -class _ConstTessMemberResultCallback_3_1 : public TessResultCallback1 { +class _ConstTessMemberResultCallback_3_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1) const; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1) const; private: const T* object_; @@ -2398,16 +2656,17 @@ class _ConstTessMemberResultCallback_3_1 : public TessResultCallback1 { typename remove_reference::type p3_; public: - inline _ConstTessMemberResultCallback_3_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_1(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2417,10 +2676,11 @@ class _ConstTessMemberResultCallback_3_1 : public TessResultCallback1 { }; template -class _ConstTessMemberResultCallback_3_1 : public TessCallback1 { +class _ConstTessMemberResultCallback_3_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1) const; + typedef void (T::*MemberSignature)(P1, P2, P3, A1) const; private: const T* object_; @@ -2430,15 +2690,16 @@ class _ConstTessMemberResultCallback_3_1 : public typename remove_reference::type p3_; public: - inline _ConstTessMemberResultCallback_3_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_1(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1); + (object_->*member_)(p1_, p2_, p3_, a1); } else { - (object_->*member_)(p1_,p2_,p3_,a1); + (object_->*member_)(p1_, p2_, p3_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2448,44 +2709,53 @@ class _ConstTessMemberResultCallback_3_1 : public #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_3_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_1(obj, member, p1, p2, p3); +inline typename _ConstTessMemberResultCallback_3_1::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, A1) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_1( + obj, member, p1, p2, p3); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_3_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_1(obj, member, p1, p2, p3); +inline typename _ConstTessMemberResultCallback_3_1::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, A1) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_1( + obj, member, p1, p2, p3); } #endif template -class _TessMemberResultCallback_3_1 : public TessResultCallback1 { +class _TessMemberResultCallback_3_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1) ; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_1( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_1(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2495,28 +2765,29 @@ class _TessMemberResultCallback_3_1 : public TessResultCallback1 { }; template -class _TessMemberResultCallback_3_1 : public TessCallback1 { +class _TessMemberResultCallback_3_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1) ; + typedef void (T::*MemberSignature)(P1, P2, P3, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_1( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_1(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1); + (object_->*member_)(p1_, p2_, p3_, a1); } else { - (object_->*member_)(p1_,p2_,p3_,a1); + (object_->*member_)(p1_, p2_, p3_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2526,25 +2797,35 @@ class _TessMemberResultCallback_3_1 : public TessC #ifndef SWIG template -inline typename _TessMemberResultCallback_3_1::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_1(obj, member, p1, p2, p3); +inline + typename _TessMemberResultCallback_3_1::base* + NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_1( + obj, member, p1, p2, p3); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_3_1::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_1(obj, member, p1, p2, p3); +inline + typename _TessMemberResultCallback_3_1::base* + NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_1( + obj, member, p1, p2, p3); } #endif template -class _TessFunctionResultCallback_3_1 : public TessResultCallback1 { +class _TessFunctionResultCallback_3_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1); + typedef TessResultCallback1 base; + typedef R (*FunctionSignature)(P1, P2, P3, A1); private: FunctionSignature function_; @@ -2553,15 +2834,16 @@ class _TessFunctionResultCallback_3_1 : public TessResultCallback1 { typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_1(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_1(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} virtual R Run(A1 a1) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1); + R result = (*function_)(p1_, p2_, p3_, a1); return result; } else { - R result = (*function_)(p1_,p2_,p3_,a1); + R result = (*function_)(p1_, p2_, p3_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -2571,10 +2853,11 @@ class _TessFunctionResultCallback_3_1 : public TessResultCallback1 { }; template -class _TessFunctionResultCallback_3_1 : public TessCallback1 { +class _TessFunctionResultCallback_3_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1); + typedef void (*FunctionSignature)(P1, P2, P3, A1); private: FunctionSignature function_; @@ -2583,14 +2866,15 @@ class _TessFunctionResultCallback_3_1 : public TessCa typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_1(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_1(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} virtual void Run(A1 a1) { if (!del) { - (*function_)(p1_,p2_,p3_,a1); + (*function_)(p1_, p2_, p3_, a1); } else { - (*function_)(p1_,p2_,p3_,a1); + (*function_)(p1_, p2_, p3_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -2599,22 +2883,30 @@ class _TessFunctionResultCallback_3_1 : public TessCa }; template -inline typename _TessFunctionResultCallback_3_1::base* -NewTessCallback(R (*function)(P1,P2,P3,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_1(function, p1, p2, p3); +inline typename _TessFunctionResultCallback_3_1::base* +NewTessCallback(R (*function)(P1, P2, P3, A1), typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_1( + function, p1, p2, p3); } template -inline typename _TessFunctionResultCallback_3_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_1(function, p1, p2, p3); +inline typename _TessFunctionResultCallback_3_1::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_1( + function, p1, p2, p3); } -template -class _ConstTessMemberResultCallback_4_1 : public TessResultCallback1 { +template +class _ConstTessMemberResultCallback_4_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1) const; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1) const; private: const T* object_; @@ -2625,16 +2917,17 @@ class _ConstTessMemberResultCallback_4_1 : public TessResultCallback1 { typename remove_reference::type p4_; public: - inline _ConstTessMemberResultCallback_4_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_1(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2644,10 +2937,11 @@ class _ConstTessMemberResultCallback_4_1 : public TessResultCallback1 { }; template -class _ConstTessMemberResultCallback_4_1 : public TessCallback1 { +class _ConstTessMemberResultCallback_4_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1) const; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1) const; private: const T* object_; @@ -2658,15 +2952,16 @@ class _ConstTessMemberResultCallback_4_1 : pub typename remove_reference::type p4_; public: - inline _ConstTessMemberResultCallback_4_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_1(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, a1); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2675,29 +2970,46 @@ class _ConstTessMemberResultCallback_4_1 : pub }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_1(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_1::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, P4, A1) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_1(obj, member, p1, p2, p3, + p4); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_1(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_1::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_1(obj, member, p1, p2, p3, + p4); } #endif -template -class _TessMemberResultCallback_4_1 : public TessResultCallback1 { +template +class _TessMemberResultCallback_4_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1) ; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -2705,16 +3017,16 @@ class _TessMemberResultCallback_4_1 : public TessResultCallback1 { typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_1( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_1(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2724,13 +3036,14 @@ class _TessMemberResultCallback_4_1 : public TessResultCallback1 { }; template -class _TessMemberResultCallback_4_1 : public TessCallback1 { +class _TessMemberResultCallback_4_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1) ; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -2738,15 +3051,15 @@ class _TessMemberResultCallback_4_1 : public T typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_1( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_1(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, a1); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2755,26 +3068,39 @@ class _TessMemberResultCallback_4_1 : public T }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_4_1::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_1(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_1::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, A1), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_1( + obj, member, p1, p2, p3, p4); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_4_1::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_1(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_1::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_1( + obj, member, p1, p2, p3, p4); } #endif template -class _TessFunctionResultCallback_4_1 : public TessResultCallback1 { +class _TessFunctionResultCallback_4_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1); + typedef TessResultCallback1 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, A1); private: FunctionSignature function_; @@ -2784,15 +3110,16 @@ class _TessFunctionResultCallback_4_1 : public TessResultCallback1 { typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_1(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual R Run(A1 a1) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1); + R result = (*function_)(p1_, p2_, p3_, p4_, a1); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1); + R result = (*function_)(p1_, p2_, p3_, p4_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -2802,10 +3129,11 @@ class _TessFunctionResultCallback_4_1 : public TessResultCallback1 { }; template -class _TessFunctionResultCallback_4_1 : public TessCallback1 { +class _TessFunctionResultCallback_4_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1); + typedef void (*FunctionSignature)(P1, P2, P3, P4, A1); private: FunctionSignature function_; @@ -2815,14 +3143,15 @@ class _TessFunctionResultCallback_4_1 : public Te typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_1(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} virtual void Run(A1 a1) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1); + (*function_)(p1_, p2_, p3_, p4_, a1); } else { - (*function_)(p1_,p2_,p3_,p4_,a1); + (*function_)(p1_, p2_, p3_, p4_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -2831,22 +3160,35 @@ class _TessFunctionResultCallback_4_1 : public Te }; template -inline typename _TessFunctionResultCallback_4_1::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_1(function, p1, p2, p3, p4); +inline + typename _TessFunctionResultCallback_4_1::base* + NewTessCallback(R (*function)(P1, P2, P3, P4, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_1( + function, p1, p2, p3, p4); } template -inline typename _TessFunctionResultCallback_4_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_1(function, p1, p2, p3, p4); +inline typename _TessFunctionResultCallback_4_1::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_1( + function, p1, p2, p3, p4); } -template -class _ConstTessMemberResultCallback_5_1 : public TessResultCallback1 { +template +class _ConstTessMemberResultCallback_5_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1) const; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1) const; private: const T* object_; @@ -2858,16 +3200,23 @@ class _ConstTessMemberResultCallback_5_1 : public TessResultCallback1 { typename remove_reference::type p5_; public: - inline _ConstTessMemberResultCallback_5_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_1(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2876,11 +3225,13 @@ class _ConstTessMemberResultCallback_5_1 : public TessResultCallback1 { } }; -template -class _ConstTessMemberResultCallback_5_1 : public TessCallback1 { +template +class _ConstTessMemberResultCallback_5_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1) const; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1) const; private: const T* object_; @@ -2892,15 +3243,22 @@ class _ConstTessMemberResultCallback_5_1 : typename remove_reference::type p5_; public: - inline _ConstTessMemberResultCallback_5_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_1(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2909,29 +3267,47 @@ class _ConstTessMemberResultCallback_5_1 : }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_1::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, + p5); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_1::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_1(obj, member, p1, p2, p3, + p4, p5); } #endif -template -class _TessMemberResultCallback_5_1 : public TessResultCallback1 { +template +class _TessMemberResultCallback_5_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1) ; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -2940,16 +3316,22 @@ class _TessMemberResultCallback_5_1 : public TessResultCallback1 { typename remove_reference::type p5_; public: - inline _TessMemberResultCallback_5_1( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_1(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2958,14 +3340,16 @@ class _TessMemberResultCallback_5_1 : public TessResultCallback1 { } }; -template -class _TessMemberResultCallback_5_1 : public TessCallback1 { +template +class _TessMemberResultCallback_5_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1) ; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -2974,15 +3358,21 @@ class _TessMemberResultCallback_5_1 : publ typename remove_reference::type p5_; public: - inline _TessMemberResultCallback_5_1( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_1(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -2991,26 +3381,41 @@ class _TessMemberResultCallback_5_1 : publ }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_1::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_1::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_1( + obj, member, p1, p2, p3, p4, p5); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_1::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_1::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_1(obj, member, p1, p2, p3, p4, p5); } #endif -template -class _TessFunctionResultCallback_5_1 : public TessResultCallback1 { +template +class _TessFunctionResultCallback_5_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1); + typedef TessResultCallback1 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, A1); private: FunctionSignature function_; @@ -3021,15 +3426,16 @@ class _TessFunctionResultCallback_5_1 : public TessResultCallback1 { typename remove_reference::type p5_; public: - inline _TessFunctionResultCallback_5_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_1(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} virtual R Run(A1 a1) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3039,10 +3445,11 @@ class _TessFunctionResultCallback_5_1 : public TessResultCallback1 { }; template -class _TessFunctionResultCallback_5_1 : public TessCallback1 { +class _TessFunctionResultCallback_5_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1); + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, A1); private: FunctionSignature function_; @@ -3053,14 +3460,15 @@ class _TessFunctionResultCallback_5_1 : publi typename remove_reference::type p5_; public: - inline _TessFunctionResultCallback_5_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_1(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} virtual void Run(A1 a1) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3069,22 +3477,35 @@ class _TessFunctionResultCallback_5_1 : publi }; template -inline typename _TessFunctionResultCallback_5_1::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_1(function, p1, p2, p3, p4, p5); +inline typename _TessFunctionResultCallback_5_1::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, A1), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_1( + function, p1, p2, p3, p4, p5); } template -inline typename _TessFunctionResultCallback_5_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_1(function, p1, p2, p3, p4, p5); +inline typename _TessFunctionResultCallback_5_1::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_1( + function, p1, p2, p3, p4, p5); } -template -class _ConstTessMemberResultCallback_6_1 : public TessResultCallback1 { +template +class _ConstTessMemberResultCallback_6_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1) const; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1) const; private: const T* object_; @@ -3097,16 +3518,24 @@ class _ConstTessMemberResultCallback_6_1 : public TessResultCallback1 { typename remove_reference::type p6_; public: - inline _ConstTessMemberResultCallback_6_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_1(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3115,11 +3544,13 @@ class _ConstTessMemberResultCallback_6_1 : public TessResultCallback1 { } }; -template -class _ConstTessMemberResultCallback_6_1 : public TessCallback1 { +template +class _ConstTessMemberResultCallback_6_1 : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1) const; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1) const; private: const T* object_; @@ -3132,15 +3563,23 @@ class _ConstTessMemberResultCallback_6_1::type p6_; public: - inline _ConstTessMemberResultCallback_6_1(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_1(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3149,29 +3588,47 @@ class _ConstTessMemberResultCallback_6_1 -inline typename _ConstTessMemberResultCallback_6_1::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_1::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_1(obj, member, p1, p2, p3, + p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_1::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_1::base* +NewPermanentTessCallback( + const T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_1(obj, member, p1, p2, + p3, p4, p5, p6); } #endif -template -class _TessMemberResultCallback_6_1 : public TessResultCallback1 { +template +class _TessMemberResultCallback_6_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1) ; + typedef TessResultCallback1 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -3181,16 +3638,23 @@ class _TessMemberResultCallback_6_1 : public TessResultCallback1 { typename remove_reference::type p6_; public: - inline _TessMemberResultCallback_6_1( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_1(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual R Run(A1 a1) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3199,14 +3663,16 @@ class _TessMemberResultCallback_6_1 : public TessResultCallback1 { } }; -template -class _TessMemberResultCallback_6_1 : public TessCallback1 { +template +class _TessMemberResultCallback_6_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1) ; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -3216,15 +3682,22 @@ class _TessMemberResultCallback_6_1 : typename remove_reference::type p6_; public: - inline _TessMemberResultCallback_6_1( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_1(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual void Run(A1 a1) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3233,26 +3706,45 @@ class _TessMemberResultCallback_6_1 : }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_6_1::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_1::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, + p6); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_6_1::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_1::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessMemberResultCallback_6_1(obj, member, p1, p2, p3, p4, p5, + p6); } #endif -template -class _TessFunctionResultCallback_6_1 : public TessResultCallback1 { +template +class _TessFunctionResultCallback_6_1 : public TessResultCallback1 { public: - typedef TessResultCallback1 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1); + typedef TessResultCallback1 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1); private: FunctionSignature function_; @@ -3264,15 +3756,22 @@ class _TessFunctionResultCallback_6_1 : public TessResultCallback1 { typename remove_reference::type p6_; public: - inline _TessFunctionResultCallback_6_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_1(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual R Run(A1 a1) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3281,11 +3780,13 @@ class _TessFunctionResultCallback_6_1 : public TessResultCallback1 { } }; -template -class _TessFunctionResultCallback_6_1 : public TessCallback1 { +template +class _TessFunctionResultCallback_6_1 + : public TessCallback1 { public: typedef TessCallback1 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1); + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1); private: FunctionSignature function_; @@ -3297,14 +3798,21 @@ class _TessFunctionResultCallback_6_1 : p typename remove_reference::type p6_; public: - inline _TessFunctionResultCallback_6_1(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_1(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} virtual void Run(A1 a1) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3312,41 +3820,58 @@ class _TessFunctionResultCallback_6_1 : p } }; -template -inline typename _TessFunctionResultCallback_6_1::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_1(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_1::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_1(function, p1, p2, p3, p4, p5, + p6); } -template -inline typename _TessFunctionResultCallback_6_1::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_1(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_1::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_1(function, p1, p2, p3, p4, p5, + p6); } template -class _ConstTessMemberResultCallback_0_2 : public TessResultCallback2 { +class _ConstTessMemberResultCallback_0_2 + : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(A1,A2) const; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(A1, A2) const; private: const T* object_; MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_2( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _ConstTessMemberResultCallback_0_2(const T* object, + MemberSignature member) + : object_(object), member_(member) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(a1,a2); + R result = (object_->*member_)(a1, a2); return result; } else { - R result = (object_->*member_)(a1,a2); + R result = (object_->*member_)(a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3357,27 +3882,25 @@ class _ConstTessMemberResultCallback_0_2 : public TessResultCallback2 { template class _ConstTessMemberResultCallback_0_2 - : public TessCallback2 { + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(A1,A2) const; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(A1, A2) const; private: const T* object_; MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_2( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _ConstTessMemberResultCallback_0_2(const T* object, + MemberSignature member) + : object_(object), member_(member) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(a1,a2); + (object_->*member_)(a1, a2); } else { - (object_->*member_)(a1,a2); + (object_->*member_)(a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3387,47 +3910,42 @@ class _ConstTessMemberResultCallback_0_2 #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_2::base* -NewTessCallback( - const T1* obj, R (T2::*member)(A1,A2) const) { - return new _ConstTessMemberResultCallback_0_2( - obj, member); +inline typename _ConstTessMemberResultCallback_0_2::base* +NewTessCallback(const T1* obj, R (T2::*member)(A1, A2) const) { + return new _ConstTessMemberResultCallback_0_2(obj, + member); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_2::base* -NewPermanentTessCallback( - const T1* obj, R (T2::*member)(A1,A2) const) { - return new _ConstTessMemberResultCallback_0_2( - obj, member); +inline typename _ConstTessMemberResultCallback_0_2::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(A1, A2) const) { + return new _ConstTessMemberResultCallback_0_2(obj, + member); } #endif template -class _TessMemberResultCallback_0_2 : public TessResultCallback2 { +class _TessMemberResultCallback_0_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(A1,A2) ; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(A1, A2); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_2( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _TessMemberResultCallback_0_2(T* object, MemberSignature member) + : object_(object), member_(member) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(a1,a2); + R result = (object_->*member_)(a1, a2); return result; } else { - R result = (object_->*member_)(a1,a2); + R result = (object_->*member_)(a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3438,27 +3956,24 @@ class _TessMemberResultCallback_0_2 : public TessResultCallback2 { template class _TessMemberResultCallback_0_2 - : public TessCallback2 { + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(A1,A2) ; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(A1, A2); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_2( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _TessMemberResultCallback_0_2(T* object, MemberSignature member) + : object_(object), member_(member) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(a1,a2); + (object_->*member_)(a1, a2); } else { - (object_->*member_)(a1,a2); + (object_->*member_)(a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3468,45 +3983,39 @@ class _TessMemberResultCallback_0_2 #ifndef SWIG template -inline typename _TessMemberResultCallback_0_2::base* -NewTessCallback( - T1* obj, R (T2::*member)(A1,A2) ) { - return new _TessMemberResultCallback_0_2( - obj, member); +inline typename _TessMemberResultCallback_0_2::base* +NewTessCallback(T1* obj, R (T2::*member)(A1, A2)) { + return new _TessMemberResultCallback_0_2(obj, member); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_0_2::base* -NewPermanentTessCallback( - T1* obj, R (T2::*member)(A1,A2) ) { - return new _TessMemberResultCallback_0_2( - obj, member); +inline typename _TessMemberResultCallback_0_2::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(A1, A2)) { + return new _TessMemberResultCallback_0_2(obj, member); } #endif template -class _TessFunctionResultCallback_0_2 : public TessResultCallback2 { +class _TessFunctionResultCallback_0_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(A1,A2); + typedef TessResultCallback2 base; + typedef R (*FunctionSignature)(A1, A2); private: FunctionSignature function_; public: - inline _TessFunctionResultCallback_0_2( - FunctionSignature function) - : function_(function) { - } + inline _TessFunctionResultCallback_0_2(FunctionSignature function) + : function_(function) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (*function_)(a1,a2); + R result = (*function_)(a1, a2); return result; } else { - R result = (*function_)(a1,a2); + R result = (*function_)(a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3517,25 +4026,23 @@ class _TessFunctionResultCallback_0_2 : public TessResultCallback2 { template class _TessFunctionResultCallback_0_2 - : public TessCallback2 { + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(A1,A2); + typedef TessCallback2 base; + typedef void (*FunctionSignature)(A1, A2); private: FunctionSignature function_; public: - inline _TessFunctionResultCallback_0_2( - FunctionSignature function) - : function_(function) { - } + inline _TessFunctionResultCallback_0_2(FunctionSignature function) + : function_(function) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (*function_)(a1,a2); + (*function_)(a1, a2); } else { - (*function_)(a1,a2); + (*function_)(a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3544,22 +4051,23 @@ class _TessFunctionResultCallback_0_2 }; template -inline typename _TessFunctionResultCallback_0_2::base* -NewTessCallback(R (*function)(A1,A2)) { - return new _TessFunctionResultCallback_0_2(function); +inline typename _TessFunctionResultCallback_0_2::base* +NewTessCallback(R (*function)(A1, A2)) { + return new _TessFunctionResultCallback_0_2(function); } template -inline typename _TessFunctionResultCallback_0_2::base* -NewPermanentTessCallback(R (*function)(A1,A2)) { - return new _TessFunctionResultCallback_0_2(function); +inline typename _TessFunctionResultCallback_0_2::base* +NewPermanentTessCallback(R (*function)(A1, A2)) { + return new _TessFunctionResultCallback_0_2(function); } template -class _ConstTessMemberResultCallback_1_2 : public TessResultCallback2 { +class _ConstTessMemberResultCallback_1_2 + : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,A1,A2) const; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, A1, A2) const; private: const T* object_; @@ -3567,16 +4075,16 @@ class _ConstTessMemberResultCallback_1_2 : public TessResultCallback2 { typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_2(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_2(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,a1,a2); + R result = (object_->*member_)(p1_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,a1,a2); + R result = (object_->*member_)(p1_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3586,10 +4094,11 @@ class _ConstTessMemberResultCallback_1_2 : public TessResultCallback2 { }; template -class _ConstTessMemberResultCallback_1_2 : public TessCallback2 { +class _ConstTessMemberResultCallback_1_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,A1,A2) const; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, A1, A2) const; private: const T* object_; @@ -3597,15 +4106,15 @@ class _ConstTessMemberResultCallback_1_2 : public Tess typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_2(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_2(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,a1,a2); + (object_->*member_)(p1_, a1, a2); } else { - (object_->*member_)(p1_,a1,a2); + (object_->*member_)(p1_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3615,42 +4124,47 @@ class _ConstTessMemberResultCallback_1_2 : public Tess #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_1_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_2(obj, member, p1); +inline + typename _ConstTessMemberResultCallback_1_2::base* + NewTessCallback(const T1* obj, R (T2::*member)(P1, A1, A2) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_2( + obj, member, p1); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_1_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_2(obj, member, p1); +inline + typename _ConstTessMemberResultCallback_1_2::base* + NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1, A1, A2) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_2( + obj, member, p1); } #endif template -class _TessMemberResultCallback_1_2 : public TessResultCallback2 { +class _TessMemberResultCallback_1_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,A1,A2) ; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_2( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_2(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,a1,a2); + R result = (object_->*member_)(p1_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,a1,a2); + R result = (object_->*member_)(p1_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3660,26 +4174,26 @@ class _TessMemberResultCallback_1_2 : public TessResultCallback2 { }; template -class _TessMemberResultCallback_1_2 : public TessCallback2 { +class _TessMemberResultCallback_1_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,A1,A2) ; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_2( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_2(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,a1,a2); + (object_->*member_)(p1_, a1, a2); } else { - (object_->*member_)(p1_,a1,a2); + (object_->*member_)(p1_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3689,25 +4203,29 @@ class _TessMemberResultCallback_1_2 : public TessCallb #ifndef SWIG template -inline typename _TessMemberResultCallback_1_2::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,A1,A2) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_2(obj, member, p1); +inline typename _TessMemberResultCallback_1_2::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, A1, A2), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_2(obj, member, + p1); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_1_2::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,A1,A2) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_2(obj, member, p1); +inline typename _TessMemberResultCallback_1_2::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, A1, A2), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_2( + obj, member, p1); } #endif template -class _TessFunctionResultCallback_1_2 : public TessResultCallback2 { +class _TessFunctionResultCallback_1_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,A1,A2); + typedef TessResultCallback2 base; + typedef R (*FunctionSignature)(P1, A1, A2); private: FunctionSignature function_; @@ -3715,14 +4233,14 @@ class _TessFunctionResultCallback_1_2 : public TessResultCallback2 { public: inline _TessFunctionResultCallback_1_2(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (*function_)(p1_,a1,a2); + R result = (*function_)(p1_, a1, a2); return result; } else { - R result = (*function_)(p1_,a1,a2); + R result = (*function_)(p1_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3732,10 +4250,11 @@ class _TessFunctionResultCallback_1_2 : public TessResultCallback2 { }; template -class _TessFunctionResultCallback_1_2 : public TessCallback2 { +class _TessFunctionResultCallback_1_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,A1,A2); + typedef TessCallback2 base; + typedef void (*FunctionSignature)(P1, A1, A2); private: FunctionSignature function_; @@ -3743,13 +4262,13 @@ class _TessFunctionResultCallback_1_2 : public TessCallba public: inline _TessFunctionResultCallback_1_2(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (*function_)(p1_,a1,a2); + (*function_)(p1_, a1, a2); } else { - (*function_)(p1_,a1,a2); + (*function_)(p1_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3758,22 +4277,25 @@ class _TessFunctionResultCallback_1_2 : public TessCallba }; template -inline typename _TessFunctionResultCallback_1_2::base* -NewTessCallback(R (*function)(P1,A1,A2), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_2(function, p1); +inline typename _TessFunctionResultCallback_1_2::base* +NewTessCallback(R (*function)(P1, A1, A2), typename Identity::type p1) { + return new _TessFunctionResultCallback_1_2(function, p1); } template -inline typename _TessFunctionResultCallback_1_2::base* -NewPermanentTessCallback(R (*function)(P1,A1,A2), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_2(function, p1); +inline typename _TessFunctionResultCallback_1_2::base* +NewPermanentTessCallback(R (*function)(P1, A1, A2), + typename Identity::type p1) { + return new _TessFunctionResultCallback_1_2(function, + p1); } template -class _ConstTessMemberResultCallback_2_2 : public TessResultCallback2 { +class _ConstTessMemberResultCallback_2_2 + : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2) const; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, A1, A2) const; private: const T* object_; @@ -3782,16 +4304,17 @@ class _ConstTessMemberResultCallback_2_2 : public TessResultCallback2 { typename remove_reference::type p2_; public: - inline _ConstTessMemberResultCallback_2_2(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_2(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2); + R result = (object_->*member_)(p1_, p2_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1,a2); + R result = (object_->*member_)(p1_, p2_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3801,10 +4324,11 @@ class _ConstTessMemberResultCallback_2_2 : public TessResultCallback2 { }; template -class _ConstTessMemberResultCallback_2_2 : public TessCallback2 { +class _ConstTessMemberResultCallback_2_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2) const; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, A1, A2) const; private: const T* object_; @@ -3813,15 +4337,16 @@ class _ConstTessMemberResultCallback_2_2 : public typename remove_reference::type p2_; public: - inline _ConstTessMemberResultCallback_2_2(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_2(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,a1,a2); + (object_->*member_)(p1_, p2_, a1, a2); } else { - (object_->*member_)(p1_,p2_,a1,a2); + (object_->*member_)(p1_, p2_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3831,43 +4356,51 @@ class _ConstTessMemberResultCallback_2_2 : public #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_2_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_2(obj, member, p1, p2); +inline typename _ConstTessMemberResultCallback_2_2::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, A1, A2) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_2( + obj, member, p1, p2); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_2_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_2(obj, member, p1, p2); +inline typename _ConstTessMemberResultCallback_2_2::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1, P2, A1, A2) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_2( + obj, member, p1, p2); } #endif template -class _TessMemberResultCallback_2_2 : public TessResultCallback2 { +class _TessMemberResultCallback_2_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2) ; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_2( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_2(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2); + R result = (object_->*member_)(p1_, p2_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1,a2); + R result = (object_->*member_)(p1_, p2_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3877,27 +4410,28 @@ class _TessMemberResultCallback_2_2 : public TessResultCallback2 { }; template -class _TessMemberResultCallback_2_2 : public TessCallback2 { +class _TessMemberResultCallback_2_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2) ; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_2( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_2(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,a1,a2); + (object_->*member_)(p1_, p2_, a1, a2); } else { - (object_->*member_)(p1_,p2_,a1,a2); + (object_->*member_)(p1_, p2_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -3907,25 +4441,33 @@ class _TessMemberResultCallback_2_2 : public TessC #ifndef SWIG template -inline typename _TessMemberResultCallback_2_2::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,A1,A2) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_2(obj, member, p1, p2); +inline + typename _TessMemberResultCallback_2_2::base* + NewTessCallback(T1* obj, R (T2::*member)(P1, P2, A1, A2), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_2( + obj, member, p1, p2); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_2_2::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,A1,A2) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_2(obj, member, p1, p2); +inline + typename _TessMemberResultCallback_2_2::base* + NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, A1, A2), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_2( + obj, member, p1, p2); } #endif template -class _TessFunctionResultCallback_2_2 : public TessResultCallback2 { +class _TessFunctionResultCallback_2_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,A1,A2); + typedef TessResultCallback2 base; + typedef R (*FunctionSignature)(P1, P2, A1, A2); private: FunctionSignature function_; @@ -3933,15 +4475,16 @@ class _TessFunctionResultCallback_2_2 : public TessResultCallback2 { typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_2(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_2(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (*function_)(p1_,p2_,a1,a2); + R result = (*function_)(p1_, p2_, a1, a2); return result; } else { - R result = (*function_)(p1_,p2_,a1,a2); + R result = (*function_)(p1_, p2_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3951,10 +4494,11 @@ class _TessFunctionResultCallback_2_2 : public TessResultCallback2 { }; template -class _TessFunctionResultCallback_2_2 : public TessCallback2 { +class _TessFunctionResultCallback_2_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,A1,A2); + typedef TessCallback2 base; + typedef void (*FunctionSignature)(P1, P2, A1, A2); private: FunctionSignature function_; @@ -3962,14 +4506,15 @@ class _TessFunctionResultCallback_2_2 : public TessCa typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_2(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_2(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (*function_)(p1_,p2_,a1,a2); + (*function_)(p1_, p2_, a1, a2); } else { - (*function_)(p1_,p2_,a1,a2); + (*function_)(p1_, p2_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -3978,22 +4523,29 @@ class _TessFunctionResultCallback_2_2 : public TessCa }; template -inline typename _TessFunctionResultCallback_2_2::base* -NewTessCallback(R (*function)(P1,P2,A1,A2), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_2(function, p1, p2); +inline typename _TessFunctionResultCallback_2_2::base* +NewTessCallback(R (*function)(P1, P2, A1, A2), typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_2(function, + p1, p2); } template -inline typename _TessFunctionResultCallback_2_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1,A2), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_2(function, p1, p2); +inline typename _TessFunctionResultCallback_2_2::base* +NewPermanentTessCallback(R (*function)(P1, P2, A1, A2), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_2(function, + p1, p2); } -template -class _ConstTessMemberResultCallback_3_2 : public TessResultCallback2 { +template +class _ConstTessMemberResultCallback_3_2 + : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2) const; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1, A2) const; private: const T* object_; @@ -4003,16 +4555,17 @@ class _ConstTessMemberResultCallback_3_2 : public TessResultCallback2 { typename remove_reference::type p3_; public: - inline _ConstTessMemberResultCallback_3_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_2(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4022,10 +4575,11 @@ class _ConstTessMemberResultCallback_3_2 : public TessResultCallback2 { }; template -class _ConstTessMemberResultCallback_3_2 : public TessCallback2 { +class _ConstTessMemberResultCallback_3_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2) const; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, P3, A1, A2) const; private: const T* object_; @@ -4035,15 +4589,16 @@ class _ConstTessMemberResultCallback_3_2 : pub typename remove_reference::type p3_; public: - inline _ConstTessMemberResultCallback_3_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_2(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, a1, a2); } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4052,45 +4607,58 @@ class _ConstTessMemberResultCallback_3_2 : pub }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_2(obj, member, p1, p2, p3); +template +inline typename _ConstTessMemberResultCallback_3_2::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, A1, A2) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_2(obj, member, p1, p2, p3); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_2(obj, member, p1, p2, p3); +template +inline typename _ConstTessMemberResultCallback_3_2::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, A1, A2) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_2(obj, member, p1, p2, p3); } #endif -template -class _TessMemberResultCallback_3_2 : public TessResultCallback2 { +template +class _TessMemberResultCallback_3_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2) ; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_2( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_2(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4100,28 +4668,29 @@ class _TessMemberResultCallback_3_2 : public TessResultCallback2 { }; template -class _TessMemberResultCallback_3_2 : public TessCallback2 { +class _TessMemberResultCallback_3_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2) ; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, P3, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_2( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_2(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, a1, a2); } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4130,26 +4699,37 @@ class _TessMemberResultCallback_3_2 : public T }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_3_2::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_2(obj, member, p1, p2, p3); +template +inline typename _TessMemberResultCallback_3_2::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1, A2), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_2( + obj, member, p1, p2, p3); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_3_2::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_2(obj, member, p1, p2, p3); +template +inline typename _TessMemberResultCallback_3_2::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1, A2), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_2( + obj, member, p1, p2, p3); } #endif template -class _TessFunctionResultCallback_3_2 : public TessResultCallback2 { +class _TessFunctionResultCallback_3_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1,A2); + typedef TessResultCallback2 base; + typedef R (*FunctionSignature)(P1, P2, P3, A1, A2); private: FunctionSignature function_; @@ -4158,15 +4738,16 @@ class _TessFunctionResultCallback_3_2 : public TessResultCallback2 { typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_2(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_2(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1,a2); + R result = (*function_)(p1_, p2_, p3_, a1, a2); return result; } else { - R result = (*function_)(p1_,p2_,p3_,a1,a2); + R result = (*function_)(p1_, p2_, p3_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -4176,10 +4757,11 @@ class _TessFunctionResultCallback_3_2 : public TessResultCallback2 { }; template -class _TessFunctionResultCallback_3_2 : public TessCallback2 { +class _TessFunctionResultCallback_3_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1,A2); + typedef TessCallback2 base; + typedef void (*FunctionSignature)(P1, P2, P3, A1, A2); private: FunctionSignature function_; @@ -4188,14 +4770,15 @@ class _TessFunctionResultCallback_3_2 : public Te typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_2(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_2(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (*function_)(p1_,p2_,p3_,a1,a2); + (*function_)(p1_, p2_, p3_, a1, a2); } else { - (*function_)(p1_,p2_,p3_,a1,a2); + (*function_)(p1_, p2_, p3_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -4204,22 +4787,34 @@ class _TessFunctionResultCallback_3_2 : public Te }; template -inline typename _TessFunctionResultCallback_3_2::base* -NewTessCallback(R (*function)(P1,P2,P3,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_2(function, p1, p2, p3); +inline + typename _TessFunctionResultCallback_3_2::base* + NewTessCallback(R (*function)(P1, P2, P3, A1, A2), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_2( + function, p1, p2, p3); } template -inline typename _TessFunctionResultCallback_3_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_2(function, p1, p2, p3); +inline typename _TessFunctionResultCallback_3_2::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, A1, A2), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_2( + function, p1, p2, p3); } -template -class _ConstTessMemberResultCallback_4_2 : public TessResultCallback2 { +template +class _ConstTessMemberResultCallback_4_2 + : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2) const; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1, A2) const; private: const T* object_; @@ -4230,16 +4825,17 @@ class _ConstTessMemberResultCallback_4_2 : public TessResultCallback2 { typename remove_reference::type p4_; public: - inline _ConstTessMemberResultCallback_4_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_2(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4248,11 +4844,13 @@ class _ConstTessMemberResultCallback_4_2 : public TessResultCallback2 { } }; -template -class _ConstTessMemberResultCallback_4_2 : public TessCallback2 { +template +class _ConstTessMemberResultCallback_4_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2) const; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1, A2) const; private: const T* object_; @@ -4263,15 +4861,16 @@ class _ConstTessMemberResultCallback_4_2 : typename remove_reference::type p4_; public: - inline _ConstTessMemberResultCallback_4_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_2(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4280,29 +4879,46 @@ class _ConstTessMemberResultCallback_4_2 : }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_2::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, P4, A1, A2) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_2(obj, member, p1, p2, p3, + p4); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_2::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1, A2) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_2(obj, member, p1, p2, p3, + p4); } #endif -template -class _TessMemberResultCallback_4_2 : public TessResultCallback2 { +template +class _TessMemberResultCallback_4_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2) ; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -4310,16 +4926,16 @@ class _TessMemberResultCallback_4_2 : public TessResultCallback2 { typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_2( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_2(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4328,14 +4944,16 @@ class _TessMemberResultCallback_4_2 : public TessResultCallback2 { } }; -template -class _TessMemberResultCallback_4_2 : public TessCallback2 { +template +class _TessMemberResultCallback_4_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2) ; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -4343,15 +4961,15 @@ class _TessMemberResultCallback_4_2 : publ typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_2( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_2(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4360,26 +4978,40 @@ class _TessMemberResultCallback_4_2 : publ }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_4_2::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_2::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, A1, A2), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_2( + obj, member, p1, p2, p3, p4); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_4_2::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_2::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, A1, A2), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_2(obj, member, p1, p2, p3, p4); } #endif -template -class _TessFunctionResultCallback_4_2 : public TessResultCallback2 { +template +class _TessFunctionResultCallback_4_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1,A2); + typedef TessResultCallback2 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, A1, A2); private: FunctionSignature function_; @@ -4389,15 +5021,16 @@ class _TessFunctionResultCallback_4_2 : public TessResultCallback2 { typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_2(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2); + R result = (*function_)(p1_, p2_, p3_, p4_, a1, a2); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2); + R result = (*function_)(p1_, p2_, p3_, p4_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -4407,10 +5040,11 @@ class _TessFunctionResultCallback_4_2 : public TessResultCallback2 { }; template -class _TessFunctionResultCallback_4_2 : public TessCallback2 { +class _TessFunctionResultCallback_4_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1,A2); + typedef TessCallback2 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, A1, A2); private: FunctionSignature function_; @@ -4420,14 +5054,15 @@ class _TessFunctionResultCallback_4_2 : publi typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_2(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1,a2); + (*function_)(p1_, p2_, p3_, p4_, a1, a2); } else { - (*function_)(p1_,p2_,p3_,p4_,a1,a2); + (*function_)(p1_, p2_, p3_, p4_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -4436,22 +5071,35 @@ class _TessFunctionResultCallback_4_2 : publi }; template -inline typename _TessFunctionResultCallback_4_2::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_2(function, p1, p2, p3, p4); +inline typename _TessFunctionResultCallback_4_2::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, A1, A2), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_2( + function, p1, p2, p3, p4); } template -inline typename _TessFunctionResultCallback_4_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_2(function, p1, p2, p3, p4); +inline typename _TessFunctionResultCallback_4_2::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, A1, A2), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_2( + function, p1, p2, p3, p4); } -template -class _ConstTessMemberResultCallback_5_2 : public TessResultCallback2 { +template +class _ConstTessMemberResultCallback_5_2 + : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2) const; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2) const; private: const T* object_; @@ -4463,16 +5111,23 @@ class _ConstTessMemberResultCallback_5_2 : public TessResultCallback2 { typename remove_reference::type p5_; public: - inline _ConstTessMemberResultCallback_5_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_2(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4481,11 +5136,13 @@ class _ConstTessMemberResultCallback_5_2 : public TessResultCallback2 { } }; -template -class _ConstTessMemberResultCallback_5_2 : public TessCallback2 { +template +class _ConstTessMemberResultCallback_5_2 : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2) const; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2) const; private: const T* object_; @@ -4497,15 +5154,22 @@ class _ConstTessMemberResultCallback_5_2::type p5_; public: - inline _ConstTessMemberResultCallback_5_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_2(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4514,29 +5178,48 @@ class _ConstTessMemberResultCallback_5_2 -inline typename _ConstTessMemberResultCallback_5_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_2::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_2(obj, member, p1, p2, p3, + p4, p5); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_2::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_2(obj, member, p1, p2, + p3, p4, p5); } #endif -template -class _TessMemberResultCallback_5_2 : public TessResultCallback2 { +template +class _TessMemberResultCallback_5_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2) ; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -4545,16 +5228,22 @@ class _TessMemberResultCallback_5_2 : public TessResultCallback2 { typename remove_reference::type p5_; public: - inline _TessMemberResultCallback_5_2( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_2(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4563,14 +5252,16 @@ class _TessMemberResultCallback_5_2 : public TessResultCallback2 { } }; -template -class _TessMemberResultCallback_5_2 : public TessCallback2 { +template +class _TessMemberResultCallback_5_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2) ; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -4579,15 +5270,21 @@ class _TessMemberResultCallback_5_2 : typename remove_reference::type p5_; public: - inline _TessMemberResultCallback_5_2( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_2(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4596,26 +5293,41 @@ class _TessMemberResultCallback_5_2 : }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_2::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_2::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1, A2), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_2::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_2::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1, A2), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_2(obj, member, p1, p2, p3, p4, p5); } #endif -template -class _TessFunctionResultCallback_5_2 : public TessResultCallback2 { +template +class _TessFunctionResultCallback_5_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2); + typedef TessResultCallback2 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, A1, A2); private: FunctionSignature function_; @@ -4626,15 +5338,16 @@ class _TessFunctionResultCallback_5_2 : public TessResultCallback2 { typename remove_reference::type p5_; public: - inline _TessFunctionResultCallback_5_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_2(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -4643,11 +5356,13 @@ class _TessFunctionResultCallback_5_2 : public TessResultCallback2 { } }; -template -class _TessFunctionResultCallback_5_2 : public TessCallback2 { +template +class _TessFunctionResultCallback_5_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2); + typedef TessCallback2 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, A1, A2); private: FunctionSignature function_; @@ -4658,14 +5373,15 @@ class _TessFunctionResultCallback_5_2 : p typename remove_reference::type p5_; public: - inline _TessFunctionResultCallback_5_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_2(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -4673,23 +5389,39 @@ class _TessFunctionResultCallback_5_2 : p } }; -template -inline typename _TessFunctionResultCallback_5_2::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_2(function, p1, p2, p3, p4, p5); +template +inline typename _TessFunctionResultCallback_5_2::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, A1, A2), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_2(function, p1, p2, p3, p4, p5); } -template -inline typename _TessFunctionResultCallback_5_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_2(function, p1, p2, p3, p4, p5); +template +inline typename _TessFunctionResultCallback_5_2::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, A1, A2), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_2(function, p1, p2, p3, p4, p5); } -template -class _ConstTessMemberResultCallback_6_2 : public TessResultCallback2 { +template +class _ConstTessMemberResultCallback_6_2 + : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2) const; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2) const; private: const T* object_; @@ -4702,16 +5434,24 @@ class _ConstTessMemberResultCallback_6_2 : public TessResultCallback2 { typename remove_reference::type p6_; public: - inline _ConstTessMemberResultCallback_6_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_2(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4720,11 +5460,14 @@ class _ConstTessMemberResultCallback_6_2 : public TessResultCallback2 { } }; -template -class _ConstTessMemberResultCallback_6_2 : public TessCallback2 { +template +class _ConstTessMemberResultCallback_6_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2) const; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2) const; private: const T* object_; @@ -4737,15 +5480,23 @@ class _ConstTessMemberResultCallback_6_2::type p6_; public: - inline _ConstTessMemberResultCallback_6_2(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_2(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4754,29 +5505,47 @@ class _ConstTessMemberResultCallback_6_2 -inline typename _ConstTessMemberResultCallback_6_2::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_2::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_2(obj, member, p1, p2, + p3, p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_2::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_2::base* +NewPermanentTessCallback( + const T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_2( + obj, member, p1, p2, p3, p4, p5, p6); } #endif -template -class _TessMemberResultCallback_6_2 : public TessResultCallback2 { +template +class _TessMemberResultCallback_6_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2) ; + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -4786,16 +5555,23 @@ class _TessMemberResultCallback_6_2 : public TessResultCallback2 { typename remove_reference::type p6_; public: - inline _TessMemberResultCallback_6_2( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_2(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4804,14 +5580,16 @@ class _TessMemberResultCallback_6_2 : public TessResultCallback2 { } }; -template -class _TessMemberResultCallback_6_2 : public TessCallback2 { +template +class _TessMemberResultCallback_6_2 : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2) ; + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -4821,15 +5599,22 @@ class _TessMemberResultCallback_6_2::type p6_; public: - inline _TessMemberResultCallback_6_2( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_2(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4838,26 +5623,43 @@ class _TessMemberResultCallback_6_2 -inline typename _TessMemberResultCallback_6_2::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_2::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, + p5, p6); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_6_2::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_2::base* +NewPermanentTessCallback( + T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _TessMemberResultCallback_6_2(obj, member, p1, p2, p3, p4, + p5, p6); } #endif -template -class _TessFunctionResultCallback_6_2 : public TessResultCallback2 { +template +class _TessFunctionResultCallback_6_2 : public TessResultCallback2 { public: - typedef TessResultCallback2 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2); + typedef TessResultCallback2 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1, A2); private: FunctionSignature function_; @@ -4869,15 +5671,22 @@ class _TessFunctionResultCallback_6_2 : public TessResultCallback2 { typename remove_reference::type p6_; public: - inline _TessFunctionResultCallback_6_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_2(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2) { + virtual R Run(A1 a1, A2 a2) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -4886,11 +5695,13 @@ class _TessFunctionResultCallback_6_2 : public TessResultCallback2 { } }; -template -class _TessFunctionResultCallback_6_2 : public TessCallback2 { +template +class _TessFunctionResultCallback_6_2 + : public TessCallback2 { public: - typedef TessCallback2 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2); + typedef TessCallback2 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1, A2); private: FunctionSignature function_; @@ -4902,14 +5713,21 @@ class _TessFunctionResultCallback_6_2 typename remove_reference::type p6_; public: - inline _TessFunctionResultCallback_6_2(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_2(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2) { + virtual void Run(A1 a1, A2 a2) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -4917,41 +5735,58 @@ class _TessFunctionResultCallback_6_2 } }; -template -inline typename _TessFunctionResultCallback_6_2::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_2(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_2::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1, A2), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_2(function, p1, p2, p3, p4, + p5, p6); } -template -inline typename _TessFunctionResultCallback_6_2::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_2(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_2::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1, A2), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_2(function, p1, p2, p3, p4, + p5, p6); } template -class _ConstTessMemberResultCallback_0_3 : public TessResultCallback3 { +class _ConstTessMemberResultCallback_0_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(A1,A2,A3) const; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(A1, A2, A3) const; private: const T* object_; MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_3( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _ConstTessMemberResultCallback_0_3(const T* object, + MemberSignature member) + : object_(object), member_(member) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(a1,a2,a3); + R result = (object_->*member_)(a1, a2, a3); return result; } else { - R result = (object_->*member_)(a1,a2,a3); + R result = (object_->*member_)(a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4962,27 +5797,25 @@ class _ConstTessMemberResultCallback_0_3 : public TessResultCallback3 class _ConstTessMemberResultCallback_0_3 - : public TessCallback3 { + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(A1,A2,A3) const; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(A1, A2, A3) const; private: const T* object_; MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_3( - const T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _ConstTessMemberResultCallback_0_3(const T* object, + MemberSignature member) + : object_(object), member_(member) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(a1,a2,a3); + (object_->*member_)(a1, a2, a3); } else { - (object_->*member_)(a1,a2,a3); + (object_->*member_)(a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -4992,47 +5825,45 @@ class _ConstTessMemberResultCallback_0_3 #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_3::base* -NewTessCallback( - const T1* obj, R (T2::*member)(A1,A2,A3) const) { - return new _ConstTessMemberResultCallback_0_3( +inline + typename _ConstTessMemberResultCallback_0_3::base* + NewTessCallback(const T1* obj, R (T2::*member)(A1, A2, A3) const) { + return new _ConstTessMemberResultCallback_0_3( obj, member); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_3::base* -NewPermanentTessCallback( - const T1* obj, R (T2::*member)(A1,A2,A3) const) { - return new _ConstTessMemberResultCallback_0_3( +inline + typename _ConstTessMemberResultCallback_0_3::base* + NewPermanentTessCallback(const T1* obj, R (T2::*member)(A1, A2, A3) const) { + return new _ConstTessMemberResultCallback_0_3( obj, member); } #endif template -class _TessMemberResultCallback_0_3 : public TessResultCallback3 { +class _TessMemberResultCallback_0_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(A1,A2,A3) ; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_3( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _TessMemberResultCallback_0_3(T* object, MemberSignature member) + : object_(object), member_(member) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(a1,a2,a3); + R result = (object_->*member_)(a1, a2, a3); return result; } else { - R result = (object_->*member_)(a1,a2,a3); + R result = (object_->*member_)(a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5043,27 +5874,24 @@ class _TessMemberResultCallback_0_3 : public TessResultCallback3 { template class _TessMemberResultCallback_0_3 - : public TessCallback3 { + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(A1,A2,A3) ; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_3( - T* object, MemberSignature member) - : object_(object), - member_(member) { - } + inline _TessMemberResultCallback_0_3(T* object, MemberSignature member) + : object_(object), member_(member) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(a1,a2,a3); + (object_->*member_)(a1, a2, a3); } else { - (object_->*member_)(a1,a2,a3); + (object_->*member_)(a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5073,45 +5901,42 @@ class _TessMemberResultCallback_0_3 #ifndef SWIG template -inline typename _TessMemberResultCallback_0_3::base* -NewTessCallback( - T1* obj, R (T2::*member)(A1,A2,A3) ) { - return new _TessMemberResultCallback_0_3( - obj, member); +inline typename _TessMemberResultCallback_0_3::base* +NewTessCallback(T1* obj, R (T2::*member)(A1, A2, A3)) { + return new _TessMemberResultCallback_0_3(obj, + member); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_0_3::base* -NewPermanentTessCallback( - T1* obj, R (T2::*member)(A1,A2,A3) ) { - return new _TessMemberResultCallback_0_3( - obj, member); +inline typename _TessMemberResultCallback_0_3::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(A1, A2, A3)) { + return new _TessMemberResultCallback_0_3(obj, + member); } #endif template -class _TessFunctionResultCallback_0_3 : public TessResultCallback3 { +class _TessFunctionResultCallback_0_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(A1,A2,A3); + typedef TessResultCallback3 base; + typedef R (*FunctionSignature)(A1, A2, A3); private: FunctionSignature function_; public: - inline _TessFunctionResultCallback_0_3( - FunctionSignature function) - : function_(function) { - } + inline _TessFunctionResultCallback_0_3(FunctionSignature function) + : function_(function) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (*function_)(a1,a2,a3); + R result = (*function_)(a1, a2, a3); return result; } else { - R result = (*function_)(a1,a2,a3); + R result = (*function_)(a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -5122,25 +5947,23 @@ class _TessFunctionResultCallback_0_3 : public TessResultCallback3 { template class _TessFunctionResultCallback_0_3 - : public TessCallback3 { + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(A1,A2,A3); + typedef TessCallback3 base; + typedef void (*FunctionSignature)(A1, A2, A3); private: FunctionSignature function_; public: - inline _TessFunctionResultCallback_0_3( - FunctionSignature function) - : function_(function) { - } + inline _TessFunctionResultCallback_0_3(FunctionSignature function) + : function_(function) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (*function_)(a1,a2,a3); + (*function_)(a1, a2, a3); } else { - (*function_)(a1,a2,a3); + (*function_)(a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -5149,40 +5972,40 @@ class _TessFunctionResultCallback_0_3 }; template -inline typename _TessFunctionResultCallback_0_3::base* -NewTessCallback(R (*function)(A1,A2,A3)) { - return new _TessFunctionResultCallback_0_3(function); +inline typename _TessFunctionResultCallback_0_3::base* +NewTessCallback(R (*function)(A1, A2, A3)) { + return new _TessFunctionResultCallback_0_3(function); } template -inline typename _TessFunctionResultCallback_0_3::base* -NewPermanentTessCallback(R (*function)(A1,A2,A3)) { - return new _TessFunctionResultCallback_0_3(function); +inline typename _TessFunctionResultCallback_0_3::base* +NewPermanentTessCallback(R (*function)(A1, A2, A3)) { + return new _TessFunctionResultCallback_0_3(function); } template class _ConstTessMemberResultCallback_1_3 - : public TessResultCallback3 { + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3) const; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, A1, A2, A3) const; private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_3(T* object, - MemberSignature member, P1 p1) - : object_(object), member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_3(T* object, MemberSignature member, + P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3); + R result = (object_->*member_)(p1_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,a1,a2,a3); + R result = (object_->*member_)(p1_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5193,26 +6016,26 @@ class _ConstTessMemberResultCallback_1_3 template class _ConstTessMemberResultCallback_1_3 - : public TessCallback3 { + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3) const; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, A1, A2, A3) const; private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_3(T* object, - MemberSignature member, P1 p1) - : object_(object), member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_3(T* object, MemberSignature member, + P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,a1,a2,a3); + (object_->*member_)(p1_, a1, a2, a3); } else { - (object_->*member_)(p1_,a1,a2,a3); + (object_->*member_)(p1_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5222,42 +6045,48 @@ class _ConstTessMemberResultCallback_1_3 #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_1_3::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,A1,A2,A3) , typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_3(obj, member, p1); +inline typename _ConstTessMemberResultCallback_1_3::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, A1, A2, A3), + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_3( + obj, member, p1); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_1_3::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,A1,A2,A3) , typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_3(obj, member, p1); +inline typename _ConstTessMemberResultCallback_1_3::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, A1, A2, A3), + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_3( + obj, member, p1); } #endif template -class _TessMemberResultCallback_1_3 : public TessResultCallback3 { +class _TessMemberResultCallback_1_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3) ; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_3(T* object, - MemberSignature member, P1 p1) - : object_(object), member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_3(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3); + R result = (object_->*member_)(p1_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,a1,a2,a3); + R result = (object_->*member_)(p1_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5268,26 +6097,25 @@ class _TessMemberResultCallback_1_3 : public TessResultCallback3 { template class _TessMemberResultCallback_1_3 - : public TessCallback3 { + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3) ; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_3(T* object, - MemberSignature member, P1 p1) - : object_(object), member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_3(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,a1,a2,a3); + (object_->*member_)(p1_, a1, a2, a3); } else { - (object_->*member_)(p1_,a1,a2,a3); + (object_->*member_)(p1_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5297,25 +6125,31 @@ class _TessMemberResultCallback_1_3 #ifndef SWIG template -inline typename _TessMemberResultCallback_1_3::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,A1,A2,A3) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_3(obj, member, p1); +inline + typename _TessMemberResultCallback_1_3::base* + NewTessCallback(T1* obj, R (T2::*member)(P1, A1, A2, A3), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_3( + obj, member, p1); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_1_3::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,A1,A2,A3) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_3(obj, member, p1); +inline + typename _TessMemberResultCallback_1_3::base* + NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, A1, A2, A3), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_3( + obj, member, p1); } #endif template -class _TessFunctionResultCallback_1_3 : public TessCallback3 { +class _TessFunctionResultCallback_1_3 : public TessCallback3 { public: - typedef TessCallback3 base; - typedef R (*FunctionSignature)(P1,A1,A2,A3); + typedef TessCallback3 base; + typedef R (*FunctionSignature)(P1, A1, A2, A3); private: FunctionSignature function_; @@ -5323,14 +6157,14 @@ class _TessFunctionResultCallback_1_3 : public TessCallback3 { public: inline _TessFunctionResultCallback_1_3(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (*function_)(p1_,a1,a2,a3); + R result = (*function_)(p1_, a1, a2, a3); return result; } else { - R result = (*function_)(p1_,a1,a2,a3); + R result = (*function_)(p1_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -5341,10 +6175,10 @@ class _TessFunctionResultCallback_1_3 : public TessCallback3 { template class _TessFunctionResultCallback_1_3 - : public TessCallback3 { + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,A1,A2,A3); + typedef TessCallback3 base; + typedef void (*FunctionSignature)(P1, A1, A2, A3); private: FunctionSignature function_; @@ -5352,13 +6186,13 @@ class _TessFunctionResultCallback_1_3 public: inline _TessFunctionResultCallback_1_3(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (*function_)(p1_,a1,a2,a3); + (*function_)(p1_, a1, a2, a3); } else { - (*function_)(p1_,a1,a2,a3); + (*function_)(p1_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -5367,22 +6201,27 @@ class _TessFunctionResultCallback_1_3 }; template -inline typename _TessFunctionResultCallback_1_3::base* -NewTessCallback(R (*function)(P1,A1,A2,A3), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_3(function, p1); +inline typename _TessFunctionResultCallback_1_3::base* +NewTessCallback(R (*function)(P1, A1, A2, A3), typename Identity::type p1) { + return new _TessFunctionResultCallback_1_3(function, + p1); } template -inline typename _TessFunctionResultCallback_1_3::base* -NewPermanentTessCallback(R (*function)(P1,A1,A2,A3), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_3(function, p1); +inline typename _TessFunctionResultCallback_1_3::base* +NewPermanentTessCallback(R (*function)(P1, A1, A2, A3), + typename Identity::type p1) { + return new _TessFunctionResultCallback_1_3(function, + p1); } -template -class _ConstTessMemberResultCallback_2_3 : public TessResultCallback3 { +template +class _ConstTessMemberResultCallback_2_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3) const; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, A1, A2, A3) const; private: const T* object_; @@ -5391,16 +6230,17 @@ class _ConstTessMemberResultCallback_2_3 : public TessResultCallback3::type p2_; public: - inline _ConstTessMemberResultCallback_2_3(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_3(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5410,10 +6250,11 @@ class _ConstTessMemberResultCallback_2_3 : public TessResultCallback3 -class _ConstTessMemberResultCallback_2_3 : public TessCallback3 { +class _ConstTessMemberResultCallback_2_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3) const; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, A1, A2, A3) const; private: const T* object_; @@ -5422,15 +6263,16 @@ class _ConstTessMemberResultCallback_2_3 : pub typename remove_reference::type p2_; public: - inline _ConstTessMemberResultCallback_2_3(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_3(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3); + (object_->*member_)(p1_, p2_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,a1,a2,a3); + (object_->*member_)(p1_, p2_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5439,45 +6281,57 @@ class _ConstTessMemberResultCallback_2_3 : pub }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_3(obj, member, p1, p2); +template +inline typename _ConstTessMemberResultCallback_2_3::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, A1, A2, A3) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_3(obj, member, p1, p2); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_3(obj, member, p1, p2); +template +inline typename _ConstTessMemberResultCallback_2_3::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, A1, A2, A3) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_3(obj, member, p1, p2); } #endif -template -class _TessMemberResultCallback_2_3 : public TessResultCallback3 { +template +class _TessMemberResultCallback_2_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3) ; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_3( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_3(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5487,27 +6341,28 @@ class _TessMemberResultCallback_2_3 : public TessResultCallback3 { }; template -class _TessMemberResultCallback_2_3 : public TessCallback3 { +class _TessMemberResultCallback_2_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3) ; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_3( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_3(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3); + (object_->*member_)(p1_, p2_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,a1,a2,a3); + (object_->*member_)(p1_, p2_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5516,26 +6371,37 @@ class _TessMemberResultCallback_2_3 : public T }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_2_3::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_3(obj, member, p1, p2); +template +inline typename _TessMemberResultCallback_2_3::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_3( + obj, member, p1, p2); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_2_3::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_3(obj, member, p1, p2); +template +inline typename _TessMemberResultCallback_2_3::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_3( + obj, member, p1, p2); } #endif template -class _TessFunctionResultCallback_2_3 : public TessResultCallback3 { +class _TessFunctionResultCallback_2_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,A1,A2,A3); + typedef TessResultCallback3 base; + typedef R (*FunctionSignature)(P1, P2, A1, A2, A3); private: FunctionSignature function_; @@ -5543,15 +6409,16 @@ class _TessFunctionResultCallback_2_3 : public TessResultCallback3 { typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_3(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_3(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (*function_)(p1_,p2_,a1,a2,a3); + R result = (*function_)(p1_, p2_, a1, a2, a3); return result; } else { - R result = (*function_)(p1_,p2_,a1,a2,a3); + R result = (*function_)(p1_, p2_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -5561,10 +6428,11 @@ class _TessFunctionResultCallback_2_3 : public TessResultCallback3 { }; template -class _TessFunctionResultCallback_2_3 : public TessCallback3 { +class _TessFunctionResultCallback_2_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,A1,A2,A3); + typedef TessCallback3 base; + typedef void (*FunctionSignature)(P1, P2, A1, A2, A3); private: FunctionSignature function_; @@ -5572,14 +6440,15 @@ class _TessFunctionResultCallback_2_3 : public Te typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_3(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_3(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (*function_)(p1_,p2_,a1,a2,a3); + (*function_)(p1_, p2_, a1, a2, a3); } else { - (*function_)(p1_,p2_,a1,a2,a3); + (*function_)(p1_, p2_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -5588,22 +6457,32 @@ class _TessFunctionResultCallback_2_3 : public Te }; template -inline typename _TessFunctionResultCallback_2_3::base* -NewTessCallback(R (*function)(P1,P2,A1,A2,A3), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_3(function, p1, p2); +inline + typename _TessFunctionResultCallback_2_3::base* + NewTessCallback(R (*function)(P1, P2, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_3( + function, p1, p2); } template -inline typename _TessFunctionResultCallback_2_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1,A2,A3), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_3(function, p1, p2); +inline typename _TessFunctionResultCallback_2_3::base* +NewPermanentTessCallback(R (*function)(P1, P2, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_3( + function, p1, p2); } -template -class _ConstTessMemberResultCallback_3_3 : public TessResultCallback3 { +template +class _ConstTessMemberResultCallback_3_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3) const; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1, A2, A3) const; private: const T* object_; @@ -5613,16 +6492,17 @@ class _ConstTessMemberResultCallback_3_3 : public TessResultCallback3::type p3_; public: - inline _ConstTessMemberResultCallback_3_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_3(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5631,11 +6511,13 @@ class _ConstTessMemberResultCallback_3_3 : public TessResultCallback3 -class _ConstTessMemberResultCallback_3_3 : public TessCallback3 { +template +class _ConstTessMemberResultCallback_3_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3) const; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, P3, A1, A2, A3) const; private: const T* object_; @@ -5645,15 +6527,16 @@ class _ConstTessMemberResultCallback_3_3 : typename remove_reference::type p3_; public: - inline _ConstTessMemberResultCallback_3_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_3(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5662,45 +6545,60 @@ class _ConstTessMemberResultCallback_3_3 : }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_3(obj, member, p1, p2, p3); +template +inline typename _ConstTessMemberResultCallback_3_3::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, P3, A1, A2, A3) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_3(obj, member, p1, p2, p3); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_3(obj, member, p1, p2, p3); +template +inline typename _ConstTessMemberResultCallback_3_3::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, A1, A2, A3) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_3(obj, member, p1, p2, + p3); } #endif -template -class _TessMemberResultCallback_3_3 : public TessResultCallback3 { +template +class _TessMemberResultCallback_3_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3) ; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_3( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_3(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5709,29 +6607,31 @@ class _TessMemberResultCallback_3_3 : public TessResultCallback3 { } }; -template -class _TessMemberResultCallback_3_3 : public TessCallback3 { +template +class _TessMemberResultCallback_3_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3) ; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, P3, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_3( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_3(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5740,26 +6640,39 @@ class _TessMemberResultCallback_3_3 : publ }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_3_3::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_3(obj, member, p1, p2, p3); +template +inline typename _TessMemberResultCallback_3_3::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_3( + obj, member, p1, p2, p3); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_3_3::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_3(obj, member, p1, p2, p3); +template +inline typename _TessMemberResultCallback_3_3::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_3(obj, member, p1, p2, p3); } #endif -template -class _TessFunctionResultCallback_3_3 : public TessResultCallback3 { +template +class _TessFunctionResultCallback_3_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1,A2,A3); + typedef TessResultCallback3 base; + typedef R (*FunctionSignature)(P1, P2, P3, A1, A2, A3); private: FunctionSignature function_; @@ -5768,15 +6681,16 @@ class _TessFunctionResultCallback_3_3 : public TessResultCallback3 { typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_3(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_3(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3); + R result = (*function_)(p1_, p2_, p3_, a1, a2, a3); return result; } else { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3); + R result = (*function_)(p1_, p2_, p3_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -5786,10 +6700,11 @@ class _TessFunctionResultCallback_3_3 : public TessResultCallback3 { }; template -class _TessFunctionResultCallback_3_3 : public TessCallback3 { +class _TessFunctionResultCallback_3_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1,A2,A3); + typedef TessCallback3 base; + typedef void (*FunctionSignature)(P1, P2, P3, A1, A2, A3); private: FunctionSignature function_; @@ -5798,14 +6713,15 @@ class _TessFunctionResultCallback_3_3 : publi typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_3(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_3(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (*function_)(p1_,p2_,p3_,a1,a2,a3); + (*function_)(p1_, p2_, p3_, a1, a2, a3); } else { - (*function_)(p1_,p2_,p3_,a1,a2,a3); + (*function_)(p1_, p2_, p3_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -5814,22 +6730,33 @@ class _TessFunctionResultCallback_3_3 : publi }; template -inline typename _TessFunctionResultCallback_3_3::base* -NewTessCallback(R (*function)(P1,P2,P3,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_3(function, p1, p2, p3); +inline typename _TessFunctionResultCallback_3_3::base* +NewTessCallback(R (*function)(P1, P2, P3, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_3( + function, p1, p2, p3); } template -inline typename _TessFunctionResultCallback_3_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_3(function, p1, p2, p3); +inline typename _TessFunctionResultCallback_3_3::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_3( + function, p1, p2, p3); } -template -class _ConstTessMemberResultCallback_4_3 : public TessResultCallback3 { +template +class _ConstTessMemberResultCallback_4_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3) const; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3) const; private: const T* object_; @@ -5840,16 +6767,17 @@ class _ConstTessMemberResultCallback_4_3 : public TessResultCallback3::type p4_; public: - inline _ConstTessMemberResultCallback_4_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_3(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5858,11 +6786,14 @@ class _ConstTessMemberResultCallback_4_3 : public TessResultCallback3 -class _ConstTessMemberResultCallback_4_3 : public TessCallback3 { +template +class _ConstTessMemberResultCallback_4_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3) const; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3) const; private: const T* object_; @@ -5873,15 +6804,16 @@ class _ConstTessMemberResultCallback_4_3::type p4_; public: - inline _ConstTessMemberResultCallback_4_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_3(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5890,29 +6822,48 @@ class _ConstTessMemberResultCallback_4_3 -inline typename _ConstTessMemberResultCallback_4_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_3::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1, A2, A3) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_3(obj, member, p1, p2, p3, + p4); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_3::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1, A2, A3) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_3(obj, member, p1, p2, + p3, p4); } #endif -template -class _TessMemberResultCallback_4_3 : public TessResultCallback3 { +template +class _TessMemberResultCallback_4_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3) ; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -5920,16 +6871,16 @@ class _TessMemberResultCallback_4_3 : public TessResultCallback3 { typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_3( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_3(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5938,14 +6889,16 @@ class _TessMemberResultCallback_4_3 : public TessResultCallback3 { } }; -template -class _TessMemberResultCallback_4_3 : public TessCallback3 { +template +class _TessMemberResultCallback_4_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3) ; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -5953,15 +6906,15 @@ class _TessMemberResultCallback_4_3 : typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_3( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_3(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -5970,26 +6923,41 @@ class _TessMemberResultCallback_4_3 : }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_4_3::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_3::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_4_3::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_3::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_3(obj, member, p1, p2, p3, p4); } #endif -template -class _TessFunctionResultCallback_4_3 : public TessResultCallback3 { +template +class _TessFunctionResultCallback_4_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3); + typedef TessResultCallback3 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, A1, A2, A3); private: FunctionSignature function_; @@ -5999,15 +6967,16 @@ class _TessFunctionResultCallback_4_3 : public TessResultCallback3 { typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_3(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3); + R result = (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3); + R result = (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6016,11 +6985,13 @@ class _TessFunctionResultCallback_4_3 : public TessResultCallback3 { } }; -template -class _TessFunctionResultCallback_4_3 : public TessCallback3 { +template +class _TessFunctionResultCallback_4_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3); + typedef TessCallback3 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, A1, A2, A3); private: FunctionSignature function_; @@ -6030,14 +7001,15 @@ class _TessFunctionResultCallback_4_3 : p typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_3(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3); + (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3); } else { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3); + (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6045,23 +7017,38 @@ class _TessFunctionResultCallback_4_3 : p } }; -template -inline typename _TessFunctionResultCallback_4_3::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_3(function, p1, p2, p3, p4); +template +inline typename _TessFunctionResultCallback_4_3::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_3(function, p1, p2, p3, p4); } -template -inline typename _TessFunctionResultCallback_4_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_3(function, p1, p2, p3, p4); +template +inline typename _TessFunctionResultCallback_4_3::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_3(function, p1, p2, p3, p4); } -template -class _ConstTessMemberResultCallback_5_3 : public TessResultCallback3 { +template +class _ConstTessMemberResultCallback_5_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3) const; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3) const; private: const T* object_; @@ -6073,16 +7060,23 @@ class _ConstTessMemberResultCallback_5_3 : public TessResultCallback3::type p5_; public: - inline _ConstTessMemberResultCallback_5_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_3(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6091,11 +7085,14 @@ class _ConstTessMemberResultCallback_5_3 : public TessResultCallback3 -class _ConstTessMemberResultCallback_5_3 : public TessCallback3 { +template +class _ConstTessMemberResultCallback_5_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3) const; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3) const; private: const T* object_; @@ -6107,15 +7104,22 @@ class _ConstTessMemberResultCallback_5_3::type p5_; public: - inline _ConstTessMemberResultCallback_5_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_3(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6124,29 +7128,49 @@ class _ConstTessMemberResultCallback_5_3 -inline typename _ConstTessMemberResultCallback_5_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_3::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_3(obj, member, p1, p2, + p3, p4, p5); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_3::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_3(obj, member, p1, + p2, p3, p4, p5); } #endif -template -class _TessMemberResultCallback_5_3 : public TessResultCallback3 { +template +class _TessMemberResultCallback_5_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3) ; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -6155,16 +7179,22 @@ class _TessMemberResultCallback_5_3 : public TessResultCallback3 { typename remove_reference::type p5_; public: - inline _TessMemberResultCallback_5_3( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_3(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6173,14 +7203,16 @@ class _TessMemberResultCallback_5_3 : public TessResultCallback3 { } }; -template -class _TessMemberResultCallback_5_3 : public TessCallback3 { +template +class _TessMemberResultCallback_5_3 : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3) ; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -6189,15 +7221,21 @@ class _TessMemberResultCallback_5_3::type p5_; public: - inline _TessMemberResultCallback_5_3( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_3(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6206,26 +7244,45 @@ class _TessMemberResultCallback_5_3 -inline typename _TessMemberResultCallback_5_3::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_3::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, + p5); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_3::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_3::base* +NewPermanentTessCallback(T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_3(obj, member, p1, p2, p3, p4, + p5); } #endif -template -class _TessFunctionResultCallback_5_3 : public TessResultCallback3 { +template +class _TessFunctionResultCallback_5_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3); + typedef TessResultCallback3 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, A1, A2, A3); private: FunctionSignature function_; @@ -6236,15 +7293,16 @@ class _TessFunctionResultCallback_5_3 : public TessResultCallback3 { typename remove_reference::type p5_; public: - inline _TessFunctionResultCallback_5_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_3(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6253,11 +7311,13 @@ class _TessFunctionResultCallback_5_3 : public TessResultCallback3 { } }; -template -class _TessFunctionResultCallback_5_3 : public TessCallback3 { +template +class _TessFunctionResultCallback_5_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3); + typedef TessCallback3 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, A1, A2, A3); private: FunctionSignature function_; @@ -6268,14 +7328,15 @@ class _TessFunctionResultCallback_5_3 typename remove_reference::type p5_; public: - inline _TessFunctionResultCallback_5_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_3(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6283,23 +7344,41 @@ class _TessFunctionResultCallback_5_3 } }; -template -inline typename _TessFunctionResultCallback_5_3::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_3(function, p1, p2, p3, p4, p5); +template +inline typename _TessFunctionResultCallback_5_3::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_3(function, p1, p2, p3, p4, + p5); } -template -inline typename _TessFunctionResultCallback_5_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_3(function, p1, p2, p3, p4, p5); +template +inline typename _TessFunctionResultCallback_5_3::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_3(function, p1, p2, p3, p4, + p5); } -template -class _ConstTessMemberResultCallback_6_3 : public TessResultCallback3 { +template +class _ConstTessMemberResultCallback_6_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3) const; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3) const; private: const T* object_; @@ -6312,16 +7391,24 @@ class _ConstTessMemberResultCallback_6_3 : public TessResultCallback3::type p6_; public: - inline _ConstTessMemberResultCallback_6_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_3(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6330,11 +7417,14 @@ class _ConstTessMemberResultCallback_6_3 : public TessResultCallback3 -class _ConstTessMemberResultCallback_6_3 : public TessCallback3 { +template +class _ConstTessMemberResultCallback_6_3 + : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3) const; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3) const; private: const T* object_; @@ -6347,15 +7437,23 @@ class _ConstTessMemberResultCallback_6_3::type p6_; public: - inline _ConstTessMemberResultCallback_6_3(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_3(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6364,29 +7462,48 @@ class _ConstTessMemberResultCallback_6_3 -inline typename _ConstTessMemberResultCallback_6_3::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_3(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_3::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_3( + obj, member, p1, p2, p3, p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_3::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_3(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_3::base* +NewPermanentTessCallback( + const T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_3( + obj, member, p1, p2, p3, p4, p5, p6); } #endif -template -class _TessMemberResultCallback_6_3 : public TessResultCallback3 { +template +class _TessMemberResultCallback_6_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3) ; + typedef TessResultCallback3 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -6396,16 +7513,23 @@ class _TessMemberResultCallback_6_3 : public TessResultCallback3 { typename remove_reference::type p6_; public: - inline _TessMemberResultCallback_6_3( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_3(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6414,14 +7538,16 @@ class _TessMemberResultCallback_6_3 : public TessResultCallback3 { } }; -template -class _TessMemberResultCallback_6_3 : public TessCallback3 { +template +class _TessMemberResultCallback_6_3 : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3) ; + typedef TessCallback3 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -6431,15 +7557,22 @@ class _TessMemberResultCallback_6_3::type p6_; public: - inline _TessMemberResultCallback_6_3( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_3(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6448,26 +7581,44 @@ class _TessMemberResultCallback_6_3 -inline typename _TessMemberResultCallback_6_3::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_3(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_3::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessMemberResultCallback_6_3(obj, member, p1, p2, p3, + p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_6_3::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_3(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_3::base* +NewPermanentTessCallback( + T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _TessMemberResultCallback_6_3(obj, member, p1, p2, p3, + p4, p5, p6); } #endif -template -class _TessFunctionResultCallback_6_3 : public TessResultCallback3 { +template +class _TessFunctionResultCallback_6_3 + : public TessResultCallback3 { public: - typedef TessResultCallback3 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3); + typedef TessResultCallback3 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3); private: FunctionSignature function_; @@ -6479,15 +7630,22 @@ class _TessFunctionResultCallback_6_3 : public TessResultCallback3 { typename remove_reference::type p6_; public: - inline _TessFunctionResultCallback_6_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_3(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3) { + virtual R Run(A1 a1, A2 a2, A3 a3) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6496,11 +7654,13 @@ class _TessFunctionResultCallback_6_3 : public TessResultCallback3 { } }; -template -class _TessFunctionResultCallback_6_3 : public TessCallback3 { +template +class _TessFunctionResultCallback_6_3 : public TessCallback3 { public: - typedef TessCallback3 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3); + typedef TessCallback3 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3); private: FunctionSignature function_; @@ -6512,14 +7672,21 @@ class _TessFunctionResultCallback_6_3::type p6_; public: - inline _TessFunctionResultCallback_6_3(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_3(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3) { + virtual void Run(A1 a1, A2 a2, A3 a3) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6527,39 +7694,58 @@ class _TessFunctionResultCallback_6_3 -inline typename _TessFunctionResultCallback_6_3::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_3(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_3::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1, A2, A3), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_3(function, p1, p2, p3, + p4, p5, p6); } -template -inline typename _TessFunctionResultCallback_6_3::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_3(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_3::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1, A2, A3), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_3(function, p1, p2, p3, + p4, p5, p6); } template -class _ConstTessMemberResultCallback_0_4 : public TessResultCallback4 { +class _ConstTessMemberResultCallback_0_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(A1,A2,A3,A4) const; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(A1, A2, A3, A4) const; private: const T* object_; MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_4(const T* object, MemberSignature member) - : object_(object), - member_(member) { } + inline _ConstTessMemberResultCallback_0_4(const T* object, + MemberSignature member) + : object_(object), member_(member) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(a1,a2,a3,a4); + R result = (object_->*member_)(a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(a1,a2,a3,a4); + R result = (object_->*member_)(a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6569,25 +7755,26 @@ class _ConstTessMemberResultCallback_0_4 : public TessResultCallback4 -class _ConstTessMemberResultCallback_0_4 : public TessCallback4 { +class _ConstTessMemberResultCallback_0_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(A1,A2,A3,A4) const; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(A1, A2, A3, A4) const; private: const T* object_; MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_4(const T* object, MemberSignature member) - : object_(object), - member_(member) { } + inline _ConstTessMemberResultCallback_0_4(const T* object, + MemberSignature member) + : object_(object), member_(member) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(a1,a2,a3,a4); + (object_->*member_)(a1, a2, a3, a4); } else { - (object_->*member_)(a1,a2,a3,a4); + (object_->*member_)(a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6597,42 +7784,46 @@ class _ConstTessMemberResultCallback_0_4 : public #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(A1,A2,A3,A4) const) { - return new _ConstTessMemberResultCallback_0_4(obj, member); +inline typename _ConstTessMemberResultCallback_0_4::base* +NewTessCallback(const T1* obj, R (T2::*member)(A1, A2, A3, A4) const) { + return new _ConstTessMemberResultCallback_0_4( + obj, member); } #endif #ifndef SWIG template -inline typename _ConstTessMemberResultCallback_0_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(A1,A2,A3,A4) const) { - return new _ConstTessMemberResultCallback_0_4(obj, member); +inline typename _ConstTessMemberResultCallback_0_4::base* +NewPermanentTessCallback(const T1* obj, R (T2::*member)(A1, A2, A3, A4) const) { + return new _ConstTessMemberResultCallback_0_4( + obj, member); } #endif template -class _TessMemberResultCallback_0_4 : public TessResultCallback4 { +class _TessMemberResultCallback_0_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(A1,A2,A3,A4) ; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_4( T* object, MemberSignature member) - : object_(object), - member_(member) { } + inline _TessMemberResultCallback_0_4(T* object, MemberSignature member) + : object_(object), member_(member) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(a1,a2,a3,a4); + R result = (object_->*member_)(a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(a1,a2,a3,a4); - // zero out the pointer to ensure segfault if used again + R result = (object_->*member_)(a1, a2, a3, a4); + // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; return result; @@ -6641,25 +7832,25 @@ class _TessMemberResultCallback_0_4 : public TessResultCallback4 }; template -class _TessMemberResultCallback_0_4 : public TessCallback4 { +class _TessMemberResultCallback_0_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(A1,A2,A3,A4) ; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_4( T* object, MemberSignature member) - : object_(object), - member_(member) { } + inline _TessMemberResultCallback_0_4(T* object, MemberSignature member) + : object_(object), member_(member) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(a1,a2,a3,a4); + (object_->*member_)(a1, a2, a3, a4); } else { - (object_->*member_)(a1,a2,a3,a4); + (object_->*member_)(a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6669,39 +7860,44 @@ class _TessMemberResultCallback_0_4 : public TessC #ifndef SWIG template -inline typename _TessMemberResultCallback_0_4::base* -NewTessCallback( T1* obj, R (T2::*member)(A1,A2,A3,A4) ) { - return new _TessMemberResultCallback_0_4(obj, member); +inline + typename _TessMemberResultCallback_0_4::base* + NewTessCallback(T1* obj, R (T2::*member)(A1, A2, A3, A4)) { + return new _TessMemberResultCallback_0_4(obj, + member); } #endif #ifndef SWIG template -inline typename _TessMemberResultCallback_0_4::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(A1,A2,A3,A4) ) { - return new _TessMemberResultCallback_0_4(obj, member); +inline + typename _TessMemberResultCallback_0_4::base* + NewPermanentTessCallback(T1* obj, R (T2::*member)(A1, A2, A3, A4)) { + return new _TessMemberResultCallback_0_4( + obj, member); } #endif template -class _TessFunctionResultCallback_0_4 : public TessResultCallback4 { +class _TessFunctionResultCallback_0_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(A1,A2,A3,A4); + typedef TessResultCallback4 base; + typedef R (*FunctionSignature)(A1, A2, A3, A4); private: FunctionSignature function_; public: inline _TessFunctionResultCallback_0_4(FunctionSignature function) - : function_(function) { } + : function_(function) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (*function_)(a1,a2,a3,a4); + R result = (*function_)(a1, a2, a3, a4); return result; } else { - R result = (*function_)(a1,a2,a3,a4); + R result = (*function_)(a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6711,23 +7907,24 @@ class _TessFunctionResultCallback_0_4 : public TessResultCallback4 -class _TessFunctionResultCallback_0_4 : public TessCallback4 { +class _TessFunctionResultCallback_0_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(A1,A2,A3,A4); + typedef TessCallback4 base; + typedef void (*FunctionSignature)(A1, A2, A3, A4); private: FunctionSignature function_; public: inline _TessFunctionResultCallback_0_4(FunctionSignature function) - : function_(function) { } + : function_(function) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (*function_)(a1,a2,a3,a4); + (*function_)(a1, a2, a3, a4); } else { - (*function_)(a1,a2,a3,a4); + (*function_)(a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6736,22 +7933,25 @@ class _TessFunctionResultCallback_0_4 : public TessCa }; template -inline typename _TessFunctionResultCallback_0_4::base* -NewTessCallback(R (*function)(A1,A2,A3,A4)) { - return new _TessFunctionResultCallback_0_4(function); +inline typename _TessFunctionResultCallback_0_4::base* +NewTessCallback(R (*function)(A1, A2, A3, A4)) { + return new _TessFunctionResultCallback_0_4(function); } template -inline typename _TessFunctionResultCallback_0_4::base* -NewPermanentTessCallback(R (*function)(A1,A2,A3,A4)) { - return new _TessFunctionResultCallback_0_4(function); +inline typename _TessFunctionResultCallback_0_4::base* +NewPermanentTessCallback(R (*function)(A1, A2, A3, A4)) { + return new _TessFunctionResultCallback_0_4( + function); } -template -class _ConstTessMemberResultCallback_1_4 : public TessResultCallback4 { +template +class _ConstTessMemberResultCallback_1_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3,A4) const; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, A1, A2, A3, A4) const; private: const T* object_; @@ -6759,16 +7959,16 @@ class _ConstTessMemberResultCallback_1_4 : public TessResultCallback4::type p1_; public: - inline _ConstTessMemberResultCallback_1_4(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_4(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6778,10 +7978,11 @@ class _ConstTessMemberResultCallback_1_4 : public TessResultCallback4 -class _ConstTessMemberResultCallback_1_4 : public TessCallback4 { +class _ConstTessMemberResultCallback_1_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3,A4) const; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, A1, A2, A3, A4) const; private: const T* object_; @@ -6789,15 +7990,15 @@ class _ConstTessMemberResultCallback_1_4 : pub typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_4(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_4(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,a1,a2,a3,a4); + (object_->*member_)(p1_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,a1,a2,a3,a4); + (object_->*member_)(p1_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6806,43 +8007,53 @@ class _ConstTessMemberResultCallback_1_4 : pub }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2,A3,A4) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_4(obj, member, p1); +template +inline typename _ConstTessMemberResultCallback_1_4::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, A1, A2, A3, A4) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_4(obj, member, p1); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2,A3,A4) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_4(obj, member, p1); +template +inline typename _ConstTessMemberResultCallback_1_4::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, A1, A2, A3, A4) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_4(obj, member, p1); } #endif -template -class _TessMemberResultCallback_1_4 : public TessResultCallback4 { +template +class _TessMemberResultCallback_1_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3,A4) ; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_4( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_4(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6852,26 +8063,26 @@ class _TessMemberResultCallback_1_4 : public TessResultCallback4 }; template -class _TessMemberResultCallback_1_4 : public TessCallback4 { +class _TessMemberResultCallback_1_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3,A4) ; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_4( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_4(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,a1,a2,a3,a4); + (object_->*member_)(p1_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,a1,a2,a3,a4); + (object_->*member_)(p1_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6880,26 +8091,35 @@ class _TessMemberResultCallback_1_4 : public T }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_1_4::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,A1,A2,A3,A4) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_4(obj, member, p1); +template +inline typename _TessMemberResultCallback_1_4::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, A1, A2, A3, A4), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_4( + obj, member, p1); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_1_4::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,A1,A2,A3,A4) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_4(obj, member, p1); +template +inline typename _TessMemberResultCallback_1_4::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, A1, A2, A3, A4), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_4( + obj, member, p1); } #endif template -class _TessFunctionResultCallback_1_4 : public TessResultCallback4 { +class _TessFunctionResultCallback_1_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,A1,A2,A3,A4); + typedef TessResultCallback4 base; + typedef R (*FunctionSignature)(P1, A1, A2, A3, A4); private: FunctionSignature function_; @@ -6907,14 +8127,14 @@ class _TessFunctionResultCallback_1_4 : public TessResultCallback4 -class _TessFunctionResultCallback_1_4 : public TessCallback4 { +class _TessFunctionResultCallback_1_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,A1,A2,A3,A4); + typedef TessCallback4 base; + typedef void (*FunctionSignature)(P1, A1, A2, A3, A4); private: FunctionSignature function_; @@ -6935,13 +8156,13 @@ class _TessFunctionResultCallback_1_4 : public Te public: inline _TessFunctionResultCallback_1_4(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (*function_)(p1_,a1,a2,a3,a4); + (*function_)(p1_, a1, a2, a3, a4); } else { - (*function_)(p1_,a1,a2,a3,a4); + (*function_)(p1_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -6950,22 +8171,30 @@ class _TessFunctionResultCallback_1_4 : public Te }; template -inline typename _TessFunctionResultCallback_1_4::base* -NewTessCallback(R (*function)(P1,A1,A2,A3,A4), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_4(function, p1); +inline + typename _TessFunctionResultCallback_1_4::base* + NewTessCallback(R (*function)(P1, A1, A2, A3, A4), + typename Identity::type p1) { + return new _TessFunctionResultCallback_1_4( + function, p1); } template -inline typename _TessFunctionResultCallback_1_4::base* -NewPermanentTessCallback(R (*function)(P1,A1,A2,A3,A4), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_4(function, p1); +inline typename _TessFunctionResultCallback_1_4::base* +NewPermanentTessCallback(R (*function)(P1, A1, A2, A3, A4), + typename Identity::type p1) { + return new _TessFunctionResultCallback_1_4( + function, p1); } -template -class _ConstTessMemberResultCallback_2_4 : public TessResultCallback4 { +template +class _ConstTessMemberResultCallback_2_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3,A4) const; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, A1, A2, A3, A4) const; private: const T* object_; @@ -6974,16 +8203,17 @@ class _ConstTessMemberResultCallback_2_4 : public TessResultCallback4::type p2_; public: - inline _ConstTessMemberResultCallback_2_4(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_4(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -6992,11 +8222,13 @@ class _ConstTessMemberResultCallback_2_4 : public TessResultCallback4 -class _ConstTessMemberResultCallback_2_4 : public TessCallback4 { +template +class _ConstTessMemberResultCallback_2_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3,A4) const; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, A1, A2, A3, A4) const; private: const T* object_; @@ -7005,15 +8237,16 @@ class _ConstTessMemberResultCallback_2_4 : typename remove_reference::type p2_; public: - inline _ConstTessMemberResultCallback_2_4(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_4(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7022,44 +8255,57 @@ class _ConstTessMemberResultCallback_2_4 : }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_4(obj, member, p1, p2); +template +inline typename _ConstTessMemberResultCallback_2_4::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, P2, A1, A2, A3, A4) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_4(obj, member, p1, p2); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_4(obj, member, p1, p2); +template +inline typename _ConstTessMemberResultCallback_2_4::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, A1, A2, A3, A4) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_4(obj, member, p1, p2); } #endif -template -class _TessMemberResultCallback_2_4 : public TessResultCallback4 { +template +class _TessMemberResultCallback_2_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3,A4) ; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_4( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_4(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7068,28 +8314,30 @@ class _TessMemberResultCallback_2_4 : public TessResultCallback4 } }; -template -class _TessMemberResultCallback_2_4 : public TessCallback4 { +template +class _TessMemberResultCallback_2_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3,A4) ; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_4( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_4(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7098,26 +8346,38 @@ class _TessMemberResultCallback_2_4 : publ }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_2_4::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_4(obj, member, p1, p2); +template +inline typename _TessMemberResultCallback_2_4::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_4( + obj, member, p1, p2); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_2_4::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_4(obj, member, p1, p2); +template +inline typename _TessMemberResultCallback_2_4::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_4(obj, member, p1, p2); } #endif -template -class _TessFunctionResultCallback_2_4 : public TessResultCallback4 { +template +class _TessFunctionResultCallback_2_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,A1,A2,A3,A4); + typedef TessResultCallback4 base; + typedef R (*FunctionSignature)(P1, P2, A1, A2, A3, A4); private: FunctionSignature function_; @@ -7125,15 +8385,16 @@ class _TessFunctionResultCallback_2_4 : public TessResultCallback4::type p2_; public: - inline _TessFunctionResultCallback_2_4(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_4(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (*function_)(p1_,p2_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, a1, a2, a3, a4); return result; } else { - R result = (*function_)(p1_,p2_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -7143,10 +8404,11 @@ class _TessFunctionResultCallback_2_4 : public TessResultCallback4 -class _TessFunctionResultCallback_2_4 : public TessCallback4 { +class _TessFunctionResultCallback_2_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,A1,A2,A3,A4); + typedef TessCallback4 base; + typedef void (*FunctionSignature)(P1, P2, A1, A2, A3, A4); private: FunctionSignature function_; @@ -7154,14 +8416,15 @@ class _TessFunctionResultCallback_2_4 : publi typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_4(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_4(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (*function_)(p1_,p2_,a1,a2,a3,a4); + (*function_)(p1_, p2_, a1, a2, a3, a4); } else { - (*function_)(p1_,p2_,a1,a2,a3,a4); + (*function_)(p1_, p2_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -7170,22 +8433,32 @@ class _TessFunctionResultCallback_2_4 : publi }; template -inline typename _TessFunctionResultCallback_2_4::base* -NewTessCallback(R (*function)(P1,P2,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_4(function, p1, p2); +inline typename _TessFunctionResultCallback_2_4::base* +NewTessCallback(R (*function)(P1, P2, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_4( + function, p1, p2); } template -inline typename _TessFunctionResultCallback_2_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_4(function, p1, p2); +inline typename _TessFunctionResultCallback_2_4::base* +NewPermanentTessCallback(R (*function)(P1, P2, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_4( + function, p1, p2); } -template -class _ConstTessMemberResultCallback_3_4 : public TessResultCallback4 { +template +class _ConstTessMemberResultCallback_3_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4) const; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1, A2, A3, A4) const; private: const T* object_; @@ -7195,16 +8468,17 @@ class _ConstTessMemberResultCallback_3_4 : public TessResultCallback4::type p3_; public: - inline _ConstTessMemberResultCallback_3_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_4(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7213,11 +8487,14 @@ class _ConstTessMemberResultCallback_3_4 : public TessResultCallback4 -class _ConstTessMemberResultCallback_3_4 : public TessCallback4 { +template +class _ConstTessMemberResultCallback_3_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4) const; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, P3, A1, A2, A3, A4) const; private: const T* object_; @@ -7227,15 +8504,16 @@ class _ConstTessMemberResultCallback_3_4::type p3_; public: - inline _ConstTessMemberResultCallback_3_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_4(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7244,45 +8522,62 @@ class _ConstTessMemberResultCallback_3_4 -inline typename _ConstTessMemberResultCallback_3_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_4(obj, member, p1, p2, p3); +template +inline typename _ConstTessMemberResultCallback_3_4::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, A1, A2, A3, A4) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_4(obj, member, p1, p2, + p3); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_4(obj, member, p1, p2, p3); +template +inline typename _ConstTessMemberResultCallback_3_4::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, A1, A2, A3, A4) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_4(obj, member, p1, p2, + p3); } #endif -template -class _TessMemberResultCallback_3_4 : public TessResultCallback4 { +template +class _TessMemberResultCallback_3_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4) ; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_4( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_4(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7291,29 +8586,31 @@ class _TessMemberResultCallback_3_4 : public TessResultCallback4 } }; -template -class _TessMemberResultCallback_3_4 : public TessCallback4 { +template +class _TessMemberResultCallback_3_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4) ; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, P3, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_4( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_4(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7322,26 +8619,39 @@ class _TessMemberResultCallback_3_4 : }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_3_4::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_4(obj, member, p1, p2, p3); +template +inline typename _TessMemberResultCallback_3_4::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_4(obj, member, p1, p2, p3); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_3_4::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_4(obj, member, p1, p2, p3); +template +inline typename _TessMemberResultCallback_3_4::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_4(obj, member, p1, p2, p3); } #endif -template -class _TessFunctionResultCallback_3_4 : public TessResultCallback4 { +template +class _TessFunctionResultCallback_3_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1,A2,A3,A4); + typedef TessResultCallback4 base; + typedef R (*FunctionSignature)(P1, P2, P3, A1, A2, A3, A4); private: FunctionSignature function_; @@ -7350,15 +8660,16 @@ class _TessFunctionResultCallback_3_4 : public TessResultCallback4::type p3_; public: - inline _TessFunctionResultCallback_3_4(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_4(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, p3_, a1, a2, a3, a4); return result; } else { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, p3_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -7367,11 +8678,13 @@ class _TessFunctionResultCallback_3_4 : public TessResultCallback4 -class _TessFunctionResultCallback_3_4 : public TessCallback4 { +template +class _TessFunctionResultCallback_3_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1,A2,A3,A4); + typedef TessCallback4 base; + typedef void (*FunctionSignature)(P1, P2, P3, A1, A2, A3, A4); private: FunctionSignature function_; @@ -7380,14 +8693,15 @@ class _TessFunctionResultCallback_3_4 : p typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_4(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_4(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (*function_)(p1_,p2_,p3_,a1,a2,a3,a4); + (*function_)(p1_, p2_, p3_, a1, a2, a3, a4); } else { - (*function_)(p1_,p2_,p3_,a1,a2,a3,a4); + (*function_)(p1_, p2_, p3_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -7395,23 +8709,36 @@ class _TessFunctionResultCallback_3_4 : p } }; -template -inline typename _TessFunctionResultCallback_3_4::base* -NewTessCallback(R (*function)(P1,P2,P3,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_4(function, p1, p2, p3); +template +inline typename _TessFunctionResultCallback_3_4::base* +NewTessCallback(R (*function)(P1, P2, P3, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_4(function, p1, p2, p3); } -template -inline typename _TessFunctionResultCallback_3_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_4(function, p1, p2, p3); +template +inline typename _TessFunctionResultCallback_3_4::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_4(function, p1, p2, p3); } -template -class _ConstTessMemberResultCallback_4_4 : public TessResultCallback4 { +template +class _ConstTessMemberResultCallback_4_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4) const; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3, A4) const; private: const T* object_; @@ -7422,16 +8749,17 @@ class _ConstTessMemberResultCallback_4_4 : public TessResultCallback4::type p4_; public: - inline _ConstTessMemberResultCallback_4_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_4(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7440,11 +8768,14 @@ class _ConstTessMemberResultCallback_4_4 : public TessResultCallback4 -class _ConstTessMemberResultCallback_4_4 : public TessCallback4 { +template +class _ConstTessMemberResultCallback_4_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4) const; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3, A4) const; private: const T* object_; @@ -7455,15 +8786,16 @@ class _ConstTessMemberResultCallback_4_4::type p4_; public: - inline _ConstTessMemberResultCallback_4_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_4(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7472,29 +8804,48 @@ class _ConstTessMemberResultCallback_4_4 -inline typename _ConstTessMemberResultCallback_4_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_4::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1, A2, A3, A4) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_4(obj, member, p1, p2, + p3, p4); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_4::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1, A2, A3, A4) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_4(obj, member, p1, + p2, p3, p4); } #endif -template -class _TessMemberResultCallback_4_4 : public TessResultCallback4 { +template +class _TessMemberResultCallback_4_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4) ; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -7502,16 +8853,16 @@ class _TessMemberResultCallback_4_4 : public TessResultCallback4 typename remove_reference::type p4_; public: - inline _TessMemberResultCallback_4_4( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_4(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7520,14 +8871,16 @@ class _TessMemberResultCallback_4_4 : public TessResultCallback4 } }; -template -class _TessMemberResultCallback_4_4 : public TessCallback4 { +template +class _TessMemberResultCallback_4_4 : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4) ; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -7535,15 +8888,15 @@ class _TessMemberResultCallback_4_4::type p4_; public: - inline _TessMemberResultCallback_4_4( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_4(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7552,26 +8905,42 @@ class _TessMemberResultCallback_4_4 -inline typename _TessMemberResultCallback_4_4::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_4::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_4_4::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_4::base* +NewPermanentTessCallback(T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_4(obj, member, p1, p2, p3, p4); } #endif -template -class _TessFunctionResultCallback_4_4 : public TessResultCallback4 { +template +class _TessFunctionResultCallback_4_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3,A4); + typedef TessResultCallback4 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, A1, A2, A3, A4); private: FunctionSignature function_; @@ -7581,15 +8950,16 @@ class _TessFunctionResultCallback_4_4 : public TessResultCallback4::type p4_; public: - inline _TessFunctionResultCallback_4_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_4(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -7598,11 +8968,13 @@ class _TessFunctionResultCallback_4_4 : public TessResultCallback4 -class _TessFunctionResultCallback_4_4 : public TessCallback4 { +template +class _TessFunctionResultCallback_4_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3,A4); + typedef TessCallback4 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, A1, A2, A3, A4); private: FunctionSignature function_; @@ -7612,14 +8984,15 @@ class _TessFunctionResultCallback_4_4 typename remove_reference::type p4_; public: - inline _TessFunctionResultCallback_4_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_4(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); } else { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4); + (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -7627,23 +9000,38 @@ class _TessFunctionResultCallback_4_4 } }; -template -inline typename _TessFunctionResultCallback_4_4::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_4(function, p1, p2, p3, p4); +template +inline typename _TessFunctionResultCallback_4_4::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_4(function, p1, p2, p3, p4); } -template -inline typename _TessFunctionResultCallback_4_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_4(function, p1, p2, p3, p4); +template +inline typename _TessFunctionResultCallback_4_4::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_4(function, p1, p2, p3, p4); } -template -class _ConstTessMemberResultCallback_5_4 : public TessResultCallback4 { +template +class _ConstTessMemberResultCallback_5_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4) const; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4) const; private: const T* object_; @@ -7655,16 +9043,23 @@ class _ConstTessMemberResultCallback_5_4 : public TessResultCallback4::type p5_; public: - inline _ConstTessMemberResultCallback_5_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_4(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7673,11 +9068,14 @@ class _ConstTessMemberResultCallback_5_4 : public TessResultCallback4 -class _ConstTessMemberResultCallback_5_4 : public TessCallback4 { +template +class _ConstTessMemberResultCallback_5_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4) const; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4) const; private: const T* object_; @@ -7689,15 +9087,22 @@ class _ConstTessMemberResultCallback_5_4::type p5_; public: - inline _ConstTessMemberResultCallback_5_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_4(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7706,29 +9111,47 @@ class _ConstTessMemberResultCallback_5_4 -inline typename _ConstTessMemberResultCallback_5_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_4(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_4::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3, A4) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_4(obj, member, p1, + p2, p3, p4, p5); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_4(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_4::base* +NewPermanentTessCallback( + const T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3, A4) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_4( + obj, member, p1, p2, p3, p4, p5); } #endif -template -class _TessMemberResultCallback_5_4 : public TessResultCallback4 { +template +class _TessMemberResultCallback_5_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4) ; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -7737,16 +9160,22 @@ class _TessMemberResultCallback_5_4 : public TessResultCallback4 typename remove_reference::type p5_; public: - inline _TessMemberResultCallback_5_4( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_4(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7755,14 +9184,17 @@ class _TessMemberResultCallback_5_4 : public TessResultCallback4 } }; -template -class _TessMemberResultCallback_5_4 : public TessCallback4 { +template +class _TessMemberResultCallback_5_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4) ; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -7771,15 +9203,21 @@ class _TessMemberResultCallback_5_4::type p5_; public: - inline _TessMemberResultCallback_5_4( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_4(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7788,26 +9226,45 @@ class _TessMemberResultCallback_5_4 -inline typename _TessMemberResultCallback_5_4::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_4(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_4::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_4(obj, member, p1, p2, p3, + p4, p5); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_4::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_4(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_4::base* +NewPermanentTessCallback(T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_4(obj, member, p1, p2, p3, + p4, p5); } #endif -template -class _TessFunctionResultCallback_5_4 : public TessResultCallback4 { +template +class _TessFunctionResultCallback_5_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4); + typedef TessResultCallback4 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4); private: FunctionSignature function_; @@ -7818,15 +9275,16 @@ class _TessFunctionResultCallback_5_4 : public TessResultCallback4::type p5_; public: - inline _TessFunctionResultCallback_5_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_4(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -7835,11 +9293,14 @@ class _TessFunctionResultCallback_5_4 : public TessResultCallback4 -class _TessFunctionResultCallback_5_4 : public TessCallback4 { +template +class _TessFunctionResultCallback_5_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4); + typedef TessCallback4 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4); private: FunctionSignature function_; @@ -7850,14 +9311,15 @@ class _TessFunctionResultCallback_5_4::type p5_; public: - inline _TessFunctionResultCallback_5_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_4(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -7865,23 +9327,41 @@ class _TessFunctionResultCallback_5_4 -inline typename _TessFunctionResultCallback_5_4::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_4(function, p1, p2, p3, p4, p5); +template +inline typename _TessFunctionResultCallback_5_4::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_4(function, p1, p2, p3, + p4, p5); } -template -inline typename _TessFunctionResultCallback_5_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_4(function, p1, p2, p3, p4, p5); +template +inline typename _TessFunctionResultCallback_5_4::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_4(function, p1, p2, p3, + p4, p5); } -template -class _ConstTessMemberResultCallback_6_4 : public TessResultCallback4 { +template +class _ConstTessMemberResultCallback_6_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) const; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4) const; private: const T* object_; @@ -7894,16 +9374,26 @@ class _ConstTessMemberResultCallback_6_4 : public TessResultCallback4::type p6_; public: - inline _ConstTessMemberResultCallback_6_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_4(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7912,11 +9402,15 @@ class _ConstTessMemberResultCallback_6_4 : public TessResultCallback4 -class _ConstTessMemberResultCallback_6_4 : public TessCallback4 { +template +class _ConstTessMemberResultCallback_6_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) const; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, + A4) const; private: const T* object_; @@ -7929,15 +9423,23 @@ class _ConstTessMemberResultCallback_6_4::type p6_; public: - inline _ConstTessMemberResultCallback_6_4(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_4(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7946,29 +9448,50 @@ class _ConstTessMemberResultCallback_6_4 -inline typename _ConstTessMemberResultCallback_6_4::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_4(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_4< + true, R, T1, P1, P2, P3, P4, P5, P6, A1, A2, A3, A4>::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_4( + obj, member, p1, p2, p3, p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_4::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_4(obj, member, p1, p2, p3, p4, p5, p6); +template +inline + typename _ConstTessMemberResultCallback_6_4::base* + NewPermanentTessCallback( + const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_4( + obj, member, p1, p2, p3, p4, p5, p6); } #endif -template -class _TessMemberResultCallback_6_4 : public TessResultCallback4 { +template +class _TessMemberResultCallback_6_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) ; + typedef TessResultCallback4 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -7978,16 +9501,25 @@ class _TessMemberResultCallback_6_4 : public TessResultCallback4 typename remove_reference::type p6_; public: - inline _TessMemberResultCallback_6_4( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_4(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -7996,14 +9528,17 @@ class _TessMemberResultCallback_6_4 : public TessResultCallback4 } }; -template -class _TessMemberResultCallback_6_4 : public TessCallback4 { +template +class _TessMemberResultCallback_6_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) ; + typedef TessCallback4 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -8013,15 +9548,22 @@ class _TessMemberResultCallback_6_4::type p6_; public: - inline _TessMemberResultCallback_6_4( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_4(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8030,26 +9572,45 @@ class _TessMemberResultCallback_6_4 -inline typename _TessMemberResultCallback_6_4::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_4(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_4::base* +NewTessCallback(T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessMemberResultCallback_6_4(obj, member, p1, p2, + p3, p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_6_4::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_4(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_4::base* +NewPermanentTessCallback( + T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _TessMemberResultCallback_6_4(obj, member, p1, p2, + p3, p4, p5, p6); } #endif -template -class _TessFunctionResultCallback_6_4 : public TessResultCallback4 { +template +class _TessFunctionResultCallback_6_4 + : public TessResultCallback4 { public: - typedef TessResultCallback4 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4); + typedef TessResultCallback4 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4); private: FunctionSignature function_; @@ -8061,15 +9622,22 @@ class _TessFunctionResultCallback_6_4 : public TessResultCallback4::type p6_; public: - inline _TessFunctionResultCallback_6_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_4(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8078,11 +9646,14 @@ class _TessFunctionResultCallback_6_4 : public TessResultCallback4 -class _TessFunctionResultCallback_6_4 : public TessCallback4 { +template +class _TessFunctionResultCallback_6_4 + : public TessCallback4 { public: - typedef TessCallback4 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4); + typedef TessCallback4 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4); private: FunctionSignature function_; @@ -8094,14 +9665,21 @@ class _TessFunctionResultCallback_6_4::type p6_; public: - inline _TessFunctionResultCallback_6_4(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_4(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8109,53 +9687,73 @@ class _TessFunctionResultCallback_6_4 -inline typename _TessFunctionResultCallback_6_4::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_4(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_4::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_4(function, p1, p2, + p3, p4, p5, p6); } -template -inline typename _TessFunctionResultCallback_6_4::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_4(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_4::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_4(function, p1, p2, + p3, p4, p5, p6); } -template +template class TessCallback5 { public: - virtual ~TessCallback5() { } - virtual void Run(A1,A2,A3,A4,A5) = 0; + virtual ~TessCallback5() {} + virtual void Run(A1, A2, A3, A4, A5) = 0; }; -template +template class TessResultCallback5 { public: - virtual ~TessResultCallback5() { } - virtual R Run(A1,A2,A3,A4,A5) = 0; + virtual ~TessResultCallback5() {} + virtual R Run(A1, A2, A3, A4, A5) = 0; }; -template -class _ConstTessMemberResultCallback_0_5 : public TessResultCallback5 { +template +class _ConstTessMemberResultCallback_0_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(A1,A2,A3,A4,A5) const; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(A1, A2, A3, A4, A5) const; private: const T* object_; MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_5(const T* object, MemberSignature member) - : object_(object), - member_(member) { } + inline _ConstTessMemberResultCallback_0_5(const T* object, + MemberSignature member) + : object_(object), member_(member) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(a1,a2,a3,a4,a5); + R result = (object_->*member_)(a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(a1,a2,a3,a4,a5); + R result = (object_->*member_)(a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8165,25 +9763,26 @@ class _ConstTessMemberResultCallback_0_5 : public TessResultCallback5 -class _ConstTessMemberResultCallback_0_5 : public TessCallback5 { +class _ConstTessMemberResultCallback_0_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(A1,A2,A3,A4,A5) const; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(A1, A2, A3, A4, A5) const; private: const T* object_; MemberSignature member_; public: - inline _ConstTessMemberResultCallback_0_5(const T* object, MemberSignature member) - : object_(object), - member_(member) { } + inline _ConstTessMemberResultCallback_0_5(const T* object, + MemberSignature member) + : object_(object), member_(member) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(a1,a2,a3,a4,a5); + (object_->*member_)(a1, a2, a3, a4, a5); } else { - (object_->*member_)(a1,a2,a3,a4,a5); + (object_->*member_)(a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8192,42 +9791,50 @@ class _ConstTessMemberResultCallback_0_5 : pub }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(A1,A2,A3,A4,A5) const) { - return new _ConstTessMemberResultCallback_0_5(obj, member); +template +inline typename _ConstTessMemberResultCallback_0_5::base* +NewTessCallback(const T1* obj, R (T2::*member)(A1, A2, A3, A4, A5) const) { + return new _ConstTessMemberResultCallback_0_5(obj, member); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_0_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(A1,A2,A3,A4,A5) const) { - return new _ConstTessMemberResultCallback_0_5(obj, member); +template +inline typename _ConstTessMemberResultCallback_0_5::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(A1, A2, A3, A4, A5) const) { + return new _ConstTessMemberResultCallback_0_5(obj, member); } #endif -template -class _TessMemberResultCallback_0_5 : public TessResultCallback5 { +template +class _TessMemberResultCallback_0_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(A1,A2,A3,A4,A5) ; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_5( T* object, MemberSignature member) - : object_(object), - member_(member) { } + inline _TessMemberResultCallback_0_5(T* object, MemberSignature member) + : object_(object), member_(member) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(a1,a2,a3,a4,a5); + R result = (object_->*member_)(a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(a1,a2,a3,a4,a5); + R result = (object_->*member_)(a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8237,25 +9844,25 @@ class _TessMemberResultCallback_0_5 : public TessResultCallback5 -class _TessMemberResultCallback_0_5 : public TessCallback5 { +class _TessMemberResultCallback_0_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(A1,A2,A3,A4,A5) ; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; public: - inline _TessMemberResultCallback_0_5( T* object, MemberSignature member) - : object_(object), - member_(member) { } + inline _TessMemberResultCallback_0_5(T* object, MemberSignature member) + : object_(object), member_(member) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(a1,a2,a3,a4,a5); + (object_->*member_)(a1, a2, a3, a4, a5); } else { - (object_->*member_)(a1,a2,a3,a4,a5); + (object_->*member_)(a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8264,40 +9871,47 @@ class _TessMemberResultCallback_0_5 : public T }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_0_5::base* -NewTessCallback( T1* obj, R (T2::*member)(A1,A2,A3,A4,A5) ) { - return new _TessMemberResultCallback_0_5(obj, member); +template +inline typename _TessMemberResultCallback_0_5::base* +NewTessCallback(T1* obj, R (T2::*member)(A1, A2, A3, A4, A5)) { + return new _TessMemberResultCallback_0_5( + obj, member); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_0_5::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(A1,A2,A3,A4,A5) ) { - return new _TessMemberResultCallback_0_5(obj, member); +template +inline typename _TessMemberResultCallback_0_5::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(A1, A2, A3, A4, A5)) { + return new _TessMemberResultCallback_0_5( + obj, member); } #endif template -class _TessFunctionResultCallback_0_5 : public TessResultCallback5 { +class _TessFunctionResultCallback_0_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(A1,A2,A3,A4,A5); + typedef TessResultCallback5 base; + typedef R (*FunctionSignature)(A1, A2, A3, A4, A5); private: FunctionSignature function_; public: inline _TessFunctionResultCallback_0_5(FunctionSignature function) - : function_(function) { } + : function_(function) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (*function_)(a1,a2,a3,a4,a5); + R result = (*function_)(a1, a2, a3, a4, a5); return result; } else { - R result = (*function_)(a1,a2,a3,a4,a5); + R result = (*function_)(a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8307,23 +9921,24 @@ class _TessFunctionResultCallback_0_5 : public TessResultCallback5 -class _TessFunctionResultCallback_0_5 : public TessCallback5 { +class _TessFunctionResultCallback_0_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(A1,A2,A3,A4,A5); + typedef TessCallback5 base; + typedef void (*FunctionSignature)(A1, A2, A3, A4, A5); private: FunctionSignature function_; public: inline _TessFunctionResultCallback_0_5(FunctionSignature function) - : function_(function) { } + : function_(function) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (*function_)(a1,a2,a3,a4,a5); + (*function_)(a1, a2, a3, a4, a5); } else { - (*function_)(a1,a2,a3,a4,a5); + (*function_)(a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8332,22 +9947,28 @@ class _TessFunctionResultCallback_0_5 : public Te }; template -inline typename _TessFunctionResultCallback_0_5::base* -NewTessCallback(R (*function)(A1,A2,A3,A4,A5)) { - return new _TessFunctionResultCallback_0_5(function); +inline + typename _TessFunctionResultCallback_0_5::base* + NewTessCallback(R (*function)(A1, A2, A3, A4, A5)) { + return new _TessFunctionResultCallback_0_5( + function); } template -inline typename _TessFunctionResultCallback_0_5::base* -NewPermanentTessCallback(R (*function)(A1,A2,A3,A4,A5)) { - return new _TessFunctionResultCallback_0_5(function); +inline typename _TessFunctionResultCallback_0_5::base* +NewPermanentTessCallback(R (*function)(A1, A2, A3, A4, A5)) { + return new _TessFunctionResultCallback_0_5( + function); } -template -class _ConstTessMemberResultCallback_1_5 : public TessResultCallback5 { +template +class _ConstTessMemberResultCallback_1_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3,A4,A5) const; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -8355,16 +9976,16 @@ class _ConstTessMemberResultCallback_1_5 : public TessResultCallback5::type p1_; public: - inline _ConstTessMemberResultCallback_1_5(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_5(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8373,11 +9994,13 @@ class _ConstTessMemberResultCallback_1_5 : public TessResultCallback5 -class _ConstTessMemberResultCallback_1_5 : public TessCallback5 { +template +class _ConstTessMemberResultCallback_1_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3,A4,A5) const; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -8385,15 +10008,15 @@ class _ConstTessMemberResultCallback_1_5 : typename remove_reference::type p1_; public: - inline _ConstTessMemberResultCallback_1_5(const T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _ConstTessMemberResultCallback_1_5(const T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8402,43 +10025,53 @@ class _ConstTessMemberResultCallback_1_5 : }; #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2,A3,A4,A5) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_5(obj, member, p1); +template +inline typename _ConstTessMemberResultCallback_1_5::base* +NewTessCallback(const T1* obj, R (T2::*member)(P1, A1, A2, A3, A4, A5) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_5(obj, member, p1); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_1_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,A1,A2,A3,A4,A5) const, typename Identity::type p1) { - return new _ConstTessMemberResultCallback_1_5(obj, member, p1); +template +inline typename _ConstTessMemberResultCallback_1_5::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, A1, A2, A3, A4, A5) const, + typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_5(obj, member, p1); } #endif -template -class _TessMemberResultCallback_1_5 : public TessResultCallback5 { +template +class _TessMemberResultCallback_1_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,A1,A2,A3,A4,A5) ; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_5( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_5(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8447,27 +10080,28 @@ class _TessMemberResultCallback_1_5 : public TessResultCallback5 -class _TessMemberResultCallback_1_5 : public TessCallback5 { +template +class _TessMemberResultCallback_1_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,A1,A2,A3,A4,A5) ; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; public: - inline _TessMemberResultCallback_1_5( T* object, MemberSignature member, P1 p1) - : object_(object), - member_(member), p1_(p1) { } + inline _TessMemberResultCallback_1_5(T* object, MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8476,26 +10110,36 @@ class _TessMemberResultCallback_1_5 : publ }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_1_5::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,A1,A2,A3,A4,A5) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_5(obj, member, p1); +template +inline typename _TessMemberResultCallback_1_5::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, A1, A2, A3, A4, A5), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_5( + obj, member, p1); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_1_5::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,A1,A2,A3,A4,A5) , typename Identity::type p1) { - return new _TessMemberResultCallback_1_5(obj, member, p1); +template +inline typename _TessMemberResultCallback_1_5::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, A1, A2, A3, A4, A5), + typename Identity::type p1) { + return new _TessMemberResultCallback_1_5(obj, member, p1); } #endif -template -class _TessFunctionResultCallback_1_5 : public TessResultCallback5 { +template +class _TessFunctionResultCallback_1_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,A1,A2,A3,A4,A5); + typedef TessResultCallback5 base; + typedef R (*FunctionSignature)(P1, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -8503,14 +10147,14 @@ class _TessFunctionResultCallback_1_5 : public TessResultCallback5 -class _TessFunctionResultCallback_1_5 : public TessCallback5 { +class _TessFunctionResultCallback_1_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,A1,A2,A3,A4,A5); + typedef TessCallback5 base; + typedef void (*FunctionSignature)(P1, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -8531,13 +10176,13 @@ class _TessFunctionResultCallback_1_5 : publi public: inline _TessFunctionResultCallback_1_5(FunctionSignature function, P1 p1) - : function_(function), p1_(p1) { } + : function_(function), p1_(p1) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (*function_)(p1_,a1,a2,a3,a4,a5); + (*function_)(p1_, a1, a2, a3, a4, a5); } else { - (*function_)(p1_,a1,a2,a3,a4,a5); + (*function_)(p1_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8546,22 +10191,30 @@ class _TessFunctionResultCallback_1_5 : publi }; template -inline typename _TessFunctionResultCallback_1_5::base* -NewTessCallback(R (*function)(P1,A1,A2,A3,A4,A5), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_5(function, p1); +inline typename _TessFunctionResultCallback_1_5::base* +NewTessCallback(R (*function)(P1, A1, A2, A3, A4, A5), + typename Identity::type p1) { + return new _TessFunctionResultCallback_1_5( + function, p1); } template -inline typename _TessFunctionResultCallback_1_5::base* -NewPermanentTessCallback(R (*function)(P1,A1,A2,A3,A4,A5), typename Identity::type p1) { - return new _TessFunctionResultCallback_1_5(function, p1); +inline typename _TessFunctionResultCallback_1_5::base* +NewPermanentTessCallback(R (*function)(P1, A1, A2, A3, A4, A5), + typename Identity::type p1) { + return new _TessFunctionResultCallback_1_5( + function, p1); } -template -class _ConstTessMemberResultCallback_2_5 : public TessResultCallback5 { +template +class _ConstTessMemberResultCallback_2_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3,A4,A5) const; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -8570,16 +10223,17 @@ class _ConstTessMemberResultCallback_2_5 : public TessResultCallback5::type p2_; public: - inline _ConstTessMemberResultCallback_2_5(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_5(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8588,11 +10242,14 @@ class _ConstTessMemberResultCallback_2_5 : public TessResultCallback5 -class _ConstTessMemberResultCallback_2_5 : public TessCallback5 { +template +class _ConstTessMemberResultCallback_2_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3,A4,A5) const; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -8601,15 +10258,16 @@ class _ConstTessMemberResultCallback_2_5::type p2_; public: - inline _ConstTessMemberResultCallback_2_5(const T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _ConstTessMemberResultCallback_2_5(const T* object, + MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8618,44 +10276,59 @@ class _ConstTessMemberResultCallback_2_5 -inline typename _ConstTessMemberResultCallback_2_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_5(obj, member, p1, p2); +template +inline typename _ConstTessMemberResultCallback_2_5::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, A1, A2, A3, A4, A5) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_5(obj, member, p1, p2); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_2_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2) { - return new _ConstTessMemberResultCallback_2_5(obj, member, p1, p2); +template +inline typename _ConstTessMemberResultCallback_2_5::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, A1, A2, A3, A4, A5) const, + typename Identity::type p1, + typename Identity::type p2) { + return new _ConstTessMemberResultCallback_2_5(obj, member, p1, + p2); } #endif -template -class _TessMemberResultCallback_2_5 : public TessResultCallback5 { +template +class _TessMemberResultCallback_2_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,A1,A2,A3,A4,A5) ; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_5( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_5(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8664,28 +10337,30 @@ class _TessMemberResultCallback_2_5 : public TessResultCallback5 -class _TessMemberResultCallback_2_5 : public TessCallback5 { +template +class _TessMemberResultCallback_2_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,A1,A2,A3,A4,A5) ; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; public: - inline _TessMemberResultCallback_2_5( T* object, MemberSignature member, P1 p1, P2 p2) - : object_(object), - member_(member), p1_(p1), p2_(p2) { } + inline _TessMemberResultCallback_2_5(T* object, MemberSignature member, P1 p1, + P2 p2) + : object_(object), member_(member), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8694,26 +10369,38 @@ class _TessMemberResultCallback_2_5 : }; #ifndef SWIG -template -inline typename _TessMemberResultCallback_2_5::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_5(obj, member, p1, p2); +template +inline typename _TessMemberResultCallback_2_5::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_5(obj, member, p1, p2); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_2_5::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2) { - return new _TessMemberResultCallback_2_5(obj, member, p1, p2); +template +inline typename _TessMemberResultCallback_2_5::base* +NewPermanentTessCallback(T1* obj, R (T2::*member)(P1, P2, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessMemberResultCallback_2_5(obj, member, p1, p2); } #endif -template -class _TessFunctionResultCallback_2_5 : public TessResultCallback5 { +template +class _TessFunctionResultCallback_2_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,A1,A2,A3,A4,A5); + typedef TessResultCallback5 base; + typedef R (*FunctionSignature)(P1, P2, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -8721,15 +10408,16 @@ class _TessFunctionResultCallback_2_5 : public TessResultCallback5::type p2_; public: - inline _TessFunctionResultCallback_2_5(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_5(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (*function_)(p1_,p2_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, a1, a2, a3, a4, a5); return result; } else { - R result = (*function_)(p1_,p2_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8738,11 +10426,13 @@ class _TessFunctionResultCallback_2_5 : public TessResultCallback5 -class _TessFunctionResultCallback_2_5 : public TessCallback5 { +template +class _TessFunctionResultCallback_2_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,A1,A2,A3,A4,A5); + typedef TessCallback5 base; + typedef void (*FunctionSignature)(P1, P2, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -8750,14 +10440,15 @@ class _TessFunctionResultCallback_2_5 : p typename remove_reference::type p2_; public: - inline _TessFunctionResultCallback_2_5(FunctionSignature function, P1 p1, P2 p2) - : function_(function), p1_(p1), p2_(p2) { } + inline _TessFunctionResultCallback_2_5(FunctionSignature function, P1 p1, + P2 p2) + : function_(function), p1_(p1), p2_(p2) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (*function_)(p1_,p2_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, a1, a2, a3, a4, a5); } else { - (*function_)(p1_,p2_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8765,23 +10456,35 @@ class _TessFunctionResultCallback_2_5 : p } }; -template -inline typename _TessFunctionResultCallback_2_5::base* -NewTessCallback(R (*function)(P1,P2,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_5(function, p1, p2); +template +inline typename _TessFunctionResultCallback_2_5::base* +NewTessCallback(R (*function)(P1, P2, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_5(function, p1, p2); } -template -inline typename _TessFunctionResultCallback_2_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2) { - return new _TessFunctionResultCallback_2_5(function, p1, p2); +template +inline typename _TessFunctionResultCallback_2_5::base* +NewPermanentTessCallback(R (*function)(P1, P2, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2) { + return new _TessFunctionResultCallback_2_5(function, p1, p2); } -template -class _ConstTessMemberResultCallback_3_5 : public TessResultCallback5 { +template +class _ConstTessMemberResultCallback_3_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4,A5) const; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -8791,16 +10494,17 @@ class _ConstTessMemberResultCallback_3_5 : public TessResultCallback5::type p3_; public: - inline _ConstTessMemberResultCallback_3_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_5(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8809,11 +10513,14 @@ class _ConstTessMemberResultCallback_3_5 : public TessResultCallback5 -class _ConstTessMemberResultCallback_3_5 : public TessCallback5 { +template +class _ConstTessMemberResultCallback_3_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4,A5) const; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, P3, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -8823,15 +10530,16 @@ class _ConstTessMemberResultCallback_3_5::type p3_; public: - inline _ConstTessMemberResultCallback_3_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _ConstTessMemberResultCallback_3_5(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8840,45 +10548,62 @@ class _ConstTessMemberResultCallback_3_5 -inline typename _ConstTessMemberResultCallback_3_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_5(obj, member, p1, p2, p3); +template +inline typename _ConstTessMemberResultCallback_3_5::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, A1, A2, A3, A4, A5) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_5(obj, member, p1, p2, + p3); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_3_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _ConstTessMemberResultCallback_3_5(obj, member, p1, p2, p3); +template +inline typename _ConstTessMemberResultCallback_3_5::base* +NewPermanentTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, A1, A2, A3, A4, A5) const, + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _ConstTessMemberResultCallback_3_5(obj, member, p1, + p2, p3); } #endif -template -class _TessMemberResultCallback_3_5 : public TessResultCallback5 { +template +class _TessMemberResultCallback_3_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4,A5) ; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, P3, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_5( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_5(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8887,29 +10612,32 @@ class _TessMemberResultCallback_3_5 : public TessResultCallback5 -class _TessMemberResultCallback_3_5 : public TessCallback5 { +template +class _TessMemberResultCallback_3_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,A1,A2,A3,A4,A5) ; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, P3, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; typename remove_reference::type p3_; public: - inline _TessMemberResultCallback_3_5( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessMemberResultCallback_3_5(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -8918,26 +10646,40 @@ class _TessMemberResultCallback_3_5 -inline typename _TessMemberResultCallback_3_5::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_5(obj, member, p1, p2, p3); +template +inline typename _TessMemberResultCallback_3_5::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_5(obj, member, p1, p2, p3); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_3_5::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessMemberResultCallback_3_5(obj, member, p1, p2, p3); +template +inline typename _TessMemberResultCallback_3_5::base* +NewPermanentTessCallback(T1* obj, + R (T2::*member)(P1, P2, P3, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessMemberResultCallback_3_5(obj, member, p1, p2, p3); } #endif -template -class _TessFunctionResultCallback_3_5 : public TessResultCallback5 { +template +class _TessFunctionResultCallback_3_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,P3,A1,A2,A3,A4,A5); + typedef TessResultCallback5 base; + typedef R (*FunctionSignature)(P1, P2, P3, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -8946,15 +10688,16 @@ class _TessFunctionResultCallback_3_5 : public TessResultCallback5::type p3_; public: - inline _TessFunctionResultCallback_3_5(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_5(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); return result; } else { - R result = (*function_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8963,11 +10706,13 @@ class _TessFunctionResultCallback_3_5 : public TessResultCallback5 -class _TessFunctionResultCallback_3_5 : public TessCallback5 { +template +class _TessFunctionResultCallback_3_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,P3,A1,A2,A3,A4,A5); + typedef TessCallback5 base; + typedef void (*FunctionSignature)(P1, P2, P3, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -8976,14 +10721,15 @@ class _TessFunctionResultCallback_3_5 typename remove_reference::type p3_; public: - inline _TessFunctionResultCallback_3_5(FunctionSignature function, P1 p1, P2 p2, P3 p3) - : function_(function), p1_(p1), p2_(p2), p3_(p3) { } + inline _TessFunctionResultCallback_3_5(FunctionSignature function, P1 p1, + P2 p2, P3 p3) + : function_(function), p1_(p1), p2_(p2), p3_(p3) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (*function_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); } else { - (*function_)(p1_,p2_,p3_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, p3_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -8991,23 +10737,36 @@ class _TessFunctionResultCallback_3_5 } }; -template -inline typename _TessFunctionResultCallback_3_5::base* -NewTessCallback(R (*function)(P1,P2,P3,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_5(function, p1, p2, p3); +template +inline typename _TessFunctionResultCallback_3_5::base* +NewTessCallback(R (*function)(P1, P2, P3, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_5(function, p1, p2, p3); } -template -inline typename _TessFunctionResultCallback_3_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3) { - return new _TessFunctionResultCallback_3_5(function, p1, p2, p3); +template +inline typename _TessFunctionResultCallback_3_5::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3) { + return new _TessFunctionResultCallback_3_5(function, p1, p2, p3); } -template -class _ConstTessMemberResultCallback_4_5 : public TessResultCallback5 { +template +class _ConstTessMemberResultCallback_4_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5) const; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -9018,16 +10777,17 @@ class _ConstTessMemberResultCallback_4_5 : public TessResultCallback5::type p4_; public: - inline _ConstTessMemberResultCallback_4_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_5(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9036,11 +10796,14 @@ class _ConstTessMemberResultCallback_4_5 : public TessResultCallback5 -class _ConstTessMemberResultCallback_4_5 : public TessCallback5 { +template +class _ConstTessMemberResultCallback_4_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5) const; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -9051,15 +10814,16 @@ class _ConstTessMemberResultCallback_4_5::type p4_; public: - inline _ConstTessMemberResultCallback_4_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _ConstTessMemberResultCallback_4_5(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9068,29 +10832,46 @@ class _ConstTessMemberResultCallback_4_5 -inline typename _ConstTessMemberResultCallback_4_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_5(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_5::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1, A2, A3, A4, A5) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_5(obj, member, p1, + p2, p3, p4); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_4_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _ConstTessMemberResultCallback_4_5(obj, member, p1, p2, p3, p4); +template +inline typename _ConstTessMemberResultCallback_4_5::base* +NewPermanentTessCallback( + const T1* obj, R (T2::*member)(P1, P2, P3, P4, A1, A2, A3, A4, A5) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4) { + return new _ConstTessMemberResultCallback_4_5( + obj, member, p1, p2, p3, p4); } #endif -template -class _TessMemberResultCallback_4_5 : public TessResultCallback5 { +template +class _TessMemberResultCallback_4_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5) ; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -9098,16 +10879,16 @@ class _TessMemberResultCallback_4_5 : public TessResultCallback5::type p4_; public: - inline _TessMemberResultCallback_4_5( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_5(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + R result = (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9116,14 +10897,17 @@ class _TessMemberResultCallback_4_5 : public TessResultCallback5 -class _TessMemberResultCallback_4_5 : public TessCallback5 { +template +class _TessMemberResultCallback_4_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5) ; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -9131,15 +10915,15 @@ class _TessMemberResultCallback_4_5::type p4_; public: - inline _TessMemberResultCallback_4_5( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessMemberResultCallback_4_5(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4) + : object_(object), member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9148,26 +10932,44 @@ class _TessMemberResultCallback_4_5 -inline typename _TessMemberResultCallback_4_5::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_5(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_5::base* +NewTessCallback(T1* obj, R (T2::*member)(P1, P2, P3, P4, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_5(obj, member, p1, p2, p3, + p4); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_4_5::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessMemberResultCallback_4_5(obj, member, p1, p2, p3, p4); +template +inline typename _TessMemberResultCallback_4_5::base* +NewPermanentTessCallback(T1* obj, + R (T2::*member)(P1, P2, P3, P4, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessMemberResultCallback_4_5(obj, member, p1, p2, p3, + p4); } #endif -template -class _TessFunctionResultCallback_4_5 : public TessResultCallback5 { +template +class _TessFunctionResultCallback_4_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5); + typedef TessResultCallback5 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -9177,15 +10979,16 @@ class _TessFunctionResultCallback_4_5 : public TessResultCallback5::type p4_; public: - inline _TessFunctionResultCallback_4_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_5(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -9194,11 +10997,14 @@ class _TessFunctionResultCallback_4_5 : public TessResultCallback5 -class _TessFunctionResultCallback_4_5 : public TessCallback5 { +template +class _TessFunctionResultCallback_4_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,A1,A2,A3,A4,A5); + typedef TessCallback5 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -9208,14 +11014,15 @@ class _TessFunctionResultCallback_4_5::type p4_; public: - inline _TessFunctionResultCallback_4_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) { } + inline _TessFunctionResultCallback_4_5(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); } else { - (*function_)(p1_,p2_,p3_,p4_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, p3_, p4_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -9223,23 +11030,40 @@ class _TessFunctionResultCallback_4_5 -inline typename _TessFunctionResultCallback_4_5::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_5(function, p1, p2, p3, p4); +template +inline typename _TessFunctionResultCallback_4_5::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_5(function, p1, p2, p3, + p4); } -template -inline typename _TessFunctionResultCallback_4_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4) { - return new _TessFunctionResultCallback_4_5(function, p1, p2, p3, p4); +template +inline typename _TessFunctionResultCallback_4_5::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4) { + return new _TessFunctionResultCallback_4_5(function, p1, p2, p3, + p4); } -template -class _ConstTessMemberResultCallback_5_5 : public TessResultCallback5 { +template +class _ConstTessMemberResultCallback_5_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) const; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5) const; private: const T* object_; @@ -9251,17 +11075,25 @@ class _ConstTessMemberResultCallback_5_5 : public TessResultCallback5::type p5_; public: - inline _ConstTessMemberResultCallback_5_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_5(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9270,11 +11102,15 @@ class _ConstTessMemberResultCallback_5_5 : public TessResultCallback5 -class _ConstTessMemberResultCallback_5_5 : public TessCallback5 { +template +class _ConstTessMemberResultCallback_5_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) const; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4, + A5) const; private: const T* object_; @@ -9286,15 +11122,22 @@ class _ConstTessMemberResultCallback_5_5::type p5_; public: - inline _ConstTessMemberResultCallback_5_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _ConstTessMemberResultCallback_5_5(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9303,29 +11146,49 @@ class _ConstTessMemberResultCallback_5_5 -inline typename _ConstTessMemberResultCallback_5_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_5(obj, member, p1, p2, p3, p4, p5); +template +inline typename _ConstTessMemberResultCallback_5_5< + true, R, T1, P1, P2, P3, P4, P5, A1, A2, A3, A4, A5>::base* +NewTessCallback(const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_5( + obj, member, p1, p2, p3, p4, p5); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_5_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _ConstTessMemberResultCallback_5_5(obj, member, p1, p2, p3, p4, p5); +template +inline + typename _ConstTessMemberResultCallback_5_5::base* + NewPermanentTessCallback( + const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _ConstTessMemberResultCallback_5_5( + obj, member, p1, p2, p3, p4, p5); } #endif -template -class _TessMemberResultCallback_5_5 : public TessResultCallback5 { +template +class _TessMemberResultCallback_5_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) ; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -9334,16 +11197,24 @@ class _TessMemberResultCallback_5_5 : public TessResultCallback5::type p5_; public: - inline _TessMemberResultCallback_5_5( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_5(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9352,14 +11223,17 @@ class _TessMemberResultCallback_5_5 : public TessResultCallback5 -class _TessMemberResultCallback_5_5 : public TessCallback5 { +template +class _TessMemberResultCallback_5_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) ; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -9368,15 +11242,21 @@ class _TessMemberResultCallback_5_5::type p5_; public: - inline _TessMemberResultCallback_5_5( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessMemberResultCallback_5_5(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9385,26 +11265,44 @@ class _TessMemberResultCallback_5_5 -inline typename _TessMemberResultCallback_5_5::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_5(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_5::base* +NewTessCallback(T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_5(obj, member, p1, p2, + p3, p4, p5); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_5_5::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessMemberResultCallback_5_5(obj, member, p1, p2, p3, p4, p5); +template +inline typename _TessMemberResultCallback_5_5::base* +NewPermanentTessCallback( + T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessMemberResultCallback_5_5(obj, member, p1, p2, + p3, p4, p5); } #endif -template -class _TessFunctionResultCallback_5_5 : public TessResultCallback5 { +template +class _TessFunctionResultCallback_5_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5); + typedef TessResultCallback5 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -9415,15 +11313,16 @@ class _TessFunctionResultCallback_5_5 : public TessResultCallback5::type p5_; public: - inline _TessFunctionResultCallback_5_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_5(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -9432,11 +11331,14 @@ class _TessFunctionResultCallback_5_5 : public TessResultCallback5 -class _TessFunctionResultCallback_5_5 : public TessCallback5 { +template +class _TessFunctionResultCallback_5_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5); + typedef TessCallback5 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -9447,14 +11349,15 @@ class _TessFunctionResultCallback_5_5::type p5_; public: - inline _TessFunctionResultCallback_5_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) { } + inline _TessFunctionResultCallback_5_5(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5) + : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, p3_, p4_, p5_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -9462,23 +11365,42 @@ class _TessFunctionResultCallback_5_5 -inline typename _TessFunctionResultCallback_5_5::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_5(function, p1, p2, p3, p4, p5); +template +inline typename _TessFunctionResultCallback_5_5::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_5(function, p1, p2, + p3, p4, p5); } -template -inline typename _TessFunctionResultCallback_5_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5) { - return new _TessFunctionResultCallback_5_5(function, p1, p2, p3, p4, p5); +template +inline typename _TessFunctionResultCallback_5_5::base* +NewPermanentTessCallback(R (*function)(P1, P2, P3, P4, P5, A1, A2, A3, A4, A5), + typename Identity::type p1, + typename Identity::type p2, + typename Identity::type p3, + typename Identity::type p4, + typename Identity::type p5) { + return new _TessFunctionResultCallback_5_5(function, p1, p2, + p3, p4, p5); } -template -class _ConstTessMemberResultCallback_6_5 : public TessResultCallback5 { +template +class _ConstTessMemberResultCallback_6_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) const; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, + A5) const; private: const T* object_; @@ -9491,16 +11413,26 @@ class _ConstTessMemberResultCallback_6_5 : public TessResultCallback5::type p6_; public: - inline _ConstTessMemberResultCallback_6_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_5(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9509,11 +11441,15 @@ class _ConstTessMemberResultCallback_6_5 : public TessResultCallback5 -class _ConstTessMemberResultCallback_6_5 : public TessCallback5 { +template +class _ConstTessMemberResultCallback_6_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) const; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, + A5) const; private: const T* object_; @@ -9526,15 +11462,23 @@ class _ConstTessMemberResultCallback_6_5::type p6_; public: - inline _ConstTessMemberResultCallback_6_5(const T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _ConstTessMemberResultCallback_6_5(const T* object, + MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9543,29 +11487,50 @@ class _ConstTessMemberResultCallback_6_5 -inline typename _ConstTessMemberResultCallback_6_5::base* -NewTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_5(obj, member, p1, p2, p3, p4, p5, p6); +template +inline + typename _ConstTessMemberResultCallback_6_5::base* + NewTessCallback( + const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_5( + obj, member, p1, p2, p3, p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _ConstTessMemberResultCallback_6_5::base* -NewPermanentTessCallback(const T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) const, typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _ConstTessMemberResultCallback_6_5(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _ConstTessMemberResultCallback_6_5< + false, R, T1, P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5>::base* +NewPermanentTessCallback( + const T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5) const, + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _ConstTessMemberResultCallback_6_5( + obj, member, p1, p2, p3, p4, p5, p6); } #endif -template -class _TessMemberResultCallback_6_5 : public TessResultCallback5 { +template +class _TessMemberResultCallback_6_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) ; + typedef TessResultCallback5 base; + typedef R (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -9575,16 +11540,25 @@ class _TessMemberResultCallback_6_5 : public TessResultCallback5::type p6_; public: - inline _TessMemberResultCallback_6_5( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_5(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); return result; } else { - R result = (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + R result = + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9593,14 +11567,18 @@ class _TessMemberResultCallback_6_5 : public TessResultCallback5 -class _TessMemberResultCallback_6_5 : public TessCallback5 { +template +class _TessMemberResultCallback_6_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (T::*MemberSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) ; + typedef TessCallback5 base; + typedef void (T::*MemberSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, + A5); private: - T* object_; + T* object_; MemberSignature member_; typename remove_reference::type p1_; typename remove_reference::type p2_; @@ -9610,15 +11588,22 @@ class _TessMemberResultCallback_6_5::type p6_; public: - inline _TessMemberResultCallback_6_5( T* object, MemberSignature member, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : object_(object), - member_(member), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessMemberResultCallback_6_5(T* object, MemberSignature member, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : object_(object), + member_(member), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); } else { - (object_->*member_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + (object_->*member_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again member_ = nullptr; delete this; @@ -9627,26 +11612,45 @@ class _TessMemberResultCallback_6_5 -inline typename _TessMemberResultCallback_6_5::base* -NewTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_5(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_5::base* +NewTessCallback(T1* obj, + R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessMemberResultCallback_6_5( + obj, member, p1, p2, p3, p4, p5, p6); } #endif #ifndef SWIG -template -inline typename _TessMemberResultCallback_6_5::base* -NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5) , typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessMemberResultCallback_6_5(obj, member, p1, p2, p3, p4, p5, p6); +template +inline typename _TessMemberResultCallback_6_5::base* +NewPermanentTessCallback( + T1* obj, R (T2::*member)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _TessMemberResultCallback_6_5( + obj, member, p1, p2, p3, p4, p5, p6); } #endif -template -class _TessFunctionResultCallback_6_5 : public TessResultCallback5 { +template +class _TessFunctionResultCallback_6_5 + : public TessResultCallback5 { public: - typedef TessResultCallback5 base; - typedef R (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5); + typedef TessResultCallback5 base; + typedef R (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -9658,15 +11662,22 @@ class _TessFunctionResultCallback_6_5 : public TessResultCallback5::type p6_; public: - inline _TessFunctionResultCallback_6_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_5(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual R Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual R Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); return result; } else { - R result = (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + R result = (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -9675,11 +11686,14 @@ class _TessFunctionResultCallback_6_5 : public TessResultCallback5 -class _TessFunctionResultCallback_6_5 : public TessCallback5 { +template +class _TessFunctionResultCallback_6_5 + : public TessCallback5 { public: - typedef TessCallback5 base; - typedef void (*FunctionSignature)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5); + typedef TessCallback5 base; + typedef void (*FunctionSignature)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5); private: FunctionSignature function_; @@ -9691,14 +11705,21 @@ class _TessFunctionResultCallback_6_5::type p6_; public: - inline _TessFunctionResultCallback_6_5(FunctionSignature function, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) - : function_(function), p1_(p1), p2_(p2), p3_(p3), p4_(p4), p5_(p5), p6_(p6) { } + inline _TessFunctionResultCallback_6_5(FunctionSignature function, P1 p1, + P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) + : function_(function), + p1_(p1), + p2_(p2), + p3_(p3), + p4_(p4), + p5_(p5), + p6_(p6) {} - virtual void Run(A1 a1,A2 a2,A3 a3,A4 a4,A5 a5) { + virtual void Run(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { if (!del) { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); } else { - (*function_)(p1_,p2_,p3_,p4_,p5_,p6_,a1,a2,a3,a4,a5); + (*function_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2, a3, a4, a5); // zero out the pointer to ensure segfault if used again function_ = nullptr; delete this; @@ -9706,16 +11727,32 @@ class _TessFunctionResultCallback_6_5 -inline typename _TessFunctionResultCallback_6_5::base* -NewTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_5(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_5::base* +NewTessCallback(R (*function)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, + typename Identity::type p6) { + return new _TessFunctionResultCallback_6_5( + function, p1, p2, p3, p4, p5, p6); } -template -inline typename _TessFunctionResultCallback_6_5::base* -NewPermanentTessCallback(R (*function)(P1,P2,P3,P4,P5,P6,A1,A2,A3,A4,A5), typename Identity::type p1, typename Identity::type p2, typename Identity::type p3, typename Identity::type p4, typename Identity::type p5, typename Identity::type p6) { - return new _TessFunctionResultCallback_6_5(function, p1, p2, p3, p4, p5, p6); +template +inline typename _TessFunctionResultCallback_6_5::base* +NewPermanentTessCallback( + R (*function)(P1, P2, P3, P4, P5, P6, A1, A2, A3, A4, A5), + typename Identity::type p1, typename Identity::type p2, + typename Identity::type p3, typename Identity::type p4, + typename Identity::type p5, typename Identity::type p6) { + return new _TessFunctionResultCallback_6_5( + function, p1, p2, p3, p4, p5, p6); } #endif // TESS_CALLBACK_SPECIALIZATIONS_H_ diff --git a/src/ccutil/tessdatamanager.cpp b/src/ccutil/tessdatamanager.cpp index 28c3b590c4..dbfe31fd03 100644 --- a/src/ccutil/tessdatamanager.cpp +++ b/src/ccutil/tessdatamanager.cpp @@ -26,32 +26,31 @@ #include #include "helpers.h" +#include "params.h" #include "serialis.h" #include "strngs.h" #include "tprintf.h" -#include "params.h" namespace tesseract { -TessdataManager::TessdataManager() : reader_(nullptr), is_loaded_(false), swap_(false) { +TessdataManager::TessdataManager() + : reader_(nullptr), is_loaded_(false), swap_(false) { SetVersionString(PACKAGE_VERSION); } TessdataManager::TessdataManager(FileReader reader) - : reader_(reader), - is_loaded_(false), - swap_(false) { + : reader_(reader), is_loaded_(false), swap_(false) { SetVersionString(PACKAGE_VERSION); } // Lazily loads from the the given filename. Won't actually read the file // until it needs it. -void TessdataManager::LoadFileLater(const char *data_file_name) { +void TessdataManager::LoadFileLater(const char* data_file_name) { Clear(); data_file_name_ = data_file_name; } -bool TessdataManager::Init(const char *data_file_name) { +bool TessdataManager::Init(const char* data_file_name) { GenericVector data; if (reader_ == nullptr) { if (!LoadDataFromFile(data_file_name, &data)) return false; @@ -62,7 +61,7 @@ bool TessdataManager::Init(const char *data_file_name) { } // Loads from the given memory buffer as if a file. -bool TessdataManager::LoadMemBuffer(const char *name, const char *data, +bool TessdataManager::LoadMemBuffer(const char* name, const char* data, int size) { Clear(); data_file_name_ = name; @@ -97,7 +96,7 @@ bool TessdataManager::LoadMemBuffer(const char *name, const char *data, } // Overwrites a single entry of the given type. -void TessdataManager::OverwriteEntry(TessdataType type, const char *data, +void TessdataManager::OverwriteEntry(TessdataType type, const char* data, int size) { is_loaded_ = true; entries_[type].resize_no_init(size); @@ -105,7 +104,7 @@ void TessdataManager::OverwriteEntry(TessdataType type, const char *data, } // Saves to the given filename. -bool TessdataManager::SaveFile(const STRING &filename, +bool TessdataManager::SaveFile(const STRING& filename, FileWriter writer) const { ASSERT_HOST(is_loaded_); GenericVector data; @@ -117,7 +116,7 @@ bool TessdataManager::SaveFile(const STRING &filename, } // Serializes to the given vector. -void TessdataManager::Serialize(GenericVector *data) const { +void TessdataManager::Serialize(GenericVector* data) const { ASSERT_HOST(is_loaded_); // Compute the offset_table and total size. int64_t offset_table[TESSDATA_NUM_ENTRIES]; @@ -166,15 +165,15 @@ void TessdataManager::Directory() const { // Opens the given TFile pointer to the given component type. // Returns false in case of failure. -bool TessdataManager::GetComponent(TessdataType type, TFile *fp) { +bool TessdataManager::GetComponent(TessdataType type, TFile* fp) { if (!is_loaded_ && !Init(data_file_name_.string())) return false; - const TessdataManager *const_this = this; + const TessdataManager* const_this = this; return const_this->GetComponent(type, fp); } // As non-const version except it can't load the component if not already // loaded. -bool TessdataManager::GetComponent(TessdataType type, TFile *fp) const { +bool TessdataManager::GetComponent(TessdataType type, TFile* fp) const { ASSERT_HOST(is_loaded_); if (entries_[type].empty()) return false; fp->Open(&entries_[type][0], entries_[type].size()); @@ -189,21 +188,20 @@ std::string TessdataManager::VersionString() const { } // Sets the version string to the given v_str. -void TessdataManager::SetVersionString(const std::string &v_str) { +void TessdataManager::SetVersionString(const std::string& v_str) { entries_[TESSDATA_VERSION].resize_no_init(v_str.size()); memcpy(&entries_[TESSDATA_VERSION][0], v_str.data(), v_str.size()); } -bool TessdataManager::CombineDataFiles( - const char *language_data_path_prefix, - const char *output_filename) { +bool TessdataManager::CombineDataFiles(const char* language_data_path_prefix, + const char* output_filename) { // Load individual tessdata components from files. for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { TessdataType type; ASSERT_HOST(TessdataTypeFromFileSuffix(kTessdataFileSuffixes[i], &type)); STRING filename = language_data_path_prefix; filename += kTessdataFileSuffixes[i]; - FILE *fp = fopen(filename.string(), "rb"); + FILE* fp = fopen(filename.string(), "rb"); if (fp != nullptr) { fclose(fp); if (!LoadDataFromFile(filename, &entries_[type])) { @@ -225,10 +223,9 @@ bool TessdataManager::CombineDataFiles( return SaveFile(output_filename, nullptr); } -bool TessdataManager::OverwriteComponents( - const char *new_traineddata_filename, - char **component_filenames, - int num_new_components) { +bool TessdataManager::OverwriteComponents(const char* new_traineddata_filename, + char** component_filenames, + int num_new_components) { // Open the files with the new components. for (int i = 0; i < num_new_components; ++i) { TessdataType type; @@ -244,7 +241,7 @@ bool TessdataManager::OverwriteComponents( return SaveFile(new_traineddata_filename, nullptr); } -bool TessdataManager::ExtractToFile(const char *filename) { +bool TessdataManager::ExtractToFile(const char* filename) { TessdataType type = TESSDATA_NUM_ENTRIES; ASSERT_HOST( tesseract::TessdataManager::TessdataTypeFromFileName(filename, &type)); @@ -252,23 +249,25 @@ bool TessdataManager::ExtractToFile(const char *filename) { return SaveDataToFile(entries_[type], filename); } -bool TessdataManager::TessdataTypeFromFileSuffix(const char *suffix, - TessdataType *type) { +bool TessdataManager::TessdataTypeFromFileSuffix(const char* suffix, + TessdataType* type) { for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) { *type = static_cast(i); return true; } } - tprintf("TessdataManager can't determine which tessdata" - " component is represented by %s\n", suffix); + tprintf( + "TessdataManager can't determine which tessdata" + " component is represented by %s\n", + suffix); return false; } -bool TessdataManager::TessdataTypeFromFileName(const char *filename, - TessdataType *type) { +bool TessdataManager::TessdataTypeFromFileName(const char* filename, + TessdataType* type) { // Get the file suffix (extension) - const char *suffix = strrchr(filename, '.'); + const char* suffix = strrchr(filename, '.'); if (suffix == nullptr || *(++suffix) == '\0') return false; return TessdataTypeFromFileSuffix(suffix, type); } diff --git a/src/ccutil/tessdatamanager.h b/src/ccutil/tessdatamanager.h index f003adb42d..21e4f82bb0 100644 --- a/src/ccutil/tessdatamanager.h +++ b/src/ccutil/tessdatamanager.h @@ -86,7 +86,7 @@ enum TessdataType { * kTessdataFileSuffixes[i] indicates the file suffix for * tessdata of type i (from TessdataType enum). */ -static const char *const kTessdataFileSuffixes[] = { +static const char* const kTessdataFileSuffixes[] = { kLangConfigFileSuffix, // 0 kUnicharsetFileSuffix, // 1 kAmbigsFileSuffix, // 2 @@ -122,7 +122,6 @@ static const char *const kTessdataFileSuffixes[] = { */ static const int kMaxNumTessdataEntries = 1000; - class TessdataManager { public: TessdataManager(); @@ -135,22 +134,22 @@ class TessdataManager { // Lazily loads from the the given filename. Won't actually read the file // until it needs it. - void LoadFileLater(const char *data_file_name); + void LoadFileLater(const char* data_file_name); /** * Opens and reads the given data file right now. * @return true on success. */ - bool Init(const char *data_file_name); + bool Init(const char* data_file_name); // Loads from the given memory buffer as if a file, remembering name as some // arbitrary source id for caching. - bool LoadMemBuffer(const char *name, const char *data, int size); + bool LoadMemBuffer(const char* name, const char* data, int size); // Overwrites a single entry of the given type. - void OverwriteEntry(TessdataType type, const char *data, int size); + void OverwriteEntry(TessdataType type, const char* data, int size); // Saves to the given filename. - bool SaveFile(const STRING &filename, FileWriter writer) const; + bool SaveFile(const STRING& filename, FileWriter writer) const; // Serializes to the given vector. - void Serialize(GenericVector *data) const; + void Serialize(GenericVector* data) const; // Resets to the initial state, keeping the reader. void Clear(); @@ -163,15 +162,15 @@ class TessdataManager { } // Opens the given TFile pointer to the given component type. // Returns false in case of failure. - bool GetComponent(TessdataType type, TFile *fp); + bool GetComponent(TessdataType type, TFile* fp); // As non-const version except it can't load the component if not already // loaded. - bool GetComponent(TessdataType type, TFile *fp) const; + bool GetComponent(TessdataType type, TFile* fp) const; // Returns the current version string. std::string VersionString() const; // Sets the version string to the given v_str. - void SetVersionString(const std::string &v_str); + void SetVersionString(const std::string& v_str); // Returns true if the base Tesseract components are present. bool IsBaseAvailable() const { @@ -183,24 +182,23 @@ class TessdataManager { bool IsLSTMAvailable() const { return !entries_[TESSDATA_LSTM].empty(); } // Return the name of the underlying data file. - const STRING &GetDataFileName() const { return data_file_name_; } + const STRING& GetDataFileName() const { return data_file_name_; } /** * Reads all the standard tesseract config and data files for a language * at the given path and bundles them up into one binary data file. * Returns true if the combined traineddata file was successfully written. */ - bool CombineDataFiles(const char *language_data_path_prefix, - const char *output_filename); + bool CombineDataFiles(const char* language_data_path_prefix, + const char* output_filename); /** * Gets the individual components from the data_file_ with which the class was * initialized. Overwrites the components specified by component_filenames. * Writes the updated traineddata file to new_traineddata_filename. */ - bool OverwriteComponents(const char *new_traineddata_filename, - char **component_filenames, - int num_new_components); + bool OverwriteComponents(const char* new_traineddata_filename, + char** component_filenames, int num_new_components); /** * Extracts tessdata component implied by the name of the input file from @@ -212,7 +210,7 @@ class TessdataManager { * @return true if the component was successfully extracted, false if the * component was not present in the traineddata loaded into TessdataManager. */ - bool ExtractToFile(const char *filename); + bool ExtractToFile(const char* filename); /** * Fills type with TessdataType of the tessdata component represented by the @@ -220,15 +218,15 @@ class TessdataManager { * @return true if the tessdata component type could be determined * from the given file name. */ - static bool TessdataTypeFromFileSuffix(const char *suffix, - TessdataType *type); + static bool TessdataTypeFromFileSuffix(const char* suffix, + TessdataType* type); /** * Tries to determine tessdata component file suffix from filename, * returns true on success. */ - static bool TessdataTypeFromFileName(const char *filename, - TessdataType *type); + static bool TessdataTypeFromFileName(const char* filename, + TessdataType* type); private: // Name of file it came from. diff --git a/src/ccutil/tprintf.cpp b/src/ccutil/tprintf.cpp index da196581d3..8d34c8134a 100644 --- a/src/ccutil/tprintf.cpp +++ b/src/ccutil/tprintf.cpp @@ -22,39 +22,39 @@ #include "config_auto.h" #endif -#include -#include -#include "ccutil.h" -#include "params.h" -#include "strngs.h" -#include "tprintf.h" +#include +#include +#include "ccutil.h" +#include "params.h" +#include "strngs.h" +#include "tprintf.h" -#define MAX_MSG_LEN 65536 +#define MAX_MSG_LEN 65536 #define EXTERN // Since tprintf is protected by a mutex, these parameters can remain global. DLLSYM STRING_VAR(debug_file, "", "File to send tprintf output to"); -DLLSYM void -tprintf_internal( // Trace printf - const char *format, ... // Message +DLLSYM void tprintf_internal( // Trace printf + const char* format, + ... // Message ) { tesseract::tprintfMutex.Lock(); - va_list args; // variable args - static FILE *debugfp = nullptr; // debug file - // debug window + va_list args; // variable args + static FILE* debugfp = nullptr; // debug file + // debug window int32_t offset = 0; // into message static char msg[MAX_MSG_LEN + 1]; va_start(args, format); // variable list - // Format into msg - #ifdef _WIN32 +// Format into msg +#ifdef _WIN32 offset += _vsnprintf(msg + offset, MAX_MSG_LEN - offset, format, args); if (strcmp(debug_file.string(), "/dev/null") == 0) debug_file.set_value("nul"); - #else +#else offset += vsnprintf(msg + offset, MAX_MSG_LEN - offset, format, args); - #endif +#endif va_end(args); if (debugfp == nullptr && strlen(debug_file.string()) > 0) { diff --git a/src/ccutil/tprintf.h b/src/ccutil/tprintf.h index 27a3382369..a077de25c2 100644 --- a/src/ccutil/tprintf.h +++ b/src/ccutil/tprintf.h @@ -17,13 +17,12 @@ * **********************************************************************/ -#ifndef TESSERACT_CCUTIL_TPRINTF_H -#define TESSERACT_CCUTIL_TPRINTF_H +#ifndef TESSERACT_CCUTIL_TPRINTF_H +#define TESSERACT_CCUTIL_TPRINTF_H #include "params.h" -extern DLLSYM STRING_VAR_H(debug_file, "", - "File to send tprintf output to"); +extern DLLSYM STRING_VAR_H(debug_file, "", "File to send tprintf output to"); extern DLLSYM BOOL_VAR_H(debug_window_on, TRUE, "Send tprintf to window unless file set"); @@ -31,6 +30,7 @@ extern DLLSYM BOOL_VAR_H(debug_window_on, TRUE, #define tprintf(...) tprintf_internal(__VA_ARGS__) extern TESS_API void tprintf_internal( // Trace printf - const char *format, ...); // Message + const char* format, + ...); // Message #endif // define TESSERACT_CCUTIL_TPRINTF_H diff --git a/src/ccutil/unichar.cpp b/src/ccutil/unichar.cpp index 9ac853be31..7599746b58 100644 --- a/src/ccutil/unichar.cpp +++ b/src/ccutil/unichar.cpp @@ -34,26 +34,22 @@ UNICHAR::UNICHAR(const char* utf8_str, int len) { int total_len = 0; int step = 0; if (len < 0) { - for (len = 0; len < UNICHAR_LEN && utf8_str[len] != 0; ++len); + for (len = 0; len < UNICHAR_LEN && utf8_str[len] != 0; ++len) + ; } for (total_len = 0; total_len < len; total_len += step) { step = utf8_step(utf8_str + total_len); - if (total_len + step > UNICHAR_LEN) - break; // Too long. - if (step == 0) - break; // Illegal first byte. + if (total_len + step > UNICHAR_LEN) break; // Too long. + if (step == 0) break; // Illegal first byte. int i; for (i = 1; i < step; ++i) - if ((utf8_str[total_len + i] & 0xc0) != 0x80) - break; - if (i < step) - break; // Illegal surrogate + if ((utf8_str[total_len + i] & 0xc0) != 0x80) break; + if (i < step) break; // Illegal surrogate } memcpy(chars, utf8_str, total_len); if (total_len < UNICHAR_LEN) { chars[UNICHAR_LEN - 1] = total_len; - while (total_len < UNICHAR_LEN - 1) - chars[total_len++] = 0; + while (total_len < UNICHAR_LEN - 1) chars[total_len++] = 0; } } @@ -97,27 +93,25 @@ UNICHAR::UNICHAR(int unicode) { // Get the first character as UCS-4. int UNICHAR::first_uni() const { - static const int utf8_offsets[5] = { - 0, 0, 0x3080, 0xE2080, 0x3C82080 - }; + static const int utf8_offsets[5] = {0, 0, 0x3080, 0xE2080, 0x3C82080}; int uni = 0; int len = utf8_step(chars); const char* src = chars; switch (len) { - default: - break; - case 4: - uni += static_cast(*src++); - uni <<= 6; - case 3: - uni += static_cast(*src++); - uni <<= 6; - case 2: - uni += static_cast(*src++); - uni <<= 6; - case 1: - uni += static_cast(*src++); + default: + break; + case 4: + uni += static_cast(*src++); + uni <<= 6; + case 3: + uni += static_cast(*src++); + uni <<= 6; + case 2: + uni += static_cast(*src++); + uni <<= 6; + case 1: + uni += static_cast(*src++); } uni -= utf8_offsets[len]; return uni; @@ -135,15 +129,17 @@ char* UNICHAR::utf8_str() const { // Get the number of bytes in the first character of the given utf8 string. int UNICHAR::utf8_step(const char* utf8_str) { static const char utf8_bytes[256] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0 - }; + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}; return utf8_bytes[static_cast(*utf8_str)]; } @@ -195,9 +191,7 @@ int UNICHAR::const_iterator::utf8_len() const { return len; } -bool UNICHAR::const_iterator::is_legal() const { - return utf8_step(it_) > 0; -} +bool UNICHAR::const_iterator::is_legal() const { return utf8_step(it_) > 0; } UNICHAR::const_iterator UNICHAR::begin(const char* utf8_str, const int len) { return UNICHAR::const_iterator(utf8_str); diff --git a/src/ccutil/unichar.h b/src/ccutil/unichar.h index fe115055c0..013fb6549e 100644 --- a/src/ccutil/unichar.h +++ b/src/ccutil/unichar.h @@ -57,9 +57,7 @@ using char32 = signed int; // such as fi, ffl etc. These are also stored as utf8. class UNICHAR { public: - UNICHAR() { - memset(chars, 0, UNICHAR_LEN); - } + UNICHAR() { memset(chars, 0, UNICHAR_LEN); } // Construct from a utf8 string. If len<0 then the string is null terminated. // If the string is too long to fit in the UNICHAR then it takes only what @@ -77,13 +75,11 @@ class UNICHAR { // Get the length of the UTF8 string. int utf8_len() const { int len = chars[UNICHAR_LEN - 1]; - return len >=0 && len < UNICHAR_LEN ? len : UNICHAR_LEN; + return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN; } // Get a UTF8 string, but NOT nullptr terminated. - const char* utf8() const { - return chars; - } + const char* utf8() const { return chars; } // Get a terminated UTF8 string: Must delete[] it after use. char* utf8_str() const; @@ -106,13 +102,13 @@ class UNICHAR { // tprintf("Char = %s\n", buf); // } class const_iterator { - using CI = const_iterator ; + using CI = const_iterator; public: // Step to the next UTF8 character. // If the current position is at an illegal UTF8 character, then print an - // error message and step by one byte. If the current position is at a nullptr - // value, don't step past it. + // error message and step by one byte. If the current position is at a + // nullptr value, don't step past it. const_iterator& operator++(); // Return the UCS-4 value at the current position. diff --git a/src/ccutil/unicharcompress.cpp b/src/ccutil/unicharcompress.cpp index 64b238fcee..695c854d0d 100644 --- a/src/ccutil/unicharcompress.cpp +++ b/src/ccutil/unicharcompress.cpp @@ -318,8 +318,8 @@ bool UnicharCompress::DeSerialize(TFile* fp) { // will encode a single index to a UTF8-string, but Chinese, Japanese, Korean // and the Indic scripts will contain a many-to-many mapping. // See the class comment above for details. -STRING UnicharCompress::GetEncodingAsString( - const UNICHARSET& unicharset) const { +STRING +UnicharCompress::GetEncodingAsString(const UNICHARSET& unicharset) const { STRING encoding; for (int c = 0; c < encoder_.size(); ++c) { const RecodedCharID& code = encoder_[c]; diff --git a/src/ccutil/unicharmap.cpp b/src/ccutil/unicharmap.cpp index 236f6b72e3..71df31809f 100644 --- a/src/ccutil/unicharmap.cpp +++ b/src/ccutil/unicharmap.cpp @@ -17,24 +17,20 @@ // /////////////////////////////////////////////////////////////////////// +#include "unicharmap.h" #include -#include "unichar.h" #include "host.h" -#include "unicharmap.h" +#include "unichar.h" -UNICHARMAP::UNICHARMAP() : -nodes(nullptr) { -} +UNICHARMAP::UNICHARMAP() : nodes(nullptr) {} -UNICHARMAP::~UNICHARMAP() { - delete[] nodes; -} +UNICHARMAP::~UNICHARMAP() { delete[] nodes; } // Search the given unichar representation in the tree, using length characters // from it maximum. Each character in the string is interpreted as an index in // an array of nodes. -UNICHAR_ID UNICHARMAP::unichar_to_id(const char* const unichar_repr, - int length) const { +UNICHAR_ID +UNICHARMAP::unichar_to_id(const char* const unichar_repr, int length) const { UNICHARMAP_NODE* current_nodes = nodes; assert(*unichar_repr != '\0'); @@ -63,13 +59,13 @@ void UNICHARMAP::insert(const char* const unichar_repr, UNICHAR_ID id) { if (*current_nodes_pointer == nullptr) *current_nodes_pointer = new UNICHARMAP_NODE[256]; if (current_char[1] == '\0') { - (*current_nodes_pointer) - [static_cast(*current_char)].id = id; + (*current_nodes_pointer)[static_cast(*current_char)].id = + id; return; } current_nodes_pointer = - &((*current_nodes_pointer) - [static_cast(*current_char)].children); + &((*current_nodes_pointer)[static_cast(*current_char)] + .children); ++current_char; } while (true); } @@ -78,8 +74,7 @@ void UNICHARMAP::insert(const char* const unichar_repr, UNICHAR_ID id) { // from it maximum. Each character in the string is interpreted as an index in // an array of nodes. Stop once the tree does not have anymore nodes or once we // found the right unichar_repr. -bool UNICHARMAP::contains(const char* const unichar_repr, - int length) const { +bool UNICHARMAP::contains(const char* const unichar_repr, int length) const { if (unichar_repr == nullptr || *unichar_repr == '\0') return false; if (length <= 0 || length > UNICHAR_LEN) return false; int index = 0; @@ -119,12 +114,7 @@ void UNICHARMAP::clear() { nodes = nullptr; } -UNICHARMAP::UNICHARMAP_NODE::UNICHARMAP_NODE() : -children(nullptr), -id(-1) { -} +UNICHARMAP::UNICHARMAP_NODE::UNICHARMAP_NODE() : children(nullptr), id(-1) {} // Recursively delete the children -UNICHARMAP::UNICHARMAP_NODE::~UNICHARMAP_NODE() { - delete[] children; -} +UNICHARMAP::UNICHARMAP_NODE::~UNICHARMAP_NODE() { delete[] children; } diff --git a/src/ccutil/unicharmap.h b/src/ccutil/unicharmap.h index 45170c4f2c..13eca83faf 100644 --- a/src/ccutil/unicharmap.h +++ b/src/ccutil/unicharmap.h @@ -26,7 +26,6 @@ // UNICHAR_ID. class UNICHARMAP { public: - // Create an empty UNICHARMAP UNICHARMAP(); @@ -55,11 +54,9 @@ class UNICHARMAP { void clear(); private: - // The UNICHARMAP is represented as a tree whose nodes are of type // UNICHARMAP_NODE. struct UNICHARMAP_NODE { - UNICHARMAP_NODE(); ~UNICHARMAP_NODE(); diff --git a/src/ccutil/unicharset.cpp b/src/ccutil/unicharset.cpp index d651ad6cd0..2a9ca861c7 100644 --- a/src/ccutil/unicharset.cpp +++ b/src/ccutil/unicharset.cpp @@ -60,13 +60,12 @@ const double kMinCapHeightFraction = 0.05; /*static */ const char* UNICHARSET::kCustomLigatures[][2] = { - {"ct", "\uE003"}, // c + t -> U+E003 - {"ſh", "\uE006"}, // long-s + h -> U+E006 - {"ſi", "\uE007"}, // long-s + i -> U+E007 - {"ſl", "\uE008"}, // long-s + l -> U+E008 - {"ſſ", "\uE009"}, // long-s + long-s -> U+E009 - {nullptr, nullptr} -}; + {"ct", "\uE003"}, // c + t -> U+E003 + {"ſh", "\uE006"}, // long-s + h -> U+E006 + {"ſi", "\uE007"}, // long-s + i -> U+E007 + {"ſl", "\uE008"}, // long-s + l -> U+E008 + {"ſſ", "\uE009"}, // long-s + long-s -> U+E009 + {nullptr, nullptr}}; // List of mappings to make when ingesting strings from the outside. // The substitutions clean up text that should exist for rendering of @@ -79,16 +78,11 @@ const char* UNICHARSET::kCleanupMaps[][2] = { // List of strings for the SpecialUnicharCodes. Keep in sync with the enum. const char* UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT] = { - " ", - "Joined", - "|Broken|0|1" -}; + " ", "Joined", "|Broken|0|1"}; const char* UNICHARSET::null_script = "NULL"; -UNICHARSET::UNICHAR_PROPERTIES::UNICHAR_PROPERTIES() { - Init(); -} +UNICHARSET::UNICHAR_PROPERTIES::UNICHAR_PROPERTIES() { Init(); } // Initialize all properties to sensible default values. void UNICHARSET::UNICHAR_PROPERTIES::Init() { @@ -172,30 +166,26 @@ void UNICHARSET::UNICHAR_PROPERTIES::CopyFrom(const UNICHAR_PROPERTIES& src) { fragment = saved_fragment; } -UNICHARSET::UNICHARSET() : - unichars(nullptr), - ids(), - size_used(0), - size_reserved(0), - script_table(nullptr), - script_table_size_used(0) { +UNICHARSET::UNICHARSET() + : unichars(nullptr), + ids(), + size_used(0), + size_reserved(0), + script_table(nullptr), + script_table_size_used(0) { clear(); for (int i = 0; i < SPECIAL_UNICHAR_CODES_COUNT; ++i) { unichar_insert(kSpecialUnicharCodes[i]); - if (i == UNICHAR_JOINED) - set_isngram(i, true); + if (i == UNICHAR_JOINED) set_isngram(i, true); } } -UNICHARSET::~UNICHARSET() { - clear(); -} +UNICHARSET::~UNICHARSET() { clear(); } void UNICHARSET::reserve(int unichars_number) { if (unichars_number > size_reserved) { UNICHAR_SLOT* unichars_new = new UNICHAR_SLOT[unichars_number]; - for (int i = 0; i < size_used; ++i) - unichars_new[i] = unichars[i]; + for (int i = 0; i < size_used; ++i) unichars_new[i] = unichars[i]; for (int j = size_used; j < unichars_number; ++j) { unichars_new[j].properties.script_id = add_script(null_script); } @@ -214,8 +204,8 @@ UNICHARSET::unichar_to_id(const char* const unichar_repr) const { : INVALID_UNICHAR_ID; } -UNICHAR_ID UNICHARSET::unichar_to_id(const char* const unichar_repr, - int length) const { +UNICHAR_ID +UNICHARSET::unichar_to_id(const char* const unichar_repr, int length) const { assert(length > 0 && length <= UNICHAR_LEN); std::string cleaned(unichar_repr, length); if (!old_style_included_) cleaned = CleanupString(unichar_repr, length); @@ -240,8 +230,8 @@ int UNICHARSET::step(const char* str) const { // Return whether the given UTF-8 string is encodable with this UNICHARSET. // If not encodable, write the first byte offset which cannot be converted // into the second (return) argument. -bool UNICHARSET::encodable_string(const char *str, - int *first_bad_position) const { +bool UNICHARSET::encodable_string(const char* str, + int* first_bad_position) const { GenericVector encoding; return encode_string(str, true, &encoding, nullptr, first_bad_position); } @@ -315,7 +305,8 @@ const char* UNICHARSET::id_to_unichar_ext(UNICHAR_ID id) const { // Return a STRING that reformats the utf8 str into the str followed // by its hex unicodes. -STRING UNICHARSET::debug_utf8_str(const char* str) { +STRING +UNICHARSET::debug_utf8_str(const char* str) { STRING result = str; result += " ["; int step = 1; @@ -339,9 +330,10 @@ STRING UNICHARSET::debug_utf8_str(const char* str) { // Return a STRING containing debug information on the unichar, including // the id_to_unichar, its hex unicodes and the properties. -STRING UNICHARSET::debug_str(UNICHAR_ID id) const { +STRING +UNICHARSET::debug_str(UNICHAR_ID id) const { if (id == INVALID_UNICHAR_ID) return STRING(id_to_unichar(id)); - const CHAR_FRAGMENT *fragment = this->get_fragment(id); + const CHAR_FRAGMENT* fragment = this->get_fragment(id); if (fragment) { return fragment->to_string(); } @@ -390,7 +382,6 @@ bool UNICHARSET::get_isprivate(UNICHAR_ID unichar_id) const { return (uni >= 0xE000 && uni <= 0xF8FF); } - // Sets all ranges to empty, so they can be expanded to set the values. void UNICHARSET::set_ranges_empty() { for (int id = 0; id < size_used; ++id) { @@ -484,9 +475,9 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) { // not overlap, making their x-height calculations distinct. bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const { int overlap = std::min(unichars[id1].properties.max_top, - unichars[id2].properties.max_top) - - std::max(unichars[id1].properties.min_top, - unichars[id2].properties.min_top); + unichars[id2].properties.max_top) - + std::max(unichars[id1].properties.min_top, + unichars[id2].properties.min_top); return overlap <= 0; } @@ -508,8 +499,7 @@ void UNICHARSET::encode_string(const char* str, int str_index, int str_length, // This is the best result so far. *best_total_length = str_index; *best_encoding = *encoding; - if (best_lengths != nullptr) - *best_lengths = *lengths; + if (best_lengths != nullptr) *best_lengths = *lengths; } if (str_index == str_length) return; int encoding_index = encoding->size(); @@ -524,8 +514,7 @@ void UNICHARSET::encode_string(const char* str, int str_index, int str_length, lengths->push_back(length); encode_string(str, str_index + length, str_length, encoding, lengths, best_total_length, best_encoding, best_lengths); - if (*best_total_length == str_length) - return; // Tail recursion success! + if (*best_total_length == str_length) return; // Tail recursion success! // Failed with that length, truncate back and try again. encoding->truncate(encoding_index); lengths->truncate(encoding_index); @@ -600,16 +589,11 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str, unsigned int UNICHARSET::get_properties(UNICHAR_ID id) const { unsigned int properties = 0; - if (this->get_isalpha(id)) - properties |= ISALPHA_MASK; - if (this->get_islower(id)) - properties |= ISLOWER_MASK; - if (this->get_isupper(id)) - properties |= ISUPPER_MASK; - if (this->get_isdigit(id)) - properties |= ISDIGIT_MASK; - if (this->get_ispunctuation(id)) - properties |= ISPUNCTUATION_MASK; + if (this->get_isalpha(id)) properties |= ISALPHA_MASK; + if (this->get_islower(id)) properties |= ISLOWER_MASK; + if (this->get_isupper(id)) properties |= ISUPPER_MASK; + if (this->get_isdigit(id)) properties |= ISDIGIT_MASK; + if (this->get_ispunctuation(id)) properties |= ISPUNCTUATION_MASK; return properties; } @@ -659,7 +643,7 @@ void UNICHARSET::unichar_insert(const char* const unichar_repr, this->unichars[size_used].properties.fragment = frag; if (frag != nullptr && this->contains_unichar(frag->get_unichar())) { this->unichars[size_used].properties.script_id = - this->get_script(frag->get_unichar()); + this->get_script(frag->get_unichar()); } this->unichars[size_used].properties.enabled = true; ids.insert(unichars[size_used].representation, size_used); @@ -688,7 +672,7 @@ bool UNICHARSET::eq(UNICHAR_ID unichar_id, return strcmp(this->id_to_unichar(unichar_id), unichar_repr) == 0; } -bool UNICHARSET::save_to_string(STRING *str) const { +bool UNICHARSET::save_to_string(STRING* str) const { const int kFileBufSize = 1024; char buffer[kFileBufSize + 1]; snprintf(buffer, kFileBufSize, "%d\n", this->size()); @@ -705,18 +689,18 @@ bool UNICHARSET::save_to_string(STRING *str) const { unsigned int properties = this->get_properties(id); if (strcmp(this->id_to_unichar(id), " ") == 0) { snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties, - this->get_script_from_script_id(this->get_script(id)), - this->get_other_case(id)); + this->get_script_from_script_id(this->get_script(id)), + this->get_other_case(id)); } else { snprintf(buffer, kFileBufSize, - "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n", - this->id_to_unichar(id), properties, - min_bottom, max_bottom, min_top, max_top, width, width_sd, - bearing, bearing_sd, advance, advance_sd, - this->get_script_from_script_id(this->get_script(id)), - this->get_other_case(id), this->get_direction(id), - this->get_mirror(id), this->get_normed_unichar(id), - this->debug_str(id).string()); + "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n", + this->id_to_unichar(id), properties, min_bottom, max_bottom, + min_top, max_top, width, width_sd, bearing, bearing_sd, advance, + advance_sd, + this->get_script_from_script_id(this->get_script(id)), + this->get_other_case(id), this->get_direction(id), + this->get_mirror(id), this->get_normed_unichar(id), + this->debug_str(id).string()); } *str += buffer; } @@ -726,17 +710,17 @@ bool UNICHARSET::save_to_string(STRING *str) const { // TODO(rays) Replace with TFile everywhere. class InMemoryFilePointer { public: - InMemoryFilePointer(const char *memory, int mem_size) - : memory_(memory), fgets_ptr_(memory), mem_size_(mem_size) { } + InMemoryFilePointer(const char* memory, int mem_size) + : memory_(memory), fgets_ptr_(memory), mem_size_(mem_size) {} - char *fgets(char *orig_dst, int size) { - const char *src_end = memory_ + mem_size_; - char *dst_end = orig_dst + size - 1; + char* fgets(char* orig_dst, int size) { + const char* src_end = memory_ + mem_size_; + char* dst_end = orig_dst + size - 1; if (size < 1) { return fgets_ptr_ < src_end ? orig_dst : nullptr; } - char *dst = orig_dst; + char* dst = orig_dst; char ch = '^'; while (fgets_ptr_ < src_end && dst < dst_end && ch != '\n') { ch = *dst++ = *fgets_ptr_++; @@ -746,15 +730,15 @@ class InMemoryFilePointer { } private: - const char *memory_; - const char *fgets_ptr_; + const char* memory_; + const char* fgets_ptr_; const int mem_size_; }; -bool UNICHARSET::load_from_inmemory_file( - const char *memory, int mem_size, bool skip_fragments) { +bool UNICHARSET::load_from_inmemory_file(const char* memory, int mem_size, + bool skip_fragments) { InMemoryFilePointer mem_fp(memory, mem_size); - TessResultCallback2 *fgets_cb = + TessResultCallback2* fgets_cb = NewPermanentTessCallback(&mem_fp, &InMemoryFilePointer::fgets); bool success = load_via_fgets(fgets_cb, skip_fragments); delete fgets_cb; @@ -763,25 +747,24 @@ bool UNICHARSET::load_from_inmemory_file( class LocalFilePointer { public: - LocalFilePointer(FILE *stream) : fp_(stream) {} - char *fgets(char *dst, int size) { - return ::fgets(dst, size, fp_); - } + LocalFilePointer(FILE* stream) : fp_(stream) {} + char* fgets(char* dst, int size) { return ::fgets(dst, size, fp_); } + private: - FILE *fp_; + FILE* fp_; }; -bool UNICHARSET::load_from_file(FILE *file, bool skip_fragments) { +bool UNICHARSET::load_from_file(FILE* file, bool skip_fragments) { LocalFilePointer lfp(file); - TessResultCallback2 *fgets_cb = + TessResultCallback2* fgets_cb = NewPermanentTessCallback(&lfp, &LocalFilePointer::fgets); bool success = load_via_fgets(fgets_cb, skip_fragments); delete fgets_cb; return success; } -bool UNICHARSET::load_from_file(tesseract::TFile *file, bool skip_fragments) { - TessResultCallback2 *fgets_cb = +bool UNICHARSET::load_from_file(tesseract::TFile* file, bool skip_fragments) { + TessResultCallback2* fgets_cb = NewPermanentTessCallback(file, &tesseract::TFile::FGets); bool success = load_via_fgets(fgets_cb, skip_fragments); delete fgets_cb; @@ -789,8 +772,7 @@ bool UNICHARSET::load_from_file(tesseract::TFile *file, bool skip_fragments) { } bool UNICHARSET::load_via_fgets( - TessResultCallback2 *fgets_cb, - bool skip_fragments) { + TessResultCallback2* fgets_cb, bool skip_fragments) { int unicharset_size; char buffer[256]; @@ -823,44 +805,39 @@ bool UNICHARSET::load_via_fgets( UNICHAR_ID mirror = id; char normed[64]; int v = -1; - if (fgets_cb->Run(buffer, sizeof (buffer)) == nullptr || - ((v = sscanf(buffer, - "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - &width, &width_sd, &bearing, &bearing_sd, - &advance, &advance_sd, script, &other_case, - &direction, &mirror, normed)) != 17 && - (v = sscanf(buffer, - "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - &width, &width_sd, &bearing, &bearing_sd, - &advance, &advance_sd, script, &other_case, - &direction, &mirror)) != 16 && - (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - script, &other_case, &direction, &mirror)) != 10 && - (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d", unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - script, &other_case)) != 8 && - (v = sscanf(buffer, "%s %x %63s %d", unichar, &properties, - script, &other_case)) != 4 && - (v = sscanf(buffer, "%s %x %63s", - unichar, &properties, script)) != 3 && - (v = sscanf(buffer, "%s %x", unichar, &properties)) != 2)) { + if (fgets_cb->Run(buffer, sizeof(buffer)) == nullptr || + ((v = sscanf( + buffer, "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s", + unichar, &properties, &min_bottom, &max_bottom, &min_top, + &max_top, &width, &width_sd, &bearing, &bearing_sd, &advance, + &advance_sd, script, &other_case, &direction, &mirror, normed)) != + 17 && + (v = sscanf( + buffer, "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d", + unichar, &properties, &min_bottom, &max_bottom, &min_top, + &max_top, &width, &width_sd, &bearing, &bearing_sd, &advance, + &advance_sd, script, &other_case, &direction, &mirror)) != 16 && + (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d", unichar, + &properties, &min_bottom, &max_bottom, &min_top, &max_top, + script, &other_case, &direction, &mirror)) != 10 && + (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d", unichar, &properties, + &min_bottom, &max_bottom, &min_top, &max_top, script, + &other_case)) != 8 && + (v = sscanf(buffer, "%s %x %63s %d", unichar, &properties, script, + &other_case)) != 4 && + (v = sscanf(buffer, "%s %x %63s", unichar, &properties, script)) != + 3 && + (v = sscanf(buffer, "%s %x", unichar, &properties)) != 2)) { return false; } // Skip fragments if needed. - CHAR_FRAGMENT *frag = nullptr; + CHAR_FRAGMENT* frag = nullptr; if (skip_fragments && (frag = CHAR_FRAGMENT::parse_from_string(unichar))) { int num_pieces = frag->get_total(); delete frag; // Skip multi-element fragments, but keep singles like UNICHAR_BROKEN in. - if (num_pieces > 1) - continue; + if (num_pieces > 1) continue; } // Insert unichar into unicharset and set its properties. if (strcmp(unichar, "NULL") == 0) @@ -884,7 +861,7 @@ bool UNICHARSET::load_via_fgets( this->set_other_case( id, (v > 3 && other_case < unicharset_size) ? other_case : id); this->set_mirror(id, (v > 8 && mirror < unicharset_size) ? mirror : id); - this->set_normed(id, (v>16) ? normed : unichar); + this->set_normed(id, (v > 16) ? normed : unichar); } post_load_setup(); return true; @@ -906,8 +883,7 @@ void UNICHARSET::post_load_setup() { int min_top = 0; int max_top = UINT8_MAX; get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top); - if (min_top > 0) - top_bottom_set_ = true; + if (min_top > 0) top_bottom_set_ = true; if (get_isalpha(id)) { if (get_islower(id) || get_isupper(id)) ++net_case_alphas; @@ -922,7 +898,8 @@ void UNICHARSET::post_load_setup() { } script_has_upper_lower_ = net_case_alphas > 0; - script_has_xheight_ = script_has_upper_lower_ || + script_has_xheight_ = + script_has_upper_lower_ || (x_height_alphas > cap_height_alphas * kMinXHeightFraction && cap_height_alphas > x_height_alphas * kMinCapHeightFraction); @@ -952,7 +929,7 @@ void UNICHARSET::post_load_setup() { if (script_counts[s] > script_counts[default_sid_] && s != common_sid_) default_sid_ = s; } - delete [] script_counts; + delete[] script_counts; } // Returns true if right_to_left scripts are significant in the unicharset, @@ -967,7 +944,8 @@ bool UNICHARSET::major_right_to_left() const { if (dir == UNICHARSET::U_LEFT_TO_RIGHT) ltr_count++; if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC || - dir == UNICHARSET::U_ARABIC_NUMBER) rtl_count++; + dir == UNICHARSET::U_ARABIC_NUMBER) + rtl_count++; } return rtl_count > ltr_count; } @@ -1029,8 +1007,7 @@ bool UNICHARSET::AnyRepeatedUnicodes() const { int UNICHARSET::add_script(const char* script) { for (int i = 0; i < script_table_size_used; ++i) { - if (strcmp(script, script_table[i]) == 0) - return i; + if (strcmp(script, script_table[i]) == 0) return i; } if (script_table_size_reserved == 0) { script_table_size_reserved = 8; @@ -1051,8 +1028,9 @@ int UNICHARSET::add_script(const char* script) { // Returns the string that represents a fragment // with the given unichar, pos and total. -STRING CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, - bool natural) { +STRING +CHAR_FRAGMENT::to_string(const char* unichar, int pos, int total, + bool natural) { if (total == 1) return STRING(unichar); STRING result = ""; result += kSeparator; @@ -1064,8 +1042,8 @@ STRING CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, return result; } -CHAR_FRAGMENT *CHAR_FRAGMENT::parse_from_string(const char *string) { - const char *ptr = string; +CHAR_FRAGMENT* CHAR_FRAGMENT::parse_from_string(const char* string) { + const char* ptr = string; int len = strlen(string); if (len < kMinLen || *ptr != kSeparator) { return nullptr; // this string can not represent a fragment @@ -1081,11 +1059,11 @@ CHAR_FRAGMENT *CHAR_FRAGMENT::parse_from_string(const char *string) { char unichar[UNICHAR_LEN + 1]; strncpy(unichar, ptr, step); unichar[step] = '\0'; // null terminate unichar - ptr += step; // move to the next fragment separator + ptr += step; // move to the next fragment separator int pos = 0; int total = 0; bool natural = false; - char *end_ptr = nullptr; + char* end_ptr = nullptr; for (int i = 0; i < 2; i++) { if (ptr > string + len || *ptr != kSeparator) { if (i == 1 && *ptr == kNaturalFlag) @@ -1095,21 +1073,20 @@ CHAR_FRAGMENT *CHAR_FRAGMENT::parse_from_string(const char *string) { } ptr++; // move to the next character i == 0 ? pos = static_cast(strtol(ptr, &end_ptr, 10)) - : total = static_cast(strtol(ptr, &end_ptr, 10)); + : total = static_cast(strtol(ptr, &end_ptr, 10)); ptr = end_ptr; } if (ptr != string + len) { return nullptr; // malformed fragment representation } - CHAR_FRAGMENT *fragment = new CHAR_FRAGMENT(); + CHAR_FRAGMENT* fragment = new CHAR_FRAGMENT(); fragment->set_all(unichar, pos, total, natural); return fragment; } int UNICHARSET::get_script_id_from_name(const char* script_name) const { for (int i = 0; i < script_table_size_used; ++i) { - if (strcmp(script_name, script_table[i]) == 0) - return i; + if (strcmp(script_name, script_table[i]) == 0) return i; } return 0; // 0 is always the null_script } diff --git a/src/ccutil/unicharset.h b/src/ccutil/unicharset.h index 6ff00a1700..772461eb87 100644 --- a/src/ccutil/unicharset.h +++ b/src/ccutil/unicharset.h @@ -56,13 +56,13 @@ class CHAR_FRAGMENT { static const int kMaxChunks = 5; // Setters and Getters. - inline void set_all(const char *unichar, int pos, int total, bool natural) { + inline void set_all(const char* unichar, int pos, int total, bool natural) { set_unichar(unichar); set_pos(pos); set_total(total); set_natural(natural); } - inline void set_unichar(const char *uch) { + inline void set_unichar(const char* uch) { strncpy(this->unichar, uch, UNICHAR_LEN); this->unichar[UNICHAR_LEN] = '\0'; } @@ -74,29 +74,26 @@ class CHAR_FRAGMENT { // Returns the string that represents a fragment // with the given unichar, pos and total. - static STRING to_string(const char *unichar, int pos, int total, + static STRING to_string(const char* unichar, int pos, int total, bool natural); // Returns the string that represents this fragment. - STRING to_string() const { - return to_string(unichar, pos, total, natural); - } + STRING to_string() const { return to_string(unichar, pos, total, natural); } // Checks whether a fragment has the same unichar, // position and total as the given inputs. - inline bool equals(const char *other_unichar, - int other_pos, int other_total) const { + inline bool equals(const char* other_unichar, int other_pos, + int other_total) const { return (strcmp(this->unichar, other_unichar) == 0 && this->pos == other_pos && this->total == other_total); } - inline bool equals(const CHAR_FRAGMENT *other) const { - return this->equals(other->get_unichar(), - other->get_pos(), + inline bool equals(const CHAR_FRAGMENT* other) const { + return this->equals(other->get_unichar(), other->get_pos(), other->get_total()); } // Checks whether a given fragment is a continuation of this fragment. // Assumes that the given fragment pointer is not nullptr. - inline bool is_continuation_of(const CHAR_FRAGMENT *fragment) const { + inline bool is_continuation_of(const CHAR_FRAGMENT* fragment) const { return (strcmp(this->unichar, fragment->get_unichar()) == 0 && this->total == fragment->get_total() && this->pos == fragment->get_pos() + 1); @@ -106,7 +103,7 @@ class CHAR_FRAGMENT { inline bool is_beginning() const { return this->pos == 0; } // Returns true if this fragment is an ending fragment. - inline bool is_ending() const { return this->pos == this->total-1; } + inline bool is_ending() const { return this->pos == this->total - 1; } // Returns true if the fragment was a separate component to begin with, // ie did not need chopping to be isolated, but may have been separated @@ -128,7 +125,7 @@ class CHAR_FRAGMENT { // // Note: The caller is responsible for deallocating memory // associated with the returned pointer. - static CHAR_FRAGMENT *parse_from_string(const char *str); + static CHAR_FRAGMENT* parse_from_string(const char* str); private: char unichar[UNICHAR_LEN + 1]; @@ -155,26 +152,26 @@ class UNICHARSET { // ICU 2.0 UCharDirection enum (from third_party/icu/include/unicode/uchar.h) enum Direction { - U_LEFT_TO_RIGHT = 0, - U_RIGHT_TO_LEFT = 1, - U_EUROPEAN_NUMBER = 2, - U_EUROPEAN_NUMBER_SEPARATOR = 3, - U_EUROPEAN_NUMBER_TERMINATOR = 4, - U_ARABIC_NUMBER = 5, - U_COMMON_NUMBER_SEPARATOR = 6, - U_BLOCK_SEPARATOR = 7, - U_SEGMENT_SEPARATOR = 8, - U_WHITE_SPACE_NEUTRAL = 9, - U_OTHER_NEUTRAL = 10, - U_LEFT_TO_RIGHT_EMBEDDING = 11, - U_LEFT_TO_RIGHT_OVERRIDE = 12, - U_RIGHT_TO_LEFT_ARABIC = 13, - U_RIGHT_TO_LEFT_EMBEDDING = 14, - U_RIGHT_TO_LEFT_OVERRIDE = 15, - U_POP_DIRECTIONAL_FORMAT = 16, - U_DIR_NON_SPACING_MARK = 17, - U_BOUNDARY_NEUTRAL = 18, - U_CHAR_DIRECTION_COUNT + U_LEFT_TO_RIGHT = 0, + U_RIGHT_TO_LEFT = 1, + U_EUROPEAN_NUMBER = 2, + U_EUROPEAN_NUMBER_SEPARATOR = 3, + U_EUROPEAN_NUMBER_TERMINATOR = 4, + U_ARABIC_NUMBER = 5, + U_COMMON_NUMBER_SEPARATOR = 6, + U_BLOCK_SEPARATOR = 7, + U_SEGMENT_SEPARATOR = 8, + U_WHITE_SPACE_NEUTRAL = 9, + U_OTHER_NEUTRAL = 10, + U_LEFT_TO_RIGHT_EMBEDDING = 11, + U_LEFT_TO_RIGHT_OVERRIDE = 12, + U_RIGHT_TO_LEFT_ARABIC = 13, + U_RIGHT_TO_LEFT_EMBEDDING = 14, + U_RIGHT_TO_LEFT_OVERRIDE = 15, + U_POP_DIRECTIONAL_FORMAT = 16, + U_DIR_NON_SPACING_MARK = 17, + U_BOUNDARY_NEUTRAL = 18, + U_CHAR_DIRECTION_COUNT }; // Create an empty UNICHARSET @@ -200,7 +197,7 @@ class UNICHARSET { // Returns true if the given UTF-8 string is encodable with this UNICHARSET. // If not encodable, write the first byte offset which cannot be converted // into the second (return) argument. - bool encodable_string(const char *str, int *first_bad_position) const; + bool encodable_string(const char* str, int* first_bad_position) const; // Encodes the given UTF-8 string with this UNICHARSET. // Any part of the string that cannot be encoded (because the utf8 can't @@ -219,8 +216,7 @@ class UNICHARSET { // Use CleanupString to perform the cleaning. bool encode_string(const char* str, bool give_up_on_failure, GenericVector* encoding, - GenericVector* lengths, - int* encoded_length) const; + GenericVector* lengths, int* encoded_length) const; // Return the unichar representation corresponding to the given UNICHAR_ID // within the UNICHARSET. @@ -246,7 +242,7 @@ class UNICHARSET { // Return a STRING containing debug information on the unichar, including // the id_to_unichar, its hex unicodes and the properties. STRING debug_str(UNICHAR_ID id) const; - STRING debug_str(const char * unichar_repr) const { + STRING debug_str(const char* unichar_repr) const { return debug_str(unichar_to_id(unichar_repr)); } @@ -278,7 +274,7 @@ class UNICHARSET { // Relies on the fact that unichar ids are contiguous in the unicharset. bool contains_unichar_id(UNICHAR_ID unichar_id) const { return unichar_id != INVALID_UNICHAR_ID && unichar_id < size_used && - unichar_id >= 0; + unichar_id >= 0; } // Return true if the given unichar representation exists within the set. @@ -300,8 +296,7 @@ class UNICHARSET { // Clear the UNICHARSET (all the previous data is lost). void clear() { if (script_table != nullptr) { - for (int i = 0; i < script_table_size_used; ++i) - delete[] script_table[i]; + for (int i = 0; i < script_table_size_used; ++i) delete[] script_table[i]; delete[] script_table; script_table = nullptr; script_table_size_used = 0; @@ -333,16 +328,14 @@ class UNICHARSET { } // Return the size of the set (the number of different UNICHAR it holds). - int size() const { - return size_used; - } + int size() const { return size_used; } // Reserve enough memory space for the given number of UNICHARS void reserve(int unichars_number); // Opens the file indicated by filename and saves unicharset to that file. // Returns true if the operation is successful. - bool save_to_file(const char * const filename) const { + bool save_to_file(const char* const filename) const { FILE* file = fopen(filename, "w+b"); if (file == nullptr) return false; bool result = save_to_file(file); @@ -352,13 +345,13 @@ class UNICHARSET { // Saves the content of the UNICHARSET to the given file. // Returns true if the operation is successful. - bool save_to_file(FILE *file) const { + bool save_to_file(FILE* file) const { STRING str; if (!save_to_string(&str)) return false; if (fwrite(&str[0], str.length(), 1, file) != 1) return false; return true; } - bool save_to_file(tesseract::TFile *file) const { + bool save_to_file(tesseract::TFile* file) const { STRING str; if (!save_to_string(&str)) return false; if (file->FWrite(&str[0], str.length(), 1) != 1) return false; @@ -367,7 +360,7 @@ class UNICHARSET { // Saves the content of the UNICHARSET to the given STRING. // Returns true if the operation is successful. - bool save_to_string(STRING *str) const; + bool save_to_string(STRING* str) const; // Load a unicharset from a unicharset file that has been loaded into // the given memory buffer. @@ -396,10 +389,9 @@ class UNICHARSET { // Loads the UNICHARSET from the given file. The previous data is lost. // Returns true if the operation is successful. - bool load_from_file(FILE *file, bool skip_fragments); - bool load_from_file(FILE *file) { return load_from_file(file, false); } - bool load_from_file(tesseract::TFile *file, bool skip_fragments); - + bool load_from_file(FILE* file, bool skip_fragments); + bool load_from_file(FILE* file) { return load_from_file(file, false); } + bool load_from_file(tesseract::TFile* file, bool skip_fragments); // Sets up internal data after loading the file, based on the char // properties. Called from load_from_file, but also needs to be run @@ -531,9 +523,7 @@ class UNICHARSET { bool get_isprivate(UNICHAR_ID unichar_id) const; // Returns true if the ids have useful min/max top/bottom values. - bool top_bottom_useful() const { - return top_bottom_set_; - } + bool top_bottom_useful() const { return top_bottom_set_; } // Sets all ranges to empty, so they can be expanded to set the values. void set_ranges_empty(); // Sets all the properties for this unicharset given a src_unicharset with @@ -562,8 +552,7 @@ class UNICHARSET { // baseline-normalized coordinates, ie, where the baseline is // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight // (See normalis.h for the definitions). - void get_top_bottom(UNICHAR_ID unichar_id, - int* min_bottom, int* max_bottom, + void get_top_bottom(UNICHAR_ID unichar_id, int* min_bottom, int* max_bottom, int* min_top, int* max_top) const { if (INVALID_UNICHAR_ID == unichar_id) { *min_bottom = *min_top = 0; @@ -576,8 +565,7 @@ class UNICHARSET { *min_top = unichars[unichar_id].properties.min_top; *max_top = unichars[unichar_id].properties.max_top; } - void set_top_bottom(UNICHAR_ID unichar_id, - int min_bottom, int max_bottom, + void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, int max_top) { unichars[unichar_id].properties.min_bottom = ClipToRange(min_bottom, 0, UINT8_MAX); @@ -590,11 +578,12 @@ class UNICHARSET { } // Returns the width stats (as mean, sd) of the given unichar relative to the // median advance of all characters in the character set. - void get_width_stats(UNICHAR_ID unichar_id, - float* width, float* width_sd) const { + void get_width_stats(UNICHAR_ID unichar_id, float* width, + float* width_sd) const { if (INVALID_UNICHAR_ID == unichar_id) { *width = 0.0f; - *width_sd = 0.0f;; + *width_sd = 0.0f; + ; return; } ASSERT_HOST(contains_unichar_id(unichar_id)); @@ -607,8 +596,8 @@ class UNICHARSET { } // Returns the stats of the x-bearing (as mean, sd) of the given unichar // relative to the median advance of all characters in the character set. - void get_bearing_stats(UNICHAR_ID unichar_id, - float* bearing, float* bearing_sd) const { + void get_bearing_stats(UNICHAR_ID unichar_id, float* bearing, + float* bearing_sd) const { if (INVALID_UNICHAR_ID == unichar_id) { *bearing = *bearing_sd = 0.0f; return; @@ -617,15 +606,15 @@ class UNICHARSET { *bearing = unichars[unichar_id].properties.bearing; *bearing_sd = unichars[unichar_id].properties.bearing_sd; } - void set_bearing_stats(UNICHAR_ID unichar_id, - float bearing, float bearing_sd) { + void set_bearing_stats(UNICHAR_ID unichar_id, float bearing, + float bearing_sd) { unichars[unichar_id].properties.bearing = bearing; unichars[unichar_id].properties.bearing_sd = bearing_sd; } // Returns the stats of the x-advance of the given unichar (as mean, sd) // relative to the median advance of all characters in the character set. - void get_advance_stats(UNICHAR_ID unichar_id, - float* advance, float* advance_sd) const { + void get_advance_stats(UNICHAR_ID unichar_id, float* advance, + float* advance_sd) const { if (INVALID_UNICHAR_ID == unichar_id) { *advance = *advance_sd = 0; return; @@ -634,8 +623,8 @@ class UNICHARSET { *advance = unichars[unichar_id].properties.advance; *advance_sd = unichars[unichar_id].properties.advance_sd; } - void set_advance_stats(UNICHAR_ID unichar_id, - float advance, float advance_sd) { + void set_advance_stats(UNICHAR_ID unichar_id, float advance, + float advance_sd) { unichars[unichar_id].properties.advance = advance; unichars[unichar_id].properties.advance_sd = advance_sd; } @@ -685,10 +674,10 @@ class UNICHARSET { // Returns the direction property of the given unichar. Direction get_direction(UNICHAR_ID unichar_id) const { - if (INVALID_UNICHAR_ID == unichar_id) return UNICHARSET::U_OTHER_NEUTRAL; - ASSERT_HOST(contains_unichar_id(unichar_id)); - return unichars[unichar_id].properties.direction; - } + if (INVALID_UNICHAR_ID == unichar_id) return UNICHARSET::U_OTHER_NEUTRAL; + ASSERT_HOST(contains_unichar_id(unichar_id)); + return unichars[unichar_id].properties.direction; + } // Get mirror unichar id in the properties for the given unichar id. UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const { @@ -718,8 +707,8 @@ class UNICHARSET { // at these codes and they should not be used. bool has_special_codes() const { return get_fragment(UNICHAR_BROKEN) != nullptr && - strcmp(id_to_unichar(UNICHAR_BROKEN), - kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0; + strcmp(id_to_unichar(UNICHAR_BROKEN), + kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0; } // Returns true if there are any repeated unicodes in the normalized @@ -728,7 +717,7 @@ class UNICHARSET { // Return a pointer to the CHAR_FRAGMENT class if the given // unichar id represents a character fragment. - const CHAR_FRAGMENT *get_fragment(UNICHAR_ID unichar_id) const { + const CHAR_FRAGMENT* get_fragment(UNICHAR_ID unichar_id) const { if (INVALID_UNICHAR_ID == unichar_id) return nullptr; ASSERT_HOST(contains_unichar_id(unichar_id)); return unichars[unichar_id].properties.fragment; @@ -778,7 +767,7 @@ class UNICHARSET { // Return a pointer to the CHAR_FRAGMENT class struct if the given // unichar representation represents a character fragment. - const CHAR_FRAGMENT *get_fragment(const char* const unichar_repr) const { + const CHAR_FRAGMENT* get_fragment(const char* const unichar_repr) const { if (unichar_repr == nullptr || unichar_repr[0] == '\0' || !ids.contains(unichar_repr, false)) { return nullptr; @@ -788,41 +777,36 @@ class UNICHARSET { // Return the isalpha property of the given unichar representation. // Only the first length characters from unichar_repr are used. - bool get_isalpha(const char* const unichar_repr, - int length) const { + bool get_isalpha(const char* const unichar_repr, int length) const { return get_isalpha(unichar_to_id(unichar_repr, length)); } // Return the islower property of the given unichar representation. // Only the first length characters from unichar_repr are used. - bool get_islower(const char* const unichar_repr, - int length) const { + bool get_islower(const char* const unichar_repr, int length) const { return get_islower(unichar_to_id(unichar_repr, length)); } // Return the isupper property of the given unichar representation. // Only the first length characters from unichar_repr are used. - bool get_isupper(const char* const unichar_repr, - int length) const { + bool get_isupper(const char* const unichar_repr, int length) const { return get_isupper(unichar_to_id(unichar_repr, length)); } // Return the isdigit property of the given unichar representation. // Only the first length characters from unichar_repr are used. - bool get_isdigit(const char* const unichar_repr, - int length) const { + bool get_isdigit(const char* const unichar_repr, int length) const { return get_isdigit(unichar_to_id(unichar_repr, length)); } // Return the ispunctuation property of the given unichar representation. // Only the first length characters from unichar_repr are used. - bool get_ispunctuation(const char* const unichar_repr, - int length) const { + bool get_ispunctuation(const char* const unichar_repr, int length) const { return get_ispunctuation(unichar_to_id(unichar_repr, length)); } // Returns normalized version of unichar with the given unichar_id. - const char *get_normed_unichar(UNICHAR_ID unichar_id) const { + const char* get_normed_unichar(UNICHAR_ID unichar_id) const { if (unichar_id == UNICHAR_SPACE) return " "; return unichars[unichar_id].properties.normed.string(); } @@ -837,20 +821,16 @@ class UNICHARSET { // Only the first length characters from unichar_repr are used. // The returned pointer will always be the same for the same script, it's // managed by unicharset and thus MUST NOT be deleted - int get_script(const char* const unichar_repr, - int length) const { + int get_script(const char* const unichar_repr, int length) const { return get_script(unichar_to_id(unichar_repr, length)); } // Return the (current) number of scripts in the script table - int get_script_table_size() const { - return script_table_size_used; - } + int get_script_table_size() const { return script_table_size_used; } // Return the script string from its id const char* get_script_from_script_id(int id) const { - if (id >= script_table_size_used || id < 0) - return null_script; + if (id >= script_table_size_used || id < 0) return null_script; return script_table[id]; } @@ -876,7 +856,6 @@ class UNICHARSET { return unichars[unichar_id].properties.enabled; } - int null_sid() const { return null_sid_; } int common_sid() const { return common_sid_; } int latin_sid() const { return latin_sid_; } @@ -890,19 +869,14 @@ class UNICHARSET { int default_sid() const { return default_sid_; } // Returns true if the unicharset has the concept of upper/lower case. - bool script_has_upper_lower() const { - return script_has_upper_lower_; - } + bool script_has_upper_lower() const { return script_has_upper_lower_; } // Returns true if the unicharset has the concept of x-height. // script_has_xheight can be true even if script_has_upper_lower is not, // when the script has a sufficiently predominant top line with ascenders, // such as Devanagari and Thai. - bool script_has_xheight() const { - return script_has_xheight_; - } + bool script_has_xheight() const { return script_has_xheight_; } private: - struct UNICHAR_PROPERTIES { UNICHAR_PROPERTIES(); // Initializes all properties to sensible default values. @@ -920,13 +894,13 @@ class UNICHARSET { // Copies the properties from src into this. void CopyFrom(const UNICHAR_PROPERTIES& src); - bool isalpha; - bool islower; - bool isupper; - bool isdigit; - bool ispunctuation; - bool isngram; - bool enabled; + bool isalpha; + bool islower; + bool isupper; + bool isdigit; + bool ispunctuation; + bool isngram; + bool enabled; // Possible limits of the top and bottom of the bounding box in // baseline-normalized coordinates, ie, where the baseline is // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight @@ -943,9 +917,9 @@ class UNICHARSET { float bearing_sd; float advance; float advance_sd; - int script_id; + int script_id; UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar - Direction direction; // direction of this unichar + Direction direction; // direction of this unichar // Mirror property is useful for reverse DAWG lookup for words in // right-to-left languages (e.g. "(word)" would be in // '[open paren]' 'w' 'o' 'r' 'd' '[close paren]' in a UTF8 string. @@ -962,7 +936,7 @@ class UNICHARSET { // a fragment of a character, otherwise should be set to nullptr. // It is assumed that character fragments are added to the unicharset // after the corresponding 'base' characters. - CHAR_FRAGMENT *fragment; + CHAR_FRAGMENT* fragment; }; struct UNICHAR_SLOT { @@ -983,8 +957,7 @@ class UNICHARSET { // best_lengths (may be null) contains the lengths of best_encoding. void encode_string(const char* str, int str_index, int str_length, GenericVector* encoding, - GenericVector* lengths, - int* best_total_length, + GenericVector* lengths, int* best_total_length, GenericVector* best_encoding, GenericVector* best_lengths) const; @@ -993,13 +966,12 @@ class UNICHARSET { // Returns false if no valid match was found in the unicharset. // NOTE that script_id, mirror, and other_case refer to this unicharset on // return and will need redirecting if the target unicharset is different. - bool GetStrProperties(const char* utf8_str, - UNICHAR_PROPERTIES* props) const; + bool GetStrProperties(const char* utf8_str, UNICHAR_PROPERTIES* props) const; // Load ourselves from a "file" where our only interface to the file is // an implementation of fgets(). This is the parsing primitive accessed by // the public routines load_from_file() and load_from_inmemory_file(). - bool load_via_fgets(TessResultCallback2 *fgets_cb, + bool load_via_fgets(TessResultCallback2* fgets_cb, bool skip_fragments); // List of mappings to make when ingesting strings from the outside. diff --git a/src/ccutil/unicity_table.h b/src/ccutil/unicity_table.h index 0c0c5d11e0..f6e67e5ea5 100644 --- a/src/ccutil/unicity_table.h +++ b/src/ccutil/unicity_table.h @@ -20,9 +20,9 @@ #ifndef TESSERACT_CCUTIL_UNICITY_TABLE_H_ #define TESSERACT_CCUTIL_UNICITY_TABLE_H_ -#include "tesscallback.h" #include "errcode.h" #include "genericvector.h" +#include "tesscallback.h" // A class to uniquify objects, manipulating them using integers ids. // T requirements: @@ -44,10 +44,10 @@ class UnicityTable { int size() const; /// Return the object from an id. - const T &get(int id) const; + const T& get(int id) const; // Return the pointer to an object with the given id. - T *get_mutable(int id); + T* get_mutable(int id); /// Return the id of the T object. /// This method NEEDS a compare_callback to be passed to @@ -69,7 +69,7 @@ class UnicityTable { /// Add a callback to be called to compare the elements when needed (contains, /// get_id, ...) - void set_compare_callback(TessResultCallback2* cb); + void set_compare_callback(TessResultCallback2* cb); /// Clear the table, calling the callback function if any. /// All the owned Callbacks are also deleted. @@ -85,14 +85,14 @@ class UnicityTable { /// The Callback given must be permanent since they will be called more than /// once. The given callback will be deleted at the end. /// Returns false on read/write error. - bool write(FILE* f, TessResultCallback2* cb) const; + bool write(FILE* f, TessResultCallback2* cb) const; bool read(tesseract::TFile* f, TessResultCallback2* cb); private: GenericVector table_; // Mutable because Run method is not const - mutable TessResultCallback2* compare_cb_; + mutable TessResultCallback2* compare_cb_; }; template @@ -105,10 +105,7 @@ class UnicityTableEqEq : public UnicityTable { }; template -UnicityTable::UnicityTable() : - compare_cb_(0) { -} - +UnicityTable::UnicityTable() : compare_cb_(0) {} template UnicityTable::~UnicityTable() { @@ -116,7 +113,7 @@ UnicityTable::~UnicityTable() { } template -int UnicityTable::size() const{ +int UnicityTable::size() const { return table_.size(); } @@ -129,12 +126,12 @@ void UnicityTable::reserve(int size) { // Return the object from an id. template -const T &UnicityTable::get(int id) const { +const T& UnicityTable::get(int id) const { return table_.get(id); } // Returns the pointer to the object with the given id. template -T *UnicityTable::get_mutable(int id) { +T* UnicityTable::get_mutable(int id) { return &(table_.get(id)); } // Return true if the id is valid @@ -175,7 +172,8 @@ void UnicityTable::set_clear_callback(TessCallback1* cb) { // Add a callback to be called to delete the elements when the table took // their ownership. template -void UnicityTable::set_compare_callback(TessResultCallback2* cb) { +void UnicityTable::set_compare_callback( + TessResultCallback2* cb) { table_.set_compare_callback(cb); compare_cb_ = cb; } @@ -188,7 +186,7 @@ void UnicityTable::clear() { template bool UnicityTable::write( - FILE* f, TessResultCallback2* cb) const { + FILE* f, TessResultCallback2* cb) const { return table_.write(f, cb); } diff --git a/src/ccutil/unicodes.cpp b/src/ccutil/unicodes.cpp index a5c7b8bd0d..058966014e 100644 --- a/src/ccutil/unicodes.cpp +++ b/src/ccutil/unicodes.cpp @@ -22,36 +22,36 @@ namespace tesseract { -const char *kUTF8LineSeparator = "\u2028"; // "\xe2\x80\xa8"; -const char *kUTF8ParagraphSeparator = "\u2029"; // "\xe2\x80\xa9"; -const char *kLRM = "\u200E"; // Left-to-Right Mark -const char *kRLM = "\u200F"; // Right-to-Left Mark -const char *kRLE = "\u202A"; // Right-to-Left Embedding -const char *kPDF = "\u202C"; // Pop Directional Formatting +const char* kUTF8LineSeparator = "\u2028"; // "\xe2\x80\xa8"; +const char* kUTF8ParagraphSeparator = "\u2029"; // "\xe2\x80\xa9"; +const char* kLRM = "\u200E"; // Left-to-Right Mark +const char* kRLM = "\u200F"; // Right-to-Left Mark +const char* kRLE = "\u202A"; // Right-to-Left Embedding +const char* kPDF = "\u202C"; // Pop Directional Formatting -const char *kHyphenLikeUTF8[] = { - "-", // ASCII hyphen-minus - "\u05BE", // word hyphen in hybrew - "\u2010", // hyphen - "\u2011", // non-breaking hyphen - "\u2012", // a hyphen the same width as digits - "\u2013", // en dash - "\u2014", // em dash - "\u2015", // horizontal bar - "\u2212", // arithmetic minus sign - "\uFE58", // small em dash - "\uFE63", // small hyphen-minus - "\uFF0D", // fullwidth hyphen-minus - nullptr, // end of our list +const char* kHyphenLikeUTF8[] = { + "-", // ASCII hyphen-minus + "\u05BE", // word hyphen in hybrew + "\u2010", // hyphen + "\u2011", // non-breaking hyphen + "\u2012", // a hyphen the same width as digits + "\u2013", // en dash + "\u2014", // em dash + "\u2015", // horizontal bar + "\u2212", // arithmetic minus sign + "\uFE58", // small em dash + "\uFE63", // small hyphen-minus + "\uFF0D", // fullwidth hyphen-minus + nullptr, // end of our list }; -const char *kApostropheLikeUTF8[] = { - "'", // ASCII apostrophe - "`", // ASCII backtick - "\u2018", // opening single quote - "\u2019", // closing single quote - "\u2032", // mathematical prime mark - nullptr, // end of our list. +const char* kApostropheLikeUTF8[] = { + "'", // ASCII apostrophe + "`", // ASCII backtick + "\u2018", // opening single quote + "\u2019", // closing single quote + "\u2032", // mathematical prime mark + nullptr, // end of our list. }; -} // namespace +} // namespace tesseract diff --git a/src/ccutil/unicodes.h b/src/ccutil/unicodes.h index 7bab9b0059..ad0218755b 100644 --- a/src/ccutil/unicodes.h +++ b/src/ccutil/unicodes.h @@ -22,18 +22,18 @@ namespace tesseract { -extern const char *kUTF8LineSeparator; -extern const char *kUTF8ParagraphSeparator; -extern const char *kLRM; //< Left-to-Right Mark -extern const char *kRLM; //< Right-to-Left Mark -extern const char *kRLE; //< Right-to-Left Embedding -extern const char *kPDF; //< Pop Directional Formatting +extern const char* kUTF8LineSeparator; +extern const char* kUTF8ParagraphSeparator; +extern const char* kLRM; //< Left-to-Right Mark +extern const char* kRLM; //< Right-to-Left Mark +extern const char* kRLE; //< Right-to-Left Embedding +extern const char* kPDF; //< Pop Directional Formatting /// The following are confusable internal word punctuation symbols /// which we normalize to the first variant when matching in dawgs. -extern const char *kHyphenLikeUTF8[]; -extern const char *kApostropheLikeUTF8[]; +extern const char* kHyphenLikeUTF8[]; +extern const char* kApostropheLikeUTF8[]; -} // namespace +} // namespace tesseract #endif // TESSERACT_CCUTIL_UNICODES_H_ diff --git a/src/classify/adaptive.cpp b/src/classify/adaptive.cpp index 2ee6aeea69..29bbf04d75 100644 --- a/src/classify/adaptive.cpp +++ b/src/classify/adaptive.cpp @@ -20,9 +20,9 @@ Include Files and Type Defines ----------------------------------------------------------------------------*/ #include "adaptive.h" +#include "classify.h" #include "emalloc.h" #include "globals.h" -#include "classify.h" #ifdef __UNIX__ #include @@ -47,25 +47,23 @@ using tesseract::TFile; * @note Exceptions: none * @note History: Thu Mar 14 13:06:09 1991, DSJ, Created. */ -void AddAdaptedClass(ADAPT_TEMPLATES Templates, - ADAPT_CLASS Class, +void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId) { INT_CLASS IntClass; - assert (Templates != nullptr); - assert (Class != nullptr); - assert (LegalClassId (ClassId)); - assert (UnusedClassIdIn (Templates->Templates, ClassId)); - assert (Class->NumPermConfigs == 0); + assert(Templates != nullptr); + assert(Class != nullptr); + assert(LegalClassId(ClassId)); + assert(UnusedClassIdIn(Templates->Templates, ClassId)); + assert(Class->NumPermConfigs == 0); - IntClass = NewIntClass (1, 1); - AddIntClass (Templates->Templates, ClassId, IntClass); + IntClass = NewIntClass(1, 1); + AddIntClass(Templates->Templates, ClassId, IntClass); - assert (Templates->Class[ClassId] == nullptr); + assert(Templates->Class[ClassId] == nullptr); Templates->Class[ClassId] = Class; -} /* AddAdaptedClass */ - +} /* AddAdaptedClass */ /*---------------------------------------------------------------------------*/ /** @@ -79,21 +77,21 @@ void AddAdaptedClass(ADAPT_TEMPLATES Templates, * @note History: Thu Mar 14 13:34:23 1991, DSJ, Created. */ void FreeTempConfig(TEMP_CONFIG Config) { - assert (Config != nullptr); - FreeBitVector (Config->Protos); + assert(Config != nullptr); + FreeBitVector(Config->Protos); free(Config); -} /* FreeTempConfig */ +} /* FreeTempConfig */ /*---------------------------------------------------------------------------*/ -void FreeTempProto(void *arg) { - PROTO proto = (PROTO) arg; +void FreeTempProto(void* arg) { + PROTO proto = (PROTO)arg; free(proto); } void FreePermConfig(PERM_CONFIG Config) { assert(Config != nullptr); - delete [] Config->Ambigs; + delete[] Config->Ambigs; free(Config); } @@ -108,44 +106,42 @@ void FreePermConfig(PERM_CONFIG Config) { * @note Exceptions: none * @note History: Thu Mar 14 12:58:13 1991, DSJ, Created. */ -ADAPT_CLASS NewAdaptedClass() { +ADAPT_CLASS +NewAdaptedClass() { ADAPT_CLASS Class; - Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT)); + Class = (ADAPT_CLASS)Emalloc(sizeof(ADAPT_CLASS_STRUCT)); Class->NumPermConfigs = 0; Class->MaxNumTimesSeen = 0; Class->TempProtos = NIL_LIST; - Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); - Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); - zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS)); - zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS)); + Class->PermProtos = NewBitVector(MAX_NUM_PROTOS); + Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS); + zero_all_bits(Class->PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS)); + zero_all_bits(Class->PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS)); - for (int i = 0; i < MAX_NUM_CONFIGS; i++) - TempConfigFor (Class, i) = nullptr; + for (int i = 0; i < MAX_NUM_CONFIGS; i++) TempConfigFor(Class, i) = nullptr; return (Class); -} /* NewAdaptedClass */ - +} /* NewAdaptedClass */ /*-------------------------------------------------------------------------*/ void free_adapted_class(ADAPT_CLASS adapt_class) { for (int i = 0; i < MAX_NUM_CONFIGS; i++) { - if (ConfigIsPermanent (adapt_class, i) - && PermConfigFor (adapt_class, i) != nullptr) - FreePermConfig (PermConfigFor (adapt_class, i)); - else if (!ConfigIsPermanent (adapt_class, i) - && TempConfigFor (adapt_class, i) != nullptr) - FreeTempConfig (TempConfigFor (adapt_class, i)); + if (ConfigIsPermanent(adapt_class, i) && + PermConfigFor(adapt_class, i) != nullptr) + FreePermConfig(PermConfigFor(adapt_class, i)); + else if (!ConfigIsPermanent(adapt_class, i) && + TempConfigFor(adapt_class, i) != nullptr) + FreeTempConfig(TempConfigFor(adapt_class, i)); } - FreeBitVector (adapt_class->PermProtos); - FreeBitVector (adapt_class->PermConfigs); - destroy_nodes (adapt_class->TempProtos, FreeTempProto); + FreeBitVector(adapt_class->PermProtos); + FreeBitVector(adapt_class->PermConfigs); + destroy_nodes(adapt_class->TempProtos, FreeTempProto); Efree(adapt_class); } - /*---------------------------------------------------------------------------*/ namespace tesseract { /** @@ -159,12 +155,13 @@ namespace tesseract { * @note Exceptions: none * @note History: Fri Mar 8 10:15:28 1991, DSJ, Created. */ -ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) { +ADAPT_TEMPLATES +Classify::NewAdaptedTemplates(bool InitFromUnicharset) { ADAPT_TEMPLATES Templates; - Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)); + Templates = (ADAPT_TEMPLATES)Emalloc(sizeof(ADAPT_TEMPLATES_STRUCT)); - Templates->Templates = NewIntTemplates (); + Templates->Templates = NewIntTemplates(); Templates->NumPermClasses = 0; Templates->NumNonEmptyClasses = 0; @@ -178,29 +175,27 @@ ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) { return (Templates); -} /* NewAdaptedTemplates */ +} /* NewAdaptedTemplates */ // Returns FontinfoId of the given config of the given adapted class. int Classify::GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId) { - return (ConfigIsPermanent(Class, ConfigId) ? - PermConfigFor(Class, ConfigId)->FontinfoId : - TempConfigFor(Class, ConfigId)->FontinfoId); + return (ConfigIsPermanent(Class, ConfigId) + ? PermConfigFor(Class, ConfigId)->FontinfoId + : TempConfigFor(Class, ConfigId)->FontinfoId); } } // namespace tesseract /*----------------------------------------------------------------------------*/ void free_adapted_templates(ADAPT_TEMPLATES templates) { - if (templates != nullptr) { for (int i = 0; i < (templates->Templates)->NumClasses; i++) - free_adapted_class (templates->Class[i]); - free_int_templates (templates->Templates); + free_adapted_class(templates->Class[i]); + free_int_templates(templates->Templates); Efree(templates); } } - /*---------------------------------------------------------------------------*/ /** * This routine allocates and returns a new temporary config. @@ -213,22 +208,22 @@ void free_adapted_templates(ADAPT_TEMPLATES templates) { * @note Exceptions: none * @note History: Thu Mar 14 13:28:21 1991, DSJ, Created. */ -TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) { +TEMP_CONFIG +NewTempConfig(int MaxProtoId, int FontinfoId) { int NumProtos = MaxProtoId + 1; TEMP_CONFIG Config = (TEMP_CONFIG)malloc(sizeof(TEMP_CONFIG_STRUCT)); - Config->Protos = NewBitVector (NumProtos); + Config->Protos = NewBitVector(NumProtos); Config->NumTimesSeen = 1; Config->MaxProtoId = MaxProtoId; - Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos); - zero_all_bits (Config->Protos, Config->ProtoVectorSize); + Config->ProtoVectorSize = WordsInVectorOfSize(NumProtos); + zero_all_bits(Config->Protos, Config->ProtoVectorSize); Config->FontinfoId = FontinfoId; return (Config); -} /* NewTempConfig */ - +} /* NewTempConfig */ /*---------------------------------------------------------------------------*/ /** @@ -240,10 +235,10 @@ TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) { * @note Exceptions: none * @note History: Thu Mar 14 13:31:31 1991, DSJ, Created. */ -TEMP_PROTO NewTempProto() { +TEMP_PROTO +NewTempProto() { return (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT)); -} /* NewTempProto */ - +} /* NewTempProto */ /*---------------------------------------------------------------------------*/ namespace tesseract { @@ -258,33 +253,30 @@ namespace tesseract { * @note Exceptions: none * @note History: Wed Mar 20 13:35:29 1991, DSJ, Created. */ -void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { +void Classify::PrintAdaptedTemplates(FILE* File, ADAPT_TEMPLATES Templates) { INT_CLASS IClass; ADAPT_CLASS AClass; - fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); - fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n", - Templates->NumNonEmptyClasses, Templates->NumPermClasses); - fprintf (File, " Id NC NPC NP NPP\n"); - fprintf (File, "------------------------\n"); + fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); + fprintf(File, "Num classes = %d; Num permanent classes = %d\n\n", + Templates->NumNonEmptyClasses, Templates->NumPermClasses); + fprintf(File, " Id NC NPC NP NPP\n"); + fprintf(File, "------------------------\n"); for (int i = 0; i < (Templates->Templates)->NumClasses; i++) { IClass = Templates->Templates->Class[i]; AClass = Templates->Class[i]; - if (!IsEmptyAdaptedClass (AClass)) { - fprintf (File, "%5d %s %3d %3d %3d %3d\n", - i, unicharset.id_to_unichar(i), - IClass->NumConfigs, AClass->NumPermConfigs, - IClass->NumProtos, - IClass->NumProtos - count (AClass->TempProtos)); + if (!IsEmptyAdaptedClass(AClass)) { + fprintf(File, "%5d %s %3d %3d %3d %3d\n", i, unicharset.id_to_unichar(i), + IClass->NumConfigs, AClass->NumPermConfigs, IClass->NumProtos, + IClass->NumProtos - count(AClass->TempProtos)); } } - fprintf (File, "\n"); + fprintf(File, "\n"); -} /* PrintAdaptedTemplates */ +} /* PrintAdaptedTemplates */ } // namespace tesseract - /*---------------------------------------------------------------------------*/ /** * Read an adapted class description from file and return @@ -297,19 +289,20 @@ void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { * @note Exceptions: none * @note History: Tue Mar 19 14:11:01 1991, DSJ, Created. */ -ADAPT_CLASS ReadAdaptedClass(TFile *fp) { +ADAPT_CLASS +ReadAdaptedClass(TFile* fp) { int NumTempProtos; int NumConfigs; int i; ADAPT_CLASS Class; /* first read high level adapted class structure */ - Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT)); + Class = (ADAPT_CLASS)Emalloc(sizeof(ADAPT_CLASS_STRUCT)); fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1); /* then read in the definitions of the permanent protos and configs */ - Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); - Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); + Class->PermProtos = NewBitVector(MAX_NUM_PROTOS); + Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS); fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS)); fp->FRead(Class->PermConfigs, sizeof(uint32_t), @@ -321,21 +314,20 @@ ADAPT_CLASS ReadAdaptedClass(TFile *fp) { for (i = 0; i < NumTempProtos; i++) { TEMP_PROTO TempProto = (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT)); fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1); - Class->TempProtos = push_last (Class->TempProtos, TempProto); + Class->TempProtos = push_last(Class->TempProtos, TempProto); } /* then read in the adapted configs */ fp->FRead(&NumConfigs, sizeof(int), 1); for (i = 0; i < NumConfigs; i++) - if (test_bit (Class->PermConfigs, i)) + if (test_bit(Class->PermConfigs, i)) Class->Config[i].Perm = ReadPermConfig(fp); else Class->Config[i].Temp = ReadTempConfig(fp); return (Class); -} /* ReadAdaptedClass */ - +} /* ReadAdaptedClass */ /*---------------------------------------------------------------------------*/ namespace tesseract { @@ -350,11 +342,12 @@ namespace tesseract { * @note Exceptions: none * @note History: Mon Mar 18 15:18:10 1991, DSJ, Created. */ -ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) { +ADAPT_TEMPLATES +Classify::ReadAdaptedTemplates(TFile* fp) { ADAPT_TEMPLATES Templates; /* first read the high level adaptive template struct */ - Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)); + Templates = (ADAPT_TEMPLATES)Emalloc(sizeof(ADAPT_TEMPLATES_STRUCT)); fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1); /* then read in the basic integer templates */ @@ -366,10 +359,9 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) { } return (Templates); -} /* ReadAdaptedTemplates */ +} /* ReadAdaptedTemplates */ } // namespace tesseract - /*---------------------------------------------------------------------------*/ /** * Read a permanent configuration description from file @@ -382,7 +374,8 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) { * @note Exceptions: none * @note History: Tue Mar 19 14:25:26 1991, DSJ, Created. */ -PERM_CONFIG ReadPermConfig(TFile *fp) { +PERM_CONFIG +ReadPermConfig(TFile* fp) { PERM_CONFIG Config = (PERM_CONFIG)malloc(sizeof(PERM_CONFIG_STRUCT)); uint8_t NumAmbigs; fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1); @@ -393,8 +386,7 @@ PERM_CONFIG ReadPermConfig(TFile *fp) { return (Config); -} /* ReadPermConfig */ - +} /* ReadPermConfig */ /*---------------------------------------------------------------------------*/ /** @@ -408,17 +400,17 @@ PERM_CONFIG ReadPermConfig(TFile *fp) { * @note Exceptions: none * @note History: Tue Mar 19 14:29:59 1991, DSJ, Created. */ -TEMP_CONFIG ReadTempConfig(TFile *fp) { +TEMP_CONFIG +ReadTempConfig(TFile* fp) { TEMP_CONFIG Config = (TEMP_CONFIG)malloc(sizeof(TEMP_CONFIG_STRUCT)); fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1); - Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG); + Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG); fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize); return (Config); -} /* ReadTempConfig */ - +} /* ReadTempConfig */ /*---------------------------------------------------------------------------*/ /** @@ -433,39 +425,38 @@ TEMP_CONFIG ReadTempConfig(TFile *fp) { * @note Exceptions: none * @note History: Tue Mar 19 13:33:51 1991, DSJ, Created. */ -void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) { +void WriteAdaptedClass(FILE* File, ADAPT_CLASS Class, int NumConfigs) { int NumTempProtos; LIST TempProtos; int i; /* first write high level adapted class structure */ - fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File); + fwrite((char*)Class, sizeof(ADAPT_CLASS_STRUCT), 1, File); /* then write out the definitions of the permanent protos and configs */ - fwrite ((char *) Class->PermProtos, sizeof (uint32_t), - WordsInVectorOfSize (MAX_NUM_PROTOS), File); - fwrite ((char *) Class->PermConfigs, sizeof (uint32_t), - WordsInVectorOfSize (MAX_NUM_CONFIGS), File); + fwrite((char*)Class->PermProtos, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_PROTOS), File); + fwrite((char*)Class->PermConfigs, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_CONFIGS), File); /* then write out the list of temporary protos */ - NumTempProtos = count (Class->TempProtos); - fwrite ((char *) &NumTempProtos, sizeof (int), 1, File); + NumTempProtos = count(Class->TempProtos); + fwrite((char*)&NumTempProtos, sizeof(int), 1, File); TempProtos = Class->TempProtos; - iterate (TempProtos) { + iterate(TempProtos) { void* proto = first_node(TempProtos); - fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File); + fwrite((char*)proto, sizeof(TEMP_PROTO_STRUCT), 1, File); } /* then write out the adapted configs */ - fwrite ((char *) &NumConfigs, sizeof (int), 1, File); + fwrite((char*)&NumConfigs, sizeof(int), 1, File); for (i = 0; i < NumConfigs; i++) - if (test_bit (Class->PermConfigs, i)) - WritePermConfig (File, Class->Config[i].Perm); + if (test_bit(Class->PermConfigs, i)) + WritePermConfig(File, Class->Config[i].Perm); else - WriteTempConfig (File, Class->Config[i].Temp); - -} /* WriteAdaptedClass */ + WriteTempConfig(File, Class->Config[i].Temp); +} /* WriteAdaptedClass */ /*---------------------------------------------------------------------------*/ namespace tesseract { @@ -479,29 +470,28 @@ namespace tesseract { * @note Exceptions: none * @note History: Mon Mar 18 15:07:32 1991, DSJ, Created. */ -void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { +void Classify::WriteAdaptedTemplates(FILE* File, ADAPT_TEMPLATES Templates) { int i; /* first write the high level adaptive template struct */ - fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File); + fwrite((char*)Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File); /* then write out the basic integer templates */ - WriteIntTemplates (File, Templates->Templates, unicharset); + WriteIntTemplates(File, Templates->Templates, unicharset); /* then write out the adaptive info for each class */ for (i = 0; i < (Templates->Templates)->NumClasses; i++) { - WriteAdaptedClass (File, Templates->Class[i], - Templates->Templates->Class[i]->NumConfigs); + WriteAdaptedClass(File, Templates->Class[i], + Templates->Templates->Class[i]->NumConfigs); } -} /* WriteAdaptedTemplates */ +} /* WriteAdaptedTemplates */ } // namespace tesseract - /*---------------------------------------------------------------------------*/ /** * This routine writes a binary representation of a * permanent configuration to File. - * + * * @param File open file to write Config to * @param Config permanent config to write to File * @@ -509,17 +499,16 @@ void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { * @note Exceptions: none * @note History: Tue Mar 19 13:55:44 1991, DSJ, Created. */ -void WritePermConfig(FILE *File, PERM_CONFIG Config) { +void WritePermConfig(FILE* File, PERM_CONFIG Config) { uint8_t NumAmbigs = 0; - assert (Config != nullptr); + assert(Config != nullptr); while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs; - fwrite((char *) &NumAmbigs, sizeof(uint8_t), 1, File); + fwrite((char*)&NumAmbigs, sizeof(uint8_t), 1, File); fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); fwrite(&(Config->FontinfoId), sizeof(int), 1, File); -} /* WritePermConfig */ - +} /* WritePermConfig */ /*---------------------------------------------------------------------------*/ /** @@ -533,11 +522,11 @@ void WritePermConfig(FILE *File, PERM_CONFIG Config) { * @note Exceptions: none * @note History: Tue Mar 19 14:00:28 1991, DSJ, Created. */ -void WriteTempConfig(FILE *File, TEMP_CONFIG Config) { - assert (Config != nullptr); +void WriteTempConfig(FILE* File, TEMP_CONFIG Config) { + assert(Config != nullptr); - fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File); - fwrite ((char *) Config->Protos, sizeof (uint32_t), - Config->ProtoVectorSize, File); + fwrite((char*)Config, sizeof(TEMP_CONFIG_STRUCT), 1, File); + fwrite((char*)Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, + File); -} /* WriteTempConfig */ +} /* WriteTempConfig */ diff --git a/src/classify/adaptive.h b/src/classify/adaptive.h index 82ec43a669..349b02c2e9 100644 --- a/src/classify/adaptive.h +++ b/src/classify/adaptive.h @@ -21,46 +21,40 @@ /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ -#include "oldlist.h" -#include "intproto.h" #include +#include "intproto.h" +#include "oldlist.h" -typedef struct -{ +typedef struct { uint16_t ProtoId; uint16_t dummy; PROTO_STRUCT Proto; } - TEMP_PROTO_STRUCT; -typedef TEMP_PROTO_STRUCT *TEMP_PROTO; +typedef TEMP_PROTO_STRUCT* TEMP_PROTO; -typedef struct -{ +typedef struct { uint8_t NumTimesSeen; uint8_t ProtoVectorSize; PROTO_ID MaxProtoId; BIT_VECTOR Protos; int FontinfoId; // font information inferred from pre-trained templates } TEMP_CONFIG_STRUCT; -typedef TEMP_CONFIG_STRUCT *TEMP_CONFIG; +typedef TEMP_CONFIG_STRUCT* TEMP_CONFIG; -typedef struct -{ - UNICHAR_ID *Ambigs; +typedef struct { + UNICHAR_ID* Ambigs; int FontinfoId; // font information inferred from pre-trained templates } PERM_CONFIG_STRUCT; -typedef PERM_CONFIG_STRUCT *PERM_CONFIG; +typedef PERM_CONFIG_STRUCT* PERM_CONFIG; -typedef union -{ +typedef union { TEMP_CONFIG Temp; PERM_CONFIG Perm; } ADAPTED_CONFIG; -typedef struct -{ +typedef struct { uint8_t NumPermConfigs; uint8_t MaxNumTimesSeen; // maximum number of times any TEMP_CONFIG was seen uint8_t dummy[2]; // (cut at matcher_min_examples_for_prototyping) @@ -69,72 +63,73 @@ typedef struct LIST TempProtos; ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]; } ADAPT_CLASS_STRUCT; -typedef ADAPT_CLASS_STRUCT *ADAPT_CLASS; +typedef ADAPT_CLASS_STRUCT* ADAPT_CLASS; -typedef struct -{ +typedef struct { INT_TEMPLATES Templates; int NumNonEmptyClasses; uint8_t NumPermClasses; uint8_t dummy[3]; ADAPT_CLASS Class[MAX_NUM_CLASSES]; } ADAPT_TEMPLATES_STRUCT; -typedef ADAPT_TEMPLATES_STRUCT *ADAPT_TEMPLATES; +typedef ADAPT_TEMPLATES_STRUCT* ADAPT_TEMPLATES; /*---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------*/ #define NumNonEmptyClassesIn(Template) ((Template)->NumNonEmptyClasses) -#define IsEmptyAdaptedClass(Class) ((Class)->NumPermConfigs == 0 && \ -(Class)->TempProtos == NIL_LIST) +#define IsEmptyAdaptedClass(Class) \ + ((Class)->NumPermConfigs == 0 && (Class)->TempProtos == NIL_LIST) -#define ConfigIsPermanent(Class,ConfigId) \ -(test_bit ((Class)->PermConfigs, ConfigId)) +#define ConfigIsPermanent(Class, ConfigId) \ + (test_bit((Class)->PermConfigs, ConfigId)) -#define MakeConfigPermanent(Class,ConfigId) \ -(SET_BIT ((Class)->PermConfigs, ConfigId)) +#define MakeConfigPermanent(Class, ConfigId) \ + (SET_BIT((Class)->PermConfigs, ConfigId)) -#define MakeProtoPermanent(Class,ProtoId) \ -(SET_BIT ((Class)->PermProtos, ProtoId)) +#define MakeProtoPermanent(Class, ProtoId) \ + (SET_BIT((Class)->PermProtos, ProtoId)) -#define TempConfigFor(Class,ConfigId) \ -((Class)->Config[ConfigId].Temp) +#define TempConfigFor(Class, ConfigId) ((Class)->Config[ConfigId].Temp) -#define PermConfigFor(Class,ConfigId) \ -((Class)->Config[ConfigId].Perm) +#define PermConfigFor(Class, ConfigId) ((Class)->Config[ConfigId].Perm) -#define IncreaseConfidence(TempConfig) \ -((TempConfig)->NumTimesSeen++) +#define IncreaseConfidence(TempConfig) ((TempConfig)->NumTimesSeen++) -void AddAdaptedClass(ADAPT_TEMPLATES Templates, - ADAPT_CLASS Class, - CLASS_ID ClassId); +void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, + CLASS_ID ClassId); -void FreeTempProto(void *arg); +void FreeTempProto(void* arg); void FreeTempConfig(TEMP_CONFIG Config); -ADAPT_CLASS NewAdaptedClass(); +ADAPT_CLASS +NewAdaptedClass(); void free_adapted_class(ADAPT_CLASS adapt_class); void free_adapted_templates(ADAPT_TEMPLATES templates); -TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId); +TEMP_CONFIG +NewTempConfig(int MaxProtoId, int FontinfoId); -TEMP_PROTO NewTempProto(); +TEMP_PROTO +NewTempProto(); -ADAPT_CLASS ReadAdaptedClass(tesseract::TFile *File); +ADAPT_CLASS +ReadAdaptedClass(tesseract::TFile* File); -PERM_CONFIG ReadPermConfig(tesseract::TFile *File); +PERM_CONFIG +ReadPermConfig(tesseract::TFile* File); -TEMP_CONFIG ReadTempConfig(tesseract::TFile *File); +TEMP_CONFIG +ReadTempConfig(tesseract::TFile* File); -void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs); +void WriteAdaptedClass(FILE* File, ADAPT_CLASS Class, int NumConfigs); -void WritePermConfig(FILE *File, PERM_CONFIG Config); +void WritePermConfig(FILE* File, PERM_CONFIG Config); -void WriteTempConfig(FILE *File, TEMP_CONFIG Config); +void WriteTempConfig(FILE* File, TEMP_CONFIG Config); #endif diff --git a/src/classify/adaptmatch.cpp b/src/classify/adaptmatch.cpp index ec45e1e9a8..0bb0e379e8 100644 --- a/src/classify/adaptmatch.cpp +++ b/src/classify/adaptmatch.cpp @@ -24,7 +24,6 @@ #endif #include -#include "shapeclassifier.h" #include "ambigs.h" #include "blobclass.h" #include "blobs.h" @@ -49,6 +48,7 @@ #include "pageres.h" #include "params.h" #include "picofeat.h" +#include "shapeclassifier.h" #include "shapetable.h" #include "tessclassifier.h" #include "trainingsample.h" @@ -56,29 +56,29 @@ #include "werd.h" #include +#include #include -#include #include -#include +#include #ifdef __UNIX__ #include #endif #define ADAPT_TEMPLATE_SUFFIX ".a" -#define MAX_MATCHES 10 +#define MAX_MATCHES 10 #define UNLIKELY_NUM_FEAT 200 -#define NO_DEBUG 0 +#define NO_DEBUG 0 #define MAX_ADAPTABLE_WERD_SIZE 40 -#define ADAPTABLE_WERD_ADJUSTMENT (0.05) +#define ADAPTABLE_WERD_ADJUSTMENT (0.05) -#define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT) +#define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT) #define WORST_POSSIBLE_RATING (0.0f) -using tesseract::UnicharRating; using tesseract::ScoredFont; +using tesseract::UnicharRating; struct ADAPT_RESULTS { int32_t BlobLength; @@ -131,8 +131,7 @@ inline bool MarginalMatch(float confidence, float matcher_great_threshold) { // vector (index it will go at) if not present. static int FindScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) { for (int i = 0; i < results.match.size(); i++) { - if (results.match[i].unichar_id == id) - return i; + if (results.match[i].unichar_id == id) return i; } return results.match.size(); } @@ -145,13 +144,12 @@ static float ScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) { return results.match[index].rating; } -void InitMatcherRatings(FLOAT32 *Rating); +void InitMatcherRatings(FLOAT32* Rating); -int MakeTempProtoPerm(void *item1, void *item2); +int MakeTempProtoPerm(void* item1, void* item2); void SetAdaptiveThreshold(FLOAT32 Threshold); - /*----------------------------------------------------------------------------- Public Code -----------------------------------------------------------------------------*/ @@ -183,9 +181,9 @@ namespace tesseract { * contains the detailed results of the integer matcher. * */ -void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) { +void Classify::AdaptiveClassifier(TBLOB* Blob, BLOB_CHOICE_LIST* Choices) { assert(Choices != nullptr); - ADAPT_RESULTS *Results = new ADAPT_RESULTS; + ADAPT_RESULTS* Results = new ADAPT_RESULTS; Results->Initialize(); ASSERT_HOST(AdaptedTemplates != nullptr); @@ -209,18 +207,17 @@ void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) { } #ifndef GRAPHICS_DISABLED - if (classify_enable_adaptive_debugger) - DebugAdaptiveClassifier(Blob, Results); + if (classify_enable_adaptive_debugger) DebugAdaptiveClassifier(Blob, Results); #endif delete Results; -} /* AdaptiveClassifier */ +} /* AdaptiveClassifier */ // If *win is nullptr, sets it to a new ScrollView() object with title msg. // Clears the window and draws baselines. -void Classify::RefreshDebugWindow(ScrollView **win, const char *msg, - int y_offset, const TBOX &wbox) { - #ifndef GRAPHICS_DISABLED +void Classify::RefreshDebugWindow(ScrollView** win, const char* msg, + int y_offset, const TBOX& wbox) { +#ifndef GRAPHICS_DISABLED const int kSampleSpaceWidth = 500; if (*win == nullptr) { *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200, @@ -228,13 +225,12 @@ void Classify::RefreshDebugWindow(ScrollView **win, const char *msg, } (*win)->Clear(); (*win)->Pen(64, 64, 64); - (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset, - kSampleSpaceWidth, kBlnBaselineOffset); + (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset, kSampleSpaceWidth, + kBlnBaselineOffset); (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset, kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset); - (*win)->ZoomToRectangle(wbox.left(), wbox.top(), - wbox.right(), wbox.bottom()); - #endif // GRAPHICS_DISABLED + (*win)->ZoomToRectangle(wbox.left(), wbox.top(), wbox.right(), wbox.bottom()); +#endif // GRAPHICS_DISABLED } // Learns the given word using its chopped_word, seam_array, denorm, @@ -256,14 +252,13 @@ void Classify::LearnWord(const char* fontname, WERD_RES* word) { tprintf("\n\nAdapting to word = %s\n", word->best_choice->debug_string().string()); thresholds = new float[word_len]; - word->ComputeAdaptionThresholds(certainty_scale, - matcher_perfect_threshold, + word->ComputeAdaptionThresholds(certainty_scale, matcher_perfect_threshold, matcher_good_threshold, matcher_rating_margin, thresholds); } int start_blob = 0; - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (classify_debug_character_fragments) { if (learn_fragmented_word_debug_win_ != nullptr) { window_wait(learn_fragmented_word_debug_win_); @@ -275,11 +270,11 @@ void Classify::LearnWord(const char* fontname, WERD_RES* word) { word->chopped_word->plot(learn_fragmented_word_debug_win_); ScrollView::Update(); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED for (int ch = 0; ch < word_len; ++ch) { if (classify_debug_character_fragments) { - tprintf("\nLearning %s\n", word->correct_text[ch].string()); + tprintf("\nLearning %s\n", word->correct_text[ch].string()); } if (word->correct_text[ch].length() > 0) { float threshold = thresholds != nullptr ? thresholds[ch] : 0.0f; @@ -301,22 +296,21 @@ void Classify::LearnWord(const char* fontname, WERD_RES* word) { } // Learn the fragments. if (!garbage) { - bool pieces_all_natural = word->PiecesAllNatural(start_blob, - word->best_state[ch]); + bool pieces_all_natural = + word->PiecesAllNatural(start_blob, word->best_state[ch]); if (pieces_all_natural || !prioritize_division) { for (frag = 0; frag < word->best_state[ch]; ++frag) { GenericVector tokens; word->correct_text[ch].split(' ', &tokens); - tokens[0] = CHAR_FRAGMENT::to_string( - tokens[0].string(), frag, word->best_state[ch], - pieces_all_natural); + tokens[0] = CHAR_FRAGMENT::to_string(tokens[0].string(), frag, + word->best_state[ch], + pieces_all_natural); STRING full_string; for (int i = 0; i < tokens.size(); i++) { full_string += tokens[i]; - if (i != tokens.size() - 1) - full_string += ' '; + if (i != tokens.size() - 1) full_string += ' '; } LearnPieces(fontname, start_blob + frag, 1, threshold, CST_FRAGMENT, full_string.string(), word); @@ -354,7 +348,7 @@ void Classify::LearnWord(const char* fontname, WERD_RES* word) { } start_blob += word->best_state[ch]; } - delete [] thresholds; + delete[] thresholds; } // LearnWord. // Builds a blob of length fragments, from the word, starting at start, @@ -382,10 +376,9 @@ void Classify::LearnPieces(const char* fontname, int start, int length, TBLOB* blob = word->chopped_word->blobs[start]; // Rotate the blob if needed for classification. TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded(); - if (rotated_blob == nullptr) - rotated_blob = blob; + if (rotated_blob == nullptr) rotated_blob = blob; - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED // Draw debug windows showing the blob that is being learned if needed. if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) { RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600, @@ -396,26 +389,24 @@ void Classify::LearnPieces(const char* fontname, int start, int length, } if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) { ASSERT_HOST(learn_fragments_debug_win_ != nullptr); // set up in LearnWord - blob->plot(learn_fragments_debug_win_, - ScrollView::BLUE, ScrollView::BROWN); + blob->plot(learn_fragments_debug_win_, ScrollView::BLUE, ScrollView::BROWN); learn_fragments_debug_win_->Update(); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED if (fontname != nullptr) { classify_norm_method.set_value(character); // force char norm spc 30/11/93 - tess_bn_matching.set_value(false); // turn it off + tess_bn_matching.set_value(false); // turn it off tess_cn_matching.set_value(false); DENORM bl_denorm, cn_denorm; INT_FX_RESULT_STRUCT fx_info; - SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm, - &bl_denorm, &cn_denorm, &fx_info); + SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm, &bl_denorm, + &cn_denorm, &fx_info); LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text); } else if (unicharset.contains_unichar(correct_text)) { UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text); - int font_id = word->fontinfo != nullptr - ? fontinfo_table_.get_id(*word->fontinfo) - : 0; + int font_id = + word->fontinfo != nullptr ? fontinfo_table_.get_id(*word->fontinfo) : 0; if (classify_learning_debug_level >= 1) tprintf("Adapting to char = %s, thr= %g font_id= %d\n", unicharset.id_to_unichar(class_id), threshold, font_id); @@ -456,19 +447,19 @@ void Classify::LearnPieces(const char* fontname, int start, int length, */ void Classify::EndAdaptiveClassifier() { STRING Filename; - FILE *File; + FILE* File; - if (AdaptedTemplates != nullptr && - classify_enable_adaptive_matcher && classify_save_adapted_templates) { + if (AdaptedTemplates != nullptr && classify_enable_adaptive_matcher && + classify_save_adapted_templates) { Filename = imagefile + ADAPT_TEMPLATE_SUFFIX; - File = fopen (Filename.string(), "wb"); + File = fopen(Filename.string(), "wb"); if (File == nullptr) - cprintf ("Unable to save adapted templates to %s!\n", Filename.string()); + cprintf("Unable to save adapted templates to %s!\n", Filename.string()); else { - cprintf ("\nSaving adapted templates to %s ...", Filename.string()); + cprintf("\nSaving adapted templates to %s ...", Filename.string()); fflush(stdout); WriteAdaptedTemplates(File, AdaptedTemplates); - cprintf ("\n"); + cprintf("\n"); fclose(File); } } @@ -502,8 +493,7 @@ void Classify::EndAdaptiveClassifier() { shape_table_ = nullptr; delete static_classifier_; static_classifier_ = nullptr; -} /* EndAdaptiveClassifier */ - +} /* EndAdaptiveClassifier */ /*---------------------------------------------------------------------------*/ /** @@ -524,8 +514,7 @@ void Classify::EndAdaptiveClassifier() { * @note History: Mon Mar 11 12:49:34 1991, DSJ, Created. */ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) { - if (!classify_enable_adaptive_matcher) - return; + if (!classify_enable_adaptive_matcher) return; if (AllProtosOn != nullptr) EndAdaptiveClassifier(); // Don't leak with multiple inits. @@ -565,7 +554,7 @@ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) { zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS)); for (int i = 0; i < MAX_NUM_CLASSES; i++) { - BaselineCutoffs[i] = 0; + BaselineCutoffs[i] = 0; } if (classify_use_pre_adapted_templates) { @@ -589,11 +578,10 @@ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) { } } } else { - if (AdaptedTemplates != nullptr) - free_adapted_templates(AdaptedTemplates); + if (AdaptedTemplates != nullptr) free_adapted_templates(AdaptedTemplates); AdaptedTemplates = NewAdaptedTemplates(true); } -} /* InitAdaptiveClassifier */ +} /* InitAdaptiveClassifier */ void Classify::ResetAdaptiveClassifierInternal() { if (classify_learning_debug_level > 0) { @@ -657,8 +645,7 @@ void Classify::SettupPass1() { getDict().SettupStopperPass1(); -} /* SettupPass1 */ - +} /* SettupPass1 */ /*---------------------------------------------------------------------------*/ /** @@ -676,8 +663,7 @@ void Classify::SettupPass2() { EnableLearning = FALSE; getDict().SettupStopperPass2(); -} /* SettupPass2 */ - +} /* SettupPass2 */ /*---------------------------------------------------------------------------*/ /** @@ -699,11 +685,8 @@ void Classify::SettupPass2() { * @note Exceptions: none * @note History: Thu Mar 14 12:49:39 1991, DSJ, Created. */ -void Classify::InitAdaptedClass(TBLOB *Blob, - CLASS_ID ClassId, - int FontinfoId, - ADAPT_CLASS Class, - ADAPT_TEMPLATES Templates) { +void Classify::InitAdaptedClass(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, + ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) { FEATURE_SET Features; int Fid, Pid; FEATURE Feature; @@ -728,14 +711,14 @@ void Classify::InitAdaptedClass(TBLOB *Blob, if (Templates == AdaptedTemplates) BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId]; - IClass = ClassForClassId (Templates->Templates, ClassId); + IClass = ClassForClassId(Templates->Templates, ClassId); for (Fid = 0; Fid < Features->NumFeatures; Fid++) { - Pid = AddIntProto (IClass); - assert (Pid != NO_PROTO); + Pid = AddIntProto(IClass); + assert(Pid != NO_PROTO); Feature = Features->Features[Fid]; - TempProto = NewTempProto (); + TempProto = NewTempProto(); Proto = &(TempProto->Proto); /* compute proto params - NOTE that Y_DIM_OFFSET must be used because @@ -748,30 +731,27 @@ void Classify::InitAdaptedClass(TBLOB *Blob, FillABC(Proto); TempProto->ProtoId = Pid; - SET_BIT (Config->Protos, Pid); + SET_BIT(Config->Protos, Pid); ConvertProto(Proto, Pid, IClass); AddProtoToProtoPruner(Proto, Pid, IClass, classify_learning_debug_level >= 2); - Class->TempProtos = push (Class->TempProtos, TempProto); + Class->TempProtos = push(Class->TempProtos, TempProto); } FreeFeatureSet(Features); AddIntConfig(IClass); - ConvertConfig (AllProtosOn, 0, IClass); + ConvertConfig(AllProtosOn, 0, IClass); if (classify_learning_debug_level >= 1) { tprintf("Added new class '%s' with class id %d and %d protos.\n", unicharset.id_to_unichar(ClassId), ClassId, NumFeatures); - if (classify_learning_debug_level > 1) - DisplayAdaptedChar(Blob, IClass); + if (classify_learning_debug_level > 1) DisplayAdaptedChar(Blob, IClass); } - if (IsEmptyAdaptedClass(Class)) - (Templates->NumNonEmptyClasses)++; -} /* InitAdaptedClass */ - + if (IsEmptyAdaptedClass(Class)) (Templates->NumNonEmptyClasses)++; +} /* InitAdaptedClass */ /*---------------------------------------------------------------------------*/ /** @@ -794,9 +774,8 @@ void Classify::InitAdaptedClass(TBLOB *Blob, * @note Exceptions: none * @note History: Tue Mar 12 17:55:18 1991, DSJ, Created. */ -int Classify::GetAdaptiveFeatures(TBLOB *Blob, - INT_FEATURE_ARRAY IntFeatures, - FEATURE_SET *FloatFeatures) { +int Classify::GetAdaptiveFeatures(TBLOB* Blob, INT_FEATURE_ARRAY IntFeatures, + FEATURE_SET* FloatFeatures) { FEATURE_SET Features; int NumFeatures; @@ -813,8 +792,7 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob, *FloatFeatures = Features; return NumFeatures; -} /* GetAdaptiveFeatures */ - +} /* GetAdaptiveFeatures */ /*----------------------------------------------------------------------------- Private Code @@ -836,8 +814,8 @@ bool Classify::AdaptableWord(WERD_RES* word) { if (word->best_choice == nullptr) return false; int BestChoiceLength = word->best_choice->length(); float adaptable_score = - getDict().segment_penalty_dict_case_ok + ADAPTABLE_WERD_ADJUSTMENT; - return // rules that apply in general - simplest to compute first + getDict().segment_penalty_dict_case_ok + ADAPTABLE_WERD_ADJUSTMENT; + return // rules that apply in general - simplest to compute first BestChoiceLength > 0 && BestChoiceLength == word->rebuild_word->NumBlobs() && BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && @@ -880,8 +858,7 @@ void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, FEATURE_SET FloatFeatures; int NewTempConfigId; - if (!LegalClassId (ClassId)) - return; + if (!LegalClassId(ClassId)) return; int_result.unichar_id = ClassId; Class = adaptive_templates->Class[ClassId]; @@ -905,9 +882,8 @@ void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, reset_bit(MatchingFontConfigs, cfg); } } - im_.Match(IClass, AllProtosOn, MatchingFontConfigs, - NumFeatures, IntFeatures, - &int_result, classify_adapt_feature_threshold, + im_.Match(IClass, AllProtosOn, MatchingFontConfigs, NumFeatures, + IntFeatures, &int_result, classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows); FreeBitVector(MatchingFontConfigs); @@ -939,8 +915,7 @@ void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, if (classify_learning_debug_level >= 1) { tprintf("Found poor match to temp config %d = %4.1f%%.\n", int_result.config, int_result.rating * 100.0); - if (classify_learning_debug_level > 2) - DisplayAdaptedChar(Blob, IClass); + if (classify_learning_debug_level > 2) DisplayAdaptedChar(Blob, IClass); } NewTempConfigId = MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId, @@ -959,32 +934,30 @@ void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, } FreeFeatureSet(FloatFeatures); } -} /* AdaptToChar */ +} /* AdaptToChar */ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { #ifndef GRAPHICS_DISABLED INT_FX_RESULT_STRUCT fx_info; GenericVector bl_features; - TrainingSample* sample = - BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info, - &bl_features); + TrainingSample* sample = BlobToTrainingSample(*blob, classify_nonlinear_norm, + &fx_info, &bl_features); if (sample == nullptr) return; UnicharRating int_result; - im_.Match(int_class, AllProtosOn, AllConfigsOn, - bl_features.size(), &bl_features[0], - &int_result, classify_adapt_feature_threshold, + im_.Match(int_class, AllProtosOn, AllConfigsOn, bl_features.size(), + &bl_features[0], &int_result, classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows); - tprintf("Best match to temp config %d = %4.1f%%.\n", - int_result.config, int_result.rating * 100.0); + tprintf("Best match to temp config %d = %4.1f%%.\n", int_result.config, + int_result.rating * 100.0); if (classify_learning_debug_level >= 2) { uint32_t ConfigMask; ConfigMask = 1 << int_result.config; ShowMatchDisplay(); im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask, - bl_features.size(), &bl_features[0], - &int_result, classify_adapt_feature_threshold, - 6 | 0x19, matcher_debug_separate_windows); + bl_features.size(), &bl_features[0], &int_result, + classify_adapt_feature_threshold, 6 | 0x19, + matcher_debug_separate_windows); UpdateMatchDisplay(); } @@ -1013,7 +986,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { * @note History: Tue Mar 12 18:19:29 1991, DSJ, Created. */ void Classify::AddNewResult(const UnicharRating& new_result, - ADAPT_RESULTS *results) { + ADAPT_RESULTS* results) { int old_match = FindScoredUnichar(new_result.unichar_id, *results); if (new_result.rating + matcher_bad_match_pad < results->best_rating || @@ -1041,8 +1014,7 @@ void Classify::AddNewResult(const UnicharRating& new_result, results->best_rating = new_result.rating; results->best_unichar_id = new_result.unichar_id; } -} /* AddNewResult */ - +} /* AddNewResult */ /*---------------------------------------------------------------------------*/ /** @@ -1068,21 +1040,17 @@ void Classify::AddNewResult(const UnicharRating& new_result, */ void Classify::AmbigClassifier( const GenericVector& int_features, - const INT_FX_RESULT_STRUCT& fx_info, - const TBLOB *blob, - INT_TEMPLATES templates, - ADAPT_CLASS *classes, - UNICHAR_ID *ambiguities, - ADAPT_RESULTS *results) { + const INT_FX_RESULT_STRUCT& fx_info, const TBLOB* blob, + INT_TEMPLATES templates, ADAPT_CLASS* classes, UNICHAR_ID* ambiguities, + ADAPT_RESULTS* results) { if (int_features.empty()) return; uint8_t* CharNormArray = new uint8_t[unicharset.size()]; UnicharRating int_result; - results->BlobLength = GetCharNormFeature(fx_info, templates, nullptr, - CharNormArray); + results->BlobLength = + GetCharNormFeature(fx_info, templates, nullptr, CharNormArray); bool debug = matcher_debug_level >= 2 || classify_debug_level > 1; - if (debug) - tprintf("AM Matches = "); + if (debug) tprintf("AM Matches = "); int top = blob->bounding_box().top(); int bottom = blob->bounding_box().bottom(); @@ -1090,10 +1058,8 @@ void Classify::AmbigClassifier( CLASS_ID class_id = *ambiguities; int_result.unichar_id = class_id; - im_.Match(ClassForClassId(templates, class_id), - AllProtosOn, AllConfigsOn, - int_features.size(), &int_features[0], - &int_result, + im_.Match(ClassForClassId(templates, class_id), AllProtosOn, AllConfigsOn, + int_features.size(), &int_features[0], &int_result, classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows); @@ -1103,19 +1069,16 @@ void Classify::AmbigClassifier( CharNormArray, &int_result, results); ambiguities++; } - delete [] CharNormArray; -} /* AmbigClassifier */ + delete[] CharNormArray; +} /* AmbigClassifier */ /*---------------------------------------------------------------------------*/ /// Factored-out calls to IntegerMatcher based on class pruner results. /// Returns integer matcher results inside CLASS_PRUNER_RESULTS structure. -void Classify::MasterMatcher(INT_TEMPLATES templates, - int16_t num_features, +void Classify::MasterMatcher(INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT* features, - const uint8_t* norm_factors, - ADAPT_CLASS* classes, - int debug, - int matcher_multiplier, + const uint8_t* norm_factors, ADAPT_CLASS* classes, + int debug, int matcher_multiplier, const TBOX& blob_box, const GenericVector& results, ADAPT_RESULTS* final_results) { @@ -1124,23 +1087,21 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, UnicharRating int_result; for (int c = 0; c < results.size(); c++) { CLASS_ID class_id = results[c].Class; - BIT_VECTOR protos = classes != nullptr ? classes[class_id]->PermProtos - : AllProtosOn; - BIT_VECTOR configs = classes != nullptr ? classes[class_id]->PermConfigs - : AllConfigsOn; + BIT_VECTOR protos = + classes != nullptr ? classes[class_id]->PermProtos : AllProtosOn; + BIT_VECTOR configs = + classes != nullptr ? classes[class_id]->PermConfigs : AllConfigsOn; int_result.unichar_id = class_id; - im_.Match(ClassForClassId(templates, class_id), - protos, configs, - num_features, features, - &int_result, classify_adapt_feature_threshold, debug, + im_.Match(ClassForClassId(templates, class_id), protos, configs, + num_features, features, &int_result, + classify_adapt_feature_threshold, debug, matcher_debug_separate_windows); bool debug = matcher_debug_level >= 2 || classify_debug_level > 1; - ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top, - results[c].Rating, - final_results->BlobLength, - matcher_multiplier, norm_factors, - &int_result, final_results); + ExpandShapesAndApplyCorrections( + classes, debug, class_id, bottom, top, results[c].Rating, + final_results->BlobLength, matcher_multiplier, norm_factors, + &int_result, final_results); } } @@ -1152,8 +1113,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, void Classify::ExpandShapesAndApplyCorrections( ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, - const uint8_t* cn_factors, - UnicharRating* int_result, ADAPT_RESULTS* final_results) { + const uint8_t* cn_factors, UnicharRating* int_result, + ADAPT_RESULTS* final_results) { if (classes != nullptr) { // Adapted result. Convert configs to fontinfo_ids. int_result->adapted = true; @@ -1165,9 +1126,8 @@ void Classify::ExpandShapesAndApplyCorrections( // Pre-trained result. Map fonts using font_sets_. int_result->adapted = false; for (int f = 0; f < int_result->fonts.size(); ++f) { - int_result->fonts[f].fontinfo_id = - ClassAndConfigIDToFontOrShapeID(class_id, - int_result->fonts[f].fontinfo_id); + int_result->fonts[f].fontinfo_id = ClassAndConfigIDToFontOrShapeID( + class_id, int_result->fonts[f].fontinfo_id); } if (shape_table_ != nullptr) { // Two possible cases: @@ -1187,7 +1147,9 @@ void Classify::ExpandShapesAndApplyCorrections( // Find the mapped_result for unichar_id. int r = 0; for (r = 0; r < mapped_results.size() && - mapped_results[r].unichar_id != unichar_id; ++r) {} + mapped_results[r].unichar_id != unichar_id; + ++r) { + } if (r == mapped_results.size()) { mapped_results.push_back(*int_result); mapped_results[r].unichar_id = unichar_id; @@ -1200,22 +1162,20 @@ void Classify::ExpandShapesAndApplyCorrections( } } for (int m = 0; m < mapped_results.size(); ++m) { - mapped_results[m].rating = - ComputeCorrectedRating(debug, mapped_results[m].unichar_id, - cp_rating, int_result->rating, - int_result->feature_misses, bottom, top, - blob_length, matcher_multiplier, cn_factors); + mapped_results[m].rating = ComputeCorrectedRating( + debug, mapped_results[m].unichar_id, cp_rating, int_result->rating, + int_result->feature_misses, bottom, top, blob_length, + matcher_multiplier, cn_factors); AddNewResult(mapped_results[m], final_results); } return; } } if (unicharset.get_enabled(class_id)) { - int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating, - int_result->rating, - int_result->feature_misses, - bottom, top, blob_length, - matcher_multiplier, cn_factors); + int_result->rating = + ComputeCorrectedRating(debug, class_id, cp_rating, int_result->rating, + int_result->feature_misses, bottom, top, + blob_length, matcher_multiplier, cn_factors); AddNewResult(*int_result, final_results); } } @@ -1225,45 +1185,38 @@ void Classify::ExpandShapesAndApplyCorrections( // for non-alnums being vertical misfits. Returns the corrected confidence. double Classify::ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, - int feature_misses, - int bottom, int top, + int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t* cn_factors) { // Compute class feature corrections. - double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length, - cn_factors[unichar_id], - matcher_multiplier); + double cn_corrected = im_.ApplyCNCorrection( + 1.0 - im_rating, blob_length, cn_factors[unichar_id], matcher_multiplier); double miss_penalty = tessedit_class_miss_scale * feature_misses; double vertical_penalty = 0.0; // Penalize non-alnums for being vertical misfits. if (!unicharset.get_isalpha(unichar_id) && - !unicharset.get_isdigit(unichar_id) && - cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) { + !unicharset.get_isdigit(unichar_id) && cn_factors[unichar_id] != 0 && + classify_misfit_junk_penalty > 0.0) { int min_bottom, max_bottom, min_top, max_top; - unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, - &min_top, &max_top); + unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, + &max_top); if (debug) { - tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n", - top, min_top, max_top, bottom, min_bottom, max_bottom); + tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n", top, min_top, + max_top, bottom, min_bottom, max_bottom); } - if (top < min_top || top > max_top || - bottom < min_bottom || bottom > max_bottom) { + if (top < min_top || top > max_top || bottom < min_bottom || + bottom > max_bottom) { vertical_penalty = classify_misfit_junk_penalty; } } double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty); - if (result < WORST_POSSIBLE_RATING) - result = WORST_POSSIBLE_RATING; + if (result < WORST_POSSIBLE_RATING) result = WORST_POSSIBLE_RATING; if (debug) { tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n", - unicharset.id_to_unichar(unichar_id), - result * 100.0, - cp_rating * 100.0, - (1.0 - im_rating) * 100.0, - (cn_corrected - (1.0 - im_rating)) * 100.0, - cn_factors[unichar_id], - miss_penalty * 100.0, - vertical_penalty * 100.0); + unicharset.id_to_unichar(unichar_id), result * 100.0, + cp_rating * 100.0, (1.0 - im_rating) * 100.0, + (cn_corrected - (1.0 - im_rating)) * 100.0, cn_factors[unichar_id], + miss_penalty * 100.0, vertical_penalty * 100.0); } return result; } @@ -1288,10 +1241,10 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id, * @note Exceptions: none * @note History: Tue Mar 12 19:38:03 1991, DSJ, Created. */ -UNICHAR_ID *Classify::BaselineClassifier( - TBLOB *Blob, const GenericVector& int_features, - const INT_FX_RESULT_STRUCT& fx_info, - ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) { +UNICHAR_ID* Classify::BaselineClassifier( + TBLOB* Blob, const GenericVector& int_features, + const INT_FX_RESULT_STRUCT& fx_info, ADAPT_TEMPLATES Templates, + ADAPT_RESULTS* Results) { if (int_features.empty()) return nullptr; uint8_t* CharNormArray = new uint8_t[unicharset.size()]; ClearCharNormArray(CharNormArray); @@ -1304,19 +1257,18 @@ UNICHAR_ID *Classify::BaselineClassifier( tprintf("BL Matches = "); MasterMatcher(Templates->Templates, int_features.size(), &int_features[0], - CharNormArray, - Templates->Class, matcher_debug_flags, 0, + CharNormArray, Templates->Class, matcher_debug_flags, 0, Blob->bounding_box(), Results->CPResults, Results); - delete [] CharNormArray; + delete[] CharNormArray; CLASS_ID ClassId = Results->best_unichar_id; if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0) return nullptr; - return Templates->Class[ClassId]-> - Config[Results->match[Results->best_match_index].config].Perm->Ambigs; -} /* BaselineClassifier */ - + return Templates->Class[ClassId] + ->Config[Results->match[Results->best_match_index].config] + .Perm->Ambigs; +} /* BaselineClassifier */ /*---------------------------------------------------------------------------*/ /** @@ -1337,26 +1289,24 @@ UNICHAR_ID *Classify::BaselineClassifier( * @note Exceptions: none * @note History: Tue Mar 12 16:02:52 1991, DSJ, Created. */ -int Classify::CharNormClassifier(TBLOB *blob, - const TrainingSample& sample, - ADAPT_RESULTS *adapt_results) { +int Classify::CharNormClassifier(TBLOB* blob, const TrainingSample& sample, + ADAPT_RESULTS* adapt_results) { // This is the length that is used for scaling ratings vs certainty. adapt_results->BlobLength = IntCastRounded(sample.outline_length() / kStandardFeatureLength); GenericVector unichar_results; - static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0, - -1, &unichar_results); + static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0, -1, + &unichar_results); // Convert results to the format used internally by AdaptiveClassifier. for (int r = 0; r < unichar_results.size(); ++r) { AddNewResult(unichar_results[r], adapt_results); } return sample.num_features(); -} /* CharNormClassifier */ +} /* CharNormClassifier */ // As CharNormClassifier, but operates on a TrainingSample and outputs to // a GenericVector of ShapeRating without conversion to classes. -int Classify::CharNormTrainingSample(bool pruner_only, - int keep_this, +int Classify::CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample& sample, GenericVector* results) { results->clear(); @@ -1371,19 +1321,20 @@ int Classify::CharNormTrainingSample(bool pruner_only, // Compute the char_norm_array from the saved cn_feature. FEATURE norm_feature = sample.GetCNFeature(); uint8_t* char_norm_array = new uint8_t[unicharset.size()]; - int num_pruner_classes = std::max(unicharset.size(), - PreTrainedTemplates->NumClasses); + int num_pruner_classes = + std::max(unicharset.size(), PreTrainedTemplates->NumClasses); uint8_t* pruner_norm_array = new uint8_t[num_pruner_classes]; adapt_results->BlobLength = static_cast(ActualOutlineLength(norm_feature) * 20 + 0.5); ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array, pruner_norm_array); - PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(), - pruner_norm_array, - shape_table_ != nullptr ? &shapetable_cutoffs_[0] : CharNormCutoffs, - &adapt_results->CPResults); - delete [] pruner_norm_array; + PruneClasses( + PreTrainedTemplates, num_features, keep_this, sample.features(), + pruner_norm_array, + shape_table_ != nullptr ? &shapetable_cutoffs_[0] : CharNormCutoffs, + &adapt_results->CPResults); + delete[] pruner_norm_array; if (keep_this >= 0) { adapt_results->CPResults[0].Class = keep_this; adapt_results->CPResults.truncate(1); @@ -1397,21 +1348,19 @@ int Classify::CharNormTrainingSample(bool pruner_only, } } else { MasterMatcher(PreTrainedTemplates, num_features, sample.features(), - char_norm_array, - nullptr, matcher_debug_flags, - classify_integer_matcher_multiplier, - blob_box, adapt_results->CPResults, adapt_results); + char_norm_array, nullptr, matcher_debug_flags, + classify_integer_matcher_multiplier, blob_box, + adapt_results->CPResults, adapt_results); // Convert master matcher results to output format. for (int i = 0; i < adapt_results->match.size(); i++) { results->push_back(adapt_results->match[i]); } results->sort(&UnicharRating::SortDescendingRating); } - delete [] char_norm_array; + delete[] char_norm_array; delete adapt_results; return num_features; -} /* CharNormTrainingSample */ - +} /* CharNormTrainingSample */ /*---------------------------------------------------------------------------*/ /** @@ -1428,13 +1377,13 @@ int Classify::CharNormTrainingSample(bool pruner_only, * @note Exceptions: none * @note History: Tue Mar 12 18:36:52 1991, DSJ, Created. */ -void Classify::ClassifyAsNoise(ADAPT_RESULTS *results) { +void Classify::ClassifyAsNoise(ADAPT_RESULTS* results) { float rating = results->BlobLength / matcher_avg_noise_size; rating *= rating; rating /= 1.0 + rating; AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results); -} /* ClassifyAsNoise */ +} /* ClassifyAsNoise */ /// The function converts the given match ratings to the list of blob /// choices with ratings and certainties (used by the context checkers). @@ -1443,8 +1392,8 @@ void Classify::ClassifyAsNoise(ADAPT_RESULTS *results) { /// For each classification result check the unicharset for "definite" /// ambiguities and modify the resulting Choices accordingly. void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, - ADAPT_RESULTS *Results, - BLOB_CHOICE_LIST *Choices) { + ADAPT_RESULTS* Results, + BLOB_CHOICE_LIST* Choices) { assert(Choices != nullptr); FLOAT32 Rating; FLOAT32 Certainty; @@ -1460,17 +1409,17 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, int max_matches = MAX_MATCHES; if (shape_table_ != nullptr) { max_matches = shape_table_->MaxNumUnichars() * 2; - if (max_matches < MAX_MATCHES) - max_matches = MAX_MATCHES; + if (max_matches < MAX_MATCHES) max_matches = MAX_MATCHES; } float best_certainty = -MAX_FLOAT32; for (int i = 0; i < Results->match.size(); i++) { const UnicharRating& result = Results->match[i]; bool adapted = result.adapted; - bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != nullptr); - if (temp_it.length()+1 == max_matches && - !contains_nonfrag && current_is_frag) { + bool current_is_frag = + (unicharset.get_fragment(result.unichar_id) != nullptr); + if (temp_it.length() + 1 == max_matches && !contains_nonfrag && + current_is_frag) { continue; // look for a non-fragmented character to fill the // last spot in Choices if only fragments are present } @@ -1480,7 +1429,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, // So we need to assign a poor, but not infinitely bad score. if (Results->BlobLength == 0) { Certainty = -20; - Rating = 100; // should be -certainty * real_blob_length + Rating = 100; // should be -certainty * real_blob_length } else { Rating = Certainty = (1.0f - result.rating); Rating *= rating_scale * Results->BlobLength; @@ -1492,21 +1441,20 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, // whether adapted or static. // TODO(rays) find some way of automatically tuning these constants. if (Certainty > best_certainty) { - best_certainty = std::min(Certainty, static_cast(classify_adapted_pruning_threshold)); + best_certainty = std::min( + Certainty, static_cast(classify_adapted_pruning_threshold)); } else if (adapted && Certainty / classify_adapted_pruning_factor < best_certainty) { continue; // Don't accept bad adapted results. } float min_xheight, max_xheight, yshift; - denorm.XHeightRange(result.unichar_id, unicharset, box, - &min_xheight, &max_xheight, &yshift); - BLOB_CHOICE* choice = - new BLOB_CHOICE(result.unichar_id, Rating, Certainty, - unicharset.get_script(result.unichar_id), - min_xheight, max_xheight, yshift, - adapted ? BCC_ADAPTED_CLASSIFIER - : BCC_STATIC_CLASSIFIER); + denorm.XHeightRange(result.unichar_id, unicharset, box, &min_xheight, + &max_xheight, &yshift); + BLOB_CHOICE* choice = new BLOB_CHOICE( + result.unichar_id, Rating, Certainty, + unicharset.get_script(result.unichar_id), min_xheight, max_xheight, + yshift, adapted ? BCC_ADAPTED_CLASSIFIER : BCC_STATIC_CLASSIFIER); choice->set_fonts(result.fonts); temp_it.add_to_end(choice); contains_nonfrag |= !current_is_frag; // update contains_nonfrag @@ -1516,7 +1464,6 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, Results->match.truncate(choices_length); } // ConvertMatchesToChoices - /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED /** @@ -1529,8 +1476,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, * @note Exceptions: none * @note History: Wed Mar 13 16:44:41 1991, DSJ, Created. */ -void Classify::DebugAdaptiveClassifier(TBLOB *blob, - ADAPT_RESULTS *Results) { +void Classify::DebugAdaptiveClassifier(TBLOB* blob, ADAPT_RESULTS* Results) { if (static_classifier_ == nullptr) return; INT_FX_RESULT_STRUCT fx_info; GenericVector bl_features; @@ -1539,7 +1485,7 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob, if (sample == nullptr) return; static_classifier_->DebugDisplay(*sample, blob->denorm().pix(), Results->best_unichar_id); -} /* DebugAdaptiveClassifier */ +} /* DebugAdaptiveClassifier */ #endif /*---------------------------------------------------------------------------*/ @@ -1565,14 +1511,13 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob, * @note Exceptions: none * @note History: Tue Mar 12 08:50:11 1991, DSJ, Created. */ -void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { - UNICHAR_ID *Ambiguities; +void Classify::DoAdaptiveMatch(TBLOB* Blob, ADAPT_RESULTS* Results) { + UNICHAR_ID* Ambiguities; INT_FX_RESULT_STRUCT fx_info; GenericVector bl_features; - TrainingSample* sample = - BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, - &bl_features); + TrainingSample* sample = BlobToTrainingSample(*Blob, classify_nonlinear_norm, + &fx_info, &bl_features); if (sample == nullptr) return; if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || @@ -1588,11 +1533,8 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { Results->match.empty()) { CharNormClassifier(Blob, *sample, Results); } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) { - AmbigClassifier(bl_features, fx_info, Blob, - PreTrainedTemplates, - AdaptedTemplates->Class, - Ambiguities, - Results); + AmbigClassifier(bl_features, fx_info, Blob, PreTrainedTemplates, + AdaptedTemplates->Class, Ambiguities, Results); } } @@ -1603,7 +1545,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { if (!Results->HasNonfragment || Results->match.empty()) ClassifyAsNoise(Results); delete sample; -} /* DoAdaptiveMatch */ +} /* DoAdaptiveMatch */ /*---------------------------------------------------------------------------*/ /** @@ -1622,18 +1564,16 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { * @note Exceptions: none * @note History: Fri Mar 15 08:08:22 1991, DSJ, Created. */ -UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, - CLASS_ID CorrectClass) { - ADAPT_RESULTS *Results = new ADAPT_RESULTS(); - UNICHAR_ID *Ambiguities; +UNICHAR_ID* Classify::GetAmbiguities(TBLOB* Blob, CLASS_ID CorrectClass) { + ADAPT_RESULTS* Results = new ADAPT_RESULTS(); + UNICHAR_ID* Ambiguities; int i; Results->Initialize(); INT_FX_RESULT_STRUCT fx_info; GenericVector bl_features; - TrainingSample* sample = - BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, - &bl_features); + TrainingSample* sample = BlobToTrainingSample(*Blob, classify_nonlinear_norm, + &fx_info, &bl_features); if (sample == nullptr) { delete Results; return nullptr; @@ -1649,7 +1589,7 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, Ambiguities = new UNICHAR_ID[Results->match.size() + 1]; if (Results->match.size() > 1 || (Results->match.size() == 1 && - Results->match[0].unichar_id != CorrectClass)) { + Results->match[0].unichar_id != CorrectClass)) { for (i = 0; i < Results->match.size(); i++) Ambiguities[i] = Results->match[i].unichar_id; Ambiguities[i] = -1; @@ -1659,15 +1599,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, delete Results; return Ambiguities; -} /* GetAmbiguities */ +} /* GetAmbiguities */ // Returns true if the given blob looks too dissimilar to any character // present in the classifier templates. -bool Classify::LooksLikeGarbage(TBLOB *blob) { - BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); +bool Classify::LooksLikeGarbage(TBLOB* blob) { + BLOB_CHOICE_LIST* ratings = new BLOB_CHOICE_LIST(); AdaptiveClassifier(blob, ratings); BLOB_CHOICE_IT ratings_it(ratings); - const UNICHARSET &unicharset = getDict().getUnicharset(); + const UNICHARSET& unicharset = getDict().getUnicharset(); if (classify_debug_character_fragments) { print_ratings_list("======================\nLooksLikeGarbage() got ", ratings, unicharset); @@ -1679,8 +1619,7 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) { } float certainty = ratings_it.data()->certainty(); delete ratings; - return certainty < - classify_character_fragments_garbage_certainty_threshold; + return certainty < classify_character_fragments_garbage_certainty_threshold; } delete ratings; return true; // no whole characters in ratings @@ -1726,7 +1665,7 @@ int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, ComputeCharNormArrays(norm_feature, templates, char_norm_array, pruner_norm_array); return IntCastRounded(fx_info.Length / kStandardFeatureLength); -} /* GetCharNormFeature */ +} /* GetCharNormFeature */ // Computes the char_norm_array for the unicharset and, if not nullptr, the // pruner_array as appropriate according to the existence of the shape_table. @@ -1745,7 +1684,7 @@ void Classify::ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, // the corresponding unichars in the CharNormArray. for (int id = 0; id < templates->NumClasses; ++id) { int font_set_id = templates->Class[id]->font_set_id; - const FontSet &fs = fontset_table_.get(font_set_id); + const FontSet& fs = fontset_table_.get(font_set_id); for (int config = 0; config < fs.size; ++config) { const Shape& shape = shape_table_->GetShape(fs.configs[config]); for (int c = 0; c < shape.size(); ++c) { @@ -1775,11 +1714,10 @@ void Classify::ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, * @note History: Fri Mar 15 08:49:46 1991, DSJ, Created. */ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int FontinfoId, - int NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_SET FloatFeatures) { + CLASS_ID ClassId, int FontinfoId, + int NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_SET FloatFeatures) { INT_CLASS IClass; ADAPT_CLASS Class; PROTO_ID OldProtos[MAX_NUM_PROTOS]; @@ -1810,21 +1748,17 @@ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, OldMaxProtoId = IClass->NumProtos - 1; - NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff, - BlobLength, NumFeatures, Features, - OldProtos, classify_adapt_proto_threshold, - debug_level); + NumOldProtos = im_.FindGoodProtos( + IClass, AllProtosOn, AllConfigsOff, BlobLength, NumFeatures, Features, + OldProtos, classify_adapt_proto_threshold, debug_level); MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS); zero_all_bits(TempProtoMask, MaskSize); - for (i = 0; i < NumOldProtos; i++) - SET_BIT(TempProtoMask, OldProtos[i]); + for (i = 0; i < NumOldProtos; i++) SET_BIT(TempProtoMask, OldProtos[i]); - NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn, - BlobLength, NumFeatures, Features, - BadFeatures, - classify_adapt_feature_threshold, - debug_level); + NumBadFeatures = im_.FindBadFeatures( + IClass, TempProtoMask, AllConfigsOn, BlobLength, NumFeatures, Features, + BadFeatures, classify_adapt_feature_threshold, debug_level); MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures, IClass, Class, TempProtoMask); @@ -1842,13 +1776,13 @@ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize); if (classify_learning_debug_level >= 1) - cprintf("Making new temp config %d fontinfo id %d" - " using %d old and %d new protos.\n", - ConfigId, Config->FontinfoId, - NumOldProtos, MaxProtoId - OldMaxProtoId); + cprintf( + "Making new temp config %d fontinfo id %d" + " using %d old and %d new protos.\n", + ConfigId, Config->FontinfoId, NumOldProtos, MaxProtoId - OldMaxProtoId); return ConfigId; -} /* MakeNewTemporaryConfig */ +} /* MakeNewTemporaryConfig */ /*---------------------------------------------------------------------------*/ /** @@ -1871,15 +1805,13 @@ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, * Exceptions: none * History: Fri Mar 15 11:39:38 1991, DSJ, Created. */ -PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, - int NumBadFeat, - FEATURE_ID BadFeat[], - INT_CLASS IClass, - ADAPT_CLASS Class, - BIT_VECTOR TempProtoMask) { - FEATURE_ID *ProtoStart; - FEATURE_ID *ProtoEnd; - FEATURE_ID *LastBad; +PROTO_ID +Classify::MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, + FEATURE_ID BadFeat[], INT_CLASS IClass, + ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) { + FEATURE_ID* ProtoStart; + FEATURE_ID* ProtoEnd; + FEATURE_ID* LastBad; TEMP_PROTO TempProto; PROTO Proto; FEATURE F1, F2; @@ -1895,8 +1827,7 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, Y1 = F1->Params[PicoFeatY]; A1 = F1->Params[PicoFeatDir]; - for (ProtoEnd = ProtoStart + 1, - SegmentLength = GetPicoFeatureLength(); + for (ProtoEnd = ProtoStart + 1, SegmentLength = GetPicoFeatureLength(); ProtoEnd < LastBad; ProtoEnd++, SegmentLength += GetPicoFeatureLength()) { F2 = Features->Features[*ProtoEnd]; @@ -1905,12 +1836,10 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, A2 = F2->Params[PicoFeatDir]; AngleDelta = fabs(A1 - A2); - if (AngleDelta > 0.5) - AngleDelta = 1.0 - AngleDelta; + if (AngleDelta > 0.5) AngleDelta = 1.0 - AngleDelta; if (AngleDelta > matcher_clustering_max_angle_delta || - fabs(X1 - X2) > SegmentLength || - fabs(Y1 - Y2) > SegmentLength) + fabs(X1 - X2) > SegmentLength || fabs(Y1 - Y2) > SegmentLength) break; } @@ -1920,8 +1849,7 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, A2 = F2->Params[PicoFeatDir]; Pid = AddIntProto(IClass); - if (Pid == NO_PROTO) - return (NO_PROTO); + if (Pid == NO_PROTO) return (NO_PROTO); TempProto = NewTempProto(); Proto = &(TempProto->Proto); @@ -1945,7 +1873,7 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, Class->TempProtos = push(Class->TempProtos, TempProto); } return IClass->NumProtos - 1; -} /* MakeNewTempProtos */ +} /* MakeNewTempProtos */ /*---------------------------------------------------------------------------*/ /** @@ -1960,11 +1888,9 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, * @note Exceptions: none * @note History: Thu Mar 14 15:54:08 1991, DSJ, Created. */ -void Classify::MakePermanent(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int ConfigId, - TBLOB *Blob) { - UNICHAR_ID *Ambigs; +void Classify::MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, + int ConfigId, TBLOB* Blob) { + UNICHAR_ID* Ambigs; TEMP_CONFIG Config; ADAPT_CLASS Class; PROTO_KEY ProtoKey; @@ -1973,8 +1899,7 @@ void Classify::MakePermanent(ADAPT_TEMPLATES Templates, Config = TempConfigFor(Class, ConfigId); MakeConfigPermanent(Class, ConfigId); - if (Class->NumPermConfigs == 0) - Templates->NumPermClasses++; + if (Class->NumPermConfigs == 0) Templates->NumPermClasses++; Class->NumPermConfigs++; // Initialize permanent config. @@ -1995,16 +1920,17 @@ void Classify::MakePermanent(ADAPT_TEMPLATES Templates, PermConfigFor(Class, ConfigId) = Perm; if (classify_learning_debug_level >= 1) { - tprintf("Making config %d for %s (ClassId %d) permanent:" - " fontinfo id %d, ambiguities '", - ConfigId, getDict().getUnicharset().debug_str(ClassId).string(), - ClassId, PermConfigFor(Class, ConfigId)->FontinfoId); - for (UNICHAR_ID *AmbigsPointer = Ambigs; - *AmbigsPointer >= 0; ++AmbigsPointer) + tprintf( + "Making config %d for %s (ClassId %d) permanent:" + " fontinfo id %d, ambiguities '", + ConfigId, getDict().getUnicharset().debug_str(ClassId).string(), + ClassId, PermConfigFor(Class, ConfigId)->FontinfoId); + for (UNICHAR_ID* AmbigsPointer = Ambigs; *AmbigsPointer >= 0; + ++AmbigsPointer) tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer)); tprintf("'.\n"); } -} /* MakePermanent */ +} /* MakePermanent */ } // namespace tesseract /*---------------------------------------------------------------------------*/ @@ -2022,29 +1948,29 @@ void Classify::MakePermanent(ADAPT_TEMPLATES Templates, * @note Exceptions: none * @note History: Thu Mar 14 18:49:54 1991, DSJ, Created. */ -int MakeTempProtoPerm(void *item1, void *item2) { +int MakeTempProtoPerm(void* item1, void* item2) { ADAPT_CLASS Class; TEMP_CONFIG Config; TEMP_PROTO TempProto; - PROTO_KEY *ProtoKey; + PROTO_KEY* ProtoKey; - TempProto = (TEMP_PROTO) item1; - ProtoKey = (PROTO_KEY *) item2; + TempProto = (TEMP_PROTO)item1; + ProtoKey = (PROTO_KEY*)item2; Class = ProtoKey->Templates->Class[ProtoKey->ClassId]; Config = TempConfigFor(Class, ProtoKey->ConfigId); if (TempProto->ProtoId > Config->MaxProtoId || - !test_bit (Config->Protos, TempProto->ProtoId)) + !test_bit(Config->Protos, TempProto->ProtoId)) return FALSE; MakeProtoPermanent(Class, TempProto->ProtoId); AddProtoToClassPruner(&(TempProto->Proto), ProtoKey->ClassId, - ProtoKey->Templates->Templates); + ProtoKey->Templates->Templates); FreeTempProto(TempProto); return TRUE; -} /* MakeTempProtoPerm */ +} /* MakeTempProtoPerm */ /*---------------------------------------------------------------------------*/ namespace tesseract { @@ -2063,7 +1989,7 @@ void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS& results) { tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).string()); results.match[i].Print(); } -} /* PrintAdaptiveMatchResults */ +} /* PrintAdaptiveMatchResults */ /*---------------------------------------------------------------------------*/ /** @@ -2081,17 +2007,17 @@ void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS& results) { * @note Exceptions: none * @note History: Tue Mar 12 13:51:03 1991, DSJ, Created. */ -void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { +void Classify::RemoveBadMatches(ADAPT_RESULTS* Results) { int Next, NextGood; FLOAT32 BadMatchThreshold; static const char* romans = "i v x I V X"; BadMatchThreshold = Results->best_rating - matcher_bad_match_pad; if (classify_bln_numeric_mode) { - UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ? - unicharset.unichar_to_id("1") : -1; - UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ? - unicharset.unichar_to_id("0") : -1; + UNICHAR_ID unichar_id_one = + unicharset.contains_unichar("1") ? unicharset.unichar_to_id("1") : -1; + UNICHAR_ID unichar_id_zero = + unicharset.contains_unichar("0") ? unicharset.unichar_to_id("0") : -1; float scored_one = ScoredUnichar(unichar_id_one, *Results); float scored_zero = ScoredUnichar(unichar_id_zero, *Results); @@ -2099,8 +2025,8 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { const UnicharRating& match = Results->match[Next]; if (match.rating >= BadMatchThreshold) { if (!unicharset.get_isalpha(match.unichar_id) || - strstr(romans, - unicharset.id_to_unichar(match.unichar_id)) != nullptr) { + strstr(romans, unicharset.id_to_unichar(match.unichar_id)) != + nullptr) { } else if (unicharset.eq(match.unichar_id, "l") && scored_one < BadMatchThreshold) { Results->match[Next].unichar_id = unichar_id_one; @@ -2131,7 +2057,7 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { } } Results->match.truncate(NextGood); -} /* RemoveBadMatches */ +} /* RemoveBadMatches */ /*----------------------------------------------------------------------------*/ /** @@ -2143,9 +2069,9 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { * * @note History: Tue Mar 12 13:51:03 1991, DSJ, Created. */ -void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { +void Classify::RemoveExtraPuncs(ADAPT_RESULTS* Results) { int Next, NextGood; - int punc_count; /*no of garbage characters */ + int punc_count; /*no of garbage characters */ int digit_count; /*garbage characters */ static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^"; @@ -2156,16 +2082,14 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { for (Next = NextGood = 0; Next < Results->match.size(); Next++) { const UnicharRating& match = Results->match[Next]; bool keep = true; - if (strstr(punc_chars, - unicharset.id_to_unichar(match.unichar_id)) != nullptr) { - if (punc_count >= 2) - keep = false; + if (strstr(punc_chars, unicharset.id_to_unichar(match.unichar_id)) != + nullptr) { + if (punc_count >= 2) keep = false; punc_count++; } else { - if (strstr(digit_chars, - unicharset.id_to_unichar(match.unichar_id)) != nullptr) { - if (digit_count >= 1) - keep = false; + if (strstr(digit_chars, unicharset.id_to_unichar(match.unichar_id)) != + nullptr) { + if (digit_count >= 1) keep = false; digit_count++; } } @@ -2178,7 +2102,7 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { } } Results->match.truncate(NextGood); -} /* RemoveExtraPuncs */ +} /* RemoveExtraPuncs */ /*---------------------------------------------------------------------------*/ /** @@ -2195,12 +2119,12 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { * @note History: Tue Apr 9 08:33:13 1991, DSJ, Created. */ void Classify::SetAdaptiveThreshold(FLOAT32 Threshold) { - Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold); + Threshold = (Threshold == matcher_good_threshold) ? 0.9 : (1.0 - Threshold); classify_adapt_proto_threshold.set_value( ClipToRange(255 * Threshold, 0, 255)); classify_adapt_feature_threshold.set_value( ClipToRange(255 * Threshold, 0, 255)); -} /* SetAdaptiveThreshold */ +} /* SetAdaptiveThreshold */ /*---------------------------------------------------------------------------*/ /** @@ -2230,9 +2154,8 @@ void Classify::ShowBestMatchFor(int shape_id, } UnicharRating cn_result; classify_norm_method.set_value(character); - im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), - AllProtosOn, AllConfigsOn, - num_features, features, &cn_result, + im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), AllProtosOn, + AllConfigsOn, num_features, features, &cn_result, classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows); tprintf("\n"); @@ -2246,12 +2169,13 @@ void Classify::ShowBestMatchFor(int shape_id, matcher_debug_separate_windows); UpdateMatchDisplay(); #endif // GRAPHICS_DISABLED -} /* ShowBestMatchFor */ +} /* ShowBestMatchFor */ // Returns a string for the classifier class_id: either the corresponding // unicharset debug_str or the shape_table_ debug str. -STRING Classify::ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, - int class_id, int config_id) const { +STRING +Classify::ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, int class_id, + int config_id) const { STRING class_string; if (templates == PreTrainedTemplates && shape_table_ != nullptr) { int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id); @@ -2267,9 +2191,8 @@ int Classify::ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const { int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id; // Older inttemps have no font_ids. - if (font_set_id < 0) - return kBlankFontinfoId; - const FontSet &fs = fontset_table_.get(font_set_id); + if (font_set_id < 0) return kBlankFontinfoId; + const FontSet& fs = fontset_table_.get(font_set_id); ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size); return fs.configs[int_result_config]; } @@ -2280,10 +2203,9 @@ int Classify::ShapeIDToClassID(int shape_id) const { for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) { int font_set_id = PreTrainedTemplates->Class[id]->font_set_id; ASSERT_HOST(font_set_id >= 0); - const FontSet &fs = fontset_table_.get(font_set_id); + const FontSet& fs = fontset_table_.get(font_set_id); for (int config = 0; config < fs.size; ++config) { - if (fs.configs[config] == shape_id) - return id; + if (fs.configs[config] == shape_id) return id; } } tprintf("Shape %d not found\n", shape_id); @@ -2293,7 +2215,7 @@ int Classify::ShapeIDToClassID(int shape_id) const { // Returns true if the given TEMP_CONFIG is good enough to make it // a permanent config. bool Classify::TempConfigReliable(CLASS_ID class_id, - const TEMP_CONFIG &config) { + const TEMP_CONFIG& config) { if (classify_learning_debug_level >= 1) { tprintf("NumTimesSeen for config of %s is %d\n", getDict().getUnicharset().debug_str(class_id).string(), @@ -2306,21 +2228,20 @@ bool Classify::TempConfigReliable(CLASS_ID class_id, } else if (use_ambigs_for_adaption) { // Go through the ambigs vector and see whether we have already seen // enough times all the characters represented by the ambigs vector. - const UnicharIdVector *ambigs = - getDict().getUnicharAmbigs().AmbigsForAdaption(class_id); + const UnicharIdVector* ambigs = + getDict().getUnicharAmbigs().AmbigsForAdaption(class_id); int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size(); for (int ambig = 0; ambig < ambigs_size; ++ambig) { ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]]; assert(ambig_class != nullptr); if (ambig_class->NumPermConfigs == 0 && - ambig_class->MaxNumTimesSeen < - matcher_min_examples_for_prototyping) { + ambig_class->MaxNumTimesSeen < matcher_min_examples_for_prototyping) { if (classify_learning_debug_level >= 1) { - tprintf("Ambig %s has not been seen enough times," - " not making config for %s permanent\n", - getDict().getUnicharset().debug_str( - (*ambigs)[ambig]).string(), - getDict().getUnicharset().debug_str(class_id).string()); + tprintf( + "Ambig %s has not been seen enough times," + " not making config for %s permanent\n", + getDict().getUnicharset().debug_str((*ambigs)[ambig]).string(), + getDict().getUnicharset().debug_str(class_id).string()); } return false; } @@ -2329,9 +2250,9 @@ bool Classify::TempConfigReliable(CLASS_ID class_id, return true; } -void Classify::UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob) { - const UnicharIdVector *ambigs = - getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id); +void Classify::UpdateAmbigsGroup(CLASS_ID class_id, TBLOB* Blob) { + const UnicharIdVector* ambigs = + getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id); int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size(); if (classify_learning_debug_level >= 1) { tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n", @@ -2343,12 +2264,11 @@ void Classify::UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob) { for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) { if (ConfigIsPermanent(ambigs_class, cfg)) continue; const TEMP_CONFIG config = - TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg); + TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg); if (config != nullptr && TempConfigReliable(ambig_class_id, config)) { if (classify_learning_debug_level >= 1) { tprintf("Making config %d of %s permanent\n", cfg, - getDict().getUnicharset().debug_str( - ambig_class_id).string()); + getDict().getUnicharset().debug_str(ambig_class_id).string()); } MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob); } diff --git a/src/classify/blobclass.cpp b/src/classify/blobclass.cpp index ae6e590eb9..f676f948a4 100644 --- a/src/classify/blobclass.cpp +++ b/src/classify/blobclass.cpp @@ -48,9 +48,9 @@ void ExtractFontName(const STRING& filename, STRING* fontname) { if (*fontname == kUnknownFontName) { // filename is expected to be of the form [lang].[fontname].exp[num] // The [lang], [fontname] and [num] fields should not have '.' characters. - const char *basename = strrchr(filename.string(), '/'); - const char *firstdot = strchr(basename ? basename : filename.string(), '.'); - const char *lastdot = strrchr(filename.string(), '.'); + const char* basename = strrchr(filename.string(), '/'); + const char* firstdot = strchr(basename ? basename : filename.string(), '.'); + const char* lastdot = strrchr(filename.string(), '.'); if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) { ++firstdot; *fontname = firstdot; @@ -90,7 +90,7 @@ void Classify::LearnBlob(const STRING& fontname, TBLOB* blob, tprintf("Blob learned was invalid!\n"); } FreeCharDescription(CharDesc); -} // LearnBlob +} // LearnBlob // Writes stored training data to a .tr file based on the given filename. // Returns false on error. diff --git a/src/classify/blobclass.h b/src/classify/blobclass.h index be09465bd4..96d7bac8fe 100644 --- a/src/classify/blobclass.h +++ b/src/classify/blobclass.h @@ -15,8 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef BLOBCLASS_H -#define BLOBCLASS_H +#ifndef BLOBCLASS_H +#define BLOBCLASS_H /**---------------------------------------------------------------------------- Include Files and Type Defines @@ -27,12 +27,12 @@ Macros ----------------------------------------------------------------------------*/ /* macros for controlling the display of recognized characters */ -#define EnableCharDisplay() (DisplayCharacters = TRUE) -#define DisableCharDisplay() (DisplayCharacters = FALSE) +#define EnableCharDisplay() (DisplayCharacters = TRUE) +#define DisableCharDisplay() (DisplayCharacters = FALSE) /* macros for controlling the display of the entire match list */ -#define EnableMatchDisplay() (DisplayMatchList = TRUE) -#define DisableMatchDisplay() (DisplayMatchList = FALSE) +#define EnableMatchDisplay() (DisplayMatchList = TRUE) +#define DisableMatchDisplay() (DisplayMatchList = FALSE) /**---------------------------------------------------------------------------- Public Function Prototypes diff --git a/src/classify/classify.cpp b/src/classify/classify.cpp index b78e4bd963..3e28dbd508 100644 --- a/src/classify/classify.cpp +++ b/src/classify/classify.cpp @@ -21,6 +21,7 @@ #include "config_auto.h" #endif +#include #include "classify.h" #include "fontinfo.h" #include "intproto.h" @@ -29,7 +30,6 @@ #include "shapeclassifier.h" #include "shapetable.h" #include "unicity_table.h" -#include namespace tesseract { Classify::Classify() @@ -78,8 +78,8 @@ Classify::Classify() "Non-linear stroke-density normalization", this->params()), INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()), INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()), - INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", - this->params()), + INT_MEMBER(classify_learning_debug_level, 0, + "Learning Debug Level: ", this->params()), double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", this->params()), double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)", @@ -166,8 +166,7 @@ Classify::Classify() NewPermanentTessCallback(CompareFontInfo)); fontinfo_table_.set_clear_callback( NewPermanentTessCallback(FontInfoDeleteCallback)); - fontset_table_.set_compare_callback( - NewPermanentTessCallback(CompareFontSet)); + fontset_table_.set_compare_callback(NewPermanentTessCallback(CompareFontSet)); fontset_table_.set_clear_callback( NewPermanentTessCallback(FontSetDeleteCallback)); AdaptedTemplates = nullptr; @@ -193,7 +192,6 @@ Classify::~Classify() { delete learn_fragments_debug_win_; } - // Takes ownership of the given classifier, and uses it for future calls // to CharNormClassifier. void Classify::SetStaticClassifier(ShapeClassifier* static_classifier) { @@ -204,8 +202,8 @@ void Classify::SetStaticClassifier(ShapeClassifier* static_classifier) { // Moved from speckle.cpp // Adds a noise classification result that is a bit worse than the worst // current result, or the worst possible result if no current results. -void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) { - BLOB_CHOICE_IT bc_it(choices); +void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST* choices) { + BLOB_CHOICE_IT bc_it(choices); // If there is no classifier result, we will use the worst possible certainty // and corresponding rating. float certainty = -getDict().certainty_scale; @@ -217,21 +215,20 @@ void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) { rating = worst_choice->rating() + speckle_rating_penalty; // Compute the rating to correspond to the certainty. (Used to be kept // the same, but that messes up the language model search.) - certainty = -rating * getDict().certainty_scale / - (rating_scale * blob_length); + certainty = + -rating * getDict().certainty_scale / (rating_scale * blob_length); } - BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, - -1, 0.0f, MAX_FLOAT32, 0, - BCC_SPECKLE_CLASSIFIER); + BLOB_CHOICE* blob_choice = + new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, -1, 0.0f, MAX_FLOAT32, + 0, BCC_SPECKLE_CLASSIFIER); bc_it.add_to_end(blob_choice); } // Returns true if the blob is small enough to be a large speckle. -bool Classify::LargeSpeckle(const TBLOB &blob) { +bool Classify::LargeSpeckle(const TBLOB& blob) { double speckle_size = kBlnXHeight * speckle_large_max_size; TBOX bbox = blob.bounding_box(); return bbox.width() < speckle_size && bbox.height() < speckle_size; } - } // namespace tesseract diff --git a/src/classify/classify.h b/src/classify/classify.h index 9c8cf64e2b..4af623147b 100644 --- a/src/classify/classify.h +++ b/src/classify/classify.h @@ -29,8 +29,8 @@ #include "intfx.h" #include "intmatcher.h" #include "normalis.h" -#include "ratngs.h" #include "ocrfeatures.h" +#include "ratngs.h" #include "unicity_table.h" class ScrollView; @@ -62,13 +62,9 @@ class Classify : public CCStruct { public: Classify(); virtual ~Classify(); - virtual Dict& getDict() { - return dict_; - } + virtual Dict& getDict() { return dict_; } - const ShapeTable* shape_table() const { - return shape_table_; - } + const ShapeTable* shape_table() const { return shape_table_; } // Takes ownership of the given classifier, and uses it for future calls // to CharNormClassifier. @@ -76,10 +72,10 @@ class Classify : public CCStruct { // Adds a noise classification result that is a bit worse than the worst // current result, or the worst possible result if no current results. - void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices); + void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST* choices); // Returns true if the blob is small enough to be a large speckle. - bool LargeSpeckle(const TBLOB &blob); + bool LargeSpeckle(const TBLOB& blob); /* adaptive.cpp ************************************************************/ ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset); @@ -104,12 +100,12 @@ class Classify : public CCStruct { const uint16_t* expected_num_features, GenericVector* results); void ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs); - void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); - void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); + void PrintAdaptedTemplates(FILE* File, ADAPT_TEMPLATES Templates); + void WriteAdaptedTemplates(FILE* File, ADAPT_TEMPLATES Templates); ADAPT_TEMPLATES ReadAdaptedTemplates(TFile* File); /* normmatch.cpp ************************************************************/ - FLOAT32 ComputeNormMatch(CLASS_ID ClassId, - const FEATURE_STRUCT& feature, bool DebugMatch); + FLOAT32 ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT& feature, + bool DebugMatch); void FreeNormProtos(); NORM_PROTOS* ReadNormProtos(TFile* fp); /* protos.cpp ***************************************************************/ @@ -138,26 +134,16 @@ class Classify : public CCStruct { CharSegmentationType segmentation, const char* correct_text, WERD_RES* word); void InitAdaptiveClassifier(TessdataManager* mgr); - void InitAdaptedClass(TBLOB *Blob, - CLASS_ID ClassId, - int FontinfoId, - ADAPT_CLASS Class, - ADAPT_TEMPLATES Templates); + void InitAdaptedClass(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, + ADAPT_CLASS Class, ADAPT_TEMPLATES Templates); void AmbigClassifier(const GenericVector& int_features, - const INT_FX_RESULT_STRUCT& fx_info, - const TBLOB *blob, - INT_TEMPLATES templates, - ADAPT_CLASS *classes, - UNICHAR_ID *ambiguities, - ADAPT_RESULTS *results); - void MasterMatcher(INT_TEMPLATES templates, - int16_t num_features, + const INT_FX_RESULT_STRUCT& fx_info, const TBLOB* blob, + INT_TEMPLATES templates, ADAPT_CLASS* classes, + UNICHAR_ID* ambiguities, ADAPT_RESULTS* results); + void MasterMatcher(INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT* features, - const uint8_t* norm_factors, - ADAPT_CLASS* classes, - int debug, - int matcher_multiplier, - const TBOX& blob_box, + const uint8_t* norm_factors, ADAPT_CLASS* classes, + int debug, int matcher_multiplier, const TBOX& blob_box, const GenericVector& results, ADAPT_RESULTS* final_results); // Converts configs to fonts, and if the result is not adapted, and a @@ -165,12 +151,9 @@ class Classify : public CCStruct { // unichar_ids represented, before applying a set of corrections to the // distance rating in int_result, (see ComputeCorrectedRating.) // The results are added to the final_results output. - void ExpandShapesAndApplyCorrections(ADAPT_CLASS* classes, - bool debug, - int class_id, - int bottom, int top, - float cp_rating, - int blob_length, + void ExpandShapesAndApplyCorrections(ADAPT_CLASS* classes, bool debug, + int class_id, int bottom, int top, + float cp_rating, int blob_length, int matcher_multiplier, const uint8_t* cn_factors, UnicharRating* int_result, @@ -180,48 +163,38 @@ class Classify : public CCStruct { // for non-alnums being vertical misfits. Returns the corrected distance. double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, - int bottom, int top, - int blob_length, int matcher_multiplier, + int bottom, int top, int blob_length, + int matcher_multiplier, const uint8_t* cn_factors); void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, - ADAPT_RESULTS *Results, - BLOB_CHOICE_LIST *Choices); - void AddNewResult(const UnicharRating& new_result, ADAPT_RESULTS *results); - int GetAdaptiveFeatures(TBLOB *Blob, - INT_FEATURE_ARRAY IntFeatures, - FEATURE_SET *FloatFeatures); + ADAPT_RESULTS* Results, + BLOB_CHOICE_LIST* Choices); + void AddNewResult(const UnicharRating& new_result, ADAPT_RESULTS* results); + int GetAdaptiveFeatures(TBLOB* Blob, INT_FEATURE_ARRAY IntFeatures, + FEATURE_SET* FloatFeatures); #ifndef GRAPHICS_DISABLED - void DebugAdaptiveClassifier(TBLOB *Blob, - ADAPT_RESULTS *Results); + void DebugAdaptiveClassifier(TBLOB* Blob, ADAPT_RESULTS* Results); #endif - PROTO_ID MakeNewTempProtos(FEATURE_SET Features, - int NumBadFeat, - FEATURE_ID BadFeat[], - INT_CLASS IClass, - ADAPT_CLASS Class, - BIT_VECTOR TempProtoMask); - int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int FontinfoId, - int NumFeatures, + PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, + FEATURE_ID BadFeat[], INT_CLASS IClass, + ADAPT_CLASS Class, BIT_VECTOR TempProtoMask); + int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, + int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures); - void MakePermanent(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int ConfigId, - TBLOB *Blob); + void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, + TBLOB* Blob); void PrintAdaptiveMatchResults(const ADAPT_RESULTS& results); - void RemoveExtraPuncs(ADAPT_RESULTS *Results); - void RemoveBadMatches(ADAPT_RESULTS *Results); + void RemoveExtraPuncs(ADAPT_RESULTS* Results); + void RemoveBadMatches(ADAPT_RESULTS* Results); void SetAdaptiveThreshold(FLOAT32 Threshold); - void ShowBestMatchFor(int shape_id, - const INT_FEATURE_STRUCT* features, + void ShowBestMatchFor(int shape_id, const INT_FEATURE_STRUCT* features, int num_features); // Returns a string for the classifier class_id: either the corresponding // unicharset debug_str or the shape_table_ debug str. - STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, - int class_id, int config_id) const; + STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, int class_id, + int config_id) const; // Converts a classifier class_id index with a config ID to: // shape_table_ present: a shape_table_ index OR // No shape_table_: a font ID. @@ -237,21 +210,20 @@ class Classify : public CCStruct { // Converts a shape_table_ index to a classifier class_id index (not a // unichar-id!). Uses a search, so not fast. int ShapeIDToClassID(int shape_id) const; - UNICHAR_ID *BaselineClassifier( - TBLOB *Blob, const GenericVector& int_features, - const INT_FX_RESULT_STRUCT& fx_info, - ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results); - int CharNormClassifier(TBLOB *blob, - const TrainingSample& sample, - ADAPT_RESULTS *adapt_results); + UNICHAR_ID* BaselineClassifier( + TBLOB* Blob, const GenericVector& int_features, + const INT_FX_RESULT_STRUCT& fx_info, ADAPT_TEMPLATES Templates, + ADAPT_RESULTS* Results); + int CharNormClassifier(TBLOB* blob, const TrainingSample& sample, + ADAPT_RESULTS* adapt_results); // As CharNormClassifier, but operates on a TrainingSample and outputs to // a GenericVector of ShapeRating without conversion to classes. int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample& sample, GenericVector* results); - UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass); - void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results); + UNICHAR_ID* GetAmbiguities(TBLOB* Blob, CLASS_ID CorrectClass); + void DoAdaptiveMatch(TBLOB* Blob, ADAPT_RESULTS* Results); void AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates); void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class); @@ -259,34 +231,32 @@ class Classify : public CCStruct { void EndAdaptiveClassifier(); void SettupPass1(); void SettupPass2(); - void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices); - void ClassifyAsNoise(ADAPT_RESULTS *Results); + void AdaptiveClassifier(TBLOB* Blob, BLOB_CHOICE_LIST* Choices); + void ClassifyAsNoise(ADAPT_RESULTS* Results); void ResetAdaptiveClassifierInternal(); void SwitchAdaptiveClassifier(); void StartBackupAdaptiveClassifier(); int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, - INT_TEMPLATES templates, - uint8_t* pruner_norm_array, + INT_TEMPLATES templates, uint8_t* pruner_norm_array, uint8_t* char_norm_array); // Computes the char_norm_array for the unicharset and, if not nullptr, the // pruner_array as appropriate according to the existence of the shape_table. // The norm_feature is deleted as it is almost certainly no longer needed. void ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, INT_TEMPLATES_STRUCT* templates, - uint8_t* char_norm_array, - uint8_t* pruner_array); + uint8_t* char_norm_array, uint8_t* pruner_array); - bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config); - void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob); + bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG& config); + void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB* Blob); bool AdaptiveClassifierIsFull() const { return NumAdaptationsFailed > 0; } bool AdaptiveClassifierIsEmpty() const { return AdaptedTemplates->NumPermClasses == 0; } - bool LooksLikeGarbage(TBLOB *blob); - void RefreshDebugWindow(ScrollView **win, const char *msg, - int y_offset, const TBOX &wbox); + bool LooksLikeGarbage(TBLOB* blob); + void RefreshDebugWindow(ScrollView** win, const char* msg, int y_offset, + const TBOX& wbox); // intfx.cpp // Computes the DENORMS for bl(baseline) and cn(character) normalization // during feature extraction. The input denorm describes the current state @@ -322,8 +292,7 @@ class Classify : public CCStruct { // number of cn features generated for each outline in the blob (in order). // Thus after the first outline, there were (*outline_cn_counts)[0] features, // after the second outline, there were (*outline_cn_counts)[1] features etc. - static void ExtractFeatures(const TBLOB& blob, - bool nonlinear_norm, + static void ExtractFeatures(const TBLOB& blob, bool nonlinear_norm, GenericVector* bl_features, GenericVector* cn_features, INT_FX_RESULT_STRUCT* results, @@ -335,27 +304,23 @@ class Classify : public CCStruct { void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); /* intproto.cpp *************************************************************/ INT_TEMPLATES ReadIntTemplates(TFile* fp); - void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, + void WriteIntTemplates(FILE* File, INT_TEMPLATES Templates, const UNICHARSET& target_unicharset); - CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on, + CLASS_ID GetClassToDebug(const char* Prompt, bool* adaptive_on, bool* pretrained_on, int* shape_id); void ShowMatchDisplay(); /* font detection ***********************************************************/ - UnicityTable& get_fontinfo_table() { - return fontinfo_table_; - } + UnicityTable& get_fontinfo_table() { return fontinfo_table_; } const UnicityTable& get_fontinfo_table() const { return fontinfo_table_; } - UnicityTable& get_fontset_table() { - return fontset_table_; - } + UnicityTable& get_fontset_table() { return fontset_table_; } /* mfoutline.cpp ***********************************************************/ - void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale); + void NormalizeOutlines(LIST Outlines, FLOAT32* XScale, FLOAT32* YScale); /* outfeat.cpp ***********************************************************/ - FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob); + FEATURE_SET ExtractOutlineFeatures(TBLOB* Blob); /* picofeat.cpp ***********************************************************/ - FEATURE_SET ExtractPicoFeatures(TBLOB *Blob); + FEATURE_SET ExtractPicoFeatures(TBLOB* Blob); FEATURE_SET ExtractIntCNFeatures(const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info); FEATURE_SET ExtractIntGeoFeatures(const TBLOB& blob, @@ -392,7 +357,7 @@ class Classify : public CCStruct { /* control knobs used to control normalization of outlines */ INT_VAR_H(classify_norm_method, character, "Normalization Method ..."); double_VAR_H(classify_char_norm_range, 0.2, - "Character Normalization Range ..."); + "Character Normalization Range ..."); double_VAR_H(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ..."); double_VAR_H(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ..."); double_VAR_H(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ..."); @@ -482,7 +447,7 @@ class Classify : public CCStruct { BIT_VECTOR TempProtoMask; bool EnableLearning; /* normmatch.cpp */ - NORM_PROTOS *NormProtos; + NORM_PROTOS* NormProtos; /* font detection ***********************************************************/ UnicityTable fontinfo_table_; // Without shape training, each class_id, config pair represents a single diff --git a/src/classify/cluster.cpp b/src/classify/cluster.cpp index 4b2567b712..7e4a695b64 100644 --- a/src/classify/cluster.cpp +++ b/src/classify/cluster.cpp @@ -15,130 +15,1219 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#include "const.h" #include "cluster.h" +#include +#include "const.h" +#include "danerror.h" #include "emalloc.h" #include "genericheap.h" #include "helpers.h" #include "kdpair.h" #include "matrix.h" #include "tprintf.h" -#include "danerror.h" -#include -#define HOTELLING 1 // If true use Hotelling's test to decide where to split. -#define FTABLE_X 10 // Size of FTable. +#define HOTELLING 1 // If true use Hotelling's test to decide where to split. +#define FTABLE_X 10 // Size of FTable. #define FTABLE_Y 100 // Size of FTable. -// Table of values approximating the cumulative F-distribution for a confidence of 1%. +// Table of values approximating the cumulative F-distribution for a confidence +// of 1%. const double FTable[FTABLE_Y][FTABLE_X] = { - {4052.19, 4999.52, 5403.34, 5624.62, 5763.65, 5858.97, 5928.33, 5981.10, 6022.50, 6055.85,}, - {98.502, 99.000, 99.166, 99.249, 99.300, 99.333, 99.356, 99.374, 99.388, 99.399,}, - {34.116, 30.816, 29.457, 28.710, 28.237, 27.911, 27.672, 27.489, 27.345, 27.229,}, - {21.198, 18.000, 16.694, 15.977, 15.522, 15.207, 14.976, 14.799, 14.659, 14.546,}, - {16.258, 13.274, 12.060, 11.392, 10.967, 10.672, 10.456, 10.289, 10.158, 10.051,}, - {13.745, 10.925, 9.780, 9.148, 8.746, 8.466, 8.260, 8.102, 7.976, 7.874,}, - {12.246, 9.547, 8.451, 7.847, 7.460, 7.191, 6.993, 6.840, 6.719, 6.620,}, - {11.259, 8.649, 7.591, 7.006, 6.632, 6.371, 6.178, 6.029, 5.911, 5.814,}, - {10.561, 8.022, 6.992, 6.422, 6.057, 5.802, 5.613, 5.467, 5.351, 5.257,}, - {10.044, 7.559, 6.552, 5.994, 5.636, 5.386, 5.200, 5.057, 4.942, 4.849,}, - { 9.646, 7.206, 6.217, 5.668, 5.316, 5.069, 4.886, 4.744, 4.632, 4.539,}, - { 9.330, 6.927, 5.953, 5.412, 5.064, 4.821, 4.640, 4.499, 4.388, 4.296,}, - { 9.074, 6.701, 5.739, 5.205, 4.862, 4.620, 4.441, 4.302, 4.191, 4.100,}, - { 8.862, 6.515, 5.564, 5.035, 4.695, 4.456, 4.278, 4.140, 4.030, 3.939,}, - { 8.683, 6.359, 5.417, 4.893, 4.556, 4.318, 4.142, 4.004, 3.895, 3.805,}, - { 8.531, 6.226, 5.292, 4.773, 4.437, 4.202, 4.026, 3.890, 3.780, 3.691,}, - { 8.400, 6.112, 5.185, 4.669, 4.336, 4.102, 3.927, 3.791, 3.682, 3.593,}, - { 8.285, 6.013, 5.092, 4.579, 4.248, 4.015, 3.841, 3.705, 3.597, 3.508,}, - { 8.185, 5.926, 5.010, 4.500, 4.171, 3.939, 3.765, 3.631, 3.523, 3.434,}, - { 8.096, 5.849, 4.938, 4.431, 4.103, 3.871, 3.699, 3.564, 3.457, 3.368,}, - { 8.017, 5.780, 4.874, 4.369, 4.042, 3.812, 3.640, 3.506, 3.398, 3.310,}, - { 7.945, 5.719, 4.817, 4.313, 3.988, 3.758, 3.587, 3.453, 3.346, 3.258,}, - { 7.881, 5.664, 4.765, 4.264, 3.939, 3.710, 3.539, 3.406, 3.299, 3.211,}, - { 7.823, 5.614, 4.718, 4.218, 3.895, 3.667, 3.496, 3.363, 3.256, 3.168,}, - { 7.770, 5.568, 4.675, 4.177, 3.855, 3.627, 3.457, 3.324, 3.217, 3.129,}, - { 7.721, 5.526, 4.637, 4.140, 3.818, 3.591, 3.421, 3.288, 3.182, 3.094,}, - { 7.677, 5.488, 4.601, 4.106, 3.785, 3.558, 3.388, 3.256, 3.149, 3.062,}, - { 7.636, 5.453, 4.568, 4.074, 3.754, 3.528, 3.358, 3.226, 3.120, 3.032,}, - { 7.598, 5.420, 4.538, 4.045, 3.725, 3.499, 3.330, 3.198, 3.092, 3.005,}, - { 7.562, 5.390, 4.510, 4.018, 3.699, 3.473, 3.305, 3.173, 3.067, 2.979,}, - { 7.530, 5.362, 4.484, 3.993, 3.675, 3.449, 3.281, 3.149, 3.043, 2.955,}, - { 7.499, 5.336, 4.459, 3.969, 3.652, 3.427, 3.258, 3.127, 3.021, 2.934,}, - { 7.471, 5.312, 4.437, 3.948, 3.630, 3.406, 3.238, 3.106, 3.000, 2.913,}, - { 7.444, 5.289, 4.416, 3.927, 3.611, 3.386, 3.218, 3.087, 2.981, 2.894,}, - { 7.419, 5.268, 4.396, 3.908, 3.592, 3.368, 3.200, 3.069, 2.963, 2.876,}, - { 7.396, 5.248, 4.377, 3.890, 3.574, 3.351, 3.183, 3.052, 2.946, 2.859,}, - { 7.373, 5.229, 4.360, 3.873, 3.558, 3.334, 3.167, 3.036, 2.930, 2.843,}, - { 7.353, 5.211, 4.343, 3.858, 3.542, 3.319, 3.152, 3.021, 2.915, 2.828,}, - { 7.333, 5.194, 4.327, 3.843, 3.528, 3.305, 3.137, 3.006, 2.901, 2.814,}, - { 7.314, 5.179, 4.313, 3.828, 3.514, 3.291, 3.124, 2.993, 2.888, 2.801,}, - { 7.296, 5.163, 4.299, 3.815, 3.501, 3.278, 3.111, 2.980, 2.875, 2.788,}, - { 7.280, 5.149, 4.285, 3.802, 3.488, 3.266, 3.099, 2.968, 2.863, 2.776,}, - { 7.264, 5.136, 4.273, 3.790, 3.476, 3.254, 3.087, 2.957, 2.851, 2.764,}, - { 7.248, 5.123, 4.261, 3.778, 3.465, 3.243, 3.076, 2.946, 2.840, 2.754,}, - { 7.234, 5.110, 4.249, 3.767, 3.454, 3.232, 3.066, 2.935, 2.830, 2.743,}, - { 7.220, 5.099, 4.238, 3.757, 3.444, 3.222, 3.056, 2.925, 2.820, 2.733,}, - { 7.207, 5.087, 4.228, 3.747, 3.434, 3.213, 3.046, 2.916, 2.811, 2.724,}, - { 7.194, 5.077, 4.218, 3.737, 3.425, 3.204, 3.037, 2.907, 2.802, 2.715,}, - { 7.182, 5.066, 4.208, 3.728, 3.416, 3.195, 3.028, 2.898, 2.793, 2.706,}, - { 7.171, 5.057, 4.199, 3.720, 3.408, 3.186, 3.020, 2.890, 2.785, 2.698,}, - { 7.159, 5.047, 4.191, 3.711, 3.400, 3.178, 3.012, 2.882, 2.777, 2.690,}, - { 7.149, 5.038, 4.182, 3.703, 3.392, 3.171, 3.005, 2.874, 2.769, 2.683,}, - { 7.139, 5.030, 4.174, 3.695, 3.384, 3.163, 2.997, 2.867, 2.762, 2.675,}, - { 7.129, 5.021, 4.167, 3.688, 3.377, 3.156, 2.990, 2.860, 2.755, 2.668,}, - { 7.119, 5.013, 4.159, 3.681, 3.370, 3.149, 2.983, 2.853, 2.748, 2.662,}, - { 7.110, 5.006, 4.152, 3.674, 3.363, 3.143, 2.977, 2.847, 2.742, 2.655,}, - { 7.102, 4.998, 4.145, 3.667, 3.357, 3.136, 2.971, 2.841, 2.736, 2.649,}, - { 7.093, 4.991, 4.138, 3.661, 3.351, 3.130, 2.965, 2.835, 2.730, 2.643,}, - { 7.085, 4.984, 4.132, 3.655, 3.345, 3.124, 2.959, 2.829, 2.724, 2.637,}, - { 7.077, 4.977, 4.126, 3.649, 3.339, 3.119, 2.953, 2.823, 2.718, 2.632,}, - { 7.070, 4.971, 4.120, 3.643, 3.333, 3.113, 2.948, 2.818, 2.713, 2.626,}, - { 7.062, 4.965, 4.114, 3.638, 3.328, 3.108, 2.942, 2.813, 2.708, 2.621,}, - { 7.055, 4.959, 4.109, 3.632, 3.323, 3.103, 2.937, 2.808, 2.703, 2.616,}, - { 7.048, 4.953, 4.103, 3.627, 3.318, 3.098, 2.932, 2.803, 2.698, 2.611,}, - { 7.042, 4.947, 4.098, 3.622, 3.313, 3.093, 2.928, 2.798, 2.693, 2.607,}, - { 7.035, 4.942, 4.093, 3.618, 3.308, 3.088, 2.923, 2.793, 2.689, 2.602,}, - { 7.029, 4.937, 4.088, 3.613, 3.304, 3.084, 2.919, 2.789, 2.684, 2.598,}, - { 7.023, 4.932, 4.083, 3.608, 3.299, 3.080, 2.914, 2.785, 2.680, 2.593,}, - { 7.017, 4.927, 4.079, 3.604, 3.295, 3.075, 2.910, 2.781, 2.676, 2.589,}, - { 7.011, 4.922, 4.074, 3.600, 3.291, 3.071, 2.906, 2.777, 2.672, 2.585,}, - { 7.006, 4.917, 4.070, 3.596, 3.287, 3.067, 2.902, 2.773, 2.668, 2.581,}, - { 7.001, 4.913, 4.066, 3.591, 3.283, 3.063, 2.898, 2.769, 2.664, 2.578,}, - { 6.995, 4.908, 4.062, 3.588, 3.279, 3.060, 2.895, 2.765, 2.660, 2.574,}, - { 6.990, 4.904, 4.058, 3.584, 3.275, 3.056, 2.891, 2.762, 2.657, 2.570,}, - { 6.985, 4.900, 4.054, 3.580, 3.272, 3.052, 2.887, 2.758, 2.653, 2.567,}, - { 6.981, 4.896, 4.050, 3.577, 3.268, 3.049, 2.884, 2.755, 2.650, 2.563,}, - { 6.976, 4.892, 4.047, 3.573, 3.265, 3.046, 2.881, 2.751, 2.647, 2.560,}, - { 6.971, 4.888, 4.043, 3.570, 3.261, 3.042, 2.877, 2.748, 2.644, 2.557,}, - { 6.967, 4.884, 4.040, 3.566, 3.258, 3.039, 2.874, 2.745, 2.640, 2.554,}, - { 6.963, 4.881, 4.036, 3.563, 3.255, 3.036, 2.871, 2.742, 2.637, 2.551,}, - { 6.958, 4.877, 4.033, 3.560, 3.252, 3.033, 2.868, 2.739, 2.634, 2.548,}, - { 6.954, 4.874, 4.030, 3.557, 3.249, 3.030, 2.865, 2.736, 2.632, 2.545,}, - { 6.950, 4.870, 4.027, 3.554, 3.246, 3.027, 2.863, 2.733, 2.629, 2.542,}, - { 6.947, 4.867, 4.024, 3.551, 3.243, 3.025, 2.860, 2.731, 2.626, 2.539,}, - { 6.943, 4.864, 4.021, 3.548, 3.240, 3.022, 2.857, 2.728, 2.623, 2.537,}, - { 6.939, 4.861, 4.018, 3.545, 3.238, 3.019, 2.854, 2.725, 2.621, 2.534,}, - { 6.935, 4.858, 4.015, 3.543, 3.235, 3.017, 2.852, 2.723, 2.618, 2.532,}, - { 6.932, 4.855, 4.012, 3.540, 3.233, 3.014, 2.849, 2.720, 2.616, 2.529,}, - { 6.928, 4.852, 4.010, 3.538, 3.230, 3.012, 2.847, 2.718, 2.613, 2.527,}, - { 6.925, 4.849, 4.007, 3.535, 3.228, 3.009, 2.845, 2.715, 2.611, 2.524,}, - { 6.922, 4.846, 4.004, 3.533, 3.225, 3.007, 2.842, 2.713, 2.609, 2.522,}, - { 6.919, 4.844, 4.002, 3.530, 3.223, 3.004, 2.840, 2.711, 2.606, 2.520,}, - { 6.915, 4.841, 3.999, 3.528, 3.221, 3.002, 2.838, 2.709, 2.604, 2.518,}, - { 6.912, 4.838, 3.997, 3.525, 3.218, 3.000, 2.835, 2.706, 2.602, 2.515,}, - { 6.909, 4.836, 3.995, 3.523, 3.216, 2.998, 2.833, 2.704, 2.600, 2.513,}, - { 6.906, 4.833, 3.992, 3.521, 3.214, 2.996, 2.831, 2.702, 2.598, 2.511,}, - { 6.904, 4.831, 3.990, 3.519, 3.212, 2.994, 2.829, 2.700, 2.596, 2.509,}, - { 6.901, 4.829, 3.988, 3.517, 3.210, 2.992, 2.827, 2.698, 2.594, 2.507,}, - { 6.898, 4.826, 3.986, 3.515, 3.208, 2.990, 2.825, 2.696, 2.592, 2.505,}, - { 6.895, 4.824, 3.984, 3.513, 3.206, 2.988, 2.823, 2.694, 2.590, 2.503} -}; + { + 4052.19, + 4999.52, + 5403.34, + 5624.62, + 5763.65, + 5858.97, + 5928.33, + 5981.10, + 6022.50, + 6055.85, + }, + { + 98.502, + 99.000, + 99.166, + 99.249, + 99.300, + 99.333, + 99.356, + 99.374, + 99.388, + 99.399, + }, + { + 34.116, + 30.816, + 29.457, + 28.710, + 28.237, + 27.911, + 27.672, + 27.489, + 27.345, + 27.229, + }, + { + 21.198, + 18.000, + 16.694, + 15.977, + 15.522, + 15.207, + 14.976, + 14.799, + 14.659, + 14.546, + }, + { + 16.258, + 13.274, + 12.060, + 11.392, + 10.967, + 10.672, + 10.456, + 10.289, + 10.158, + 10.051, + }, + { + 13.745, + 10.925, + 9.780, + 9.148, + 8.746, + 8.466, + 8.260, + 8.102, + 7.976, + 7.874, + }, + { + 12.246, + 9.547, + 8.451, + 7.847, + 7.460, + 7.191, + 6.993, + 6.840, + 6.719, + 6.620, + }, + { + 11.259, + 8.649, + 7.591, + 7.006, + 6.632, + 6.371, + 6.178, + 6.029, + 5.911, + 5.814, + }, + { + 10.561, + 8.022, + 6.992, + 6.422, + 6.057, + 5.802, + 5.613, + 5.467, + 5.351, + 5.257, + }, + { + 10.044, + 7.559, + 6.552, + 5.994, + 5.636, + 5.386, + 5.200, + 5.057, + 4.942, + 4.849, + }, + { + 9.646, + 7.206, + 6.217, + 5.668, + 5.316, + 5.069, + 4.886, + 4.744, + 4.632, + 4.539, + }, + { + 9.330, + 6.927, + 5.953, + 5.412, + 5.064, + 4.821, + 4.640, + 4.499, + 4.388, + 4.296, + }, + { + 9.074, + 6.701, + 5.739, + 5.205, + 4.862, + 4.620, + 4.441, + 4.302, + 4.191, + 4.100, + }, + { + 8.862, + 6.515, + 5.564, + 5.035, + 4.695, + 4.456, + 4.278, + 4.140, + 4.030, + 3.939, + }, + { + 8.683, + 6.359, + 5.417, + 4.893, + 4.556, + 4.318, + 4.142, + 4.004, + 3.895, + 3.805, + }, + { + 8.531, + 6.226, + 5.292, + 4.773, + 4.437, + 4.202, + 4.026, + 3.890, + 3.780, + 3.691, + }, + { + 8.400, + 6.112, + 5.185, + 4.669, + 4.336, + 4.102, + 3.927, + 3.791, + 3.682, + 3.593, + }, + { + 8.285, + 6.013, + 5.092, + 4.579, + 4.248, + 4.015, + 3.841, + 3.705, + 3.597, + 3.508, + }, + { + 8.185, + 5.926, + 5.010, + 4.500, + 4.171, + 3.939, + 3.765, + 3.631, + 3.523, + 3.434, + }, + { + 8.096, + 5.849, + 4.938, + 4.431, + 4.103, + 3.871, + 3.699, + 3.564, + 3.457, + 3.368, + }, + { + 8.017, + 5.780, + 4.874, + 4.369, + 4.042, + 3.812, + 3.640, + 3.506, + 3.398, + 3.310, + }, + { + 7.945, + 5.719, + 4.817, + 4.313, + 3.988, + 3.758, + 3.587, + 3.453, + 3.346, + 3.258, + }, + { + 7.881, + 5.664, + 4.765, + 4.264, + 3.939, + 3.710, + 3.539, + 3.406, + 3.299, + 3.211, + }, + { + 7.823, + 5.614, + 4.718, + 4.218, + 3.895, + 3.667, + 3.496, + 3.363, + 3.256, + 3.168, + }, + { + 7.770, + 5.568, + 4.675, + 4.177, + 3.855, + 3.627, + 3.457, + 3.324, + 3.217, + 3.129, + }, + { + 7.721, + 5.526, + 4.637, + 4.140, + 3.818, + 3.591, + 3.421, + 3.288, + 3.182, + 3.094, + }, + { + 7.677, + 5.488, + 4.601, + 4.106, + 3.785, + 3.558, + 3.388, + 3.256, + 3.149, + 3.062, + }, + { + 7.636, + 5.453, + 4.568, + 4.074, + 3.754, + 3.528, + 3.358, + 3.226, + 3.120, + 3.032, + }, + { + 7.598, + 5.420, + 4.538, + 4.045, + 3.725, + 3.499, + 3.330, + 3.198, + 3.092, + 3.005, + }, + { + 7.562, + 5.390, + 4.510, + 4.018, + 3.699, + 3.473, + 3.305, + 3.173, + 3.067, + 2.979, + }, + { + 7.530, + 5.362, + 4.484, + 3.993, + 3.675, + 3.449, + 3.281, + 3.149, + 3.043, + 2.955, + }, + { + 7.499, + 5.336, + 4.459, + 3.969, + 3.652, + 3.427, + 3.258, + 3.127, + 3.021, + 2.934, + }, + { + 7.471, + 5.312, + 4.437, + 3.948, + 3.630, + 3.406, + 3.238, + 3.106, + 3.000, + 2.913, + }, + { + 7.444, + 5.289, + 4.416, + 3.927, + 3.611, + 3.386, + 3.218, + 3.087, + 2.981, + 2.894, + }, + { + 7.419, + 5.268, + 4.396, + 3.908, + 3.592, + 3.368, + 3.200, + 3.069, + 2.963, + 2.876, + }, + { + 7.396, + 5.248, + 4.377, + 3.890, + 3.574, + 3.351, + 3.183, + 3.052, + 2.946, + 2.859, + }, + { + 7.373, + 5.229, + 4.360, + 3.873, + 3.558, + 3.334, + 3.167, + 3.036, + 2.930, + 2.843, + }, + { + 7.353, + 5.211, + 4.343, + 3.858, + 3.542, + 3.319, + 3.152, + 3.021, + 2.915, + 2.828, + }, + { + 7.333, + 5.194, + 4.327, + 3.843, + 3.528, + 3.305, + 3.137, + 3.006, + 2.901, + 2.814, + }, + { + 7.314, + 5.179, + 4.313, + 3.828, + 3.514, + 3.291, + 3.124, + 2.993, + 2.888, + 2.801, + }, + { + 7.296, + 5.163, + 4.299, + 3.815, + 3.501, + 3.278, + 3.111, + 2.980, + 2.875, + 2.788, + }, + { + 7.280, + 5.149, + 4.285, + 3.802, + 3.488, + 3.266, + 3.099, + 2.968, + 2.863, + 2.776, + }, + { + 7.264, + 5.136, + 4.273, + 3.790, + 3.476, + 3.254, + 3.087, + 2.957, + 2.851, + 2.764, + }, + { + 7.248, + 5.123, + 4.261, + 3.778, + 3.465, + 3.243, + 3.076, + 2.946, + 2.840, + 2.754, + }, + { + 7.234, + 5.110, + 4.249, + 3.767, + 3.454, + 3.232, + 3.066, + 2.935, + 2.830, + 2.743, + }, + { + 7.220, + 5.099, + 4.238, + 3.757, + 3.444, + 3.222, + 3.056, + 2.925, + 2.820, + 2.733, + }, + { + 7.207, + 5.087, + 4.228, + 3.747, + 3.434, + 3.213, + 3.046, + 2.916, + 2.811, + 2.724, + }, + { + 7.194, + 5.077, + 4.218, + 3.737, + 3.425, + 3.204, + 3.037, + 2.907, + 2.802, + 2.715, + }, + { + 7.182, + 5.066, + 4.208, + 3.728, + 3.416, + 3.195, + 3.028, + 2.898, + 2.793, + 2.706, + }, + { + 7.171, + 5.057, + 4.199, + 3.720, + 3.408, + 3.186, + 3.020, + 2.890, + 2.785, + 2.698, + }, + { + 7.159, + 5.047, + 4.191, + 3.711, + 3.400, + 3.178, + 3.012, + 2.882, + 2.777, + 2.690, + }, + { + 7.149, + 5.038, + 4.182, + 3.703, + 3.392, + 3.171, + 3.005, + 2.874, + 2.769, + 2.683, + }, + { + 7.139, + 5.030, + 4.174, + 3.695, + 3.384, + 3.163, + 2.997, + 2.867, + 2.762, + 2.675, + }, + { + 7.129, + 5.021, + 4.167, + 3.688, + 3.377, + 3.156, + 2.990, + 2.860, + 2.755, + 2.668, + }, + { + 7.119, + 5.013, + 4.159, + 3.681, + 3.370, + 3.149, + 2.983, + 2.853, + 2.748, + 2.662, + }, + { + 7.110, + 5.006, + 4.152, + 3.674, + 3.363, + 3.143, + 2.977, + 2.847, + 2.742, + 2.655, + }, + { + 7.102, + 4.998, + 4.145, + 3.667, + 3.357, + 3.136, + 2.971, + 2.841, + 2.736, + 2.649, + }, + { + 7.093, + 4.991, + 4.138, + 3.661, + 3.351, + 3.130, + 2.965, + 2.835, + 2.730, + 2.643, + }, + { + 7.085, + 4.984, + 4.132, + 3.655, + 3.345, + 3.124, + 2.959, + 2.829, + 2.724, + 2.637, + }, + { + 7.077, + 4.977, + 4.126, + 3.649, + 3.339, + 3.119, + 2.953, + 2.823, + 2.718, + 2.632, + }, + { + 7.070, + 4.971, + 4.120, + 3.643, + 3.333, + 3.113, + 2.948, + 2.818, + 2.713, + 2.626, + }, + { + 7.062, + 4.965, + 4.114, + 3.638, + 3.328, + 3.108, + 2.942, + 2.813, + 2.708, + 2.621, + }, + { + 7.055, + 4.959, + 4.109, + 3.632, + 3.323, + 3.103, + 2.937, + 2.808, + 2.703, + 2.616, + }, + { + 7.048, + 4.953, + 4.103, + 3.627, + 3.318, + 3.098, + 2.932, + 2.803, + 2.698, + 2.611, + }, + { + 7.042, + 4.947, + 4.098, + 3.622, + 3.313, + 3.093, + 2.928, + 2.798, + 2.693, + 2.607, + }, + { + 7.035, + 4.942, + 4.093, + 3.618, + 3.308, + 3.088, + 2.923, + 2.793, + 2.689, + 2.602, + }, + { + 7.029, + 4.937, + 4.088, + 3.613, + 3.304, + 3.084, + 2.919, + 2.789, + 2.684, + 2.598, + }, + { + 7.023, + 4.932, + 4.083, + 3.608, + 3.299, + 3.080, + 2.914, + 2.785, + 2.680, + 2.593, + }, + { + 7.017, + 4.927, + 4.079, + 3.604, + 3.295, + 3.075, + 2.910, + 2.781, + 2.676, + 2.589, + }, + { + 7.011, + 4.922, + 4.074, + 3.600, + 3.291, + 3.071, + 2.906, + 2.777, + 2.672, + 2.585, + }, + { + 7.006, + 4.917, + 4.070, + 3.596, + 3.287, + 3.067, + 2.902, + 2.773, + 2.668, + 2.581, + }, + { + 7.001, + 4.913, + 4.066, + 3.591, + 3.283, + 3.063, + 2.898, + 2.769, + 2.664, + 2.578, + }, + { + 6.995, + 4.908, + 4.062, + 3.588, + 3.279, + 3.060, + 2.895, + 2.765, + 2.660, + 2.574, + }, + { + 6.990, + 4.904, + 4.058, + 3.584, + 3.275, + 3.056, + 2.891, + 2.762, + 2.657, + 2.570, + }, + { + 6.985, + 4.900, + 4.054, + 3.580, + 3.272, + 3.052, + 2.887, + 2.758, + 2.653, + 2.567, + }, + { + 6.981, + 4.896, + 4.050, + 3.577, + 3.268, + 3.049, + 2.884, + 2.755, + 2.650, + 2.563, + }, + { + 6.976, + 4.892, + 4.047, + 3.573, + 3.265, + 3.046, + 2.881, + 2.751, + 2.647, + 2.560, + }, + { + 6.971, + 4.888, + 4.043, + 3.570, + 3.261, + 3.042, + 2.877, + 2.748, + 2.644, + 2.557, + }, + { + 6.967, + 4.884, + 4.040, + 3.566, + 3.258, + 3.039, + 2.874, + 2.745, + 2.640, + 2.554, + }, + { + 6.963, + 4.881, + 4.036, + 3.563, + 3.255, + 3.036, + 2.871, + 2.742, + 2.637, + 2.551, + }, + { + 6.958, + 4.877, + 4.033, + 3.560, + 3.252, + 3.033, + 2.868, + 2.739, + 2.634, + 2.548, + }, + { + 6.954, + 4.874, + 4.030, + 3.557, + 3.249, + 3.030, + 2.865, + 2.736, + 2.632, + 2.545, + }, + { + 6.950, + 4.870, + 4.027, + 3.554, + 3.246, + 3.027, + 2.863, + 2.733, + 2.629, + 2.542, + }, + { + 6.947, + 4.867, + 4.024, + 3.551, + 3.243, + 3.025, + 2.860, + 2.731, + 2.626, + 2.539, + }, + { + 6.943, + 4.864, + 4.021, + 3.548, + 3.240, + 3.022, + 2.857, + 2.728, + 2.623, + 2.537, + }, + { + 6.939, + 4.861, + 4.018, + 3.545, + 3.238, + 3.019, + 2.854, + 2.725, + 2.621, + 2.534, + }, + { + 6.935, + 4.858, + 4.015, + 3.543, + 3.235, + 3.017, + 2.852, + 2.723, + 2.618, + 2.532, + }, + { + 6.932, + 4.855, + 4.012, + 3.540, + 3.233, + 3.014, + 2.849, + 2.720, + 2.616, + 2.529, + }, + { + 6.928, + 4.852, + 4.010, + 3.538, + 3.230, + 3.012, + 2.847, + 2.718, + 2.613, + 2.527, + }, + { + 6.925, + 4.849, + 4.007, + 3.535, + 3.228, + 3.009, + 2.845, + 2.715, + 2.611, + 2.524, + }, + { + 6.922, + 4.846, + 4.004, + 3.533, + 3.225, + 3.007, + 2.842, + 2.713, + 2.609, + 2.522, + }, + { + 6.919, + 4.844, + 4.002, + 3.530, + 3.223, + 3.004, + 2.840, + 2.711, + 2.606, + 2.520, + }, + { + 6.915, + 4.841, + 3.999, + 3.528, + 3.221, + 3.002, + 2.838, + 2.709, + 2.604, + 2.518, + }, + { + 6.912, + 4.838, + 3.997, + 3.525, + 3.218, + 3.000, + 2.835, + 2.706, + 2.602, + 2.515, + }, + { + 6.909, + 4.836, + 3.995, + 3.523, + 3.216, + 2.998, + 2.833, + 2.704, + 2.600, + 2.513, + }, + { + 6.906, + 4.833, + 3.992, + 3.521, + 3.214, + 2.996, + 2.831, + 2.702, + 2.598, + 2.511, + }, + { + 6.904, + 4.831, + 3.990, + 3.519, + 3.212, + 2.994, + 2.829, + 2.700, + 2.596, + 2.509, + }, + { + 6.901, + 4.829, + 3.988, + 3.517, + 3.210, + 2.992, + 2.827, + 2.698, + 2.594, + 2.507, + }, + { + 6.898, + 4.826, + 3.986, + 3.515, + 3.208, + 2.990, + 2.825, + 2.696, + 2.592, + 2.505, + }, + {6.895, 4.824, 3.984, 3.513, 3.206, 2.988, 2.823, 2.694, 2.590, 2.503}}; /** define the variance which will be used as a minimum variance for any dimension of any feature. Since most features are calculated from numbers with a precision no better than 1 in 128, the variance should never be less than the square of this number for parameters whose range is 1. */ -#define MINVARIANCE 0.0004 +#define MINVARIANCE 0.0004 /** define the absolute minimum number of samples which must be present in order to accurately test hypotheses about underlying probability @@ -147,8 +1236,8 @@ const double FTable[FTABLE_Y][FTABLE_X] = { equal to MINSAMPLES but can be set to a lower number for early testing when very few samples are available. */ #define MINSAMPLESPERBUCKET 5 -#define MINSAMPLES (MINBUCKETS * MINSAMPLESPERBUCKET) -#define MINSAMPLESNEEDED 1 +#define MINSAMPLES (MINBUCKETS * MINSAMPLESPERBUCKET) +#define MINSAMPLESNEEDED 1 /** define the size of the table which maps normalized samples to histogram buckets. Also define the number of standard deviations @@ -156,12 +1245,12 @@ const double FTable[FTABLE_Y][FTABLE_X] = { The mapping table will be defined in such a way that it covers the specified number of standard deviations on either side of the mean. BUCKETTABLESIZE should always be even. */ -#define BUCKETTABLESIZE 1024 -#define NORMALEXTENT 3.0 +#define BUCKETTABLESIZE 1024 +#define NORMALEXTENT 3.0 struct TEMPCLUSTER { - CLUSTER *Cluster; - CLUSTER *Neighbor; + CLUSTER* Cluster; + CLUSTER* Neighbor; }; using ClusterPair = tesseract::KDPairInc; @@ -169,23 +1258,23 @@ using ClusterHeap = tesseract::GenericHeap; struct STATISTICS { FLOAT32 AvgVariance; - FLOAT32 *CoVariance; - FLOAT32 *Min; // largest negative distance from the mean - FLOAT32 *Max; // largest positive distance from the mean + FLOAT32* CoVariance; + FLOAT32* Min; // largest negative distance from the mean + FLOAT32* Max; // largest positive distance from the mean }; struct BUCKETS { - DISTRIBUTION Distribution; // distribution being tested for - uint32_t SampleCount; // # of samples in histogram - FLOAT64 Confidence; // confidence level of test - FLOAT64 ChiSquared; // test threshold - uint16_t NumberOfBuckets; // number of cells in histogram - uint16_t Bucket[BUCKETTABLESIZE];// mapping to histogram buckets - uint32_t *Count; // frequency of occurrence histogram - FLOAT32 *ExpectedCount; // expected histogram + DISTRIBUTION Distribution; // distribution being tested for + uint32_t SampleCount; // # of samples in histogram + FLOAT64 Confidence; // confidence level of test + FLOAT64 ChiSquared; // test threshold + uint16_t NumberOfBuckets; // number of cells in histogram + uint16_t Bucket[BUCKETTABLESIZE]; // mapping to histogram buckets + uint32_t* Count; // frequency of occurrence histogram + FLOAT32* ExpectedCount; // expected histogram }; -struct CHISTRUCT{ +struct CHISTRUCT { uint16_t DegreesOfFreedom; FLOAT64 Alpha; FLOAT64 ChiSquared; @@ -193,18 +1282,18 @@ struct CHISTRUCT{ // For use with KDWalk / MakePotentialClusters struct ClusteringContext { - ClusterHeap *heap; // heap used to hold temp clusters, "best" on top - TEMPCLUSTER *candidates; // array of potential clusters - KDTREE *tree; // kd-tree to be searched for neighbors - int32_t next; // next candidate to be used + ClusterHeap* heap; // heap used to hold temp clusters, "best" on top + TEMPCLUSTER* candidates; // array of potential clusters + KDTREE* tree; // kd-tree to be searched for neighbors + int32_t next; // next candidate to be used }; -typedef FLOAT64 (*DENSITYFUNC) (int32_t); -typedef FLOAT64 (*SOLVEFUNC) (CHISTRUCT *, double); +typedef FLOAT64 (*DENSITYFUNC)(int32_t); +typedef FLOAT64 (*SOLVEFUNC)(CHISTRUCT*, double); -#define Odd(N) ((N)%2) -#define Mirror(N,R) ((R) - (N) - 1) -#define Abs(N) ( ( (N) < 0 ) ? ( -(N) ) : (N) ) +#define Odd(N) ((N) % 2) +#define Mirror(N, R) ((R) - (N)-1) +#define Abs(N) (((N) < 0) ? (-(N)) : (N)) //--------------Global Data Definitions and Declarations---------------------- /** the following variables describe a discrete normal distribution @@ -214,7 +1303,7 @@ typedef FLOAT64 (*SOLVEFUNC) (CHISTRUCT *, double); discrete range of x. x=0 is mapped to -NORMALEXTENT standard deviations and x=BUCKETTABLESIZE is mapped to +NORMALEXTENT standard deviations. */ -#define SqrtOf2Pi 2.506628275 +#define SqrtOf2Pi 2.506628275 static const FLOAT64 kNormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT); static const FLOAT64 kNormalVariance = (BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT); @@ -224,162 +1313,133 @@ static const FLOAT64 kNormalMean = BUCKETTABLESIZE / 2; /** define lookup tables used to compute the number of histogram buckets that should be used for a given number of samples. */ -#define LOOKUPTABLESIZE 8 +#define LOOKUPTABLESIZE 8 #define MAXDEGREESOFFREEDOM MAXBUCKETS static const uint32_t kCountTable[LOOKUPTABLESIZE] = { - MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000 -}; // number of samples + MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000}; // number of samples static const uint16_t kBucketsTable[LOOKUPTABLESIZE] = { - MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS -}; // number of buckets + MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS}; // number of buckets /*------------------------------------------------------------------------- Private Function Prototypes --------------------------------------------------------------------------*/ -void CreateClusterTree(CLUSTERER *Clusterer); +void CreateClusterTree(CLUSTERER* Clusterer); -void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster, +void MakePotentialClusters(ClusteringContext* context, CLUSTER* Cluster, int32_t Level); -CLUSTER *FindNearestNeighbor(KDTREE *Tree, - CLUSTER *Cluster, - FLOAT32 *Distance); +CLUSTER* FindNearestNeighbor(KDTREE* Tree, CLUSTER* Cluster, FLOAT32* Distance); -CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster); +CLUSTER* MakeNewCluster(CLUSTERER* Clusterer, TEMPCLUSTER* TempCluster); -int32_t MergeClusters (int16_t N, -PARAM_DESC ParamDesc[], -int32_t n1, -int32_t n2, -FLOAT32 m[], -FLOAT32 m1[], FLOAT32 m2[]); +int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, + FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]); -void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config); +void ComputePrototypes(CLUSTERER* Clusterer, CLUSTERCONFIG* Config); -PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, - CLUSTERCONFIG *Config, - CLUSTER *Cluster); +PROTOTYPE* MakePrototype(CLUSTERER* Clusterer, CLUSTERCONFIG* Config, + CLUSTER* Cluster); -PROTOTYPE *MakeDegenerateProto(uint16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics, - PROTOSTYLE Style, +PROTOTYPE* MakeDegenerateProto(uint16_t N, CLUSTER* Cluster, + STATISTICS* Statistics, PROTOSTYLE Style, int32_t MinSamples); -PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, - CLUSTERCONFIG *Config, - CLUSTER *Cluster, - STATISTICS *Statistics); - -PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *Buckets); - -PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *Buckets); - -PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *NormalBuckets, +PROTOTYPE* TestEllipticalProto(CLUSTERER* Clusterer, CLUSTERCONFIG* Config, + CLUSTER* Cluster, STATISTICS* Statistics); + +PROTOTYPE* MakeSphericalProto(CLUSTERER* Clusterer, CLUSTER* Cluster, + STATISTICS* Statistics, BUCKETS* Buckets); + +PROTOTYPE* MakeEllipticalProto(CLUSTERER* Clusterer, CLUSTER* Cluster, + STATISTICS* Statistics, BUCKETS* Buckets); + +PROTOTYPE* MakeMixedProto(CLUSTERER* Clusterer, CLUSTER* Cluster, + STATISTICS* Statistics, BUCKETS* NormalBuckets, FLOAT64 Confidence); -void MakeDimRandom(uint16_t i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc); +void MakeDimRandom(uint16_t i, PROTOTYPE* Proto, PARAM_DESC* ParamDesc); -void MakeDimUniform(uint16_t i, PROTOTYPE *Proto, STATISTICS *Statistics); +void MakeDimUniform(uint16_t i, PROTOTYPE* Proto, STATISTICS* Statistics); -STATISTICS *ComputeStatistics (int16_t N, -PARAM_DESC ParamDesc[], CLUSTER * Cluster); +STATISTICS* ComputeStatistics(int16_t N, PARAM_DESC ParamDesc[], + CLUSTER* Cluster); -PROTOTYPE *NewSphericalProto(uint16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics); +PROTOTYPE* NewSphericalProto(uint16_t N, CLUSTER* Cluster, + STATISTICS* Statistics); -PROTOTYPE *NewEllipticalProto(int16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics); +PROTOTYPE* NewEllipticalProto(int16_t N, CLUSTER* Cluster, + STATISTICS* Statistics); -PROTOTYPE *NewMixedProto(int16_t N, CLUSTER *Cluster, STATISTICS *Statistics); +PROTOTYPE* NewMixedProto(int16_t N, CLUSTER* Cluster, STATISTICS* Statistics); -PROTOTYPE *NewSimpleProto(int16_t N, CLUSTER *Cluster); +PROTOTYPE* NewSimpleProto(int16_t N, CLUSTER* Cluster); -bool Independent(PARAM_DESC* ParamDesc, - int16_t N, FLOAT32* CoVariance, FLOAT32 Independence); +bool Independent(PARAM_DESC* ParamDesc, int16_t N, FLOAT32* CoVariance, + FLOAT32 Independence); -BUCKETS *GetBuckets(CLUSTERER* clusterer, - DISTRIBUTION Distribution, - uint32_t SampleCount, - FLOAT64 Confidence); +BUCKETS* GetBuckets(CLUSTERER* clusterer, DISTRIBUTION Distribution, + uint32_t SampleCount, FLOAT64 Confidence); -BUCKETS *MakeBuckets(DISTRIBUTION Distribution, - uint32_t SampleCount, +BUCKETS* MakeBuckets(DISTRIBUTION Distribution, uint32_t SampleCount, FLOAT64 Confidence); uint16_t OptimumNumberOfBuckets(uint32_t SampleCount); -FLOAT64 ComputeChiSquared(uint16_t DegreesOfFreedom, FLOAT64 Alpha); +FLOAT64 +ComputeChiSquared(uint16_t DegreesOfFreedom, FLOAT64 Alpha); -FLOAT64 NormalDensity(int32_t x); +FLOAT64 +NormalDensity(int32_t x); -FLOAT64 UniformDensity(int32_t x); +FLOAT64 +UniformDensity(int32_t x); -FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx); +FLOAT64 +Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx); -void FillBuckets(BUCKETS *Buckets, - CLUSTER *Cluster, - uint16_t Dim, - PARAM_DESC *ParamDesc, - FLOAT32 Mean, - FLOAT32 StdDev); +void FillBuckets(BUCKETS* Buckets, CLUSTER* Cluster, uint16_t Dim, + PARAM_DESC* ParamDesc, FLOAT32 Mean, FLOAT32 StdDev); -uint16_t NormalBucket(PARAM_DESC *ParamDesc, - FLOAT32 x, - FLOAT32 Mean, - FLOAT32 StdDev); +uint16_t NormalBucket(PARAM_DESC* ParamDesc, FLOAT32 x, FLOAT32 Mean, + FLOAT32 StdDev); -uint16_t UniformBucket(PARAM_DESC *ParamDesc, - FLOAT32 x, - FLOAT32 Mean, - FLOAT32 StdDev); +uint16_t UniformBucket(PARAM_DESC* ParamDesc, FLOAT32 x, FLOAT32 Mean, + FLOAT32 StdDev); bool DistributionOK(BUCKETS* Buckets); -void FreeStatistics(STATISTICS *Statistics); +void FreeStatistics(STATISTICS* Statistics); -void FreeBuckets(BUCKETS *Buckets); +void FreeBuckets(BUCKETS* Buckets); -void FreeCluster(CLUSTER *Cluster); +void FreeCluster(CLUSTER* Cluster); uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets); -int NumBucketsMatch(void *arg1, // BUCKETS *Histogram, - void *arg2); // uint16_t *DesiredNumberOfBuckets); +int NumBucketsMatch(void* arg1, // BUCKETS *Histogram, + void* arg2); // uint16_t *DesiredNumberOfBuckets); -int ListEntryMatch(void *arg1, void *arg2); +int ListEntryMatch(void* arg1, void* arg2); -void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount); +void AdjustBuckets(BUCKETS* Buckets, uint32_t NewSampleCount); -void InitBuckets(BUCKETS *Buckets); +void InitBuckets(BUCKETS* Buckets); -int AlphaMatch(void *arg1, // CHISTRUCT *ChiStruct, - void *arg2); // CHISTRUCT *SearchKey); +int AlphaMatch(void* arg1, // CHISTRUCT *ChiStruct, + void* arg2); // CHISTRUCT *SearchKey); -CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, FLOAT64 Alpha); +CHISTRUCT* NewChiStruct(uint16_t DegreesOfFreedom, FLOAT64 Alpha); -FLOAT64 Solve(SOLVEFUNC Function, - void *FunctionParams, - FLOAT64 InitialGuess, - FLOAT64 Accuracy); +FLOAT64 +Solve(SOLVEFUNC Function, void* FunctionParams, FLOAT64 InitialGuess, + FLOAT64 Accuracy); -FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x); +FLOAT64 +ChiArea(CHISTRUCT* ChiParams, FLOAT64 x); -bool MultipleCharSamples(CLUSTERER* Clusterer, - CLUSTER* Cluster, +bool MultipleCharSamples(CLUSTERER* Clusterer, CLUSTER* Cluster, FLOAT32 MaxIllegal); double InvertMatrix(const float* input, int size, float* inv); @@ -395,13 +1455,12 @@ double InvertMatrix(const float* input, int size, float* inv); * @note Exceptions: None * @note History: 5/29/89, DSJ, Created. */ -CLUSTERER * -MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]) { - CLUSTERER *Clusterer; +CLUSTERER* MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[]) { + CLUSTERER* Clusterer; int i; // allocate main clusterer data structure and init simple fields - Clusterer = (CLUSTERER *) Emalloc (sizeof (CLUSTERER)); + Clusterer = (CLUSTERER*)Emalloc(sizeof(CLUSTERER)); Clusterer->SampleSize = SampleSize; Clusterer->NumberOfSamples = 0; Clusterer->NumChar = 0; @@ -411,8 +1470,7 @@ MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]) { Clusterer->ProtoList = NIL_LIST; // maintain a copy of param descriptors in the clusterer data structure - Clusterer->ParamDesc = - (PARAM_DESC *) Emalloc (SampleSize * sizeof (PARAM_DESC)); + Clusterer->ParamDesc = (PARAM_DESC*)Emalloc(SampleSize * sizeof(PARAM_DESC)); for (i = 0; i < SampleSize; i++) { Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular; Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential; @@ -421,11 +1479,11 @@ MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]) { Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2; Clusterer->ParamDesc[i].MidRange = - (ParamDesc[i].Max + ParamDesc[i].Min) / 2; + (ParamDesc[i].Max + ParamDesc[i].Min) / 2; } // allocate a kd tree to hold the samples - Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc); + Clusterer->KDTree = MakeKDTree(SampleSize, ParamDesc); // Initialize cache of histogram buckets to minimize recomputing them. for (int d = 0; d < DISTRIBUTION_COUNT; ++d) { @@ -434,7 +1492,7 @@ MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]) { } return Clusterer; -} // MakeClusterer +} // MakeClusterer /** * This routine creates a new sample data structure to hold @@ -452,20 +1510,19 @@ MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]) { * ClusterSamples has been called * @note History: 5/29/89, DSJ, Created. */ -SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, +SAMPLE* MakeSample(CLUSTERER* Clusterer, const FLOAT32* Feature, int32_t CharID) { - SAMPLE *Sample; + SAMPLE* Sample; int i; // see if the samples have already been clustered - if so trap an error if (Clusterer->Root != nullptr) - DoError (ALREADYCLUSTERED, - "Can't add samples after they have been clustered"); + DoError(ALREADYCLUSTERED, + "Can't add samples after they have been clustered"); // allocate the new sample and initialize it - Sample = (SAMPLE *) Emalloc (sizeof (SAMPLE) + - (Clusterer->SampleSize - - 1) * sizeof (FLOAT32)); + Sample = (SAMPLE*)Emalloc(sizeof(SAMPLE) + + (Clusterer->SampleSize - 1) * sizeof(FLOAT32)); Sample->Clustered = FALSE; Sample->Prototype = FALSE; Sample->SampleCount = 1; @@ -473,20 +1530,18 @@ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, Sample->Right = nullptr; Sample->CharID = CharID; - for (i = 0; i < Clusterer->SampleSize; i++) - Sample->Mean[i] = Feature[i]; + for (i = 0; i < Clusterer->SampleSize; i++) Sample->Mean[i] = Feature[i]; // add the sample to the KD tree - keep track of the total # of samples Clusterer->NumberOfSamples++; - KDStore (Clusterer->KDTree, Sample->Mean, (char *) Sample); - if (CharID >= Clusterer->NumChar) - Clusterer->NumChar = CharID + 1; + KDStore(Clusterer->KDTree, Sample->Mean, (char*)Sample); + if (CharID >= Clusterer->NumChar) Clusterer->NumChar = CharID + 1; // execute hook for monitoring clustering operation // (*SampleCreationHook)( Sample ); return (Sample); -} // MakeSample +} // MakeSample /** * This routine first checks to see if the samples in this @@ -509,26 +1564,25 @@ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, * @note Exceptions: None * @note History: 5/29/89, DSJ, Created. */ -LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { - //only create cluster tree if samples have never been clustered before - if (Clusterer->Root == nullptr) - CreateClusterTree(Clusterer); +LIST ClusterSamples(CLUSTERER* Clusterer, CLUSTERCONFIG* Config) { + // only create cluster tree if samples have never been clustered before + if (Clusterer->Root == nullptr) CreateClusterTree(Clusterer); - //deallocate the old prototype list if one exists - FreeProtoList (&Clusterer->ProtoList); + // deallocate the old prototype list if one exists + FreeProtoList(&Clusterer->ProtoList); Clusterer->ProtoList = NIL_LIST; - //compute prototypes starting at the root node in the tree + // compute prototypes starting at the root node in the tree ComputePrototypes(Clusterer, Config); // We don't need the cluster pointers in the protos any more, so null them // out, which makes it safe to delete the clusterer. LIST proto_list = Clusterer->ProtoList; iterate(proto_list) { - PROTOTYPE *proto = reinterpret_cast(first_node(proto_list)); + PROTOTYPE* proto = reinterpret_cast(first_node(proto_list)); proto->Cluster = nullptr; } return Clusterer->ProtoList; -} // ClusterSamples +} // ClusterSamples /** * This routine frees all of the memory allocated to the @@ -543,13 +1597,11 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ -void FreeClusterer(CLUSTERER *Clusterer) { +void FreeClusterer(CLUSTERER* Clusterer) { if (Clusterer != nullptr) { free(Clusterer->ParamDesc); - if (Clusterer->KDTree != nullptr) - FreeKDTree (Clusterer->KDTree); - if (Clusterer->Root != nullptr) - FreeCluster (Clusterer->Root); + if (Clusterer->KDTree != nullptr) FreeKDTree(Clusterer->KDTree); + if (Clusterer->Root != nullptr) FreeCluster(Clusterer->Root); // Free up all used buckets structures. for (int d = 0; d < DISTRIBUTION_COUNT; ++d) { for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c) @@ -559,7 +1611,7 @@ void FreeClusterer(CLUSTERER *Clusterer) { free(Clusterer); } -} // FreeClusterer +} // FreeClusterer /** * This routine frees all of the memory allocated to the @@ -570,9 +1622,9 @@ void FreeClusterer(CLUSTERER *Clusterer) { * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ -void FreeProtoList(LIST *ProtoList) { +void FreeProtoList(LIST* ProtoList) { destroy_nodes(*ProtoList, FreePrototype); -} // FreeProtoList +} // FreeProtoList /** * This routine deallocates the memory consumed by the specified @@ -584,12 +1636,11 @@ void FreeProtoList(LIST *ProtoList) { * @note Exceptions: None * @note History: 5/30/89, DSJ, Created. */ -void FreePrototype(void *arg) { //PROTOTYPE *Prototype) - PROTOTYPE *Prototype = (PROTOTYPE *) arg; +void FreePrototype(void* arg) { // PROTOTYPE *Prototype) + PROTOTYPE* Prototype = (PROTOTYPE*)arg; // unmark the corresponding cluster (if there is one - if (Prototype->Cluster != nullptr) - Prototype->Cluster->Prototype = FALSE; + if (Prototype->Cluster != nullptr) Prototype->Cluster->Prototype = FALSE; // deallocate the prototype statistics and then the prototype itself free(Prototype->Distrib); @@ -600,7 +1651,7 @@ void FreePrototype(void *arg) { //PROTOTYPE *Prototype) free(Prototype->Weight.Elliptical); } free(Prototype); -} // FreePrototype +} // FreePrototype /** * This routine is used to find all of the samples which @@ -617,20 +1668,18 @@ void FreePrototype(void *arg) { //PROTOTYPE *Prototype) * @note Exceptions: None * @note History: 6/16/89, DSJ, Created. */ -CLUSTER *NextSample(LIST *SearchState) { - CLUSTER *Cluster; +CLUSTER* NextSample(LIST* SearchState) { + CLUSTER* Cluster; - if (*SearchState == NIL_LIST) - return (nullptr); - Cluster = (CLUSTER *) first_node (*SearchState); - *SearchState = pop (*SearchState); + if (*SearchState == NIL_LIST) return (nullptr); + Cluster = (CLUSTER*)first_node(*SearchState); + *SearchState = pop(*SearchState); while (TRUE) { - if (Cluster->Left == nullptr) - return (Cluster); - *SearchState = push (*SearchState, Cluster->Right); + if (Cluster->Left == nullptr) return (Cluster); + *SearchState = push(*SearchState, Cluster->Right); Cluster = Cluster->Left; } -} // NextSample +} // NextSample /** * This routine returns the mean of the specified @@ -641,9 +1690,10 @@ CLUSTER *NextSample(LIST *SearchState) { * @note Exceptions: none * @note History: 7/6/89, DSJ, Created. */ -FLOAT32 Mean(PROTOTYPE *Proto, uint16_t Dimension) { +FLOAT32 +Mean(PROTOTYPE* Proto, uint16_t Dimension) { return (Proto->Mean[Dimension]); -} // Mean +} // Mean /** * This routine returns the standard deviation of the @@ -654,18 +1704,17 @@ FLOAT32 Mean(PROTOTYPE *Proto, uint16_t Dimension) { * @note Exceptions: none * @note History: 7/6/89, DSJ, Created. */ -FLOAT32 StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) { +FLOAT32 +StandardDeviation(PROTOTYPE* Proto, uint16_t Dimension) { switch (Proto->Style) { case spherical: - return ((FLOAT32) sqrt ((double) Proto->Variance.Spherical)); + return ((FLOAT32)sqrt((double)Proto->Variance.Spherical)); case elliptical: - return ((FLOAT32) - sqrt ((double) Proto->Variance.Elliptical[Dimension])); + return ((FLOAT32)sqrt((double)Proto->Variance.Elliptical[Dimension])); case mixed: switch (Proto->Distrib[Dimension]) { case normal: - return ((FLOAT32) - sqrt ((double) Proto->Variance.Elliptical[Dimension])); + return ((FLOAT32)sqrt((double)Proto->Variance.Elliptical[Dimension])); case uniform: case D_random: return (Proto->Variance.Elliptical[Dimension]); @@ -674,8 +1723,7 @@ FLOAT32 StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) { } } return 0.0f; -} // StandardDeviation - +} // StandardDeviation /*--------------------------------------------------------------------------- Private Code @@ -695,16 +1743,16 @@ FLOAT32 StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) { * @note Exceptions: None * @note History: 5/29/89, DSJ, Created. */ -void CreateClusterTree(CLUSTERER *Clusterer) { +void CreateClusterTree(CLUSTERER* Clusterer) { ClusteringContext context; ClusterPair HeapEntry; - TEMPCLUSTER *PotentialCluster; + TEMPCLUSTER* PotentialCluster; // each sample and its nearest neighbor form a "potential" cluster // save these in a heap with the "best" potential clusters on top context.tree = Clusterer->KDTree; - context.candidates = (TEMPCLUSTER *) - Emalloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER)); + context.candidates = + (TEMPCLUSTER*)Emalloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER)); context.next = 0; context.heap = new ClusterHeap(Clusterer->NumberOfSamples); KDWalk(context.tree, (void_proc)MakePotentialClusters, &context); @@ -722,9 +1770,8 @@ void CreateClusterTree(CLUSTERER *Clusterer) { // if main cluster is not yet clustered, but its nearest neighbor is // then we must find a new nearest neighbor else if (PotentialCluster->Neighbor->Clustered) { - PotentialCluster->Neighbor = - FindNearestNeighbor(context.tree, PotentialCluster->Cluster, - &HeapEntry.key); + PotentialCluster->Neighbor = FindNearestNeighbor( + context.tree, PotentialCluster->Cluster, &HeapEntry.key); if (PotentialCluster->Neighbor != nullptr) { context.heap->Push(&HeapEntry); } @@ -732,11 +1779,9 @@ void CreateClusterTree(CLUSTERER *Clusterer) { // if neither cluster is already clustered, form permanent cluster else { - PotentialCluster->Cluster = - MakeNewCluster(Clusterer, PotentialCluster); - PotentialCluster->Neighbor = - FindNearestNeighbor(context.tree, PotentialCluster->Cluster, - &HeapEntry.key); + PotentialCluster->Cluster = MakeNewCluster(Clusterer, PotentialCluster); + PotentialCluster->Neighbor = FindNearestNeighbor( + context.tree, PotentialCluster->Cluster, &HeapEntry.key); if (PotentialCluster->Neighbor != nullptr) { context.heap->Push(&HeapEntry); } @@ -744,14 +1789,14 @@ void CreateClusterTree(CLUSTERER *Clusterer) { } // the root node in the cluster tree is now the only node in the kd-tree - Clusterer->Root = (CLUSTER *) RootOf(Clusterer->KDTree); + Clusterer->Root = (CLUSTER*)RootOf(Clusterer->KDTree); // free up the memory used by the K-D tree, heap, and temp clusters FreeKDTree(context.tree); Clusterer->KDTree = nullptr; delete context.heap; free(context.candidates); -} // CreateClusterTree +} // CreateClusterTree /** * This routine is designed to be used in concert with the @@ -762,21 +1807,19 @@ void CreateClusterTree(CLUSTERER *Clusterer) { * @param Cluster current cluster being visited in kd-tree walk * @param Level level of this cluster in the kd-tree */ -void MakePotentialClusters(ClusteringContext *context, - CLUSTER *Cluster, int32_t Level) { +void MakePotentialClusters(ClusteringContext* context, CLUSTER* Cluster, + int32_t Level) { ClusterPair HeapEntry; int next = context->next; context->candidates[next].Cluster = Cluster; HeapEntry.data = &(context->candidates[next]); - context->candidates[next].Neighbor = - FindNearestNeighbor(context->tree, - context->candidates[next].Cluster, - &HeapEntry.key); + context->candidates[next].Neighbor = FindNearestNeighbor( + context->tree, context->candidates[next].Cluster, &HeapEntry.key); if (context->candidates[next].Neighbor != nullptr) { context->heap->Push(&HeapEntry); context->next++; } -} // MakePotentialClusters +} // MakePotentialClusters /** * This routine searches the specified kd-tree for the nearest @@ -794,20 +1837,19 @@ void MakePotentialClusters(ClusteringContext *context, * @note History: 5/29/89, DSJ, Created. * 7/13/89, DSJ, Removed visibility of kd-tree node data struct */ -CLUSTER * -FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance) -#define MAXNEIGHBORS 2 -#define MAXDISTANCE MAX_FLOAT32 +CLUSTER* FindNearestNeighbor(KDTREE* Tree, CLUSTER* Cluster, FLOAT32* Distance) +#define MAXNEIGHBORS 2 +#define MAXDISTANCE MAX_FLOAT32 { - CLUSTER *Neighbor[MAXNEIGHBORS]; + CLUSTER* Neighbor[MAXNEIGHBORS]; FLOAT32 Dist[MAXNEIGHBORS]; int NumberOfNeighbors; int32_t i; - CLUSTER *BestNeighbor; + CLUSTER* BestNeighbor; // find the 2 nearest neighbors of the cluster KDNearestNeighborSearch(Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE, - &NumberOfNeighbors, (void **)Neighbor, Dist); + &NumberOfNeighbors, (void**)Neighbor, Dist); // search for the nearest neighbor that is not the cluster itself *Distance = MAXDISTANCE; @@ -819,7 +1861,7 @@ FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance) } } return BestNeighbor; -} // FindNearestNeighbor +} // FindNearestNeighbor /** * This routine creates a new permanent cluster from the @@ -833,12 +1875,12 @@ FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance) * @note History: 5/29/89, DSJ, Created. * 7/13/89, DSJ, Removed visibility of kd-tree node data struct */ -CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { - CLUSTER *Cluster; +CLUSTER* MakeNewCluster(CLUSTERER* Clusterer, TEMPCLUSTER* TempCluster) { + CLUSTER* Cluster; // allocate the new cluster and initialize it - Cluster = (CLUSTER *) Emalloc( - sizeof(CLUSTER) + (Clusterer->SampleSize - 1) * sizeof(FLOAT32)); + Cluster = (CLUSTER*)Emalloc(sizeof(CLUSTER) + + (Clusterer->SampleSize - 1) * sizeof(FLOAT32)); Cluster->Clustered = FALSE; Cluster->Prototype = FALSE; Cluster->Left = TempCluster->Cluster; @@ -860,7 +1902,7 @@ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { // add the new cluster to the KD tree KDStore(Clusterer->KDTree, Cluster->Mean, Cluster); return Cluster; -} // MakeNewCluster +} // MakeNewCluster /** * This routine merges two clusters into one larger cluster. @@ -877,12 +1919,8 @@ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { * @note Exceptions: None * @note History: 5/31/89, DSJ, Created. */ -int32_t MergeClusters(int16_t N, - PARAM_DESC ParamDesc[], - int32_t n1, - int32_t n2, - FLOAT32 m[], - FLOAT32 m1[], FLOAT32 m2[]) { +int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, + FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]) { int32_t i, n; n = n1 + n2; @@ -893,22 +1931,17 @@ int32_t MergeClusters(int16_t N, // then normalize the mean back into the accepted range if ((*m2 - *m1) > ParamDesc->HalfRange) { *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n; - if (*m < ParamDesc->Min) - *m += ParamDesc->Range; - } - else if ((*m1 - *m2) > ParamDesc->HalfRange) { + if (*m < ParamDesc->Min) *m += ParamDesc->Range; + } else if ((*m1 - *m2) > ParamDesc->HalfRange) { *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n; - if (*m < ParamDesc->Min) - *m += ParamDesc->Range; - } - else + if (*m < ParamDesc->Min) *m += ParamDesc->Range; + } else *m = (n1 * *m1 + n2 * *m2) / n; - } - else + } else *m = (n1 * *m1 + n2 * *m2) / n; } return n; -} // MergeClusters +} // MergeClusters /** * This routine decides which clusters in the cluster tree @@ -921,33 +1954,32 @@ int32_t MergeClusters(int16_t N, * @note Exceptions: None * @note History: 5/30/89, DSJ, Created. */ -void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { +void ComputePrototypes(CLUSTERER* Clusterer, CLUSTERCONFIG* Config) { LIST ClusterStack = NIL_LIST; - CLUSTER *Cluster; - PROTOTYPE *Prototype; + CLUSTER* Cluster; + PROTOTYPE* Prototype; // use a stack to keep track of clusters waiting to be processed // initially the only cluster on the stack is the root cluster if (Clusterer->Root != nullptr) - ClusterStack = push (NIL_LIST, Clusterer->Root); + ClusterStack = push(NIL_LIST, Clusterer->Root); // loop until we have analyzed all clusters which are potential prototypes while (ClusterStack != NIL_LIST) { // remove the next cluster to be analyzed from the stack // try to make a prototype from the cluster // if successful, put it on the proto list, else split the cluster - Cluster = (CLUSTER *) first_node (ClusterStack); - ClusterStack = pop (ClusterStack); + Cluster = (CLUSTER*)first_node(ClusterStack); + ClusterStack = pop(ClusterStack); Prototype = MakePrototype(Clusterer, Config, Cluster); if (Prototype != nullptr) { - Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype); - } - else { - ClusterStack = push (ClusterStack, Cluster->Right); - ClusterStack = push (ClusterStack, Cluster->Left); + Clusterer->ProtoList = push(Clusterer->ProtoList, Prototype); + } else { + ClusterStack = push(ClusterStack, Cluster->Right); + ClusterStack = push(ClusterStack, Cluster->Left); } } -} // ComputePrototypes +} // ComputePrototypes /** * This routine attempts to create a prototype from the @@ -966,15 +1998,14 @@ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { * @note Exceptions: None * @note History: 6/19/89, DSJ, Created. */ -PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, - CLUSTERCONFIG *Config, - CLUSTER *Cluster) { - STATISTICS *Statistics; - PROTOTYPE *Proto; - BUCKETS *Buckets; +PROTOTYPE* MakePrototype(CLUSTERER* Clusterer, CLUSTERCONFIG* Config, + CLUSTER* Cluster) { + STATISTICS* Statistics; + PROTOTYPE* Proto; + BUCKETS* Buckets; // filter out clusters which contain samples from the same character - if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal)) + if (MultipleCharSamples(Clusterer, Cluster, Config->MaxIllegal)) return nullptr; // compute the covariance matrix and ranges for the cluster @@ -986,7 +2017,7 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, // character samples have been removed (as above) Proto = MakeDegenerateProto( Clusterer->SampleSize, Cluster, Statistics, Config->ProtoStyle, - (int32_t) (Config->MinSamples * Clusterer->NumChar)); + (int32_t)(Config->MinSamples * Clusterer->NumChar)); if (Proto != nullptr) { FreeStatistics(Statistics); return Proto; @@ -1007,8 +2038,8 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, } // create a histogram data structure used to evaluate distributions - Buckets = GetBuckets(Clusterer, normal, Cluster->SampleCount, - Config->Confidence); + Buckets = + GetBuckets(Clusterer, normal, Cluster->SampleCount, Config->Confidence); // create a prototype based on the statistics and test it switch (Config->ProtoStyle) { @@ -1024,18 +2055,16 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, break; case automatic: Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets); - if (Proto != nullptr) - break; + if (Proto != nullptr) break; Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, Buckets); - if (Proto != nullptr) - break; + if (Proto != nullptr) break; Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, Config->Confidence); break; } FreeStatistics(Statistics); return Proto; -} // MakePrototype +} // MakePrototype /** * This routine checks for clusters which are degenerate and @@ -1060,34 +2089,30 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, * 7/12/89, DSJ, Changed name and added check for 0 stddev. * 8/8/89, DSJ, Removed check for 0 stddev (handled elsewhere). */ -PROTOTYPE *MakeDegenerateProto( //this was MinSample - uint16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics, - PROTOSTYLE Style, - int32_t MinSamples) { - PROTOTYPE *Proto = nullptr; +PROTOTYPE* MakeDegenerateProto( // this was MinSample + uint16_t N, CLUSTER* Cluster, STATISTICS* Statistics, PROTOSTYLE Style, + int32_t MinSamples) { + PROTOTYPE* Proto = nullptr; - if (MinSamples < MINSAMPLESNEEDED) - MinSamples = MINSAMPLESNEEDED; + if (MinSamples < MINSAMPLESNEEDED) MinSamples = MINSAMPLESNEEDED; if (Cluster->SampleCount < MinSamples) { switch (Style) { case spherical: - Proto = NewSphericalProto (N, Cluster, Statistics); + Proto = NewSphericalProto(N, Cluster, Statistics); break; case elliptical: case automatic: - Proto = NewEllipticalProto (N, Cluster, Statistics); + Proto = NewEllipticalProto(N, Cluster, Statistics); break; case mixed: - Proto = NewMixedProto (N, Cluster, Statistics); + Proto = NewMixedProto(N, Cluster, Statistics); break; } Proto->Significant = FALSE; } return (Proto); -} // MakeDegenerateProto +} // MakeDegenerateProto /** * This routine tests the specified cluster to see if ** @@ -1102,10 +2127,8 @@ PROTOTYPE *MakeDegenerateProto( //this was MinSample * @param Statistics statistical info about cluster * @return Pointer to new elliptical prototype or nullptr. */ -PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, - CLUSTERCONFIG *Config, - CLUSTER *Cluster, - STATISTICS *Statistics) { +PROTOTYPE* TestEllipticalProto(CLUSTERER* Clusterer, CLUSTERCONFIG* Config, + CLUSTER* Cluster, STATISTICS* Statistics) { // Fraction of the number of samples used as a range around 1 within // which a cluster has the magic size that allows a boost to the // FTable by kFTableBoostMargin, thus allowing clusters near the @@ -1117,15 +2140,13 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, int N = Clusterer->SampleSize; CLUSTER* Left = Cluster->Left; CLUSTER* Right = Cluster->Right; - if (Left == nullptr || Right == nullptr) - return nullptr; + if (Left == nullptr || Right == nullptr) return nullptr; int TotalDims = Left->SampleCount + Right->SampleCount; - if (TotalDims < N + 1 || TotalDims < 2) - return nullptr; + if (TotalDims < N + 1 || TotalDims < 2) return nullptr; const int kMatrixSize = N * N * sizeof(FLOAT32); - FLOAT32 *Covariance = static_cast(Emalloc(kMatrixSize)); - FLOAT32 *Inverse = static_cast(Emalloc(kMatrixSize)); - FLOAT32 *Delta = static_cast(Emalloc(N * sizeof(FLOAT32))); + FLOAT32* Covariance = static_cast(Emalloc(kMatrixSize)); + FLOAT32* Inverse = static_cast(Emalloc(kMatrixSize)); + FLOAT32* Delta = static_cast(Emalloc(N * sizeof(FLOAT32))); // Compute a new covariance matrix that only uses essential features. for (int i = 0; i < N; ++i) { int row_offset = i * N; @@ -1163,7 +2184,7 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, for (int x = 0; x < N; ++x) { double temp = 0.0; for (int y = 0; y < N; ++y) { - temp += Inverse[y + N*x] * Delta[y]; + temp += Inverse[y + N * x] * Delta[y]; } Tsq += Delta[x] * temp; } @@ -1174,14 +2195,13 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, // Statistical Methods in Medical Research p 473 // By Peter Armitage, Geoffrey Berry, J. N. S. Matthews. // Tsq *= Left->SampleCount * Right->SampleCount / TotalDims; - double F = Tsq * (TotalDims - EssentialN - 1) / ((TotalDims - 2)*EssentialN); + double F = + Tsq * (TotalDims - EssentialN - 1) / ((TotalDims - 2) * EssentialN); int Fx = EssentialN; - if (Fx > FTABLE_X) - Fx = FTABLE_X; + if (Fx > FTABLE_X) Fx = FTABLE_X; --Fx; int Fy = TotalDims - EssentialN - 1; - if (Fy > FTABLE_Y) - Fy = FTABLE_Y; + if (Fy > FTABLE_Y) Fy = FTABLE_Y; --Fy; double FTarget = FTable[Fy][Fx]; if (Config->MagicSamples > 0 && @@ -1191,7 +2211,7 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, FTarget += kFTableBoostMargin; } if (F < FTarget) { - return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); + return NewEllipticalProto(Clusterer->SampleSize, Cluster, Statistics); } return nullptr; } @@ -1209,29 +2229,24 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, * @note Exceptions: None * @note History: 6/1/89, DSJ, Created. */ -PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *Buckets) { - PROTOTYPE *Proto = nullptr; +PROTOTYPE* MakeSphericalProto(CLUSTERER* Clusterer, CLUSTER* Cluster, + STATISTICS* Statistics, BUCKETS* Buckets) { + PROTOTYPE* Proto = nullptr; int i; // check that each dimension is a normal distribution for (i = 0; i < Clusterer->SampleSize; i++) { - if (Clusterer->ParamDesc[i].NonEssential) - continue; + if (Clusterer->ParamDesc[i].NonEssential) continue; - FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Cluster->Mean[i], - sqrt ((FLOAT64) (Statistics->AvgVariance))); - if (!DistributionOK (Buckets)) - break; + FillBuckets(Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Cluster->Mean[i], sqrt((FLOAT64)(Statistics->AvgVariance))); + if (!DistributionOK(Buckets)) break; } // if all dimensions matched a normal distribution, make a proto if (i >= Clusterer->SampleSize) - Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics); + Proto = NewSphericalProto(Clusterer->SampleSize, Cluster, Statistics); return (Proto); -} // MakeSphericalProto +} // MakeSphericalProto /** * This routine tests the specified cluster to see if it can @@ -1246,30 +2261,25 @@ PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, * @note Exceptions: None * @note History: 6/12/89, DSJ, Created. */ -PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *Buckets) { - PROTOTYPE *Proto = nullptr; +PROTOTYPE* MakeEllipticalProto(CLUSTERER* Clusterer, CLUSTER* Cluster, + STATISTICS* Statistics, BUCKETS* Buckets) { + PROTOTYPE* Proto = nullptr; int i; // check that each dimension is a normal distribution for (i = 0; i < Clusterer->SampleSize; i++) { - if (Clusterer->ParamDesc[i].NonEssential) - continue; + if (Clusterer->ParamDesc[i].NonEssential) continue; - FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Cluster->Mean[i], - sqrt ((FLOAT64) Statistics-> - CoVariance[i * (Clusterer->SampleSize + 1)])); - if (!DistributionOK (Buckets)) - break; + FillBuckets( + Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), Cluster->Mean[i], + sqrt((FLOAT64)Statistics->CoVariance[i * (Clusterer->SampleSize + 1)])); + if (!DistributionOK(Buckets)) break; } // if all dimensions matched a normal distribution, make a proto if (i >= Clusterer->SampleSize) - Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); + Proto = NewEllipticalProto(Clusterer->SampleSize, Cluster, Statistics); return (Proto); -} // MakeEllipticalProto +} // MakeEllipticalProto /** * This routine tests each dimension of the specified cluster to @@ -1288,47 +2298,40 @@ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, * @note Exceptions: None * @note History: 6/12/89, DSJ, Created. */ -PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, - CLUSTER *Cluster, - STATISTICS *Statistics, - BUCKETS *NormalBuckets, +PROTOTYPE* MakeMixedProto(CLUSTERER* Clusterer, CLUSTER* Cluster, + STATISTICS* Statistics, BUCKETS* NormalBuckets, FLOAT64 Confidence) { - PROTOTYPE *Proto; + PROTOTYPE* Proto; int i; - BUCKETS *UniformBuckets = nullptr; - BUCKETS *RandomBuckets = nullptr; + BUCKETS* UniformBuckets = nullptr; + BUCKETS* RandomBuckets = nullptr; // create a mixed proto to work on - initially assume all dimensions normal*/ - Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics); + Proto = NewMixedProto(Clusterer->SampleSize, Cluster, Statistics); // find the proper distribution for each dimension for (i = 0; i < Clusterer->SampleSize; i++) { - if (Clusterer->ParamDesc[i].NonEssential) - continue; + if (Clusterer->ParamDesc[i].NonEssential) continue; - FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Proto->Mean[i], - sqrt ((FLOAT64) Proto->Variance.Elliptical[i])); - if (DistributionOK (NormalBuckets)) - continue; + FillBuckets(NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], sqrt((FLOAT64)Proto->Variance.Elliptical[i])); + if (DistributionOK(NormalBuckets)) continue; if (RandomBuckets == nullptr) RandomBuckets = - GetBuckets(Clusterer, D_random, Cluster->SampleCount, Confidence); - MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i])); - FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Proto->Mean[i], Proto->Variance.Elliptical[i]); - if (DistributionOK (RandomBuckets)) - continue; + GetBuckets(Clusterer, D_random, Cluster->SampleCount, Confidence); + MakeDimRandom(i, Proto, &(Clusterer->ParamDesc[i])); + FillBuckets(RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], Proto->Variance.Elliptical[i]); + if (DistributionOK(RandomBuckets)) continue; if (UniformBuckets == nullptr) UniformBuckets = - GetBuckets(Clusterer, uniform, Cluster->SampleCount, Confidence); + GetBuckets(Clusterer, uniform, Cluster->SampleCount, Confidence); MakeDimUniform(i, Proto, Statistics); - FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), - Proto->Mean[i], Proto->Variance.Elliptical[i]); - if (DistributionOK (UniformBuckets)) - continue; + FillBuckets(UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], Proto->Variance.Elliptical[i]); + if (DistributionOK(UniformBuckets)) continue; break; } // if any dimension failed to match a distribution, discard the proto @@ -1337,7 +2340,7 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, Proto = nullptr; } return (Proto); -} // MakeMixedProto +} // MakeMixedProto /** * This routine alters the ith dimension of the specified @@ -1349,7 +2352,7 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, * @note Exceptions: None * @note History: 6/20/89, DSJ, Created. */ -void MakeDimRandom(uint16_t i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { +void MakeDimRandom(uint16_t i, PROTOTYPE* Proto, PARAM_DESC* ParamDesc) { Proto->Distrib[i] = D_random; Proto->Mean[i] = ParamDesc->MidRange; Proto->Variance.Elliptical[i] = ParamDesc->HalfRange; @@ -1358,10 +2361,10 @@ void MakeDimRandom(uint16_t i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range; Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; - Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); + Proto->LogMagnitude = log((double)Proto->TotalMagnitude); // note that the proto Weight is irrelevant for D_random protos -} // MakeDimRandom +} // MakeDimRandom /** * This routine alters the ith dimension of the specified @@ -1373,24 +2376,22 @@ void MakeDimRandom(uint16_t i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { * @note Exceptions: None * @note History: 6/20/89, DSJ, Created. */ -void MakeDimUniform(uint16_t i, PROTOTYPE *Proto, STATISTICS *Statistics) { +void MakeDimUniform(uint16_t i, PROTOTYPE* Proto, STATISTICS* Statistics) { Proto->Distrib[i] = uniform; - Proto->Mean[i] = Proto->Cluster->Mean[i] + - (Statistics->Min[i] + Statistics->Max[i]) / 2; - Proto->Variance.Elliptical[i] = - (Statistics->Max[i] - Statistics->Min[i]) / 2; + Proto->Mean[i] = + Proto->Cluster->Mean[i] + (Statistics->Min[i] + Statistics->Max[i]) / 2; + Proto->Variance.Elliptical[i] = (Statistics->Max[i] - Statistics->Min[i]) / 2; if (Proto->Variance.Elliptical[i] < MINVARIANCE) Proto->Variance.Elliptical[i] = MINVARIANCE; // subtract out the previous magnitude of this dimension from the total Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; - Proto->Magnitude.Elliptical[i] = - 1.0 / (2.0 * Proto->Variance.Elliptical[i]); + Proto->Magnitude.Elliptical[i] = 1.0 / (2.0 * Proto->Variance.Elliptical[i]); Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; - Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); + Proto->LogMagnitude = log((double)Proto->TotalMagnitude); // note that the proto Weight is irrelevant for uniform protos -} // MakeDimUniform +} // MakeDimUniform /** * This routine searches the cluster tree for all leaf nodes @@ -1408,24 +2409,24 @@ void MakeDimUniform(uint16_t i, PROTOTYPE *Proto, STATISTICS *Statistics) { * @note Exceptions: None * @note History: 6/2/89, DSJ, Created. */ -STATISTICS * -ComputeStatistics (int16_t N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { - STATISTICS *Statistics; +STATISTICS* ComputeStatistics(int16_t N, PARAM_DESC ParamDesc[], + CLUSTER* Cluster) { + STATISTICS* Statistics; int i, j; - FLOAT32 *CoVariance; - FLOAT32 *Distance; + FLOAT32* CoVariance; + FLOAT32* Distance; LIST SearchState; - SAMPLE *Sample; + SAMPLE* Sample; uint32_t SampleCountAdjustedForBias; // allocate memory to hold the statistics results - Statistics = (STATISTICS *) Emalloc (sizeof (STATISTICS)); - Statistics->CoVariance = (FLOAT32 *) Emalloc (N * N * sizeof (FLOAT32)); - Statistics->Min = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); - Statistics->Max = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); + Statistics = (STATISTICS*)Emalloc(sizeof(STATISTICS)); + Statistics->CoVariance = (FLOAT32*)Emalloc(N * N * sizeof(FLOAT32)); + Statistics->Min = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); + Statistics->Max = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); // allocate temporary memory to hold the sample to mean distances - Distance = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); + Distance = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); // initialize the statistics Statistics->AvgVariance = 1.0; @@ -1433,12 +2434,11 @@ ComputeStatistics (int16_t N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { for (i = 0; i < N; i++) { Statistics->Min[i] = 0.0; Statistics->Max[i] = 0.0; - for (j = 0; j < N; j++, CoVariance++) - *CoVariance = 0; + for (j = 0; j < N; j++, CoVariance++) *CoVariance = 0; } // find each sample in the cluster and merge it into the statistics InitSampleSearch(SearchState, Cluster); - while ((Sample = NextSample (&SearchState)) != nullptr) { + while ((Sample = NextSample(&SearchState)) != nullptr) { for (i = 0; i < N; i++) { Distance[i] = Sample->Mean[i] - Cluster->Mean[i]; if (ParamDesc[i].Circular) { @@ -1447,10 +2447,8 @@ ComputeStatistics (int16_t N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { if (Distance[i] < -ParamDesc[i].HalfRange) Distance[i] += ParamDesc[i].Range; } - if (Distance[i] < Statistics->Min[i]) - Statistics->Min[i] = Distance[i]; - if (Distance[i] > Statistics->Max[i]) - Statistics->Max[i] = Distance[i]; + if (Distance[i] < Statistics->Min[i]) Statistics->Min[i] = Distance[i]; + if (Distance[i] > Statistics->Max[i]) Statistics->Max[i] = Distance[i]; } CoVariance = Statistics->CoVariance; for (i = 0; i < N; i++) @@ -1467,21 +2465,20 @@ ComputeStatistics (int16_t N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { SampleCountAdjustedForBias = 1; CoVariance = Statistics->CoVariance; for (i = 0; i < N; i++) - for (j = 0; j < N; j++, CoVariance++) { - *CoVariance /= SampleCountAdjustedForBias; - if (j == i) { - if (*CoVariance < MINVARIANCE) - *CoVariance = MINVARIANCE; - Statistics->AvgVariance *= *CoVariance; + for (j = 0; j < N; j++, CoVariance++) { + *CoVariance /= SampleCountAdjustedForBias; + if (j == i) { + if (*CoVariance < MINVARIANCE) *CoVariance = MINVARIANCE; + Statistics->AvgVariance *= *CoVariance; + } } - } - Statistics->AvgVariance = (float)pow((double)Statistics->AvgVariance, - 1.0 / N); + Statistics->AvgVariance = + (float)pow((double)Statistics->AvgVariance, 1.0 / N); // release temporary memory and return free(Distance); return (Statistics); -} // ComputeStatistics +} // ComputeStatistics /** * This routine creates a spherical prototype data structure to @@ -1496,26 +2493,25 @@ ComputeStatistics (int16_t N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { * @note Exceptions: None * @note History: 6/19/89, DSJ, Created. */ -PROTOTYPE *NewSphericalProto(uint16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics) { - PROTOTYPE *Proto; +PROTOTYPE* NewSphericalProto(uint16_t N, CLUSTER* Cluster, + STATISTICS* Statistics) { + PROTOTYPE* Proto; - Proto = NewSimpleProto (N, Cluster); + Proto = NewSimpleProto(N, Cluster); Proto->Variance.Spherical = Statistics->AvgVariance; if (Proto->Variance.Spherical < MINVARIANCE) Proto->Variance.Spherical = MINVARIANCE; Proto->Magnitude.Spherical = - 1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical)); - Proto->TotalMagnitude = (float)pow((double)Proto->Magnitude.Spherical, - (double) N); + 1.0 / sqrt((double)(2.0 * PI * Proto->Variance.Spherical)); + Proto->TotalMagnitude = + (float)pow((double)Proto->Magnitude.Spherical, (double)N); Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; - Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); + Proto->LogMagnitude = log((double)Proto->TotalMagnitude); return (Proto); -} // NewSphericalProto +} // NewSphericalProto /** * This routine creates an elliptical prototype data structure to @@ -1529,17 +2525,16 @@ PROTOTYPE *NewSphericalProto(uint16_t N, * @note Exceptions: None * @note History: 6/19/89, DSJ, Created. */ -PROTOTYPE *NewEllipticalProto(int16_t N, - CLUSTER *Cluster, - STATISTICS *Statistics) { - PROTOTYPE *Proto; - FLOAT32 *CoVariance; +PROTOTYPE* NewEllipticalProto(int16_t N, CLUSTER* Cluster, + STATISTICS* Statistics) { + PROTOTYPE* Proto; + FLOAT32* CoVariance; int i; - Proto = NewSimpleProto (N, Cluster); - Proto->Variance.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); - Proto->Magnitude.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); - Proto->Weight.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); + Proto = NewSimpleProto(N, Cluster); + Proto->Variance.Elliptical = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); + Proto->Magnitude.Elliptical = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); + Proto->Weight.Elliptical = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); CoVariance = Statistics->CoVariance; Proto->TotalMagnitude = 1.0; @@ -1549,14 +2544,14 @@ PROTOTYPE *NewEllipticalProto(int16_t N, Proto->Variance.Elliptical[i] = MINVARIANCE; Proto->Magnitude.Elliptical[i] = - 1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i])); + 1.0 / sqrt((double)(2.0 * PI * Proto->Variance.Elliptical[i])); Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i]; Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; } - Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); + Proto->LogMagnitude = log((double)Proto->TotalMagnitude); Proto->Style = elliptical; return (Proto); -} // NewEllipticalProto +} // NewEllipticalProto /** * This routine creates a mixed prototype data structure to @@ -1573,19 +2568,19 @@ PROTOTYPE *NewEllipticalProto(int16_t N, * @note Exceptions: None * @note History: 6/19/89, DSJ, Created. */ -PROTOTYPE *NewMixedProto(int16_t N, CLUSTER *Cluster, STATISTICS *Statistics) { - PROTOTYPE *Proto; +PROTOTYPE* NewMixedProto(int16_t N, CLUSTER* Cluster, STATISTICS* Statistics) { + PROTOTYPE* Proto; int i; - Proto = NewEllipticalProto (N, Cluster, Statistics); - Proto->Distrib = (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION)); + Proto = NewEllipticalProto(N, Cluster, Statistics); + Proto->Distrib = (DISTRIBUTION*)Emalloc(N * sizeof(DISTRIBUTION)); for (i = 0; i < N; i++) { Proto->Distrib[i] = normal; } Proto->Style = mixed; return (Proto); -} // NewMixedProto +} // NewMixedProto /** * This routine allocates memory to hold a simple prototype @@ -1597,15 +2592,14 @@ PROTOTYPE *NewMixedProto(int16_t N, CLUSTER *Cluster, STATISTICS *Statistics) { * @note Exceptions: None * @note History: 6/19/89, DSJ, Created. */ -PROTOTYPE *NewSimpleProto(int16_t N, CLUSTER *Cluster) { - PROTOTYPE *Proto; +PROTOTYPE* NewSimpleProto(int16_t N, CLUSTER* Cluster) { + PROTOTYPE* Proto; int i; - Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE)); - Proto->Mean = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); + Proto = (PROTOTYPE*)Emalloc(sizeof(PROTOTYPE)); + Proto->Mean = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); - for (i = 0; i < N; i++) - Proto->Mean[i] = Cluster->Mean[i]; + for (i = 0; i < N; i++) Proto->Mean[i] = Cluster->Mean[i]; Proto->Distrib = nullptr; Proto->Significant = TRUE; @@ -1615,7 +2609,7 @@ PROTOTYPE *NewSimpleProto(int16_t N, CLUSTER *Cluster) { Proto->Cluster = Cluster; Proto->Cluster->Prototype = TRUE; return (Proto); -} // NewSimpleProto +} // NewSimpleProto /** * This routine returns TRUE if the specified covariance @@ -1637,36 +2631,32 @@ PROTOTYPE *NewSimpleProto(int16_t N, CLUSTER *Cluster) { * @note Exceptions: None * @note History: 6/4/89, DSJ, Created. */ -bool -Independent(PARAM_DESC* ParamDesc, - int16_t N, FLOAT32* CoVariance, FLOAT32 Independence) { +bool Independent(PARAM_DESC* ParamDesc, int16_t N, FLOAT32* CoVariance, + FLOAT32 Independence) { int i, j; - FLOAT32 *VARii; // points to ith on-diagonal element - FLOAT32 *VARjj; // points to jth on-diagonal element + FLOAT32* VARii; // points to ith on-diagonal element + FLOAT32* VARjj; // points to jth on-diagonal element FLOAT32 CorrelationCoeff; VARii = CoVariance; for (i = 0; i < N; i++, VARii += N + 1) { - if (ParamDesc[i].NonEssential) - continue; + if (ParamDesc[i].NonEssential) continue; VARjj = VARii + N + 1; CoVariance = VARii + 1; for (j = i + 1; j < N; j++, CoVariance++, VARjj += N + 1) { - if (ParamDesc[j].NonEssential) - continue; + if (ParamDesc[j].NonEssential) continue; if ((*VARii == 0.0) || (*VARjj == 0.0)) CorrelationCoeff = 0.0; else CorrelationCoeff = - sqrt (sqrt (*CoVariance * *CoVariance / (*VARii * *VARjj))); - if (CorrelationCoeff > Independence) - return false; + sqrt(sqrt(*CoVariance * *CoVariance / (*VARii * *VARjj))); + if (CorrelationCoeff > Independence) return false; } } return true; -} // Independent +} // Independent /** * This routine returns a histogram data structure which can @@ -1685,13 +2675,11 @@ Independent(PARAM_DESC* ParamDesc, * @note Exceptions: none * @note History: Thu Aug 3 12:58:10 1989, DSJ, Created. */ -BUCKETS *GetBuckets(CLUSTERER* clusterer, - DISTRIBUTION Distribution, - uint32_t SampleCount, - FLOAT64 Confidence) { +BUCKETS* GetBuckets(CLUSTERER* clusterer, DISTRIBUTION Distribution, + uint32_t SampleCount, FLOAT64 Confidence) { // Get an old bucket structure with the same number of buckets. uint16_t NumberOfBuckets = OptimumNumberOfBuckets(SampleCount); - BUCKETS *Buckets = + BUCKETS* Buckets = clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS]; // If a matching bucket structure is not found, make one and save it. @@ -1706,13 +2694,12 @@ BUCKETS *GetBuckets(CLUSTERER* clusterer, if (Confidence != Buckets->Confidence) { Buckets->Confidence = Confidence; Buckets->ChiSquared = ComputeChiSquared( - DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), - Confidence); + DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), Confidence); } InitBuckets(Buckets); } return Buckets; -} // GetBuckets +} // GetBuckets /** * This routine creates a histogram data structure which can @@ -1732,13 +2719,12 @@ BUCKETS *GetBuckets(CLUSTERER* clusterer, * @note Exceptions: None * @note History: 6/4/89, DSJ, Created. */ -BUCKETS *MakeBuckets(DISTRIBUTION Distribution, - uint32_t SampleCount, +BUCKETS* MakeBuckets(DISTRIBUTION Distribution, uint32_t SampleCount, FLOAT64 Confidence) { - const DENSITYFUNC DensityFunction[] = - { NormalDensity, UniformDensity, UniformDensity }; + const DENSITYFUNC DensityFunction[] = {NormalDensity, UniformDensity, + UniformDensity}; int i, j; - BUCKETS *Buckets; + BUCKETS* Buckets; FLOAT64 BucketProbability; FLOAT64 NextBucketBoundary; FLOAT64 Probability; @@ -1749,13 +2735,13 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, bool Symmetrical; // allocate memory needed for data structure - Buckets = static_cast(Emalloc(sizeof(BUCKETS))); + Buckets = static_cast(Emalloc(sizeof(BUCKETS))); Buckets->NumberOfBuckets = OptimumNumberOfBuckets(SampleCount); Buckets->SampleCount = SampleCount; Buckets->Confidence = Confidence; - Buckets->Count = - static_cast(Emalloc(Buckets->NumberOfBuckets * sizeof(uint32_t))); - Buckets->ExpectedCount = static_cast( + Buckets->Count = static_cast( + Emalloc(Buckets->NumberOfBuckets * sizeof(uint32_t))); + Buckets->ExpectedCount = static_cast( Emalloc(Buckets->NumberOfBuckets * sizeof(FLOAT32))); // initialize simple fields @@ -1772,47 +2758,45 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, if (Symmetrical) { // allocate buckets so that all have approx. equal probability - BucketProbability = 1.0 / (FLOAT64) (Buckets->NumberOfBuckets); + BucketProbability = 1.0 / (FLOAT64)(Buckets->NumberOfBuckets); // distribution is symmetric so fill in upper half then copy CurrentBucket = Buckets->NumberOfBuckets / 2; - if (Odd (Buckets->NumberOfBuckets)) + if (Odd(Buckets->NumberOfBuckets)) NextBucketBoundary = BucketProbability / 2; else NextBucketBoundary = BucketProbability; Probability = 0.0; LastProbDensity = - (*DensityFunction[(int) Distribution]) (BUCKETTABLESIZE / 2); + (*DensityFunction[(int)Distribution])(BUCKETTABLESIZE / 2); for (i = BUCKETTABLESIZE / 2; i < BUCKETTABLESIZE; i++) { - ProbDensity = (*DensityFunction[(int) Distribution]) (i + 1); - ProbabilityDelta = Integral (LastProbDensity, ProbDensity, 1.0); + ProbDensity = (*DensityFunction[(int)Distribution])(i + 1); + ProbabilityDelta = Integral(LastProbDensity, ProbDensity, 1.0); Probability += ProbabilityDelta; if (Probability > NextBucketBoundary) { - if (CurrentBucket < Buckets->NumberOfBuckets - 1) - CurrentBucket++; + if (CurrentBucket < Buckets->NumberOfBuckets - 1) CurrentBucket++; NextBucketBoundary += BucketProbability; } Buckets->Bucket[i] = CurrentBucket; Buckets->ExpectedCount[CurrentBucket] += - (FLOAT32) (ProbabilityDelta * SampleCount); + (FLOAT32)(ProbabilityDelta * SampleCount); LastProbDensity = ProbDensity; } // place any leftover probability into the last bucket Buckets->ExpectedCount[CurrentBucket] += - (FLOAT32) ((0.5 - Probability) * SampleCount); + (FLOAT32)((0.5 - Probability) * SampleCount); // copy upper half of distribution to lower half for (i = 0, j = BUCKETTABLESIZE - 1; i < j; i++, j--) - Buckets->Bucket[i] = - Mirror(Buckets->Bucket[j], Buckets->NumberOfBuckets); + Buckets->Bucket[i] = Mirror(Buckets->Bucket[j], Buckets->NumberOfBuckets); // copy upper half of expected counts to lower half for (i = 0, j = Buckets->NumberOfBuckets - 1; i <= j; i++, j--) Buckets->ExpectedCount[i] += Buckets->ExpectedCount[j]; } return Buckets; -} // MakeBuckets +} // MakeBuckets /** * This routine computes the optimum number of histogram @@ -1833,19 +2817,18 @@ uint16_t OptimumNumberOfBuckets(uint32_t SampleCount) { uint8_t Last, Next; FLOAT32 Slope; - if (SampleCount < kCountTable[0]) - return kBucketsTable[0]; + if (SampleCount < kCountTable[0]) return kBucketsTable[0]; for (Last = 0, Next = 1; Next < LOOKUPTABLESIZE; Last++, Next++) { if (SampleCount <= kCountTable[Next]) { - Slope = (FLOAT32) (kBucketsTable[Next] - kBucketsTable[Last]) / - (FLOAT32) (kCountTable[Next] - kCountTable[Last]); - return ((uint16_t) (kBucketsTable[Last] + - Slope * (SampleCount - kCountTable[Last]))); + Slope = (FLOAT32)(kBucketsTable[Next] - kBucketsTable[Last]) / + (FLOAT32)(kCountTable[Next] - kCountTable[Last]); + return ((uint16_t)(kBucketsTable[Last] + + Slope * (SampleCount - kCountTable[Last]))); } } return kBucketsTable[Last]; -} // OptimumNumberOfBuckets +} // OptimumNumberOfBuckets /** * This routine computes the chi-squared value which will @@ -1866,43 +2849,40 @@ uint16_t OptimumNumberOfBuckets(uint32_t SampleCount) { * @note History: 6/5/89, DSJ, Created. */ FLOAT64 -ComputeChiSquared (uint16_t DegreesOfFreedom, FLOAT64 Alpha) -#define CHIACCURACY 0.01 -#define MINALPHA (1e-200) +ComputeChiSquared(uint16_t DegreesOfFreedom, FLOAT64 Alpha) +#define CHIACCURACY 0.01 +#define MINALPHA (1e-200) { static LIST ChiWith[MAXDEGREESOFFREEDOM + 1]; - CHISTRUCT *OldChiSquared; + CHISTRUCT* OldChiSquared; CHISTRUCT SearchKey; // limit the minimum alpha that can be used - if alpha is too small // it may not be possible to compute chi-squared. Alpha = ClipToRange(Alpha, MINALPHA, 1.0); - if (Odd (DegreesOfFreedom)) - DegreesOfFreedom++; + if (Odd(DegreesOfFreedom)) DegreesOfFreedom++; /* find the list of chi-squared values which have already been computed for the specified number of degrees of freedom. Search the list for the desired chi-squared. */ SearchKey.Alpha = Alpha; - OldChiSquared = (CHISTRUCT *) first_node (search (ChiWith[DegreesOfFreedom], - &SearchKey, AlphaMatch)); + OldChiSquared = (CHISTRUCT*)first_node( + search(ChiWith[DegreesOfFreedom], &SearchKey, AlphaMatch)); if (OldChiSquared == nullptr) { - OldChiSquared = NewChiStruct (DegreesOfFreedom, Alpha); - OldChiSquared->ChiSquared = Solve (ChiArea, OldChiSquared, - (FLOAT64) DegreesOfFreedom, - (FLOAT64) CHIACCURACY); - ChiWith[DegreesOfFreedom] = push (ChiWith[DegreesOfFreedom], - OldChiSquared); - } - else { + OldChiSquared = NewChiStruct(DegreesOfFreedom, Alpha); + OldChiSquared->ChiSquared = + Solve(ChiArea, OldChiSquared, (FLOAT64)DegreesOfFreedom, + (FLOAT64)CHIACCURACY); + ChiWith[DegreesOfFreedom] = push(ChiWith[DegreesOfFreedom], OldChiSquared); + } else { // further optimization might move OldChiSquared to front of list } return (OldChiSquared->ChiSquared); -} // ComputeChiSquared +} // ComputeChiSquared /** * This routine computes the probability density function @@ -1920,12 +2900,13 @@ ComputeChiSquared (uint16_t DegreesOfFreedom, FLOAT64 Alpha) * @note Exceptions: None * @note History: 6/4/89, DSJ, Created. */ -FLOAT64 NormalDensity(int32_t x) { +FLOAT64 +NormalDensity(int32_t x) { FLOAT64 Distance; Distance = x - kNormalMean; return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance); -} // NormalDensity +} // NormalDensity /** * This routine computes the probability density function @@ -1936,14 +2917,15 @@ FLOAT64 NormalDensity(int32_t x) { * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. */ -FLOAT64 UniformDensity(int32_t x) { - static FLOAT64 UniformDistributionDensity = (FLOAT64) 1.0 / BUCKETTABLESIZE; +FLOAT64 +UniformDensity(int32_t x) { + static FLOAT64 UniformDistributionDensity = (FLOAT64)1.0 / BUCKETTABLESIZE; if ((x >= 0.0) && (x <= BUCKETTABLESIZE)) return UniformDistributionDensity; else - return (FLOAT64) 0.0; -} // UniformDensity + return (FLOAT64)0.0; +} // UniformDensity /** * This routine computes a trapezoidal approximation to the @@ -1955,9 +2937,10 @@ FLOAT64 UniformDensity(int32_t x) { * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. */ -FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { +FLOAT64 +Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { return (f1 + f2) * Dx / 2.0; -} // Integral +} // Integral /** * This routine counts the number of cluster samples which @@ -1981,20 +2964,15 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. */ -void FillBuckets(BUCKETS *Buckets, - CLUSTER *Cluster, - uint16_t Dim, - PARAM_DESC *ParamDesc, - FLOAT32 Mean, - FLOAT32 StdDev) { +void FillBuckets(BUCKETS* Buckets, CLUSTER* Cluster, uint16_t Dim, + PARAM_DESC* ParamDesc, FLOAT32 Mean, FLOAT32 StdDev) { uint16_t BucketID; int i; LIST SearchState; - SAMPLE *Sample; + SAMPLE* Sample; // initialize the histogram bucket counts to 0 - for (i = 0; i < Buckets->NumberOfBuckets; i++) - Buckets->Count[i] = 0; + for (i = 0; i < Buckets->NumberOfBuckets; i++) Buckets->Count[i] = 0; if (StdDev == 0.0) { /* if the standard deviation is zero, then we can't statistically @@ -2005,7 +2983,7 @@ void FillBuckets(BUCKETS *Buckets, InitSampleSearch(SearchState, Cluster); i = 0; - while ((Sample = NextSample (&SearchState)) != nullptr) { + while ((Sample = NextSample(&SearchState)) != nullptr) { if (Sample->Mean[Dim] > Mean) BucketID = Buckets->NumberOfBuckets - 1; else if (Sample->Mean[Dim] < Mean) @@ -2014,23 +2992,19 @@ void FillBuckets(BUCKETS *Buckets, BucketID = i; Buckets->Count[BucketID] += 1; i++; - if (i >= Buckets->NumberOfBuckets) - i = 0; + if (i >= Buckets->NumberOfBuckets) i = 0; } - } - else { + } else { // search for all samples in the cluster and add to histogram buckets InitSampleSearch(SearchState, Cluster); - while ((Sample = NextSample (&SearchState)) != nullptr) { + while ((Sample = NextSample(&SearchState)) != nullptr) { switch (Buckets->Distribution) { case normal: - BucketID = NormalBucket (ParamDesc, Sample->Mean[Dim], - Mean, StdDev); + BucketID = NormalBucket(ParamDesc, Sample->Mean[Dim], Mean, StdDev); break; case D_random: case uniform: - BucketID = UniformBucket (ParamDesc, Sample->Mean[Dim], - Mean, StdDev); + BucketID = UniformBucket(ParamDesc, Sample->Mean[Dim], Mean, StdDev); break; default: BucketID = 0; @@ -2038,7 +3012,7 @@ void FillBuckets(BUCKETS *Buckets, Buckets->Count[Buckets->Bucket[BucketID]] += 1; } } -} // FillBuckets +} // FillBuckets /** * This routine determines which bucket x falls into in the @@ -2053,10 +3027,8 @@ void FillBuckets(BUCKETS *Buckets, * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. */ -uint16_t NormalBucket(PARAM_DESC *ParamDesc, - FLOAT32 x, - FLOAT32 Mean, - FLOAT32 StdDev) { +uint16_t NormalBucket(PARAM_DESC* ParamDesc, FLOAT32 x, FLOAT32 Mean, + FLOAT32 StdDev) { FLOAT32 X; // wraparound circular parameters if necessary @@ -2068,12 +3040,10 @@ uint16_t NormalBucket(PARAM_DESC *ParamDesc, } X = ((x - Mean) / StdDev) * kNormalStdDev + kNormalMean; - if (X < 0) - return 0; - if (X > BUCKETTABLESIZE - 1) - return ((uint16_t) (BUCKETTABLESIZE - 1)); - return (uint16_t) floor((FLOAT64) X); -} // NormalBucket + if (X < 0) return 0; + if (X > BUCKETTABLESIZE - 1) return ((uint16_t)(BUCKETTABLESIZE - 1)); + return (uint16_t)floor((FLOAT64)X); +} // NormalBucket /** * This routine determines which bucket x falls into in the @@ -2088,10 +3058,8 @@ uint16_t NormalBucket(PARAM_DESC *ParamDesc, * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. */ -uint16_t UniformBucket(PARAM_DESC *ParamDesc, - FLOAT32 x, - FLOAT32 Mean, - FLOAT32 StdDev) { +uint16_t UniformBucket(PARAM_DESC* ParamDesc, FLOAT32 x, FLOAT32 Mean, + FLOAT32 StdDev) { FLOAT32 X; // wraparound circular parameters if necessary @@ -2103,12 +3071,10 @@ uint16_t UniformBucket(PARAM_DESC *ParamDesc, } X = ((x - Mean) / (2 * StdDev) * BUCKETTABLESIZE + BUCKETTABLESIZE / 2.0); - if (X < 0) - return 0; - if (X > BUCKETTABLESIZE - 1) - return (uint16_t) (BUCKETTABLESIZE - 1); - return (uint16_t) floor((FLOAT64) X); -} // UniformBucket + if (X < 0) return 0; + if (X > BUCKETTABLESIZE - 1) return (uint16_t)(BUCKETTABLESIZE - 1); + return (uint16_t)floor((FLOAT64)X); +} // UniformBucket /** * This routine performs a chi-square goodness of fit test @@ -2131,8 +3097,8 @@ bool DistributionOK(BUCKETS* Buckets) { TotalDifference = 0.0; for (i = 0; i < Buckets->NumberOfBuckets; i++) { FrequencyDifference = Buckets->Count[i] - Buckets->ExpectedCount[i]; - TotalDifference += (FrequencyDifference * FrequencyDifference) / - Buckets->ExpectedCount[i]; + TotalDifference += + (FrequencyDifference * FrequencyDifference) / Buckets->ExpectedCount[i]; } // test to see if the difference is more than expected @@ -2140,7 +3106,7 @@ bool DistributionOK(BUCKETS* Buckets) { return false; else return true; -} // DistributionOK +} // DistributionOK /** * This routine frees the memory used by the statistics @@ -2150,23 +3116,23 @@ bool DistributionOK(BUCKETS* Buckets) { * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. */ -void FreeStatistics(STATISTICS *Statistics) { +void FreeStatistics(STATISTICS* Statistics) { free(Statistics->CoVariance); free(Statistics->Min); free(Statistics->Max); free(Statistics); -} // FreeStatistics +} // FreeStatistics /** * This routine properly frees the memory used by a BUCKETS. * * @param buckets pointer to data structure to be freed */ -void FreeBuckets(BUCKETS *buckets) { +void FreeBuckets(BUCKETS* buckets) { Efree(buckets->Count); Efree(buckets->ExpectedCount); Efree(buckets); -} // FreeBuckets +} // FreeBuckets /** * This routine frees the memory consumed by the specified @@ -2180,13 +3146,13 @@ void FreeBuckets(BUCKETS *buckets) { * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ -void FreeCluster(CLUSTER *Cluster) { +void FreeCluster(CLUSTER* Cluster) { if (Cluster != nullptr) { - FreeCluster (Cluster->Left); - FreeCluster (Cluster->Right); + FreeCluster(Cluster->Left); + FreeCluster(Cluster->Right); free(Cluster); } -} // FreeCluster +} // FreeCluster /** * This routine computes the degrees of freedom that should @@ -2202,17 +3168,17 @@ void FreeCluster(CLUSTER *Cluster) { * @note Exceptions: none * @note History: Thu Aug 3 14:04:18 1989, DSJ, Created. */ -uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets) { - static uint8_t DegreeOffsets[] = { 3, 3, 1 }; +uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, + uint16_t HistogramBuckets) { + static uint8_t DegreeOffsets[] = {3, 3, 1}; uint16_t AdjustedNumBuckets; - AdjustedNumBuckets = HistogramBuckets - DegreeOffsets[(int) Distribution]; - if (Odd (AdjustedNumBuckets)) - AdjustedNumBuckets++; + AdjustedNumBuckets = HistogramBuckets - DegreeOffsets[(int)Distribution]; + if (Odd(AdjustedNumBuckets)) AdjustedNumBuckets++; return (AdjustedNumBuckets); -} // DegreesOfFreedom +} // DegreesOfFreedom /** * This routine is used to search a list of histogram data @@ -2224,14 +3190,14 @@ uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets) * @note Exceptions: none * @note History: Thu Aug 3 14:17:33 1989, DSJ, Created. */ -int NumBucketsMatch(void *arg1, // BUCKETS *Histogram, - void *arg2) { // uint16_t *DesiredNumberOfBuckets) - BUCKETS *Histogram = (BUCKETS *) arg1; - uint16_t *DesiredNumberOfBuckets = (uint16_t *) arg2; +int NumBucketsMatch(void* arg1, // BUCKETS *Histogram, + void* arg2) { // uint16_t *DesiredNumberOfBuckets) + BUCKETS* Histogram = (BUCKETS*)arg1; + uint16_t* DesiredNumberOfBuckets = (uint16_t*)arg2; return (*DesiredNumberOfBuckets == Histogram->NumberOfBuckets); -} // NumBucketsMatch +} // NumBucketsMatch /** * This routine is used to search a list for a list node @@ -2241,11 +3207,11 @@ int NumBucketsMatch(void *arg1, // BUCKETS *Histogram, * @note Exceptions: none * @note History: Thu Aug 3 14:23:58 1989, DSJ, Created. */ -int ListEntryMatch(void *arg1, //ListNode - void *arg2) { //Key +int ListEntryMatch(void* arg1, // ListNode + void* arg2) { // Key return (arg1 == arg2); -} // ListEntryMatch +} // ListEntryMatch /** * This routine multiplies each ExpectedCount histogram entry @@ -2257,12 +3223,11 @@ int ListEntryMatch(void *arg1, //ListNode * @note Exceptions: none * @note History: Thu Aug 3 14:31:14 1989, DSJ, Created. */ -void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount) { +void AdjustBuckets(BUCKETS* Buckets, uint32_t NewSampleCount) { int i; FLOAT64 AdjustFactor; - AdjustFactor = (((FLOAT64) NewSampleCount) / - ((FLOAT64) Buckets->SampleCount)); + AdjustFactor = (((FLOAT64)NewSampleCount) / ((FLOAT64)Buckets->SampleCount)); for (i = 0; i < Buckets->NumberOfBuckets; i++) { Buckets->ExpectedCount[i] *= AdjustFactor; @@ -2270,7 +3235,7 @@ void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount) { Buckets->SampleCount = NewSampleCount; -} // AdjustBuckets +} // AdjustBuckets /** * This routine sets the bucket counts in the specified histogram @@ -2280,14 +3245,14 @@ void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount) { * @note Exceptions: none * @note History: Thu Aug 3 14:31:14 1989, DSJ, Created. */ -void InitBuckets(BUCKETS *Buckets) { +void InitBuckets(BUCKETS* Buckets) { int i; for (i = 0; i < Buckets->NumberOfBuckets; i++) { Buckets->Count[i] = 0; } -} // InitBuckets +} // InitBuckets /** * This routine is used to search a list of structures which @@ -2303,14 +3268,14 @@ void InitBuckets(BUCKETS *Buckets) { * @note Exceptions: none * @note History: Thu Aug 3 14:17:33 1989, DSJ, Created. */ -int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct, - void *arg2) { //CHISTRUCT *SearchKey) - CHISTRUCT *ChiStruct = (CHISTRUCT *) arg1; - CHISTRUCT *SearchKey = (CHISTRUCT *) arg2; +int AlphaMatch(void* arg1, // CHISTRUCT *ChiStruct, + void* arg2) { // CHISTRUCT *SearchKey) + CHISTRUCT* ChiStruct = (CHISTRUCT*)arg1; + CHISTRUCT* SearchKey = (CHISTRUCT*)arg2; return (ChiStruct->Alpha == SearchKey->Alpha); -} // AlphaMatch +} // AlphaMatch /** * This routine allocates a new data structure which is used @@ -2323,15 +3288,15 @@ int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct * @note Exceptions: none * @note History: Fri Aug 4 11:04:59 1989, DSJ, Created. */ -CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, FLOAT64 Alpha) { - CHISTRUCT *NewChiStruct; +CHISTRUCT* NewChiStruct(uint16_t DegreesOfFreedom, FLOAT64 Alpha) { + CHISTRUCT* NewChiStruct; - NewChiStruct = (CHISTRUCT *) Emalloc (sizeof (CHISTRUCT)); + NewChiStruct = (CHISTRUCT*)Emalloc(sizeof(CHISTRUCT)); NewChiStruct->DegreesOfFreedom = DegreesOfFreedom; NewChiStruct->Alpha = Alpha; return (NewChiStruct); -} // NewChiStruct +} // NewChiStruct /** * This routine attempts to find an x value at which Function @@ -2349,10 +3314,10 @@ CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, FLOAT64 Alpha) { * @note History: Fri Aug 4 11:08:59 1989, DSJ, Created. */ FLOAT64 -Solve (SOLVEFUNC Function, -void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) -#define INITIALDELTA 0.1 -#define DELTARATIO 0.1 +Solve(SOLVEFUNC Function, void* FunctionParams, FLOAT64 InitialGuess, + FLOAT64 Accuracy) +#define INITIALDELTA 0.1 +#define DELTARATIO 0.1 { FLOAT64 x; FLOAT64 f; @@ -2366,8 +3331,8 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) Delta = INITIALDELTA; LastPosX = MAX_FLOAT32; LastNegX = -MAX_FLOAT32; - f = (*Function) ((CHISTRUCT *) FunctionParams, x); - while (Abs (LastPosX - LastNegX) > Accuracy) { + f = (*Function)((CHISTRUCT*)FunctionParams, x); + while (Abs(LastPosX - LastNegX) > Accuracy) { // keep track of outer bounds of current estimate if (f < 0) LastNegX = x; @@ -2375,25 +3340,23 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) LastPosX = x; // compute the approx. slope of f(x) at the current point - Slope = - ((*Function) ((CHISTRUCT *) FunctionParams, x + Delta) - f) / Delta; + Slope = ((*Function)((CHISTRUCT*)FunctionParams, x + Delta) - f) / Delta; // compute the next solution guess */ xDelta = f / Slope; x -= xDelta; // reduce the delta used for computing slope to be a fraction of - //the amount moved to get to the new guess - NewDelta = Abs (xDelta) * DELTARATIO; - if (NewDelta < Delta) - Delta = NewDelta; + // the amount moved to get to the new guess + NewDelta = Abs(xDelta) * DELTARATIO; + if (NewDelta < Delta) Delta = NewDelta; // compute the value of the function at the new guess - f = (*Function) ((CHISTRUCT *) FunctionParams, x); + f = (*Function)((CHISTRUCT*)FunctionParams, x); } return (x); -} // Solve +} // Solve /** * This routine computes the area under a chi density curve @@ -2415,7 +3378,8 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) * @note Exceptions: none * @note History: Fri Aug 4 12:48:41 1989, DSJ, Created. */ -FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) { +FLOAT64 +ChiArea(CHISTRUCT* ChiParams, FLOAT64 x) { int i, N; FLOAT64 SeriesTotal; FLOAT64 Denominator; @@ -2430,9 +3394,9 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) { PowerOfx *= x; SeriesTotal += PowerOfx / Denominator; } - return ((SeriesTotal * exp (-0.5 * x)) - ChiParams->Alpha); + return ((SeriesTotal * exp(-0.5 * x)) - ChiParams->Alpha); -} // ChiArea +} // ChiArea /** * This routine looks at all samples in the specified cluster. @@ -2461,16 +3425,15 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) { * 2/22/90, DSJ, Added MaxIllegal control rather than always * splitting illegal clusters. */ -bool -MultipleCharSamples(CLUSTERER* Clusterer, - CLUSTER* Cluster, FLOAT32 MaxIllegal) -#define ILLEGAL_CHAR 2 +bool MultipleCharSamples(CLUSTERER* Clusterer, CLUSTER* Cluster, + FLOAT32 MaxIllegal) +#define ILLEGAL_CHAR 2 { - static BOOL8 *CharFlags = nullptr; + static BOOL8* CharFlags = nullptr; static int32_t NumFlags = 0; int i; LIST SearchState; - SAMPLE *Sample; + SAMPLE* Sample; int32_t CharID; int32_t NumCharInCluster; int32_t NumIllegalInCluster; @@ -2483,26 +3446,24 @@ MultipleCharSamples(CLUSTERER* Clusterer, if (Clusterer->NumChar > NumFlags) { free(CharFlags); NumFlags = Clusterer->NumChar; - CharFlags = (BOOL8 *) Emalloc (NumFlags * sizeof (BOOL8)); + CharFlags = (BOOL8*)Emalloc(NumFlags * sizeof(BOOL8)); } - for (i = 0; i < NumFlags; i++) - CharFlags[i] = FALSE; + for (i = 0; i < NumFlags; i++) CharFlags[i] = FALSE; // find each sample in the cluster and check if we have seen it before InitSampleSearch(SearchState, Cluster); - while ((Sample = NextSample (&SearchState)) != nullptr) { + while ((Sample = NextSample(&SearchState)) != nullptr) { CharID = Sample->CharID; if (CharFlags[CharID] == FALSE) { CharFlags[CharID] = TRUE; - } - else { + } else { if (CharFlags[CharID] == TRUE) { NumIllegalInCluster++; CharFlags[CharID] = ILLEGAL_CHAR; } NumCharInCluster--; - PercentIllegal = (FLOAT32) NumIllegalInCluster / NumCharInCluster; + PercentIllegal = (FLOAT32)NumIllegalInCluster / NumCharInCluster; if (PercentIllegal > MaxIllegal) { destroy(SearchState); return true; @@ -2511,7 +3472,7 @@ MultipleCharSamples(CLUSTERER* Clusterer, } return false; -} // MultipleCharSamples +} // MultipleCharSamples /** * Compute the inverse of a matrix using LU decomposition with partial pivoting. @@ -2529,7 +3490,7 @@ double InvertMatrix(const float* input, int size, float* inv) { int col; for (row = 0; row < size; row++) { for (col = 0; col < size; col++) { - U[row][col] = input[row*size + col]; + U[row][col] = input[row * size + col]; L[row][col] = row == col ? 1.0 : 0.0; U_inv[row][col] = 0.0; } @@ -2586,7 +3547,7 @@ double InvertMatrix(const float* input, int size, float* inv) { for (int k = row; k < size; ++k) { sum += U_inv[row][k] * L[k][col]; } - inv[row*size + col] = sum; + inv[row * size + col] = sum; } } // Check matrix product. @@ -2595,7 +3556,7 @@ double InvertMatrix(const float* input, int size, float* inv) { for (col = 0; col < size; col++) { double sum = 0.0; for (int k = 0; k < size; ++k) { - sum += input[row*size + k] * inv[k *size + col]; + sum += input[row * size + k] * inv[k * size + col]; } if (row != col) { error_sum += Abs(sum); diff --git a/src/classify/cluster.h b/src/classify/cluster.h index e4a176a58e..40a88a2d51 100644 --- a/src/classify/cluster.h +++ b/src/classify/cluster.h @@ -15,120 +15,121 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef CLUSTER_H -#define CLUSTER_H +#ifndef CLUSTER_H +#define CLUSTER_H #include "kdtree.h" #include "oldlist.h" struct BUCKETS; -#define MINBUCKETS 5 -#define MAXBUCKETS 39 +#define MINBUCKETS 5 +#define MAXBUCKETS 39 /*---------------------------------------------------------------------- Types ----------------------------------------------------------------------*/ typedef struct sample { - unsigned Clustered:1; // TRUE if included in a higher cluster - unsigned Prototype:1; // TRUE if cluster represented by a proto - unsigned SampleCount:30; // number of samples in this cluster - struct sample *Left; // ptr to left sub-cluster - struct sample *Right; // ptr to right sub-cluster - int32_t CharID; // identifier of char sample came from - FLOAT32 Mean[1]; // mean of cluster - SampleSize floats + unsigned Clustered : 1; // TRUE if included in a higher cluster + unsigned Prototype : 1; // TRUE if cluster represented by a proto + unsigned SampleCount : 30; // number of samples in this cluster + struct sample* Left; // ptr to left sub-cluster + struct sample* Right; // ptr to right sub-cluster + int32_t CharID; // identifier of char sample came from + FLOAT32 Mean[1]; // mean of cluster - SampleSize floats } CLUSTER; -typedef CLUSTER SAMPLE; // can refer to as either sample or cluster +typedef CLUSTER SAMPLE; // can refer to as either sample or cluster -typedef enum { - spherical, elliptical, mixed, automatic -} PROTOSTYLE; +typedef enum { spherical, elliptical, mixed, automatic } PROTOSTYLE; -typedef struct { // parameters to control clustering - PROTOSTYLE ProtoStyle; // specifies types of protos to be made - FLOAT32 MinSamples; // min # of samples per proto - % of total - FLOAT32 MaxIllegal; // max percentage of samples in a cluster which have +typedef struct { // parameters to control clustering + PROTOSTYLE ProtoStyle; // specifies types of protos to be made + FLOAT32 MinSamples; // min # of samples per proto - % of total + FLOAT32 MaxIllegal; // max percentage of samples in a cluster which have // more than 1 feature in that cluster - FLOAT32 Independence; // desired independence between dimensions - FLOAT64 Confidence; // desired confidence in prototypes created - int MagicSamples; // Ideal number of samples in a cluster. + FLOAT32 Independence; // desired independence between dimensions + FLOAT64 Confidence; // desired confidence in prototypes created + int MagicSamples; // Ideal number of samples in a cluster. } CLUSTERCONFIG; -typedef enum { - normal, uniform, D_random, DISTRIBUTION_COUNT -} DISTRIBUTION; +typedef enum { normal, uniform, D_random, DISTRIBUTION_COUNT } DISTRIBUTION; typedef union { FLOAT32 Spherical; - FLOAT32 *Elliptical; + FLOAT32* Elliptical; } FLOATUNION; typedef struct { - unsigned Significant:1; // TRUE if prototype is significant - unsigned Merged:1; // Merged after clustering so do not output - // but kept for display purposes. If it has no - // samples then it was actually merged. - // Otherwise it matched an already significant - // cluster. - unsigned Style:2; // spherical, elliptical, or mixed - unsigned NumSamples:28; // number of samples in the cluster - CLUSTER *Cluster; // ptr to cluster which made prototype - DISTRIBUTION *Distrib; // different distribution for each dimension - FLOAT32 *Mean; // prototype mean - FLOAT32 TotalMagnitude; // total magnitude over all dimensions - FLOAT32 LogMagnitude; // log base e of TotalMagnitude - FLOATUNION Variance; // prototype variance - FLOATUNION Magnitude; // magnitude of density function - FLOATUNION Weight; // weight of density function + unsigned Significant : 1; // TRUE if prototype is significant + unsigned Merged : 1; // Merged after clustering so do not output + // but kept for display purposes. If it has no + // samples then it was actually merged. + // Otherwise it matched an already significant + // cluster. + unsigned Style : 2; // spherical, elliptical, or mixed + unsigned NumSamples : 28; // number of samples in the cluster + CLUSTER* Cluster; // ptr to cluster which made prototype + DISTRIBUTION* Distrib; // different distribution for each dimension + FLOAT32* Mean; // prototype mean + FLOAT32 TotalMagnitude; // total magnitude over all dimensions + FLOAT32 LogMagnitude; // log base e of TotalMagnitude + FLOATUNION Variance; // prototype variance + FLOATUNION Magnitude; // magnitude of density function + FLOATUNION Weight; // weight of density function } PROTOTYPE; typedef struct { - int16_t SampleSize; // number of parameters per sample - PARAM_DESC *ParamDesc; // description of each parameter - int32_t NumberOfSamples; // total number of samples being clustered - KDTREE *KDTree; // for optimal nearest neighbor searching - CLUSTER *Root; // ptr to root cluster of cluster tree - LIST ProtoList; // list of prototypes - int32_t NumChar; // # of characters represented by samples + int16_t SampleSize; // number of parameters per sample + PARAM_DESC* ParamDesc; // description of each parameter + int32_t NumberOfSamples; // total number of samples being clustered + KDTREE* KDTree; // for optimal nearest neighbor searching + CLUSTER* Root; // ptr to root cluster of cluster tree + LIST ProtoList; // list of prototypes + int32_t NumChar; // # of characters represented by samples // cache of reusable histograms by distribution type and number of buckets. BUCKETS* bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS + 1 - MINBUCKETS]; } CLUSTERER; typedef struct { - int32_t NumSamples; // number of samples in list - int32_t MaxNumSamples; // maximum size of list - SAMPLE *Sample[1]; // array of ptrs to sample data structures + int32_t NumSamples; // number of samples in list + int32_t MaxNumSamples; // maximum size of list + SAMPLE* Sample[1]; // array of ptrs to sample data structures } SAMPLELIST; // low level cluster tree analysis routines. -#define InitSampleSearch(S,C) (((C)==nullptr)?(S=NIL_LIST):(S=push(NIL_LIST,(C)))) +#define InitSampleSearch(S, C) \ + (((C) == nullptr) ? (S = NIL_LIST) : (S = push(NIL_LIST, (C)))) /*-------------------------------------------------------------------------- Public Function Prototypes --------------------------------------------------------------------------*/ -CLUSTERER *MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]); +CLUSTERER* MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[]); -SAMPLE *MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, int32_t CharID); +SAMPLE* MakeSample(CLUSTERER* Clusterer, const FLOAT32* Feature, + int32_t CharID); -LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config); +LIST ClusterSamples(CLUSTERER* Clusterer, CLUSTERCONFIG* Config); -void FreeClusterer(CLUSTERER *Clusterer); +void FreeClusterer(CLUSTERER* Clusterer); -void FreeProtoList(LIST *ProtoList); +void FreeProtoList(LIST* ProtoList); -void FreePrototype(void *arg); // PROTOTYPE *Prototype); +void FreePrototype(void* arg); // PROTOTYPE *Prototype); -CLUSTER *NextSample(LIST *SearchState); +CLUSTER* NextSample(LIST* SearchState); -FLOAT32 Mean(PROTOTYPE *Proto, uint16_t Dimension); +FLOAT32 +Mean(PROTOTYPE* Proto, uint16_t Dimension); -FLOAT32 StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension); +FLOAT32 +StandardDeviation(PROTOTYPE* Proto, uint16_t Dimension); int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, - FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]); + FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]); -//--------------Global Data Definitions and Declarations--------------------------- +//--------------Global Data Definitions and +// Declarations--------------------------- // define errors that can be trapped -#define ALREADYCLUSTERED 4000 +#define ALREADYCLUSTERED 4000 #endif diff --git a/src/classify/clusttool.cpp b/src/classify/clusttool.cpp index 093a5c6fca..d061902dd8 100644 --- a/src/classify/clusttool.cpp +++ b/src/classify/clusttool.cpp @@ -18,17 +18,17 @@ //--------------------------Include Files---------------------------------- #include "clusttool.h" +#include +#include #include "const.h" #include "danerror.h" #include "emalloc.h" #include "scanutils.h" -#include -#include using tesseract::TFile; //---------------Global Data Definitions and Declarations-------------------- -#define TOKENSIZE 80 //< max size of tokens read from an input file +#define TOKENSIZE 80 //< max size of tokens read from an input file #define QUOTED_TOKENSIZE "79" #define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space //#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block @@ -44,7 +44,7 @@ using tesseract::TFile; * @note Exceptions: ILLEGALSAMPLESIZE illegal format or range * @note History: 6/6/89, DSJ, Created. */ -uint16_t ReadSampleSize(TFile *fp) { +uint16_t ReadSampleSize(TFile* fp) { int SampleSize = 0; const int kMaxLineSize = 100; @@ -52,7 +52,7 @@ uint16_t ReadSampleSize(TFile *fp) { if (fp->FGets(line, kMaxLineSize) == nullptr || sscanf(line, "%d", &SampleSize) != 1 || (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE)) - DoError (ILLEGALSAMPLESIZE, "Illegal sample size"); + DoError(ILLEGALSAMPLESIZE, "Illegal sample size"); return (SampleSize); } @@ -70,11 +70,11 @@ uint16_t ReadSampleSize(TFile *fp) { * @note Globals: None * @note History: 6/6/89, DSJ, Created. */ -PARAM_DESC *ReadParamDesc(TFile *fp, uint16_t N) { - PARAM_DESC *ParamDesc; +PARAM_DESC* ReadParamDesc(TFile* fp, uint16_t N) { + PARAM_DESC* ParamDesc; char linear_token[TOKENSIZE], essential_token[TOKENSIZE]; - ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC)); + ParamDesc = (PARAM_DESC*)Emalloc(N * sizeof(PARAM_DESC)); for (int i = 0; i < N; i++) { const int kMaxLineSize = TOKENSIZE * 4; char line[kMaxLineSize]; @@ -115,9 +115,9 @@ PARAM_DESC *ReadParamDesc(TFile *fp, uint16_t N) { * @note Globals: None * @note History: 6/6/89, DSJ, Created. */ -PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { +PROTOTYPE* ReadPrototype(TFile* fp, uint16_t N) { char sig_token[TOKENSIZE], shape_token[TOKENSIZE]; - PROTOTYPE *Proto; + PROTOTYPE* Proto; int SampleCount; int i; @@ -129,7 +129,7 @@ PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { tprintf("Invalid prototype: %s\n", line); return nullptr; } - Proto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE)); + Proto = (PROTOTYPE*)Emalloc(sizeof(PROTOTYPE)); Proto->Cluster = nullptr; if (sig_token[0] == 's') Proto->Significant = TRUE; @@ -155,7 +155,8 @@ PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { Proto->NumSamples = SampleCount; Proto->Mean = ReadNFloats(fp, N, nullptr); - if (Proto->Mean == nullptr) DoError(ILLEGALMEANSPEC, "Illegal prototype mean"); + if (Proto->Mean == nullptr) + DoError(ILLEGALMEANSPEC, "Illegal prototype mean"); switch (Proto->Style) { case spherical: @@ -172,8 +173,8 @@ PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr); if (Proto->Variance.Elliptical == nullptr) DoError(ILLEGALVARIANCESPEC, "Illegal prototype variance"); - Proto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); - Proto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); + Proto->Magnitude.Elliptical = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); + Proto->Weight.Elliptical = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); Proto->TotalMagnitude = 1.0; for (i = 0; i < N; i++) { Proto->Magnitude.Elliptical[i] = @@ -206,7 +207,7 @@ PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { * @note Exceptions: ILLEGALFLOAT * @note History: 6/6/89, DSJ, Created. */ -FLOAT32 *ReadNFloats(TFile *fp, uint16_t N, FLOAT32 Buffer[]) { +FLOAT32* ReadNFloats(TFile* fp, uint16_t N, FLOAT32 Buffer[]) { const int kMaxLineSize = 1024; char line[kMaxLineSize]; if (fp->FGets(line, kMaxLineSize) == nullptr) { @@ -216,13 +217,13 @@ FLOAT32 *ReadNFloats(TFile *fp, uint16_t N, FLOAT32 Buffer[]) { bool needs_free = false; if (Buffer == nullptr) { - Buffer = static_cast(Emalloc(N * sizeof(FLOAT32))); + Buffer = static_cast(Emalloc(N * sizeof(FLOAT32))); needs_free = true; } - char *startptr = line; + char* startptr = line; for (int i = 0; i < N; i++) { - char *endptr; + char* endptr; Buffer[i] = strtof(startptr, &endptr); if (endptr == startptr) { tprintf("Read of %d floats failed!\n", N); @@ -245,21 +246,21 @@ FLOAT32 *ReadNFloats(TFile *fp, uint16_t N, FLOAT32 Buffer[]) { * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ -void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]) { +void WriteParamDesc(FILE* File, uint16_t N, const PARAM_DESC ParamDesc[]) { int i; for (i = 0; i < N; i++) { if (ParamDesc[i].Circular) - fprintf (File, "circular "); + fprintf(File, "circular "); else - fprintf (File, "linear "); + fprintf(File, "linear "); if (ParamDesc[i].NonEssential) - fprintf (File, "non-essential "); + fprintf(File, "non-essential "); else - fprintf (File, "essential "); + fprintf(File, "essential "); - fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); + fprintf(File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); } } @@ -274,42 +275,41 @@ void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]) { * @note Exceptions: None * @note History: 6/12/89, DSJ, Created. */ -void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) { +void WritePrototype(FILE* File, uint16_t N, PROTOTYPE* Proto) { int i; if (Proto->Significant) - fprintf (File, "significant "); + fprintf(File, "significant "); else - fprintf (File, "insignificant "); - WriteProtoStyle (File, (PROTOSTYLE) Proto->Style); - fprintf (File, "%6d\n\t", Proto->NumSamples); - WriteNFloats (File, N, Proto->Mean); - fprintf (File, "\t"); + fprintf(File, "insignificant "); + WriteProtoStyle(File, (PROTOSTYLE)Proto->Style); + fprintf(File, "%6d\n\t", Proto->NumSamples); + WriteNFloats(File, N, Proto->Mean); + fprintf(File, "\t"); switch (Proto->Style) { case spherical: - WriteNFloats (File, 1, &(Proto->Variance.Spherical)); + WriteNFloats(File, 1, &(Proto->Variance.Spherical)); break; case elliptical: - WriteNFloats (File, N, Proto->Variance.Elliptical); + WriteNFloats(File, N, Proto->Variance.Elliptical); break; case mixed: - for (i = 0; i < N; i++) - switch (Proto->Distrib[i]) { - case normal: - fprintf (File, " %9s", "normal"); - break; - case uniform: - fprintf (File, " %9s", "uniform"); - break; - case D_random: - fprintf (File, " %9s", "random"); - break; - case DISTRIBUTION_COUNT: - ASSERT_HOST(!"Distribution count not allowed!"); - } - fprintf (File, "\n\t"); - WriteNFloats (File, N, Proto->Variance.Elliptical); + for (i = 0; i < N; i++) switch (Proto->Distrib[i]) { + case normal: + fprintf(File, " %9s", "normal"); + break; + case uniform: + fprintf(File, " %9s", "uniform"); + break; + case D_random: + fprintf(File, " %9s", "random"); + break; + case DISTRIBUTION_COUNT: + ASSERT_HOST(!"Distribution count not allowed!"); + } + fprintf(File, "\n\t"); + WriteNFloats(File, N, Proto->Variance.Elliptical); } } @@ -324,9 +324,8 @@ void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) { * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ -void WriteNFloats(FILE * File, uint16_t N, FLOAT32 Array[]) { - for (int i = 0; i < N; i++) - fprintf(File, " %9.6f", Array[i]); +void WriteNFloats(FILE* File, uint16_t N, FLOAT32 Array[]) { + for (int i = 0; i < N; i++) fprintf(File, " %9.6f", Array[i]); fprintf(File, "\n"); } @@ -341,19 +340,19 @@ void WriteNFloats(FILE * File, uint16_t N, FLOAT32 Array[]) { * @note Exceptions: None * @note History: 6/8/89, DSJ, Created. */ -void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { +void WriteProtoStyle(FILE* File, PROTOSTYLE ProtoStyle) { switch (ProtoStyle) { case spherical: - fprintf (File, "spherical"); + fprintf(File, "spherical"); break; case elliptical: - fprintf (File, "elliptical"); + fprintf(File, "elliptical"); break; case mixed: - fprintf (File, "mixed"); + fprintf(File, "mixed"); break; case automatic: - fprintf (File, "automatic"); + fprintf(File, "automatic"); break; } } @@ -373,23 +372,22 @@ void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { * @return None * @note Exceptions: None * @note History: 6/12/89, DSJ, Created. -*/ + */ void WriteProtoList(FILE* File, uint16_t N, PARAM_DESC* ParamDesc, LIST ProtoList, bool WriteSigProtos, bool WriteInsigProtos) { - PROTOTYPE *Proto; + PROTOTYPE* Proto; /* write file header */ - fprintf(File,"%0d\n",N); - WriteParamDesc(File,N,ParamDesc); + fprintf(File, "%0d\n", N); + WriteParamDesc(File, N, ParamDesc); /* write prototypes */ - iterate(ProtoList) - { - Proto = (PROTOTYPE *) first_node ( ProtoList ); - if ((Proto->Significant && WriteSigProtos) || - (!Proto->Significant && WriteInsigProtos)) - WritePrototype(File, N, Proto); - } + iterate(ProtoList) { + Proto = (PROTOTYPE*)first_node(ProtoList); + if ((Proto->Significant && WriteSigProtos) || + (!Proto->Significant && WriteInsigProtos)) + WritePrototype(File, N, Proto); + } } diff --git a/src/classify/clusttool.h b/src/classify/clusttool.h index ff0ad6edd5..12e9d906ca 100644 --- a/src/classify/clusttool.h +++ b/src/classify/clusttool.h @@ -28,25 +28,24 @@ /*------------------------------------------------------------------------- Public Function Prototype --------------------------------------------------------------------------*/ -uint16_t ReadSampleSize(tesseract::TFile *fp); +uint16_t ReadSampleSize(tesseract::TFile* fp); -PARAM_DESC *ReadParamDesc(tesseract::TFile *fp, uint16_t N); +PARAM_DESC* ReadParamDesc(tesseract::TFile* fp, uint16_t N); -PROTOTYPE *ReadPrototype(tesseract::TFile *fp, uint16_t N); +PROTOTYPE* ReadPrototype(tesseract::TFile* fp, uint16_t N); -FLOAT32 *ReadNFloats(tesseract::TFile *fp, uint16_t N, FLOAT32 Buffer[]); +FLOAT32* ReadNFloats(tesseract::TFile* fp, uint16_t N, FLOAT32 Buffer[]); -void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]); +void WriteParamDesc(FILE* File, uint16_t N, const PARAM_DESC ParamDesc[]); -void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto); +void WritePrototype(FILE* File, uint16_t N, PROTOTYPE* Proto); -void WriteNFloats (FILE * File, uint16_t N, FLOAT32 Array[]); +void WriteNFloats(FILE* File, uint16_t N, FLOAT32 Array[]); -void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle); +void WriteProtoStyle(FILE* File, PROTOSTYLE ProtoStyle); void WriteProtoList(FILE* File, uint16_t N, PARAM_DESC* ParamDesc, - LIST ProtoList, bool WriteSigProtos, - bool WriteInsigProtos); + LIST ProtoList, bool WriteSigProtos, bool WriteInsigProtos); //--------------Global Data Definitions and Declarations--------------------- // define errors that can be trapped @@ -54,11 +53,11 @@ void WriteProtoList(FILE* File, uint16_t N, PARAM_DESC* ParamDesc, #define ILLEGALCIRCULARSPEC 5001 #define ILLEGALMINMAXSPEC 5002 #define ILLEGALSIGNIFICANCESPEC 5003 -#define ILLEGALSTYLESPEC 5004 -#define ILLEGALSAMPLECOUNT 5005 +#define ILLEGALSTYLESPEC 5004 +#define ILLEGALSAMPLECOUNT 5005 #define ILLEGALMEANSPEC 5006 #define ILLEGALVARIANCESPEC 5007 #define ILLEGALDISTRIBUTION 5008 -#define ILLEGALFLOAT 5009 -#define ILLEGALESSENTIALSPEC 5013 +#define ILLEGALFLOAT 5009 +#define ILLEGALESSENTIALSPEC 5013 #endif // TESSERACT_CLASSIFY_CLUSTTOOL_H_ diff --git a/src/classify/cutoffs.cpp b/src/classify/cutoffs.cpp index 359241c568..ebb4c85bd1 100644 --- a/src/classify/cutoffs.cpp +++ b/src/classify/cutoffs.cpp @@ -32,7 +32,7 @@ #define REALLY_QUOTE_IT(x) QUOTE_IT(x) -#define MAX_CUTOFF 1000 +#define MAX_CUTOFF 1000 namespace tesseract { /** @@ -57,8 +57,7 @@ void Classify::ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs) { tprintf("Error during read of shapetable pffmtable!\n"); } } - for (int i = 0; i < MAX_NUM_CLASSES; i++) - Cutoffs[i] = MAX_CUTOFF; + for (int i = 0; i < MAX_NUM_CLASSES; i++) Cutoffs[i] = MAX_CUTOFF; const int kMaxLineSize = 100; char line[kMaxLineSize]; diff --git a/src/classify/errorcounter.cpp b/src/classify/errorcounter.cpp index 5283a0e539..f427c91201 100644 --- a/src/classify/errorcounter.cpp +++ b/src/classify/errorcounter.cpp @@ -37,11 +37,11 @@ const double kRatingEpsilon = 1.0 / 32; // If the classifier makes a CT_UNICHAR_TOPN_ERR error, and the appropriate // report_level is set (4 or greater), it will then call the classifier again // with a debug flag and a keep_this argument to find out what is going on. -double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, - int report_level, CountTypes boosting_mode, - const FontInfoTable& fontinfo_table, - const GenericVector& page_images, SampleIterator* it, - double* unichar_error, double* scaled_error, STRING* fonts_report) { +double ErrorCounter::ComputeErrorRate( + ShapeClassifier* classifier, int report_level, CountTypes boosting_mode, + const FontInfoTable& fontinfo_table, const GenericVector& page_images, + SampleIterator* it, double* unichar_error, double* scaled_error, + STRING* fonts_report) { const int fontsize = it->sample_set()->NumFonts(); ErrorCounter counter(classifier->GetUnicharset(), fontsize); GenericVector results; @@ -56,7 +56,8 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, TrainingSample* mutable_sample = it->MutableSample(); int page_index = mutable_sample->page_num(); Pix* page_pix = 0 <= page_index && page_index < page_images.size() - ? page_images[page_index] : nullptr; + ? page_images[page_index] + : nullptr; // No debug, no keep this. classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results); @@ -66,13 +67,12 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED || correct_id == UNICHAR_BROKEN)) { // This is junk so use the special counter. - debug_it = counter.AccumulateJunk(report_level > 3, - results, - mutable_sample); + debug_it = + counter.AccumulateJunk(report_level > 3, results, mutable_sample); } else { - debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode, - fontinfo_table, - results, mutable_sample); + debug_it = + counter.AccumulateErrors(report_level > 3, boosting_mode, + fontinfo_table, results, mutable_sample); } if (debug_it && error_samples > 0) { // Running debug, keep the correct answer, and debug the classifier. @@ -86,14 +86,14 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, } const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC; // Create the appropriate error report. - unscaled_error = counter.ReportErrors(report_level, boosting_mode, - fontinfo_table, - *it, unichar_error, fonts_report); + unscaled_error = + counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it, + unichar_error, fonts_report); if (scaled_error != nullptr) *scaled_error = counter.scaled_error_; if (report_level > 1) { // It is useful to know the time in microseconds/char. - tprintf("Errors computed in %.2fs at %.1f μs/char\n", - total_time, 1000000.0 * total_time / total_samples); + tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time, + 1000000.0 * total_time / total_samples); } return unscaled_error; } @@ -104,11 +104,12 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, // If the new_classifier makes a boosting_mode error that the old_classifier // does not, it will then call the new_classifier again with a debug flag // and a keep_this argument to find out what is going on. -void ErrorCounter::DebugNewErrors( - ShapeClassifier* new_classifier, ShapeClassifier* old_classifier, - CountTypes boosting_mode, - const FontInfoTable& fontinfo_table, - const GenericVector& page_images, SampleIterator* it) { +void ErrorCounter::DebugNewErrors(ShapeClassifier* new_classifier, + ShapeClassifier* old_classifier, + CountTypes boosting_mode, + const FontInfoTable& fontinfo_table, + const GenericVector& page_images, + SampleIterator* it) { int fontsize = it->sample_set()->NumFonts(); ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize); ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize); @@ -122,7 +123,8 @@ void ErrorCounter::DebugNewErrors( TrainingSample* mutable_sample = it->MutableSample(); int page_index = mutable_sample->page_num(); Pix* page_pix = 0 <= page_index && page_index < page_images.size() - ? page_images[page_index] : nullptr; + ? page_images[page_index] + : nullptr; // No debug, no keep this. old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results); @@ -135,7 +137,7 @@ void ErrorCounter::DebugNewErrors( INVALID_UNICHAR_ID, &results); if (correct_id != 0 && new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table, - results, mutable_sample)) { + results, mutable_sample)) { tprintf("New Error on sample %d: Classifier debug output:\n", it->GlobalSampleIndex()); ++total_new_errors; @@ -155,10 +157,12 @@ void ErrorCounter::DebugNewErrors( // Constructor is private. Only anticipated use of ErrorCounter is via // the static ComputeErrorRate. ErrorCounter::ErrorCounter(const UNICHARSET& unicharset, int fontsize) - : scaled_error_(0.0), rating_epsilon_(kRatingEpsilon), - unichar_counts_(unicharset.size(), unicharset.size(), 0), - ok_score_hist_(0, 101), bad_score_hist_(0, 101), - unicharset_(unicharset) { + : scaled_error_(0.0), + rating_epsilon_(kRatingEpsilon), + unichar_counts_(unicharset.size(), unicharset.size(), 0), + ok_score_hist_(0, 101), + bad_score_hist_(0, 101), + unicharset_(unicharset) { Counts empty_counts; font_counts_.init_to_size(fontsize, empty_counts); multi_unichar_counts_.init_to_size(unicharset.size(), 0); @@ -232,11 +236,11 @@ bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode, // TODO(rays) It is easy to add counters for individual font attributes // here if we want them. if (font_table.SetContainsFontProperties( - font_id, results[answer_actual_rank].fonts)) { + font_id, results[answer_actual_rank].fonts)) { // Font attributes were matched. // Check for multiple properties. if (font_table.SetContainsMultipleFontProperties( - results[answer_actual_rank].fonts)) + results[answer_actual_rank].fonts)) ++font_counts_[font_id].n[CT_OK_MULTI_FONT]; } else { // Font attributes weren't matched. @@ -263,28 +267,23 @@ bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode, // Compute mean number of return values and mean rank of correct answer. font_counts_[font_id].n[CT_NUM_RESULTS] += num_results; font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank; - if (joined) - ++font_counts_[font_id].n[CT_OK_JOINED]; - if (broken) - ++font_counts_[font_id].n[CT_OK_BROKEN]; + if (joined) ++font_counts_[font_id].n[CT_OK_JOINED]; + if (broken) ++font_counts_[font_id].n[CT_OK_BROKEN]; } // If it was an error for boosting then sum the weight. if (sample->is_error()) { scaled_error_ += sample->weight(); if (debug) { - tprintf("%d results for char %s font %d :", - num_results, unicharset_.id_to_unichar(unichar_id), - font_id); + tprintf("%d results for char %s font %d :", num_results, + unicharset_.id_to_unichar(unichar_id), font_id); for (int i = 0; i < num_results; ++i) { - tprintf(" %.3f : %s\n", - results[i].rating, + tprintf(" %.3f : %s\n", results[i].rating, unicharset_.id_to_unichar(results[i].unichar_id)); } return true; } int percent = 0; - if (num_results > 0) - percent = IntCastRounded(results[0].rating * 100); + if (num_results > 0) percent = IntCastRounded(results[0].rating * 100); bad_score_hist_.add(percent, 1); } else { int percent = 0; @@ -306,8 +305,7 @@ bool ErrorCounter::AccumulateJunk(bool debug, const int font_id = sample->font_id(); const int unichar_id = sample->class_id(); int percent = 0; - if (num_results > 0) - percent = IntCastRounded(results[0].rating * 100); + if (num_results > 0) percent = IntCastRounded(results[0].rating * 100); if (num_results > 0 && results[0].unichar_id != unichar_id) { // This is a junk error. ++font_counts_[font_id].n[CT_ACCEPTED_JUNK]; @@ -340,8 +338,7 @@ bool ErrorCounter::AccumulateJunk(bool debug, double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, const FontInfoTable& fontinfo_table, const SampleIterator& it, - double* unichar_error, - STRING* fonts_report) { + double* unichar_error, STRING* fonts_report) { // Compute totals over all the fonts and report individual font results // when required. Counts totals; @@ -376,8 +373,8 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, // Report the totals. STRING total_report; if (any_results) { - tprintf("TOTAL Scaled Err=%.4g%%, %s\n", - scaled_error_ * 100.0, total_report.string()); + tprintf("TOTAL Scaled Err=%.4g%%, %s\n", scaled_error_ * 100.0, + total_report.string()); } // Report the worst substitution error only for now. if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) { @@ -397,8 +394,8 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, if (worst_err > 0) { tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n", worst_uni_id, unicharset_.id_to_unichar(worst_uni_id), - unicharset_.id_to_unichar(worst_result_id), - worst_err, totals.n[CT_UNICHAR_TOP1_ERR], + unicharset_.id_to_unichar(worst_result_id), worst_err, + totals.n[CT_UNICHAR_TOP1_ERR], 100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]); } } @@ -406,8 +403,7 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, for (int u = 0; u < multi_unichar_counts_.size(); ++u) { if (multi_unichar_counts_[u] > 0) { tprintf("%d multiple answers for unichar: %s\n", - multi_unichar_counts_[u], - unicharset_.id_to_unichar(u)); + multi_unichar_counts_[u], unicharset_.id_to_unichar(u)); } } tprintf("OK Score histogram:\n"); @@ -417,11 +413,9 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, } double rates[CT_SIZE]; - if (!ComputeRates(totals, rates)) - return 0.0; + if (!ComputeRates(totals, rates)) return 0.0; // Set output values if asked for. - if (unichar_error != nullptr) - *unichar_error = rates[CT_UNICHAR_TOP1_ERR]; + if (unichar_error != nullptr) *unichar_error = rates[CT_UNICHAR_TOP1_ERR]; return rates[boosting_mode]; } @@ -433,54 +427,48 @@ bool ErrorCounter::ReportString(bool even_if_empty, const Counts& counts, STRING* report) { // Compute the error rates. double rates[CT_SIZE]; - if (!ComputeRates(counts, rates) && !even_if_empty) - return false; + if (!ComputeRates(counts, rates) && !even_if_empty) return false; // Using %.4g%%, the length of the output string should exactly match the // length of the format string, but in case of overflow, allow for +eddd // on each number. const int kMaxExtraLength = 5; // Length of +eddd. // Keep this format string and the snprintf in sync with the CountTypes enum. - const char* format_str = "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] " - "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, " - "FontAttr=%.4g%%, Multi=%.4g%%, " - "Answers=%.3g, Rank=%.3g, " - "OKjunk=%.4g%%, Badjunk=%.4g%%"; - const size_t max_str_len = strlen(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1; + const char* format_str = + "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] " + "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, " + "FontAttr=%.4g%%, Multi=%.4g%%, " + "Answers=%.3g, Rank=%.3g, " + "OKjunk=%.4g%%, Badjunk=%.4g%%"; + const size_t max_str_len = + strlen(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1; char* formatted_str = new char[max_str_len]; - snprintf(formatted_str, max_str_len, format_str, - rates[CT_UNICHAR_TOP1_ERR] * 100.0, - rates[CT_UNICHAR_TOP2_ERR] * 100.0, - rates[CT_UNICHAR_TOPN_ERR] * 100.0, - rates[CT_UNICHAR_TOPTOP_ERR] * 100.0, - rates[CT_OK_MULTI_UNICHAR] * 100.0, - rates[CT_OK_JOINED] * 100.0, - rates[CT_OK_BROKEN] * 100.0, - rates[CT_REJECT] * 100.0, - rates[CT_FONT_ATTR_ERR] * 100.0, - rates[CT_OK_MULTI_FONT] * 100.0, - rates[CT_NUM_RESULTS], - rates[CT_RANK], - 100.0 * rates[CT_REJECTED_JUNK], - 100.0 * rates[CT_ACCEPTED_JUNK]); + snprintf( + formatted_str, max_str_len, format_str, + rates[CT_UNICHAR_TOP1_ERR] * 100.0, rates[CT_UNICHAR_TOP2_ERR] * 100.0, + rates[CT_UNICHAR_TOPN_ERR] * 100.0, rates[CT_UNICHAR_TOPTOP_ERR] * 100.0, + rates[CT_OK_MULTI_UNICHAR] * 100.0, rates[CT_OK_JOINED] * 100.0, + rates[CT_OK_BROKEN] * 100.0, rates[CT_REJECT] * 100.0, + rates[CT_FONT_ATTR_ERR] * 100.0, rates[CT_OK_MULTI_FONT] * 100.0, + rates[CT_NUM_RESULTS], rates[CT_RANK], 100.0 * rates[CT_REJECTED_JUNK], + 100.0 * rates[CT_ACCEPTED_JUNK]); *report = formatted_str; - delete [] formatted_str; + delete[] formatted_str; // Now append each field of counts with a tab in front so the result can // be loaded into a spreadsheet. - for (int ct = 0; ct < CT_SIZE; ++ct) - report->add_str_int("\t", counts.n[ct]); + for (int ct = 0; ct < CT_SIZE; ++ct) report->add_str_int("\t", counts.n[ct]); return true; } // Computes the error rates and returns in rates which is an array of size // CT_SIZE. Returns false if there is no data, leaving rates unchanged. bool ErrorCounter::ComputeRates(const Counts& counts, double rates[CT_SIZE]) { - const int ok_samples = counts.n[CT_UNICHAR_TOP_OK] + counts.n[CT_UNICHAR_TOP1_ERR] + - counts.n[CT_REJECT]; - const int junk_samples = counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK]; + const int ok_samples = counts.n[CT_UNICHAR_TOP_OK] + + counts.n[CT_UNICHAR_TOP1_ERR] + counts.n[CT_REJECT]; + const int junk_samples = + counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK]; // Compute rates for normal chars. double denominator = static_cast(std::max(ok_samples, 1)); - for (int ct = 0; ct <= CT_RANK; ++ct) - rates[ct] = counts.n[ct] / denominator; + for (int ct = 0; ct <= CT_RANK; ++ct) rates[ct] = counts.n[ct] / denominator; // Compute rates for junk. denominator = static_cast(std::max(junk_samples, 1)); for (int ct = CT_REJECTED_JUNK; ct <= CT_ACCEPTED_JUNK; ++ct) @@ -488,14 +476,10 @@ bool ErrorCounter::ComputeRates(const Counts& counts, double rates[CT_SIZE]) { return ok_samples != 0 || junk_samples != 0; } -ErrorCounter::Counts::Counts() { - memset(n, 0, sizeof(n[0]) * CT_SIZE); -} +ErrorCounter::Counts::Counts() { memset(n, 0, sizeof(n[0]) * CT_SIZE); } // Adds other into this for computing totals. void ErrorCounter::Counts::operator+=(const Counts& other) { - for (int ct = 0; ct < CT_SIZE; ++ct) - n[ct] += other.n[ct]; + for (int ct = 0; ct < CT_SIZE; ++ct) n[ct] += other.n[ct]; } - } // namespace tesseract. diff --git a/src/classify/errorcounter.h b/src/classify/errorcounter.h index a2d3d6aede..690526a96b 100644 --- a/src/classify/errorcounter.h +++ b/src/classify/errorcounter.h @@ -21,7 +21,8 @@ #include "statistc.h" struct Pix; -template class UnicityTable; +template +class UnicityTable; namespace tesseract { @@ -67,26 +68,26 @@ struct UnicharRating; // // Keep in sync with the ReportString function. enum CountTypes { - CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id. + CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id. // The rank of the results in TOP1, TOP2, TOPN is determined by a gap of // kRatingEpsilon from the first result in each group. The real top choice // is measured using TOPTOP. - CT_UNICHAR_TOP1_ERR, // Top shape does not contain correct unichar id. - CT_UNICHAR_TOP2_ERR, // Top 2 shapes don't contain correct unichar id. - CT_UNICHAR_TOPN_ERR, // No output shape contains correct unichar id. - CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct. - CT_OK_MULTI_UNICHAR, // Top shape id has correct unichar id, and others. - CT_OK_JOINED, // Top shape id is correct but marked joined. - CT_OK_BROKEN, // Top shape id is correct but marked broken. - CT_REJECT, // Classifier hates this. - CT_FONT_ATTR_ERR, // Top unichar OK, but font attributes incorrect. - CT_OK_MULTI_FONT, // CT_FONT_ATTR_OK but there are multiple font attrs. - CT_NUM_RESULTS, // Number of answers produced. - CT_RANK, // Rank of correct answer. - CT_REJECTED_JUNK, // Junk that was correctly rejected. - CT_ACCEPTED_JUNK, // Junk that was incorrectly classified otherwise. - - CT_SIZE // Number of types for array sizing. + CT_UNICHAR_TOP1_ERR, // Top shape does not contain correct unichar id. + CT_UNICHAR_TOP2_ERR, // Top 2 shapes don't contain correct unichar id. + CT_UNICHAR_TOPN_ERR, // No output shape contains correct unichar id. + CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct. + CT_OK_MULTI_UNICHAR, // Top shape id has correct unichar id, and others. + CT_OK_JOINED, // Top shape id is correct but marked joined. + CT_OK_BROKEN, // Top shape id is correct but marked broken. + CT_REJECT, // Classifier hates this. + CT_FONT_ATTR_ERR, // Top unichar OK, but font attributes incorrect. + CT_OK_MULTI_FONT, // CT_FONT_ATTR_OK but there are multiple font attrs. + CT_NUM_RESULTS, // Number of answers produced. + CT_RANK, // Rank of correct answer. + CT_REJECTED_JUNK, // Junk that was correctly rejected. + CT_ACCEPTED_JUNK, // Junk that was incorrectly classified otherwise. + + CT_SIZE // Number of types for array sizing. }; // Class to encapsulate all the functionality and sub-structures required @@ -119,14 +120,12 @@ class ErrorCounter { // both human-readable form and as a tab-separated list of error counts. // The human-readable form is all before the first tab. // * The return value is the un-weighted version of the scaled_error. - static double ComputeErrorRate(ShapeClassifier* classifier, - int report_level, CountTypes boosting_mode, + static double ComputeErrorRate(ShapeClassifier* classifier, int report_level, + CountTypes boosting_mode, const FontInfoTable& fontinfo_table, const GenericVector& page_images, - SampleIterator* it, - double* unichar_error, - double* scaled_error, - STRING* fonts_report); + SampleIterator* it, double* unichar_error, + double* scaled_error, STRING* fonts_report); // Tests a pair of classifiers, debugging errors of the new against the old. // See errorcounter.h for description of arguments. // Iterates over the samples, calling the classifiers in normal/silent mode. @@ -181,13 +180,12 @@ class ErrorCounter { // error rate to return. // The fontinfo_table from MasterTrainer provides the names of fonts. // The it determines the current subset of the training samples. - // If not nullptr, the top-choice unichar error rate is saved in unichar_error. - // If not nullptr, the report string is saved in fonts_report. + // If not nullptr, the top-choice unichar error rate is saved in + // unichar_error. If not nullptr, the report string is saved in fonts_report. // (Ignoring report_level). double ReportErrors(int report_level, CountTypes boosting_mode, const FontInfoTable& fontinfo_table, - const SampleIterator& it, - double* unichar_error, + const SampleIterator& it, double* unichar_error, STRING* fonts_report); // Sets the report string to a combined human and machine-readable report @@ -201,7 +199,6 @@ class ErrorCounter { // CT_SIZE. Returns false if there is no data, leaving rates unchanged. static bool ComputeRates(const Counts& counts, double rates[CT_SIZE]); - // Total scaled error used by boosting algorithms. double scaled_error_; // Difference in result rating to be thought of as an "equal" choice. diff --git a/src/classify/featdefs.cpp b/src/classify/featdefs.cpp index 1fce5d8cf2..db72e2fae0 100644 --- a/src/classify/featdefs.cpp +++ b/src/classify/featdefs.cpp @@ -19,15 +19,15 @@ Include Files and Type Defines -----------------------------------------------------------------------------*/ #include "featdefs.h" -#include "emalloc.h" #include "danerror.h" +#include "emalloc.h" #include "scanutils.h" -#include #include +#include /** define errors triggered by this module */ -#define ILLEGAL_NUM_SETS 3001 +#define ILLEGAL_NUM_SETS 3001 #define PICO_FEATURE_LENGTH 0.05 @@ -40,81 +40,58 @@ const char* kIntFeatureType = "if"; const char* kGeoFeatureType = "tb"; // Define all of the parameters for the MicroFeature type. -StartParamDesc(MicroFeatureParams) -DefineParam(0, 0, -0.5, 0.5) -DefineParam(0, 0, -0.25, 0.75) -DefineParam(0, 1, 0.0, 1.0) -DefineParam(1, 0, 0.0, 1.0) -DefineParam (0, 1, -0.5, 0.5) -DefineParam (0, 1, -0.5, 0.5) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams) - -// Define all of the parameters for the NormFeat type. -StartParamDesc (CharNormParams) -DefineParam(0, 0, -0.25, 0.75) -DefineParam(0, 1, 0.0, 1.0) -DefineParam(0, 0, 0.0, 1.0) -DefineParam(0, 0, 0.0, 1.0) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams) - -// Define all of the parameters for the IntFeature type -StartParamDesc(IntFeatParams) -DefineParam(0, 0, 0.0, 255.0) -DefineParam(0, 0, 0.0, 255.0) -DefineParam(1, 0, 0.0, 255.0) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams) - -// Define all of the parameters for the GeoFeature type -StartParamDesc(GeoFeatParams) -DefineParam(0, 0, 0.0, 255.0) -DefineParam(0, 0, 0.0, 255.0) -DefineParam(0, 0, 0.0, 255.0) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams) - -// Other features used for training the adaptive classifier, but not used -// during normal training, therefore not in the DescDefs array. - -// Define all of the parameters for the PicoFeature type -// define knob that can be used to adjust pico-feature length. -FLOAT32 PicoFeatureLength = PICO_FEATURE_LENGTH; -StartParamDesc(PicoFeatParams) -DefineParam(0, 0, -0.25, 0.75) -DefineParam(1, 0, 0.0, 1.0) -DefineParam(0, 0, -0.5, 0.5) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams) - -// Define all of the parameters for the OutlineFeature type. -StartParamDesc(OutlineFeatParams) -DefineParam(0, 0, -0.5, 0.5) -DefineParam(0, 0, -0.25, 0.75) -DefineParam(0, 0, 0.0, 1.0) -DefineParam(1, 0, 0.0, 1.0) -EndParamDesc -// Now define the feature type itself (see features.h for parameters). -DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams) - -// MUST be kept in-sync with ExtractorDefs in fxdefs.cpp. -static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = { - &MicroFeatureDesc, - &CharNormDesc, - &IntFeatDesc, - &GeoFeatDesc -}; +StartParamDesc(MicroFeatureParams) DefineParam(0, 0, -0.5, 0.5) + DefineParam(0, 0, -0.25, 0.75) DefineParam(0, 1, 0.0, 1.0) + DefineParam(1, 0, 0.0, 1.0) DefineParam(0, 1, -0.5, 0.5) + DefineParam(0, 1, -0.5, 0.5) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams) + + // Define all of the parameters for the NormFeat type. + StartParamDesc(CharNormParams) DefineParam(0, 0, -0.25, 0.75) + DefineParam(0, 1, 0.0, 1.0) DefineParam(0, 0, 0.0, 1.0) + DefineParam(0, 0, 0.0, 1.0) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams) + + // Define all of the parameters for the IntFeature type + StartParamDesc(IntFeatParams) DefineParam(0, 0, 0.0, 255.0) + DefineParam(0, 0, 0.0, 255.0) DefineParam(1, 0, 0.0, 255.0) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams) + + // Define all of the parameters for the GeoFeature type + StartParamDesc(GeoFeatParams) DefineParam(0, 0, 0.0, 255.0) + DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams) + + // Other features used for training the adaptive classifier, but not used + // during normal training, therefore not in the DescDefs array. + + // Define all of the parameters for the PicoFeature type + // define knob that can be used to adjust pico-feature length. + FLOAT32 PicoFeatureLength = PICO_FEATURE_LENGTH; +StartParamDesc(PicoFeatParams) DefineParam(0, 0, -0.25, 0.75) + DefineParam(1, 0, 0.0, 1.0) DefineParam(0, 0, -0.5, 0.5) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams) + + // Define all of the parameters for the OutlineFeature type. + StartParamDesc(OutlineFeatParams) DefineParam(0, 0, -0.5, 0.5) + DefineParam(0, 0, -0.25, 0.75) DefineParam(0, 0, 0.0, 1.0) + DefineParam(1, 0, 0.0, 1.0) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams) + + // MUST be kept in-sync with ExtractorDefs in fxdefs.cpp. + static const FEATURE_DESC_STRUCT* DescDefs[NUM_FEATURE_TYPES] = { + &MicroFeatureDesc, &CharNormDesc, &IntFeatDesc, &GeoFeatDesc}; /*----------------------------------------------------------------------------- Public Code -----------------------------------------------------------------------------*/ -void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) { +void InitFeatureDefs(FEATURE_DEFS_STRUCT* featuredefs) { featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES; for (int i = 0; i < NUM_FEATURE_TYPES; ++i) { featuredefs->FeatureDesc[i] = DescDefs[i]; @@ -128,7 +105,7 @@ void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) { * * @param CharDesc character description to be deallocated * - * Globals: + * Globals: * - none * * @note Exceptions: none @@ -137,27 +114,27 @@ void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) { void FreeCharDescription(CHAR_DESC CharDesc) { if (CharDesc) { for (size_t i = 0; i < CharDesc->NumFeatureSets; i++) - FreeFeatureSet (CharDesc->FeatureSets[i]); + FreeFeatureSet(CharDesc->FeatureSets[i]); Efree(CharDesc); } -} /* FreeCharDescription */ - +} /* FreeCharDescription */ /*---------------------------------------------------------------------------*/ /** * Allocate a new character description, initialize its * feature sets to be empty, and return it. * - * Globals: + * Globals: * - none * * @return New character description structure. * @note Exceptions: none * @note History: Wed May 23 15:27:10 1990, DSJ, Created. */ -CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) { +CHAR_DESC +NewCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs) { CHAR_DESC CharDesc; - CharDesc = (CHAR_DESC) Emalloc (sizeof (CHAR_DESC_STRUCT)); + CharDesc = (CHAR_DESC)Emalloc(sizeof(CHAR_DESC_STRUCT)); CharDesc->NumFeatureSets = FeatureDefs.NumFeatureTypes; for (size_t i = 0; i < CharDesc->NumFeatureSets; i++) @@ -165,8 +142,7 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) { return (CharDesc); -} /* NewCharDescription */ - +} /* NewCharDescription */ /*---------------------------------------------------------------------------*/ /** @@ -191,8 +167,7 @@ void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, int NumSetsToWrite = 0; for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) - if (CharDesc->FeatureSets[Type]) - NumSetsToWrite++; + if (CharDesc->FeatureSets[Type]) NumSetsToWrite++; str->add_str_int(" ", NumSetsToWrite); *str += "\n"; @@ -203,11 +178,11 @@ void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, WriteFeatureSet(CharDesc->FeatureSets[Type], str); } } -} /* WriteCharDescription */ +} /* WriteCharDescription */ // Return whether all of the fields of the given feature set // are well defined (not inf or nan). -bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, +bool ValidCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, CHAR_DESC CharDesc) { bool anything_written = false; bool well_formed = true; @@ -227,7 +202,7 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, } } return anything_written && well_formed; -} /* ValidCharDescription */ +} /* ValidCharDescription */ /*---------------------------------------------------------------------------*/ /** @@ -241,38 +216,37 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, ... @endverbatim * - * Globals: + * Globals: * - none - * + * * @param FeatureDefs definitions of feature types/extractors * @param File open text file to read character description from * @return Character description read from File. - * @note Exceptions: + * @note Exceptions: * - ILLEGAL_NUM_SETS * @note History: Wed May 23 17:32:48 1990, DSJ, Created. */ -CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, - FILE *File) { +CHAR_DESC +ReadCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, FILE* File) { int NumSetsToRead; char ShortName[FEAT_NAME_SIZE]; CHAR_DESC CharDesc; int Type; - if (tfscanf(File, "%d", &NumSetsToRead) != 1 || - NumSetsToRead < 0 || NumSetsToRead > FeatureDefs.NumFeatureTypes) - DoError (ILLEGAL_NUM_SETS, "Illegal number of feature sets"); + if (tfscanf(File, "%d", &NumSetsToRead) != 1 || NumSetsToRead < 0 || + NumSetsToRead > FeatureDefs.NumFeatureTypes) + DoError(ILLEGAL_NUM_SETS, "Illegal number of feature sets"); CharDesc = NewCharDescription(FeatureDefs); for (; NumSetsToRead > 0; NumSetsToRead--) { tfscanf(File, "%s", ShortName); Type = ShortNameToFeatureType(FeatureDefs, ShortName); CharDesc->FeatureSets[Type] = - ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]); + ReadFeatureSet(File, FeatureDefs.FeatureDesc[Type]); } return (CharDesc); -} // ReadCharDescription - +} // ReadCharDescription /*---------------------------------------------------------------------------*/ /** @@ -290,14 +264,14 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, * - ILLEGAL_SHORT_NAME * @note History: Wed May 23 15:36:05 1990, DSJ, Created. */ -uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, - const char *ShortName) { +uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT& FeatureDefs, + const char* ShortName) { int i; for (i = 0; i < FeatureDefs.NumFeatureTypes; i++) - if (!strcmp ((FeatureDefs.FeatureDesc[i]->ShortName), ShortName)) + if (!strcmp((FeatureDefs.FeatureDesc[i]->ShortName), ShortName)) return static_cast(i); - DoError (ILLEGAL_SHORT_NAME, "Illegal short name for a feature"); + DoError(ILLEGAL_SHORT_NAME, "Illegal short name for a feature"); return 0; -} // ShortNameToFeatureType +} // ShortNameToFeatureType diff --git a/src/classify/featdefs.h b/src/classify/featdefs.h index e27be2e0b3..c1c93a5a1b 100644 --- a/src/classify/featdefs.h +++ b/src/classify/featdefs.h @@ -15,8 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef FEATDEFS_H -#define FEATDEFS_H +#ifndef FEATDEFS_H +#define FEATDEFS_H /**---------------------------------------------------------------------------- Include Files and Type Defines @@ -31,7 +31,7 @@ extern const char* kIntFeatureType; extern const char* kGeoFeatureType; /* define error traps which can be triggered by this module.*/ -#define ILLEGAL_SHORT_NAME 2000 +#define ILLEGAL_SHORT_NAME 2000 /* A character is described by multiple sets of extracted features. Each set contains a number of features of a particular type, for example, a @@ -43,35 +43,36 @@ struct CHAR_DESC_STRUCT { uint32_t NumFeatureSets; FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]; }; -using CHAR_DESC = CHAR_DESC_STRUCT *; +using CHAR_DESC = CHAR_DESC_STRUCT*; struct FEATURE_DEFS_STRUCT { int32_t NumFeatureTypes; const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES]; int FeatureEnabled[NUM_FEATURE_TYPES]; }; -using FEATURE_DEFS = FEATURE_DEFS_STRUCT *; +using FEATURE_DEFS = FEATURE_DEFS_STRUCT*; /*---------------------------------------------------------------------- Generic functions for manipulating character descriptions ----------------------------------------------------------------------*/ -void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); +void InitFeatureDefs(FEATURE_DEFS_STRUCT* featuredefs); void FreeCharDescription(CHAR_DESC CharDesc); -CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs); +CHAR_DESC +NewCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs); -bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, +bool ValidCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, CHAR_DESC CharDesc); void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, CHAR_DESC CharDesc, STRING* str); -CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, - FILE *File); +CHAR_DESC +ReadCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, FILE* File); -uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, - const char *ShortName); +uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT& FeatureDefs, + const char* ShortName); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/src/classify/float2int.cpp b/src/classify/float2int.cpp index 2af74d710b..d4c086714d 100644 --- a/src/classify/float2int.cpp +++ b/src/classify/float2int.cpp @@ -19,10 +19,10 @@ Include Files and Type Defines -----------------------------------------------------------------------------*/ #include "float2int.h" -#include "normmatch.h" -#include "mfoutline.h" #include "classify.h" #include "helpers.h" +#include "mfoutline.h" +#include "normmatch.h" #include "picofeat.h" #define MAX_INT_CHAR_NORM (INT_CHAR_NORM_RANGE - 1) @@ -37,7 +37,7 @@ namespace tesseract { * For each class in the unicharset, clears the corresponding * entry in char_norm_array. char_norm_array is indexed by unichar_id. * - * Globals: + * Globals: * - none * * @param char_norm_array array to be cleared @@ -47,17 +47,16 @@ namespace tesseract { */ void Classify::ClearCharNormArray(uint8_t* char_norm_array) { memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size()); -} /* ClearCharNormArray */ - +} /* ClearCharNormArray */ /*---------------------------------------------------------------------------*/ -/** +/** * For each class in unicharset, computes the match between * norm_feature and the normalization protos for that class. * Converts this number to the range from 0 - 255 and stores it * into char_norm_array. CharNormArray is indexed by unichar_id. * - * Globals: + * Globals: * - PreTrainedTemplates current set of built-in templates * * @param norm_feature character normalization feature @@ -70,8 +69,8 @@ void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, uint8_t* char_norm_array) { for (int i = 0; i < unicharset.size(); i++) { if (i < PreTrainedTemplates->NumClasses) { - int norm_adjust = static_cast(INT_CHAR_NORM_RANGE * - ComputeNormMatch(i, norm_feature, false)); + int norm_adjust = static_cast( + INT_CHAR_NORM_RANGE * ComputeNormMatch(i, norm_feature, false)); char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM); } else { // Classes with no templates (eg. ambigs & ligatures) default @@ -79,8 +78,7 @@ void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, char_norm_array[i] = MAX_INT_CHAR_NORM; } } -} /* ComputeIntCharNormArray */ - +} /* ComputeIntCharNormArray */ /*---------------------------------------------------------------------------*/ /** @@ -88,7 +86,7 @@ void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, * in Features into integer format and saves it into * IntFeatures. * - * Globals: + * Globals: * - none * * @param Features floating point pico-features to be converted @@ -118,5 +116,5 @@ void Classify::ComputeIntFeatures(FEATURE_SET Features, ANGLE_SHIFT, INT_FEAT_RANGE); IntFeatures[Fid].CP_misses = 0; } -} /* ComputeIntFeatures */ +} /* ComputeIntFeatures */ } // namespace tesseract diff --git a/src/classify/float2int.h b/src/classify/float2int.h index 4f0a21bec9..097ccceeb9 100644 --- a/src/classify/float2int.h +++ b/src/classify/float2int.h @@ -24,7 +24,7 @@ #include "intmatcher.h" #include "ocrfeatures.h" -#define INT_FEAT_RANGE 256 -#define BASELINE_Y_SHIFT (0.25) +#define INT_FEAT_RANGE 256 +#define BASELINE_Y_SHIFT (0.25) #endif diff --git a/src/classify/fpoint.cpp b/src/classify/fpoint.cpp index 4102c03ec8..477a990e60 100644 --- a/src/classify/fpoint.cpp +++ b/src/classify/fpoint.cpp @@ -18,16 +18,17 @@ /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ -#include "const.h" #include "fpoint.h" -#include #include +#include +#include "const.h" /*---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------*/ -FLOAT32 DistanceBetween(FPOINT A, FPOINT B) { +FLOAT32 +DistanceBetween(FPOINT A, FPOINT B) { const double xd = XDelta(A, B); const double yd = YDelta(A, B); return sqrt(static_cast(xd * xd + yd * yd)); @@ -45,17 +46,14 @@ FLOAT32 DistanceBetween(FPOINT A, FPOINT B) { * @note Exceptions: none * @note History: Wed Mar 28 14:27:25 1990, DSJ, Created. */ -FLOAT32 NormalizedAngleFrom(FPOINT *Point1, - FPOINT *Point2, - FLOAT32 FullScale) { +FLOAT32 +NormalizedAngleFrom(FPOINT* Point1, FPOINT* Point2, FLOAT32 FullScale) { FLOAT32 Angle; FLOAT32 NumRadsInCircle = 2.0 * PI; - Angle = AngleFrom (*Point1, *Point2); - if (Angle < 0.0) - Angle += NumRadsInCircle; + Angle = AngleFrom(*Point1, *Point2); + if (Angle < 0.0) Angle += NumRadsInCircle; Angle *= FullScale / NumRadsInCircle; - if (Angle < 0.0 || Angle >= FullScale) - Angle = 0.0; + if (Angle < 0.0 || Angle >= FullScale) Angle = 0.0; return (Angle); } diff --git a/src/classify/fpoint.h b/src/classify/fpoint.h index ee73f94ca0..5533d053c2 100644 --- a/src/classify/fpoint.h +++ b/src/classify/fpoint.h @@ -15,19 +15,18 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef FPOINT_H -#define FPOINT_H +#ifndef FPOINT_H +#define FPOINT_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "host.h" -#include #include +#include +#include "host.h" /* define data structure to hold 2D points or vectors using floating point */ -typedef struct -{ +typedef struct { FLOAT32 x, y; } FPOINT; using FVECTOR = FPOINT; @@ -36,19 +35,20 @@ using FVECTOR = FPOINT; Macros ----------------------------------------------------------------------------**/ /* macros for computing miscellaneous functions of 2 points */ -#define XDelta(A,B) ( (B).x - (A).x ) -#define YDelta(A,B) ( (B).y - (A).y ) -#define SlopeFrom(A,B) ( YDelta(A,B) / XDelta(A,B) ) -#define AngleFrom(A,B) ( atan2((double) YDelta(A,B), \ - (double) XDelta(A,B) ) ) +#define XDelta(A, B) ((B).x - (A).x) +#define YDelta(A, B) ((B).y - (A).y) +#define SlopeFrom(A, B) (YDelta(A, B) / XDelta(A, B)) +#define AngleFrom(A, B) (atan2((double)YDelta(A, B), (double)XDelta(A, B))) -#define XIntersectionOf(A,B,X) ( SlopeFrom(A,B) * ((X) - A.x) + A.y) +#define XIntersectionOf(A, B, X) (SlopeFrom(A, B) * ((X)-A.x) + A.y) /*------------------------------------------------------------------------- Public Function Prototypes ---------------------------------------------------------------------------*/ -FLOAT32 DistanceBetween(FPOINT A, FPOINT B); +FLOAT32 +DistanceBetween(FPOINT A, FPOINT B); -FLOAT32 NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, FLOAT32 FullScale); +FLOAT32 +NormalizedAngleFrom(FPOINT* Point1, FPOINT* Point2, FLOAT32 FullScale); #endif diff --git a/src/classify/intfeaturedist.cpp b/src/classify/intfeaturedist.cpp index 53deef1b2e..c715240843 100644 --- a/src/classify/intfeaturedist.cpp +++ b/src/classify/intfeaturedist.cpp @@ -23,14 +23,14 @@ namespace tesseract { IntFeatureDist::IntFeatureDist() - : size_(0), total_feature_weight_(0.0), - feature_map_(nullptr), features_(nullptr), - features_delta_one_(nullptr), features_delta_two_(nullptr) { -} + : size_(0), + total_feature_weight_(0.0), + feature_map_(nullptr), + features_(nullptr), + features_delta_one_(nullptr), + features_delta_two_(nullptr) {} -IntFeatureDist::~IntFeatureDist() { - Clear(); -} +IntFeatureDist::~IntFeatureDist() { Clear(); } // Initialize the table to the given size of feature space. void IntFeatureDist::Init(const IntFeatureMap* feature_map) { @@ -49,7 +49,7 @@ void IntFeatureDist::Init(const IntFeatureMap* feature_map) { // Setup the map for the given indexed_features that have been indexed by // feature_map. void IntFeatureDist::Set(const GenericVector& indexed_features, - int canonical_count, bool value) { + int canonical_count, bool value) { total_feature_weight_ = canonical_count; for (int i = 0; i < indexed_features.size(); ++i) { const int f = indexed_features[i]; @@ -62,8 +62,7 @@ void IntFeatureDist::Set(const GenericVector& indexed_features, for (int dir2 = -kNumOffsetMaps; dir2 <= kNumOffsetMaps; ++dir2) { if (dir2 == 0) continue; const int mapped_f2 = feature_map_->OffsetFeature(mapped_f, dir2); - if (mapped_f2 >= 0) - features_delta_two_[mapped_f2] = value; + if (mapped_f2 >= 0) features_delta_two_[mapped_f2] = value; } } } @@ -148,11 +147,11 @@ double IntFeatureDist::DebugFeatureDistance( // Clear all data. void IntFeatureDist::Clear() { - delete [] features_; + delete[] features_; features_ = nullptr; - delete [] features_delta_one_; + delete[] features_delta_one_; features_delta_one_ = nullptr; - delete [] features_delta_two_; + delete[] features_delta_two_; features_delta_two_ = nullptr; } diff --git a/src/classify/intfeaturedist.h b/src/classify/intfeaturedist.h index 8cfcb3bed5..c544f3fe8c 100644 --- a/src/classify/intfeaturedist.h +++ b/src/classify/intfeaturedist.h @@ -49,8 +49,8 @@ class IntFeatureDist { // Setup the map for the given indexed_features that have been indexed by // feature_map. After use, use Set(..., false) to reset to the initial state // as this is faster than calling Init for sparse spaces. - void Set(const GenericVector& indexed_features, - int canonical_count, bool value); + void Set(const GenericVector& indexed_features, int canonical_count, + bool value); // Compute the distance between the given feature vector and the last // Set feature vector. diff --git a/src/classify/intfeaturemap.cpp b/src/classify/intfeaturemap.cpp index 53829c8711..ce76a9cb00 100644 --- a/src/classify/intfeaturemap.cpp +++ b/src/classify/intfeaturemap.cpp @@ -31,17 +31,14 @@ namespace tesseract { const int kMaxOffsetDist = 32; -IntFeatureMap::IntFeatureMap() - : mapping_changed_(true), compact_size_(0) { +IntFeatureMap::IntFeatureMap() : mapping_changed_(true), compact_size_(0) { for (int dir = 0; dir < kNumOffsetMaps; ++dir) { offset_plus_[dir] = nullptr; offset_minus_[dir] = nullptr; } } -IntFeatureMap::~IntFeatureMap() { - Clear(); -} +IntFeatureMap::~IntFeatureMap() { Clear(); } // Pseudo-accessors. int IntFeatureMap::IndexFeature(const INT_FEATURE_STRUCT& f) const { @@ -53,10 +50,12 @@ int IntFeatureMap::MapFeature(const INT_FEATURE_STRUCT& f) const { int IntFeatureMap::MapIndexFeature(int index_feature) const { return feature_map_.SparseToCompact(index_feature); } -INT_FEATURE_STRUCT IntFeatureMap::InverseIndexFeature(int index_feature) const { +INT_FEATURE_STRUCT +IntFeatureMap::InverseIndexFeature(int index_feature) const { return feature_space_.PositionFromIndex(index_feature); } -INT_FEATURE_STRUCT IntFeatureMap::InverseMapFeature(int map_feature) const { +INT_FEATURE_STRUCT +IntFeatureMap::InverseMapFeature(int map_feature) const { int index = feature_map_.CompactToSparse(map_feature); return feature_space_.PositionFromIndex(index); } @@ -79,12 +78,11 @@ void IntFeatureMap::Init(const IntFeatureSpace& feature_space) { compact_size_ = feature_map_.CompactSize(); // Initialize look-up tables if needed. FCOORD dir = FeatureDirection(0); - if (dir.x() == 0.0f && dir.y() == 0.0f) - InitIntegerFX(); + if (dir.x() == 0.0f && dir.y() == 0.0f) InitIntegerFX(); // Compute look-up tables to generate offset features. for (int dir = 0; dir < kNumOffsetMaps; ++dir) { - delete [] offset_plus_[dir]; - delete [] offset_minus_[dir]; + delete[] offset_plus_[dir]; + delete[] offset_minus_[dir]; offset_plus_[dir] = new int[sparse_size]; offset_minus_[dir] = new int[sparse_size]; } @@ -121,7 +119,6 @@ int IntFeatureMap::OffsetFeature(int index_feature, int dir) const { return -1; } - //#define EXPERIMENT_ON #ifdef EXPERIMENT_ON // This code is commented out as SampleIterator and // TrainingSample are not reviewed/checked in yet, but these functions are a @@ -137,8 +134,7 @@ int IntFeatureMap::FindNZFeatureMapping(SampleIterator* it) { const TrainingSample& sample = it->GetSample(); GenericVector features; feature_space_.IndexAndSortFeatures(sample.features(), - sample.num_features(), - &features); + sample.num_features(), &features); int num_features = features.size(); for (int f = 0; f < num_features; ++f) feature_map_.SetMap(features[f], true); @@ -148,8 +144,8 @@ int IntFeatureMap::FindNZFeatureMapping(SampleIterator* it) { compact_size_ = feature_map_.CompactSize(); mapping_changed_ = true; FinalizeMapping(it); - tprintf("%d non-zero features found in %d samples\n", - compact_size_, total_samples); + tprintf("%d non-zero features found in %d samples\n", compact_size_, + total_samples); return compact_size_; } #endif @@ -179,8 +175,8 @@ void IntFeatureMap::DebugMapFeatures( void IntFeatureMap::Clear() { for (int dir = 0; dir < kNumOffsetMaps; ++dir) { - delete [] offset_plus_[dir]; - delete [] offset_minus_[dir]; + delete[] offset_plus_[dir]; + delete[] offset_minus_[dir]; offset_plus_[dir] = nullptr; offset_minus_[dir] = nullptr; } diff --git a/src/classify/intfeaturemap.h b/src/classify/intfeaturemap.h index 5c5a54b83c..9929308855 100644 --- a/src/classify/intfeaturemap.h +++ b/src/classify/intfeaturemap.h @@ -21,8 +21,8 @@ #ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H_ #define TESSERACT_CLASSIFY_INTFEATUREMAP_H_ -#include "intfeaturespace.h" #include "indexmapbidi.h" +#include "intfeaturespace.h" #include "intproto.h" namespace tesseract { @@ -51,18 +51,10 @@ class IntFeatureMap { ~IntFeatureMap(); // Accessors. - int sparse_size() const { - return feature_space_.Size(); - } - int compact_size() const { - return compact_size_; - } - const IntFeatureSpace& feature_space() const { - return feature_space_; - } - const IndexMapBiDi& feature_map() const { - return feature_map_; - } + int sparse_size() const { return feature_space_.Size(); } + int compact_size() const { return compact_size_; } + const IntFeatureSpace& feature_space() const { return feature_space_; } + const IndexMapBiDi& feature_map() const { return feature_map_; } // Pseudo-accessors. int IndexFeature(const INT_FEATURE_STRUCT& f) const; diff --git a/src/classify/intfeaturespace.cpp b/src/classify/intfeaturespace.cpp index cf1a14e0bd..068e004d75 100644 --- a/src/classify/intfeaturespace.cpp +++ b/src/classify/intfeaturespace.cpp @@ -23,10 +23,10 @@ namespace tesseract { IntFeatureSpace::IntFeatureSpace() - : x_buckets_(0), y_buckets_(0), theta_buckets_(0) { -} + : x_buckets_(0), y_buckets_(0), theta_buckets_(0) {} -void IntFeatureSpace::Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets) { +void IntFeatureSpace::Init(uint8_t xbuckets, uint8_t ybuckets, + uint8_t thetabuckets) { x_buckets_ = xbuckets; y_buckets_ = ybuckets; theta_buckets_ = thetabuckets; @@ -35,12 +35,9 @@ void IntFeatureSpace::Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuck // Serializes the feature space definition to the given file. // Returns false on error. bool IntFeatureSpace::Serialize(FILE* fp) const { - if (fwrite(&x_buckets_, sizeof(x_buckets_), 1, fp) != 1) - return false; - if (fwrite(&y_buckets_, sizeof(y_buckets_), 1, fp) != 1) - return false; - if (fwrite(&theta_buckets_, sizeof(theta_buckets_), 1, fp) != 1) - return false; + if (fwrite(&x_buckets_, sizeof(x_buckets_), 1, fp) != 1) return false; + if (fwrite(&y_buckets_, sizeof(y_buckets_), 1, fp) != 1) return false; + if (fwrite(&theta_buckets_, sizeof(theta_buckets_), 1, fp) != 1) return false; return true; } @@ -48,18 +45,16 @@ bool IntFeatureSpace::Serialize(FILE* fp) const { // If swap is true, the data is big/little-endian swapped. // Returns false on error. bool IntFeatureSpace::DeSerialize(bool swap, FILE* fp) { - if (fread(&x_buckets_, sizeof(x_buckets_), 1, fp) != 1) - return false; - if (fread(&y_buckets_, sizeof(y_buckets_), 1, fp) != 1) - return false; - if (fread(&theta_buckets_, sizeof(theta_buckets_), 1, fp) != 1) - return false; + if (fread(&x_buckets_, sizeof(x_buckets_), 1, fp) != 1) return false; + if (fread(&y_buckets_, sizeof(y_buckets_), 1, fp) != 1) return false; + if (fread(&theta_buckets_, sizeof(theta_buckets_), 1, fp) != 1) return false; return true; } // Returns an INT_FEATURE_STRUCT corresponding to the given index. // This is the inverse of the Index member. -INT_FEATURE_STRUCT IntFeatureSpace::PositionFromIndex(int index) const { +INT_FEATURE_STRUCT +IntFeatureSpace::PositionFromIndex(int index) const { return PositionFromBuckets(index / (y_buckets_ * theta_buckets_), index / theta_buckets_ % y_buckets_, index % theta_buckets_); @@ -101,8 +96,8 @@ int IntFeatureSpace::XYToFeatureIndex(int x, int y) const { return -1; } feature = PositionFromIndex(index); - tprintf("Click at (%d, %d) ->(%d, %d), ->(%d, %d)\n", - x, y, feature.X, feature.Y, x - feature.X, y - feature.Y); + tprintf("Click at (%d, %d) ->(%d, %d), ->(%d, %d)\n", x, y, feature.X, + feature.Y, x - feature.X, y - feature.Y); // Get the relative position of x,y from the rounded feature. x -= feature.X; y -= feature.Y; @@ -123,9 +118,8 @@ int IntFeatureSpace::XYToFeatureIndex(int x, int y) const { } // Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords. -INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x, - int y, - int theta) const { +INT_FEATURE_STRUCT +IntFeatureSpace::PositionFromBuckets(int x, int y, int theta) const { INT_FEATURE_STRUCT pos( (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_, (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_, diff --git a/src/classify/intfeaturespace.h b/src/classify/intfeaturespace.h index ad81527895..4f652a9ac2 100644 --- a/src/classify/intfeaturespace.h +++ b/src/classify/intfeaturespace.h @@ -64,7 +64,7 @@ class IntFeatureSpace { // Range is [0, Size()-1]. Inverse of PositionFromIndex member. int Index(const INT_FEATURE_STRUCT& f) const { return (XBucket(f.X) * y_buckets_ + YBucket(f.Y)) * theta_buckets_ + - ThetaBucket(f.Theta); + ThetaBucket(f.Theta); } // Bulk calls to Index. Maps the given array of features to a vector of // int32_t indices in the same order as the input. diff --git a/src/classify/intfx.cpp b/src/classify/intfx.cpp index 75a07a7886..ca29eecf54 100644 --- a/src/classify/intfx.cpp +++ b/src/classify/intfx.cpp @@ -47,7 +47,6 @@ static float sin_table[INT_CHAR_NORM_RANGE]; // Guards write access to AtanTable so we don't create it more than once. tesseract::CCUtilMutex atan_table_mutex; - /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ @@ -67,7 +66,8 @@ void InitIntegerFX() { // Returns a vector representing the direction of a feature with the given // theta direction in an INT_FEATURE_STRUCT. -FCOORD FeatureDirection(uint8_t theta) { +FCOORD +FeatureDirection(uint8_t theta) { return FCOORD(cos_table[theta], sin_table[theta]); } @@ -82,8 +82,8 @@ TrainingSample* BlobToTrainingSample( const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, GenericVector* bl_features) { GenericVector cn_features; - Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, - &cn_features, fx_info, nullptr); + Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, &cn_features, + fx_info, nullptr); // TODO(rays) Use blob->PreciseBoundingBox() instead. TBOX box = blob.bounding_box(); TrainingSample* sample = nullptr; @@ -145,31 +145,29 @@ void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, fx_info->Ymean = IntCastRounded(center.y()); } // Setup the denorm for Baseline normalization. - bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), 128.0f, - 1.0f, 1.0f, 128.0f, 128.0f); + bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), + 128.0f, 1.0f, 1.0f, 128.0f, 128.0f); // Setup the denorm for character normalization. if (nonlinear_norm) { - GenericVector > x_coords; - GenericVector > y_coords; + GenericVector> x_coords; + GenericVector> y_coords; TBOX box; blob.GetPreciseBoundingBox(&box); box.pad(1, 1); blob.GetEdgeCoords(box, &x_coords, &y_coords); - cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, - 0.0f, 0.0f, x_coords, y_coords); + cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, 0.0f, + 0.0f, x_coords, y_coords); } else { - cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), - center.x(), center.y(), - 51.2f / second_moments.x(), - 51.2f / second_moments.y(), - 128.0f, 128.0f); + cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), + center.y(), 51.2f / second_moments.x(), + 51.2f / second_moments.y(), 128.0f, 128.0f); } } // Helper normalizes the direction, assuming that it is at the given // unnormed_pos, using the given denorm, starting at the root_denorm. uint8_t NormalizeDirection(uint8_t dir, const FCOORD& unnormed_pos, - const DENORM& denorm, const DENORM* root_denorm) { + const DENORM& denorm, const DENORM* root_denorm) { // Convert direction to a vector. FCOORD unnormed_end; unnormed_end.from_direction(dir); @@ -216,10 +214,8 @@ static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs, FCOORD fit_vector2 = !fit_vector; // The fit_vector is 180 degrees ambiguous, so resolve the ambiguity by // insisting that the scalar product with the feature_dir should be +ve. - if (fit_vector % feature_dir < 0.0) - fit_vector = -fit_vector; - if (fit_vector2 % feature_dir < 0.0) - fit_vector2 = -fit_vector2; + if (fit_vector % feature_dir < 0.0) fit_vector = -fit_vector; + if (fit_vector2 % feature_dir < 0.0) fit_vector2 = -fit_vector2; // Even though fit_vector2 has a higher mean squared error, it might be // a better fit, so use it if the dot product with feature_dir is bigger. if (fit_vector2 % feature_dir > fit_vector % feature_dir) @@ -272,9 +268,8 @@ static int ComputeFeatures(const FCOORD& start_pt, const FCOORD& end_pt, // with the least variance. static int GatherPoints(const C_OUTLINE* outline, double feature_length, const DENORM& denorm, const DENORM* root_denorm, - int start_index, int end_index, - ICOORD* pos, FCOORD* pos_normed, - LLSQ* points, LLSQ* dirs) { + int start_index, int end_index, ICOORD* pos, + FCOORD* pos_normed, LLSQ* points, LLSQ* dirs) { int step_length = outline->pathlength(); ICOORD step = outline->step(start_index % step_length); // Prev_normed is the start point of this collection and will be set on the @@ -327,8 +322,8 @@ static int GatherPoints(const C_OUTLINE* outline, double feature_length, // If force_poly is true, the features will be extracted from the polygonal // approximation even if more accurate data is available. static void ExtractFeaturesFromRun( - const EDGEPT* startpt, const EDGEPT* lastpt, - const DENORM& denorm, double feature_length, bool force_poly, + const EDGEPT* startpt, const EDGEPT* lastpt, const DENORM& denorm, + double feature_length, bool force_poly, GenericVector* features) { const EDGEPT* endpt = lastpt->next; const C_OUTLINE* outline = startpt->src_outline; @@ -346,8 +341,7 @@ static void ExtractFeaturesFromRun( // may be beyond the bounds of the outline steps/ due to wrap-around, to // so we use % step_length everywhere, except for start_index. int end_index = lastpt->start_step + lastpt->step_count; - if (end_index <= start_index) - end_index += step_length; + if (end_index <= start_index) end_index += step_length; LLSQ prev_points; LLSQ prev_dirs; FCOORD prev_normed_pos = outline->sub_pixel_pos_at_index(pos, start_index); @@ -355,9 +349,9 @@ static void ExtractFeaturesFromRun( LLSQ points; LLSQ dirs; FCOORD normed_pos; - int index = GatherPoints(outline, feature_length, denorm, root_denorm, - start_index, end_index, &pos, &normed_pos, - &points, &dirs); + int index = + GatherPoints(outline, feature_length, denorm, root_denorm, start_index, + end_index, &pos, &normed_pos, &points, &dirs); while (index <= end_index) { // At each iteration we nominally have 3 accumulated sets of points and // dirs: prev_points/dirs, points/dirs, next_points/dirs and sum them @@ -367,9 +361,9 @@ static void ExtractFeaturesFromRun( LLSQ next_points; LLSQ next_dirs; FCOORD next_normed_pos; - index = GatherPoints(outline, feature_length, denorm, root_denorm, - index, end_index, &pos, &next_normed_pos, - &next_points, &next_dirs); + index = GatherPoints(outline, feature_length, denorm, root_denorm, index, + end_index, &pos, &next_normed_pos, &next_points, + &next_dirs); LLSQ sum_points(prev_points); // TODO(rays) find out why it is better to use just dirs and next_dirs // in sum_dirs, instead of using prev_dirs as well. @@ -387,8 +381,8 @@ static void ExtractFeaturesFromRun( // The segment to which we fit features is the line passing through // fit_pt in direction of fit_vector that starts nearest to // prev_normed_pos and ends nearest to normed_pos. - FCOORD start_pos = prev_normed_pos.nearest_pt_on_line(fit_pt, - fit_vector); + FCOORD start_pos = + prev_normed_pos.nearest_pt_on_line(fit_pt, fit_vector); FCOORD end_pos = normed_pos.nearest_pt_on_line(fit_pt, fit_vector); // Possible correction to match the adjacent polygon segment. if (total_features == 0 && startpt != endpt) { @@ -399,8 +393,8 @@ static void ExtractFeaturesFromRun( FCOORD poly_pos(endpt->pos.x, endpt->pos.y); denorm.LocalNormTransform(poly_pos, &end_pos); } - int num_features = ComputeFeatures(start_pos, end_pos, feature_length, - features); + int num_features = + ComputeFeatures(start_pos, end_pos, feature_length, features); if (num_features > 0) { // We made some features so shuffle the accumulators. prev_points = points; @@ -442,17 +436,15 @@ static void ExtractFeaturesFromRun( // number of cn features generated for each outline in the blob (in order). // Thus after the first outline, there were (*outline_cn_counts)[0] features, // after the second outline, there were (*outline_cn_counts)[1] features etc. -void Classify::ExtractFeatures(const TBLOB& blob, - bool nonlinear_norm, +void Classify::ExtractFeatures(const TBLOB& blob, bool nonlinear_norm, GenericVector* bl_features, GenericVector* cn_features, INT_FX_RESULT_STRUCT* results, GenericVector* outline_cn_counts) { DENORM bl_denorm, cn_denorm; - tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm, - &bl_denorm, &cn_denorm, results); - if (outline_cn_counts != nullptr) - outline_cn_counts->truncate(0); + tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm, &bl_denorm, + &cn_denorm, results); + if (outline_cn_counts != nullptr) outline_cn_counts->truncate(0); // Iterate the outlines. for (TESSLINE* ol = blob.outlines; ol != nullptr; ol = ol->next) { // Iterate the polygon. diff --git a/src/classify/intfx.h b/src/classify/intfx.h index 0494aae468..94caf80bf3 100644 --- a/src/classify/intfx.h +++ b/src/classify/intfx.h @@ -15,16 +15,16 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef INTFX_H -#define INTFX_H +#ifndef INTFX_H +#define INTFX_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ +#include #include "blobs.h" #include "intproto.h" #include "normalis.h" -#include class DENORM; @@ -33,13 +33,13 @@ class TrainingSample; } struct INT_FX_RESULT_STRUCT { - int32_t Length; // total length of all outlines - int16_t Xmean, Ymean; // center of mass of all outlines - int16_t Rx, Ry; // radius of gyration - int16_t NumBL, NumCN; // number of features extracted - int16_t Width; // Width of blob in BLN coords. - uint8_t YBottom; // Bottom of blob in BLN coords. - uint8_t YTop; // Top of blob in BLN coords. + int32_t Length; // total length of all outlines + int16_t Xmean, Ymean; // center of mass of all outlines + int16_t Rx, Ry; // radius of gyration + int16_t NumBL, NumCN; // number of features extracted + int16_t Width; // Width of blob in BLN coords. + uint8_t YBottom; // Bottom of blob in BLN coords. + uint8_t YTop; // Top of blob in BLN coords. }; // The standard feature length @@ -52,17 +52,18 @@ void InitIntegerFX(); // Returns a vector representing the direction of a feature with the given // theta direction in an INT_FEATURE_STRUCT. -FCOORD FeatureDirection(uint8_t theta); +FCOORD +FeatureDirection(uint8_t theta); namespace tesseract { - // Generates a TrainingSample from a TBLOB. Extracts features and sets - // the bounding box, so classifiers that operate on the image can work. - // TODO(rays) BlobToTrainingSample must remain a global function until - // the FlexFx and FeatureDescription code can be removed and LearnBlob - // made a member of Classify. - TrainingSample* BlobToTrainingSample( - const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, - GenericVector* bl_features); -} +// Generates a TrainingSample from a TBLOB. Extracts features and sets +// the bounding box, so classifiers that operate on the image can work. +// TODO(rays) BlobToTrainingSample must remain a global function until +// the FlexFx and FeatureDescription code can be removed and LearnBlob +// made a member of Classify. +TrainingSample* BlobToTrainingSample( + const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, + GenericVector* bl_features); +} // namespace tesseract #endif diff --git a/src/classify/intmatcher.cpp b/src/classify/intmatcher.cpp index 66933bba12..15f84049e7 100644 --- a/src/classify/intmatcher.cpp +++ b/src/classify/intmatcher.cpp @@ -27,16 +27,16 @@ ----------------------------------------------------------------------------*/ #include "intmatcher.h" -#include "fontinfo.h" -#include "intproto.h" +#include #include "callcpp.h" -#include "scrollview.h" +#include "classify.h" #include "float2int.h" +#include "fontinfo.h" #include "globals.h" #include "helpers.h" -#include "classify.h" +#include "intproto.h" +#include "scrollview.h" #include "shapetable.h" -#include using tesseract::ScoredFont; using tesseract::UnicharRating; @@ -90,7 +90,8 @@ const float IntegerMatcher::kSimilarityCenter = 0.0075; // See http://b/19318793 (#6) for a complete discussion. Merging arrays // offset_table and next_table helps improve performance of PIE code. -static const uint8_t data_table[512] = {offset_table_entries, next_table_entries}; +static const uint8_t data_table[512] = {offset_table_entries, + next_table_entries}; static const uint8_t* const offset_table = &data_table[0]; static const uint8_t* const next_table = @@ -129,10 +130,10 @@ class ClassPruner { } ~ClassPruner() { - delete []class_count_; - delete []norm_count_; - delete []sort_key_; - delete []sort_index_; + delete[] class_count_; + delete[] norm_count_; + delete[] sort_key_; + delete[] sort_index_; } /// Computes the scores for every class in the character set, by summing the @@ -214,7 +215,7 @@ class ClassPruner { if (num_features_ < expected_num_features[class_id]) { int deficit = expected_num_features[class_id] - num_features_; class_count_[class_id] -= class_count_[class_id] * deficit / - (num_features_ * cutoff_strength + deficit); + (num_features_ * cutoff_strength + deficit); } } } @@ -246,7 +247,8 @@ class ClassPruner { void NormalizeForXheight(int norm_multiplier, const uint8_t* normalization_factors) { for (int class_id = 0; class_id < max_classes_; class_id++) { - norm_count_[class_id] = class_count_[class_id] - + norm_count_[class_id] = + class_count_[class_id] - ((norm_multiplier * normalization_factors[class_id]) >> 8); } } @@ -278,21 +280,19 @@ class ClassPruner { // Prune Classes. pruning_threshold_ = (max_count * pruning_factor) >> 8; // Select Classes. - if (pruning_threshold_ < 1) - pruning_threshold_ = 1; + if (pruning_threshold_ < 1) pruning_threshold_ = 1; num_classes_ = 0; for (int class_id = 0; class_id < max_classes_; class_id++) { if (norm_count_[class_id] >= pruning_threshold_ || class_id == keep_this) { - ++num_classes_; + ++num_classes_; sort_index_[num_classes_] = class_id; sort_key_[num_classes_] = norm_count_[class_id]; } } // Sort Classes using Heapsort Algorithm. - if (num_classes_ > 1) - HeapSort(num_classes_, sort_key_, sort_index_); + if (num_classes_ > 1) HeapSort(num_classes_, sort_key_, sort_index_); } /** Prints debug info on the class pruner matches for the pruned classes only. @@ -317,12 +317,13 @@ class ClassPruner { int_templates->ClassPruners[pruner_set]->p[x][y][theta]; for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) { uint32_t pruner_word = *pruner_word_ptr++; - for (int word_class = 0; word_class < 16 && - class_id < max_num_classes; ++word_class, ++class_id) { + for (int word_class = 0; + word_class < 16 && class_id < max_num_classes; + ++word_class, ++class_id) { if (norm_count_[class_id] >= pruning_threshold_) { tprintf(" %s=%d,", - classify.ClassIDToDebugStr(int_templates, - class_id, 0).string(), + classify.ClassIDToDebugStr(int_templates, class_id, 0) + .string(), pruner_word & CLASS_PRUNER_CLASS_MASK); } pruner_word >>= NUM_BITS_PER_CLASS; @@ -342,16 +343,15 @@ class ClassPruner { tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_); for (int i = 0; i < num_classes_; ++i) { int class_id = sort_index_[num_classes_ - i]; - STRING class_string = classify.ClassIDToDebugStr(int_templates, - class_id, 0); + STRING class_string = + classify.ClassIDToDebugStr(int_templates, class_id, 0); tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n", - class_string.string(), - class_count_[class_id], + class_string.string(), class_count_[class_id], expected_num_features[class_id], (norm_multiplier * normalization_factors[class_id]) >> 8, sort_key_[num_classes_ - i], 100.0 - 100.0 * sort_key_[num_classes_ - i] / - (CLASS_PRUNER_CLASS_MASK * num_features_)); + (CLASS_PRUNER_CLASS_MASK * num_features_)); } } @@ -362,23 +362,25 @@ class ClassPruner { results->init_to_size(num_classes_, empty); for (int c = 0; c < num_classes_; ++c) { (*results)[c].Class = sort_index_[num_classes_ - c]; - (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] / - (static_cast(CLASS_PRUNER_CLASS_MASK) * num_features_); + (*results)[c].Rating = + 1.0 - + sort_key_[num_classes_ - c] / + (static_cast(CLASS_PRUNER_CLASS_MASK) * num_features_); } return num_classes_; } private: /** Array[rounded_classes_] of initial counts for each class. */ - int *class_count_; + int* class_count_; /// Array[rounded_classes_] of modified counts for each class after /// normalizing for expected number of features, disabled classes, fragments, /// and xheights. - int *norm_count_; + int* norm_count_; /** Array[rounded_classes_ +1] of pruned counts that gets sorted */ - int *sort_key_; + int* sort_key_; /** Array[rounded_classes_ +1] of classes corresponding to sort_key_. */ - int *sort_index_; + int* sort_index_; /** Number of classes in this class pruner. */ int max_classes_; /** Rounded up number of classes used for array sizes. */ @@ -422,8 +424,7 @@ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, pruner.AdjustForExpectedNumFeatures(expected_num_features, classify_cp_cutoff_strength); // Apply disabled classes in unicharset - only works without a shape_table. - if (shape_table_ == nullptr) - pruner.DisableDisabledClasses(unicharset); + if (shape_table_ == nullptr) pruner.DisableDisabledClasses(unicharset); // If fragments are disabled, remove them, also only without a shape table. if (disable_character_fragments && shape_table_ == nullptr) pruner.DisableFragments(unicharset); @@ -472,31 +473,26 @@ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, * @note Exceptions: none * @note History: Tue Feb 19 16:36:23 MST 1991, RWM, Created. */ -void IntegerMatcher::Match(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int16_t NumFeatures, +void IntegerMatcher::Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT* Features, - UnicharRating* Result, - int AdaptFeatureThreshold, - int Debug, - bool SeparateDebugWindows) { - ScratchEvidence *tables = new ScratchEvidence(); + UnicharRating* Result, int AdaptFeatureThreshold, + int Debug, bool SeparateDebugWindows) { + ScratchEvidence* tables = new ScratchEvidence(); int Feature; - if (MatchDebuggingOn (Debug)) - cprintf ("Integer Matcher -------------------------------------------\n"); + if (MatchDebuggingOn(Debug)) + cprintf("Integer Matcher -------------------------------------------\n"); tables->Clear(ClassTemplate); Result->feature_misses = 0; for (Feature = 0; Feature < NumFeatures; Feature++) { - int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, - Feature, &Features[Feature], - tables, Debug); + int csum = + UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, Feature, + &Features[Feature], tables, Debug); // Count features that were missed over all configs. - if (csum == 0) - ++Result->feature_misses; + if (csum == 0) ++Result->feature_misses; } #ifndef GRAPHICS_DISABLED @@ -506,8 +502,8 @@ void IntegerMatcher::Match(INT_CLASS ClassTemplate, } if (DisplayProtoMatchesOn(Debug)) { - DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask, - *tables, SeparateDebugWindows); + DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask, *tables, + SeparateDebugWindows); } if (DisplayFeatureMatchesOn(Debug)) { @@ -523,8 +519,7 @@ void IntegerMatcher::Match(INT_CLASS ClassTemplate, FindBestMatch(ClassTemplate, *tables, Result); #ifndef GRAPHICS_DISABLED - if (PrintMatchSummaryOn(Debug)) - Result->Print(); + if (PrintMatchSummaryOn(Debug)) Result->Print(); if (MatchDebuggingOn(Debug)) cprintf("Match Complete --------------------------------------------\n"); @@ -553,33 +548,27 @@ void IntegerMatcher::Match(INT_CLASS ClassTemplate, * @note Exceptions: none * @note History: Tue Mar 12 17:09:26 MST 1991, RWM, Created */ -int IntegerMatcher::FindGoodProtos( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uint16_t BlobLength, - int16_t NumFeatures, - INT_FEATURE_ARRAY Features, - PROTO_ID *ProtoArray, - int AdaptProtoThreshold, - int Debug) { - ScratchEvidence *tables = new ScratchEvidence(); +int IntegerMatcher::FindGoodProtos(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, + uint16_t BlobLength, int16_t NumFeatures, + INT_FEATURE_ARRAY Features, + PROTO_ID* ProtoArray, + int AdaptProtoThreshold, int Debug) { + ScratchEvidence* tables = new ScratchEvidence(); int NumGoodProtos = 0; /* DEBUG opening heading */ - if (MatchDebuggingOn (Debug)) - cprintf - ("Find Good Protos -------------------------------------------\n"); + if (MatchDebuggingOn(Debug)) + cprintf("Find Good Protos -------------------------------------------\n"); tables->Clear(ClassTemplate); for (int Feature = 0; Feature < NumFeatures; Feature++) - UpdateTablesForFeature( - ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]), - tables, Debug); + UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, Feature, + &(Features[Feature]), tables, Debug); #ifndef GRAPHICS_DISABLED - if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug)) + if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, NumFeatures, Debug); #endif @@ -601,8 +590,8 @@ int IntegerMatcher::FindGoodProtos( } } - if (MatchDebuggingOn (Debug)) - cprintf ("Match Complete --------------------------------------------\n"); + if (MatchDebuggingOn(Debug)) + cprintf("Match Complete --------------------------------------------\n"); delete tables; return NumGoodProtos; @@ -623,17 +612,13 @@ int IntegerMatcher::FindGoodProtos( * @return Number of bad features in FeatureArray. * @note History: Tue Mar 12 17:09:26 MST 1991, RWM, Created */ -int IntegerMatcher::FindBadFeatures( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uint16_t BlobLength, - int16_t NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_ID *FeatureArray, - int AdaptFeatureThreshold, - int Debug) { - ScratchEvidence *tables = new ScratchEvidence(); +int IntegerMatcher::FindBadFeatures(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, + uint16_t BlobLength, int16_t NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_ID* FeatureArray, + int AdaptFeatureThreshold, int Debug) { + ScratchEvidence* tables = new ScratchEvidence(); int NumBadFeatures = 0; /* DEBUG opening heading */ @@ -643,9 +628,8 @@ int IntegerMatcher::FindBadFeatures( tables->Clear(ClassTemplate); for (int Feature = 0; Feature < NumFeatures; Feature++) { - UpdateTablesForFeature( - ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature], - tables, Debug); + UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, Feature, + &Features[Feature], tables, Debug); /* Find Best Evidence for Current Feature */ int best = 0; @@ -674,31 +658,32 @@ int IntegerMatcher::FindBadFeatures( return NumBadFeatures; } - -void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) { +void IntegerMatcher::Init(tesseract::IntParam* classify_debug_level) { classify_debug_level_ = classify_debug_level; /* Initialize table for evidence to similarity lookup */ for (int i = 0; i < SE_TABLE_SIZE; i++) { uint32_t IntSimilarity = i << (27 - SE_TABLE_BITS); - double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0; + double Similarity = ((double)IntSimilarity) / 65536.0 / 65536.0; double evidence = Similarity / kSimilarityCenter; evidence = 255.0 / (evidence * evidence + 1.0); if (kSEExponentialMultiplier > 0.0) { - double scale = 1.0 - exp(-kSEExponentialMultiplier) * - exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE)); + double scale = + 1.0 - exp(-kSEExponentialMultiplier) * + exp(kSEExponentialMultiplier * ((double)i / SE_TABLE_SIZE)); evidence *= ClipToRange(scale, 0.0, 1.0); } - similarity_evidence_table_[i] = (uint8_t) (evidence + 0.5); + similarity_evidence_table_[i] = (uint8_t)(evidence + 0.5); } /* Initialize evidence computation variables */ - evidence_table_mask_ = - ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits); + evidence_table_mask_ = ((1 << kEvidenceTableBits) - 1) + << (9 - kEvidenceTableBits); mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits); - table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1)); + table_trunc_shift_bits_ = + (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1)); evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1); } @@ -723,21 +708,19 @@ void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) { * @note Exceptions: none * @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. */ -void IMDebugConfiguration(int FeatureNum, - uint16_t ActualProtoNum, - uint8_t Evidence, - BIT_VECTOR ConfigMask, +void IMDebugConfiguration(int FeatureNum, uint16_t ActualProtoNum, + uint8_t Evidence, BIT_VECTOR ConfigMask, uint32_t ConfigWord) { - cprintf ("F = %3d, P = %3d, E = %3d, Configs = ", - FeatureNum, (int) ActualProtoNum, (int) Evidence); + cprintf("F = %3d, P = %3d, E = %3d, Configs = ", FeatureNum, + (int)ActualProtoNum, (int)Evidence); while (ConfigWord) { if (ConfigWord & 1) - cprintf ("1"); + cprintf("1"); else - cprintf ("0"); + cprintf("0"); ConfigWord >>= 1; } - cprintf ("\n"); + cprintf("\n"); } /** @@ -746,8 +729,7 @@ void IMDebugConfiguration(int FeatureNum, * @note Exceptions: none * @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. */ -void IMDebugConfigurationSum(int FeatureNum, - uint8_t *FeatureEvidence, +void IMDebugConfigurationSum(int FeatureNum, uint8_t* FeatureEvidence, int32_t ConfigCount) { cprintf("F=%3d, C=", FeatureNum); for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) { @@ -767,14 +749,12 @@ void IMDebugConfigurationSum(int FeatureNum, * @param Debug Debugger flag: 1=debugger on * @return none */ -int IntegerMatcher::UpdateTablesForFeature( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int FeatureNum, - const INT_FEATURE_STRUCT* Feature, - ScratchEvidence *tables, - int Debug) { +int IntegerMatcher::UpdateTablesForFeature(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + int FeatureNum, + const INT_FEATURE_STRUCT* Feature, + ScratchEvidence* tables, int Debug) { uint32_t ConfigWord; uint32_t ProtoWord; uint32_t ProtoNum; @@ -785,7 +765,7 @@ int IntegerMatcher::UpdateTablesForFeature( uint8_t config_byte; int32_t config_offset; PROTO_SET ProtoSet; - uint32_t *ProtoPrunerPtr; + uint32_t* ProtoPrunerPtr; INT_PROTO Proto; int ProtoSetIndex; uint8_t Evidence; @@ -809,12 +789,13 @@ int IntegerMatcher::UpdateTablesForFeature( ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1); for (ProtoSetIndex = 0, ActualProtoNum = 0; - ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { + ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ProtoPrunerPtr = (uint32_t *) ((*ProtoSet).ProtoPruner); + ProtoPrunerPtr = (uint32_t*)((*ProtoSet).ProtoPruner); for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET; - ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum += - (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) { + ProtoNum += (PROTOS_PER_PROTO_SET >> 1), + ActualProtoNum += (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, + ProtoPrunerPtr++) { /* Prune Protos of current Proto Set */ ProtoWord = *(ProtoPrunerPtr + XFeatureAddress); ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress); @@ -835,15 +816,13 @@ int IntegerMatcher::UpdateTablesForFeature( proto_byte = next_table[proto_byte]; Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]); ConfigWord = Proto->Configs[0]; - A3 = (((Proto->A * (Feature->X - 128)) << 1) - - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9)); - M3 = - (((int8_t) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1; - - if (A3 < 0) - A3 = ~A3; - if (M3 < 0) - M3 = ~M3; + A3 = (((Proto->A * (Feature->X - 128)) << 1) - + (Proto->B * (Feature->Y - 128)) + (Proto->C << 9)); + M3 = (((int8_t)(Feature->Theta - Proto->Angle)) * kIntThetaFudge) + << 1; + + if (A3 < 0) A3 = ~A3; + if (M3 < 0) M3 = ~M3; A3 >>= mult_trunc_shift_bits_; M3 >>= mult_trunc_shift_bits_; if (static_cast(A3) > evidence_mult_mask_) @@ -858,10 +837,9 @@ int IntegerMatcher::UpdateTablesForFeature( else Evidence = similarity_evidence_table_[A4]; - if (PrintFeatureMatchesOn (Debug)) - IMDebugConfiguration (FeatureNum, - ActualProtoNum + proto_offset, - Evidence, ConfigMask, ConfigWord); + if (PrintFeatureMatchesOn(Debug)) + IMDebugConfiguration(FeatureNum, ActualProtoNum + proto_offset, + Evidence, ConfigMask, ConfigWord); ConfigWord &= *ConfigMask; @@ -880,16 +858,15 @@ int IntegerMatcher::UpdateTablesForFeature( } UINT8Pointer = - &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]); + &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]); for (ProtoIndex = - ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset]; - ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) { + ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset]; + ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) { if (Evidence > *UINT8Pointer) { Temp = *UINT8Pointer; *UINT8Pointer = Evidence; Evidence = Temp; - } - else if (Evidence == 0) + } else if (Evidence == 0) break; } } @@ -920,13 +897,11 @@ int IntegerMatcher::UpdateTablesForFeature( * @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. */ #ifndef GRAPHICS_DISABLED -void IntegerMatcher::DebugFeatureProtoError( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - const ScratchEvidence& tables, - int16_t NumFeatures, - int Debug) { +void IntegerMatcher::DebugFeatureProtoError(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + const ScratchEvidence& tables, + int16_t NumFeatures, int Debug) { FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS]; int ConfigNum; uint32_t ConfigWord; @@ -944,48 +919,42 @@ void IntegerMatcher::DebugFeatureProtoError( cprintf("Feature Error for Configurations:\n"); for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) { - cprintf( - " %5.1f", - 100.0 * (1.0 - - (FLOAT32) tables.sum_feature_evidence_[ConfigNum] - / NumFeatures / 256.0)); + cprintf(" %5.1f", + 100.0 * (1.0 - (FLOAT32)tables.sum_feature_evidence_[ConfigNum] / + NumFeatures / 256.0)); } cprintf("\n\n\n"); } - if (PrintMatchSummaryOn (Debug)) { - cprintf ("Proto Mask:\n"); + if (PrintMatchSummaryOn(Debug)) { + cprintf("Proto Mask:\n"); for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; - ProtoSetIndex++) { + ProtoSetIndex++) { ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); - for (ProtoWordNum = 0; ProtoWordNum < 2; - ProtoWordNum++, ProtoMask++) { + for (ProtoWordNum = 0; ProtoWordNum < 2; ProtoWordNum++, ProtoMask++) { ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); - for (ProtoNum = 0; - ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1)) - && (ActualProtoNum < ClassTemplate->NumProtos)); - ProtoNum++, ActualProtoNum++) - cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1)); - cprintf ("\n"); + for (ProtoNum = 0; ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1)) && + (ActualProtoNum < ClassTemplate->NumProtos)); + ProtoNum++, ActualProtoNum++) + cprintf("%1d", (((*ProtoMask) >> ProtoNum) & 1)); + cprintf("\n"); } } - cprintf ("\n"); + cprintf("\n"); } - for (int i = 0; i < ClassTemplate->NumConfigs; i++) - ProtoConfigs[i] = 0; + for (int i = 0; i < ClassTemplate->NumConfigs; i++) ProtoConfigs[i] = 0; - if (PrintProtoMatchesOn (Debug)) { - cprintf ("Proto Evidence:\n"); + if (PrintProtoMatchesOn(Debug)) { + cprintf("Proto Evidence:\n"); for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; - ProtoSetIndex++) { + ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); - for (ProtoNum = 0; - ((ProtoNum < PROTOS_PER_PROTO_SET) && - (ActualProtoNum < ClassTemplate->NumProtos)); + for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && + (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) { - cprintf ("P %3d =", ActualProtoNum); + cprintf("P %3d =", ActualProtoNum); int temp = 0; for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) { uint8_t data = tables.proto_evidence_[ActualProtoNum][j]; @@ -999,9 +968,8 @@ void IntegerMatcher::DebugFeatureProtoError( ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0]; ConfigNum = 0; while (ConfigWord) { - cprintf ("%5d", ConfigWord & 1 ? temp : 0); - if (ConfigWord & 1) - ProtoConfigs[ConfigNum] += temp; + cprintf("%5d", ConfigWord & 1 ? temp : 0); + if (ConfigWord & 1) ProtoConfigs[ConfigNum] += temp; ConfigNum++; ConfigWord >>= 1; } @@ -1010,37 +978,34 @@ void IntegerMatcher::DebugFeatureProtoError( } } - if (PrintMatchSummaryOn (Debug)) { - cprintf ("Proto Error for Configurations:\n"); + if (PrintMatchSummaryOn(Debug)) { + cprintf("Proto Error for Configurations:\n"); for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) - cprintf (" %5.1f", - 100.0 * (1.0 - - ProtoConfigs[ConfigNum] / - ClassTemplate->ConfigLengths[ConfigNum] / 256.0)); - cprintf ("\n\n"); + cprintf( + " %5.1f", + 100.0 * (1.0 - ProtoConfigs[ConfigNum] / + ClassTemplate->ConfigLengths[ConfigNum] / 256.0)); + cprintf("\n\n"); } - if (PrintProtoMatchesOn (Debug)) { - cprintf ("Proto Sum for Configurations:\n"); + if (PrintProtoMatchesOn(Debug)) { + cprintf("Proto Sum for Configurations:\n"); for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) - cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0); - cprintf ("\n\n"); + cprintf(" %4.1f", ProtoConfigs[ConfigNum] / 256.0); + cprintf("\n\n"); - cprintf ("Proto Length for Configurations:\n"); + cprintf("Proto Length for Configurations:\n"); for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) - cprintf (" %4.1f", - (float) ClassTemplate->ConfigLengths[ConfigNum]); - cprintf ("\n\n"); + cprintf(" %4.1f", (float)ClassTemplate->ConfigLengths[ConfigNum]); + cprintf("\n\n"); } - } -void IntegerMatcher::DisplayProtoDebugInfo( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - const ScratchEvidence& tables, - bool SeparateDebugWindows) { +void IntegerMatcher::DisplayProtoDebugInfo(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + const ScratchEvidence& tables, + bool SeparateDebugWindows) { uint16_t ProtoNum; uint16_t ActualProtoNum; PROTO_SET ProtoSet; @@ -1052,14 +1017,12 @@ void IntegerMatcher::DisplayProtoDebugInfo( InitProtoDisplayWindowIfReqd(); } - for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET; - for (ProtoNum = 0; - ((ProtoNum < PROTOS_PER_PROTO_SET) && - (ActualProtoNum < ClassTemplate->NumProtos)); + for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && + (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) { /* Compute Average for Actual Proto */ int temp = 0; @@ -1075,17 +1038,11 @@ void IntegerMatcher::DisplayProtoDebugInfo( } } - void IntegerMatcher::DisplayFeatureDebugInfo( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int16_t NumFeatures, - const INT_FEATURE_STRUCT* Features, - int AdaptFeatureThreshold, - int Debug, - bool SeparateDebugWindows) { - ScratchEvidence *tables = new ScratchEvidence(); + INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, + int16_t NumFeatures, const INT_FEATURE_STRUCT* Features, + int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows) { + ScratchEvidence* tables = new ScratchEvidence(); tables->Clear(ClassTemplate); @@ -1096,9 +1053,8 @@ void IntegerMatcher::DisplayFeatureDebugInfo( } for (int Feature = 0; Feature < NumFeatures; Feature++) { - UpdateTablesForFeature( - ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature], - tables, 0); + UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, Feature, + &Features[Feature], tables, 0); /* Find Best Evidence for Current Feature */ int best = 0; @@ -1124,10 +1080,10 @@ void IntegerMatcher::DisplayFeatureDebugInfo( /** * Add sum of Proto Evidences into Sum Of Feature Evidence Array */ -void ScratchEvidence::UpdateSumOfProtoEvidences( - INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, int16_t NumFeatures) { - - int *IntPointer; +void ScratchEvidence::UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, + BIT_VECTOR ConfigMask, + int16_t NumFeatures) { + int* IntPointer; uint32_t ConfigWord; int ProtoSetIndex; uint16_t ProtoNum; @@ -1146,14 +1102,13 @@ void ScratchEvidence::UpdateSumOfProtoEvidences( ProtoNum++, ActualProtoNum++) { int temp = 0; for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++) - temp += proto_evidence_[ActualProtoNum] [i]; + temp += proto_evidence_[ActualProtoNum][i]; ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0]; ConfigWord &= *ConfigMask; IntPointer = sum_feature_evidence_; while (ConfigWord) { - if (ConfigWord & 1) - *IntPointer += temp; + if (ConfigWord & 1) *IntPointer += temp; IntPointer++; ConfigWord >>= 1; } @@ -1165,12 +1120,12 @@ void ScratchEvidence::UpdateSumOfProtoEvidences( * Normalize Sum of Proto and Feature Evidence by dividing by the sum of * the Feature Lengths and the Proto Lengths for each configuration. */ -void ScratchEvidence::NormalizeSums( - INT_CLASS ClassTemplate, int16_t NumFeatures, int32_t used_features) { - +void ScratchEvidence::NormalizeSums(INT_CLASS ClassTemplate, + int16_t NumFeatures, + int32_t used_features) { for (int i = 0; i < ClassTemplate->NumConfigs; i++) { sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) / - (NumFeatures + ClassTemplate->ConfigLengths[i]); + (NumFeatures + ClassTemplate->ConfigLengths[i]); } } @@ -1181,10 +1136,9 @@ void ScratchEvidence::NormalizeSums( * @note Exceptions: none * @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. */ -int IntegerMatcher::FindBestMatch( - INT_CLASS class_template, - const ScratchEvidence &tables, - UnicharRating* result) { +int IntegerMatcher::FindBestMatch(INT_CLASS class_template, + const ScratchEvidence& tables, + UnicharRating* result) { int best_match = 0; result->config = 0; result->fonts.truncate(0); @@ -1217,7 +1171,7 @@ float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length, int matcher_multiplier) { return (rating * blob_length + matcher_multiplier * normalization_factor / 256.0) / - (blob_length + matcher_multiplier); + (blob_length + matcher_multiplier); } /** @@ -1231,8 +1185,7 @@ float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length, * @note Exceptions: none * @note History: Tue Feb 19 10:24:24 MST 1991, RWM, Created. */ -void -HeapSort (int n, int ra[], int rb[]) { +void HeapSort(int n, int ra[], int rb[]) { int i, rra, rrb; int l, j, ir; @@ -1242,8 +1195,7 @@ HeapSort (int n, int ra[], int rb[]) { if (l > 1) { rra = ra[--l]; rrb = rb[l]; - } - else { + } else { rra = ra[ir]; rrb = rb[ir]; ra[ir] = ra[1]; @@ -1257,14 +1209,12 @@ HeapSort (int n, int ra[], int rb[]) { i = l; j = l << 1; while (j <= ir) { - if (j < ir && ra[j] < ra[j + 1]) - ++j; + if (j < ir && ra[j] < ra[j + 1]) ++j; if (rra < ra[j]) { ra[i] = ra[j]; rb[i] = rb[j]; j += (i = j); - } - else + } else j = ir + 1; } ra[i] = rra; diff --git a/src/classify/intmatcher.h b/src/classify/intmatcher.h index fac195f5b9..c410ccf745 100644 --- a/src/classify/intmatcher.h +++ b/src/classify/intmatcher.h @@ -15,8 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef INTMATCHER_H -#define INTMATCHER_H +#ifndef INTMATCHER_H +#define INTMATCHER_H #include "params.h" @@ -31,12 +31,11 @@ extern BOOL_VAR_H(disable_character_fragments, FALSE, extern INT_VAR_H(classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: "); - /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "intproto.h" #include "cutoffs.h" +#include "intproto.h" namespace tesseract { struct UnicharRating; @@ -63,8 +62,8 @@ extern INT_VAR_H(classify_adapt_feature_thresh, 230, Public Function Prototypes ----------------------------------------------------------------------------**/ -#define SE_TABLE_BITS 9 -#define SE_TABLE_SIZE 512 +#define SE_TABLE_BITS 9 +#define SE_TABLE_SIZE 512 struct ScratchEvidence { uint8_t feature_evidence_[MAX_NUM_CONFIGS]; @@ -75,11 +74,10 @@ struct ScratchEvidence { void ClearFeatureEvidence(const INT_CLASS class_template); void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures, int32_t used_features); - void UpdateSumOfProtoEvidences( - INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, int16_t NumFeatures); + void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, + int16_t NumFeatures); }; - class IntegerMatcher { public: // Integer Matcher Theta Fudge (0-255). @@ -95,108 +93,77 @@ class IntegerMatcher { IntegerMatcher() : classify_debug_level_(nullptr) {} - void Init(tesseract::IntParam *classify_debug_level); + void Init(tesseract::IntParam* classify_debug_level); - void Match(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int16_t NumFeatures, + void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT* Features, - tesseract::UnicharRating* Result, - int AdaptFeatureThreshold, - int Debug, - bool SeparateDebugWindows); + tesseract::UnicharRating* Result, int AdaptFeatureThreshold, + int Debug, bool SeparateDebugWindows); // Applies the CN normalization factor to the given rating and returns // the modified rating. float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier); - int FindGoodProtos(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uint16_t BlobLength, - int16_t NumFeatures, - INT_FEATURE_ARRAY Features, - PROTO_ID *ProtoArray, - int AdaptProtoThreshold, - int Debug); - - int FindBadFeatures(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uint16_t BlobLength, - int16_t NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_ID *FeatureArray, - int AdaptFeatureThreshold, + int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, uint16_t BlobLength, + int16_t NumFeatures, INT_FEATURE_ARRAY Features, + PROTO_ID* ProtoArray, int AdaptProtoThreshold, int Debug); + + int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, uint16_t BlobLength, + int16_t NumFeatures, INT_FEATURE_ARRAY Features, + FEATURE_ID* FeatureArray, int AdaptFeatureThreshold, int Debug); private: - int UpdateTablesForFeature( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int FeatureNum, - const INT_FEATURE_STRUCT* Feature, - ScratchEvidence *evidence, - int Debug); - - int FindBestMatch(INT_CLASS ClassTemplate, - const ScratchEvidence &tables, + int UpdateTablesForFeature(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, int FeatureNum, + const INT_FEATURE_STRUCT* Feature, + ScratchEvidence* evidence, int Debug); + + int FindBestMatch(INT_CLASS ClassTemplate, const ScratchEvidence& tables, tesseract::UnicharRating* Result); #ifndef GRAPHICS_DISABLED - void DebugFeatureProtoError( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - const ScratchEvidence &tables, - int16_t NumFeatures, - int Debug); - - void DisplayProtoDebugInfo( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - const ScratchEvidence &tables, - bool SeparateDebugWindows); - - void DisplayFeatureDebugInfo( - INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - int16_t NumFeatures, - const INT_FEATURE_STRUCT* Features, - int AdaptFeatureThreshold, - int Debug, - bool SeparateDebugWindows); + void DebugFeatureProtoError(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + const ScratchEvidence& tables, + int16_t NumFeatures, int Debug); + + void DisplayProtoDebugInfo(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + const ScratchEvidence& tables, + bool SeparateDebugWindows); + + void DisplayFeatureDebugInfo(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, int16_t NumFeatures, + const INT_FEATURE_STRUCT* Features, + int AdaptFeatureThreshold, int Debug, + bool SeparateDebugWindows); #endif - private: uint8_t similarity_evidence_table_[SE_TABLE_SIZE]; uint32_t evidence_table_mask_; uint32_t mult_trunc_shift_bits_; uint32_t table_trunc_shift_bits_; - tesseract::IntParam *classify_debug_level_; + tesseract::IntParam* classify_debug_level_; uint32_t evidence_mult_mask_; }; /**---------------------------------------------------------------------------- Private Function Prototypes ----------------------------------------------------------------------------**/ -void IMDebugConfiguration(INT_FEATURE FeatureNum, - uint16_t ActualProtoNum, - uint8_t Evidence, - BIT_VECTOR ConfigMask, +void IMDebugConfiguration(INT_FEATURE FeatureNum, uint16_t ActualProtoNum, + uint8_t Evidence, BIT_VECTOR ConfigMask, uint32_t ConfigWord); -void IMDebugConfigurationSum(INT_FEATURE FeatureNum, - uint8_t *FeatureEvidence, +void IMDebugConfigurationSum(INT_FEATURE FeatureNum, uint8_t* FeatureEvidence, int32_t ConfigCount); -void HeapSort (int n, int ra[], int rb[]); +void HeapSort(int n, int ra[], int rb[]); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/src/classify/intproto.cpp b/src/classify/intproto.cpp index ced8c57983..536dcda465 100644 --- a/src/classify/intproto.cpp +++ b/src/classify/intproto.cpp @@ -20,9 +20,9 @@ -----------------------------------------------------------------------------*/ #include +#include #include #include -#include #ifdef __UNIX__ #include #endif @@ -50,43 +50,38 @@ using tesseract::FontSet; /* match debug display constants*/ -#define PROTO_PRUNER_SCALE (4.0) +#define PROTO_PRUNER_SCALE (4.0) -#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE) -#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE) +#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE) +#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE) #define INT_XHEIGHT (0.75 * INT_CHAR_NORM_RANGE) -#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE) +#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE) -#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE) -#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE) -#define INT_XRADIUS (0.2 * INT_CHAR_NORM_RANGE) -#define INT_YRADIUS (0.2 * INT_CHAR_NORM_RANGE) +#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE) +#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE) +#define INT_XRADIUS (0.2 * INT_CHAR_NORM_RANGE) +#define INT_YRADIUS (0.2 * INT_CHAR_NORM_RANGE) #define INT_MIN_X 0 #define INT_MIN_Y 0 #define INT_MAX_X INT_CHAR_NORM_RANGE #define INT_MAX_Y INT_CHAR_NORM_RANGE /** define pad used to snap near horiz/vertical protos to horiz/vertical */ -#define HV_TOLERANCE (0.0025) /* approx 0.9 degrees */ +#define HV_TOLERANCE (0.0025) /* approx 0.9 degrees */ -typedef enum -{ StartSwitch, EndSwitch, LastSwitch } -SWITCH_TYPE; -#define MAX_NUM_SWITCHES 3 +typedef enum { StartSwitch, EndSwitch, LastSwitch } SWITCH_TYPE; +#define MAX_NUM_SWITCHES 3 -typedef struct -{ +typedef struct { SWITCH_TYPE Type; int8_t X, Y; int16_t YInit; int16_t Delta; } - FILL_SWITCH; -typedef struct -{ +typedef struct { uint8_t NextSwitch; uint8_t AngleStart, AngleEnd; int8_t X; @@ -95,91 +90,79 @@ typedef struct FILL_SWITCH Switch[MAX_NUM_SWITCHES]; } - TABLE_FILLER; -typedef struct -{ +typedef struct { int8_t X; int8_t YStart, YEnd; uint8_t AngleStart, AngleEnd; } - FILL_SPEC; - /* constants for conversion from old inttemp format */ -#define OLD_MAX_NUM_CONFIGS 32 -#define OLD_WERDS_PER_CONFIG_VEC ((OLD_MAX_NUM_CONFIGS + BITS_PER_WERD - 1) /\ - BITS_PER_WERD) +#define OLD_MAX_NUM_CONFIGS 32 +#define OLD_WERDS_PER_CONFIG_VEC \ + ((OLD_MAX_NUM_CONFIGS + BITS_PER_WERD - 1) / BITS_PER_WERD) /*----------------------------------------------------------------------------- Macros -----------------------------------------------------------------------------*/ /** macro for performing circular increments of bucket indices */ -#define CircularIncrement(i,r) (((i) < (r) - 1)?((i)++):((i) = 0)) +#define CircularIncrement(i, r) (((i) < (r)-1) ? ((i)++) : ((i) = 0)) /** macro for mapping floats to ints without bounds checking */ -#define MapParam(P,O,N) (floor (((P) + (O)) * (N))) +#define MapParam(P, O, N) (floor(((P) + (O)) * (N))) /*--------------------------------------------------------------------------- Private Function Prototypes ----------------------------------------------------------------------------*/ -FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets); +FLOAT32 +BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets); -FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets); +FLOAT32 +BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets); -void DoFill(FILL_SPEC *FillSpec, - CLASS_PRUNER_STRUCT* Pruner, - uint32_t ClassMask, - uint32_t ClassCount, - uint32_t WordIndex); +void DoFill(FILL_SPEC* FillSpec, CLASS_PRUNER_STRUCT* Pruner, + uint32_t ClassMask, uint32_t ClassCount, uint32_t WordIndex); bool FillerDone(TABLE_FILLER* Filler); -void FillPPCircularBits(uint32_t - ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug); +void FillPPCircularBits( + uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, + FLOAT32 Center, FLOAT32 Spread, bool debug); void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug); -void GetCPPadsForLevel(int Level, - FLOAT32 *EndPad, - FLOAT32 *SidePad, - FLOAT32 *AnglePad); +void GetCPPadsForLevel(int Level, FLOAT32* EndPad, FLOAT32* SidePad, + FLOAT32* AnglePad); ScrollView::Color GetMatchColorFor(FLOAT32 Evidence); -void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill); +void GetNextFill(TABLE_FILLER* Filler, FILL_SPEC* Fill); -void InitTableFiller(FLOAT32 EndPad, - FLOAT32 SidePad, - FLOAT32 AnglePad, - PROTO Proto, - TABLE_FILLER *Filler); +void InitTableFiller(FLOAT32 EndPad, FLOAT32 SidePad, FLOAT32 AnglePad, + PROTO Proto, TABLE_FILLER* Filler); #ifndef GRAPHICS_DISABLED -void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, +void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature, ScrollView::Color color); -void RenderIntProto(ScrollView *window, - INT_CLASS Class, - PROTO_ID ProtoId, +void RenderIntProto(ScrollView* window, INT_CLASS Class, PROTO_ID ProtoId, ScrollView::Color color); #endif // GRAPHICS_DISABLED -int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id); +int TruncateParam(FLOAT32 Param, int Min, int Max, char* Id); /*----------------------------------------------------------------------------- Global Data Definitions and Declarations -----------------------------------------------------------------------------*/ /* global display lists used to display proto and feature match information*/ -ScrollView *IntMatchWindow = nullptr; -ScrollView *FeatureDisplayWindow = nullptr; -ScrollView *ProtoDisplayWindow = nullptr; +ScrollView* IntMatchWindow = nullptr; +ScrollView* FeatureDisplayWindow = nullptr; +ScrollView* ProtoDisplayWindow = nullptr; /*----------------------------------------------------------------------------- Variables @@ -187,12 +170,9 @@ ScrollView *ProtoDisplayWindow = nullptr; /* control knobs */ INT_VAR(classify_num_cp_levels, 3, "Number of Class Pruner Levels"); -double_VAR(classify_cp_angle_pad_loose, 45.0, - "Class Pruner Angle Pad Loose"); -double_VAR(classify_cp_angle_pad_medium, 20.0, - "Class Pruner Angle Pad Medium"); -double_VAR(classify_cp_angle_pad_tight, 10.0, - "CLass Pruner Angle Pad Tight"); +double_VAR(classify_cp_angle_pad_loose, 45.0, "Class Pruner Angle Pad Loose"); +double_VAR(classify_cp_angle_pad_medium, 20.0, "Class Pruner Angle Pad Medium"); +double_VAR(classify_cp_angle_pad_tight, 10.0, "CLass Pruner Angle Pad Tight"); double_VAR(classify_cp_end_pad_loose, 0.5, "Class Pruner End Pad Loose"); double_VAR(classify_cp_end_pad_medium, 0.5, "Class Pruner End Pad Medium"); double_VAR(classify_cp_end_pad_tight, 0.5, "Class Pruner End Pad Tight"); @@ -209,18 +189,16 @@ double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad"); /// Builds a feature from an FCOORD for position with all the necessary /// clipping and rounding. INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uint8_t theta) - : X(ClipToRange(static_cast(pos.x() + 0.5), 0, 255)), - Y(ClipToRange(static_cast(pos.y() + 0.5), 0, 255)), - Theta(theta), - CP_misses(0) { -} + : X(ClipToRange(static_cast(pos.x() + 0.5), 0, 255)), + Y(ClipToRange(static_cast(pos.y() + 0.5), 0, 255)), + Theta(theta), + CP_misses(0) {} /** Builds a feature from ints with all the necessary clipping and casting. */ INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta) - : X(static_cast(ClipToRange(x, 0, UINT8_MAX))), - Y(static_cast(ClipToRange(y, 0, UINT8_MAX))), - Theta(static_cast(ClipToRange(theta, 0, UINT8_MAX))), - CP_misses(0) { -} + : X(static_cast(ClipToRange(x, 0, UINT8_MAX))), + Y(static_cast(ClipToRange(y, 0, UINT8_MAX))), + Theta(static_cast(ClipToRange(theta, 0, UINT8_MAX))), + CP_misses(0) {} /** * This routine adds a new class structure to a set of @@ -239,22 +217,21 @@ INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta) void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class) { int Pruner; - assert (LegalClassId (ClassId)); + assert(LegalClassId(ClassId)); if (ClassId != Templates->NumClasses) { fprintf(stderr, "Please make sure that classes are added to templates"); fprintf(stderr, " in increasing order of ClassIds\n"); exit(1); } - ClassForClassId (Templates, ClassId) = Class; + ClassForClassId(Templates, ClassId) = Class; Templates->NumClasses++; - if (Templates->NumClasses > MaxNumClassesIn (Templates)) { + if (Templates->NumClasses > MaxNumClassesIn(Templates)) { Pruner = Templates->NumClassPruners++; Templates->ClassPruners[Pruner] = new CLASS_PRUNER_STRUCT; memset(Templates->ClassPruners[Pruner], 0, sizeof(CLASS_PRUNER_STRUCT)); } -} /* AddIntClass */ - +} /* AddIntClass */ /** * This routine returns the index of the next free config @@ -276,8 +253,7 @@ int AddIntConfig(INT_CLASS Class) { Index = Class->NumConfigs++; Class->ConfigLengths[Index] = 0; return Index; -} /* AddIntConfig */ - +} /* AddIntConfig */ /** * This routine allocates the next free proto in Class and @@ -296,33 +272,32 @@ int AddIntProto(INT_CLASS Class) { int ProtoSetId; PROTO_SET ProtoSet; INT_PROTO Proto; - uint32_t *Word; + uint32_t* Word; - if (Class->NumProtos >= MAX_NUM_PROTOS) - return (NO_PROTO); + if (Class->NumProtos >= MAX_NUM_PROTOS) return (NO_PROTO); Index = Class->NumProtos++; if (Class->NumProtos > MaxNumIntProtosIn(Class)) { ProtoSetId = Class->NumProtoSets++; - ProtoSet = (PROTO_SET) Emalloc(sizeof(PROTO_SET_STRUCT)); + ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT)); Class->ProtoSets[ProtoSetId] = ProtoSet; memset(ProtoSet, 0, sizeof(*ProtoSet)); /* reallocate space for the proto lengths and install in class */ - Class->ProtoLengths = - (uint8_t *)Erealloc(Class->ProtoLengths, - MaxNumIntProtosIn(Class) * sizeof(uint8_t)); + Class->ProtoLengths = (uint8_t*)Erealloc( + Class->ProtoLengths, MaxNumIntProtosIn(Class) * sizeof(uint8_t)); memset(&Class->ProtoLengths[Index], 0, sizeof(*Class->ProtoLengths) * (MaxNumIntProtosIn(Class) - Index)); } /* initialize proto so its length is zero and it isn't in any configs */ Class->ProtoLengths[Index] = 0; - Proto = ProtoForProtoId (Class, Index); - for (Word = Proto->Configs; - Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0); + Proto = ProtoForProtoId(Class, Index); + for (Word = Proto->Configs; Word < Proto->Configs + WERDS_PER_CONFIG_VEC; + *Word++ = 0) + ; return (Index); } @@ -340,9 +315,9 @@ int AddIntProto(INT_CLASS Class) { * @note Exceptions: none * @note History: Wed Feb 13 08:49:54 1991, DSJ, Created. */ -void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, - INT_TEMPLATES Templates) -#define MAX_LEVEL 2 +void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, + INT_TEMPLATES Templates) +#define MAX_LEVEL 2 { CLASS_PRUNER_STRUCT* Pruner; uint32_t ClassMask; @@ -353,21 +328,21 @@ void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, TABLE_FILLER TableFiller; FILL_SPEC FillSpec; - Pruner = CPrunerFor (Templates, ClassId); - WordIndex = CPrunerWordIndexFor (ClassId); - ClassMask = CPrunerMaskFor (MAX_LEVEL, ClassId); + Pruner = CPrunerFor(Templates, ClassId); + WordIndex = CPrunerWordIndexFor(ClassId); + ClassMask = CPrunerMaskFor(MAX_LEVEL, ClassId); for (Level = classify_num_cp_levels - 1; Level >= 0; Level--) { GetCPPadsForLevel(Level, &EndPad, &SidePad, &AnglePad); - ClassCount = CPrunerMaskFor (Level, ClassId); + ClassCount = CPrunerMaskFor(Level, ClassId); InitTableFiller(EndPad, SidePad, AnglePad, Proto, &TableFiller); - while (!FillerDone (&TableFiller)) { + while (!FillerDone(&TableFiller)) { GetNextFill(&TableFiller, &FillSpec); DoFill(&FillSpec, Pruner, ClassMask, ClassCount, WordIndex); } } -} /* AddProtoToClassPruner */ +} /* AddProtoToClassPruner */ /** * This routine updates the proto pruner lookup tables @@ -382,51 +357,48 @@ void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, * @note Exceptions: none * @note History: Fri Feb 8 13:07:19 1991, DSJ, Created. */ -void AddProtoToProtoPruner(PROTO Proto, int ProtoId, - INT_CLASS Class, bool debug) { +void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, + bool debug) { FLOAT32 Angle, X, Y, Length; FLOAT32 Pad; int Index; PROTO_SET ProtoSet; if (ProtoId >= Class->NumProtos) - cprintf("AddProtoToProtoPruner:assert failed: %d < %d", - ProtoId, Class->NumProtos); + cprintf("AddProtoToProtoPruner:assert failed: %d < %d", ProtoId, + Class->NumProtos); assert(ProtoId < Class->NumProtos); - Index = IndexForProto (ProtoId); - ProtoSet = Class->ProtoSets[SetForProto (ProtoId)]; + Index = IndexForProto(ProtoId); + ProtoSet = Class->ProtoSets[SetForProto(ProtoId)]; Angle = Proto->Angle; #ifndef _WIN32 assert(!std::isnan(Angle)); #endif - FillPPCircularBits (ProtoSet->ProtoPruner[PRUNER_ANGLE], Index, - Angle + ANGLE_SHIFT, classify_pp_angle_pad / 360.0, - debug); + FillPPCircularBits(ProtoSet->ProtoPruner[PRUNER_ANGLE], Index, + Angle + ANGLE_SHIFT, classify_pp_angle_pad / 360.0, debug); Angle *= 2.0 * PI; Length = Proto->Length; X = Proto->X + X_SHIFT; - Pad = std::max(fabs (cos (Angle)) * (Length / 2.0 + - classify_pp_end_pad * - GetPicoFeatureLength ()), - fabs (sin (Angle)) * (classify_pp_side_pad * - GetPicoFeatureLength ())); + Pad = std::max( + fabs(cos(Angle)) * + (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()), + fabs(sin(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength())); FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug); Y = Proto->Y + Y_SHIFT; - Pad = std::max(fabs (sin (Angle)) * (Length / 2.0 + - classify_pp_end_pad * - GetPicoFeatureLength ()), - fabs (cos (Angle)) * (classify_pp_side_pad * - GetPicoFeatureLength ())); + Pad = std::max( + fabs(sin(Angle)) * + (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()), + fabs(cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength())); FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug); -} /* AddProtoToProtoPruner */ +} /* AddProtoToProtoPruner */ /** * Returns a quantized bucket for the given param shifted by offset, @@ -450,8 +422,7 @@ uint16_t Bucket16For(FLOAT32 param, FLOAT32 offset, int num_buckets) { uint8_t CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets) { int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); return static_cast(Modulo(bucket, num_buckets)); -} /* CircBucketFor */ - +} /* CircBucketFor */ #ifndef GRAPHICS_DISABLED /** @@ -466,9 +437,8 @@ uint8_t CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets) { * @note History: Thu Mar 21 15:40:19 1991, DSJ, Created. */ void UpdateMatchDisplay() { - if (IntMatchWindow != nullptr) - IntMatchWindow->Update(); -} /* ClearMatchDisplay */ + if (IntMatchWindow != nullptr) IntMatchWindow->Update(); +} /* ClearMatchDisplay */ #endif /** @@ -490,8 +460,7 @@ void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) { INT_PROTO Proto; int TotalLength; - for (ProtoId = 0, TotalLength = 0; - ProtoId < Class->NumProtos; ProtoId++) { + for (ProtoId = 0, TotalLength = 0; ProtoId < Class->NumProtos; ProtoId++) { if (test_bit(Config, ProtoId)) { Proto = ProtoForProtoId(Class, ProtoId); SET_BIT(Proto->Configs, ConfigId); @@ -499,8 +468,7 @@ void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) { } } Class->ConfigLengths[ConfigId] = TotalLength; -} /* ConvertConfig */ - +} /* ConvertConfig */ namespace tesseract { /** @@ -535,15 +503,15 @@ void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { if (Param < 0 || Param >= 256) P->Angle = 0; else - P->Angle = (uint8_t) Param; + P->Angle = (uint8_t)Param; /* round proto length to nearest integer number of pico-features */ Param = (Proto->Length / GetPicoFeatureLength()) + 0.5; Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, nullptr); if (classify_learning_debug_level >= 2) - cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)", - P->A, P->B, P->C, Class->ProtoLengths[ProtoId]); -} /* ConvertProto */ + cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)", P->A, P->B, P->C, + Class->ProtoLengths[ProtoId]); +} /* ConvertProto */ /** * This routine converts from the old floating point format @@ -555,9 +523,9 @@ void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { * @note Exceptions: none * @note History: Thu Feb 7 14:40:42 1991, DSJ, Created. */ -INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, - const UNICHARSET& - target_unicharset) { +INT_TEMPLATES +Classify::CreateIntTemplates(CLASSES FloatProtos, + const UNICHARSET& target_unicharset) { INT_TEMPLATES IntTemplates; CLASS_TYPE FClass; INT_CLASS IClass; @@ -604,10 +572,9 @@ INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, } } return (IntTemplates); -} /* CreateIntTemplates */ +} /* CreateIntTemplates */ } // namespace tesseract - #ifndef GRAPHICS_DISABLED /** * This routine renders the specified feature into a @@ -621,13 +588,13 @@ INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, * @note Exceptions: none * @note History: Thu Mar 21 14:45:04 1991, DSJ, Created. */ -void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, FLOAT32 Evidence) { +void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) { ScrollView::Color color = GetMatchColorFor(Evidence); RenderIntFeature(IntMatchWindow, Feature, color); if (FeatureDisplayWindow) { RenderIntFeature(FeatureDisplayWindow, Feature, color); } -} /* DisplayIntFeature */ +} /* DisplayIntFeature */ /** * This routine renders the specified proto into a @@ -648,7 +615,7 @@ void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) { if (ProtoDisplayWindow) { RenderIntProto(ProtoDisplayWindow, Class, ProtoId, color); } -} /* DisplayIntProto */ +} /* DisplayIntProto */ #endif /** @@ -662,16 +629,17 @@ void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) { * @note Exceptions: none * @note History: Fri Feb 8 10:51:23 1991, DSJ, Created. */ -INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { +INT_CLASS +NewIntClass(int MaxNumProtos, int MaxNumConfigs) { INT_CLASS Class; PROTO_SET ProtoSet; int i; assert(MaxNumConfigs <= MAX_NUM_CONFIGS); - Class = (INT_CLASS) Emalloc(sizeof(INT_CLASS_STRUCT)); - Class->NumProtoSets = ((MaxNumProtos + PROTOS_PER_PROTO_SET - 1) / - PROTOS_PER_PROTO_SET); + Class = (INT_CLASS)Emalloc(sizeof(INT_CLASS_STRUCT)); + Class->NumProtoSets = + ((MaxNumProtos + PROTOS_PER_PROTO_SET - 1) / PROTOS_PER_PROTO_SET); assert(Class->NumProtoSets <= MAX_NUM_PROTO_SETS); @@ -680,15 +648,15 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { for (i = 0; i < Class->NumProtoSets; i++) { /* allocate space for a proto set, install in class, and initialize */ - ProtoSet = (PROTO_SET) Emalloc(sizeof(PROTO_SET_STRUCT)); + ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT)); memset(ProtoSet, 0, sizeof(*ProtoSet)); Class->ProtoSets[i] = ProtoSet; /* allocate space for the proto lengths and install in class */ } - if (MaxNumIntProtosIn (Class) > 0) { + if (MaxNumIntProtosIn(Class) > 0) { Class->ProtoLengths = - (uint8_t *)Emalloc(MaxNumIntProtosIn (Class) * sizeof (uint8_t)); + (uint8_t*)Emalloc(MaxNumIntProtosIn(Class) * sizeof(uint8_t)); memset(Class->ProtoLengths, 0, MaxNumIntProtosIn(Class) * sizeof(*Class->ProtoLengths)); } else { @@ -698,17 +666,16 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { return (Class); -} /* NewIntClass */ - +} /* NewIntClass */ void free_int_class(INT_CLASS int_class) { int i; for (i = 0; i < int_class->NumProtoSets; i++) { - Efree (int_class->ProtoSets[i]); + Efree(int_class->ProtoSets[i]); } if (int_class->ProtoLengths != nullptr) { - Efree (int_class->ProtoLengths); + Efree(int_class->ProtoLengths); } Efree(int_class); } @@ -721,20 +688,19 @@ void free_int_class(INT_CLASS int_class) { * @note Exceptions: none * @note History: Fri Feb 8 08:38:51 1991, DSJ, Created. */ -INT_TEMPLATES NewIntTemplates() { +INT_TEMPLATES +NewIntTemplates() { INT_TEMPLATES T; int i; - T = (INT_TEMPLATES) Emalloc (sizeof (INT_TEMPLATES_STRUCT)); + T = (INT_TEMPLATES)Emalloc(sizeof(INT_TEMPLATES_STRUCT)); T->NumClasses = 0; T->NumClassPruners = 0; - for (i = 0; i < MAX_NUM_CLASSES; i++) - ClassForClassId (T, i) = nullptr; + for (i = 0; i < MAX_NUM_CLASSES; i++) ClassForClassId(T, i) = nullptr; return (T); -} /* NewIntTemplates */ - +} /* NewIntTemplates */ /*---------------------------------------------------------------------------*/ void free_int_templates(INT_TEMPLATES templates) { @@ -747,7 +713,6 @@ void free_int_templates(INT_TEMPLATES templates) { Efree(templates); } - namespace tesseract { /** * This routine reads a set of integer templates from @@ -759,25 +724,26 @@ namespace tesseract { * @note Exceptions: none * @note History: Wed Feb 27 11:48:46 1991, DSJ, Created. */ -INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { +INT_TEMPLATES +Classify::ReadIntTemplates(TFile* fp) { int i, j, w, x, y, z; int unicharset_size; int version_id = 0; INT_TEMPLATES Templates; CLASS_PRUNER_STRUCT* Pruner; INT_CLASS Class; - uint8_t *Lengths; + uint8_t* Lengths; PROTO_SET ProtoSet; /* variables for conversion from older inttemp formats */ int b, bit_number, last_cp_bit_number, new_b, new_i, new_w; CLASS_ID class_id, max_class_id; - int16_t *IndexFor = new int16_t[MAX_NUM_CLASSES]; - CLASS_ID *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES]; - CLASS_PRUNER_STRUCT **TempClassPruner = + int16_t* IndexFor = new int16_t[MAX_NUM_CLASSES]; + CLASS_ID* ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES]; + CLASS_PRUNER_STRUCT** TempClassPruner = new CLASS_PRUNER_STRUCT*[MAX_NUM_CLASS_PRUNERS]; uint32_t SetBitsForMask = // word with NUM_BITS_PER_CLASS - (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0 + (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0 uint32_t Mask, NewMask, ClassBits; int MaxNumConfigs = MAX_NUM_CONFIGS; int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC; @@ -837,8 +803,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { // Allocate enough class pruners to cover all the class ids. max_class_id = 0; for (i = 0; i < Templates->NumClasses; i++) - if (ClassIdFor[i] > max_class_id) - max_class_id = ClassIdFor[i]; + if (ClassIdFor[i] > max_class_id) max_class_id = ClassIdFor[i]; for (i = 0; i <= CPrunerIdFor(max_class_id); i++) { Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT; memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT)); @@ -851,12 +816,11 @@ INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { for (y = 0; y < NUM_CP_BUCKETS; y++) for (z = 0; z < NUM_CP_BUCKETS; z++) for (w = 0; w < WERDS_PER_CP_VECTOR; w++) { - if (TempClassPruner[i]->p[x][y][z][w] == 0) - continue; + if (TempClassPruner[i]->p[x][y][z][w] == 0) continue; for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) { bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b; if (bit_number > last_cp_bit_number) - break; // the rest of the bits in this word are not used + break; // the rest of the bits in this word are not used class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS]; // Single out NUM_BITS_PER_CLASS bits relating to class_id. Mask = SetBitsForMask << b; @@ -887,7 +851,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { /* then read in each class */ for (i = 0; i < Templates->NumClasses; i++) { /* first read in the high level struct for the class */ - Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT)); + Class = (INT_CLASS)Emalloc(sizeof(INT_CLASS_STRUCT)); if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 || fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 || fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1) @@ -907,15 +871,15 @@ INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { tprintf("Bad read of inttemp!\n"); } if (version_id < 2) { - ClassForClassId (Templates, ClassIdFor[i]) = Class; + ClassForClassId(Templates, ClassIdFor[i]) = Class; } else { - ClassForClassId (Templates, i) = Class; + ClassForClassId(Templates, i) = Class; } /* then read in the proto lengths */ Lengths = nullptr; - if (MaxNumIntProtosIn (Class) > 0) { - Lengths = (uint8_t *)Emalloc(sizeof(uint8_t) * MaxNumIntProtosIn(Class)); + if (MaxNumIntProtosIn(Class) > 0) { + Lengths = (uint8_t*)Emalloc(sizeof(uint8_t) * MaxNumIntProtosIn(Class)); if (fp->FRead(Lengths, sizeof(uint8_t), MaxNumIntProtosIn(Class)) != MaxNumIntProtosIn(Class)) tprintf("Bad read of inttemp!\n"); @@ -956,19 +920,19 @@ INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { if (version_id < 2) { /* add an empty nullptr class with class id 0 */ - assert(UnusedClassIdIn (Templates, 0)); - ClassForClassId (Templates, 0) = NewIntClass (1, 1); - ClassForClassId (Templates, 0)->font_set_id = -1; + assert(UnusedClassIdIn(Templates, 0)); + ClassForClassId(Templates, 0) = NewIntClass(1, 1); + ClassForClassId(Templates, 0)->font_set_id = -1; Templates->NumClasses++; /* make sure the classes are contiguous */ for (i = 0; i < MAX_NUM_CLASSES; i++) { if (i < Templates->NumClasses) { - if (ClassForClassId (Templates, i) == nullptr) { + if (ClassForClassId(Templates, i) == nullptr) { fprintf(stderr, "Non-contiguous class ids in inttemp\n"); exit(1); } } else { - if (ClassForClassId (Templates, i) != nullptr) { + if (ClassForClassId(Templates, i) != nullptr) { fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n", i, Templates->NumClasses); exit(1); @@ -991,8 +955,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { delete[] TempClassPruner; return (Templates); -} /* ReadIntTemplates */ - +} /* ReadIntTemplates */ #ifndef GRAPHICS_DISABLED /** @@ -1017,17 +980,16 @@ void Classify::ShowMatchDisplay() { ClearFeatureSpaceWindow( static_cast(static_cast(classify_norm_method)), IntMatchWindow); - IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, - INT_MAX_X, INT_MAX_Y); + IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, INT_MAX_X, INT_MAX_Y); if (ProtoDisplayWindow) { - ProtoDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, - INT_MAX_X, INT_MAX_Y); + ProtoDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, INT_MAX_X, + INT_MAX_Y); } if (FeatureDisplayWindow) { - FeatureDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, - INT_MAX_X, INT_MAX_Y); + FeatureDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, INT_MAX_X, + INT_MAX_Y); } -} /* ShowMatchDisplay */ +} /* ShowMatchDisplay */ /// Clears the given window and draws the featurespace guides for the /// appropriate normalization method. @@ -1065,7 +1027,7 @@ void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) { * @note Exceptions: none * @note History: Wed Feb 27 11:48:46 1991, DSJ, Created. */ -void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, +void Classify::WriteIntTemplates(FILE* File, INT_TEMPLATES Templates, const UNICHARSET& target_unicharset) { int i, j; INT_CLASS Class; @@ -1073,22 +1035,22 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, int version_id = -5; // When negated by the reader -1 becomes +1 etc. if (Templates->NumClasses != unicharset_size) { - cprintf("Warning: executing WriteIntTemplates() with %d classes in" - " Templates, while target_unicharset size is %d\n", - Templates->NumClasses, unicharset_size); + cprintf( + "Warning: executing WriteIntTemplates() with %d classes in" + " Templates, while target_unicharset size is %d\n", + Templates->NumClasses, unicharset_size); } /* first write the high level template struct */ fwrite(&unicharset_size, sizeof(unicharset_size), 1, File); fwrite(&version_id, sizeof(version_id), 1, File); - fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners), - 1, File); + fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners), 1, + File); fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File); /* then write out the class pruners */ for (i = 0; i < Templates->NumClassPruners; i++) - fwrite(Templates->ClassPruners[i], - sizeof(CLASS_PRUNER_STRUCT), 1, File); + fwrite(Templates->ClassPruners[i], sizeof(CLASS_PRUNER_STRUCT), 1, File); /* then write out each class */ for (i = 0; i < Templates->NumClasses; i++) { @@ -1097,22 +1059,22 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, /* first write out the high level struct for the class */ fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File); fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File); - ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size); + ASSERT_HOST(Class->NumConfigs == + this->fontset_table_.get(Class->font_set_id).size); fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File); for (j = 0; j < Class->NumConfigs; ++j) { fwrite(&Class->ConfigLengths[j], sizeof(uint16_t), 1, File); } /* then write out the proto lengths */ - if (MaxNumIntProtosIn (Class) > 0) { - fwrite ((char *) (Class->ProtoLengths), sizeof (uint8_t), - MaxNumIntProtosIn (Class), File); + if (MaxNumIntProtosIn(Class) > 0) { + fwrite((char*)(Class->ProtoLengths), sizeof(uint8_t), + MaxNumIntProtosIn(Class), File); } /* then write out the proto sets */ for (j = 0; j < Class->NumProtoSets; j++) - fwrite ((char *) Class->ProtoSets[j], - sizeof (PROTO_SET_STRUCT), 1, File); + fwrite((char*)Class->ProtoSets[j], sizeof(PROTO_SET_STRUCT), 1, File); /* then write the fonts info */ fwrite(&Class->font_set_id, sizeof(int), 1, File); @@ -1123,9 +1085,8 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, this->fontinfo_table_.write(File, NewPermanentTessCallback(write_spacing_info)); this->fontset_table_.write(File, NewPermanentTessCallback(write_set)); -} /* WriteIntTemplates */ -} // namespace tesseract - +} /* WriteIntTemplates */ +} // namespace tesseract /*----------------------------------------------------------------------------- Private Code @@ -1143,10 +1104,11 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, * @note Exceptions: none * @note History: Thu Feb 14 13:24:33 1991, DSJ, Created. */ -FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) { - return (((FLOAT32) Bucket / NumBuckets) - Offset); +FLOAT32 +BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) { + return (((FLOAT32)Bucket / NumBuckets) - Offset); -} /* BucketStart */ +} /* BucketStart */ /** * This routine returns the parameter value which @@ -1161,9 +1123,10 @@ FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) { * @note Exceptions: none * @note History: Thu Feb 14 13:24:33 1991, DSJ, Created. */ -FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) { - return (((FLOAT32) (Bucket + 1) / NumBuckets) - Offset); -} /* BucketEnd */ +FLOAT32 +BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) { + return (((FLOAT32)(Bucket + 1) / NumBuckets) - Offset); +} /* BucketEnd */ /** * This routine fills in the section of a class pruner @@ -1179,38 +1142,30 @@ FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) { * @note Exceptions: none * @note History: Tue Feb 19 11:11:29 1991, DSJ, Created. */ -void DoFill(FILL_SPEC *FillSpec, - CLASS_PRUNER_STRUCT* Pruner, - uint32_t ClassMask, - uint32_t ClassCount, - uint32_t WordIndex) { +void DoFill(FILL_SPEC* FillSpec, CLASS_PRUNER_STRUCT* Pruner, + uint32_t ClassMask, uint32_t ClassCount, uint32_t WordIndex) { int X, Y, Angle; uint32_t OldWord; X = FillSpec->X; - if (X < 0) - X = 0; - if (X >= NUM_CP_BUCKETS) - X = NUM_CP_BUCKETS - 1; + if (X < 0) X = 0; + if (X >= NUM_CP_BUCKETS) X = NUM_CP_BUCKETS - 1; - if (FillSpec->YStart < 0) - FillSpec->YStart = 0; - if (FillSpec->YEnd >= NUM_CP_BUCKETS) - FillSpec->YEnd = NUM_CP_BUCKETS - 1; + if (FillSpec->YStart < 0) FillSpec->YStart = 0; + if (FillSpec->YEnd >= NUM_CP_BUCKETS) FillSpec->YEnd = NUM_CP_BUCKETS - 1; for (Y = FillSpec->YStart; Y <= FillSpec->YEnd; Y++) - for (Angle = FillSpec->AngleStart; - TRUE; CircularIncrement (Angle, NUM_CP_BUCKETS)) { + for (Angle = FillSpec->AngleStart; TRUE; + CircularIncrement(Angle, NUM_CP_BUCKETS)) { OldWord = Pruner->p[X][Y][Angle][WordIndex]; if (ClassCount > (OldWord & ClassMask)) { OldWord &= ~ClassMask; OldWord |= ClassCount; Pruner->p[X][Y][Angle][WordIndex] = OldWord; } - if (Angle == FillSpec->AngleEnd) - break; + if (Angle == FillSpec->AngleEnd) break; } -} /* DoFill */ +} /* DoFill */ /** * Return TRUE if the specified table filler is done, i.e. @@ -1222,13 +1177,13 @@ void DoFill(FILL_SPEC *FillSpec, * @note History: Tue Feb 19 10:08:05 1991, DSJ, Created. */ bool FillerDone(TABLE_FILLER* Filler) { - FILL_SWITCH *Next; + FILL_SWITCH* Next; Next = &(Filler->Switch[Filler->NextSwitch]); return Filler->X > Next->X && Next->Type == LastSwitch; -} /* FillerDone */ +} /* FillerDone */ /** * This routine sets Bit in each bit vector whose @@ -1247,30 +1202,27 @@ bool FillerDone(TABLE_FILLER* Filler) { * @note Exceptions: none * @note History: Tue Oct 16 09:26:54 1990, DSJ, Created. */ -void FillPPCircularBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) { +void FillPPCircularBits( + uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, + FLOAT32 Center, FLOAT32 Spread, bool debug) { int i, FirstBucket, LastBucket; - if (Spread > 0.5) - Spread = 0.5; + if (Spread > 0.5) Spread = 0.5; - FirstBucket = (int) floor ((Center - Spread) * NUM_PP_BUCKETS); - if (FirstBucket < 0) - FirstBucket += NUM_PP_BUCKETS; + FirstBucket = (int)floor((Center - Spread) * NUM_PP_BUCKETS); + if (FirstBucket < 0) FirstBucket += NUM_PP_BUCKETS; - LastBucket = (int) floor ((Center + Spread) * NUM_PP_BUCKETS); - if (LastBucket >= NUM_PP_BUCKETS) - LastBucket -= NUM_PP_BUCKETS; + LastBucket = (int)floor((Center + Spread) * NUM_PP_BUCKETS); + if (LastBucket >= NUM_PP_BUCKETS) LastBucket -= NUM_PP_BUCKETS; if (debug) tprintf("Circular fill from %d to %d", FirstBucket, LastBucket); - for (i = FirstBucket; TRUE; CircularIncrement (i, NUM_PP_BUCKETS)) { - SET_BIT (ParamTable[i], Bit); + for (i = FirstBucket; TRUE; CircularIncrement(i, NUM_PP_BUCKETS)) { + SET_BIT(ParamTable[i], Bit); /* exit loop after we have set the bit for the last bucket */ - if (i == LastBucket) - break; + if (i == LastBucket) break; } -} /* FillPPCircularBits */ +} /* FillPPCircularBits */ /** * This routine sets Bit in each bit vector whose @@ -1294,20 +1246,16 @@ void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) { int i, FirstBucket, LastBucket; - FirstBucket = (int) floor ((Center - Spread) * NUM_PP_BUCKETS); - if (FirstBucket < 0) - FirstBucket = 0; + FirstBucket = (int)floor((Center - Spread) * NUM_PP_BUCKETS); + if (FirstBucket < 0) FirstBucket = 0; - LastBucket = (int) floor ((Center + Spread) * NUM_PP_BUCKETS); - if (LastBucket >= NUM_PP_BUCKETS) - LastBucket = NUM_PP_BUCKETS - 1; + LastBucket = (int)floor((Center + Spread) * NUM_PP_BUCKETS); + if (LastBucket >= NUM_PP_BUCKETS) LastBucket = NUM_PP_BUCKETS - 1; if (debug) tprintf("Linear fill from %d to %d", FirstBucket, LastBucket); - for (i = FirstBucket; i <= LastBucket; i++) - SET_BIT (ParamTable[i], Bit); - -} /* FillPPLinearBits */ + for (i = FirstBucket; i <= LastBucket; i++) SET_BIT(ParamTable[i], Bit); +} /* FillPPLinearBits */ /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED @@ -1324,8 +1272,9 @@ namespace tesseract { * @note Exceptions: none * @note History: Thu Mar 21 16:55:13 1991, DSJ, Created. */ -CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on, - bool* pretrained_on, int* shape_id) { +CLASS_ID +Classify::GetClassToDebug(const char* Prompt, bool* adaptive_on, + bool* pretrained_on, int* shape_id) { tprintf("%s\n", Prompt); SVEvent* ev; SVEventType ev_type; @@ -1344,8 +1293,8 @@ CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on, int font_id; shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id, &font_id); - tprintf("Shape %d, first unichar=%d, font=%d\n", - *shape_id, unichar_id, font_id); + tprintf("Shape %d, first unichar=%d, font=%d\n", *shape_id, + unichar_id, font_id); return unichar_id; } tprintf("Shape index '%s' not found in shape table\n", ev->parameter); @@ -1376,15 +1325,14 @@ CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on, } } } else { - tprintf("Char class '%s' not found in unicharset", - ev->parameter); + tprintf("Char class '%s' not found in unicharset", ev->parameter); } } } delete ev; } while (ev_type != SVET_CLICK); return 0; -} /* GetClassToDebug */ +} /* GetClassToDebug */ } // namespace tesseract #endif @@ -1404,39 +1352,36 @@ CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on, * @note Exceptions: none * @note History: Thu Feb 14 08:26:49 1991, DSJ, Created. */ -void GetCPPadsForLevel(int Level, - FLOAT32 *EndPad, - FLOAT32 *SidePad, - FLOAT32 *AnglePad) { +void GetCPPadsForLevel(int Level, FLOAT32* EndPad, FLOAT32* SidePad, + FLOAT32* AnglePad) { switch (Level) { case 0: - *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength (); - *SidePad = classify_cp_side_pad_loose * GetPicoFeatureLength (); + *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength(); + *SidePad = classify_cp_side_pad_loose * GetPicoFeatureLength(); *AnglePad = classify_cp_angle_pad_loose / 360.0; break; case 1: - *EndPad = classify_cp_end_pad_medium * GetPicoFeatureLength (); - *SidePad = classify_cp_side_pad_medium * GetPicoFeatureLength (); + *EndPad = classify_cp_end_pad_medium * GetPicoFeatureLength(); + *SidePad = classify_cp_side_pad_medium * GetPicoFeatureLength(); *AnglePad = classify_cp_angle_pad_medium / 360.0; break; case 2: - *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); - *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); + *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength(); + *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength(); *AnglePad = classify_cp_angle_pad_tight / 360.0; break; default: - *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); - *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); + *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength(); + *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength(); *AnglePad = classify_cp_angle_pad_tight / 360.0; break; } - if (*AnglePad > 0.5) - *AnglePad = 0.5; + if (*AnglePad > 0.5) *AnglePad = 0.5; -} /* GetCPPadsForLevel */ +} /* GetCPPadsForLevel */ /** * @param Evidence evidence value to return color for @@ -1446,8 +1391,8 @@ void GetCPPadsForLevel(int Level, * @note History: Thu Mar 21 15:24:52 1991, DSJ, Created. */ ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) { - assert (Evidence >= 0.0); - assert (Evidence <= 1.0); + assert(Evidence >= 0.0); + assert(Evidence <= 1.0); if (Evidence >= 0.90) return ScrollView::WHITE; @@ -1457,7 +1402,7 @@ ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) { return ScrollView::RED; else return ScrollView::BLUE; -} /* GetMatchColorFor */ +} /* GetMatchColorFor */ /** * This routine returns (in Fill) the specification of @@ -1471,8 +1416,8 @@ ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) { * @note Exceptions: none * @note History: Tue Feb 19 10:17:42 1991, DSJ, Created. */ -void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { - FILL_SWITCH *Next; +void GetNextFill(TABLE_FILLER* Filler, FILL_SPEC* Fill) { + FILL_SWITCH* Next; /* compute the fill assuming no switches will be encountered */ Fill->AngleStart = Filler->AngleStart; @@ -1489,13 +1434,11 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { Fill->YStart = Next->Y; Filler->StartDelta = Next->Delta; Filler->YStart = Next->YInit; - } - else if (Next->Type == EndSwitch) { + } else if (Next->Type == EndSwitch) { Fill->YEnd = Next->Y; Filler->EndDelta = Next->Delta; Filler->YEnd = Next->YInit; - } - else { /* Type must be LastSwitch */ + } else { /* Type must be LastSwitch */ break; } Filler->NextSwitch++; @@ -1507,7 +1450,7 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { Filler->YStart += Filler->StartDelta; Filler->YEnd += Filler->EndDelta; -} /* GetNextFill */ +} /* GetNextFill */ /** * This routine computes a data structure (Filler) @@ -1523,12 +1466,12 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { * @note Exceptions: none * @note History: Thu Feb 14 09:27:05 1991, DSJ, Created. */ -void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, - FLOAT32 AnglePad, PROTO Proto, TABLE_FILLER * Filler) -#define XS X_SHIFT -#define YS Y_SHIFT -#define AS ANGLE_SHIFT -#define NB NUM_CP_BUCKETS +void InitTableFiller(FLOAT32 EndPad, FLOAT32 SidePad, FLOAT32 AnglePad, + PROTO Proto, TABLE_FILLER* Filler) +#define XS X_SHIFT +#define YS Y_SHIFT +#define AS ANGLE_SHIFT +#define NB NUM_CP_BUCKETS { FLOAT32 Angle; FLOAT32 X, Y, HalfLength; @@ -1547,7 +1490,7 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, Filler->AngleEnd = CircBucketFor(Angle + AnglePad, AS, NB); Filler->NextSwitch = 0; - if (fabs (Angle - 0.0) < HV_TOLERANCE || fabs (Angle - 0.5) < HV_TOLERANCE) { + if (fabs(Angle - 0.0) < HV_TOLERANCE || fabs(Angle - 0.5) < HV_TOLERANCE) { /* horizontal proto - handle as special case */ Filler->X = Bucket8For(X - HalfLength - EndPad, XS, NB); Filler->YStart = Bucket16For(Y - SidePad, YS, NB * 256); @@ -1557,7 +1500,7 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, Filler->Switch[0].Type = LastSwitch; Filler->Switch[0].X = Bucket8For(X + HalfLength + EndPad, XS, NB); } else if (fabs(Angle - 0.25) < HV_TOLERANCE || - fabs(Angle - 0.75) < HV_TOLERANCE) { + fabs(Angle - 0.75) < HV_TOLERANCE) { /* vertical proto - handle as special case */ Filler->X = Bucket8For(X - SidePad, XS, NB); Filler->YStart = Bucket16For(Y - HalfLength - EndPad, YS, NB * 256); @@ -1592,8 +1535,8 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, /* translate into bucket positions and deltas */ Filler->X = Bucket8For(Start.x, XS, NB); - Filler->StartDelta = -(int16_t) ((Cos / Sin) * 256); - Filler->EndDelta = (int16_t) ((Sin / Cos) * 256); + Filler->StartDelta = -(int16_t)((Cos / Sin) * 256); + Filler->EndDelta = (int16_t)((Sin / Cos) * 256); XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; YAdjust = XAdjust * Cos / Sin; @@ -1673,8 +1616,7 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, Filler->Switch[2].X = Bucket8For(End.x, XS, NB); } } -} /* InitTableFiller */ - +} /* InitTableFiller */ /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED @@ -1688,7 +1630,7 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, * @note Exceptions: none * @note History: Thu Mar 21 14:57:41 1991, DSJ, Created. */ -void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, +void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature, ScrollView::Color color) { FLOAT32 X, Y, Dx, Dy, Length; @@ -1706,7 +1648,7 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, window->SetCursor(X, Y); window->DrawTo(X + Dx, Y + Dy); -} /* RenderIntFeature */ +} /* RenderIntFeature */ /** * This routine extracts the parameters of the specified @@ -1724,9 +1666,7 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, * @note Exceptions: none * @note History: Thu Mar 21 10:21:09 1991, DSJ, Created. */ -void RenderIntProto(ScrollView *window, - INT_CLASS Class, - PROTO_ID ProtoId, +void RenderIntProto(ScrollView* window, INT_CLASS Class, PROTO_ID ProtoId, ScrollView::Color color) { PROTO_SET ProtoSet; INT_PROTO Proto; @@ -1747,8 +1687,8 @@ void RenderIntProto(ScrollView *window, ProtoSet = Class->ProtoSets[SetForProto(ProtoId)]; ProtoSetIndex = IndexForProto(ProtoId); Proto = &(ProtoSet->Protos[ProtoSetIndex]); - Length = (Class->ProtoLengths[ProtoId] * - GetPicoFeatureLength() * INT_CHAR_NORM_RANGE); + Length = (Class->ProtoLengths[ProtoId] * GetPicoFeatureLength() * + INT_CHAR_NORM_RANGE); ProtoMask = PPrunerMaskFor(ProtoId); ProtoWordIndex = PPrunerWordIndexFor(ProtoId); @@ -1773,7 +1713,7 @@ void RenderIntProto(ScrollView *window, window->SetCursor(X - Dx, Y - Dy); window->DrawTo(X + Dx, Y + Dy); -} /* RenderIntProto */ +} /* RenderIntProto */ #endif /** @@ -1791,21 +1731,18 @@ void RenderIntProto(ScrollView *window, * @note Exceptions: none * @note History: Fri Feb 8 11:54:28 1991, DSJ, Created. */ -int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id) { +int TruncateParam(FLOAT32 Param, int Min, int Max, char* Id) { if (Param < Min) { if (Id) - cprintf("Warning: Param %s truncated from %f to %d!\n", - Id, Param, Min); + cprintf("Warning: Param %s truncated from %f to %d!\n", Id, Param, Min); Param = Min; } else if (Param > Max) { if (Id) - cprintf("Warning: Param %s truncated from %f to %d!\n", - Id, Param, Max); + cprintf("Warning: Param %s truncated from %f to %d!\n", Id, Param, Max); Param = Max; } return static_cast(floor(Param)); -} /* TruncateParam */ - +} /* TruncateParam */ #ifndef GRAPHICS_DISABLED /** @@ -1817,14 +1754,13 @@ void InitIntMatchWindowIfReqd() { IntMatchWindow = CreateFeatureSpaceWindow("IntMatchWindow", 50, 200); SVMenuNode* popup_menu = new SVMenuNode(); - popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, - "x", "Class to debug"); - popup_menu->AddChild("Debug Static classes", IDA_STATIC, - "x", "Class to debug"); - popup_menu->AddChild("Debug Both", IDA_BOTH, - "x", "Class to debug"); - popup_menu->AddChild("Debug Shape Index", IDA_SHAPE_INDEX, - "0", "Index to debug"); + popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, "x", + "Class to debug"); + popup_menu->AddChild("Debug Static classes", IDA_STATIC, "x", + "Class to debug"); + popup_menu->AddChild("Debug Both", IDA_BOTH, "x", "Class to debug"); + popup_menu->AddChild("Debug Shape Index", IDA_SHAPE_INDEX, "0", + "Index to debug"); popup_menu->BuildMenu(IntMatchWindow, false); } } @@ -1835,9 +1771,9 @@ void InitIntMatchWindowIfReqd() { */ void InitProtoDisplayWindowIfReqd() { if (ProtoDisplayWindow == nullptr) { - ProtoDisplayWindow = CreateFeatureSpaceWindow("ProtoDisplayWindow", - 550, 200); - } + ProtoDisplayWindow = + CreateFeatureSpaceWindow("ProtoDisplayWindow", 550, 200); + } } /** @@ -1846,8 +1782,8 @@ void InitProtoDisplayWindowIfReqd() { */ void InitFeatureDisplayWindowIfReqd() { if (FeatureDisplayWindow == nullptr) { - FeatureDisplayWindow = CreateFeatureSpaceWindow("FeatureDisplayWindow", - 50, 700); + FeatureDisplayWindow = + CreateFeatureSpaceWindow("FeatureDisplayWindow", 50, 700); } } diff --git a/src/classify/intproto.h b/src/classify/intproto.h index 04259b7513..571feb92b3 100644 --- a/src/classify/intproto.h +++ b/src/classify/intproto.h @@ -31,42 +31,41 @@ class FCOORD; /* define order of params in pruners */ -#define PRUNER_X 0 -#define PRUNER_Y 1 -#define PRUNER_ANGLE 2 +#define PRUNER_X 0 +#define PRUNER_Y 1 +#define PRUNER_ANGLE 2 /* definition of coordinate system offsets for each table parameter */ #define ANGLE_SHIFT (0.0) -#define X_SHIFT (0.5) -#define Y_SHIFT (0.5) +#define X_SHIFT (0.5) +#define Y_SHIFT (0.5) -#define MAX_PROTO_INDEX 24 -#define BITS_PER_WERD static_cast(8 * sizeof(uint32_t)) +#define MAX_PROTO_INDEX 24 +#define BITS_PER_WERD static_cast(8 * sizeof(uint32_t)) /* Script detection: increase this number to 128 */ -#define MAX_NUM_CONFIGS 64 -#define MAX_NUM_PROTOS 512 -#define PROTOS_PER_PROTO_SET 64 -#define MAX_NUM_PROTO_SETS (MAX_NUM_PROTOS / PROTOS_PER_PROTO_SET) -#define NUM_PP_PARAMS 3 -#define NUM_PP_BUCKETS 64 -#define NUM_CP_BUCKETS 24 -#define CLASSES_PER_CP 32 -#define NUM_BITS_PER_CLASS 2 +#define MAX_NUM_CONFIGS 64 +#define MAX_NUM_PROTOS 512 +#define PROTOS_PER_PROTO_SET 64 +#define MAX_NUM_PROTO_SETS (MAX_NUM_PROTOS / PROTOS_PER_PROTO_SET) +#define NUM_PP_PARAMS 3 +#define NUM_PP_BUCKETS 64 +#define NUM_CP_BUCKETS 24 +#define CLASSES_PER_CP 32 +#define NUM_BITS_PER_CLASS 2 #define CLASS_PRUNER_CLASS_MASK (~(~0u << NUM_BITS_PER_CLASS)) #define CLASSES_PER_CP_WERD (CLASSES_PER_CP / NUM_BITS_PER_CLASS) -#define PROTOS_PER_PP_WERD BITS_PER_WERD -#define BITS_PER_CP_VECTOR (CLASSES_PER_CP * NUM_BITS_PER_CLASS) -#define MAX_NUM_CLASS_PRUNERS ((MAX_NUM_CLASSES + CLASSES_PER_CP - 1) / \ - CLASSES_PER_CP) +#define PROTOS_PER_PP_WERD BITS_PER_WERD +#define BITS_PER_CP_VECTOR (CLASSES_PER_CP * NUM_BITS_PER_CLASS) +#define MAX_NUM_CLASS_PRUNERS \ + ((MAX_NUM_CLASSES + CLASSES_PER_CP - 1) / CLASSES_PER_CP) #define WERDS_PER_CP_VECTOR (BITS_PER_CP_VECTOR / BITS_PER_WERD) -#define WERDS_PER_PP_VECTOR ((PROTOS_PER_PROTO_SET+BITS_PER_WERD-1)/ \ - BITS_PER_WERD) -#define WERDS_PER_PP (NUM_PP_PARAMS * NUM_PP_BUCKETS * \ - WERDS_PER_PP_VECTOR) -#define WERDS_PER_CP (NUM_CP_BUCKETS * NUM_CP_BUCKETS * \ - NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR) -#define WERDS_PER_CONFIG_VEC ((MAX_NUM_CONFIGS + BITS_PER_WERD - 1) / \ - BITS_PER_WERD) +#define WERDS_PER_PP_VECTOR \ + ((PROTOS_PER_PROTO_SET + BITS_PER_WERD - 1) / BITS_PER_WERD) +#define WERDS_PER_PP (NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR) +#define WERDS_PER_CP \ + (NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR) +#define WERDS_PER_CONFIG_VEC \ + ((MAX_NUM_CONFIGS + BITS_PER_WERD - 1) / BITS_PER_WERD) /* The first 3 dimensions of the CLASS_PRUNER_STRUCT are the * 3 axes of the quantized feature space. @@ -74,11 +73,11 @@ class FCOORD; * 4th dimension is determined by using CPrunerWordIndexFor(c), * where c is the corresponding class id. */ struct CLASS_PRUNER_STRUCT { - uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]; + uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS] + [WERDS_PER_CP_VECTOR]; }; -typedef struct -{ +typedef struct { int8_t A; uint8_t B; int8_t C; @@ -86,54 +85,51 @@ typedef struct uint32_t Configs[WERDS_PER_CONFIG_VEC]; } +INT_PROTO_STRUCT, + *INT_PROTO; -INT_PROTO_STRUCT, *INT_PROTO; +typedef uint32_t PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS] + [WERDS_PER_PP_VECTOR]; -typedef uint32_t PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]; - -typedef struct -{ +typedef struct { PROTO_PRUNER ProtoPruner; INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]; } - -PROTO_SET_STRUCT, *PROTO_SET; +PROTO_SET_STRUCT, + *PROTO_SET; typedef uint32_t CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4]; - -typedef struct -{ +typedef struct { uint16_t NumProtos; uint8_t NumProtoSets; uint8_t NumConfigs; PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]; - uint8_t *ProtoLengths; + uint8_t* ProtoLengths; uint16_t ConfigLengths[MAX_NUM_CONFIGS]; int font_set_id; // FontSet id, see above } +INT_CLASS_STRUCT, + *INT_CLASS; -INT_CLASS_STRUCT, *INT_CLASS; - -typedef struct -{ +typedef struct { int NumClasses; int NumClassPruners; INT_CLASS Class[MAX_NUM_CLASSES]; CLASS_PRUNER_STRUCT* ClassPruners[MAX_NUM_CLASS_PRUNERS]; } - -INT_TEMPLATES_STRUCT, *INT_TEMPLATES; +INT_TEMPLATES_STRUCT, + *INT_TEMPLATES; /* definitions of integer features*/ #define MAX_NUM_INT_FEATURES 512 -#define INT_CHAR_NORM_RANGE 256 +#define INT_CHAR_NORM_RANGE 256 struct INT_FEATURE_STRUCT { - INT_FEATURE_STRUCT() : X(0), Y(0), Theta(0), CP_misses(0) { } + INT_FEATURE_STRUCT() : X(0), Y(0), Theta(0), CP_misses(0) {} // Builds a feature from an FCOORD for position with all the necessary // clipping and rounding. INT_FEATURE_STRUCT(const FCOORD& pos, uint8_t theta); @@ -145,12 +141,10 @@ struct INT_FEATURE_STRUCT { uint8_t Theta; int8_t CP_misses; - void print() const { - tprintf("(%d,%d):%d\n", X, Y, Theta); - } + void print() const { tprintf("(%d,%d):%d\n", X, Y, Theta); } }; -typedef INT_FEATURE_STRUCT *INT_FEATURE; +typedef INT_FEATURE_STRUCT* INT_FEATURE; typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; @@ -165,26 +159,27 @@ enum IntmatcherDebugAction { Macros ----------------------------------------------------------------------------**/ -#define MaxNumIntProtosIn(C) (C->NumProtoSets * PROTOS_PER_PROTO_SET) -#define SetForProto(P) (P / PROTOS_PER_PROTO_SET) -#define IndexForProto(P) (P % PROTOS_PER_PROTO_SET) -#define ProtoForProtoId(C,P) (&((C->ProtoSets[SetForProto (P)])-> \ - Protos [IndexForProto (P)])) -#define PPrunerWordIndexFor(I) (((I) % PROTOS_PER_PROTO_SET) / \ - PROTOS_PER_PP_WERD) +#define MaxNumIntProtosIn(C) (C->NumProtoSets * PROTOS_PER_PROTO_SET) +#define SetForProto(P) (P / PROTOS_PER_PROTO_SET) +#define IndexForProto(P) (P % PROTOS_PER_PROTO_SET) +#define ProtoForProtoId(C, P) \ + (&((C->ProtoSets[SetForProto(P)])->Protos[IndexForProto(P)])) +#define PPrunerWordIndexFor(I) \ + (((I) % PROTOS_PER_PROTO_SET) / PROTOS_PER_PP_WERD) #define PPrunerBitIndexFor(I) ((I) % PROTOS_PER_PP_WERD) -#define PPrunerMaskFor(I) (1 << PPrunerBitIndexFor (I)) - -#define MaxNumClassesIn(T) (T->NumClassPruners * CLASSES_PER_CP) -#define LegalClassId(c) ((c) >= 0 && (c) <= MAX_CLASS_ID) -#define UnusedClassIdIn(T,c) ((T)->Class[c] == nullptr) -#define ClassForClassId(T,c) ((T)->Class[c]) -#define ClassPrunersFor(T) ((T)->ClassPruner) -#define CPrunerIdFor(c) ((c) / CLASSES_PER_CP) -#define CPrunerFor(T,c) ((T)->ClassPruners[CPrunerIdFor(c)]) -#define CPrunerWordIndexFor(c) (((c) % CLASSES_PER_CP) / CLASSES_PER_CP_WERD) +#define PPrunerMaskFor(I) (1 << PPrunerBitIndexFor(I)) + +#define MaxNumClassesIn(T) (T->NumClassPruners * CLASSES_PER_CP) +#define LegalClassId(c) ((c) >= 0 && (c) <= MAX_CLASS_ID) +#define UnusedClassIdIn(T, c) ((T)->Class[c] == nullptr) +#define ClassForClassId(T, c) ((T)->Class[c]) +#define ClassPrunersFor(T) ((T)->ClassPruner) +#define CPrunerIdFor(c) ((c) / CLASSES_PER_CP) +#define CPrunerFor(T, c) ((T)->ClassPruners[CPrunerIdFor(c)]) +#define CPrunerWordIndexFor(c) (((c) % CLASSES_PER_CP) / CLASSES_PER_CP_WERD) #define CPrunerBitIndexFor(c) (((c) % CLASSES_PER_CP) % CLASSES_PER_CP_WERD) -#define CPrunerMaskFor(L,c) (((L)+1) << CPrunerBitIndexFor (c) * NUM_BITS_PER_CLASS) +#define CPrunerMaskFor(L, c) \ + (((L) + 1) << CPrunerBitIndexFor(c) * NUM_BITS_PER_CLASS) /* DEBUG macros*/ #define PRINT_MATCH_SUMMARY 0x001 @@ -194,13 +189,13 @@ enum IntmatcherDebugAction { #define PRINT_PROTO_MATCHES 0x010 #define CLIP_MATCH_EVIDENCE 0x020 -#define MatchDebuggingOn(D) (D) -#define PrintMatchSummaryOn(D) ((D) & PRINT_MATCH_SUMMARY) -#define DisplayFeatureMatchesOn(D) ((D) & DISPLAY_FEATURE_MATCHES) -#define DisplayProtoMatchesOn(D) ((D) & DISPLAY_PROTO_MATCHES) -#define PrintFeatureMatchesOn(D) ((D) & PRINT_FEATURE_MATCHES) -#define PrintProtoMatchesOn(D) ((D) & PRINT_PROTO_MATCHES) -#define ClipMatchEvidenceOn(D) ((D) & CLIP_MATCH_EVIDENCE) +#define MatchDebuggingOn(D) (D) +#define PrintMatchSummaryOn(D) ((D)&PRINT_MATCH_SUMMARY) +#define DisplayFeatureMatchesOn(D) ((D)&DISPLAY_FEATURE_MATCHES) +#define DisplayProtoMatchesOn(D) ((D)&DISPLAY_PROTO_MATCHES) +#define PrintFeatureMatchesOn(D) ((D)&PRINT_FEATURE_MATCHES) +#define PrintProtoMatchesOn(D) ((D)&PRINT_PROTO_MATCHES) +#define ClipMatchEvidenceOn(D) ((D)&CLIP_MATCH_EVIDENCE) /**---------------------------------------------------------------------------- Public Function Prototypes @@ -211,12 +206,11 @@ int AddIntConfig(INT_CLASS Class); int AddIntProto(INT_CLASS Class); -void AddProtoToClassPruner(PROTO Proto, - CLASS_ID ClassId, +void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates); -void AddProtoToProtoPruner(PROTO Proto, int ProtoId, - INT_CLASS Class, bool debug); +void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, + bool debug); uint8_t Bucket8For(FLOAT32 param, FLOAT32 offset, int num_buckets); uint16_t Bucket16For(FLOAT32 param, FLOAT32 offset, int num_buckets); @@ -231,9 +225,11 @@ void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence); void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence); -INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs); +INT_CLASS +NewIntClass(int MaxNumProtos, int MaxNumConfigs); -INT_TEMPLATES NewIntTemplates(); +INT_TEMPLATES +NewIntTemplates(); void free_int_templates(INT_TEMPLATES templates); @@ -249,7 +245,7 @@ void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window); /*----------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED -void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, +void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature, ScrollView::Color color); void InitIntMatchWindowIfReqd(); diff --git a/src/classify/kdtree.cpp b/src/classify/kdtree.cpp index df2347f86f..321c5b5871 100644 --- a/src/classify/kdtree.cpp +++ b/src/classify/kdtree.cpp @@ -26,11 +26,11 @@ #include "emalloc.h" #include -#include #include +#include -#define Magnitude(X) ((X) < 0 ? -(X) : (X)) -#define NodeFound(N,K,D) (( (N)->Key == (K) ) && ( (N)->Data == (D) )) +#define Magnitude(X) ((X) < 0 ? -(X) : (X)) +#define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D))) /*----------------------------------------------------------------------------- Global Data Definitions and Declarations @@ -39,18 +39,17 @@ #define MAXSEARCH MAX_FLOAT32 // Helper function to find the next essential dimension in a cycle. -static int NextLevel(KDTREE *tree, int level) { +static int NextLevel(KDTREE* tree, int level) { do { ++level; - if (level >= tree->KeySize) - level = 0; + if (level >= tree->KeySize) level = 0; } while (tree->KeyDesc[level].NonEssential); return level; } //----------------------------------------------------------------------------- /** Store the k smallest-keyed key-value pairs. */ -template +template class MinK { public: MinK(Key max_key, int k); @@ -72,74 +71,70 @@ class MinK { private: const Key max_key_; //< the maximum possible Key - Element *elements_; //< unsorted array of elements + Element* elements_; //< unsorted array of elements int elements_count_; //< the number of results collected so far int k_; //< the number of results we want from the search int max_index_; //< the index of the result with the largest key }; -template -MinK::MinK(Key max_key, int k) : - max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) { +template +MinK::MinK(Key max_key, int k) + : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) { elements_ = new Element[k_]; } -template +template MinK::~MinK() { - delete []elements_; + delete[] elements_; } -template +template const Key& MinK::max_insertable_key() { - if (elements_count_ < k_) - return max_key_; + if (elements_count_ < k_) return max_key_; return elements_[max_index_].key; } -template +template bool MinK::insert(Key key, Value value) { if (elements_count_ < k_) { elements_[elements_count_++] = Element(key, value); - if (key > elements_[max_index_].key) - max_index_ = elements_count_ - 1; + if (key > elements_[max_index_].key) max_index_ = elements_count_ - 1; return true; } else if (key < elements_[max_index_].key) { // evict the largest element. elements_[max_index_] = Element(key, value); // recompute max_index_ for (int i = 0; i < elements_count_; i++) { - if (elements_[i].key > elements_[max_index_].key) - max_index_ = i; + if (elements_[i].key > elements_[max_index_].key) max_index_ = i; } return true; } return false; } - //----------------------------------------------------------------------------- /** Helper class for searching for the k closest points to query_point in tree. */ class KDTreeSearch { public: - KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest); + KDTreeSearch(KDTREE* tree, FLOAT32* query_point, int k_closest); ~KDTreeSearch(); /** Return the k nearest points' data. */ - void Search(int *result_count, FLOAT32 *distances, void **results); + void Search(int* result_count, FLOAT32* distances, void** results); private: - void SearchRec(int Level, KDNODE *SubTree); - bool BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper); - - KDTREE *tree_; - FLOAT32 *query_point_; - FLOAT32 *sb_min_; //< search box minimum - FLOAT32 *sb_max_; //< search box maximum - MinK results_; + void SearchRec(int Level, KDNODE* SubTree); + bool BoxIntersectsSearch(FLOAT32* lower, FLOAT32* upper); + + KDTREE* tree_; + FLOAT32* query_point_; + FLOAT32* sb_min_; //< search box minimum + FLOAT32* sb_max_; //< search box maximum + MinK results_; }; -KDTreeSearch::KDTreeSearch(KDTREE *tree, FLOAT32 *query_point, int k_closest) +KDTreeSearch::KDTreeSearch(KDTREE* tree, FLOAT32* query_point, int k_closest) : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) { sb_min_ = new FLOAT32[tree->KeySize]; sb_max_ = new FLOAT32[tree->KeySize]; @@ -152,9 +147,8 @@ KDTreeSearch::~KDTreeSearch() { /// Locate the k_closest points to query_point_, and return their distances and /// data into the given buffers. -void KDTreeSearch::Search(int *result_count, - FLOAT32 *distances, - void **results) { +void KDTreeSearch::Search(int* result_count, FLOAT32* distances, + void** results) { if (tree_->Root.Left == nullptr) { *result_count = 0; } else { @@ -180,9 +174,9 @@ void KDTreeSearch::Search(int *result_count, /// @return a new KDTREE based on the specified parameters. /// @param KeySize # of dimensions in the K-D tree /// @param KeyDesc array of params to describe key dimensions -KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { - KDTREE *KDTree = (KDTREE *) Emalloc( - sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC)); +KDTREE* MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { + KDTREE* KDTree = + (KDTREE*)Emalloc(sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC)); for (int i = 0; i < KeySize; i++) { KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential; KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular; @@ -203,7 +197,6 @@ KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { return KDTree; } - /** * This routine stores Data in the K-D tree specified by Tree * using Key as an access key. @@ -216,10 +209,10 @@ KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { * @note History: 3/10/89, DSJ, Created. * 7/13/89, DSJ, Changed return to void. */ -void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) { +void KDStore(KDTREE* Tree, FLOAT32* Key, void* Data) { int Level; - KDNODE *Node; - KDNODE **PtrToNode; + KDNODE* Node; + KDNODE** PtrToNode; PtrToNode = &(Tree->Root.Left); Node = *PtrToNode; @@ -227,20 +220,17 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) { while (Node != nullptr) { if (Key[Level] < Node->BranchPoint) { PtrToNode = &(Node->Left); - if (Key[Level] > Node->LeftBranch) - Node->LeftBranch = Key[Level]; - } - else { + if (Key[Level] > Node->LeftBranch) Node->LeftBranch = Key[Level]; + } else { PtrToNode = &(Node->Right); - if (Key[Level] < Node->RightBranch) - Node->RightBranch = Key[Level]; + if (Key[Level] < Node->RightBranch) Node->RightBranch = Key[Level]; } Level = NextLevel(Tree, Level); Node = *PtrToNode; } - *PtrToNode = MakeKDNode(Tree, Key, (void *) Data, Level); -} /* KDStore */ + *PtrToNode = MakeKDNode(Tree, Key, (void*)Data, Level); +} /* KDStore */ /** * This routine deletes a node from Tree. The node to be @@ -261,11 +251,10 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) { * @note History: 3/13/89, DSJ, Created. * 7/13/89, DSJ, Specify node indirectly by key and data. */ -void -KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { +void KDDelete(KDTREE* Tree, FLOAT32 Key[], void* Data) { int Level; - KDNODE *Current; - KDNODE *Father; + KDNODE* Current; + KDNODE* Father; /* initialize search at root of tree */ Father = &(Tree->Root); @@ -273,7 +262,7 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { Level = NextLevel(Tree, -1); /* search tree for node to be deleted */ - while ((Current != nullptr) && (!NodeFound (Current, Key, Data))) { + while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) { Father = Current; if (Key[Level] < Current->BranchPoint) Current = Current->Left; @@ -283,7 +272,7 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { Level = NextLevel(Tree, Level); } - if (Current != nullptr) { /* if node to be deleted was found */ + if (Current != nullptr) { /* if node to be deleted was found */ if (Current == Father->Left) { Father->Left = nullptr; Father->LeftBranch = Tree->KeyDesc[Level].Min; @@ -296,7 +285,7 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { InsertNodes(Tree, Current->Right); FreeSubTree(Current); } -} /* KDDelete */ +} /* KDDelete */ /** * This routine searches the K-D tree specified by Tree and @@ -318,22 +307,20 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { * - 3/10/89, DSJ, Created. * - 7/13/89, DSJ, Return contents of node instead of node itself. */ -void KDNearestNeighborSearch( - KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance, - int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[]) { +void KDNearestNeighborSearch(KDTREE* Tree, FLOAT32 Query[], int QuerySize, + FLOAT32 MaxDistance, int* NumberOfResults, + void** NBuffer, FLOAT32 DBuffer[]) { KDTreeSearch search(Tree, Query, QuerySize); search.Search(NumberOfResults, DBuffer, NBuffer); } - /*---------------------------------------------------------------------------*/ /** Walk a given Tree with action. */ -void KDWalk(KDTREE *Tree, void_proc action, void *context) { +void KDWalk(KDTREE* Tree, void_proc action, void* context) { if (Tree->Root.Left != nullptr) Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1)); } - /*---------------------------------------------------------------------------*/ /** * This routine frees all memory which is allocated to the @@ -347,11 +334,10 @@ void KDWalk(KDTREE *Tree, void_proc action, void *context) { * @note Exceptions: none * @note History: 5/26/89, DSJ, Created. */ -void FreeKDTree(KDTREE *Tree) { +void FreeKDTree(KDTREE* Tree) { FreeSubTree(Tree->Root.Left); free(Tree); -} /* FreeKDTree */ - +} /* FreeKDTree */ /*----------------------------------------------------------------------------- Private Code @@ -370,10 +356,10 @@ void FreeKDTree(KDTREE *Tree) { * @note Exceptions: None * @note History: 3/11/89, DSJ, Created. */ -KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) { - KDNODE *NewNode; +KDNODE* MakeKDNode(KDTREE* tree, FLOAT32 Key[], void* Data, int Index) { + KDNODE* NewNode; - NewNode = (KDNODE *) Emalloc (sizeof (KDNODE)); + NewNode = (KDNODE*)Emalloc(sizeof(KDNODE)); NewNode->Key = Key; NewNode->Data = Data; @@ -384,11 +370,10 @@ KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) { NewNode->Right = nullptr; return NewNode; -} /* MakeKDNode */ - +} /* MakeKDNode */ /*---------------------------------------------------------------------------*/ -void FreeKDNode(KDNODE *Node) { free(Node); } +void FreeKDNode(KDNODE* Node) { free(Node); } /*---------------------------------------------------------------------------*/ /** @@ -396,12 +381,10 @@ void FreeKDNode(KDNODE *Node) { free(Node); } * @param Level level in tree of sub-tree to be searched * @param SubTree sub-tree to be searched */ -void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { - if (level >= tree_->KeySize) - level = 0; +void KDTreeSearch::SearchRec(int level, KDNODE* sub_tree) { + if (level >= tree_->KeySize) level = 0; - if (!BoxIntersectsSearch(sb_min_, sb_max_)) - return; + if (!BoxIntersectsSearch(sb_min_, sb_max_)) return; results_.insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc, query_point_, sub_tree->Key), @@ -436,7 +419,6 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { } } - /*---------------------------------------------------------------------------*/ /** *Returns the Euclidean distance squared between p1 and p2 for all essential @@ -445,12 +427,12 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { * @param dim dimension descriptions (essential, circular, etc) * @param p1,p2 two different points in K-D space */ -FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) { +FLOAT32 +DistanceSquared(int k, PARAM_DESC* dim, FLOAT32 p1[], FLOAT32 p2[]) { FLOAT32 total_distance = 0; for (; k > 0; k--, p1++, p2++, dim++) { - if (dim->NonEssential) - continue; + if (dim->NonEssential) continue; FLOAT32 dimension_distance = *p1 - *p2; @@ -466,7 +448,8 @@ FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) { return total_distance; } -FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) { +FLOAT32 +ComputeDistance(int k, PARAM_DESC* dim, FLOAT32 p1[], FLOAT32 p2[]) { return sqrt(DistanceSquared(k, dim, p1, p2)); } @@ -475,17 +458,16 @@ FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) { /// query_point_ containing results->k_ points) intersects the box specified /// between lower and upper. For circular dimensions, we also check the point /// one wrap distance away from the query. -bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) { - FLOAT32 *query = query_point_; +bool KDTreeSearch::BoxIntersectsSearch(FLOAT32* lower, FLOAT32* upper) { + FLOAT32* query = query_point_; // Compute the sum in higher precision. FLOAT64 total_distance = 0.0; FLOAT64 radius_squared = results_.max_insertable_key() * results_.max_insertable_key(); - PARAM_DESC *dim = tree_->KeyDesc; + PARAM_DESC* dim = tree_->KeyDesc; for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) { - if (dim->NonEssential) - continue; + if (dim->NonEssential) continue; FLOAT32 dimension_distance; if (*query < *lower) @@ -506,13 +488,11 @@ bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) { } total_distance += dimension_distance * dimension_distance; - if (total_distance >= radius_squared) - return FALSE; + if (total_distance >= radius_squared) return FALSE; } return TRUE; } - /*---------------------------------------------------------------------------*/ /** * Walk a tree, calling action once on each node. @@ -529,8 +509,8 @@ bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) { * @param sub_tree ptr to root of subtree to be walked * @param level current level in the tree for this node */ -void Walk(KDTREE *tree, void_proc action, void *context, - KDNODE *sub_tree, int32_t level) { +void Walk(KDTREE* tree, void_proc action, void* context, KDNODE* sub_tree, + int32_t level) { (*action)(context, sub_tree->Data, level); if (sub_tree->Left != nullptr) Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level)); @@ -539,9 +519,8 @@ void Walk(KDTREE *tree, void_proc action, void *context, } /** Given a subtree nodes, insert all of its elements into tree. */ -void InsertNodes(KDTREE *tree, KDNODE *nodes) { - if (nodes == nullptr) - return; +void InsertNodes(KDTREE* tree, KDNODE* nodes) { + if (nodes == nullptr) return; KDStore(tree, nodes->Key, nodes->Data); InsertNodes(tree, nodes->Left); @@ -549,7 +528,7 @@ void InsertNodes(KDTREE *tree, KDNODE *nodes) { } /** Free all of the nodes of a sub tree. */ -void FreeSubTree(KDNODE *sub_tree) { +void FreeSubTree(KDNODE* sub_tree) { if (sub_tree != nullptr) { FreeSubTree(sub_tree->Left); FreeSubTree(sub_tree->Right); diff --git a/src/classify/kdtree.h b/src/classify/kdtree.h index a4e0c18365..af639ac118 100644 --- a/src/classify/kdtree.h +++ b/src/classify/kdtree.h @@ -16,14 +16,14 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef KDTREE_H -#define KDTREE_H +#ifndef KDTREE_H +#define KDTREE_H /*----------------------------------------------------------------------------- Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "host.h" #include "cutil.h" +#include "host.h" #include "ocrfeatures.h" /** @@ -37,60 +37,62 @@ correctly if circular parameters outside the specified range are used. */ struct KDNODE { - FLOAT32 *Key; /**< search key */ - void *Data; /**< data that corresponds to key */ - FLOAT32 BranchPoint; /**< needed to make deletes work efficiently */ - FLOAT32 LeftBranch; /**< used to optimize search pruning */ - FLOAT32 RightBranch; /**< used to optimize search pruning */ - struct KDNODE *Left; /**< ptrs for KD tree structure */ - struct KDNODE *Right; + FLOAT32* Key; /**< search key */ + void* Data; /**< data that corresponds to key */ + FLOAT32 BranchPoint; /**< needed to make deletes work efficiently */ + FLOAT32 LeftBranch; /**< used to optimize search pruning */ + FLOAT32 RightBranch; /**< used to optimize search pruning */ + struct KDNODE* Left; /**< ptrs for KD tree structure */ + struct KDNODE* Right; }; struct KDTREE { - int16_t KeySize; /* number of dimensions in the tree */ - KDNODE Root; /* Root.Left points to actual root node */ - PARAM_DESC KeyDesc[1]; /* description of each dimension */ + int16_t KeySize; /* number of dimensions in the tree */ + KDNODE Root; /* Root.Left points to actual root node */ + PARAM_DESC KeyDesc[1]; /* description of each dimension */ }; /*---------------------------------------------------------------------------- Macros -----------------------------------------------------------------------------*/ -#define RootOf(T) ((T)->Root.Left->Data) +#define RootOf(T) ((T)->Root.Left->Data) /*----------------------------------------------------------------------------- Public Function Prototypes -----------------------------------------------------------------------------*/ -KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]); +KDTREE* MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]); -void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data); +void KDStore(KDTREE* Tree, FLOAT32* Key, void* Data); -void KDDelete(KDTREE * Tree, FLOAT32 Key[], void *Data); +void KDDelete(KDTREE* Tree, FLOAT32 Key[], void* Data); -void KDNearestNeighborSearch( - KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance, - int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[]); +void KDNearestNeighborSearch(KDTREE* Tree, FLOAT32 Query[], int QuerySize, + FLOAT32 MaxDistance, int* NumberOfResults, + void** NBuffer, FLOAT32 DBuffer[]); -void KDWalk(KDTREE *Tree, void_proc Action, void *context); +void KDWalk(KDTREE* Tree, void_proc Action, void* context); -void FreeKDTree(KDTREE *Tree); +void FreeKDTree(KDTREE* Tree); /*----------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ -KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index); +KDNODE* MakeKDNode(KDTREE* tree, FLOAT32 Key[], void* Data, int Index); -void FreeKDNode(KDNODE *Node); +void FreeKDNode(KDNODE* Node); -FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]); +FLOAT32 +DistanceSquared(int k, PARAM_DESC* dim, FLOAT32 p1[], FLOAT32 p2[]); -FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]); +FLOAT32 +ComputeDistance(int k, PARAM_DESC* dim, FLOAT32 p1[], FLOAT32 p2[]); -int QueryInSearch(KDTREE *tree); +int QueryInSearch(KDTREE* tree); -void Walk(KDTREE *tree, void_proc action, void *context, - KDNODE *SubTree, int32_t Level); +void Walk(KDTREE* tree, void_proc action, void* context, KDNODE* SubTree, + int32_t Level); -void InsertNodes(KDTREE *tree, KDNODE *nodes); +void InsertNodes(KDTREE* tree, KDNODE* nodes); -void FreeSubTree(KDNODE *SubTree); +void FreeSubTree(KDNODE* SubTree); #endif diff --git a/src/classify/mastertrainer.cpp b/src/classify/mastertrainer.cpp index b90db311ea..706d34b632 100644 --- a/src/classify/mastertrainer.cpp +++ b/src/classify/mastertrainer.cpp @@ -24,7 +24,6 @@ #include "config_auto.h" #endif -#include "mastertrainer.h" #include #include #include "allheaders.h" @@ -33,6 +32,7 @@ #include "efio.h" #include "errorcounter.h" #include "featdefs.h" +#include "mastertrainer.h" #include "sampleiterator.h" #include "shapeclassifier.h" #include "shapetable.h" @@ -51,22 +51,22 @@ const int kMaxUnicharsPerCluster = 2000; // Mean font distance below which to merge fonts and unichars. const float kFontMergeDistance = 0.025; -MasterTrainer::MasterTrainer(NormalizationMode norm_mode, - bool shape_analysis, - bool replicate_samples, - int debug_level) - : norm_mode_(norm_mode), samples_(fontinfo_table_), - junk_samples_(fontinfo_table_), verify_samples_(fontinfo_table_), - charsetsize_(0), - enable_shape_anaylsis_(shape_analysis), - enable_replication_(replicate_samples), - fragments_(nullptr), prev_unichar_id_(-1), debug_level_(debug_level) { -} +MasterTrainer::MasterTrainer(NormalizationMode norm_mode, bool shape_analysis, + bool replicate_samples, int debug_level) + : norm_mode_(norm_mode), + samples_(fontinfo_table_), + junk_samples_(fontinfo_table_), + verify_samples_(fontinfo_table_), + charsetsize_(0), + enable_shape_anaylsis_(shape_analysis), + enable_replication_(replicate_samples), + fragments_(nullptr), + prev_unichar_id_(-1), + debug_level_(debug_level) {} MasterTrainer::~MasterTrainer() { - delete [] fragments_; - for (int p = 0; p < page_images_.size(); ++p) - pixDestroy(&page_images_[p]); + delete[] fragments_; + for (int p = 0; p < page_images_.size(); ++p) pixDestroy(&page_images_[p]); } // WARNING! Serialize/DeSerialize are only partial, providing @@ -89,9 +89,10 @@ bool MasterTrainer::Serialize(FILE* fp) const { // Load an initial unicharset, or set one up if the file cannot be read. void MasterTrainer::LoadUnicharset(const char* filename) { if (!unicharset_.load_from_file(filename)) { - tprintf("Failed to load unicharset from file %s\n" - "Building unicharset for training from scratch...\n", - filename); + tprintf( + "Failed to load unicharset from file %s\n" + "Building unicharset for training from scratch...\n", + filename); unicharset_.clear(); UNICHARSET initialized; // Add special characters, as they were removed by the clear, but the @@ -99,7 +100,7 @@ void MasterTrainer::LoadUnicharset(const char* filename) { unicharset_.AppendOtherUnicharset(initialized); } charsetsize_ = unicharset_.size(); - delete [] fragments_; + delete[] fragments_; fragments_ = new int[charsetsize_]; memset(fragments_, 0, sizeof(*fragments_) * charsetsize_); samples_.LoadUnicharset(filename); @@ -115,11 +116,14 @@ void MasterTrainer::ReadTrainingSamples(const char* page_name, const FEATURE_DEFS_STRUCT& feature_defs, bool verification) { char buffer[2048]; - const int int_feature_type = ShortNameToFeatureType(feature_defs, kIntFeatureType); - const int micro_feature_type = ShortNameToFeatureType(feature_defs, - kMicroFeatureType); - const int cn_feature_type = ShortNameToFeatureType(feature_defs, kCNFeatureType); - const int geo_feature_type = ShortNameToFeatureType(feature_defs, kGeoFeatureType); + const int int_feature_type = + ShortNameToFeatureType(feature_defs, kIntFeatureType); + const int micro_feature_type = + ShortNameToFeatureType(feature_defs, kMicroFeatureType); + const int cn_feature_type = + ShortNameToFeatureType(feature_defs, kCNFeatureType); + const int geo_feature_type = + ShortNameToFeatureType(feature_defs, kGeoFeatureType); FILE* fp = Efopen(page_name, "rb"); if (fp == nullptr) { @@ -128,8 +132,7 @@ void MasterTrainer::ReadTrainingSamples(const char* page_name, } tr_filenames_.push_back(STRING(page_name)); while (fgets(buffer, sizeof(buffer), fp) != nullptr) { - if (buffer[0] == '\n') - continue; + if (buffer[0] == '\n') continue; char* space = strchr(buffer, ' '); if (space == nullptr) { @@ -168,8 +171,7 @@ void MasterTrainer::AddSample(bool verification, const char* unichar, verify_samples_.AddSample(unichar, sample); prev_unichar_id_ = -1; } else if (unicharset_.contains_unichar(unichar)) { - if (prev_unichar_id_ >= 0) - fragments_[prev_unichar_id_] = -1; + if (prev_unichar_id_ >= 0) fragments_[prev_unichar_id_] = -1; prev_unichar_id_ = samples_.AddSample(unichar, sample); if (flat_shapes_.FindShape(prev_unichar_id_, sample->font_id()) < 0) flat_shapes_.AddShape(prev_unichar_id_, sample->font_id()); @@ -211,10 +213,8 @@ void MasterTrainer::LoadPageImages(const char* filename) { // Sets up the samples appropriately for class/fontwise access. // Deletes outlier samples. void MasterTrainer::PostLoadCleanup() { - if (debug_level_ > 0) - tprintf("PostLoadCleanup...\n"); - if (enable_shape_anaylsis_) - ReplaceFragmentedSamples(); + if (debug_level_ > 0) tprintf("PostLoadCleanup...\n"); + if (enable_shape_anaylsis_) ReplaceFragmentedSamples(); SampleIterator sample_it; sample_it.Init(nullptr, nullptr, true, &verify_samples_); sample_it.NormalizeSamples(); @@ -225,8 +225,7 @@ void MasterTrainer::PostLoadCleanup() { // against current training. // samples_.DeleteOutliers(feature_space_, debug_level_ > 0); samples_.OrganizeByFontAndClass(); - if (debug_level_ > 0) - tprintf("ComputeCanonicalSamples...\n"); + if (debug_level_ > 0) tprintf("ComputeCanonicalSamples...\n"); samples_.ComputeCanonicalSamples(feature_map_, debug_level_ > 0); } @@ -234,12 +233,10 @@ void MasterTrainer::PostLoadCleanup() { // ReadTrainingSamples+PostLoadCleanup or DeSerialize. // Re-indexes the features and computes canonical and cloud features. void MasterTrainer::PreTrainingSetup() { - if (debug_level_ > 0) - tprintf("PreTrainingSetup...\n"); + if (debug_level_ > 0) tprintf("PreTrainingSetup...\n"); samples_.IndexFeatures(feature_space_); samples_.ComputeCanonicalFeatures(); - if (debug_level_ > 0) - tprintf("ComputeCloudFeatures...\n"); + if (debug_level_ > 0) tprintf("ComputeCloudFeatures...\n"); samples_.ComputeCloudFeatures(feature_space_.Size()); } @@ -255,12 +252,11 @@ void MasterTrainer::SetupMasterShapes() { for (int c = 0; c < samples_.charsetsize(); ++c) { ShapeTable shapes(samples_.unicharset()); for (int f = 0; f < num_fonts; ++f) { - if (samples_.NumClassSamples(f, c, true) > 0) - shapes.AddShape(c, f); + if (samples_.NumClassSamples(f, c, true) > 0) shapes.AddShape(c, f); } ClusterShapes(kMinClusteredShapes, 1, kFontMergeDistance, &shapes); - const CHAR_FRAGMENT *fragment = samples_.unicharset().get_fragment(c); + const CHAR_FRAGMENT* fragment = samples_.unicharset().get_fragment(c); if (fragment == nullptr) char_shapes.AppendMasterShapes(shapes, nullptr); @@ -271,14 +267,14 @@ void MasterTrainer::SetupMasterShapes() { else char_shapes.AppendMasterShapes(shapes, nullptr); } - ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, - kFontMergeDistance, &char_shapes_begin_fragment); + ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, + &char_shapes_begin_fragment); char_shapes.AppendMasterShapes(char_shapes_begin_fragment, nullptr); - ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, - kFontMergeDistance, &char_shapes_end_fragment); + ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, + &char_shapes_end_fragment); char_shapes.AppendMasterShapes(char_shapes_end_fragment, nullptr); - ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, - kFontMergeDistance, &char_shapes); + ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, + &char_shapes); master_shapes_.AppendMasterShapes(char_shapes, nullptr); tprintf("Master shape_table:%s\n", master_shapes_.SummaryStr().string()); } @@ -305,8 +301,7 @@ void MasterTrainer::IncludeJunk() { int junk_id = sample->class_id(); const char* junk_utf8 = junk_set.id_to_unichar(junk_id); int sample_id = sample_set.unichar_to_id(junk_utf8); - if (sample_id == INVALID_UNICHAR_ID) - sample_id = 0; + if (sample_id == INVALID_UNICHAR_ID) sample_id = 0; sample->set_class_id(sample_id); junk_samples_.extract_sample(s); samples_.AddSample(sample_id, sample); @@ -322,8 +317,7 @@ void MasterTrainer::IncludeJunk() { // samples. void MasterTrainer::ReplicateAndRandomizeSamplesIfRequired() { if (enable_replication_) { - if (debug_level_ > 0) - tprintf("ReplicateAndRandomize...\n"); + if (debug_level_ > 0) tprintf("ReplicateAndRandomize...\n"); verify_samples_.ReplicateAndRandomizeSamples(); samples_.ReplicateAndRandomizeSamples(); samples_.IndexFeatures(feature_space_); @@ -350,12 +344,8 @@ bool MasterTrainer::LoadFontInfo(const char* filename) { delete[] font_name; continue; } - fontinfo.properties = - (italic << 0) + - (bold << 1) + - (fixed << 2) + - (serif << 3) + - (fraktur << 4); + fontinfo.properties = (italic << 0) + (bold << 1) + (fixed << 2) + + (serif << 3) + (fraktur << 4); if (!fontinfo_table_.contains(fontinfo)) { fontinfo_table_.push_back(fontinfo); } else { @@ -372,7 +362,7 @@ bool MasterTrainer::LoadXHeights(const char* filename) { tprintf("fontinfo table is of size %d\n", fontinfo_table_.size()); xheights_.init_to_size(fontinfo_table_.size(), -1); if (filename == nullptr) return true; - FILE *f = fopen(filename, "rb"); + FILE* f = fopen(filename, "rb"); if (f == nullptr) { fprintf(stderr, "Failed to load font xheights from %s\n", filename); return false; @@ -386,8 +376,7 @@ bool MasterTrainer::LoadXHeights(const char* filename) { int total_xheight = 0; int xheight_count = 0; while (!feof(f)) { - if (tfscanf(f, "%1023s %d\n", buffer, &xht) != 2) - continue; + if (tfscanf(f, "%1023s %d\n", buffer, &xht) != 2) continue; buffer[1023] = '\0'; fontinfo.name = buffer; if (!fontinfo_table_.contains(fontinfo)) continue; @@ -403,15 +392,14 @@ bool MasterTrainer::LoadXHeights(const char* filename) { } int mean_xheight = DivRounded(total_xheight, xheight_count); for (int i = 0; i < fontinfo_table_.size(); ++i) { - if (xheights_[i] < 0) - xheights_[i] = mean_xheight; + if (xheights_[i] < 0) xheights_[i] = mean_xheight; } fclose(f); return true; } // LoadXHeights // Reads spacing stats from filename and adds them to fontinfo_table. -bool MasterTrainer::AddSpacingInfo(const char *filename) { +bool MasterTrainer::AddSpacingInfo(const char* filename) { FILE* fontinfo_file = fopen(filename, "rb"); if (fontinfo_file == nullptr) return true; // We silently ignore missing files! @@ -431,12 +419,12 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) { char kerned_uch[UNICHAR_LEN]; int x_gap, x_gap_before, x_gap_after, num_kerned; ASSERT_HOST(tfscanf(fontinfo_file, "%d\n", &num_unichars) == 1); - FontInfo *fi = &fontinfo_table_.get(fontinfo_id); + FontInfo* fi = &fontinfo_table_.get(fontinfo_id); fi->init_spacing(unicharset_.size()); - FontSpacingInfo *spacing = nullptr; + FontSpacingInfo* spacing = nullptr; for (int l = 0; l < num_unichars; ++l) { - if (tfscanf(fontinfo_file, "%s %d %d %d", - uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) { + if (tfscanf(fontinfo_file, "%s %d %d %d", uch, &x_gap_before, &x_gap_after, + &num_kerned) != 4) { tprintf("Bad format of font spacing file %s\n", filename); fclose(fontinfo_file); return false; @@ -506,11 +494,9 @@ void MasterTrainer::SetupFlatShapeTable(ShapeTable* shape_table) { int font = flat_shapes_.GetShape(s)[0].font_ids[0]; int f = 0; for (f = 0; f < active_fonts.size(); ++f) { - if (active_fonts[f] == font) - break; + if (active_fonts[f] == font) break; } - if (f == active_fonts.size()) - active_fonts.push_back(font); + if (f == active_fonts.size()) active_fonts.push_back(font); } // For each font in order, add all the shapes with that font in reverse order. int num_fonts = active_fonts.size(); @@ -527,11 +513,8 @@ void MasterTrainer::SetupFlatShapeTable(ShapeTable* shape_table) { // Sets up a Clusterer for mftraining on a single shape_id. // Call FreeClusterer on the return value after use. CLUSTERER* MasterTrainer::SetupForClustering( - const ShapeTable& shape_table, - const FEATURE_DEFS_STRUCT& feature_defs, - int shape_id, - int* num_samples) { - + const ShapeTable& shape_table, const FEATURE_DEFS_STRUCT& feature_defs, + int shape_id, int* num_samples) { int desc_index = ShortNameToFeatureType(feature_defs, kMicroFeatureType); int num_params = feature_defs.FeatureDesc[desc_index]->NumParams; ASSERT_HOST(num_params == MFCount); @@ -572,11 +555,11 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET& unicharset, CLASS_STRUCT* float_classes, const char* inttemp_file, const char* pffmtable_file) { - tesseract::Classify *classify = new tesseract::Classify(); + tesseract::Classify* classify = new tesseract::Classify(); // Move the fontinfo table to classify. fontinfo_table_.MoveTo(&classify->get_fontinfo_table()); - INT_TEMPLATES int_templates = classify->CreateIntTemplates(float_classes, - shape_set); + INT_TEMPLATES int_templates = + classify->CreateIntTemplates(float_classes, shape_set); FILE* fp = fopen(inttemp_file, "wb"); classify->WriteIntTemplates(fp, int_templates, shape_set); fclose(fp); @@ -587,8 +570,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET& unicharset, // unicharset cutoffs along the way. GenericVector shapetable_cutoffs; GenericVector unichar_cutoffs; - for (int c = 0; c < unicharset.size(); ++c) - unichar_cutoffs.push_back(0); + for (int c = 0; c < unicharset.size(); ++c) unichar_cutoffs.push_back(0); /* then write out each class */ for (int i = 0; i < int_templates->NumClasses; ++i) { INT_CLASS Class = ClassForClassId(int_templates, i); @@ -599,8 +581,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET& unicharset, // Todo: Test with min instead of max // if (LengthForConfigId (Class, config_id) < MaxLength) uint16_t length = Class->ConfigLengths[config_id]; - if (length > max_length) - max_length = Class->ConfigLengths[config_id]; + if (length > max_length) max_length = Class->ConfigLengths[config_id]; int shape_id = float_classes[i].font_set.get(config_id); const Shape& shape = shape_table.GetShape(shape_id); for (int c = 0; c < shape.size(); ++c) { @@ -614,7 +595,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET& unicharset, fp = fopen(pffmtable_file, "wb"); shapetable_cutoffs.Serialize(fp); for (int c = 0; c < unicharset.size(); ++c) { - const char *unichar = unicharset.id_to_unichar(c); + const char* unichar = unicharset.id_to_unichar(c); if (strcmp(unichar, " ") == 0) { unichar = "NULL"; } @@ -631,14 +612,13 @@ void MasterTrainer::DebugCanonical(const char* unichar_str1, const char* unichar_str2) { int class_id1 = unicharset_.unichar_to_id(unichar_str1); int class_id2 = unicharset_.unichar_to_id(unichar_str2); - if (class_id2 == INVALID_UNICHAR_ID) - class_id2 = class_id1; + if (class_id2 == INVALID_UNICHAR_ID) class_id2 = class_id1; if (class_id1 == INVALID_UNICHAR_ID) { tprintf("No unicharset entry found for %s\n", unichar_str1); return; } else { - tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", - class_id1, unichar_str1, class_id2, unichar_str2); + tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", class_id1, + unichar_str1, class_id2, unichar_str2); } int num_fonts = samples_.NumFonts(); const IntFeatureMap& feature_map = feature_map_; @@ -646,21 +626,18 @@ void MasterTrainer::DebugCanonical(const char* unichar_str1, // class. tprintf(" "); for (int f = 0; f < num_fonts; ++f) { - if (samples_.NumClassSamples(f, class_id2, false) == 0) - continue; + if (samples_.NumClassSamples(f, class_id2, false) == 0) continue; tprintf("%6d", f); } tprintf("\n"); for (int f1 = 0; f1 < num_fonts; ++f1) { // Map the features of the canonical_sample. - if (samples_.NumClassSamples(f1, class_id1, false) == 0) - continue; + if (samples_.NumClassSamples(f1, class_id1, false) == 0) continue; tprintf("%4d ", f1); for (int f2 = 0; f2 < num_fonts; ++f2) { - if (samples_.NumClassSamples(f2, class_id2, false) == 0) - continue; - float dist = samples_.ClusterDistance(f1, class_id1, f2, class_id2, - feature_map); + if (samples_.NumClassSamples(f2, class_id2, false) == 0) continue; + float dist = + samples_.ClusterDistance(f1, class_id1, f2, class_id2, feature_map); tprintf(" %5.3f", dist); } tprintf("\n"); @@ -697,8 +674,8 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font, f_window); int class_id2 = samples_.unicharset().unichar_to_id(unichar_str2); if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) { - const TrainingSample* sample = samples_.GetCanonicalSample(canonical_font, - class_id2); + const TrainingSample* sample = + samples_.GetCanonicalSample(canonical_font, class_id2); for (int f = 0; f < sample->num_features(); ++f) { RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED); } @@ -708,8 +685,7 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font, const BitVector& cloud = samples_.GetCloudFeatures(cloud_font, class_id1); for (int f = 0; f < cloud.size(); ++f) { if (cloud[f]) { - INT_FEATURE_STRUCT feature = - feature_map.InverseIndexFeature(f); + INT_FEATURE_STRUCT feature = feature_map.InverseIndexFeature(f); RenderIntFeature(f_window, &feature, ScrollView::GREEN); } } @@ -729,9 +705,8 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font, Shape shape; shape.AddToShape(class_id1, cloud_font); s_window->Clear(); - samples_.DisplaySamplesWithFeature(feature_index, shape, - feature_space, ScrollView::GREEN, - s_window); + samples_.DisplaySamplesWithFeature(feature_index, shape, feature_space, + ScrollView::GREEN, s_window); s_window->Update(); } } @@ -774,8 +749,7 @@ void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, // sample including replicated and systematically perturbed samples. // If report_string is non-nullptr, a summary of the results for each font // is appended to the report_string. -double MasterTrainer::TestClassifier(CountTypes error_mode, - int report_level, +double MasterTrainer::TestClassifier(CountTypes error_mode, int report_level, bool replicate_samples, TrainingSampleSet* samples, ShapeClassifier* test_classifier, @@ -784,18 +758,16 @@ double MasterTrainer::TestClassifier(CountTypes error_mode, sample_it.Init(nullptr, nullptr, replicate_samples, samples); if (report_level > 0) { int num_samples = 0; - for (sample_it.Begin(); !sample_it.AtEnd(); sample_it.Next()) - ++num_samples; + for (sample_it.Begin(); !sample_it.AtEnd(); sample_it.Next()) ++num_samples; tprintf("Iterator has charset size of %d/%d, %d shapes, %d samples\n", sample_it.SparseCharsetSize(), sample_it.CompactCharsetSize(), test_classifier->GetShapeTable()->NumShapes(), num_samples); tprintf("Testing %sREPLICATED:\n", replicate_samples ? "" : "NON-"); } double unichar_error = 0.0; - ErrorCounter::ComputeErrorRate(test_classifier, report_level, - error_mode, fontinfo_table_, - page_images_, &sample_it, &unichar_error, - nullptr, report_string); + ErrorCounter::ComputeErrorRate(test_classifier, report_level, error_mode, + fontinfo_table_, page_images_, &sample_it, + &unichar_error, nullptr, report_string); return unichar_error; } @@ -814,16 +786,16 @@ float MasterTrainer::ShapeDistance(const ShapeTable& shapes, int s1, int s2) { // distances between characters of matching font where possible. for (int c1 = 0; c1 < num_chars1; ++c1) { for (int c2 = 0; c2 < num_chars2; ++c2) { - dist_sum += samples_.UnicharDistance(shape1[c1], shape2[c2], - true, feature_map); + dist_sum += + samples_.UnicharDistance(shape1[c1], shape2[c2], true, feature_map); ++dist_count; } } } else { // In the single unichar case, there is little alternative, but to compute // the squared-order distance between pairs of fonts. - dist_sum = samples_.UnicharDistance(shape1[0], shape2[0], - false, feature_map); + dist_sum = + samples_.UnicharDistance(shape1[0], shape2[0], false, feature_map); ++dist_count; } return dist_sum / dist_count; @@ -838,8 +810,7 @@ void MasterTrainer::ReplaceFragmentedSamples() { int num_samples = samples_.num_samples(); for (int s = 0; s < num_samples; ++s) { TrainingSample* sample = samples_.mutable_sample(s); - if (fragments_[sample->class_id()] > 0) - samples_.KillSample(sample); + if (fragments_[sample->class_id()] > 0) samples_.KillSample(sample); } samples_.DeleteDeadSamples(); @@ -888,7 +859,7 @@ void MasterTrainer::ReplaceFragmentedSamples() { unicharset_.AppendOtherUnicharset(samples_.unicharset()); // delete [] good_junk; // Fragments_ no longer needed? - delete [] fragments_; + delete[] fragments_; fragments_ = nullptr; } @@ -898,7 +869,7 @@ void MasterTrainer::ReplaceFragmentedSamples() { // * No shape shall have more than max_shape_unichars in it, // * Don't merge shapes where the distance between them exceeds max_dist. const float kInfiniteDist = 999.0f; -void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, +void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float max_dist, ShapeTable* shapes) { int num_shapes = shapes->NumShapes(); int max_merges = num_shapes - min_shapes; @@ -938,7 +909,7 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, if (!shape_dists[s].empty()) { shape_dists[s][min_s1 - s - 1].distance = ShapeDistance(*shapes, s, min_s1); - shape_dists[s][min_s2 - s -1].distance = kInfiniteDist; + shape_dists[s][min_s2 - s - 1].distance = kInfiniteDist; } } for (int s2 = min_s1 + 1; s2 < num_shapes; ++s2) { @@ -964,7 +935,7 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, } } tprintf("Stopped with %d merged, min dist %f\n", num_merged, min_dist); - delete [] shape_dists; + delete[] shape_dists; if (debug_level_ > 1) { for (int s1 = 0; s1 < num_shapes; ++s1) { if (shapes->MasterDestinationIndex(s1) == s1) { @@ -974,5 +945,4 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, } } - } // namespace tesseract. diff --git a/src/classify/mastertrainer.h b/src/classify/mastertrainer.h index 12afe27cd3..b3249acf74 100644 --- a/src/classify/mastertrainer.h +++ b/src/classify/mastertrainer.h @@ -27,14 +27,14 @@ ----------------------------------------------------------------------------**/ #include "classify.h" #include "cluster.h" -#include "intfx.h" #include "elst.h" #include "errorcounter.h" #include "featdefs.h" #include "fontinfo.h" #include "indexmapbidi.h" -#include "intfeaturespace.h" #include "intfeaturemap.h" +#include "intfeaturespace.h" +#include "intfx.h" #include "intmatcher.h" #include "params.h" #include "shapetable.h" @@ -50,7 +50,7 @@ class ShapeClassifier; struct ShapeDist { ShapeDist() : shape1(0), shape2(0), distance(0.0f) {} ShapeDist(int s1, int s2, float dist) - : shape1(s1), shape2(s2), distance(dist) {} + : shape1(s1), shape2(s2), distance(dist) {} // Sort operator to sort in ascending order of distance. bool operator<(const ShapeDist& other) const { @@ -147,7 +147,7 @@ class MasterTrainer { // Reads spacing stats from filename and adds them to fontinfo_table. // Returns false on failure. - bool AddSpacingInfo(const char *filename); + bool AddSpacingInfo(const char* filename); // Returns the font id corresponding to the given font name. // Returns -1 if the font cannot be found. @@ -159,9 +159,7 @@ class MasterTrainer { // Returns the filename of the tr file corresponding to the command-line // argument with the given index. - const STRING& GetTRFileName(int index) const { - return tr_filenames_[index]; - } + const STRING& GetTRFileName(int index) const { return tr_filenames_[index]; } // Sets up a flat shapetable with one shape per class/font combination. void SetupFlatShapeTable(ShapeTable* shape_table); @@ -183,20 +181,14 @@ class MasterTrainer { const char* inttemp_file, const char* pffmtable_file); - const UNICHARSET& unicharset() const { - return samples_.unicharset(); - } - TrainingSampleSet* GetSamples() { - return &samples_; - } - const ShapeTable& master_shapes() const { - return master_shapes_; - } + const UNICHARSET& unicharset() const { return samples_.unicharset(); } + TrainingSampleSet* GetSamples() { return &samples_; } + const ShapeTable& master_shapes() const { return master_shapes_; } // Generates debug output relating to the canonical distance between the // two given UTF8 grapheme strings. void DebugCanonical(const char* unichar_str1, const char* unichar_str2); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED // Debugging for cloud/canonical features. // Displays a Features window containing: // If unichar_str2 is in the unicharset, and canonical_font is non-negative, @@ -209,7 +201,7 @@ class MasterTrainer { // will display the samples that have that feature in a separate window. void DisplaySamples(const char* unichar_str1, int cloud_font, const char* unichar_str2, int canonical_font); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED void TestClassifierVOld(bool replicate_samples, ShapeClassifier* test_classifier, @@ -217,8 +209,7 @@ class MasterTrainer { // Tests the given test_classifier on the internal samples. // See TestClassifier for details. - void TestClassifierOnSamples(CountTypes error_mode, - int report_level, + void TestClassifierOnSamples(CountTypes error_mode, int report_level, bool replicate_samples, ShapeClassifier* test_classifier, STRING* report_string); @@ -235,10 +226,8 @@ class MasterTrainer { // sample including replicated and systematically perturbed samples. // If report_string is non-nullptr, a summary of the results for each font // is appended to the report_string. - double TestClassifier(CountTypes error_mode, - int report_level, - bool replicate_samples, - TrainingSampleSet* samples, + double TestClassifier(CountTypes error_mode, int report_level, + bool replicate_samples, TrainingSampleSet* samples, ShapeClassifier* test_classifier, STRING* report_string); @@ -257,8 +246,8 @@ class MasterTrainer { // * End with at least min_shapes left in shape_table, // * No shape shall have more than max_shape_unichars in it, // * Don't merge shapes where the distance between them exceeds max_dist. - void ClusterShapes(int min_shapes, int max_shape_unichars, - float max_dist, ShapeTable* shape_table); + void ClusterShapes(int min_shapes, int max_shape_unichars, float max_dist, + ShapeTable* shape_table); private: NormalizationMode norm_mode_; diff --git a/src/classify/mf.cpp b/src/classify/mf.cpp index 51e46ce317..d2d07dbaf7 100644 --- a/src/classify/mf.cpp +++ b/src/classify/mf.cpp @@ -42,7 +42,8 @@ * @note Exceptions: none * @note History: Wed May 23 18:06:38 1990, DSJ, Created. */ -FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) { +FEATURE_SET +ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) { int NumFeatures; MICROFEATURES Features, OldFeatures; FEATURE_SET FeatureSet; @@ -50,15 +51,14 @@ FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) { MICROFEATURE OldFeature; OldFeatures = BlobMicroFeatures(Blob, cn_denorm); - if (OldFeatures == nullptr) - return nullptr; - NumFeatures = count (OldFeatures); - FeatureSet = NewFeatureSet (NumFeatures); + if (OldFeatures == nullptr) return nullptr; + NumFeatures = count(OldFeatures); + FeatureSet = NewFeatureSet(NumFeatures); Features = OldFeatures; iterate(Features) { - OldFeature = (MICROFEATURE) first_node (Features); - Feature = NewFeature (&MicroFeatureDesc); + OldFeature = (MICROFEATURE)first_node(Features); + Feature = NewFeature(&MicroFeatureDesc); Feature->Params[MFDirection] = OldFeature[ORIENTATION]; Feature->Params[MFXPosition] = OldFeature[XPOSITION]; Feature->Params[MFYPosition] = OldFeature[YPOSITION]; @@ -80,4 +80,4 @@ FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) { } FreeMicroFeatures(OldFeatures); return FeatureSet; -} /* ExtractMicros */ +} /* ExtractMicros */ diff --git a/src/classify/mf.h b/src/classify/mf.h index 4c06a5625b..225f05b610 100644 --- a/src/classify/mf.h +++ b/src/classify/mf.h @@ -15,18 +15,22 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef MF_H -#define MF_H +#ifndef MF_H +#define MF_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "ocrfeatures.h" #include "blobs.h" +#include "ocrfeatures.h" typedef enum { - MFXPosition, MFYPosition, - MFLength, MFDirection, MFBulge1, MFBulge2, + MFXPosition, + MFYPosition, + MFLength, + MFDirection, + MFBulge1, + MFBulge2, MFCount // For array sizes. } MF_PARAM_NAME; @@ -34,6 +38,7 @@ typedef float MicroFeature[MFCount]; /*---------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ -FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm); +FEATURE_SET +ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm); #endif diff --git a/src/classify/mfdefs.cpp b/src/classify/mfdefs.cpp index 34478a57a2..cb359408dd 100644 --- a/src/classify/mfdefs.cpp +++ b/src/classify/mfdefs.cpp @@ -19,8 +19,8 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "mfdefs.h" -#include "emalloc.h" #include +#include "emalloc.h" /*---------------------------------------------------------------------------- Public Code @@ -32,10 +32,10 @@ * @return New MICROFEATURE * @note History: 7/27/89, DSJ, Created. */ -MICROFEATURE NewMicroFeature() { - return ((MICROFEATURE) Emalloc (sizeof (MFBLOCK))); -} /* NewMicroFeature */ - +MICROFEATURE +NewMicroFeature() { + return ((MICROFEATURE)Emalloc(sizeof(MFBLOCK))); +} /* NewMicroFeature */ /*---------------------------------------------------------------------------*/ /** @@ -47,4 +47,4 @@ MICROFEATURE NewMicroFeature() { */ void FreeMicroFeatures(MICROFEATURES MicroFeatures) { destroy_nodes(MicroFeatures, Efree); -} /* FreeMicroFeatures */ +} /* FreeMicroFeatures */ diff --git a/src/classify/mfdefs.h b/src/classify/mfdefs.h index 20a3e2189b..dc7f020a3f 100644 --- a/src/classify/mfdefs.h +++ b/src/classify/mfdefs.h @@ -15,42 +15,43 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef MFDEFS_H -#define MFDEFS_H +#ifndef MFDEFS_H +#define MFDEFS_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "oldlist.h" #include "matchdefs.h" +#include "oldlist.h" /* definition of a list of micro-features */ typedef LIST MICROFEATURES; /* definition of structure of micro-features */ -#define MFSIZE 6 +#define MFSIZE 6 typedef FLOAT32 MFBLOCK[MFSIZE]; -typedef FLOAT32 *MICROFEATURE; +typedef FLOAT32* MICROFEATURE; /* definitions of individual micro-feature parameters */ -#define XPOSITION 0 -#define YPOSITION 1 -#define MFLENGTH 2 -#define ORIENTATION 3 -#define FIRSTBULGE 4 -#define SECONDBULGE 5 +#define XPOSITION 0 +#define YPOSITION 1 +#define MFLENGTH 2 +#define ORIENTATION 3 +#define FIRSTBULGE 4 +#define SECONDBULGE 5 /**---------------------------------------------------------------------------- Macros ----------------------------------------------------------------------------**/ /* macros for accessing micro-feature lists */ -#define NextFeatureOf(L) ( (MICROFEATURE) first_node ( L ) ) +#define NextFeatureOf(L) ((MICROFEATURE)first_node(L)) /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -MICROFEATURE NewMicroFeature(); +MICROFEATURE +NewMicroFeature(); void FreeMicroFeatures(MICROFEATURES MicroFeatures); #endif diff --git a/src/classify/mfoutline.cpp b/src/classify/mfoutline.cpp index e7e3625490..9f6ce57f0f 100644 --- a/src/classify/mfoutline.cpp +++ b/src/classify/mfoutline.cpp @@ -18,14 +18,14 @@ /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ -#include "clusttool.h" //If remove you get cought in a loop somewhere -#include "emalloc.h" #include "mfoutline.h" #include "blobs.h" +#include "classify.h" +#include "clusttool.h" //If remove you get cought in a loop somewhere #include "const.h" +#include "emalloc.h" #include "mfx.h" #include "params.h" -#include "classify.h" #include #include @@ -37,25 +37,23 @@ /*---------------------------------------------------------------------------*/ /** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). */ -LIST ConvertBlob(TBLOB *blob) { +LIST ConvertBlob(TBLOB* blob) { LIST outlines = NIL_LIST; - return (blob == nullptr) - ? NIL_LIST - : ConvertOutlines(blob->outlines, outlines, outer); + return (blob == nullptr) ? NIL_LIST + : ConvertOutlines(blob->outlines, outlines, outer); } - /*---------------------------------------------------------------------------*/ /** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */ -MFOUTLINE ConvertOutline(TESSLINE *outline) { - MFEDGEPT *NewPoint; +MFOUTLINE +ConvertOutline(TESSLINE* outline) { + MFEDGEPT* NewPoint; MFOUTLINE MFOutline = NIL_LIST; - EDGEPT *EdgePoint; - EDGEPT *StartPoint; - EDGEPT *NextPoint; + EDGEPT* EdgePoint; + EDGEPT* StartPoint; + EDGEPT* NextPoint; - if (outline == nullptr || outline->loop == nullptr) - return MFOutline; + if (outline == nullptr || outline->loop == nullptr) return MFOutline; StartPoint = outline->loop; EdgePoint = StartPoint; @@ -75,12 +73,10 @@ MFOUTLINE ConvertOutline(TESSLINE *outline) { EdgePoint = NextPoint; } while (EdgePoint != StartPoint); - if (MFOutline != nullptr) - MakeOutlineCircular(MFOutline); + if (MFOutline != nullptr) MakeOutlineCircular(MFOutline); return MFOutline; } - /*---------------------------------------------------------------------------*/ /** * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs). @@ -89,15 +85,13 @@ MFOUTLINE ConvertOutline(TESSLINE *outline) { * @param mf_outlines list to add converted outlines to * @param outline_type are the outlines outer or holes? */ -LIST ConvertOutlines(TESSLINE *outline, - LIST mf_outlines, +LIST ConvertOutlines(TESSLINE* outline, LIST mf_outlines, OUTLINETYPE outline_type) { MFOUTLINE mf_outline; while (outline != nullptr) { mf_outline = ConvertOutline(outline); - if (mf_outline != nullptr) - mf_outlines = push(mf_outlines, mf_outline); + if (mf_outline != nullptr) mf_outlines = push(mf_outlines, mf_outline); outline = outline->next; } return mf_outlines; @@ -118,30 +112,26 @@ LIST ConvertOutlines(TESSLINE *outline, * @note Exceptions: none * @note History: 7/21/89, DSJ, Created. */ -void FindDirectionChanges(MFOUTLINE Outline, - FLOAT32 MinSlope, +void FindDirectionChanges(MFOUTLINE Outline, FLOAT32 MinSlope, FLOAT32 MaxSlope) { - MFEDGEPT *Current; - MFEDGEPT *Last; + MFEDGEPT* Current; + MFEDGEPT* Last; MFOUTLINE EdgePoint; - if (DegenerateOutline (Outline)) - return; + if (DegenerateOutline(Outline)) return; - Last = PointAt (Outline); - Outline = NextPointAfter (Outline); + Last = PointAt(Outline); + Outline = NextPointAfter(Outline); EdgePoint = Outline; do { - Current = PointAt (EdgePoint); + Current = PointAt(EdgePoint); ComputeDirection(Last, Current, MinSlope, MaxSlope); Last = Current; - EdgePoint = NextPointAfter (EdgePoint); - } - while (EdgePoint != Outline); - -} /* FindDirectionChanges */ + EdgePoint = NextPointAfter(EdgePoint); + } while (EdgePoint != Outline); +} /* FindDirectionChanges */ /*---------------------------------------------------------------------------*/ /** @@ -152,20 +142,19 @@ void FindDirectionChanges(MFOUTLINE Outline, * @note Exceptions: none * @note History: 7/27/89, DSJ, Created. */ -void FreeMFOutline(void *arg) { //MFOUTLINE Outline) +void FreeMFOutline(void* arg) { // MFOUTLINE Outline) MFOUTLINE Start; - MFOUTLINE Outline = (MFOUTLINE) arg; + MFOUTLINE Outline = (MFOUTLINE)arg; /* break the circular outline so we can use std. techniques to deallocate */ - Start = list_rest (Outline); + Start = list_rest(Outline); set_rest(Outline, NIL_LIST); while (Start != nullptr) { free(first_node(Start)); - Start = pop (Start); + Start = pop(Start); } -} /* FreeMFOutline */ - +} /* FreeMFOutline */ /*---------------------------------------------------------------------------*/ /** @@ -178,8 +167,7 @@ void FreeMFOutline(void *arg) { //MFOUTLINE Outline */ void FreeOutlines(LIST Outlines) { destroy_nodes(Outlines, FreeMFOutline); -} /* FreeOutlines */ - +} /* FreeOutlines */ /*---------------------------------------------------------------------------*/ /** @@ -201,25 +189,22 @@ void MarkDirectionChanges(MFOUTLINE Outline) { MFOUTLINE Last; MFOUTLINE First; - if (DegenerateOutline (Outline)) - return; + if (DegenerateOutline(Outline)) return; - First = NextDirectionChange (Outline); + First = NextDirectionChange(Outline); Last = First; do { - Current = NextDirectionChange (Last); - MarkPoint (PointAt (Current)); + Current = NextDirectionChange(Last); + MarkPoint(PointAt(Current)); Last = Current; - } - while (Last != First); - -} /* MarkDirectionChanges */ + } while (Last != First); +} /* MarkDirectionChanges */ /*---------------------------------------------------------------------------*/ /** Return a new edge point for a micro-feature outline. */ -MFEDGEPT *NewEdgePoint() { - return reinterpret_cast(malloc(sizeof(MFEDGEPT))); +MFEDGEPT* NewEdgePoint() { + return reinterpret_cast(malloc(sizeof(MFEDGEPT))); } /*---------------------------------------------------------------------------*/ @@ -235,15 +220,15 @@ MFEDGEPT *NewEdgePoint() { * @note Exceptions: none * @note History: 7/26/89, DSJ, Created. */ -MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { +MFOUTLINE +NextExtremity(MFOUTLINE EdgePoint) { EdgePoint = NextPointAfter(EdgePoint); while (!PointAt(EdgePoint)->ExtremityMark) EdgePoint = NextPointAfter(EdgePoint); return (EdgePoint); -} /* NextExtremity */ - +} /* NextExtremity */ /*---------------------------------------------------------------------------*/ /** @@ -261,21 +246,18 @@ MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { * @note Exceptions: none * @note History: 8/2/89, DSJ, Created. */ -void NormalizeOutline(MFOUTLINE Outline, - FLOAT32 XOrigin) { - if (Outline == NIL_LIST) - return; +void NormalizeOutline(MFOUTLINE Outline, FLOAT32 XOrigin) { + if (Outline == NIL_LIST) return; MFOUTLINE EdgePoint = Outline; do { - MFEDGEPT *Current = PointAt(EdgePoint); - Current->Point.y = MF_SCALE_FACTOR * - (Current->Point.y - kBlnBaselineOffset); + MFEDGEPT* Current = PointAt(EdgePoint); + Current->Point.y = + MF_SCALE_FACTOR * (Current->Point.y - kBlnBaselineOffset); Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin); EdgePoint = NextPointAfter(EdgePoint); } while (EdgePoint != Outline); -} /* NormalizeOutline */ - +} /* NormalizeOutline */ /*---------------------------------------------------------------------------*/ namespace tesseract { @@ -297,9 +279,8 @@ namespace tesseract { * @note Exceptions: none * @note History: Fri Dec 14 08:14:55 1990, DSJ, Created. */ -void Classify::NormalizeOutlines(LIST Outlines, - FLOAT32 *XScale, - FLOAT32 *YScale) { +void Classify::NormalizeOutlines(LIST Outlines, FLOAT32* XScale, + FLOAT32* YScale) { MFOUTLINE Outline; switch (classify_norm_method) { @@ -309,13 +290,13 @@ void Classify::NormalizeOutlines(LIST Outlines, case baseline: iterate(Outlines) { - Outline = (MFOUTLINE) first_node(Outlines); + Outline = (MFOUTLINE)first_node(Outlines); NormalizeOutline(Outline, 0.0); } *XScale = *YScale = MF_SCALE_FACTOR; break; } -} /* NormalizeOutlines */ +} /* NormalizeOutlines */ } // namespace tesseract /*---------------------------------------------------------------------------- @@ -337,12 +318,12 @@ void Classify::NormalizeOutlines(LIST Outlines, void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { MFOUTLINE Current; - for (Current = Start; Current != End; Current = NextPointAfter (Current)) - PointAt (Current)->Direction = Direction; + for (Current = Start; Current != End; Current = NextPointAfter(Current)) + PointAt(Current)->Direction = Direction; - PointAt (End)->PreviousDirection = Direction; + PointAt(End)->PreviousDirection = Direction; -} /* ChangeDirection */ +} /* ChangeDirection */ /** * This routine normalizes each point in Outline by @@ -357,10 +338,9 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { */ void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { MFOUTLINE First, Current; - MFEDGEPT *CurrentPoint; + MFEDGEPT* CurrentPoint; - if (Outline == NIL_LIST) - return; + if (Outline == NIL_LIST) return; First = Outline; Current = First; @@ -372,10 +352,9 @@ void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR; Current = NextPointAfter(Current); - } - while (Current != First); + } while (Current != First); -} /* CharNormalizeOutline */ +} /* CharNormalizeOutline */ /** * This routine computes the slope from Start to Finish and @@ -396,23 +375,20 @@ void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { * @note Exceptions: none * @note History: 7/25/89, DSJ, Created. */ -void ComputeDirection(MFEDGEPT *Start, - MFEDGEPT *Finish, - FLOAT32 MinSlope, +void ComputeDirection(MFEDGEPT* Start, MFEDGEPT* Finish, FLOAT32 MinSlope, FLOAT32 MaxSlope) { FVECTOR Delta; Delta.x = Finish->Point.x - Start->Point.x; Delta.y = Finish->Point.y - Start->Point.y; if (Delta.x == 0) - if (Delta.y < 0) { - Start->Slope = -MAX_FLOAT32; - Start->Direction = south; - } - else { - Start->Slope = MAX_FLOAT32; - Start->Direction = north; - } + if (Delta.y < 0) { + Start->Slope = -MAX_FLOAT32; + Start->Direction = south; + } else { + Start->Slope = MAX_FLOAT32; + Start->Direction = north; + } else { Start->Slope = Delta.y / Delta.x; if (Delta.x > 0) @@ -420,30 +396,30 @@ void ComputeDirection(MFEDGEPT *Start, if (Start->Slope > MinSlope) if (Start->Slope < MaxSlope) Start->Direction = northeast; - else - Start->Direction = north; - else - Start->Direction = east; - else if (Start->Slope < -MinSlope) - if (Start->Slope > -MaxSlope) - Start->Direction = southeast; - else - Start->Direction = south; - else - Start->Direction = east; + else + Start->Direction = north; + else + Start->Direction = east; + else if (Start->Slope < -MinSlope) + if (Start->Slope > -MaxSlope) + Start->Direction = southeast; + else + Start->Direction = south; + else + Start->Direction = east; else if (Delta.y > 0) - if (Start->Slope < -MinSlope) - if (Start->Slope > -MaxSlope) - Start->Direction = northwest; - else - Start->Direction = north; - else - Start->Direction = west; + if (Start->Slope < -MinSlope) + if (Start->Slope > -MaxSlope) + Start->Direction = northwest; + else + Start->Direction = north; + else + Start->Direction = west; else if (Start->Slope > MinSlope) - if (Start->Slope < MaxSlope) - Start->Direction = southwest; - else - Start->Direction = south; + if (Start->Slope < MaxSlope) + Start->Direction = southwest; + else + Start->Direction = south; else Start->Direction = west; } @@ -461,18 +437,19 @@ void ComputeDirection(MFEDGEPT *Start, * @note Exceptions: none * @note History: 7/25/89, DSJ, Created. */ -MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { +MFOUTLINE +NextDirectionChange(MFOUTLINE EdgePoint) { DIRECTION InitialDirection; - InitialDirection = PointAt (EdgePoint)->Direction; + InitialDirection = PointAt(EdgePoint)->Direction; MFOUTLINE next_pt = nullptr; do { EdgePoint = NextPointAfter(EdgePoint); next_pt = NextPointAfter(EdgePoint); } while (PointAt(EdgePoint)->Direction == InitialDirection && - !PointAt(EdgePoint)->Hidden && - next_pt != nullptr && !PointAt(next_pt)->Hidden); + !PointAt(EdgePoint)->Hidden && next_pt != nullptr && + !PointAt(next_pt)->Hidden); return (EdgePoint); } diff --git a/src/classify/mfoutline.h b/src/classify/mfoutline.h index 750d83911c..abaa08c058 100644 --- a/src/classify/mfoutline.h +++ b/src/classify/mfoutline.h @@ -15,94 +15,96 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef MFOUTLINE_H -#define MFOUTLINE_H +#ifndef MFOUTLINE_H +#define MFOUTLINE_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "blobs.h" +#include "fpoint.h" #include "host.h" #include "oldlist.h" -#include "fpoint.h" #include "params.h" -#define NORMAL_X_HEIGHT (0.5) -#define NORMAL_BASELINE (0.0) +#define NORMAL_X_HEIGHT (0.5) +#define NORMAL_BASELINE (0.0) typedef LIST MFOUTLINE; typedef enum { - north, south, east, west, northeast, northwest, southeast, southwest + north, + south, + east, + west, + northeast, + northwest, + southeast, + southwest } DIRECTION; typedef struct { FPOINT Point; FLOAT32 Slope; - unsigned Padding:20; - BOOL8 Hidden:TRUE; - BOOL8 ExtremityMark:TRUE; - DIRECTION Direction:4; - DIRECTION PreviousDirection:4; + unsigned Padding : 20; + BOOL8 Hidden : TRUE; + BOOL8 ExtremityMark : TRUE; + DIRECTION Direction : 4; + DIRECTION PreviousDirection : 4; } MFEDGEPT; -typedef enum { - outer, hole -} OUTLINETYPE; +typedef enum { outer, hole } OUTLINETYPE; -typedef enum { - baseline, character -} NORM_METHOD; +typedef enum { baseline, character } NORM_METHOD; /**---------------------------------------------------------------------------- Macros ----------------------------------------------------------------------------**/ -#define AverageOf(A,B) (((A) + (B)) / 2) +#define AverageOf(A, B) (((A) + (B)) / 2) /* macro for computing the scale factor to use to normalize characters */ -#define MF_SCALE_FACTOR (NORMAL_X_HEIGHT / kBlnXHeight) +#define MF_SCALE_FACTOR (NORMAL_X_HEIGHT / kBlnXHeight) /* macros for manipulating micro-feature outlines */ -#define DegenerateOutline(O) (((O) == NIL_LIST) || ((O) == list_rest(O))) -#define PointAt(O) ((MFEDGEPT *) first_node (O)) -#define NextPointAfter(E) (list_rest (E)) -#define MakeOutlineCircular(O) (set_rest (last (O), (O))) +#define DegenerateOutline(O) (((O) == NIL_LIST) || ((O) == list_rest(O))) +#define PointAt(O) ((MFEDGEPT*)first_node(O)) +#define NextPointAfter(E) (list_rest(E)) +#define MakeOutlineCircular(O) (set_rest(last(O), (O))) /* macros for manipulating micro-feature outline edge points */ -#define ClearMark(P) ((P)->ExtremityMark = FALSE) -#define MarkPoint(P) ((P)->ExtremityMark = TRUE) +#define ClearMark(P) ((P)->ExtremityMark = FALSE) +#define MarkPoint(P) ((P)->ExtremityMark = TRUE) /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void ComputeBlobCenter(TBLOB *Blob, TPOINT *BlobCenter); +void ComputeBlobCenter(TBLOB* Blob, TPOINT* BlobCenter); -LIST ConvertBlob(TBLOB *Blob); +LIST ConvertBlob(TBLOB* Blob); -MFOUTLINE ConvertOutline(TESSLINE *Outline); +MFOUTLINE +ConvertOutline(TESSLINE* Outline); -LIST ConvertOutlines(TESSLINE *Outline, - LIST ConvertedOutlines, +LIST ConvertOutlines(TESSLINE* Outline, LIST ConvertedOutlines, OUTLINETYPE OutlineType); void FilterEdgeNoise(MFOUTLINE Outline, FLOAT32 NoiseSegmentLength); -void FindDirectionChanges(MFOUTLINE Outline, - FLOAT32 MinSlope, +void FindDirectionChanges(MFOUTLINE Outline, FLOAT32 MinSlope, FLOAT32 MaxSlope); -void FreeMFOutline(void *agr); //MFOUTLINE Outline); +void FreeMFOutline(void* agr); // MFOUTLINE Outline); void FreeOutlines(LIST Outlines); void MarkDirectionChanges(MFOUTLINE Outline); -MFEDGEPT *NewEdgePoint(); +MFEDGEPT* NewEdgePoint(); -MFOUTLINE NextExtremity(MFOUTLINE EdgePoint); +MFOUTLINE +NextExtremity(MFOUTLINE EdgePoint); -void NormalizeOutline(MFOUTLINE Outline, - FLOAT32 XOrigin); +void NormalizeOutline(MFOUTLINE Outline, FLOAT32 XOrigin); /*---------------------------------------------------------------------------- Private Function Prototypes @@ -114,11 +116,10 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction); // feature range of [-0.5, 0.5]. void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm); -void ComputeDirection(MFEDGEPT *Start, - MFEDGEPT *Finish, - FLOAT32 MinSlope, +void ComputeDirection(MFEDGEPT* Start, MFEDGEPT* Finish, FLOAT32 MinSlope, FLOAT32 MaxSlope); -MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint); +MFOUTLINE +NextDirectionChange(MFOUTLINE EdgePoint); #endif diff --git a/src/classify/mfx.cpp b/src/classify/mfx.cpp index 54c6588e25..00f3161164 100644 --- a/src/classify/mfx.cpp +++ b/src/classify/mfx.cpp @@ -18,11 +18,11 @@ /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ -#include "mfdefs.h" -#include "mfoutline.h" -#include "clusttool.h" //NEEDED +#include "clusttool.h" //NEEDED #include "const.h" #include "intfx.h" +#include "mfdefs.h" +#include "mfoutline.h" #include "normalis.h" #include "params.h" @@ -42,17 +42,19 @@ double_VAR(classify_max_slope, 2.414213562, Macros ----------------------------------------------------------------------------*/ /* miscellaneous macros */ -#define NormalizeAngle(A) ( (((A)<0)?((A)+2*PI):(A)) / (2*PI) ) +#define NormalizeAngle(A) ((((A) < 0) ? ((A) + 2 * PI) : (A)) / (2 * PI)) /*---------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ -FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End); +FLOAT32 +ComputeOrientation(MFEDGEPT* Start, MFEDGEPT* End); -MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, - MICROFEATURES MicroFeatures); +MICROFEATURES +ConvertToMicroFeatures(MFOUTLINE Outline, MICROFEATURES MicroFeatures); -MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); +MICROFEATURE +ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); /*---------------------------------------------------------------------------- Public Code @@ -69,7 +71,8 @@ MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); * @note Exceptions: none * @note History: 7/21/89, DSJ, Created. */ -MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { +MICROFEATURES +BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { MICROFEATURES MicroFeatures = NIL_LIST; LIST Outlines; LIST RemainingOutlines; @@ -80,13 +83,13 @@ MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { RemainingOutlines = Outlines; iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node (RemainingOutlines); + Outline = (MFOUTLINE)first_node(RemainingOutlines); CharNormalizeOutline(Outline, cn_denorm); } RemainingOutlines = Outlines; iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node(RemainingOutlines); + Outline = (MFOUTLINE)first_node(RemainingOutlines); FindDirectionChanges(Outline, classify_min_slope, classify_max_slope); MarkDirectionChanges(Outline); MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures); @@ -94,8 +97,7 @@ MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { FreeOutlines(Outlines); } return MicroFeatures; -} /* BlobMicroFeatures */ - +} /* BlobMicroFeatures */ /*--------------------------------------------------------------------------- Private Code @@ -117,16 +119,16 @@ MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { * @note Exceptions: none * @note History: 7/27/89, DSJ, Created. */ -FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) { +FLOAT32 +ComputeOrientation(MFEDGEPT* Start, MFEDGEPT* End) { FLOAT32 Orientation; - Orientation = NormalizeAngle (AngleFrom (Start->Point, End->Point)); + Orientation = NormalizeAngle(AngleFrom(Start->Point, End->Point)); /* ensure that round-off errors do not put circular param out of range */ - if ((Orientation < 0) || (Orientation >= 1)) - Orientation = 0; + if ((Orientation < 0) || (Orientation >= 1)) Orientation = 0; return (Orientation); -} /* ComputeOrientation */ +} /* ComputeOrientation */ /** * Convert Outline to MicroFeatures @@ -137,31 +139,29 @@ FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) { * @note Exceptions: none * @note History: 7/26/89, DSJ, Created. */ -MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, - MICROFEATURES MicroFeatures) { +MICROFEATURES +ConvertToMicroFeatures(MFOUTLINE Outline, MICROFEATURES MicroFeatures) { MFOUTLINE Current; MFOUTLINE Last; MFOUTLINE First; MICROFEATURE NewFeature; - if (DegenerateOutline (Outline)) - return (MicroFeatures); + if (DegenerateOutline(Outline)) return (MicroFeatures); - First = NextExtremity (Outline); + First = NextExtremity(Outline); Last = First; do { - Current = NextExtremity (Last); + Current = NextExtremity(Last); if (!PointAt(Current)->Hidden) { - NewFeature = ExtractMicroFeature (Last, Current); + NewFeature = ExtractMicroFeature(Last, Current); if (NewFeature != nullptr) - MicroFeatures = push (MicroFeatures, NewFeature); + MicroFeatures = push(MicroFeatures, NewFeature); } Last = Current; - } - while (Last != First); + } while (Last != First); return (MicroFeatures); -} /* ConvertToMicroFeatures */ +} /* ConvertToMicroFeatures */ /** * This routine computes the feature parameters which describe @@ -180,20 +180,21 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, * - 7/26/89, DSJ, Created. * - 11/17/89, DSJ, Added handling for Start and End same point. */ -MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) { +MICROFEATURE +ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) { MICROFEATURE NewFeature; MFEDGEPT *P1, *P2; P1 = PointAt(Start); P2 = PointAt(End); - NewFeature = NewMicroFeature (); + NewFeature = NewMicroFeature(); NewFeature[XPOSITION] = AverageOf(P1->Point.x, P2->Point.x); NewFeature[YPOSITION] = AverageOf(P1->Point.y, P2->Point.y); NewFeature[MFLENGTH] = DistanceBetween(P1->Point, P2->Point); NewFeature[ORIENTATION] = NormalizedAngleFrom(&P1->Point, &P2->Point, 1.0); - NewFeature[FIRSTBULGE] = 0.0f; // deprecated + NewFeature[FIRSTBULGE] = 0.0f; // deprecated NewFeature[SECONDBULGE] = 0.0f; // deprecated return NewFeature; -} /* ExtractMicroFeature */ +} /* ExtractMicroFeature */ diff --git a/src/classify/mfx.h b/src/classify/mfx.h index 5ed006dcc7..5be1bd82ac 100644 --- a/src/classify/mfx.h +++ b/src/classify/mfx.h @@ -15,8 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef MFX_H -#define MFX_H +#ifndef MFX_H +#define MFX_H /*---------------------------------------------------------------------------- Include Files and Type Defines @@ -36,6 +36,7 @@ extern double_VAR_H(classify_max_slope, 2.414213562, /*---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm); +MICROFEATURES +BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm); #endif diff --git a/src/classify/normfeat.cpp b/src/classify/normfeat.cpp index f297b3b05d..8292b9e5d3 100644 --- a/src/classify/normfeat.cpp +++ b/src/classify/normfeat.cpp @@ -20,8 +20,8 @@ ----------------------------------------------------------------------------*/ #include "normfeat.h" -#include "intfx.h" #include "featdefs.h" +#include "intfx.h" #include "mfoutline.h" /*---------------------------------------------------------------------------- @@ -29,7 +29,8 @@ ----------------------------------------------------------------------------*/ /** Return the length of the outline in baseline normalized form. */ -FLOAT32 ActualOutlineLength(FEATURE Feature) { +FLOAT32 +ActualOutlineLength(FEATURE Feature) { return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); } @@ -58,7 +59,8 @@ FLOAT32 ActualOutlineLength(FEATURE Feature) { * the x center of the grapheme's bounding box. * - English: [0.011, 0.31] */ -FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) { +FEATURE_SET +ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) { FEATURE_SET feature_set = NewFeatureSet(1); FEATURE feature = NewFeature(&CharNormDesc); @@ -72,4 +74,4 @@ FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) { AddFeature(feature_set, feature); return feature_set; -} /* ExtractCharNormFeatures */ +} /* ExtractCharNormFeatures */ diff --git a/src/classify/normfeat.h b/src/classify/normfeat.h index 1478b827d4..7c766b92d3 100644 --- a/src/classify/normfeat.h +++ b/src/classify/normfeat.h @@ -15,25 +15,30 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef NORMFEAT_H -#define NORMFEAT_H +#ifndef NORMFEAT_H +#define NORMFEAT_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "ocrfeatures.h" -#define LENGTH_COMPRESSION (10.0) +#define LENGTH_COMPRESSION (10.0) typedef enum { - CharNormY, CharNormLength, CharNormRx, CharNormRy + CharNormY, + CharNormLength, + CharNormRx, + CharNormRy } NORM_PARAM_NAME; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -FLOAT32 ActualOutlineLength(FEATURE Feature); +FLOAT32 +ActualOutlineLength(FEATURE Feature); -FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info); +FEATURE_SET +ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info); #endif diff --git a/src/classify/normmatch.cpp b/src/classify/normmatch.cpp index 0e60ee37c2..30f29ab1e9 100644 --- a/src/classify/normmatch.cpp +++ b/src/classify/normmatch.cpp @@ -20,8 +20,8 @@ ----------------------------------------------------------------------------*/ #include "normmatch.h" -#include #include +#include #include "classify.h" #include "clusttool.h" @@ -31,14 +31,13 @@ #include "globals.h" #include "helpers.h" #include "normfeat.h" +#include "params.h" #include "scanutils.h" #include "unicharset.h" -#include "params.h" -struct NORM_PROTOS -{ +struct NORM_PROTOS { int NumParams; - PARAM_DESC *ParamDesc; + PARAM_DESC* ParamDesc; LIST* Protos; int NumProtos; }; @@ -48,12 +47,10 @@ struct NORM_PROTOS ----------------------------------------------------------------------------*/ double NormEvidenceOf(double NormAdj); -void PrintNormMatch(FILE *File, - int NumParams, - PROTOTYPE *Proto, +void PrintNormMatch(FILE* File, int NumParams, PROTOTYPE* Proto, FEATURE Feature); -NORM_PROTOS *ReadNormProtos(FILE *File); +NORM_PROTOS* ReadNormProtos(FILE* File); /*---------------------------------------------------------------------------- Variables @@ -85,14 +82,14 @@ namespace tesseract { * @note Exceptions: none * @note History: Wed Dec 19 16:56:12 1990, DSJ, Created. */ -FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, - const FEATURE_STRUCT& feature, - bool DebugMatch) { +FLOAT32 +Classify::ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT& feature, + bool DebugMatch) { LIST Protos; FLOAT32 BestMatch; FLOAT32 Match; FLOAT32 Delta; - PROTOTYPE *Proto; + PROTOTYPE* Proto; int ProtoId; if (ClassId >= NormProtos->NumProtos) { @@ -102,13 +99,11 @@ FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, /* handle requests for classification as noise */ if (ClassId == NO_CLASS) { /* kludge - clean up constants and make into control knobs later */ - Match = (feature.Params[CharNormLength] * - feature.Params[CharNormLength] * 500.0 + - feature.Params[CharNormRx] * - feature.Params[CharNormRx] * 8000.0 + - feature.Params[CharNormRy] * - feature.Params[CharNormRy] * 8000.0); - return (1.0 - NormEvidenceOf (Match)); + Match = (feature.Params[CharNormLength] * feature.Params[CharNormLength] * + 500.0 + + feature.Params[CharNormRx] * feature.Params[CharNormRx] * 8000.0 + + feature.Params[CharNormRy] * feature.Params[CharNormRy] * 8000.0); + return (1.0 - NormEvidenceOf(Match)); } BestMatch = MAX_FLOAT32; @@ -120,7 +115,7 @@ FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, ProtoId = 0; iterate(Protos) { - Proto = (PROTOTYPE *) first_node (Protos); + Proto = (PROTOTYPE*)first_node(Protos); Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY]; Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY]; if (DebugMatch) { @@ -138,26 +133,24 @@ FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, // Ry is width! See intfx.cpp. Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy]; if (DebugMatch) { - tprintf("Width: Proto=%g, Delta=%g, Var=%g\n", - Proto->Mean[CharNormRy], Delta, - Proto->Weight.Elliptical[CharNormRy]); + tprintf("Width: Proto=%g, Delta=%g, Var=%g\n", Proto->Mean[CharNormRy], + Delta, Proto->Weight.Elliptical[CharNormRy]); } Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy]; Delta *= kWidthErrorWeighting; Match += Delta; if (DebugMatch) { - tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n", - Match, Match / classify_norm_adj_midpoint, - NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match))); + tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n", Match, + Match / classify_norm_adj_midpoint, NormEvidenceOf(Match), + 256 * (1 - NormEvidenceOf(Match))); } - if (Match < BestMatch) - BestMatch = Match; + if (Match < BestMatch) BestMatch = Match; ProtoId++; } return 1.0 - NormEvidenceOf(BestMatch); -} /* ComputeNormMatch */ +} /* ComputeNormMatch */ void Classify::FreeNormProtos() { if (NormProtos != nullptr) { @@ -189,11 +182,10 @@ double NormEvidenceOf(double NormAdj) { else if (classify_norm_adj_curl == 2) NormAdj = NormAdj * NormAdj; else - NormAdj = pow (NormAdj, classify_norm_adj_curl); + NormAdj = pow(NormAdj, classify_norm_adj_curl); return (1.0 / (1.0 + NormAdj)); } - /*---------------------------------------------------------------------------*/ /** * This routine dumps out detailed normalization match info. @@ -206,28 +198,24 @@ double NormEvidenceOf(double NormAdj) { * @note Exceptions: none * @note History: Wed Jan 2 09:49:35 1991, DSJ, Created. */ -void PrintNormMatch(FILE *File, - int NumParams, - PROTOTYPE *Proto, +void PrintNormMatch(FILE* File, int NumParams, PROTOTYPE* Proto, FEATURE Feature) { int i; FLOAT32 ParamMatch; FLOAT32 TotalMatch; for (i = 0, TotalMatch = 0.0; i < NumParams; i++) { - ParamMatch = (Feature->Params[i] - Mean(Proto, i)) / - StandardDeviation(Proto, i); + ParamMatch = + (Feature->Params[i] - Mean(Proto, i)) / StandardDeviation(Proto, i); - fprintf (File, " %6.1f", ParamMatch); + fprintf(File, " %6.1f", ParamMatch); if (i == CharNormY || i == CharNormRx) TotalMatch += ParamMatch * ParamMatch; } - fprintf (File, " --> %6.1f (%4.2f)\n", - TotalMatch, NormEvidenceOf (TotalMatch)); - -} /* PrintNormMatch */ + fprintf(File, " --> %6.1f (%4.2f)\n", TotalMatch, NormEvidenceOf(TotalMatch)); +} /* PrintNormMatch */ /*---------------------------------------------------------------------------*/ namespace tesseract { @@ -241,8 +229,8 @@ namespace tesseract { * @note Exceptions: none * @note History: Wed Dec 19 16:38:49 1990, DSJ, Created. */ -NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) { - NORM_PROTOS *NormProtos; +NORM_PROTOS* Classify::ReadNormProtos(TFile* fp) { + NORM_PROTOS* NormProtos; int i; char unichar[2 * UNICHAR_LEN + 1]; UNICHAR_ID unichar_id; @@ -250,11 +238,10 @@ NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) { int NumProtos; /* allocate and initialization data structure */ - NormProtos = (NORM_PROTOS *) Emalloc (sizeof (NORM_PROTOS)); + NormProtos = (NORM_PROTOS*)Emalloc(sizeof(NORM_PROTOS)); NormProtos->NumProtos = unicharset.size(); - NormProtos->Protos = (LIST *) Emalloc (NormProtos->NumProtos * sizeof(LIST)); - for (i = 0; i < NormProtos->NumProtos; i++) - NormProtos->Protos[i] = NIL_LIST; + NormProtos->Protos = (LIST*)Emalloc(NormProtos->NumProtos * sizeof(LIST)); + for (i = 0; i < NormProtos->NumProtos; i++) NormProtos->Protos[i] = NIL_LIST; /* read file header and save in data structure */ NormProtos->NumParams = ReadSampleSize(fp); @@ -279,5 +266,5 @@ NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) { } } return (NormProtos); -} /* ReadNormProtos */ +} /* ReadNormProtos */ } // namespace tesseract diff --git a/src/classify/ocrfeatures.cpp b/src/classify/ocrfeatures.cpp index 851e087531..b267779612 100644 --- a/src/classify/ocrfeatures.cpp +++ b/src/classify/ocrfeatures.cpp @@ -19,9 +19,9 @@ Include Files and Type Defines ----------------------------------------------------------------------------*/ #include "ocrfeatures.h" -#include "emalloc.h" #include "callcpp.h" #include "danerror.h" +#include "emalloc.h" #include "scanutils.h" #include @@ -48,7 +48,7 @@ bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) { FeatureSet->Features[FeatureSet->NumFeatures++] = Feature; return true; -} /* AddFeature */ +} /* AddFeature */ /** * Release the memory consumed by the specified feature. @@ -74,7 +74,7 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) { FreeFeature(FeatureSet->Features[i]); free(FeatureSet); } -} /* FreeFeatureSet */ +} /* FreeFeatureSet */ /** * Allocate and return a new feature of the specified @@ -83,7 +83,8 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) { * @return New #FEATURE. * @note History: Mon May 21 14:06:42 1990, DSJ, Created. */ -FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { +FEATURE +NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { FEATURE Feature; Feature = (FEATURE)malloc(sizeof(FEATURE_STRUCT) + @@ -91,7 +92,7 @@ FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { Feature->Type = FeatureDesc; return (Feature); -} /* NewFeature */ +} /* NewFeature */ /** * Allocate and return a new feature set large enough to @@ -100,16 +101,17 @@ FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { * @return New #FEATURE_SET. * @note History: Mon May 21 14:22:40 1990, DSJ, Created. */ -FEATURE_SET NewFeatureSet(int NumFeatures) { +FEATURE_SET +NewFeatureSet(int NumFeatures) { FEATURE_SET FeatureSet; - FeatureSet = (FEATURE_SET) Emalloc (sizeof (FEATURE_SET_STRUCT) + - (NumFeatures - 1) * sizeof (FEATURE)); + FeatureSet = (FEATURE_SET)Emalloc(sizeof(FEATURE_SET_STRUCT) + + (NumFeatures - 1) * sizeof(FEATURE)); FeatureSet->MaxNumFeatures = NumFeatures; FeatureSet->NumFeatures = 0; return (FeatureSet); -} /* NewFeatureSet */ +} /* NewFeatureSet */ /** * Create a new feature of the specified type and read in @@ -125,20 +127,21 @@ FEATURE_SET NewFeatureSet(int NumFeatures) { * format * @note History: Wed May 23 08:53:16 1990, DSJ, Created. */ -FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { +FEATURE +ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { FEATURE Feature; int i; - Feature = NewFeature (FeatureDesc); + Feature = NewFeature(FeatureDesc); for (i = 0; i < Feature->Type->NumParams; i++) { if (tfscanf(File, "%f", &(Feature->Params[i])) != 1) - DoError (ILLEGAL_FEATURE_PARAM, "Illegal feature parameter spec"); + DoError(ILLEGAL_FEATURE_PARAM, "Illegal feature parameter spec"); #ifndef _WIN32 - assert (!std::isnan(Feature->Params[i])); + assert(!std::isnan(Feature->Params[i])); #endif } return (Feature); -} /* ReadFeature */ +} /* ReadFeature */ /** * Create a new feature set of the specified type and read in @@ -151,7 +154,8 @@ FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { * @return New feature set read from File. * @note History: Wed May 23 09:17:31 1990, DSJ, Created. */ -FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { +FEATURE_SET +ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { FEATURE_SET FeatureSet; int NumFeatures; int i; @@ -161,10 +165,10 @@ FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { FeatureSet = NewFeatureSet(NumFeatures); for (i = 0; i < NumFeatures; i++) - AddFeature(FeatureSet, ReadFeature (File, FeatureDesc)); + AddFeature(FeatureSet, ReadFeature(File, FeatureDesc)); return (FeatureSet); -} /* ReadFeatureSet */ +} /* ReadFeatureSet */ /** * Appends a textual representation of Feature to str. @@ -186,7 +190,7 @@ void WriteFeature(FEATURE Feature, STRING* str) { str->add_str_double(" ", Feature->Params[i]); } *str += "\n"; -} /* WriteFeature */ +} /* WriteFeature */ /** * Write a textual representation of FeatureSet to File. @@ -206,7 +210,7 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { WriteFeature(FeatureSet->Features[i], str); } } -} /* WriteFeatureSet */ +} /* WriteFeatureSet */ /** * Write a textual representation of FeatureDesc to File @@ -226,19 +230,19 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { void WriteOldParamDesc(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { int i; - fprintf (File, "%d\n", FeatureDesc->NumParams); + fprintf(File, "%d\n", FeatureDesc->NumParams); for (i = 0; i < FeatureDesc->NumParams; i++) { if (FeatureDesc->ParamDesc[i].Circular) - fprintf (File, "circular "); + fprintf(File, "circular "); else - fprintf (File, "linear "); + fprintf(File, "linear "); if (FeatureDesc->ParamDesc[i].NonEssential) - fprintf (File, "non-essential "); + fprintf(File, "non-essential "); else - fprintf (File, "essential "); + fprintf(File, "essential "); - fprintf (File, "%f %f\n", - FeatureDesc->ParamDesc[i].Min, FeatureDesc->ParamDesc[i].Max); + fprintf(File, "%f %f\n", FeatureDesc->ParamDesc[i].Min, + FeatureDesc->ParamDesc[i].Max); } -} /* WriteOldParamDesc */ +} /* WriteOldParamDesc */ diff --git a/src/classify/ocrfeatures.h b/src/classify/ocrfeatures.h index f2808217c2..fe8180d6d7 100644 --- a/src/classify/ocrfeatures.h +++ b/src/classify/ocrfeatures.h @@ -15,8 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef FEATURES_H -#define FEATURES_H +#ifndef FEATURES_H +#define FEATURES_H /**---------------------------------------------------------------------------- Include Files and Type Defines @@ -30,11 +30,11 @@ struct INT_FX_RESULT_STRUCT; #undef Min #undef Max -#define FEAT_NAME_SIZE 80 +#define FEAT_NAME_SIZE 80 // define trap errors which can be caused by this module #define ILLEGAL_FEATURE_PARAM 1000 -#define ILLEGAL_NUM_FEATURES 1001 +#define ILLEGAL_NUM_FEATURES 1001 // A character is described by multiple sets of extracted features. Each // set contains a number of features of a particular type, for example, a @@ -44,52 +44,57 @@ struct INT_FX_RESULT_STRUCT; // parameters are required to be the first parameters in the feature. struct PARAM_DESC { - int8_t Circular; // TRUE if dimension wraps around - int8_t NonEssential; // TRUE if dimension not used in searches - FLOAT32 Min; // low end of range for circular dimensions - FLOAT32 Max; // high end of range for circular dimensions - FLOAT32 Range; // Max - Min - FLOAT32 HalfRange; // (Max - Min)/2 - FLOAT32 MidRange; // (Max + Min)/2 + int8_t Circular; // TRUE if dimension wraps around + int8_t NonEssential; // TRUE if dimension not used in searches + FLOAT32 Min; // low end of range for circular dimensions + FLOAT32 Max; // high end of range for circular dimensions + FLOAT32 Range; // Max - Min + FLOAT32 HalfRange; // (Max - Min)/2 + FLOAT32 MidRange; // (Max + Min)/2 }; struct FEATURE_DESC_STRUCT { - uint16_t NumParams; // total # of params - const char *ShortName; // short name for feature - const PARAM_DESC *ParamDesc; // array - one per param + uint16_t NumParams; // total # of params + const char* ShortName; // short name for feature + const PARAM_DESC* ParamDesc; // array - one per param }; -using FEATURE_DESC = FEATURE_DESC_STRUCT *; +using FEATURE_DESC = FEATURE_DESC_STRUCT*; struct FEATURE_STRUCT { - const FEATURE_DESC_STRUCT *Type; // points to description of feature type + const FEATURE_DESC_STRUCT* Type; // points to description of feature type FLOAT32 Params[1]; // variable size array - params for feature }; -using FEATURE = FEATURE_STRUCT *; +using FEATURE = FEATURE_STRUCT*; struct FEATURE_SET_STRUCT { - uint16_t NumFeatures; // number of features in set - uint16_t MaxNumFeatures; // maximum size of feature set - FEATURE Features[1]; // variable size array of features + uint16_t NumFeatures; // number of features in set + uint16_t MaxNumFeatures; // maximum size of feature set + FEATURE Features[1]; // variable size array of features }; -using FEATURE_SET = FEATURE_SET_STRUCT *; +using FEATURE_SET = FEATURE_SET_STRUCT*; // A generic character description as a char pointer. In reality, it will be // a pointer to some data structure. Paired feature extractors/matchers need // to agree on the data structure to be used, however, the high level // classifier does not need to know the details of this data structure. -using CHAR_FEATURES = char *; +using CHAR_FEATURES = char*; /*---------------------------------------------------------------------- Macros for defining the parameters of a new features ----------------------------------------------------------------------*/ -#define StartParamDesc(Name) \ -const PARAM_DESC Name[] = { - -#define DefineParam(Circular, NonEssential, Min, Max) \ - {Circular, NonEssential, Min, Max, \ - (Max) - (Min), (((Max) - (Min))/2.0), (((Max) + (Min))/2.0)}, - -#define EndParamDesc }; +#define StartParamDesc(Name) const PARAM_DESC Name[] = { +#define DefineParam(Circular, NonEssential, Min, Max) \ + {Circular, \ + NonEssential, \ + Min, \ + Max, \ + (Max) - (Min), \ + (((Max) - (Min)) / 2.0), \ + (((Max) + (Min)) / 2.0)}, + +#define EndParamDesc \ + } \ + ; /*---------------------------------------------------------------------- Macro for describing a new feature. The parameters of the macro @@ -97,9 +102,8 @@ are as follows: DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName) ----------------------------------------------------------------------*/ -#define DefineFeature(Name, NL, NC, SN, PN) \ -const FEATURE_DESC_STRUCT Name = { \ - ((NL) + (NC)), SN, PN}; +#define DefineFeature(Name, NL, NC, SN, PN) \ + const FEATURE_DESC_STRUCT Name = {((NL) + (NC)), SN, PN}; /*---------------------------------------------------------------------- Generic routines that work for all feature types @@ -110,13 +114,17 @@ void FreeFeature(FEATURE Feature); void FreeFeatureSet(FEATURE_SET FeatureSet); -FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc); +FEATURE +NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc); -FEATURE_SET NewFeatureSet(int NumFeatures); +FEATURE_SET +NewFeatureSet(int NumFeatures); -FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc); +FEATURE +ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc); -FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc); +FEATURE_SET +ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc); void WriteFeature(FEATURE Feature, STRING* str); diff --git a/src/classify/outfeat.cpp b/src/classify/outfeat.cpp index 1ece8c637b..a28e30ce20 100644 --- a/src/classify/outfeat.cpp +++ b/src/classify/outfeat.cpp @@ -44,30 +44,29 @@ namespace tesseract { * - 11/13/90, DSJ, Created. * - 05/24/91, DSJ, Updated for either char or baseline normalize. */ -FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { +FEATURE_SET +Classify::ExtractOutlineFeatures(TBLOB* Blob) { LIST Outlines; LIST RemainingOutlines; MFOUTLINE Outline; FEATURE_SET FeatureSet; FLOAT32 XScale, YScale; - FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES); - if (Blob == nullptr) - return (FeatureSet); + FeatureSet = NewFeatureSet(MAX_OUTLINE_FEATURES); + if (Blob == nullptr) return (FeatureSet); - Outlines = ConvertBlob (Blob); + Outlines = ConvertBlob(Blob); NormalizeOutlines(Outlines, &XScale, &YScale); RemainingOutlines = Outlines; iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node (RemainingOutlines); + Outline = (MFOUTLINE)first_node(RemainingOutlines); ConvertToOutlineFeatures(Outline, FeatureSet); } - if (classify_norm_method == baseline) - NormalizeOutlineX(FeatureSet); + if (classify_norm_method == baseline) NormalizeOutlineX(FeatureSet); FreeOutlines(Outlines); return (FeatureSet); -} /* ExtractOutlineFeatures */ +} /* ExtractOutlineFeatures */ } // namespace tesseract /*---------------------------------------------------------------------------- @@ -90,8 +89,7 @@ FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { * @note Exceptions: none * @note History: 11/13/90, DSJ, Created. */ -void AddOutlineFeatureToSet(FPOINT *Start, - FPOINT *End, +void AddOutlineFeatureToSet(FPOINT* Start, FPOINT* End, FEATURE_SET FeatureSet) { FEATURE Feature; @@ -102,8 +100,7 @@ void AddOutlineFeatureToSet(FPOINT *Start, Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End); AddFeature(FeatureSet, Feature); -} /* AddOutlineFeatureToSet */ - +} /* AddOutlineFeatureToSet */ /*---------------------------------------------------------------------------*/ /** @@ -125,8 +122,7 @@ void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { FPOINT FeatureStart; FPOINT FeatureEnd; - if (DegenerateOutline (Outline)) - return; + if (DegenerateOutline(Outline)) return; First = Outline; Next = First; @@ -143,10 +139,8 @@ void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { FeatureEnd = PointAt(Next)->Point; AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet); } - } - while (Next != First); -} /* ConvertToOutlineFeatures */ - + } while (Next != First); +} /* ConvertToOutlineFeatures */ /*---------------------------------------------------------------------------*/ /** @@ -168,8 +162,7 @@ void NormalizeOutlineX(FEATURE_SET FeatureSet) { FLOAT32 TotalWeight = 0.0; FLOAT32 Origin; - if (FeatureSet->NumFeatures <= 0) - return; + if (FeatureSet->NumFeatures <= 0) return; for (i = 0; i < FeatureSet->NumFeatures; i++) { Feature = FeatureSet->Features[i]; @@ -183,4 +176,4 @@ void NormalizeOutlineX(FEATURE_SET FeatureSet) { Feature = FeatureSet->Features[i]; Feature->Params[OutlineFeatX] -= Origin; } -} /* NormalizeOutlineX */ +} /* NormalizeOutlineX */ diff --git a/src/classify/outfeat.h b/src/classify/outfeat.h index e7a36476ef..e38fe61483 100644 --- a/src/classify/outfeat.h +++ b/src/classify/outfeat.h @@ -15,15 +15,15 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef OUTFEAT_H -#define OUTFEAT_H +#ifndef OUTFEAT_H +#define OUTFEAT_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "ocrfeatures.h" #include "fpoint.h" #include "mfoutline.h" +#include "ocrfeatures.h" typedef enum { OutlineFeatX, @@ -32,14 +32,12 @@ typedef enum { OutlineFeatDir } OUTLINE_FEAT_PARAM_NAME; -#define MAX_OUTLINE_FEATURES (100) +#define MAX_OUTLINE_FEATURES (100) /*--------------------------------------------------------------------------- Privat Function Prototypes ----------------------------------------------------------------------------*/ -void AddOutlineFeatureToSet(FPOINT *Start, - FPOINT *End, - FEATURE_SET FeatureSet); +void AddOutlineFeatureToSet(FPOINT* Start, FPOINT* End, FEATURE_SET FeatureSet); void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet); diff --git a/src/classify/picofeat.cpp b/src/classify/picofeat.cpp index c8fd778f0d..09c4f9ce93 100644 --- a/src/classify/picofeat.cpp +++ b/src/classify/picofeat.cpp @@ -41,8 +41,7 @@ double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); /*--------------------------------------------------------------------------- Private Function Prototypes ----------------------------------------------------------------------------*/ -void ConvertSegmentToPicoFeat(FPOINT *Start, - FPOINT *End, +void ConvertSegmentToPicoFeat(FPOINT* Start, FPOINT* End, FEATURE_SET FeatureSet); void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); @@ -64,7 +63,8 @@ namespace tesseract { * @note Exceptions: none * @note History: 9/4/90, DSJ, Created. */ -FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { +FEATURE_SET +Classify::ExtractPicoFeatures(TBLOB* Blob) { LIST Outlines; LIST RemainingOutlines; MFOUTLINE Outline; @@ -76,15 +76,14 @@ FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { NormalizeOutlines(Outlines, &XScale, &YScale); RemainingOutlines = Outlines; iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node (RemainingOutlines); + Outline = (MFOUTLINE)first_node(RemainingOutlines); ConvertToPicoFeatures2(Outline, FeatureSet); } - if (classify_norm_method == baseline) - NormalizePicoX(FeatureSet); + if (classify_norm_method == baseline) NormalizePicoX(FeatureSet); FreeOutlines(Outlines); return (FeatureSet); -} /* ExtractPicoFeatures */ +} /* ExtractPicoFeatures */ } // namespace tesseract /*---------------------------------------------------------------------------- @@ -106,8 +105,7 @@ FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { * @note Exceptions: none * @note History: Tue Apr 30 15:44:34 1991, DSJ, Created. */ -void ConvertSegmentToPicoFeat(FPOINT *Start, - FPOINT *End, +void ConvertSegmentToPicoFeat(FPOINT* Start, FPOINT* End, FEATURE_SET FeatureSet) { FEATURE Feature; FLOAT32 Angle; @@ -117,15 +115,14 @@ void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT Delta; int i; - Angle = NormalizedAngleFrom (Start, End, 1.0); - Length = DistanceBetween (*Start, *End); - NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5); - if (NumFeatures < 1) - NumFeatures = 1; + Angle = NormalizedAngleFrom(Start, End, 1.0); + Length = DistanceBetween(*Start, *End); + NumFeatures = (int)floor(Length / classify_pico_feature_length + 0.5); + if (NumFeatures < 1) NumFeatures = 1; /* compute vector for one pico feature */ - Delta.x = XDelta (*Start, *End) / NumFeatures; - Delta.y = YDelta (*Start, *End) / NumFeatures; + Delta.x = XDelta(*Start, *End) / NumFeatures; + Delta.y = YDelta(*Start, *End) / NumFeatures; /* compute position of first pico feature */ Center.x = Start->x + Delta.x / 2.0; @@ -133,7 +130,7 @@ void ConvertSegmentToPicoFeat(FPOINT *Start, /* compute each pico feature in segment and add to feature set */ for (i = 0; i < NumFeatures; i++) { - Feature = NewFeature (&PicoFeatDesc); + Feature = NewFeature(&PicoFeatDesc); Feature->Params[PicoFeatDir] = Angle; Feature->Params[PicoFeatX] = Center.x; Feature->Params[PicoFeatY] = Center.y; @@ -142,8 +139,7 @@ void ConvertSegmentToPicoFeat(FPOINT *Start, Center.x += Delta.x; Center.y += Delta.y; } -} /* ConvertSegmentToPicoFeat */ - +} /* ConvertSegmentToPicoFeat */ /*---------------------------------------------------------------------------*/ /** @@ -165,8 +161,7 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { MFOUTLINE First; MFOUTLINE Current; - if (DegenerateOutline(Outline)) - return; + if (DegenerateOutline(Outline)) return; First = Outline; Current = First; @@ -178,16 +173,14 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { format. In the old format, a hidden edge is marked by the starting point for that edge. */ if (!(PointAt(Next)->Hidden)) - ConvertSegmentToPicoFeat (&(PointAt(Current)->Point), - &(PointAt(Next)->Point), FeatureSet); + ConvertSegmentToPicoFeat(&(PointAt(Current)->Point), + &(PointAt(Next)->Point), FeatureSet); Current = Next; Next = NextPointAfter(Current); - } - while (Current != First); - -} /* ConvertToPicoFeatures2 */ + } while (Current != First); +} /* ConvertToPicoFeatures2 */ /*---------------------------------------------------------------------------*/ /** @@ -216,7 +209,7 @@ void NormalizePicoX(FEATURE_SET FeatureSet) { Feature = FeatureSet->Features[i]; Feature->Params[PicoFeatX] -= Origin; } -} /* NormalizePicoX */ +} /* NormalizePicoX */ namespace tesseract { /*---------------------------------------------------------------------------*/ @@ -227,8 +220,9 @@ namespace tesseract { * @note Exceptions: none * @note History: 8/8/2011, rays, Created. */ -FEATURE_SET Classify::ExtractIntCNFeatures( - const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { +FEATURE_SET +Classify::ExtractIntCNFeatures(const TBLOB& blob, + const INT_FX_RESULT_STRUCT& fx_info) { INT_FX_RESULT_STRUCT local_fx_info(fx_info); GenericVector bl_features; tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( @@ -249,7 +243,7 @@ FEATURE_SET Classify::ExtractIntCNFeatures( delete sample; return feature_set; -} /* ExtractIntCNFeatures */ +} /* ExtractIntCNFeatures */ /*---------------------------------------------------------------------------*/ /** @@ -259,8 +253,9 @@ FEATURE_SET Classify::ExtractIntCNFeatures( * @note Exceptions: none * @note History: 8/8/2011, rays, Created. */ -FEATURE_SET Classify::ExtractIntGeoFeatures( - const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { +FEATURE_SET +Classify::ExtractIntGeoFeatures(const TBLOB& blob, + const INT_FX_RESULT_STRUCT& fx_info) { INT_FX_RESULT_STRUCT local_fx_info(fx_info); GenericVector bl_features; tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( @@ -277,6 +272,6 @@ FEATURE_SET Classify::ExtractIntGeoFeatures( delete sample; return feature_set; -} /* ExtractIntGeoFeatures */ +} /* ExtractIntGeoFeatures */ } // namespace tesseract. diff --git a/src/classify/picofeat.h b/src/classify/picofeat.h index 966ffc32e7..cab1f8c9b9 100644 --- a/src/classify/picofeat.h +++ b/src/classify/picofeat.h @@ -15,8 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef PICOFEAT_H -#define PICOFEAT_H +#ifndef PICOFEAT_H +#define PICOFEAT_H /**---------------------------------------------------------------------------- Include Files and Type Defines @@ -33,16 +33,14 @@ enum IntParams { // Enum for the order/type of params in GeoFeatDesc. enum GeoParams { - GeoBottom, // Bounding box bottom in baseline space (0-255). - GeoTop, // Bounding box top in baseline space (0-255). - GeoWidth, // Bounding box width in baseline space (0-255). + GeoBottom, // Bounding box bottom in baseline space (0-255). + GeoTop, // Bounding box top in baseline space (0-255). + GeoWidth, // Bounding box width in baseline space (0-255). - GeoCount // Number of geo features. + GeoCount // Number of geo features. }; -typedef enum -{ PicoFeatY, PicoFeatDir, PicoFeatX } -PICO_FEAT_PARAM_NAME; +typedef enum { PicoFeatY, PicoFeatDir, PicoFeatX } PICO_FEAT_PARAM_NAME; #define MAX_PICO_FEATURES (1000) @@ -52,11 +50,10 @@ PICO_FEAT_PARAM_NAME; extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length"); - /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -#define GetPicoFeatureLength() (PicoFeatureLength) +#define GetPicoFeatureLength() (PicoFeatureLength) /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/src/classify/protos.cpp b/src/classify/protos.cpp index e5896295c4..54dfc0f7b9 100644 --- a/src/classify/protos.cpp +++ b/src/classify/protos.cpp @@ -26,20 +26,20 @@ I n c l u d e s ----------------------------------------------------------------------*/ #include "protos.h" +#include "callcpp.h" +#include "classify.h" #include "const.h" #include "emalloc.h" -#include "callcpp.h" -#include "tprintf.h" -#include "scanutils.h" #include "globals.h" -#include "classify.h" #include "params.h" +#include "scanutils.h" +#include "tprintf.h" -#include #include +#include -#define PROTO_INCREMENT 32 -#define CONFIG_INCREMENT 16 +#define PROTO_INCREMENT 32 +#define CONFIG_INCREMENT 16 /*---------------------------------------------------------------------- V a r i a b l e s @@ -56,7 +56,7 @@ STRING_VAR(classify_training_file, "MicroFeatures", "Training file"); * * Add a new config to this class. Malloc new space and copy the * old configs if necessary. Return the config id for the new config. - * + * * @param Class The class to add to */ int AddConfigToClass(CLASS_TYPE Class) { @@ -69,30 +69,29 @@ int AddConfigToClass(CLASS_TYPE Class) { if (Class->NumConfigs >= Class->MaxNumConfigs) { /* add configs in CONFIG_INCREMENT chunks at a time */ - NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) / - CONFIG_INCREMENT) * CONFIG_INCREMENT); + NewNumConfigs = + (((Class->MaxNumConfigs + CONFIG_INCREMENT) / CONFIG_INCREMENT) * + CONFIG_INCREMENT); - Class->Configurations = - (CONFIGS) Erealloc (Class->Configurations, - sizeof (BIT_VECTOR) * NewNumConfigs); + Class->Configurations = (CONFIGS)Erealloc( + Class->Configurations, sizeof(BIT_VECTOR) * NewNumConfigs); Class->MaxNumConfigs = NewNumConfigs; } NewConfig = Class->NumConfigs++; - Config = NewBitVector (MaxNumProtos); + Config = NewBitVector(MaxNumProtos); Class->Configurations[NewConfig] = Config; - zero_all_bits (Config, WordsInVectorOfSize (MaxNumProtos)); + zero_all_bits(Config, WordsInVectorOfSize(MaxNumProtos)); return (NewConfig); } - /** * @name AddProtoToClass * * Add a new proto to this class. Malloc new space and copy the * old protos if necessary. Return the proto id for the new proto. - * + * * @param Class The class to add to */ int AddProtoToClass(CLASS_TYPE Class) { @@ -104,18 +103,18 @@ int AddProtoToClass(CLASS_TYPE Class) { if (Class->NumProtos >= Class->MaxNumProtos) { /* add protos in PROTO_INCREMENT chunks at a time */ - NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) / - PROTO_INCREMENT) * PROTO_INCREMENT); + NewNumProtos = + (((Class->MaxNumProtos + PROTO_INCREMENT) / PROTO_INCREMENT) * + PROTO_INCREMENT); - Class->Prototypes = (PROTO) Erealloc (Class->Prototypes, - sizeof (PROTO_STRUCT) * - NewNumProtos); + Class->Prototypes = + (PROTO)Erealloc(Class->Prototypes, sizeof(PROTO_STRUCT) * NewNumProtos); Class->MaxNumProtos = NewNumProtos; for (i = 0; i < Class->NumConfigs; i++) { Config = Class->Configurations[i]; - Class->Configurations[i] = ExpandBitVector (Config, NewNumProtos); + Class->Configurations[i] = ExpandBitVector(Config, NewNumProtos); for (Bit = Class->NumProtos; Bit < NewNumProtos; Bit++) reset_bit(Config, Bit); @@ -123,58 +122,56 @@ int AddProtoToClass(CLASS_TYPE Class) { } NewProto = Class->NumProtos++; if (Class->NumProtos > MAX_NUM_PROTOS) { - tprintf("Ouch! number of protos = %d, vs max of %d!", - Class->NumProtos, MAX_NUM_PROTOS); + tprintf("Ouch! number of protos = %d, vs max of %d!", Class->NumProtos, + MAX_NUM_PROTOS); } return (NewProto); } - /** * @name ClassConfigLength * * Return the length of all the protos in this class. - * + * * @param Class The class to add to * @param Config FIXME */ -FLOAT32 ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config) { +FLOAT32 +ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config) { int16_t Pid; FLOAT32 TotalLength = 0; for (Pid = 0; Pid < Class->NumProtos; Pid++) { - if (test_bit (Config, Pid)) { - - TotalLength += (ProtoIn (Class, Pid))->Length; + if (test_bit(Config, Pid)) { + TotalLength += (ProtoIn(Class, Pid))->Length; } } return (TotalLength); } - /** * @name ClassProtoLength * * Return the length of all the protos in this class. - * + * * @param Class The class to use */ -FLOAT32 ClassProtoLength(CLASS_TYPE Class) { +FLOAT32 +ClassProtoLength(CLASS_TYPE Class) { int16_t Pid; FLOAT32 TotalLength = 0; for (Pid = 0; Pid < Class->NumProtos; Pid++) { - TotalLength += (ProtoIn (Class, Pid))->Length; + TotalLength += (ProtoIn(Class, Pid))->Length; } return (TotalLength); } - /** * @name CopyProto * * Copy the first proto into the second. - * + * * @param Src Source * @param Dest Destination */ @@ -188,7 +185,6 @@ void CopyProto(PROTO Src, PROTO Dest) { Dest->C = Src->C; } - /********************************************************************** * FillABC * @@ -197,15 +193,14 @@ void CopyProto(PROTO Src, PROTO Dest) { void FillABC(PROTO Proto) { FLOAT32 Slope, Intercept, Normalizer; - Slope = tan (Proto->Angle * 2.0 * PI); + Slope = tan(Proto->Angle * 2.0 * PI); Intercept = Proto->Y - Slope * Proto->X; - Normalizer = 1.0 / sqrt (Slope * Slope + 1.0); + Normalizer = 1.0 / sqrt(Slope * Slope + 1.0); Proto->A = Slope * Normalizer; Proto->B = -Normalizer; Proto->C = Intercept * Normalizer; } - /********************************************************************** * FreeClass * @@ -218,7 +213,6 @@ void FreeClass(CLASS_TYPE Class) { } } - /********************************************************************** * FreeClassFields * @@ -231,7 +225,7 @@ void FreeClassFields(CLASS_TYPE Class) { if (Class->MaxNumProtos > 0) free(Class->Prototypes); if (Class->MaxNumConfigs > 0) { for (i = 0; i < Class->NumConfigs; i++) - FreeBitVector (Class->Configurations[i]); + FreeBitVector(Class->Configurations[i]); free(Class->Configurations); } } @@ -243,26 +237,24 @@ void FreeClassFields(CLASS_TYPE Class) { * Allocate a new class with enough memory to hold the specified number * of prototypes and configurations. **********************************************************************/ -CLASS_TYPE NewClass(int NumProtos, int NumConfigs) { +CLASS_TYPE +NewClass(int NumProtos, int NumConfigs) { CLASS_TYPE Class; Class = new CLASS_STRUCT; if (NumProtos > 0) - Class->Prototypes = (PROTO) Emalloc (NumProtos * sizeof (PROTO_STRUCT)); + Class->Prototypes = (PROTO)Emalloc(NumProtos * sizeof(PROTO_STRUCT)); if (NumConfigs > 0) - Class->Configurations = (CONFIGS) Emalloc (NumConfigs * - sizeof (BIT_VECTOR)); + Class->Configurations = (CONFIGS)Emalloc(NumConfigs * sizeof(BIT_VECTOR)); Class->MaxNumProtos = NumProtos; Class->MaxNumConfigs = NumConfigs; Class->NumProtos = 0; Class->NumConfigs = 0; return (Class); - } - /********************************************************************** * PrintProtos * @@ -272,10 +264,10 @@ void PrintProtos(CLASS_TYPE Class) { int16_t Pid; for (Pid = 0; Pid < Class->NumProtos; Pid++) { - cprintf ("Proto %d:\t", Pid); - PrintProto (ProtoIn (Class, Pid)); - cprintf ("\t"); - PrintProtoLine (ProtoIn (Class, Pid)); + cprintf("Proto %d:\t", Pid); + PrintProto(ProtoIn(Class, Pid)); + cprintf("\t"); + PrintProtoLine(ProtoIn(Class, Pid)); new_line(); } } diff --git a/src/classify/protos.h b/src/classify/protos.h index d53fe467f4..28ae54b975 100644 --- a/src/classify/protos.h +++ b/src/classify/protos.h @@ -30,17 +30,16 @@ ----------------------------------------------------------------------*/ #include "bitvec.h" #include "cutil.h" +#include "params.h" #include "unichar.h" #include "unicity_table.h" -#include "params.h" /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ -using CONFIGS = BIT_VECTOR *; +using CONFIGS = BIT_VECTOR*; -typedef struct -{ +typedef struct { FLOAT32 A; FLOAT32 B; FLOAT32 C; @@ -49,13 +48,16 @@ typedef struct FLOAT32 Angle; FLOAT32 Length; } PROTO_STRUCT; -using PROTO = PROTO_STRUCT *; +using PROTO = PROTO_STRUCT*; struct CLASS_STRUCT { CLASS_STRUCT() - : NumProtos(0), MaxNumProtos(0), Prototypes(nullptr), - NumConfigs(0), MaxNumConfigs(0), Configurations(nullptr) { - } + : NumProtos(0), + MaxNumProtos(0), + Prototypes(nullptr), + NumConfigs(0), + MaxNumConfigs(0), + Configurations(nullptr) {} int16_t NumProtos; int16_t MaxNumProtos; PROTO Prototypes; @@ -64,15 +66,15 @@ struct CLASS_STRUCT { CONFIGS Configurations; UnicityTableEqEq font_set; }; -using CLASS_TYPE = CLASS_STRUCT *; -using CLASSES = CLASS_STRUCT *; +using CLASS_TYPE = CLASS_STRUCT*; +using CLASSES = CLASS_STRUCT*; /*---------------------------------------------------------------------- C o n s t a n t s ----------------------------------------------------------------------*/ -#define NUMBER_OF_CLASSES MAX_NUM_CLASSES -#define Y_OFFSET -40.0 -#define FEATURE_SCALE 100.0 +#define NUMBER_OF_CLASSES MAX_NUM_CLASSES +#define Y_OFFSET -40.0 +#define FEATURE_SCALE 100.0 /*---------------------------------------------------------------------- V a r i a b l e s @@ -90,8 +92,7 @@ extern STRING_VAR_H(classify_training_file, "MicroFeatures", "Training file"); * Set a single proto bit in the specified configuration. */ -#define AddProtoToConfig(Pid,Config) \ -(SET_BIT (Config, Pid)) +#define AddProtoToConfig(Pid, Config) (SET_BIT(Config, Pid)) /** * RemoveProtoFromConfig @@ -99,8 +100,7 @@ extern STRING_VAR_H(classify_training_file, "MicroFeatures", "Training file"); * Clear a single proto bit in the specified configuration. */ -#define RemoveProtoFromConfig(Pid,Config) \ -(reset_bit (Config, Pid)) +#define RemoveProtoFromConfig(Pid, Config) (reset_bit(Config, Pid)) /** * ClassOfChar @@ -108,10 +108,8 @@ extern STRING_VAR_H(classify_training_file, "MicroFeatures", "Training file"); * Return the class of a particular ASCII character value. */ -#define ClassOfChar(Char) \ -((TrainingData [Char].NumProtos) ? \ - (& TrainingData [Char]) : \ - NO_CLASS) +#define ClassOfChar(Char) \ + ((TrainingData[Char].NumProtos) ? (&TrainingData[Char]) : NO_CLASS) /** * ProtoIn @@ -120,8 +118,7 @@ extern STRING_VAR_H(classify_training_file, "MicroFeatures", "Training file"); * pointer to it (type PROTO). */ -#define ProtoIn(Class,Pid) \ -(& (Class)->Prototypes [Pid]) +#define ProtoIn(Class, Pid) (&(Class)->Prototypes[Pid]) /** * PrintProto @@ -130,13 +127,9 @@ extern STRING_VAR_H(classify_training_file, "MicroFeatures", "Training file"); * type 'PROTO'. */ -#define PrintProto(Proto) \ -(tprintf("X=%4.2f, Y=%4.2f, Length=%4.2f, Angle=%4.2f", \ - Proto->X, \ - Proto->Y, \ - Proto->Length, \ - Proto->Angle)) \ - +#define PrintProto(Proto) \ + (tprintf("X=%4.2f, Y=%4.2f, Length=%4.2f, Angle=%4.2f", Proto->X, Proto->Y, \ + Proto->Length, Proto->Angle)) /** * PrintProtoLine @@ -145,11 +138,8 @@ extern STRING_VAR_H(classify_training_file, "MicroFeatures", "Training file"); * type 'PROTO'. */ -#define PrintProtoLine(Proto) \ -(cprintf ("A=%4.2f, B=%4.2f, C=%4.2f", \ - Proto->A, \ - Proto->B, \ - Proto->C)) \ +#define PrintProtoLine(Proto) \ + (cprintf("A=%4.2f, B=%4.2f, C=%4.2f", Proto->A, Proto->B, Proto->C)) /*---------------------------------------------------------------------- F u n c t i o n s @@ -158,9 +148,11 @@ int AddConfigToClass(CLASS_TYPE Class); int AddProtoToClass(CLASS_TYPE Class); -FLOAT32 ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config); +FLOAT32 +ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config); -FLOAT32 ClassProtoLength(CLASS_TYPE Class); +FLOAT32 +ClassProtoLength(CLASS_TYPE Class); void CopyProto(PROTO Src, PROTO Dest); @@ -172,7 +164,8 @@ void FreeClassFields(CLASS_TYPE Class); void InitPrototypes(); -CLASS_TYPE NewClass(int NumProtos, int NumConfigs); +CLASS_TYPE +NewClass(int NumProtos, int NumConfigs); void PrintProtos(CLASS_TYPE Class); diff --git a/src/classify/sampleiterator.cpp b/src/classify/sampleiterator.cpp index 9d52c4fbd4..109eeb33d8 100644 --- a/src/classify/sampleiterator.cpp +++ b/src/classify/sampleiterator.cpp @@ -25,18 +25,16 @@ namespace tesseract { // ================== SampleIterator Implementation ================= SampleIterator::SampleIterator() - : charset_map_(nullptr), - shape_table_(nullptr), - sample_set_(nullptr), - randomize_(false), - owned_shape_table_(nullptr) { + : charset_map_(nullptr), + shape_table_(nullptr), + sample_set_(nullptr), + randomize_(false), + owned_shape_table_(nullptr) { num_shapes_ = 0; Begin(); } -SampleIterator::~SampleIterator() { - Clear(); -} +SampleIterator::~SampleIterator() { Clear(); } void SampleIterator::Clear() { delete owned_shape_table_; @@ -45,8 +43,7 @@ void SampleIterator::Clear() { // See class comment for arguments. void SampleIterator::Init(const IndexMapBiDi* charset_map, - const ShapeTable* shape_table, - bool randomize, + const ShapeTable* shape_table, bool randomize, TrainingSampleSet* sample_set) { Clear(); charset_map_ = charset_map; @@ -74,8 +71,8 @@ void SampleIterator::Init(const IndexMapBiDi* charset_map, if (shape_table_ != nullptr) { num_shapes_ = shape_table_->NumShapes(); } else { - num_shapes_ = randomize ? sample_set_->num_samples() - : sample_set_->num_raw_samples(); + num_shapes_ = + randomize ? sample_set_->num_samples() : sample_set_->num_raw_samples(); } Begin(); } @@ -96,9 +93,7 @@ void SampleIterator::Begin() { Next(); } -bool SampleIterator::AtEnd() const { - return shape_index_ >= num_shapes_; -} +bool SampleIterator::AtEnd() const { return shape_index_ >= num_shapes_; } const TrainingSample& SampleIterator::GetSample() const { if (shape_table_ != nullptr) { @@ -141,7 +136,7 @@ int SampleIterator::GlobalSampleIndex() const { // If the charset_map_ is nullptr, then this is equal to GetSparseClassID(). int SampleIterator::GetCompactClassID() const { return charset_map_ != nullptr ? charset_map_->SparseToCompact(shape_index_) - : GetSparseClassID(); + : GetSparseClassID(); } // Returns the index of the current sample in sparse charset space, so // in a 2-class problem between x and y, the returned indices will all be @@ -157,8 +152,7 @@ void SampleIterator::Next() { if (shape_table_ != nullptr) { // Next sample in this class/font combination. ++sample_index_; - if (sample_index_ < num_samples_) - return; + if (sample_index_ < num_samples_) return; // Next font in this class in this shape. sample_index_ = 0; do { @@ -172,11 +166,9 @@ void SampleIterator::Next() { shape_char_index_ = 0; do { ++shape_index_; - } while (shape_index_ < num_shapes_ && - charset_map_ != nullptr && + } while (shape_index_ < num_shapes_ && charset_map_ != nullptr && charset_map_->SparseToCompact(shape_index_) < 0); - if (shape_index_ >= num_shapes_) - return; // The end. + if (shape_index_ >= num_shapes_) return; // The end. num_shape_chars_ = shape_table_->GetShape(shape_index_).size(); } } @@ -195,15 +187,15 @@ void SampleIterator::Next() { // Returns the size of the compact charset space. int SampleIterator::CompactCharsetSize() const { return charset_map_ != nullptr ? charset_map_->CompactSize() - : SparseCharsetSize(); + : SparseCharsetSize(); } // Returns the size of the sparse charset space. int SampleIterator::SparseCharsetSize() const { return charset_map_ != nullptr - ? charset_map_->SparseSize() - : (shape_table_ != nullptr ? shape_table_->NumShapes() - : sample_set_->charsetsize()); + ? charset_map_->SparseSize() + : (shape_table_ != nullptr ? shape_table_->NumShapes() + : sample_set_->charsetsize()); } // Apply the supplied feature_space/feature_map transform to all samples diff --git a/src/classify/sampleiterator.h b/src/classify/sampleiterator.h index 47bc945988..05763674bc 100644 --- a/src/classify/sampleiterator.h +++ b/src/classify/sampleiterator.h @@ -13,7 +13,6 @@ // /////////////////////////////////////////////////////////////////////// - #ifndef TESSERACT_CLASSIFY_SAMPLEITERATOR_H_ #define TESSERACT_CLASSIFY_SAMPLEITERATOR_H_ @@ -72,8 +71,8 @@ struct UnicharAndFonts; // (If you want the unichar_id or font_id, the sample still has them.) // // Non-nullptr shape_table, non-nullptr charset_map. -// When shape_table is not nullptr, the charset_map indexes and subsets shapes in -// the shape_table, and iterations will be in shape_table order, not +// When shape_table is not nullptr, the charset_map indexes and subsets shapes +// in the shape_table, and iterations will be in shape_table order, not // charset_map order. // GetCompactClassID returns the charset_map index of a shape, and // GetSparseClassID returns the shape_id. @@ -97,10 +96,8 @@ class SampleIterator { void Clear(); // See class comment for arguments. - void Init(const IndexMapBiDi* charset_map, - const ShapeTable* shape_table, - bool randomize, - TrainingSampleSet* sample_set); + void Init(const IndexMapBiDi* charset_map, const ShapeTable* shape_table, + bool randomize, TrainingSampleSet* sample_set); // Iterator functions designed for use with a simple for loop: // for (it.Begin(); !it.AtEnd(); it.Next()) { @@ -134,16 +131,10 @@ class SampleIterator { // Returns the size of the sparse charset space. int SparseCharsetSize() const; - const IndexMapBiDi& charset_map() const { - return *charset_map_; - } - const ShapeTable* shape_table() const { - return shape_table_; - } + const IndexMapBiDi& charset_map() const { return *charset_map_; } + const ShapeTable* shape_table() const { return shape_table_; } // Sample set operations. - const TrainingSampleSet* sample_set() const { - return sample_set_; - } + const TrainingSampleSet* sample_set() const { return sample_set_; } // A set of functions that do something to all the samples accessed by the // iterator, as it is currently setup. diff --git a/src/classify/shapeclassifier.cpp b/src/classify/shapeclassifier.cpp index 7c10d6fef0..da0239d50e 100644 --- a/src/classify/shapeclassifier.cpp +++ b/src/classify/shapeclassifier.cpp @@ -24,13 +24,13 @@ #include "config_auto.h" #endif -#include "shapeclassifier.h" #include "genericvector.h" #include "scrollview.h" +#include "shapeclassifier.h" #include "shapetable.h" #include "svmnode.h" -#include "trainingsample.h" #include "tprintf.h" +#include "trainingsample.h" namespace tesseract { @@ -42,8 +42,8 @@ int ShapeClassifier::UnicharClassifySample( UNICHAR_ID keep_this, GenericVector* results) { results->truncate(0); GenericVector shape_results; - int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, - &shape_results); + int num_shape_results = + ClassifySample(sample, page_pix, debug, keep_this, &shape_results); const ShapeTable* shapes = GetShapeTable(); GenericVector unichar_map; unichar_map.init_to_size(shapes->unicharset().size(), -1); @@ -57,8 +57,8 @@ int ShapeClassifier::UnicharClassifySample( // See shapeclassifier.h for a full description. // Default implementation aborts. int ShapeClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix, - int debug, int keep_this, - GenericVector* results) { + int debug, int keep_this, + GenericVector* results) { ASSERT_HOST("Must implement ClassifySample!" == nullptr); return 0; } @@ -75,8 +75,7 @@ int ShapeClassifier::BestShapeForUnichar(const TrainingSample& sample, int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results); for (int r = 0; r < num_results; ++r) { if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) { - if (result != nullptr) - *result = results[r]; + if (result != nullptr) *result = results[r]; return results[r].shape_id; } } @@ -94,8 +93,7 @@ const UNICHARSET& ShapeClassifier::GetUnicharset() const { // the user has finished with debugging the sample. // Probably doesn't need to be overridden if the subclass provides // DisplayClassifyAs. -void ShapeClassifier::DebugDisplay(const TrainingSample& sample, - Pix* page_pix, +void ShapeClassifier::DebugDisplay(const TrainingSample& sample, Pix* page_pix, UNICHAR_ID unichar_id) { #ifndef GRAPHICS_DISABLED static ScrollView* terminator = nullptr; @@ -122,8 +120,8 @@ void ShapeClassifier::DebugDisplay(const TrainingSample& sample, do { PointerVector windows; if (unichar_id >= 0) { - tprintf("Debugging class %d = %s\n", - unichar_id, unicharset.id_to_unichar(unichar_id)); + tprintf("Debugging class %d = %s\n", unichar_id, + unicharset.id_to_unichar(unichar_id)); UnicharClassifySample(sample, page_pix, 1, unichar_id, &results); DisplayClassifyAs(sample, page_pix, unichar_id, 1, &windows); } else { @@ -131,8 +129,8 @@ void ShapeClassifier::DebugDisplay(const TrainingSample& sample, UnicharClassifySample(sample, page_pix, 1, -1, &results); } if (unichar_id >= 0) { - tprintf("Debugged class %d = %s\n", - unichar_id, unicharset.id_to_unichar(unichar_id)); + tprintf("Debugged class %d = %s\n", unichar_id, + unicharset.id_to_unichar(unichar_id)); } tprintf("Right-click in ClassifierDebug window to choose debug class,"); tprintf(" Left-click or close window to quit...\n"); @@ -149,8 +147,8 @@ void ShapeClassifier::DebugDisplay(const TrainingSample& sample, } } delete ev; - } while (unichar_id == old_unichar_id && - ev_type != SVET_CLICK && ev_type != SVET_DESTROY); + } while (unichar_id == old_unichar_id && ev_type != SVET_CLICK && + ev_type != SVET_DESTROY); } while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY); delete debug_win; #endif // GRAPHICS_DISABLED @@ -161,10 +159,10 @@ void ShapeClassifier::DebugDisplay(const TrainingSample& sample, // windows to the windows output and returns a new index that may be used // by any subsequent classifiers. Caller waits for the user to view and // then destroys the windows by clearing the vector. -int ShapeClassifier::DisplayClassifyAs( - const TrainingSample& sample, Pix* page_pix, - UNICHAR_ID unichar_id, int index, - PointerVector* windows) { +int ShapeClassifier::DisplayClassifyAs(const TrainingSample& sample, + Pix* page_pix, UNICHAR_ID unichar_id, + int index, + PointerVector* windows) { // Does nothing in the default implementation. return index; } @@ -190,10 +188,8 @@ void ShapeClassifier::PrintResults( tprintf("%s\n", context); for (int i = 0; i < results.size(); ++i) { tprintf("%g:", results[i].rating); - if (results[i].joined) - tprintf("[J]"); - if (results[i].broken) - tprintf("[B]"); + if (results[i].joined) tprintf("[J]"); + if (results[i].broken) tprintf("[B]"); tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string()); } } @@ -217,8 +213,7 @@ void ShapeClassifier::FilterDuplicateUnichars( if (shape_s.ContainsUnichar(unichar_id)) break; // We found unichar_id. } - if (s == r) - break; // We didn't find unichar_id. + if (s == r) break; // We didn't find unichar_id. } if (c == shape_r.size()) continue; // We found all the unichar ids in previous answers. diff --git a/src/classify/shapeclassifier.h b/src/classify/shapeclassifier.h index 24852caca9..d2aefcf4b0 100644 --- a/src/classify/shapeclassifier.h +++ b/src/classify/shapeclassifier.h @@ -25,14 +25,16 @@ #include "unichar.h" -template class GenericVector; +template +class GenericVector; struct Pix; class ScrollView; class UNICHARSET; namespace tesseract { -template class PointerVector; +template +class PointerVector; struct ShapeRating; class ShapeTable; class TrainingSample; @@ -96,13 +98,12 @@ class ShapeClassifier { virtual void DebugDisplay(const TrainingSample& sample, Pix* page_pix, UNICHAR_ID unichar_id); - // Displays classification as the given unichar_id. Creates as many windows // as it feels fit, using index as a guide for placement. Adds any created // windows to the windows output and returns a new index that may be used // by any subsequent classifiers. Caller waits for the user to view and // then destroys the windows by clearing the vector. - virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, + virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, UNICHAR_ID unichar_id, int index, PointerVector* windows); diff --git a/src/classify/shapetable.cpp b/src/classify/shapetable.cpp index 3d7b104e9b..6cb9fa1d38 100644 --- a/src/classify/shapetable.cpp +++ b/src/classify/shapetable.cpp @@ -38,8 +38,7 @@ namespace tesseract { // best result with the required unichar_id. // Returns -1 if the unichar_id is not found int ShapeRating::FirstResultWithUnichar( - const GenericVector& results, - const ShapeTable& shape_table, + const GenericVector& results, const ShapeTable& shape_table, UNICHAR_ID unichar_id) { for (int r = 0; r < results.size(); ++r) { const int shape_id = results[r].shape_id; @@ -56,11 +55,9 @@ int ShapeRating::FirstResultWithUnichar( // best result with the required unichar_id. // Returns -1 if the unichar_id is not found int UnicharRating::FirstResultWithUnichar( - const GenericVector& results, - UNICHAR_ID unichar_id) { + const GenericVector& results, UNICHAR_ID unichar_id) { for (int r = 0; r < results.size(); ++r) { - if (results[r].unichar_id == unichar_id) - return r; + if (results[r].unichar_id == unichar_id) return r; } return -1; } @@ -87,8 +84,7 @@ int UnicharAndFonts::SortByUnicharId(const void* v1, const void* v2) { // Writes to the given file. Returns false in case of error. bool Shape::Serialize(FILE* fp) const { uint8_t sorted = unichars_sorted_; - if (fwrite(&sorted, sizeof(sorted), 1, fp) != 1) - return false; + if (fwrite(&sorted, sizeof(sorted), 1, fp) != 1) return false; return unichars_.SerializeClasses(fp); } // Reads from the given file. Returns false in case of error. @@ -108,8 +104,7 @@ void Shape::AddToShape(int unichar_id, int font_id) { // Found the unichar in the shape table. GenericVector& font_list = unichars_[c].font_ids; for (int f = 0; f < font_list.size(); ++f) { - if (font_list[f] == font_id) - return; // Font is already there. + if (font_list[f] == font_id) return; // Font is already there. } font_list.push_back(font_id); return; @@ -117,18 +112,17 @@ void Shape::AddToShape(int unichar_id, int font_id) { } // Unichar_id is not in shape, so add it to shape. unichars_.push_back(UnicharAndFonts(unichar_id, font_id)); - unichars_sorted_ = unichars_.size() <= 1; + unichars_sorted_ = unichars_.size() <= 1; } // Adds everything in other to this. void Shape::AddShape(const Shape& other) { for (int c = 0; c < other.unichars_.size(); ++c) { for (int f = 0; f < other.unichars_[c].font_ids.size(); ++f) { - AddToShape(other.unichars_[c].unichar_id, - other.unichars_[c].font_ids[f]); + AddToShape(other.unichars_[c].unichar_id, other.unichars_[c].font_ids[f]); } } - unichars_sorted_ = unichars_.size() <= 1; + unichars_sorted_ = unichars_.size() <= 1; } // Returns true if the shape contains the given unichar_id, font_id pair. @@ -138,8 +132,7 @@ bool Shape::ContainsUnicharAndFont(int unichar_id, int font_id) const { // Found the unichar, so look for the font. GenericVector& font_list = unichars_[c].font_ids; for (int f = 0; f < font_list.size(); ++f) { - if (font_list[f] == font_id) - return true; + if (font_list[f] == font_id) return true; } return false; } @@ -162,8 +155,7 @@ bool Shape::ContainsFont(int font_id) const { for (int c = 0; c < unichars_.size(); ++c) { GenericVector& font_list = unichars_[c].font_ids; for (int f = 0; f < font_list.size(); ++f) { - if (font_list[f] == font_id) - return true; + if (font_list[f] == font_id) return true; } } return false; @@ -175,8 +167,7 @@ bool Shape::ContainsFontProperties(const FontInfoTable& font_table, for (int c = 0; c < unichars_.size(); ++c) { GenericVector& font_list = unichars_[c].font_ids; for (int f = 0; f < font_list.size(); ++f) { - if (font_table.get(font_list[f]).properties == properties) - return true; + if (font_table.get(font_list[f]).properties == properties) return true; } } return false; @@ -189,8 +180,7 @@ bool Shape::ContainsMultipleFontProperties( for (int c = 0; c < unichars_.size(); ++c) { GenericVector& font_list = unichars_[c].font_ids; for (int f = 0; f < font_list.size(); ++f) { - if (font_table.get(font_list[f]).properties != properties) - return true; + if (font_table.get(font_list[f]).properties != properties) return true; } } return false; @@ -208,8 +198,7 @@ bool Shape::IsSubsetOf(const Shape& other) const { int unichar_id = unichars_[c].unichar_id; const GenericVector& font_list = unichars_[c].font_ids; for (int f = 0; f < font_list.size(); ++f) { - if (!other.ContainsUnicharAndFont(unichar_id, font_list[f])) - return false; + if (!other.ContainsUnicharAndFont(unichar_id, font_list[f])) return false; } } return true; @@ -223,8 +212,7 @@ bool Shape::IsEqualUnichars(Shape* other) { if (!unichars_sorted_) SortUnichars(); if (!other->unichars_sorted_) other->SortUnichars(); for (int c = 0; c < unichars_.size(); ++c) { - if (unichars_[c].unichar_id != other->unichars_[c].unichar_id) - return false; + if (unichars_[c].unichar_id != other->unichars_[c].unichar_id) return false; } return true; } @@ -235,11 +223,9 @@ void Shape::SortUnichars() { unichars_sorted_ = true; } -ShapeTable::ShapeTable() : unicharset_(nullptr), num_fonts_(0) { -} +ShapeTable::ShapeTable() : unicharset_(nullptr), num_fonts_(0) {} ShapeTable::ShapeTable(const UNICHARSET& unicharset) - : unicharset_(&unicharset), num_fonts_(0) { -} + : unicharset_(&unicharset), num_fonts_(0) {} // Writes to the given file. Returns false in case of error. bool ShapeTable::Serialize(FILE* fp) const { @@ -282,7 +268,8 @@ void ShapeTable::ReMapClassIds(const GenericVector& unicharset_map) { } // Returns a string listing the classes/fonts in a shape. -STRING ShapeTable::DebugStr(int shape_id) const { +STRING +ShapeTable::DebugStr(int shape_id) const { if (shape_id < 0 || shape_id >= shape_table_.size()) return STRING("INVALID_UNICHAR_ID"); const Shape& shape = GetShape(shape_id); @@ -314,7 +301,8 @@ STRING ShapeTable::DebugStr(int shape_id) const { } // Returns a debug string summarizing the table. -STRING ShapeTable::SummaryStr() const { +STRING +ShapeTable::SummaryStr() const { int max_unichars = 0; int num_multi_shapes = 0; int num_master_shapes = 0; @@ -322,10 +310,8 @@ STRING ShapeTable::SummaryStr() const { if (MasterDestinationIndex(s) != s) continue; ++num_master_shapes; int shape_size = GetShape(s).size(); - if (shape_size > 1) - ++num_multi_shapes; - if (shape_size > max_unichars) - max_unichars = shape_size; + if (shape_size > 1) ++num_multi_shapes; + if (shape_size > max_unichars) max_unichars = shape_size; } STRING result; result.add_str_int("Number of shapes = ", num_master_shapes); @@ -334,7 +320,6 @@ STRING ShapeTable::SummaryStr() const { return result; } - // Adds a new shape starting with the given unichar_id and font_id. // Returns the assigned index. int ShapeTable::AddShape(int unichar_id, int font_id) { @@ -350,8 +335,8 @@ int ShapeTable::AddShape(int unichar_id, int font_id) { // Returns the assigned index or index of existing shape if already present. int ShapeTable::AddShape(const Shape& other) { int index; - for (index = 0; index < shape_table_.size() && - !(other == *shape_table_[index]); ++index) + for (index = 0; + index < shape_table_.size() && !(other == *shape_table_[index]); ++index) continue; if (index == shape_table_.size()) { Shape* shape = new Shape(other); @@ -392,11 +377,9 @@ int ShapeTable::FindShape(int unichar_id, int font_id) const { const Shape& shape = GetShape(s); for (int c = 0; c < shape.size(); ++c) { if (shape[c].unichar_id == unichar_id) { - if (font_id < 0) - return s; // We don't care about the font. + if (font_id < 0) return s; // We don't care about the font. for (int f = 0; f < shape[c].font_ids.size(); ++f) { - if (shape[c].font_ids[f] == font_id) - return s; + if (shape[c].font_ids[f] == font_id) return s; } } } @@ -405,8 +388,8 @@ int ShapeTable::FindShape(int unichar_id, int font_id) const { } // Returns the first unichar_id and font_id in the given shape. -void ShapeTable::GetFirstUnicharAndFont(int shape_id, - int* unichar_id, int* font_id) const { +void ShapeTable::GetFirstUnicharAndFont(int shape_id, int* unichar_id, + int* font_id) const { const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0]; *unichar_id = unichar_and_fonts.unichar_id; *font_id = unichar_and_fonts.font_ids[0]; @@ -449,8 +432,7 @@ bool ShapeTable::AnyMultipleUnichars() const { int num_shapes = NumShapes(); for (int s1 = 0; s1 < num_shapes; ++s1) { if (MasterDestinationIndex(s1) != s1) continue; - if (GetShape(s1).size() > 1) - return true; + if (GetShape(s1).size() > 1) return true; } return false; } @@ -466,7 +448,6 @@ int ShapeTable::MaxNumUnichars() const { return max_num_unichars; } - // Merges shapes with a common unichar over the [start, end) interval. // Assumes single unichar per shape. void ShapeTable::ForceFontMerges(int start, int end) { @@ -550,13 +531,11 @@ bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const { int c1, c2; for (c1 = 0; c1 < shape1.size(); ++c1) { int unichar_id1 = shape1[c1].unichar_id; - if (!shape2.ContainsUnichar(unichar_id1)) - break; + if (!shape2.ContainsUnichar(unichar_id1)) break; } for (c2 = 0; c2 < shape2.size(); ++c2) { int unichar_id2 = shape2[c2].unichar_id; - if (!shape1.ContainsUnichar(unichar_id2)) - break; + if (!shape1.ContainsUnichar(unichar_id2)) break; } return c1 == shape1.size() || c2 == shape2.size(); } @@ -593,13 +572,11 @@ bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const { const Shape& shape2 = GetShape(shape_id2); for (int c1 = 0; c1 < shape1.size(); ++c1) { int unichar_id1 = shape1[c1].unichar_id; - if (!shape2.ContainsUnichar(unichar_id1)) - return false; + if (!shape2.ContainsUnichar(unichar_id1)) return false; } for (int c2 = 0; c2 < shape2.size(); ++c2) { int unichar_id2 = shape2[c2].unichar_id; - if (!shape1.ContainsUnichar(unichar_id2)) - return false; + if (!shape1.ContainsUnichar(unichar_id2)) return false; } return true; } @@ -635,8 +612,7 @@ bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const { const Shape& shape2 = GetShape(shape_id2); for (int c1 = 0; c1 < shape1.size(); ++c1) { int unichar_id1 = shape1[c1].unichar_id; - if (shape2.ContainsUnichar(unichar_id1)) - return true; + if (shape2.ContainsUnichar(unichar_id1)) return true; } return false; } @@ -648,8 +624,7 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const { for (int c1 = 0; c1 < shape1.size(); ++c1) { const GenericVector& font_list1 = shape1[c1].font_ids; for (int f = 0; f < font_list1.size(); ++f) { - if (shape2.ContainsFont(font_list1[f])) - return true; + if (shape2.ContainsFont(font_list1[f])) return true; } } return false; @@ -659,13 +634,11 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const { // If not nullptr, shape_map is set to map other shape_ids to this's shape_ids. void ShapeTable::AppendMasterShapes(const ShapeTable& other, GenericVector* shape_map) { - if (shape_map != nullptr) - shape_map->init_to_size(other.NumShapes(), -1); + if (shape_map != nullptr) shape_map->init_to_size(other.NumShapes(), -1); for (int s = 0; s < other.shape_table_.size(); ++s) { if (other.shape_table_[s]->destination_index() < 0) { int index = AddShape(*other.shape_table_[s]); - if (shape_map != nullptr) - (*shape_map)[s] = index; + if (shape_map != nullptr) (*shape_map)[s] = index; } } } @@ -674,13 +647,11 @@ void ShapeTable::AppendMasterShapes(const ShapeTable& other, int ShapeTable::NumMasterShapes() const { int num_shapes = 0; for (int s = 0; s < shape_table_.size(); ++s) { - if (shape_table_[s]->destination_index() < 0) - ++num_shapes; + if (shape_table_[s]->destination_index() < 0) ++num_shapes; } return num_shapes; } - // Adds the unichars of the given shape_id to the vector of results. Any // unichar_id that is already present just has the fonts added to the // font set for that result without adding a new entry in the vector. @@ -688,9 +659,9 @@ int ShapeTable::NumMasterShapes() const { // of decreasing rating. // The unichar_map vector indicates the index of the results entry containing // each unichar, or -1 if the unichar is not yet included in results. -void ShapeTable::AddShapeToResults(const ShapeRating& shape_rating, - GenericVector* unichar_map, - GenericVector* results)const { +void ShapeTable::AddShapeToResults( + const ShapeRating& shape_rating, GenericVector* unichar_map, + GenericVector* results) const { if (shape_rating.joined) { AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, results); @@ -701,9 +672,8 @@ void ShapeTable::AddShapeToResults(const ShapeRating& shape_rating, } const Shape& shape = GetShape(shape_rating.shape_id); for (int u = 0; u < shape.size(); ++u) { - int result_index = AddUnicharToResults(shape[u].unichar_id, - shape_rating.rating, - unichar_map, results); + int result_index = AddUnicharToResults( + shape[u].unichar_id, shape_rating.rating, unichar_map, results); for (int f = 0; f < shape[u].font_ids.size(); ++f) { (*results)[result_index].fonts.push_back( ScoredFont(shape[u].font_ids[f], @@ -726,5 +696,4 @@ int ShapeTable::AddUnicharToResults( return result_index; } - } // namespace tesseract diff --git a/src/classify/shapetable.h b/src/classify/shapetable.h index 00407a0f9a..08a8af4dc1 100644 --- a/src/classify/shapetable.h +++ b/src/classify/shapetable.h @@ -40,16 +40,24 @@ class ShapeTable; // rating, and a list of appropriate fonts. struct UnicharRating { UnicharRating() - : unichar_id(0), rating(0.0f), adapted(false), config(0), - feature_misses(0) {} + : unichar_id(0), + rating(0.0f), + adapted(false), + config(0), + feature_misses(0) {} UnicharRating(int u, float r) - : unichar_id(u), rating(r), adapted(false), config(0), feature_misses(0) {} + : unichar_id(u), + rating(r), + adapted(false), + config(0), + feature_misses(0) {} // Print debug info. void Print() const { - tprintf("Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%d," - " %d fonts\n", unichar_id, rating, adapted, config, feature_misses, - fonts.size()); + tprintf( + "Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%d," + " %d fonts\n", + unichar_id, rating, adapted, config, feature_misses, fonts.size()); } // Sort function to sort ratings appropriately by descending rating. @@ -92,11 +100,19 @@ struct UnicharRating { // ShapeTable and a rating. struct ShapeRating { ShapeRating() - : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f), - joined(false), broken(false) {} + : shape_id(0), + rating(0.0f), + raw(0.0f), + font(0.0f), + joined(false), + broken(false) {} ShapeRating(int s, float r) - : shape_id(s), rating(r), raw(1.0f), font(0.0f), - joined(false), broken(false) {} + : shape_id(s), + rating(r), + raw(1.0f), + font(0.0f), + joined(false), + broken(false) {} // Sort function to sort ratings appropriately by descending rating. static int SortDescendingRating(const void* t1, const void* t2) { @@ -138,13 +154,12 @@ struct ShapeRating { struct ShapeQueueEntry { ShapeQueueEntry() : result(ShapeRating(0, 0.0f)), level(0) {} ShapeQueueEntry(const ShapeRating& rating, int level0) - : result(rating), level(level0) {} + : result(rating), level(level0) {} // Sort by decreasing rating and decreasing level for equal rating. bool operator<(const ShapeQueueEntry& other) const { if (result.rating > other.result.rating) return true; - if (result.rating == other.result.rating) - return level > other.level; + if (result.rating == other.result.rating) return level > other.level; return false; } @@ -158,8 +173,7 @@ using ShapeQueue = GenericHeap; // Simple struct to hold a set of fonts associated with a single unichar-id. // A vector of UnicharAndFonts makes a shape. struct UnicharAndFonts { - UnicharAndFonts() : unichar_id(0) { - } + UnicharAndFonts() : unichar_id(0) {} UnicharAndFonts(int uni_id, int font_id) : unichar_id(uni_id) { font_ids.push_back(font_id); } @@ -191,15 +205,9 @@ class Shape { // Reads from the given file. Returns false in case of error. bool DeSerialize(TFile* fp); - int destination_index() const { - return destination_index_; - } - void set_destination_index(int index) { - destination_index_ = index; - } - int size() const { - return unichars_.size(); - } + int destination_index() const { return destination_index_; } + void set_destination_index(int index) { destination_index_ = index; } + int size() const { return unichars_.size(); } // Returns a UnicharAndFonts entry for the given index, which must be // in the range [0, size()). const UnicharAndFonts& operator[](int index) const { @@ -272,12 +280,8 @@ class ShapeTable { bool DeSerialize(TFile* fp); // Accessors. - int NumShapes() const { - return shape_table_.size(); - } - const UNICHARSET& unicharset() const { - return *unicharset_; - } + int NumShapes() const { return shape_table_.size(); } + const UNICHARSET& unicharset() const { return *unicharset_; } // Returns the number of fonts used in this ShapeTable, computing it if // necessary. int NumFonts() const; @@ -313,16 +317,12 @@ class ShapeTable { // the unichar_id is returned. int FindShape(int unichar_id, int font_id) const; // Returns the first unichar_id and font_id in the given shape. - void GetFirstUnicharAndFont(int shape_id, - int* unichar_id, int* font_id) const; + void GetFirstUnicharAndFont(int shape_id, int* unichar_id, + int* font_id) const; // Accessors for the Shape with the given shape_id. - const Shape& GetShape(int shape_id) const { - return *shape_table_[shape_id]; - } - Shape* MutableShape(int shape_id) { - return shape_table_[shape_id]; - } + const Shape& GetShape(int shape_id) const { return *shape_table_[shape_id]; } + Shape* MutableShape(int shape_id) { return shape_table_[shape_id]; } // Expands all the classes/fonts in the shape individually to build // a ShapeTable. @@ -350,7 +350,8 @@ class ShapeTable { // Appends the master shapes from other to this. // Used to create a clean ShapeTable from a merged one, or to create a // copy of a ShapeTable. - // If not nullptr, shape_map is set to map other shape_ids to this's shape_ids. + // If not nullptr, shape_map is set to map other shape_ids to this's + // shape_ids. void AppendMasterShapes(const ShapeTable& other, GenericVector* shape_map); // Returns the number of master shapes remaining after merging. diff --git a/src/classify/tessclassifier.cpp b/src/classify/tessclassifier.cpp index df7866becf..8bfabe439f 100644 --- a/src/classify/tessclassifier.cpp +++ b/src/classify/tessclassifier.cpp @@ -65,9 +65,9 @@ const UNICHARSET& TessClassifier::GetUnicharset() const { // windows to the windows output and returns a new index that may be used // by any subsequent classifiers. Caller waits for the user to view and // then destroys the windows by clearing the vector. -int TessClassifier::DisplayClassifyAs( - const TrainingSample& sample, Pix* page_pix, int unichar_id, int index, - PointerVector* windows) { +int TessClassifier::DisplayClassifyAs(const TrainingSample& sample, + Pix* page_pix, int unichar_id, int index, + PointerVector* windows) { int shape_id = unichar_id; // TODO(rays) Fix this so it works with both flat and real shapetables. // if (GetShapeTable() != nullptr) diff --git a/src/classify/tessclassifier.h b/src/classify/tessclassifier.h index b72e90a18a..ddd227547e 100644 --- a/src/classify/tessclassifier.h +++ b/src/classify/tessclassifier.h @@ -36,7 +36,7 @@ class TrainingSample; class TessClassifier : public ShapeClassifier { public: TessClassifier(bool pruner_only, tesseract::Classify* classify) - : pruner_only_(pruner_only), classify_(classify) {} + : pruner_only_(pruner_only), classify_(classify) {} virtual ~TessClassifier() = default; // Classifies the given [training] sample, writing to results. @@ -67,11 +67,6 @@ class TessClassifier : public ShapeClassifier { tesseract::Classify* classify_; }; - } // namespace tesseract - - - - #endif /* THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ */ diff --git a/src/classify/trainingsample.cpp b/src/classify/trainingsample.cpp index e89abad607..e22981816b 100644 --- a/src/classify/trainingsample.cpp +++ b/src/classify/trainingsample.cpp @@ -35,16 +35,13 @@ ELISTIZE(TrainingSample) const int kRandomizingCenter = 128; // Randomizing factors. -const int TrainingSample::kYShiftValues[kSampleYShiftSize] = { - 6, 3, -3, -6, 0 -}; -const double TrainingSample::kScaleValues[kSampleScaleSize] = { - 1.0625, 0.9375, 1.0 -}; +const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {6, 3, -3, -6, 0}; +const double TrainingSample::kScaleValues[kSampleScaleSize] = {1.0625, 0.9375, + 1.0}; TrainingSample::~TrainingSample() { - delete [] features_; - delete [] micro_features_; + delete[] features_; + delete[] micro_features_; } // WARNING! Serialize/DeSerialize do not save/restore the "cache" data @@ -61,15 +58,15 @@ bool TrainingSample::Serialize(FILE* fp) const { return false; if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1) return false; - if (static_cast(fwrite(features_, sizeof(*features_), num_features_, fp)) - != num_features_) + if (static_cast(fwrite(features_, sizeof(*features_), num_features_, + fp)) != num_features_) return false; if (static_cast(fwrite(micro_features_, sizeof(*micro_features_), - num_micro_features_, - fp)) != num_micro_features_) + num_micro_features_, fp)) != num_micro_features_) return false; if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != - kNumCNParams) return false; + kNumCNParams) + return false; if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) return false; return true; @@ -102,19 +99,19 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) { ReverseN(&num_micro_features_, sizeof(num_micro_features_)); ReverseN(&outline_length_, sizeof(outline_length_)); } - delete [] features_; + delete[] features_; features_ = new INT_FEATURE_STRUCT[num_features_]; - if (static_cast(fread(features_, sizeof(*features_), num_features_, fp)) - != num_features_) + if (static_cast(fread(features_, sizeof(*features_), num_features_, + fp)) != num_features_) return false; - delete [] micro_features_; + delete[] micro_features_; micro_features_ = new MicroFeature[num_micro_features_]; if (static_cast(fread(micro_features_, sizeof(*micro_features_), - num_micro_features_, - fp)) != num_micro_features_) + num_micro_features_, fp)) != num_micro_features_) return false; if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != - kNumCNParams) return false; + kNumCNParams) + return false; if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) return false; return true; @@ -122,10 +119,8 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) { // Saves the given features into a TrainingSample. TrainingSample* TrainingSample::CopyFromFeatures( - const INT_FX_RESULT_STRUCT& fx_info, - const TBOX& bounding_box, - const INT_FEATURE_STRUCT* features, - int num_features) { + const INT_FX_RESULT_STRUCT& fx_info, const TBOX& bounding_box, + const INT_FEATURE_STRUCT* features, int num_features) { TrainingSample* sample = new TrainingSample; sample->num_features_ = num_features; sample->features_ = new INT_FEATURE_STRUCT[num_features]; @@ -151,8 +146,7 @@ TrainingSample* TrainingSample::CopyFromFeatures( // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. FEATURE_STRUCT* TrainingSample::GetCNFeature() const { FEATURE feature = NewFeature(&CharNormDesc); - for (int i = 0; i < kNumCNParams; ++i) - feature->Params[i] = cn_feature_[i]; + for (int i = 0; i < kNumCNParams; ++i) feature->Params[i] = cn_feature_[i]; return feature; } @@ -201,17 +195,14 @@ TrainingSample* TrainingSample::Copy() const { } // Extracts the needed information from the CHAR_DESC_STRUCT. -void TrainingSample::ExtractCharDesc(int int_feature_type, - int micro_type, - int cn_type, - int geo_type, +void TrainingSample::ExtractCharDesc(int int_feature_type, int micro_type, + int cn_type, int geo_type, CHAR_DESC_STRUCT* char_desc) { // Extract the INT features. delete[] features_; FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type]; if (char_features == nullptr) { - tprintf("Error: no features to train on of type %s\n", - kIntFeatureType); + tprintf("Error: no features to train on of type %s\n", kIntFeatureType); num_features_ = 0; features_ = nullptr; } else { @@ -231,8 +222,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type, delete[] micro_features_; char_features = char_desc->FeatureSets[micro_type]; if (char_features == nullptr) { - tprintf("Error: no features to train on of type %s\n", - kMicroFeatureType); + tprintf("Error: no features to train on of type %s\n", kMicroFeatureType); num_micro_features_ = 0; micro_features_ = nullptr; } else { @@ -302,8 +292,7 @@ Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { for (int i = 0; i <= 5; ++i) { int x = static_cast(start_x + dx * i); int y = static_cast(start_y + dy * i); - if (x >= 0 && x < 256 && y >= 0 && y < 256) - pixSetPixel(pix, x, y, 1); + if (x >= 0 && x < 256 && y >= 0 && y < 256) pixSetPixel(pix, x, y, 1); } } if (unicharset != nullptr) @@ -314,11 +303,11 @@ Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { // Displays the features in the given window with the given color. void TrainingSample::DisplayFeatures(ScrollView::Color color, ScrollView* window) const { - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED for (int f = 0; f < num_features_; ++f) { RenderIntFeature(window, &features_[f], color); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED } // Returns a pix of the original sample image. The pix is padded all round @@ -326,8 +315,7 @@ void TrainingSample::DisplayFeatures(ScrollView::Color color, // The returned Pix must be pixDestroyed after use. // If the input page_pix is nullptr, nullptr is returned. Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const { - if (page_pix == nullptr) - return nullptr; + if (page_pix == nullptr) return nullptr; int page_width = pixGetWidth(page_pix); int page_height = pixGetHeight(page_pix); TBOX padded_box = bounding_box(); diff --git a/src/classify/trainingsample.h b/src/classify/trainingsample.h index d74ea99a2e..3b7291fb77 100644 --- a/src/classify/trainingsample.h +++ b/src/classify/trainingsample.h @@ -53,13 +53,20 @@ static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; class TrainingSample : public ELIST_LINK { public: TrainingSample() - : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), - num_features_(0), num_micro_features_(0), outline_length_(0), - features_(nullptr), micro_features_(nullptr), weight_(1.0), - max_dist_(0.0), sample_index_(0), - features_are_indexed_(false), features_are_mapped_(false), - is_error_(false) { - } + : class_id_(INVALID_UNICHAR_ID), + font_id_(0), + page_num_(0), + num_features_(0), + num_micro_features_(0), + outline_length_(0), + features_(nullptr), + micro_features_(nullptr), + weight_(1.0), + max_dist_(0.0), + sample_index_(0), + features_are_indexed_(false), + features_are_mapped_(false), + is_error_(false) {} ~TrainingSample(); // Saves the given features into a TrainingSample. The features are copied, @@ -90,9 +97,8 @@ class TrainingSample : public ELIST_LINK { bool DeSerialize(bool swap, FILE* fp); // Extracts the needed information from the CHAR_DESC_STRUCT. - void ExtractCharDesc(int feature_type, int micro_type, - int cn_type, int geo_type, - CHAR_DESC_STRUCT* char_desc); + void ExtractCharDesc(int feature_type, int micro_type, int cn_type, + int geo_type, CHAR_DESC_STRUCT* char_desc); // Sets the mapped_features_ from the features_ using the provided // feature_space to the indexed versions of the features. @@ -113,72 +119,28 @@ class TrainingSample : public ELIST_LINK { Pix* GetSamplePix(int padding, Pix* page_pix) const; // Accessors. - UNICHAR_ID class_id() const { - return class_id_; - } - void set_class_id(int id) { - class_id_ = id; - } - int font_id() const { - return font_id_; - } - void set_font_id(int id) { - font_id_ = id; - } - int page_num() const { - return page_num_; - } - void set_page_num(int page) { - page_num_ = page; - } - const TBOX& bounding_box() const { - return bounding_box_; - } - void set_bounding_box(const TBOX& box) { - bounding_box_ = box; - } - int num_features() const { - return num_features_; - } - const INT_FEATURE_STRUCT* features() const { - return features_; - } - int num_micro_features() const { - return num_micro_features_; - } - const MicroFeature* micro_features() const { - return micro_features_; - } - int outline_length() const { - return outline_length_; - } - float cn_feature(int index) const { - return cn_feature_[index]; - } - int geo_feature(int index) const { - return geo_feature_[index]; - } - double weight() const { - return weight_; - } - void set_weight(double value) { - weight_ = value; - } - double max_dist() const { - return max_dist_; - } - void set_max_dist(double value) { - max_dist_ = value; - } - int sample_index() const { - return sample_index_; - } - void set_sample_index(int value) { - sample_index_ = value; - } - bool features_are_mapped() const { - return features_are_mapped_; - } + UNICHAR_ID class_id() const { return class_id_; } + void set_class_id(int id) { class_id_ = id; } + int font_id() const { return font_id_; } + void set_font_id(int id) { font_id_ = id; } + int page_num() const { return page_num_; } + void set_page_num(int page) { page_num_ = page; } + const TBOX& bounding_box() const { return bounding_box_; } + void set_bounding_box(const TBOX& box) { bounding_box_ = box; } + int num_features() const { return num_features_; } + const INT_FEATURE_STRUCT* features() const { return features_; } + int num_micro_features() const { return num_micro_features_; } + const MicroFeature* micro_features() const { return micro_features_; } + int outline_length() const { return outline_length_; } + float cn_feature(int index) const { return cn_feature_[index]; } + int geo_feature(int index) const { return geo_feature_[index]; } + double weight() const { return weight_; } + void set_weight(double value) { weight_ = value; } + double max_dist() const { return max_dist_; } + void set_max_dist(double value) { max_dist_ = value; } + int sample_index() const { return sample_index_; } + void set_sample_index(int value) { sample_index_ = value; } + bool features_are_mapped() const { return features_are_mapped_; } const GenericVector& mapped_features() const { ASSERT_HOST(features_are_mapped_); return mapped_features_; @@ -187,12 +149,8 @@ class TrainingSample : public ELIST_LINK { ASSERT_HOST(features_are_indexed_); return mapped_features_; } - bool is_error() const { - return is_error_; - } - void set_is_error(bool value) { - is_error_ = value; - } + bool is_error() const { return is_error_; } + void set_is_error(bool value) { is_error_ = value; } private: // Unichar id that this sample represents. There obviously must be a diff --git a/src/classify/trainingsampleset.cpp b/src/classify/trainingsampleset.cpp index 2a53d7228f..652a72c94f 100644 --- a/src/classify/trainingsampleset.cpp +++ b/src/classify/trainingsampleset.cpp @@ -39,8 +39,7 @@ const int kPrime2 = 13; const int kMinOutlierSamples = 5; TrainingSampleSet::FontClassInfo::FontClassInfo() - : num_raw_samples(0), canonical_sample(-1), canonical_dist(0.0f) { -} + : num_raw_samples(0), canonical_sample(-1), canonical_dist(0.0f) {} // Writes to the given file. Returns false in case of error. bool TrainingSampleSet::FontClassInfo::Serialize(FILE* fp) const { @@ -70,13 +69,12 @@ bool TrainingSampleSet::FontClassInfo::DeSerialize(bool swap, FILE* fp) { } TrainingSampleSet::TrainingSampleSet(const FontInfoTable& font_table) - : num_raw_samples_(0), unicharset_size_(0), - font_class_array_(nullptr), fontinfo_table_(font_table) { -} + : num_raw_samples_(0), + unicharset_size_(0), + font_class_array_(nullptr), + fontinfo_table_(font_table) {} -TrainingSampleSet::~TrainingSampleSet() { - delete font_class_array_; -} +TrainingSampleSet::~TrainingSampleSet() { delete font_class_array_; } // Writes to the given file. Returns false in case of error. bool TrainingSampleSet::Serialize(FILE* fp) const { @@ -104,7 +102,7 @@ bool TrainingSampleSet::DeSerialize(bool swap, FILE* fp) { if (fread(¬_null, sizeof(not_null), 1, fp) != 1) return false; if (not_null) { FontClassInfo empty; - font_class_array_ = new GENERIC_2D_ARRAY(1, 1 , empty); + font_class_array_ = new GENERIC_2D_ARRAY(1, 1, empty); if (!font_class_array_->DeSerializeClasses(swap, fp)) return false; } unicharset_size_ = unicharset_.size(); @@ -114,9 +112,10 @@ bool TrainingSampleSet::DeSerialize(bool swap, FILE* fp) { // Load an initial unicharset, or set one up if the file cannot be read. void TrainingSampleSet::LoadUnicharset(const char* filename) { if (!unicharset_.load_from_file(filename)) { - tprintf("Failed to load unicharset from file %s\n" - "Building unicharset from scratch...\n", - filename); + tprintf( + "Failed to load unicharset from file %s\n" + "Building unicharset from scratch...\n", + filename); unicharset_.clear(); // Add special characters as they were removed by the clear. UNICHARSET empty; @@ -132,8 +131,9 @@ int TrainingSampleSet::AddSample(const char* unichar, TrainingSample* sample) { if (!unicharset_.contains_unichar(unichar)) { unicharset_.unichar_insert(unichar); if (unicharset_.size() > MAX_NUM_CLASSES) { - tprintf("Error: Size of unicharset in TrainingSampleSet::AddSample is " - "greater than MAX_NUM_CLASSES\n"); + tprintf( + "Error: Size of unicharset in TrainingSampleSet::AddSample is " + "greater than MAX_NUM_CLASSES\n"); return -1; } } @@ -158,14 +158,13 @@ void TrainingSampleSet::AddSample(int unichar_id, TrainingSample* sample) { int TrainingSampleSet::NumClassSamples(int font_id, int class_id, bool randomize) const { ASSERT_HOST(font_class_array_ != nullptr); - if (font_id < 0 || class_id < 0 || - font_id >= font_id_map_.SparseSize() || class_id >= unicharset_size_) { + if (font_id < 0 || class_id < 0 || font_id >= font_id_map_.SparseSize() || + class_id >= unicharset_size_) { // There are no samples because the font or class doesn't exist. return 0; } int font_index = font_id_map_.SparseToCompact(font_id); - if (font_index < 0) - return 0; // The font has no samples. + if (font_index < 0) return 0; // The font has no samples. if (randomize) return (*font_class_array_)(font_index, class_id).samples.size(); else @@ -201,7 +200,8 @@ TrainingSample* TrainingSampleSet::MutableSample(int font_id, int class_id, // Returns a string debug representation of the given sample: // font, unichar_str, bounding box, page. -STRING TrainingSampleSet::SampleToString(const TrainingSample& sample) const { +STRING +TrainingSampleSet::SampleToString(const TrainingSample& sample) const { STRING boxfile_str; MakeBoxFileStr(unicharset_.id_to_unichar(sample.class_id()), sample.bounding_box(), sample.page_num(), &boxfile_str); @@ -210,8 +210,8 @@ STRING TrainingSampleSet::SampleToString(const TrainingSample& sample) const { // Gets the combined set of features used by all the samples of the given // font/class combination. -const BitVector& TrainingSampleSet::GetCloudFeatures( - int font_id, int class_id) const { +const BitVector& TrainingSampleSet::GetCloudFeatures(int font_id, + int class_id) const { int font_index = font_id_map_.SparseToCompact(font_id); ASSERT_HOST(font_index >= 0); return (*font_class_array_)(font_index, class_id).cloud_features; @@ -260,9 +260,8 @@ float TrainingSampleSet::UnicharDistance(const UnicharAndFonts& uf1, int f2 = uf2.font_ids[j]; dist_sum += ClusterDistance(f1, c1, f2, c2, feature_map); if (debug) { - tprintf("Cluster dist %d %d %d %d = %g\n", - f1, c1, f2, c2, - ClusterDistance(f1, c1, f2, c2, feature_map)); + tprintf("Cluster dist %d %d %d %d = %g\n", f1, c1, f2, c2, + ClusterDistance(f1, c1, f2, c2, feature_map)); } ++dist_count; } @@ -277,16 +276,15 @@ float TrainingSampleSet::UnicharDistance(const UnicharAndFonts& uf1, int f1 = uf1.font_ids[i % num_fonts1]; int f2 = uf2.font_ids[index % num_fonts2]; if (debug) { - tprintf("Cluster dist %d %d %d %d = %g\n", - f1, c1, f2, c2, ClusterDistance(f1, c1, f2, c2, feature_map)); + tprintf("Cluster dist %d %d %d %d = %g\n", f1, c1, f2, c2, + ClusterDistance(f1, c1, f2, c2, feature_map)); } dist_sum += ClusterDistance(f1, c1, f2, c2, feature_map); ++dist_count; } } if (dist_count == 0) { - if (matched_fonts) - return UnicharDistance(uf1, uf2, false, feature_map); + if (matched_fonts) return UnicharDistance(uf1, uf2, false, feature_map); return 0.0f; } return dist_sum / dist_count; @@ -301,8 +299,7 @@ float TrainingSampleSet::ClusterDistance(int font_id1, int class_id1, ASSERT_HOST(font_class_array_ != nullptr); int font_index1 = font_id_map_.SparseToCompact(font_id1); int font_index2 = font_id_map_.SparseToCompact(font_id2); - if (font_index1 < 0 || font_index2 < 0) - return 0.0f; + if (font_index1 < 0 || font_index2 < 0) return 0.0f; FontClassInfo& fc_info = (*font_class_array_)(font_index1, class_id1); if (font_id1 == font_id2) { // Special case cache for speed. @@ -310,9 +307,8 @@ float TrainingSampleSet::ClusterDistance(int font_id1, int class_id1, fc_info.unichar_distance_cache.init_to_size(unicharset_size_, -1.0f); if (fc_info.unichar_distance_cache[class_id2] < 0) { // Distance has to be calculated. - float result = ComputeClusterDistance(font_id1, class_id1, - font_id2, class_id2, - feature_map); + float result = ComputeClusterDistance(font_id1, class_id1, font_id2, + class_id2, feature_map); fc_info.unichar_distance_cache[class_id2] = result; // Copy to the symmetric cache entry. FontClassInfo& fc_info2 = (*font_class_array_)(font_index2, class_id2); @@ -328,9 +324,8 @@ float TrainingSampleSet::ClusterDistance(int font_id1, int class_id1, -1.0f); if (fc_info.font_distance_cache[font_index2] < 0) { // Distance has to be calculated. - float result = ComputeClusterDistance(font_id1, class_id1, - font_id2, class_id2, - feature_map); + float result = ComputeClusterDistance(font_id1, class_id1, font_id2, + class_id2, feature_map); fc_info.font_distance_cache[font_index2] = result; // Copy to the symmetric cache entry. FontClassInfo& fc_info2 = (*font_class_array_)(font_index2, class_id2); @@ -350,10 +345,9 @@ float TrainingSampleSet::ClusterDistance(int font_id1, int class_id1, ++cache_index; if (cache_index == fc_info.distance_cache.size()) { // Distance has to be calculated. - float result = ComputeClusterDistance(font_id1, class_id1, - font_id2, class_id2, - feature_map); - FontClassDistance fc_dist = { class_id2, font_id2, result }; + float result = ComputeClusterDistance(font_id1, class_id1, font_id2, + class_id2, feature_map); + FontClassDistance fc_dist = {class_id2, font_id2, result}; fc_info.distance_cache.push_back(fc_dist); // Copy to the symmetric cache entry. We know it isn't there already, as // we always copy to the symmetric entry. @@ -418,10 +412,9 @@ int TrainingSampleSet::ReliablySeparable(int font_id1, int class_id1, bool thorough) const { int result = 0; const TrainingSample* sample2 = GetCanonicalSample(font_id2, class_id2); - if (sample2 == nullptr) - return 0; // There are no canonical features. - const GenericVector& canonical2 = GetCanonicalFeatures(font_id2, - class_id2); + if (sample2 == nullptr) return 0; // There are no canonical features. + const GenericVector& canonical2 = + GetCanonicalFeatures(font_id2, class_id2); const BitVector& cloud1 = GetCloudFeatures(font_id1, class_id1); if (cloud1.size() == 0) return canonical2.size(); // There are no cloud features. @@ -429,8 +422,7 @@ int TrainingSampleSet::ReliablySeparable(int font_id1, int class_id1, // Find a canonical2 feature that is not in cloud1. for (int f = 0; f < canonical2.size(); ++f) { const int feature = canonical2[f]; - if (cloud1[feature]) - continue; + if (cloud1[feature]) continue; // Gather the near neighbours of f. GenericVector good_features; AddNearFeatures(feature_map, feature, 1, &good_features); @@ -442,8 +434,7 @@ int TrainingSampleSet::ReliablySeparable(int font_id1, int class_id1, break; } } - if (i < good_features.size()) - continue; // Found one in the cloud. + if (i < good_features.size()) continue; // Found one in the cloud. ++result; } return result; @@ -466,8 +457,8 @@ const TrainingSample* TrainingSampleSet::GetCanonicalSample( ASSERT_HOST(font_class_array_ != nullptr); int font_index = font_id_map_.SparseToCompact(font_id); if (font_index < 0) return nullptr; - const int sample_index = (*font_class_array_)(font_index, - class_id).canonical_sample; + const int sample_index = + (*font_class_array_)(font_index, class_id).canonical_sample; return sample_index >= 0 ? samples_[sample_index] : nullptr; } @@ -524,9 +515,8 @@ void TrainingSampleSet::OrganizeByFontAndClass() { int font_id = samples_[s]->font_id(); int class_id = samples_[s]->class_id(); if (font_id < 0 || font_id >= font_id_map_.SparseSize()) { - tprintf("Font id = %d/%d, class id = %d/%d on sample %d\n", - font_id, font_id_map_.SparseSize(), class_id, unicharset_size_, - s); + tprintf("Font id = %d/%d, class id = %d/%d on sample %d\n", font_id, + font_id_map_.SparseSize(), class_id, unicharset_size_, s); } ASSERT_HOST(font_id >= 0 && font_id < font_id_map_.SparseSize()); ASSERT_HOST(class_id >= 0 && class_id < unicharset_size_); @@ -552,8 +542,7 @@ void TrainingSampleSet::SetupFontIdMap() { GenericVector font_counts; for (int s = 0; s < samples_.size(); ++s) { const int font_id = samples_[s]->font_id(); - while (font_id >= font_counts.size()) - font_counts.push_back(0); + while (font_id >= font_counts.size()) font_counts.push_back(0); ++font_counts[font_id]; } font_id_map_.Init(font_counts.size(), false); @@ -563,7 +552,6 @@ void TrainingSampleSet::SetupFontIdMap() { font_id_map_.Setup(); } - // Finds the sample for each font, class pair that has least maximum // distance to all the other samples of the same font, class. // OrganizeByFontAndClass must have been already called. @@ -583,8 +571,7 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map, for (int c = 0; c < unicharset_size_; ++c) { int samples_found = 0; FontClassInfo& fcinfo = (*font_class_array_)(font_index, c); - if (fcinfo.samples.size() == 0 || - (kTestChar >= 0 && c != kTestChar)) { + if (fcinfo.samples.size() == 0 || (kTestChar >= 0 && c != kTestChar)) { fcinfo.canonical_sample = -1; fcinfo.canonical_dist = 0.0f; if (debug) tprintf("Skipping class %d\n", c); @@ -611,9 +598,8 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map, // of a single char/font. for (int j = 0; j < fcinfo.samples.size(); ++j) { int s2 = fcinfo.samples[j]; - if (samples_[s2]->class_id() != c || - samples_[s2]->font_id() != font_id || - s2 == s1) + if (samples_[s2]->class_id() != c || + samples_[s2]->font_id() != font_id || s2 == s1) continue; GenericVector features2 = samples_[s2]->indexed_features(); double dist = f_table.FeatureDistance(features2); @@ -643,12 +629,13 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map, worst_s2 = max_s2; } if (debug) { - tprintf("Found %d samples of class %d=%s, font %d, " - "dist range [%g, %g], worst pair= %s, %s\n", - samples_found, c, unicharset_.debug_str(c).string(), - font_index, min_max_dist, max_max_dist, - SampleToString(*samples_[max_s1]).string(), - SampleToString(*samples_[max_s2]).string()); + tprintf( + "Found %d samples of class %d=%s, font %d, " + "dist range [%g, %g], worst pair= %s, %s\n", + samples_found, c, unicharset_.debug_str(c).string(), font_index, + min_max_dist, max_max_dist, + SampleToString(*samples_[max_s1]).string(), + SampleToString(*samples_[max_s2]).string()); } } } @@ -699,8 +686,7 @@ void TrainingSampleSet::ComputeCanonicalFeatures() { const int font_id = font_id_map_.CompactToSparse(font_index); for (int c = 0; c < unicharset_size_; ++c) { int num_samples = NumClassSamples(font_id, c, false); - if (num_samples == 0) - continue; + if (num_samples == 0) continue; const TrainingSample* sample = GetCanonicalSample(font_id, c); FontClassInfo& fcinfo = (*font_class_array_)(font_index, c); fcinfo.canonical_features = sample->indexed_features(); @@ -717,8 +703,7 @@ void TrainingSampleSet::ComputeCloudFeatures(int feature_space_size) { int font_id = font_id_map_.CompactToSparse(font_index); for (int c = 0; c < unicharset_size_; ++c) { int num_samples = NumClassSamples(font_id, c, false); - if (num_samples == 0) - continue; + if (num_samples == 0) continue; FontClassInfo& fcinfo = (*font_class_array_)(font_index, c); fcinfo.cloud_features.Init(feature_space_size); for (int s = 0; s < num_samples; ++s) { @@ -761,5 +746,4 @@ void TrainingSampleSet::DisplaySamplesWithFeature(int f_index, } } - } // namespace tesseract. diff --git a/src/classify/trainingsampleset.h b/src/classify/trainingsampleset.h index f5f0732a4f..6f81401791 100644 --- a/src/classify/trainingsampleset.h +++ b/src/classify/trainingsampleset.h @@ -52,24 +52,12 @@ class TrainingSampleSet { bool DeSerialize(bool swap, FILE* fp); // Accessors - int num_samples() const { - return samples_.size(); - } - int num_raw_samples() const { - return num_raw_samples_; - } - int NumFonts() const { - return font_id_map_.SparseSize(); - } - const UNICHARSET& unicharset() const { - return unicharset_; - } - int charsetsize() const { - return unicharset_size_; - } - const FontInfoTable& fontinfo_table() const { - return fontinfo_table_; - } + int num_samples() const { return samples_.size(); } + int num_raw_samples() const { return num_raw_samples_; } + int NumFonts() const { return font_id_map_.SparseSize(); } + const UNICHARSET& unicharset() const { return unicharset_; } + int charsetsize() const { return unicharset_size_; } + const FontInfoTable& fontinfo_table() const { return fontinfo_table_; } // Loads an initial unicharset, or sets one up if the file cannot be read. void LoadUnicharset(const char* filename); @@ -121,13 +109,12 @@ class TrainingSampleSet { // Returns the distance between the given pair of font/class pairs. // Finds in cache or computes and caches. // OrganizeByFontAndClass must have been already called. - float ClusterDistance(int font_id1, int class_id1, - int font_id2, int class_id2, - const IntFeatureMap& feature_map); + float ClusterDistance(int font_id1, int class_id1, int font_id2, + int class_id2, const IntFeatureMap& feature_map); // Computes the distance between the given pair of font/class pairs. - float ComputeClusterDistance(int font_id1, int class_id1, - int font_id2, int class_id2, + float ComputeClusterDistance(int font_id1, int class_id1, int font_id2, + int class_id2, const IntFeatureMap& feature_map) const; // Returns the number of canonical features of font/class 2 for which @@ -140,12 +127,10 @@ class TrainingSampleSet { // to include only those for which this assumption is true. // ComputeCanonicalFeatures and ComputeCloudFeatures must have been called // first, or the results will be nonsense. - int ReliablySeparable(int font_id1, int class_id1, - int font_id2, int class_id2, - const IntFeatureMap& feature_map, + int ReliablySeparable(int font_id1, int class_id1, int font_id2, + int class_id2, const IntFeatureMap& feature_map, bool thorough) const; - // Returns the total index of the requested sample. // OrganizeByFontAndClass must have been already called. int GlobalSampleIndex(int font_id, int class_id, int index) const; @@ -158,9 +143,7 @@ class TrainingSampleSet { float GetCanonicalDist(int font_id, int class_id) const; // Returns a mutable pointer to the sample with the given index. - TrainingSample* mutable_sample(int index) { - return samples_[index]; - } + TrainingSample* mutable_sample(int index) { return samples_[index]; } // Gets ownership of the sample with the given index, removing it from this. TrainingSample* extract_sample(int index) { TrainingSample* sample = samples_[index]; @@ -285,5 +268,4 @@ class TrainingSampleSet { } // namespace tesseract. - #endif // TRAININGSAMPLESETSET_H_ diff --git a/src/cutil/bitvec.cpp b/src/cutil/bitvec.cpp index 6cc091f0b7..cecc6e0a51 100644 --- a/src/cutil/bitvec.cpp +++ b/src/cutil/bitvec.cpp @@ -34,7 +34,7 @@ * This routine uses realloc to increase the size of * the specified bit vector. * - * Globals: + * Globals: * - none * * @param Vector bit vector to be expanded @@ -44,33 +44,32 @@ * @note Exceptions: none * @note History: Fri Nov 16 10:11:16 1990, DSJ, Created. */ -BIT_VECTOR ExpandBitVector(BIT_VECTOR Vector, int NewNumBits) { - return ((BIT_VECTOR) Erealloc(Vector, - sizeof(Vector[0]) * WordsInVectorOfSize(NewNumBits))); -} /* ExpandBitVector */ - +BIT_VECTOR +ExpandBitVector(BIT_VECTOR Vector, int NewNumBits) { + return ((BIT_VECTOR)Erealloc( + Vector, sizeof(Vector[0]) * WordsInVectorOfSize(NewNumBits))); +} /* ExpandBitVector */ /*---------------------------------------------------------------------------*/ void FreeBitVector(BIT_VECTOR BitVector) { -/** - * This routine frees a bit vector. It also decrements - * the global counter that keeps track of the number of - * bit vectors allocated. If BitVector is nullptr, then - * the count is printed to stderr. - * - * Globals: - * - BitVectorCount count of number of bit vectors allocated - * - * @param BitVector bit vector to be freed - * - * @note Exceptions: none - * @note History: Tue Oct 23 16:46:09 1990, DSJ, Created. - */ + /** + * This routine frees a bit vector. It also decrements + * the global counter that keeps track of the number of + * bit vectors allocated. If BitVector is nullptr, then + * the count is printed to stderr. + * + * Globals: + * - BitVectorCount count of number of bit vectors allocated + * + * @param BitVector bit vector to be freed + * + * @note Exceptions: none + * @note History: Tue Oct 23 16:46:09 1990, DSJ, Created. + */ if (BitVector) { Efree(BitVector); } -} /* FreeBitVector */ - +} /* FreeBitVector */ /*---------------------------------------------------------------------------*/ /** @@ -86,7 +85,7 @@ void FreeBitVector(BIT_VECTOR BitVector) { * @note Exceptions: none * @note History: Tue Oct 23 16:51:27 1990, DSJ, Created. */ -BIT_VECTOR NewBitVector(int NumBits) { - return ((BIT_VECTOR) Emalloc(sizeof(uint32_t) * - WordsInVectorOfSize(NumBits))); -} /* NewBitVector */ +BIT_VECTOR +NewBitVector(int NumBits) { + return ((BIT_VECTOR)Emalloc(sizeof(uint32_t) * WordsInVectorOfSize(NumBits))); +} /* NewBitVector */ diff --git a/src/cutil/bitvec.h b/src/cutil/bitvec.h index 5068006633..e2e2aeecae 100644 --- a/src/cutil/bitvec.h +++ b/src/cutil/bitvec.h @@ -15,8 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef BITVEC_H -#define BITVEC_H +#ifndef BITVEC_H +#define BITVEC_H #include "host.h" @@ -24,8 +24,8 @@ Include Files and Type Defines -----------------------------------------------------------------------------*/ // TODO(rays) Rename BITSINLONG to BITSINuint32_t, and use sizeof. -#define BITSINLONG 32 /**< no of bits in a long */ -using BIT_VECTOR = uint32_t *; +#define BITSINLONG 32 /**< no of bits in a long */ +using BIT_VECTOR = uint32_t*; /*----------------------------------------------------------------------------- Public Function Prototypes @@ -54,22 +54,26 @@ using BIT_VECTOR = uint32_t *; dest[index] = source[index]; /*copy all bits*/ \ } -#define SET_BIT(array,bit) (array[bit/BITSINLONG]|=1<<(bit&(BITSINLONG-1))) +#define SET_BIT(array, bit) \ + (array[bit / BITSINLONG] |= 1 << (bit & (BITSINLONG - 1))) -#define reset_bit(array,bit) (array[bit/BITSINLONG]&=~(1<<(bit&(BITSINLONG-1)))) +#define reset_bit(array, bit) \ + (array[bit / BITSINLONG] &= ~(1 << (bit & (BITSINLONG - 1)))) -#define test_bit(array,bit) (array[bit/BITSINLONG] & (1<<(bit&(BITSINLONG-1)))) +#define test_bit(array, bit) \ + (array[bit / BITSINLONG] & (1 << (bit & (BITSINLONG - 1)))) -#define WordsInVectorOfSize(NumBits) \ -(((NumBits) + BITSINLONG - 1) / BITSINLONG) +#define WordsInVectorOfSize(NumBits) (((NumBits) + BITSINLONG - 1) / BITSINLONG) /*-------------------------------------------------------------------------- Public Function Prototypes --------------------------------------------------------------------------*/ -BIT_VECTOR ExpandBitVector(BIT_VECTOR Vector, int NewNumBits); +BIT_VECTOR +ExpandBitVector(BIT_VECTOR Vector, int NewNumBits); void FreeBitVector(BIT_VECTOR BitVector); -BIT_VECTOR NewBitVector(int NumBits); +BIT_VECTOR +NewBitVector(int NumBits); #endif diff --git a/src/cutil/callcpp.cpp b/src/cutil/callcpp.cpp index 1d478075e4..8edbfec7ad 100644 --- a/src/cutil/callcpp.cpp +++ b/src/cutil/callcpp.cpp @@ -22,92 +22,82 @@ #include "config_auto.h" #endif -#include "errcode.h" +#include "errcode.h" #ifdef __UNIX__ -#include +#include #include #endif -#include -#include "memry.h" -#include "scrollview.h" -#include "params.h" -#include "callcpp.h" -#include "tprintf.h" -#include "host.h" +#include +#include "callcpp.h" +#include "host.h" +#include "memry.h" +#include "params.h" +#include "scrollview.h" +#include "tprintf.h" #include "unichar.h" -void -cprintf ( //Trace printf -const char *format, ... //special message +void cprintf( // Trace printf + const char* format, + ... // special message ) { - va_list args; //variable args + va_list args; // variable args char msg[1000]; - va_start(args, format); //variable list - vsprintf(msg, format, args); //Format into msg + va_start(args, format); // variable list + vsprintf(msg, format, args); // Format into msg va_end(args); - tprintf ("%s", msg); + tprintf("%s", msg); } - #ifndef GRAPHICS_DISABLED -ScrollView *c_create_window( /*create a window */ - const char *name, /*name/title of window */ - int16_t xpos, /*coords of window */ - int16_t ypos, /*coords of window */ - int16_t xsize, /*size of window */ - int16_t ysize, /*size of window */ - double xmin, /*scrolling limits */ - double xmax, /*to stop users */ - double ymin, /*getting lost in */ - double ymax /*empty space */ - ) { - return new ScrollView(name, xpos, ypos, xsize, ysize, xmax - xmin, ymax - ymin, true); +ScrollView* c_create_window( /*create a window */ + const char* name, /*name/title of window */ + int16_t xpos, /*coords of window */ + int16_t ypos, /*coords of window */ + int16_t xsize, /*size of window */ + int16_t ysize, /*size of window */ + double xmin, /*scrolling limits */ + double xmax, /*to stop users */ + double ymin, /*getting lost in */ + double ymax /*empty space */ +) { + return new ScrollView(name, xpos, ypos, xsize, ysize, xmax - xmin, + ymax - ymin, true); } - -void c_line_color_index( /*set color */ - void *win, - C_COL index) { - // The colors are the same as the SV ones except that SV has COLOR:NONE --> offset of 1 - ScrollView* window = (ScrollView*) win; - window->Pen((ScrollView::Color) (index + 1)); +void c_line_color_index(/*set color */ + void* win, C_COL index) { + // The colors are the same as the SV ones except that SV has COLOR:NONE --> + // offset of 1 + ScrollView* window = (ScrollView*)win; + window->Pen((ScrollView::Color)(index + 1)); } - -void c_move( /*move pen */ - void *win, - double x, - double y) { - ScrollView* window = (ScrollView*) win; - window->SetCursor((int) x, (int) y); +void c_move(/*move pen */ + void* win, double x, double y) { + ScrollView* window = (ScrollView*)win; + window->SetCursor((int)x, (int)y); } - -void c_draw( /*move pen */ - void *win, - double x, - double y) { - ScrollView* window = (ScrollView*) win; - window->DrawTo((int) x, (int) y); +void c_draw(/*move pen */ + void* win, double x, double y) { + ScrollView* window = (ScrollView*)win; + window->DrawTo((int)x, (int)y); } - -void c_make_current( /*move pen */ - void *win) { - ScrollView* window = (ScrollView*) win; +void c_make_current(/*move pen */ + void* win) { + ScrollView* window = (ScrollView*)win; window->Update(); } - -void c_clear_window( /*move pen */ - void *win) { - ScrollView* window = (ScrollView*) win; +void c_clear_window(/*move pen */ + void* win) { + ScrollView* window = (ScrollView*)win; window->Clear(); } - char window_wait(ScrollView* win) { SVEvent* ev; // Wait till an input or click event (all others are thrown away) @@ -116,17 +106,16 @@ char window_wait(ScrollView* win) { do { ev = win->AwaitEvent(SVET_ANY); ev_type = ev->type; - if (ev_type == SVET_INPUT) - ret = ev->parameter[0]; + if (ev_type == SVET_INPUT) ret = ev->parameter[0]; delete ev; } while (ev_type != SVET_INPUT && ev_type != SVET_CLICK); return ret; } #endif -void reverse32(void *ptr) { +void reverse32(void* ptr) { char tmp; - char *cptr = (char *) ptr; + char* cptr = (char*)ptr; tmp = *cptr; *cptr = *(cptr + 3); @@ -136,10 +125,9 @@ void reverse32(void *ptr) { *(cptr + 2) = tmp; } - -void reverse16(void *ptr) { +void reverse16(void* ptr) { char tmp; - char *cptr = (char *) ptr; + char* cptr = (char*)ptr; tmp = *cptr; *cptr = *(cptr + 1); diff --git a/src/cutil/callcpp.h b/src/cutil/callcpp.h index dfa07006b9..071b4ab8f8 100644 --- a/src/cutil/callcpp.h +++ b/src/cutil/callcpp.h @@ -78,39 +78,35 @@ typedef enum { Violet, Wheat, Green_YELLOW -} C_COL; /*starbase colours */ +} C_COL; /*starbase colours */ -void cprintf ( //Trace printf -const char *format, ... //special message +void cprintf( // Trace printf + const char* format, + ... // special message ); -ScrollView *c_create_window( /*create a window */ - const char *name, /*name/title of window */ - int16_t xpos, /*coords of window */ - int16_t ypos, /*coords of window */ - int16_t xsize, /*size of window */ - int16_t ysize, /*size of window */ - double xmin, /*scrolling limits */ - double xmax, /*to stop users */ - double ymin, /*getting lost in */ - double ymax /*empty space */ - ); -void c_line_color_index( /*set color */ - void *win, - C_COL index); -void c_move( /*move pen */ - void *win, - double x, - double y); -void c_draw( /*move pen */ - void *win, - double x, - double y); -void c_make_current( /*move pen */ - void *win); -void c_clear_window( /*move pen */ - void *win); +ScrollView* c_create_window( /*create a window */ + const char* name, /*name/title of window */ + int16_t xpos, /*coords of window */ + int16_t ypos, /*coords of window */ + int16_t xsize, /*size of window */ + int16_t ysize, /*size of window */ + double xmin, /*scrolling limits */ + double xmax, /*to stop users */ + double ymin, /*getting lost in */ + double ymax /*empty space */ +); +void c_line_color_index(/*set color */ + void* win, C_COL index); +void c_move(/*move pen */ + void* win, double x, double y); +void c_draw(/*move pen */ + void* win, double x, double y); +void c_make_current(/*move pen */ + void* win); +void c_clear_window(/*move pen */ + void* win); char window_wait(ScrollView* win); -void reverse32(void *ptr); -void reverse16(void *ptr); +void reverse32(void* ptr); +void reverse16(void* ptr); #endif diff --git a/src/cutil/const.h b/src/cutil/const.h index 33f8fa443c..288bb478c7 100644 --- a/src/cutil/const.h +++ b/src/cutil/const.h @@ -8,16 +8,16 @@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. -**************************************************************************/ + **************************************************************************/ #ifndef CONST_H #define CONST_H /*This file contains constants which are global to the entire system*/ -#define SPLINESIZE 23 // max spline parts to a line +#define SPLINESIZE 23 // max spline parts to a line -#define PI 3.14159265359 // pi +#define PI 3.14159265359 // pi -#define EDGEPTFLAGS 4 // concavity,length etc. +#define EDGEPTFLAGS 4 // concavity,length etc. #endif diff --git a/src/cutil/cutil.cpp b/src/cutil/cutil.cpp index 8a1f11620f..fbaeba9df6 100644 --- a/src/cutil/cutil.cpp +++ b/src/cutil/cutil.cpp @@ -39,8 +39,8 @@ Import original HP distribution **/ #include "cutil.h" -#include "tprintf.h" #include "callcpp.h" +#include "tprintf.h" #include @@ -58,8 +58,8 @@ long long_rand(long limit) { static long seed; long num; - num = (long) rand () << 16; - num |= rand () & 0xffff; + num = (long)rand() << 16; + num |= rand() & 0xffff; seed ^= num; long result = num % limit; while (result < 0) { @@ -67,11 +67,10 @@ long long_rand(long limit) { } return result; #else - return (long)((double)limit * rand()/(RAND_MAX + 1.0)); + return (long)((double)limit * rand() / (RAND_MAX + 1.0)); #endif } - /********************************************************************** * open_file * @@ -79,19 +78,19 @@ long long_rand(long limit) { * nullptr use stdin (or stdout) for the file. If the file can not be * opened then call the error routine. **********************************************************************/ -FILE *open_file(const char *filename, const char *mode) { - FILE *thisfile = nullptr; - if ((thisfile = fopen (filename, mode)) == nullptr) { - tprintf ("Could not open file, %s\n", filename); - exit (1); +FILE* open_file(const char* filename, const char* mode) { + FILE* thisfile = nullptr; + if ((thisfile = fopen(filename, mode)) == nullptr) { + tprintf("Could not open file, %s\n", filename); + exit(1); } return (thisfile); } /// Check whether the file exists -bool exists_file(const char *filename) { +bool exists_file(const char* filename) { bool exists = false; - FILE *f = nullptr; + FILE* f = nullptr; if ((f = fopen(filename, "rb")) != nullptr) { fclose(f); exists = true; diff --git a/src/cutil/cutil.h b/src/cutil/cutil.h index fcb78bb079..af11cb35a6 100644 --- a/src/cutil/cutil.h +++ b/src/cutil/cutil.h @@ -37,8 +37,8 @@ Import original HP distribution I n c l u d e s ----------------------------------------------------------------------*/ #include -#include #include +#include #include "host.h" #include "tprintf.h" @@ -57,19 +57,19 @@ Import original HP distribution #define CHARS_PER_LINE 500 #if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s +#define _ARGS(s) s #else -# define _ARGS(s) () +#define _ARGS(s) () #endif -//typedef int (*int_proc) (void); -typedef void (*void_proc) (...); -typedef void *(*void_star_proc) _ARGS ((...)); +// typedef int (*int_proc) (void); +typedef void (*void_proc)(...); +typedef void*(*void_star_proc)_ARGS((...)); -typedef int (*int_void) (void); -typedef void (*void_void) (void); -typedef int (*int_compare) (void *, void *); -typedef void (*void_dest) (void *); +typedef int (*int_void)(void); +typedef void (*void_void)(void); +typedef int (*int_compare)(void*, void*); +typedef void (*void_dest)(void*); /*---------------------------------------------------------------------- M a c r o s @@ -80,8 +80,7 @@ typedef void (*void_dest) (void *); * Print a new line character on stdout. **********************************************************************/ -#define new_line() \ - tprintf("\n") +#define new_line() tprintf("\n") /********************************************************************** * print_string @@ -89,17 +88,16 @@ typedef void (*void_dest) (void *); * Print a string on stdout. **********************************************************************/ -#define print_string(str) \ - printf ("%s\n", str) +#define print_string(str) printf("%s\n", str) /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ long long_rand(long limit); -FILE *open_file(const char *filename, const char *mode); +FILE* open_file(const char* filename, const char* mode); -bool exists_file(const char *filename); +bool exists_file(const char* filename); /* util.c long long_rand diff --git a/src/cutil/cutil_class.cpp b/src/cutil/cutil_class.cpp index e6feb24a24..2c19334804 100644 --- a/src/cutil/cutil_class.cpp +++ b/src/cutil/cutil_class.cpp @@ -18,6 +18,4 @@ #include "cutil_class.h" -namespace tesseract { - -} // namespace tesseract +namespace tesseract {} // namespace tesseract diff --git a/src/cutil/cutil_class.h b/src/cutil/cutil_class.h index f89a439e35..f51bdb30b3 100644 --- a/src/cutil/cutil_class.h +++ b/src/cutil/cutil_class.h @@ -29,7 +29,7 @@ class CUtil : public CCUtil { public: CUtil() = default; ~CUtil() = default; - void read_variables(const char *filename, bool global_only); + void read_variables(const char* filename, bool global_only); }; } // namespace tesseract diff --git a/src/cutil/danerror.cpp b/src/cutil/danerror.cpp index 29920e9843..d2beaa9e18 100644 --- a/src/cutil/danerror.cpp +++ b/src/cutil/danerror.cpp @@ -18,10 +18,10 @@ /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ -#include "host.h" #include "danerror.h" -#include "tprintf.h" #include "globaloc.h" +#include "host.h" +#include "tprintf.h" #ifdef __UNIX__ #include "assert.h" #endif @@ -39,10 +39,10 @@ * @return None - this routine does not return. * @note History: 4/3/89, DSJ, Created. */ -void DoError(int Error, const char *Message) { +void DoError(int Error, const char* Message) { if (Message != nullptr) { tprintf("\nError: %s!\n", Message); } err_exit(); -} /* DoError */ +} /* DoError */ diff --git a/src/cutil/danerror.h b/src/cutil/danerror.h index 0e7abf1df8..4334181e6e 100644 --- a/src/cutil/danerror.h +++ b/src/cutil/danerror.h @@ -15,21 +15,21 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef DANERROR_H -#define DANERROR_H +#ifndef DANERROR_H +#define DANERROR_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#define NOERROR 0 -#define DO_NOTHING 0 +#define NOERROR 0 +#define DO_NOTHING 0 using TRAPERROR = int; -typedef void (*VOID_PROC) (); +typedef void (*VOID_PROC)(); /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void DoError(int Error, const char *Message); +void DoError(int Error, const char* Message); #endif diff --git a/src/cutil/efio.cpp b/src/cutil/efio.cpp index 943efc9baf..a1916f5583 100644 --- a/src/cutil/efio.cpp +++ b/src/cutil/efio.cpp @@ -19,11 +19,11 @@ Include Files and Type Defines ----------------------------------------------------------------------------*/ #include "efio.h" -#include "danerror.h" #include #include +#include "danerror.h" -#define MAXERRORMESSAGE 256 +#define MAXERRORMESSAGE 256 /*---------------------------------------------------------------------------- Public Code @@ -40,16 +40,15 @@ * @note Exceptions: #FOPENERROR unable to open specified file * @note History: 5/21/89, DSJ, Created. */ -FILE *Efopen(const char *Name, const char *Mode) { - FILE *File; +FILE* Efopen(const char* Name, const char* Mode) { + FILE* File; char ErrorMessage[MAXERRORMESSAGE]; - File = fopen (Name, Mode); + File = fopen(Name, Mode); if (File == nullptr) { - sprintf (ErrorMessage, "Unable to open %s", Name); - DoError(FOPENERROR, ErrorMessage); + sprintf(ErrorMessage, "Unable to open %s", Name); + DoError(FOPENERROR, ErrorMessage); return (nullptr); - } - else + } else return (File); -} /* Efopen */ +} /* Efopen */ diff --git a/src/cutil/efio.h b/src/cutil/efio.h index 593e6b66cb..cc70221559 100644 --- a/src/cutil/efio.h +++ b/src/cutil/efio.h @@ -15,18 +15,18 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef EFIO_H -#define EFIO_H +#ifndef EFIO_H +#define EFIO_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ #include -#define FOPENERROR 3000 +#define FOPENERROR 3000 /**---------------------------------------------------------------------------- Public Function Prototype ----------------------------------------------------------------------------**/ -FILE *Efopen(const char *Name, const char *Mode); +FILE* Efopen(const char* Name, const char* Mode); #endif diff --git a/src/cutil/emalloc.cpp b/src/cutil/emalloc.cpp index e353b51b21..f9070e0c4b 100644 --- a/src/cutil/emalloc.cpp +++ b/src/cutil/emalloc.cpp @@ -25,8 +25,8 @@ Include Files and Type Defines ----------------------------------------------------------------------------*/ #include "emalloc.h" -#include "danerror.h" #include +#include "danerror.h" /*---------------------------------------------------------------------------- Public Code @@ -43,43 +43,39 @@ * - #NOTENOUGHMEMORY unable to allocate Size bytes * - #ILLEGALMALLOCREQUEST negative or zero request size * @note History: 4/3/89, DSJ, Created. -*/ -void *Emalloc(int Size) { - void *Buffer; + */ +void* Emalloc(int Size) { + void* Buffer; - if (Size <= 0) - DoError (ILLEGALMALLOCREQUEST, "Illegal malloc request size"); - Buffer = (void *) malloc (Size); + if (Size <= 0) DoError(ILLEGALMALLOCREQUEST, "Illegal malloc request size"); + Buffer = (void*)malloc(Size); if (Buffer == nullptr) { - DoError (NOTENOUGHMEMORY, "Not enough memory"); + DoError(NOTENOUGHMEMORY, "Not enough memory"); return (nullptr); - } - else + } else return (Buffer); -} /* Emalloc */ - +} /* Emalloc */ /*---------------------------------------------------------------------------*/ -void *Erealloc(void *ptr, int size) { - void *Buffer; +void* Erealloc(void* ptr, int size) { + void* Buffer; if (size < 0 || (size == 0 && ptr == nullptr)) - DoError (ILLEGALMALLOCREQUEST, "Illegal realloc request size"); + DoError(ILLEGALMALLOCREQUEST, "Illegal realloc request size"); - Buffer = (void *) realloc (ptr, size); + Buffer = (void*)realloc(ptr, size); if (Buffer == nullptr && size != 0) - DoError (NOTENOUGHMEMORY, "Not enough memory"); + DoError(NOTENOUGHMEMORY, "Not enough memory"); return (Buffer); -} /* Erealloc */ - +} /* Erealloc */ /*---------------------------------------------------------------------------*/ -void Efree(void *ptr) { +void Efree(void* ptr) { if (ptr == nullptr) - DoError (ILLEGALMALLOCREQUEST, "Attempted to free nullptr ptr"); + DoError(ILLEGALMALLOCREQUEST, "Attempted to free nullptr ptr"); - free(ptr); + free(ptr); -} /* Efree */ +} /* Efree */ diff --git a/src/cutil/emalloc.h b/src/cutil/emalloc.h index 6fda20a596..e349c34bdf 100644 --- a/src/cutil/emalloc.h +++ b/src/cutil/emalloc.h @@ -15,27 +15,26 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef EMALLOC_H -#define EMALLOC_H +#ifndef EMALLOC_H +#define EMALLOC_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "host.h" #include "callcpp.h" +#include "host.h" -#define NOTENOUGHMEMORY 2000 -#define ILLEGALMALLOCREQUEST 2001 +#define NOTENOUGHMEMORY 2000 +#define ILLEGALMALLOCREQUEST 2001 /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void *Emalloc(int Size); - -void *Erealloc(void *ptr, int size); +void* Emalloc(int Size); -void Efree(void *ptr); +void* Erealloc(void* ptr, int size); +void Efree(void* ptr); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/src/cutil/globals.h b/src/cutil/globals.h index 9ffd4077b7..57d072334e 100644 --- a/src/cutil/globals.h +++ b/src/cutil/globals.h @@ -26,10 +26,9 @@ #define GLOBALS_H #include "const.h" -#include "unicharset.h" #include "strngs.h" +#include "unicharset.h" #include - #endif diff --git a/src/cutil/oldlist.cpp b/src/cutil/oldlist.cpp index 2d5ebaab03..0d4dc11f0b 100644 --- a/src/cutil/oldlist.cpp +++ b/src/cutil/oldlist.cpp @@ -116,7 +116,7 @@ int count(LIST var_list) { * key, and return a non-zero value when they match. If the value * nullptr is supplied for is_equal, the is_key routine will be used. **********************************************************************/ -LIST delete_d(LIST list, void *key, int_compare is_equal) { +LIST delete_d(LIST list, void* key, int_compare is_equal) { LIST result = NIL_LIST; LIST last_one = NIL_LIST; @@ -142,8 +142,8 @@ LIST delete_d(LIST list, void *key, int_compare is_equal) { return (result); } -LIST delete_d(LIST list, void *key, - TessResultCallback2 *is_equal) { +LIST delete_d(LIST list, void* key, + TessResultCallback2* is_equal) { LIST result = NIL_LIST; LIST last_one = NIL_LIST; @@ -203,7 +203,7 @@ void destroy_nodes(LIST list, void_dest destructor) { * Create a list element and rearange the pointers so that the first * element in the list is the second aurgment. **********************************************************************/ -void insert(LIST list, void *node) { +void insert(LIST list, void* node) { LIST element; if (list != NIL_LIST) { @@ -222,8 +222,8 @@ void insert(LIST list, void *node) { * Compare the list node with the key value return TRUE (non-zero) * if they are equivalent strings. (Return FALSE if not) **********************************************************************/ -int is_same(void *item1, void *item2) { - return strcmp((char *)item1, (char *)item2) == 0 ? 1 : 0; +int is_same(void* item1, void* item2) { + return strcmp((char*)item1, (char*)item2) == 0 ? 1 : 0; } /********************************************************************** @@ -254,7 +254,7 @@ LIST last(LIST var_list) { * * Return nth list cell in the list. **********************************************************************/ -void *nth_cell(LIST var_list, int item_num) { +void* nth_cell(LIST var_list, int item_num) { int x = 0; iterate(var_list) { if (x++ == item_num) return (var_list); @@ -285,7 +285,7 @@ LIST pop(LIST list) { * Create a list element. Push the second parameter (the node) onto * the first parameter (the list). Return the new list to the caller. **********************************************************************/ -LIST push(LIST list, void *element) { +LIST push(LIST list, void* element) { LIST t; t = new_cell(); @@ -299,7 +299,7 @@ LIST push(LIST list, void *element) { * * Create a list element. Add the element onto the end of the list. **********************************************************************/ -LIST push_last(LIST list, void *item) { +LIST push_last(LIST list, void* item) { LIST t; if (list != NIL_LIST) { @@ -341,7 +341,7 @@ LIST reverse_d(LIST list) { * Adjoin an element to an assorted list. The original list is * modified. Returns the modified list. **********************************************************************/ -LIST s_adjoin(LIST var_list, void *variable, int_compare compare) { +LIST s_adjoin(LIST var_list, void* variable, int_compare compare) { LIST l; int result; @@ -368,15 +368,15 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) { * the third parameter to this routine. If the value nullptr is supplied * for is_equal, the is_key routine will be used. **********************************************************************/ -LIST search(LIST list, void *key, int_compare is_equal) { +LIST search(LIST list, void* key, int_compare is_equal) { if (is_equal == nullptr) is_equal = is_same; iterate(list) if ((*is_equal)(first_node(list), key)) return (list); return (NIL_LIST); } -LIST search(LIST list, void *key, - TessResultCallback2 *is_equal) { +LIST search(LIST list, void* key, + TessResultCallback2* is_equal) { iterate(list) if ((*is_equal).Run(first_node(list), key)) return (list); return (NIL_LIST); } diff --git a/src/cutil/oldlist.h b/src/cutil/oldlist.h index 2aa7a7b844..05cc847277 100644 --- a/src/cutil/oldlist.h +++ b/src/cutil/oldlist.h @@ -123,19 +123,18 @@ /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ -#define NIL_LIST (LIST) 0 -struct list_rec -{ - struct list_rec *node; - struct list_rec *next; +#define NIL_LIST (LIST)0 +struct list_rec { + struct list_rec* node; + struct list_rec* next; }; -using LIST = list_rec *; +using LIST = list_rec*; /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ /* Predefinitions */ -#define list_rest(l) ((l) ? (l)->next : NIL_LIST) +#define list_rest(l) ((l) ? (l)->next : NIL_LIST) #define first_node(l) ((l) ? (l)->node : NIL_LIST) /********************************************************************** @@ -146,8 +145,7 @@ using LIST = list_rec *; * **********************************************************************/ -#define copy_first(l1,l2) \ -(l2=push(l2, first_node(l1))) +#define copy_first(l1, l2) (l2 = push(l2, first_node(l1))) /********************************************************************** * i t e r a t e @@ -156,8 +154,7 @@ using LIST = list_rec *; * minus the head. Continue until the list is NIL_LIST. **********************************************************************/ -#define iterate(l) \ -for (; (l) != NIL_LIST; (l) = list_rest (l)) +#define iterate(l) for (; (l) != NIL_LIST; (l) = list_rest(l)) /********************************************************************** * i t e r a t e l i s t @@ -167,8 +164,7 @@ for (; (l) != NIL_LIST; (l) = list_rest (l)) * except that it does not lose the original list. **********************************************************************/ -#define iterate_list(x,l) \ -for ((x)=(l); (x)!=0; (x)=list_rest(x)) +#define iterate_list(x, l) for ((x) = (l); (x) != 0; (x) = list_rest(x)) /********************************************************************** * j o i n o n @@ -177,8 +173,7 @@ for ((x)=(l); (x)!=0; (x)=list_rest(x)) * parameter is modified. **********************************************************************/ -#define JOIN_ON(list1,list2) \ -((list1) = join ((list1), (list2))) +#define JOIN_ON(list1, list2) ((list1) = join((list1), (list2))) /********************************************************************** * p o p o f f @@ -187,8 +182,7 @@ for ((x)=(l); (x)!=0; (x)=list_rest(x)) * parameter is modified. **********************************************************************/ -#define pop_off(list) \ -((list) = pop (list)) +#define pop_off(list) ((list) = pop(list)) /********************************************************************** * p u s h o n @@ -197,8 +191,7 @@ for ((x)=(l); (x)!=0; (x)=list_rest(x)) * parameter is modified. **********************************************************************/ -#define push_on(list,thing) \ -((list) = push (list, (LIST) (thing))) +#define push_on(list, thing) ((list) = push(list, (LIST)(thing))) /********************************************************************** * s e c o n d @@ -208,8 +201,7 @@ for ((x)=(l); (x)!=0; (x)=list_rest(x)) * #define second_node(l) first_node (list_rest (l)) **********************************************************************/ -#define second_node(l) \ -first_node (list_rest (l)) +#define second_node(l) first_node(list_rest(l)) /********************************************************************** * s e t r e s t @@ -219,8 +211,7 @@ first_node (list_rest (l)) * #define set_rest(l,node) l->next = node; **********************************************************************/ -#define set_rest(l,cell)\ -((l)->next = (cell)) +#define set_rest(l, cell) ((l)->next = (cell)) /********************************************************************** * t h i r d @@ -230,48 +221,47 @@ first_node (list_rest (l)) * #define third(l) first_node (list_rest (list_rest (l))) **********************************************************************/ -#define third(l) \ -first_node (list_rest (list_rest (l))) +#define third(l) first_node(list_rest(list_rest(l))) /*---------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------*/ int count(LIST var_list); -LIST delete_d(LIST list, void *key, int_compare is_equal); +LIST delete_d(LIST list, void* key, int_compare is_equal); -LIST delete_d(LIST list, void *key, +LIST delete_d(LIST list, void* key, TessResultCallback2* is_equal); LIST destroy(LIST list); void destroy_nodes(LIST list, void_dest destructor); -void insert(LIST list, void *node); +void insert(LIST list, void* node); -int is_same(void *item1, void *item2); +int is_same(void* item1, void* item2); LIST join(LIST list1, LIST list2); LIST last(LIST var_list); -void *nth_cell(LIST var_list, int item_num); +void* nth_cell(LIST var_list, int item_num); LIST pop(LIST list); -LIST push(LIST list, void *element); +LIST push(LIST list, void* element); -LIST push_last(LIST list, void *item); +LIST push_last(LIST list, void* item); LIST reverse(LIST list); LIST reverse_d(LIST list); -LIST s_adjoin(LIST var_list, void *variable, int_compare compare); +LIST s_adjoin(LIST var_list, void* variable, int_compare compare); -LIST search(LIST list, void *key, int_compare is_equal); +LIST search(LIST list, void* key, int_compare is_equal); -LIST search(LIST list, void *key, TessResultCallback2*); +LIST search(LIST list, void* key, TessResultCallback2*); /* #if defined(__STDC__) || defined(__cplusplus) diff --git a/src/cutil/structures.cpp b/src/cutil/structures.cpp index 559f4b6982..cf58e4373f 100644 --- a/src/cutil/structures.cpp +++ b/src/cutil/structures.cpp @@ -29,7 +29,6 @@ #include - /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ diff --git a/src/cutil/structures.h b/src/cutil/structures.h index 6b974df868..561264dd08 100644 --- a/src/cutil/structures.h +++ b/src/cutil/structures.h @@ -28,8 +28,8 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ -#include "oldlist.h" #include "danerror.h" +#include "oldlist.h" /*---------------------------------------------------------------------- M a c r o s @@ -42,18 +42,10 @@ * for each new data type. **********************************************************************/ -#define makestructure(newfunc, old, type) \ -type *newfunc() \ -{ \ - return new type; \ -} \ - \ - \ - \ -void old(type* deadelement) \ -{ \ - delete deadelement; \ -} \ +#define makestructure(newfunc, old, type) \ + type* newfunc() { return new type; } \ + \ + void old(type* deadelement) { delete deadelement; } /*---------------------------------------------------------------------- F u n c t i o n s diff --git a/src/dict/context.cpp b/src/dict/context.cpp index dbdcaaf72c..29a3d4cee7 100644 --- a/src/dict/context.cpp +++ b/src/dict/context.cpp @@ -49,7 +49,7 @@ const int case_state_table[6][4] = { 5, -1, 2, -1}, }; -int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const { +int Dict::case_ok(const WERD_CHOICE& word, const UNICHARSET& unicharset) const { int state = 0; int x; for (x = 0; x < word.length(); ++x) { @@ -64,19 +64,19 @@ int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const { state = case_state_table[state][0]; if (state == -1) return false; } - return state != 5; // single lower is bad + return state != 5; // single lower is bad } -bool Dict::absolute_garbage(const WERD_CHOICE &word, - const UNICHARSET &unicharset) { +bool Dict::absolute_garbage(const WERD_CHOICE& word, + const UNICHARSET& unicharset) { if (word.length() < kMinAbsoluteGarbageWordLength) return false; int num_alphanum = 0; for (int x = 0; x < word.length(); ++x) { num_alphanum += (unicharset.get_isalpha(word.unichar_id(x)) || unicharset.get_isdigit(word.unichar_id(x))); } - return (static_cast(num_alphanum) / - static_cast(word.length()) < kMinAbsoluteGarbageAlphanumFrac); + return (static_cast(num_alphanum) / static_cast(word.length()) < + kMinAbsoluteGarbageAlphanumFrac); } } // namespace tesseract diff --git a/src/dict/dawg.cpp b/src/dict/dawg.cpp index ebb3ac5b41..95654d1aa2 100644 --- a/src/dict/dawg.cpp +++ b/src/dict/dawg.cpp @@ -41,7 +41,7 @@ ----------------------------------------------------------------------*/ namespace tesseract { -bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, +bool Dawg::prefix_in_dawg(const WERD_CHOICE& word, bool requires_complete) const { if (word.length() == 0) return !requires_complete; NODE_REF node = 0; @@ -59,30 +59,28 @@ bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, } // Now check the last character. return edge_char_of(node, word.unichar_id(end_index), requires_complete) != - NO_EDGE; + NO_EDGE; } -bool Dawg::word_in_dawg(const WERD_CHOICE &word) const { +bool Dawg::word_in_dawg(const WERD_CHOICE& word) const { return prefix_in_dawg(word, true); } -int Dawg::check_for_words(const char *filename, - const UNICHARSET &unicharset, +int Dawg::check_for_words(const char* filename, const UNICHARSET& unicharset, bool enable_wildcard) const { if (filename == nullptr) return 0; - FILE *word_file; - char string [CHARS_PER_LINE]; + FILE* word_file; + char string[CHARS_PER_LINE]; int misses = 0; UNICHAR_ID wildcard = unicharset.unichar_to_id(kWildcard); - word_file = open_file (filename, "r"); + word_file = open_file(filename, "r"); - while (fgets (string, CHARS_PER_LINE, word_file) != nullptr) { + while (fgets(string, CHARS_PER_LINE, word_file) != nullptr) { chomp_string(string); // remove newline WERD_CHOICE word(string, unicharset); - if (word.length() > 0 && - !word.contains_unichar_id(INVALID_UNICHAR_ID)) { + if (word.length() > 0 && !word.contains_unichar_id(INVALID_UNICHAR_ID)) { if (!match_words(&word, 0, 0, enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) { tprintf("Missing word: %s\n", string); @@ -92,36 +90,36 @@ int Dawg::check_for_words(const char *filename, tprintf("Failed to create a valid word from %s\n", string); } } - fclose (word_file); + fclose(word_file); // Make sure the user sees this with fprintf instead of tprintf. if (debug_level_) tprintf("Number of lost words=%d\n", misses); return misses; } -void Dawg::iterate_words(const UNICHARSET &unicharset, - TessCallback1 *cb) const { +void Dawg::iterate_words(const UNICHARSET& unicharset, + TessCallback1* cb) const { WERD_CHOICE word(&unicharset); iterate_words_rec(word, 0, cb); } -void CallWithUTF8(TessCallback1 *cb, const WERD_CHOICE *wc) { +void CallWithUTF8(TessCallback1* cb, const WERD_CHOICE* wc) { STRING s; wc->string_and_lengths(&s, nullptr); cb->Run(s.string()); } -void Dawg::iterate_words(const UNICHARSET &unicharset, - TessCallback1 *cb) const { - TessCallback1 *shim = +void Dawg::iterate_words(const UNICHARSET& unicharset, + TessCallback1* cb) const { + TessCallback1* shim = NewPermanentTessCallback(CallWithUTF8, cb); WERD_CHOICE word(&unicharset); iterate_words_rec(word, 0, shim); delete shim; } -void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, +void Dawg::iterate_words_rec(const WERD_CHOICE& word_so_far, NODE_REF to_explore, - TessCallback1 *cb) const { + TessCallback1* cb) const { NodeChildVector children; this->unichar_ids_of(to_explore, &children, false); for (int i = 0; i < children.size(); i++) { @@ -137,8 +135,8 @@ void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, } } -bool Dawg::match_words(WERD_CHOICE *word, int32_t index, - NODE_REF node, UNICHAR_ID wildcard) const { +bool Dawg::match_words(WERD_CHOICE* word, int32_t index, NODE_REF node, + UNICHAR_ID wildcard) const { EDGE_REF edge; int32_t word_end; @@ -148,8 +146,7 @@ bool Dawg::match_words(WERD_CHOICE *word, int32_t index, this->unichar_ids_of(node, &vec, false); for (int i = 0; i < vec.size(); ++i) { word->set_unichar_id(vec[i].unichar_id, index); - if (match_words(word, index, node, wildcard)) - any_matched = true; + if (match_words(word, index, node, wildcard)) any_matched = true; } word->set_unichar_id(wildcard, index); return any_matched; @@ -162,7 +159,7 @@ bool Dawg::match_words(WERD_CHOICE *word, int32_t index, if (debug_level_ > 1) word->print("match_words() found: "); return true; } else if (node != 0) { - return match_words(word, index+1, node, wildcard); + return match_words(word, index + 1, node, wildcard); } } } @@ -181,16 +178,15 @@ void Dawg::init(int unicharset_size) { flags_mask_ = ~(letter_mask_ | next_node_mask_); } - /*---------------------------------------------------------------------- F u n c t i o n s f o r S q u i s h e d D a w g ----------------------------------------------------------------------*/ SquishedDawg::~SquishedDawg() { delete[] edges_; } -EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, - UNICHAR_ID unichar_id, - bool word_end) const { +EDGE_REF +SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, + bool word_end) const { EDGE_REF edge = node; if (node == 0) { // binary search EDGE_REF start = 0; @@ -198,8 +194,8 @@ EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, int compare; while (start <= end) { edge = (start + end) >> 1; // (start + end) / 2 - compare = given_greater_than_edge_rec(NO_EDGE, word_end, - unichar_id, edges_[edge]); + compare = given_greater_than_edge_rec(NO_EDGE, word_end, unichar_id, + edges_[edge]); if (compare == 0) { // given == vec[k] return edge; } else if (compare == 1) { // given > vec[k] @@ -221,10 +217,10 @@ EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, } int32_t SquishedDawg::num_forward_edges(NODE_REF node) const { - EDGE_REF edge = node; - int32_t num = 0; + EDGE_REF edge = node; + int32_t num = 0; - if (forward_edge (edge)) { + if (forward_edge(edge)) { do { num++; } while (!last_edge(edge++)); @@ -236,56 +232,50 @@ int32_t SquishedDawg::num_forward_edges(NODE_REF node) const { void SquishedDawg::print_node(NODE_REF node, int max_num_edges) const { if (node == NO_EDGE) return; // nothing to print - EDGE_REF edge = node; - const char *forward_string = "FORWARD"; - const char *backward_string = " "; + EDGE_REF edge = node; + const char* forward_string = "FORWARD"; + const char* backward_string = " "; - const char *last_string = "LAST"; - const char *not_last_string = " "; + const char* last_string = "LAST"; + const char* not_last_string = " "; - const char *eow_string = "EOW"; - const char *not_eow_string = " "; + const char* eow_string = "EOW"; + const char* not_eow_string = " "; - const char *direction; - const char *is_last; - const char *eow; + const char* direction; + const char* is_last; + const char* eow; UNICHAR_ID unichar_id; if (edge_occupied(edge)) { do { - direction = - forward_edge(edge) ? forward_string : backward_string; + direction = forward_edge(edge) ? forward_string : backward_string; is_last = last_edge(edge) ? last_string : not_last_string; eow = end_of_word(edge) ? eow_string : not_eow_string; unichar_id = edge_letter(edge); tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", - edge, next_node(edge), unichar_id, - direction, is_last, eow); + edge, next_node(edge), unichar_id, direction, is_last, eow); if (edge - node > max_num_edges) return; } while (!last_edge(edge++)); - if (edge < num_edges_ && - edge_occupied(edge) && backward_edge(edge)) { + if (edge < num_edges_ && edge_occupied(edge) && backward_edge(edge)) { do { - direction = - forward_edge(edge) ? forward_string : backward_string; + direction = forward_edge(edge) ? forward_string : backward_string; is_last = last_edge(edge) ? last_string : not_last_string; eow = end_of_word(edge) ? eow_string : not_eow_string; unichar_id = edge_letter(edge); tprintf(REFFORMAT " : next = " REFFORMAT - ", unichar_id = %d, %s %s %s\n", - edge, next_node(edge), unichar_id, - direction, is_last, eow); + ", unichar_id = %d, %s %s %s\n", + edge, next_node(edge), unichar_id, direction, is_last, eow); if (edge - node > MAX_NODE_EDGES_DISPLAY) return; } while (!last_edge(edge++)); } - } - else { + } else { tprintf(REFFORMAT " : no edges in this node\n", node); } tprintf("\n"); @@ -295,16 +285,15 @@ void SquishedDawg::print_edge(EDGE_REF edge) const { if (edge == NO_EDGE) { tprintf("NO_EDGE\n"); } else { - tprintf(REFFORMAT " : next = " REFFORMAT - ", unichar_id = '%d', %s %s %s\n", edge, - next_node(edge), edge_letter(edge), + tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = '%d', %s %s %s\n", + edge, next_node(edge), edge_letter(edge), (forward_edge(edge) ? "FORWARD" : " "), - (last_edge(edge) ? "LAST" : " "), - (end_of_word(edge) ? "EOW" : "")); + (last_edge(edge) ? "LAST" : " "), + (end_of_word(edge) ? "EOW" : "")); } } -bool SquishedDawg::read_squished_dawg(TFile *file) { +bool SquishedDawg::read_squished_dawg(TFile* file) { if (debug_level_) tprintf("Reading squished dawg\n"); // Read the magic number and check that it matches kDawgMagicNumber, as @@ -336,39 +325,41 @@ bool SquishedDawg::read_squished_dawg(TFile *file) { } std::unique_ptr SquishedDawg::build_node_map( - int32_t *num_nodes) const { - EDGE_REF edge; + int32_t* num_nodes) const { + EDGE_REF edge; std::unique_ptr node_map(new EDGE_REF[num_edges_]); - int32_t node_counter; - int32_t num_edges; + int32_t node_counter; + int32_t num_edges; - for (edge = 0; edge < num_edges_; edge++) // init all slots + for (edge = 0; edge < num_edges_; edge++) // init all slots node_map[edge] = -1; node_counter = num_forward_edges(0); - *num_nodes = 0; - for (edge = 0; edge < num_edges_; edge++) { // search all slots + *num_nodes = 0; + for (edge = 0; edge < num_edges_; edge++) { // search all slots if (forward_edge(edge)) { - (*num_nodes)++; // count nodes links + (*num_nodes)++; // count nodes links node_map[edge] = (edge ? node_counter : 0); num_edges = num_forward_edges(edge); if (edge != 0) node_counter += num_edges; edge += num_edges; if (edge >= num_edges_) break; - if (backward_edge(edge)) while (!last_edge(edge++)); + if (backward_edge(edge)) + while (!last_edge(edge++)) + ; edge--; } } return node_map; } -bool SquishedDawg::write_squished_dawg(TFile *file) { - EDGE_REF edge; - int32_t num_edges; - int32_t node_count = 0; - EDGE_REF old_index; +bool SquishedDawg::write_squished_dawg(TFile* file) { + EDGE_REF edge; + int32_t num_edges; + int32_t node_count = 0; + EDGE_REF old_index; EDGE_RECORD temp_record; if (debug_level_) tprintf("write_squished_dawg\n"); @@ -383,9 +374,8 @@ bool SquishedDawg::write_squished_dawg(TFile *file) { // Count the number of edges in this Dawg. num_edges = 0; - for (edge=0; edge < num_edges_; edge++) - if (forward_edge(edge)) - num_edges++; + for (edge = 0; edge < num_edges_; edge++) + if (forward_edge(edge)) num_edges++; // Write edge count to file. if (file->FWrite(&num_edges, sizeof(num_edges), 1) != 1) return false; @@ -408,7 +398,8 @@ bool SquishedDawg::write_squished_dawg(TFile *file) { if (edge >= num_edges_) break; if (backward_edge(edge)) // skip back links - while (!last_edge(edge++)); + while (!last_edge(edge++)) + ; edge--; } diff --git a/src/dict/dawg.h b/src/dict/dawg.h index dd6ad49aa0..77edfb353d 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -39,10 +39,10 @@ #ifndef __GNUC__ #ifdef _WIN32 -#define NO_EDGE (int64_t) 0xffffffffffffffffi64 -#endif /*_WIN32*/ +#define NO_EDGE (int64_t)0xffffffffffffffffi64 +#endif /*_WIN32*/ #else -#define NO_EDGE (int64_t) 0xffffffffffffffffll +#define NO_EDGE (int64_t)0xffffffffffffffffll #endif /*__GNUC__*/ /*---------------------------------------------------------------------- @@ -51,23 +51,23 @@ class UNICHARSET; using EDGE_RECORD = uint64_t; -using EDGE_ARRAY = EDGE_RECORD *; +using EDGE_ARRAY = EDGE_RECORD*; using EDGE_REF = int64_t; using NODE_REF = int64_t; -using NODE_MAP = EDGE_REF *; +using NODE_MAP = EDGE_REF*; namespace tesseract { struct NodeChild { UNICHAR_ID unichar_id; EDGE_REF edge_ref; - NodeChild(UNICHAR_ID id, EDGE_REF ref): unichar_id(id), edge_ref(ref) {} - NodeChild(): unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {} + NodeChild(UNICHAR_ID id, EDGE_REF ref) : unichar_id(id), edge_ref(ref) {} + NodeChild() : unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {} }; using NodeChildVector = GenericVector; using SuccessorList = GenericVector; -using SuccessorListsVector = GenericVector; +using SuccessorListsVector = GenericVector; enum DawgType { DAWG_TYPE_PUNCTUATION, @@ -82,26 +82,25 @@ enum DawgType { C o n s t a n t s ----------------------------------------------------------------------*/ -#define FORWARD_EDGE (int32_t) 0 -#define BACKWARD_EDGE (int32_t) 1 -#define MAX_NODE_EDGES_DISPLAY (int64_t) 100 -#define MARKER_FLAG (int64_t) 1 -#define DIRECTION_FLAG (int64_t) 2 -#define WERD_END_FLAG (int64_t) 4 -#define LETTER_START_BIT 0 -#define NUM_FLAG_BITS 3 +#define FORWARD_EDGE (int32_t)0 +#define BACKWARD_EDGE (int32_t)1 +#define MAX_NODE_EDGES_DISPLAY (int64_t)100 +#define MARKER_FLAG (int64_t)1 +#define DIRECTION_FLAG (int64_t)2 +#define WERD_END_FLAG (int64_t)4 +#define LETTER_START_BIT 0 +#define NUM_FLAG_BITS 3 #define REFFORMAT "%" PRId64 static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] = { - { 0, 1, 1, 0 }, // for DAWG_TYPE_PUNCTUATION - { 1, 0, 0, 0 }, // for DAWG_TYPE_WORD - { 1, 0, 0, 0 }, // for DAWG_TYPE_NUMBER - { 0, 0, 0, 0 }, // for DAWG_TYPE_PATTERN + {0, 1, 1, 0}, // for DAWG_TYPE_PUNCTUATION + {1, 0, 0, 0}, // for DAWG_TYPE_WORD + {1, 0, 0, 0}, // for DAWG_TYPE_NUMBER + {0, 0, 0, 0}, // for DAWG_TYPE_PATTERN }; static const char kWildcard[] = "*"; - /*---------------------------------------------------------------------- C l a s s e s a n d S t r u c t s ----------------------------------------------------------------------*/ @@ -126,33 +125,32 @@ class Dawg { static const UNICHAR_ID kPatternUnicharID = 0; inline DawgType type() const { return type_; } - inline const STRING &lang() const { return lang_; } + inline const STRING& lang() const { return lang_; } inline PermuterType permuter() const { return perm_; } virtual ~Dawg() = default; /// Returns true if the given word is in the Dawg. - bool word_in_dawg(const WERD_CHOICE &word) const; + bool word_in_dawg(const WERD_CHOICE& word) const; // Returns true if the given word prefix is not contraindicated by the dawg. // If requires_complete is true, then the exact complete word must be present. - bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const; + bool prefix_in_dawg(const WERD_CHOICE& prefix, bool requires_complete) const; /// Checks the Dawg for the words that are listed in the requested file. /// Returns the number of words in the given file missing from the Dawg. - int check_for_words(const char *filename, - const UNICHARSET &unicharset, + int check_for_words(const char* filename, const UNICHARSET& unicharset, bool enable_wildcard) const; // For each word in the Dawg, call the given (permanent) callback with the // text (UTF-8) version of the word. - void iterate_words(const UNICHARSET &unicharset, - TessCallback1 *cb) const; + void iterate_words(const UNICHARSET& unicharset, + TessCallback1* cb) const; // For each word in the Dawg, call the given (permanent) callback with the // text (UTF-8) version of the word. - void iterate_words(const UNICHARSET &unicharset, - TessCallback1 *cb) const; + void iterate_words(const UNICHARSET& unicharset, + TessCallback1* cb) const; // Pure virtual function that should be implemented by the derived classes. @@ -162,7 +160,7 @@ class Dawg { /// Fills the given NodeChildVector with all the unichar ids (and the /// corresponding EDGE_REFs) for which there is an edge out of this node. - virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, + virtual void unichar_ids_of(NODE_REF node, NodeChildVector* vec, bool word_end) const = 0; /// Returns the next node visited by following the edge @@ -183,8 +181,8 @@ class Dawg { /// Fills vec with unichar ids that represent the character classes /// of the given unichar_id. virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, - const UNICHARSET &unicharset, - GenericVector *vec) const { + const UNICHARSET& unicharset, + GenericVector* vec) const { (void)unichar_id; (void)unicharset; (void)vec; @@ -193,8 +191,8 @@ class Dawg { /// Returns the given EDGE_REF if the EDGE_RECORD that it points to has /// a self loop and the given unichar_id matches the unichar_id stored in the /// EDGE_RECORD, returns NO_EDGE otherwise. - virtual EDGE_REF pattern_loop_edge( - EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const { + virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, + bool word_end) const { (void)edge_ref; (void)unichar_id; (void)word_end; @@ -202,7 +200,7 @@ class Dawg { } protected: - Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level) + Dawg(DawgType type, const STRING& lang, PermuterType perm, int debug_level) : type_(type), lang_(lang), perm_(perm), @@ -210,35 +208,34 @@ class Dawg { debug_level_(debug_level) {} /// Returns the next node visited by following this edge. - inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const { + inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD& edge_rec) const { return ((edge_rec & next_node_mask_) >> next_node_start_bit_); } /// Returns the marker flag of this edge. - inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const { + inline bool marker_flag_from_edge_rec(const EDGE_RECORD& edge_rec) const { return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0; } /// Returns the direction flag of this edge. - inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const { - return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? - BACKWARD_EDGE : FORWARD_EDGE; + inline int direction_from_edge_rec(const EDGE_RECORD& edge_rec) const { + return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? BACKWARD_EDGE + : FORWARD_EDGE; } /// Returns true if this edge marks the end of a word. - inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const { + inline bool end_of_word_from_edge_rec(const EDGE_RECORD& edge_rec) const { return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0; } /// Returns UNICHAR_ID recorded in this edge. inline UNICHAR_ID unichar_id_from_edge_rec( - const EDGE_RECORD &edge_rec) const { + const EDGE_RECORD& edge_rec) const { return ((edge_rec & letter_mask_) >> LETTER_START_BIT); } /// Sets the next node link for this edge in the Dawg. - inline void set_next_node_in_edge_rec( - EDGE_RECORD *edge_rec, EDGE_REF value) { + inline void set_next_node_in_edge_rec(EDGE_RECORD* edge_rec, EDGE_REF value) { *edge_rec &= (~next_node_mask_); *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_); } /// Sets this edge record to be the last one in a sequence of edges. - inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) { + inline void set_marker_flag_in_edge_rec(EDGE_RECORD* edge_rec) { *edge_rec |= (MARKER_FLAG << flag_start_bit_); } /// Sequentially compares the given values of unichar ID, next node @@ -248,15 +245,15 @@ class Dawg { /// checked are the same) /// 0 if edge_rec_match() returns true /// -1 otherwise - inline int given_greater_than_edge_rec(NODE_REF next_node, - bool word_end, + inline int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, - const EDGE_RECORD &edge_rec) const { + const EDGE_RECORD& edge_rec) const { UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec); NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec); bool curr_word_end = end_of_word_from_edge_rec(edge_rec); if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node, - curr_word_end, curr_unichar_id)) return 0; + curr_word_end, curr_unichar_id)) + return 0; if (unichar_id > curr_unichar_id) return 1; if (unichar_id == curr_unichar_id) { if (next_node > curr_next_node) return 1; @@ -269,10 +266,8 @@ class Dawg { /// Returns true if all the values are equal (any value matches /// next_node if next_node == NO_EDGE, any value matches word_end /// if word_end is false). - inline bool edge_rec_match(NODE_REF next_node, - bool word_end, - UNICHAR_ID unichar_id, - NODE_REF other_next_node, + inline bool edge_rec_match(NODE_REF next_node, bool word_end, + UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const { return ((unichar_id == other_unichar_id) && @@ -289,13 +284,12 @@ class Dawg { /// the *'s in this string are interpreted as wildcards. /// WERD_CHOICE param is not passed by const so that wildcard searches /// can modify it and work without having to copy WERD_CHOICEs. - bool match_words(WERD_CHOICE *word, int32_t index, - NODE_REF node, UNICHAR_ID wildcard) const; + bool match_words(WERD_CHOICE* word, int32_t index, NODE_REF node, + UNICHAR_ID wildcard) const; // Recursively iterate over all words in a dawg (see public iterate_words). - void iterate_words_rec(const WERD_CHOICE &word_so_far, - NODE_REF to_explore, - TessCallback1 *cb) const; + void iterate_words_rec(const WERD_CHOICE& word_so_far, NODE_REF to_explore, + TessCallback1* cb) const; // Member Variables. DawgType type_; @@ -353,21 +347,21 @@ class Dawg { // We're back in the punctuation dawg. Continuing there is the only option. struct DawgPosition { DawgPosition() - : dawg_index(-1), dawg_ref(NO_EDGE), punc_ref(NO_EDGE), + : dawg_index(-1), + dawg_ref(NO_EDGE), + punc_ref(NO_EDGE), back_to_punc(false) {} - DawgPosition(int dawg_idx, EDGE_REF dawgref, - int punc_idx, EDGE_REF puncref, + DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc) - : dawg_index(dawg_idx), dawg_ref(dawgref), - punc_index(punc_idx), punc_ref(puncref), - back_to_punc(backtopunc) { - } - bool operator==(const DawgPosition &other) { - return dawg_index == other.dawg_index && - dawg_ref == other.dawg_ref && - punc_index == other.punc_index && - punc_ref == other.punc_ref && - back_to_punc == other.back_to_punc; + : dawg_index(dawg_idx), + dawg_ref(dawgref), + punc_index(punc_idx), + punc_ref(puncref), + back_to_punc(backtopunc) {} + bool operator==(const DawgPosition& other) { + return dawg_index == other.dawg_index && dawg_ref == other.dawg_ref && + punc_index == other.punc_index && punc_ref == other.punc_ref && + back_to_punc == other.back_to_punc; } int8_t dawg_index; @@ -386,17 +380,16 @@ class DawgPositionVector : public GenericVector { /// Adds an entry for the given dawg_index with the given node to the vec. /// Returns false if the same entry already exists in the vector, /// true otherwise. - inline bool add_unique(const DawgPosition &new_pos, - bool debug, - const char *debug_msg) { + inline bool add_unique(const DawgPosition& new_pos, bool debug, + const char* debug_msg) { for (int i = 0; i < size_used_; ++i) { if (data_[i] == new_pos) return false; } push_back(new_pos); if (debug) { - tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", - debug_msg, new_pos.dawg_index, new_pos.dawg_ref, - new_pos.punc_ref, new_pos.back_to_punc ? " returned" : ""); + tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", debug_msg, + new_pos.dawg_index, new_pos.dawg_ref, new_pos.punc_ref, + new_pos.back_to_punc ? " returned" : ""); } return true; } @@ -412,10 +405,10 @@ class DawgPositionVector : public GenericVector { // class SquishedDawg : public Dawg { public: - SquishedDawg(DawgType type, const STRING &lang, PermuterType perm, + SquishedDawg(DawgType type, const STRING& lang, PermuterType perm, int debug_level) : Dawg(type, lang, perm, debug_level) {} - SquishedDawg(const char *filename, DawgType type, const STRING &lang, + SquishedDawg(const char* filename, DawgType type, const STRING& lang, PermuterType perm, int debug_level) : Dawg(type, lang, perm, debug_level) { TFile file; @@ -424,7 +417,7 @@ class SquishedDawg : public Dawg { num_forward_edges_in_node0 = num_forward_edges(0); } SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, - const STRING &lang, PermuterType perm, int unicharset_size, + const STRING& lang, PermuterType perm, int unicharset_size, int debug_level) : Dawg(type, lang, perm, debug_level), edges_(edges), @@ -436,7 +429,7 @@ class SquishedDawg : public Dawg { virtual ~SquishedDawg(); // Loads using the given TFile. Returns false on failure. - bool Load(TFile *fp) { + bool Load(TFile* fp) { if (!read_squished_dawg(fp)) return false; num_forward_edges_in_node0 = num_forward_edges(0); return true; @@ -450,7 +443,7 @@ class SquishedDawg : public Dawg { /// Fills the given NodeChildVector with all the unichar ids (and the /// corresponding EDGE_REFs) for which there is an edge out of this node. - void unichar_ids_of(NODE_REF node, NodeChildVector *vec, + void unichar_ids_of(NODE_REF node, NodeChildVector* vec, bool word_end) const { EDGE_REF edge = node; if (!edge_occupied(edge) || edge == NO_EDGE) return; @@ -484,11 +477,11 @@ class SquishedDawg : public Dawg { void print_node(NODE_REF node, int max_num_edges) const; /// Writes the squished/reduced Dawg to a file. - bool write_squished_dawg(TFile *file); + bool write_squished_dawg(TFile* file); /// Opens the file with the given filename and writes the /// squished/reduced Dawg to the file. - bool write_squished_dawg(const char *filename) { + bool write_squished_dawg(const char* filename) { TFile file; file.OpenWrite(nullptr); if (!this->write_squished_dawg(&file)) { @@ -517,7 +510,7 @@ class SquishedDawg : public Dawg { } /// Clears the last flag of this edge. inline void clear_marker_flag(EDGE_REF edge_ref) { - (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_)); + (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_)); } /// Returns true if this edge is in the forward direction. inline bool forward_edge(EDGE_REF edge_ref) const { @@ -542,7 +535,7 @@ class SquishedDawg : public Dawg { int32_t num_forward_edges(NODE_REF node) const; /// Reads SquishedDawg from a file. - bool read_squished_dawg(TFile *file); + bool read_squished_dawg(TFile* file); /// Prints the contents of an edge indicated by the given EDGE_REF. void print_edge(EDGE_REF edge) const; @@ -554,7 +547,7 @@ class SquishedDawg : public Dawg { tprintf("__________________________\n"); } /// Constructs a mapping from the memory node indices to disk node indices. - std::unique_ptr build_node_map(int32_t *num_nodes) const; + std::unique_ptr build_node_map(int32_t* num_nodes) const; // Member variables. EDGE_ARRAY edges_; diff --git a/src/dict/dawg_cache.cpp b/src/dict/dawg_cache.cpp index 59b2118c2b..b8474b7430 100644 --- a/src/dict/dawg_cache.cpp +++ b/src/dict/dawg_cache.cpp @@ -27,31 +27,31 @@ namespace tesseract { struct DawgLoader { - DawgLoader(const STRING &lang, TessdataType tessdata_dawg_type, - int dawg_debug_level, TessdataManager *data_file) + DawgLoader(const STRING& lang, TessdataType tessdata_dawg_type, + int dawg_debug_level, TessdataManager* data_file) : lang_(lang), data_file_(data_file), tessdata_dawg_type_(tessdata_dawg_type), dawg_debug_level_(dawg_debug_level) {} - Dawg *Load(); + Dawg* Load(); STRING lang_; - TessdataManager *data_file_; + TessdataManager* data_file_; TessdataType tessdata_dawg_type_; int dawg_debug_level_; }; -Dawg *DawgCache::GetSquishedDawg(const STRING &lang, +Dawg* DawgCache::GetSquishedDawg(const STRING& lang, TessdataType tessdata_dawg_type, - int debug_level, TessdataManager *data_file) { + int debug_level, TessdataManager* data_file) { STRING data_id = data_file->GetDataFileName(); data_id += kTessdataFileSuffixes[tessdata_dawg_type]; DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file); return dawgs_.Get(data_id, NewTessCallback(&loader, &DawgLoader::Load)); } -Dawg *DawgLoader::Load() { +Dawg* DawgLoader::Load() { TFile fp; if (!data_file_->GetComponent(tessdata_dawg_type_, &fp)) return nullptr; DawgType dawg_type; @@ -87,7 +87,7 @@ Dawg *DawgLoader::Load() { default: return nullptr; } - SquishedDawg *retval = + SquishedDawg* retval = new SquishedDawg(dawg_type, lang_, perm_type, dawg_debug_level_); if (retval->Load(&fp)) return retval; delete retval; diff --git a/src/dict/dawg_cache.h b/src/dict/dawg_cache.h index 8323380222..c344418759 100644 --- a/src/dict/dawg_cache.h +++ b/src/dict/dawg_cache.h @@ -29,20 +29,16 @@ namespace tesseract { class DawgCache { public: - Dawg *GetSquishedDawg(const STRING &lang, TessdataType tessdata_dawg_type, - int debug_level, TessdataManager *data_file); + Dawg* GetSquishedDawg(const STRING& lang, TessdataType tessdata_dawg_type, + int debug_level, TessdataManager* data_file); // If we manage the given dawg, decrement its count, // and possibly delete it if the count reaches zero. // If dawg is unknown to us, return false. - bool FreeDawg(Dawg *dawg) { - return dawgs_.Free(dawg); - } + bool FreeDawg(Dawg* dawg) { return dawgs_.Free(dawg); } // Free up any currently unused dawgs. - void DeleteUnusedDawgs() { - dawgs_.DeleteUnusedObjects(); - } + void DeleteUnusedDawgs() { dawgs_.DeleteUnusedObjects(); } private: ObjectCache dawgs_; diff --git a/src/dict/dict.cpp b/src/dict/dict.cpp index 9f59ed6d59..fb981aadc1 100644 --- a/src/dict/dict.cpp +++ b/src/dict/dict.cpp @@ -27,7 +27,7 @@ namespace tesseract { class Image; -Dict::Dict(CCUtil *ccutil) +Dict::Dict(CCUtil* ccutil) : letter_is_okay_(&tesseract::Dict::def_letter_is_okay), probability_in_context_(&tesseract::Dict::def_probability_in_context), params_model_classify_(nullptr), @@ -186,7 +186,7 @@ Dict::~Dict() { if (output_ambig_words_file_ != nullptr) fclose(output_ambig_words_file_); } -DawgCache *Dict::GlobalDawgCache() { +DawgCache* Dict::GlobalDawgCache() { // This global cache (a singleton) will outlive every Tesseract instance // (even those that someone else might declare as global statics). static DawgCache cache; @@ -194,7 +194,7 @@ DawgCache *Dict::GlobalDawgCache() { } // Sets up ready for a Load or LoadLSTM. -void Dict::SetupForLoad(DawgCache *dawg_cache) { +void Dict::SetupForLoad(DawgCache* dawg_cache) { if (dawgs_.length() != 0) this->End(); apostrophe_unichar_id_ = getUnicharset().unichar_to_id(kApostropheSymbol); @@ -212,7 +212,7 @@ void Dict::SetupForLoad(DawgCache *dawg_cache) { } // Loads the dawgs needed by Tesseract. Call FinishLoad() after. -void Dict::Load(const STRING &lang, TessdataManager *data_file) { +void Dict::Load(const STRING& lang, TessdataManager* data_file) { // Load dawgs_. if (load_punc_dawg) { punc_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_PUNC_DAWG, @@ -220,12 +220,12 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) { if (punc_dawg_) dawgs_ += punc_dawg_; } if (load_system_dawg) { - Dawg *system_dawg = dawg_cache_->GetSquishedDawg( + Dawg* system_dawg = dawg_cache_->GetSquishedDawg( lang, TESSDATA_SYSTEM_DAWG, dawg_debug_level, data_file); if (system_dawg) dawgs_ += system_dawg; } if (load_number_dawg) { - Dawg *number_dawg = dawg_cache_->GetSquishedDawg( + Dawg* number_dawg = dawg_cache_->GetSquishedDawg( lang, TESSDATA_NUMBER_DAWG, dawg_debug_level, data_file); if (number_dawg) dawgs_ += number_dawg; } @@ -247,15 +247,15 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) { } STRING name; - if (((STRING &)user_words_suffix).length() > 0 || - ((STRING &)user_words_file).length() > 0) { - Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM, + if (((STRING&)user_words_suffix).length() > 0 || + ((STRING&)user_words_file).length() > 0) { + Trie* trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM, getUnicharset().size(), dawg_debug_level); - if (((STRING &)user_words_file).length() > 0) { - name = user_words_file; + if (((STRING&)user_words_file).length() > 0) { + name = user_words_file; } else { - name = getCCUtil()->language_data_path_prefix; - name += user_words_suffix; + name = getCCUtil()->language_data_path_prefix; + name += user_words_suffix; } if (!trie_ptr->read_and_add_word_list(name.string(), getUnicharset(), Trie::RRP_REVERSE_IF_HAS_RTL)) { @@ -266,16 +266,16 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) { } } - if (((STRING &)user_patterns_suffix).length() > 0 || - ((STRING &)user_patterns_file).length() > 0) { - Trie *trie_ptr = new Trie(DAWG_TYPE_PATTERN, lang, USER_PATTERN_PERM, + if (((STRING&)user_patterns_suffix).length() > 0 || + ((STRING&)user_patterns_file).length() > 0) { + Trie* trie_ptr = new Trie(DAWG_TYPE_PATTERN, lang, USER_PATTERN_PERM, getUnicharset().size(), dawg_debug_level); trie_ptr->initialize_patterns(&(getUnicharset())); - if (((STRING &)user_patterns_file).length() > 0) { - name = user_patterns_file; + if (((STRING&)user_patterns_file).length() > 0) { + name = user_patterns_file; } else { - name = getCCUtil()->language_data_path_prefix; - name += user_patterns_suffix; + name = getCCUtil()->language_data_path_prefix; + name += user_patterns_suffix; } if (!trie_ptr->read_pattern_list(name.string(), getUnicharset())) { tprintf("Error: failed to load %s\n", name.string()); @@ -295,7 +295,7 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) { } // Loads the dawgs needed by the LSTM model. Call FinishLoad() after. -void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) { +void Dict::LoadLSTM(const STRING& lang, TessdataManager* data_file) { // Load dawgs_. if (load_punc_dawg) { punc_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_LSTM_PUNC_DAWG, @@ -303,12 +303,12 @@ void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) { if (punc_dawg_) dawgs_ += punc_dawg_; } if (load_system_dawg) { - Dawg *system_dawg = dawg_cache_->GetSquishedDawg( + Dawg* system_dawg = dawg_cache_->GetSquishedDawg( lang, TESSDATA_LSTM_SYSTEM_DAWG, dawg_debug_level, data_file); if (system_dawg) dawgs_ += system_dawg; } if (load_number_dawg) { - Dawg *number_dawg = dawg_cache_->GetSquishedDawg( + Dawg* number_dawg = dawg_cache_->GetSquishedDawg( lang, TESSDATA_LSTM_NUMBER_DAWG, dawg_debug_level, data_file); if (number_dawg) dawgs_ += number_dawg; } @@ -323,13 +323,14 @@ bool Dict::FinishLoad() { // indices into the dawgs_ vector of the successors for dawg i. successors_.reserve(dawgs_.length()); for (int i = 0; i < dawgs_.length(); ++i) { - const Dawg *dawg = dawgs_[i]; - SuccessorList *lst = new SuccessorList(); + const Dawg* dawg = dawgs_[i]; + SuccessorList* lst = new SuccessorList(); for (int j = 0; j < dawgs_.length(); ++j) { - const Dawg *other = dawgs_[j]; + const Dawg* other = dawgs_[j]; if (dawg != nullptr && other != nullptr && (dawg->lang() == other->lang()) && - kDawgSuccessors[dawg->type()][other->type()]) *lst += j; + kDawgSuccessors[dawg->type()][other->type()]) + *lst += j; } successors_ += lst; } @@ -337,8 +338,7 @@ bool Dict::FinishLoad() { } void Dict::End() { - if (dawgs_.length() == 0) - return; // Not safe to call twice. + if (dawgs_.length() == 0) return; // Not safe to call twice. for (int i = 0; i < dawgs_.size(); i++) { if (!dawg_cache_->FreeDawg(dawgs_[i])) { delete dawgs_[i]; @@ -360,16 +360,16 @@ void Dict::End() { // Returns true if in light of the current state unichar_id is allowed // according to at least one of the dawgs in the dawgs_ vector. // See more extensive comments in dict.h where this function is declared. -int Dict::def_letter_is_okay(void* void_dawg_args, - UNICHAR_ID unichar_id, +int Dict::def_letter_is_okay(void* void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const { - DawgArgs *dawg_args = static_cast(void_dawg_args); + DawgArgs* dawg_args = static_cast(void_dawg_args); if (dawg_debug_level >= 3) { - tprintf("def_letter_is_okay: current unichar=%s word_end=%d" - " num active dawgs=%d\n", - getUnicharset().debug_str(unichar_id).string(), word_end, - dawg_args->active_dawgs->length()); + tprintf( + "def_letter_is_okay: current unichar=%s word_end=%d" + " num active dawgs=%d\n", + getUnicharset().debug_str(unichar_id).string(), word_end, + dawg_args->active_dawgs->length()); } // Do not accept words that contain kPatternUnicharID. @@ -390,9 +390,10 @@ int Dict::def_letter_is_okay(void* void_dawg_args, // with the updated ref (an edge with the corresponding unichar id) into // dawg_args->updated_pos. for (int a = 0; a < dawg_args->active_dawgs->length(); ++a) { - const DawgPosition &pos = (*dawg_args->active_dawgs)[a]; - const Dawg *punc_dawg = pos.punc_index >= 0 ? dawgs_[pos.punc_index] : nullptr; - const Dawg *dawg = pos.dawg_index >= 0 ? dawgs_[pos.dawg_index] : nullptr; + const DawgPosition& pos = (*dawg_args->active_dawgs)[a]; + const Dawg* punc_dawg = + pos.punc_index >= 0 ? dawgs_[pos.punc_index] : nullptr; + const Dawg* dawg = pos.dawg_index >= 0 ? dawgs_[pos.dawg_index] : nullptr; if (!dawg && !punc_dawg) { // shouldn't happen. @@ -402,23 +403,23 @@ int Dict::def_letter_is_okay(void* void_dawg_args, if (!dawg) { // We're in the punctuation dawg. A core dawg has not been chosen. NODE_REF punc_node = GetStartingNode(punc_dawg, pos.punc_ref); - EDGE_REF punc_transition_edge = punc_dawg->edge_char_of( - punc_node, Dawg::kPatternUnicharID, word_end); + EDGE_REF punc_transition_edge = + punc_dawg->edge_char_of(punc_node, Dawg::kPatternUnicharID, word_end); if (punc_transition_edge != NO_EDGE) { // Find all successors, and see which can transition. - const SuccessorList &slist = *(successors_[pos.punc_index]); + const SuccessorList& slist = *(successors_[pos.punc_index]); for (int s = 0; s < slist.length(); ++s) { int sdawg_index = slist[s]; - const Dawg *sdawg = dawgs_[sdawg_index]; + const Dawg* sdawg = dawgs_[sdawg_index]; UNICHAR_ID ch = char_for_dawg(unichar_id, sdawg); EDGE_REF dawg_edge = sdawg->edge_char_of(0, ch, word_end); if (dawg_edge != NO_EDGE) { - if (dawg_debug_level >=3) { + if (dawg_debug_level >= 3) { tprintf("Letter found in dawg %d\n", sdawg_index); } dawg_args->updated_dawgs->add_unique( - DawgPosition(sdawg_index, dawg_edge, - pos.punc_index, punc_transition_edge, false), + DawgPosition(sdawg_index, dawg_edge, pos.punc_index, + punc_transition_edge, false), dawg_debug_level > 0, "Append transition from punc dawg to current dawgs: "); if (sdawg->permuter() > curr_perm) curr_perm = sdawg->permuter(); @@ -428,16 +429,15 @@ int Dict::def_letter_is_okay(void* void_dawg_args, } } } - EDGE_REF punc_edge = punc_dawg->edge_char_of(punc_node, unichar_id, - word_end); + EDGE_REF punc_edge = + punc_dawg->edge_char_of(punc_node, unichar_id, word_end); if (punc_edge != NO_EDGE) { - if (dawg_debug_level >=3) { + if (dawg_debug_level >= 3) { tprintf("Letter found in punctuation dawg\n"); } dawg_args->updated_dawgs->add_unique( DawgPosition(-1, NO_EDGE, pos.punc_index, punc_edge, false), - dawg_debug_level > 0, - "Extend punctuation dawg: "); + dawg_debug_level > 0, "Extend punctuation dawg: "); if (PUNC_PERM > curr_perm) curr_perm = PUNC_PERM; if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true; } @@ -448,14 +448,15 @@ int Dict::def_letter_is_okay(void* void_dawg_args, // We can end the main word here. // If we can continue on the punc ref, add that possibility. NODE_REF punc_node = GetStartingNode(punc_dawg, pos.punc_ref); - EDGE_REF punc_edge = punc_node == NO_EDGE ? NO_EDGE - : punc_dawg->edge_char_of(punc_node, unichar_id, word_end); + EDGE_REF punc_edge = + punc_node == NO_EDGE + ? NO_EDGE + : punc_dawg->edge_char_of(punc_node, unichar_id, word_end); if (punc_edge != NO_EDGE) { dawg_args->updated_dawgs->add_unique( - DawgPosition(pos.dawg_index, pos.dawg_ref, - pos.punc_index, punc_edge, true), - dawg_debug_level > 0, - "Return to punctuation dawg: "); + DawgPosition(pos.dawg_index, pos.dawg_ref, pos.punc_index, + punc_edge, true), + dawg_debug_level > 0, "Return to punctuation dawg: "); if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter(); if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true; } @@ -476,8 +477,10 @@ int Dict::def_letter_is_okay(void* void_dawg_args, // Find the edge out of the node for the unichar_id. NODE_REF node = GetStartingNode(dawg, pos.dawg_ref); - EDGE_REF edge = (node == NO_EDGE) ? NO_EDGE - : dawg->edge_char_of(node, char_for_dawg(unichar_id, dawg), word_end); + EDGE_REF edge = (node == NO_EDGE) + ? NO_EDGE + : dawg->edge_char_of( + node, char_for_dawg(unichar_id, dawg), word_end); if (dawg_debug_level >= 3) { tprintf("Active dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n", @@ -485,7 +488,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args, } if (edge != NO_EDGE) { // the unichar was found in the current dawg - if (dawg_debug_level >=3) { + if (dawg_debug_level >= 3) { tprintf("Letter found in dawg %d\n", pos.dawg_index); } if (word_end && punc_dawg && !punc_dawg->end_of_word(pos.punc_ref)) { @@ -520,10 +523,10 @@ int Dict::def_letter_is_okay(void* void_dawg_args, return dawg_args->permuter; } -void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, +void Dict::ProcessPatternEdges(const Dawg* dawg, const DawgPosition& pos, UNICHAR_ID unichar_id, bool word_end, - DawgArgs *dawg_args, - PermuterType *curr_perm) const { + DawgArgs* dawg_args, + PermuterType* curr_perm) const { NODE_REF node = GetStartingNode(dawg, pos.dawg_ref); // Try to find the edge corresponding to the exact unichar_id and to all the // edges corresponding to the character class of unichar_id. @@ -535,9 +538,10 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, // On the first iteration check all the outgoing edges. // On the second iteration check all self-loops. for (int k = 0; k < 2; ++k) { - EDGE_REF edge = (k == 0) - ? dawg->edge_char_of(node, unichar_id_patterns[i], word_end) - : dawg->pattern_loop_edge(pos.dawg_ref, unichar_id_patterns[i], word_end); + EDGE_REF edge = + (k == 0) ? dawg->edge_char_of(node, unichar_id_patterns[i], word_end) + : dawg->pattern_loop_edge(pos.dawg_ref, + unichar_id_patterns[i], word_end); if (edge == NO_EDGE) continue; if (dawg_debug_level >= 3) { tprintf("Pattern dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n", @@ -558,7 +562,7 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, // Fill the given active_dawgs vector with dawgs that could contain the // beginning of the word. If hyphenated() returns true, copy the entries // from hyphen_active_dawgs_ instead. -void Dict::init_active_dawgs(DawgPositionVector *active_dawgs, +void Dict::init_active_dawgs(DawgPositionVector* active_dawgs, bool ambigs_mode) const { int i; if (hyphenated()) { @@ -575,11 +579,11 @@ void Dict::init_active_dawgs(DawgPositionVector *active_dawgs, } } -void Dict::default_dawgs(DawgPositionVector *dawg_pos_vec, +void Dict::default_dawgs(DawgPositionVector* dawg_pos_vec, bool suppress_patterns) const { bool punc_dawg_available = - (punc_dawg_ != nullptr) && - punc_dawg_->edge_char_of(0, Dawg::kPatternUnicharID, true) != NO_EDGE; + (punc_dawg_ != nullptr) && + punc_dawg_->edge_char_of(0, Dawg::kPatternUnicharID, true) != NO_EDGE; for (int i = 0; i < dawgs_.length(); i++) { if (dawgs_[i] != nullptr && @@ -602,7 +606,7 @@ void Dict::default_dawgs(DawgPositionVector *dawg_pos_vec, } } -void Dict::add_document_word(const WERD_CHOICE &best_choice) { +void Dict::add_document_word(const WERD_CHOICE& best_choice) { // Do not add hyphenated word parts to the document dawg. // hyphen_word_ will be non-nullptr after the set_hyphen_word() is // called when the first part of the hyphenated word is @@ -613,8 +617,7 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) { int stringlen = best_choice.length(); - if (valid_word(best_choice) || stringlen < 2) - return; + if (valid_word(best_choice) || stringlen < 2) return; // Discard words that contain >= kDocDictMaxRepChars repeating unichars. if (best_choice.length() >= kDocDictMaxRepChars) { @@ -633,8 +636,7 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) { if (best_choice.certainty() < doc_dict_certainty_threshold || stringlen == 2) { - if (best_choice.certainty() < doc_dict_pending_threshold) - return; + if (best_choice.certainty() < doc_dict_pending_threshold) return; if (!pending_words_->word_in_dawg(best_choice)) { if (stringlen > 2 || @@ -650,19 +652,16 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) { if (save_doc_words) { STRING filename(getCCUtil()->imagefile); filename += ".doc"; - FILE *doc_word_file = open_file (filename.string(), "a"); - fprintf(doc_word_file, "%s\n", - best_choice.debug_string().string()); + FILE* doc_word_file = open_file(filename.string(), "a"); + fprintf(doc_word_file, "%s\n", best_choice.debug_string().string()); fclose(doc_word_file); } document_words_->add_word_to_dawg(best_choice); } -void Dict::adjust_word(WERD_CHOICE *word, - bool nonword, +void Dict::adjust_word(WERD_CHOICE* word, bool nonword, XHeightConsistencyEnum xheight_consistency, - float additional_adjust, - bool modify_rating, + float additional_adjust, bool modify_rating, bool debug) { bool is_han = (getUnicharset().han_sid() != getUnicharset().null_sid() && word->GetTopScriptID() == getUnicharset().han_sid()); @@ -672,7 +671,7 @@ void Dict::adjust_word(WERD_CHOICE *word, float adjust_factor = additional_adjust; float new_rating = word->rating(); new_rating += kRatingPad; - const char *xheight_triggered = ""; + const char* xheight_triggered = ""; if (word->length() > 1) { // Calculate x-height and y-offset consistency penalties. switch (xheight_consistency) { @@ -697,8 +696,7 @@ void Dict::adjust_word(WERD_CHOICE *word, } if (debug) { tprintf("%sWord: %s %4.2f%s", nonword ? "Non-" : "", - word->unichar_string().string(), word->rating(), - xheight_triggered); + word->unichar_string().string(), word->rating(), xheight_triggered); } if (nonword) { // non-dictionary word @@ -738,8 +736,8 @@ void Dict::adjust_word(WERD_CHOICE *word, word->set_adjust_factor(adjust_factor); } -int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const { - const WERD_CHOICE *word_ptr = &word; +int Dict::valid_word(const WERD_CHOICE& word, bool numbers_ok) const { + const WERD_CHOICE* word_ptr = &word; WERD_CHOICE temp_word(word.unicharset()); if (hyphenated() && hyphen_word_->unicharset() == word.unicharset()) { copy_hyphen_info(&temp_word); @@ -749,14 +747,15 @@ int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const { if (word_ptr->length() == 0) return NO_PERM; // Allocate vectors for holding current and updated // active_dawgs and initialize them. - DawgPositionVector *active_dawgs = new DawgPositionVector[2]; + DawgPositionVector* active_dawgs = new DawgPositionVector[2]; init_active_dawgs(&(active_dawgs[0]), false); DawgArgs dawg_args(&(active_dawgs[0]), &(active_dawgs[1]), NO_PERM); int last_index = word_ptr->length() - 1; // Call leter_is_okay for each letter in the word. for (int i = hyphen_base_size(); i <= last_index; ++i) { if (!((this->*letter_is_okay_)(&dawg_args, word_ptr->unichar_id(i), - i == last_index))) break; + i == last_index))) + break; // Swap active_dawgs, constraints with the corresponding updated vector. if (dawg_args.updated_dawgs == &(active_dawgs[1])) { dawg_args.updated_dawgs = &(active_dawgs[0]); @@ -767,12 +766,13 @@ int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const { } } delete[] active_dawgs; - return valid_word_permuter(dawg_args.permuter, numbers_ok) ? - dawg_args.permuter : NO_PERM; + return valid_word_permuter(dawg_args.permuter, numbers_ok) + ? dawg_args.permuter + : NO_PERM; } -bool Dict::valid_bigram(const WERD_CHOICE &word1, - const WERD_CHOICE &word2) const { +bool Dict::valid_bigram(const WERD_CHOICE& word1, + const WERD_CHOICE& word2) const { if (bigram_dawg_ == nullptr) return false; // Extract the core word from the middle of each word with any digits @@ -808,13 +808,13 @@ bool Dict::valid_bigram(const WERD_CHOICE &word1, } WERD_CHOICE normalized_word(&uchset, bigram_string.size()); for (int i = 0; i < bigram_string.size(); ++i) { - normalized_word.append_unichar_id_space_allocated(bigram_string[i], 1, - 0.0f, 0.0f); + normalized_word.append_unichar_id_space_allocated(bigram_string[i], 1, 0.0f, + 0.0f); } return bigram_dawg_->word_in_dawg(normalized_word); } -bool Dict::valid_punctuation(const WERD_CHOICE &word) { +bool Dict::valid_punctuation(const WERD_CHOICE& word) { if (word.length() == 0) return NO_PERM; int i; WERD_CHOICE new_word(word.unicharset()); @@ -828,21 +828,21 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) { !getUnicharset().get_isdigit(unichar_id)) { return false; // neither punc, nor alpha, nor digit } else if ((new_len = new_word.length()) == 0 || - new_word.unichar_id(new_len-1) != Dawg::kPatternUnicharID) { + new_word.unichar_id(new_len - 1) != Dawg::kPatternUnicharID) { new_word.append_unichar_id(Dawg::kPatternUnicharID, 1, 0.0, 0.0); } } for (i = 0; i < dawgs_.size(); ++i) { - if (dawgs_[i] != nullptr && - dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION && - dawgs_[i]->word_in_dawg(new_word)) return true; + if (dawgs_[i] != nullptr && dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION && + dawgs_[i]->word_in_dawg(new_word)) + return true; } return false; } /// Returns true if the language is space-delimited (not CJ, or T). bool Dict::IsSpaceDelimitedLang() const { - const UNICHARSET &u_set = getUnicharset(); + const UNICHARSET& u_set = getUnicharset(); if (u_set.han_sid() > 0) return false; if (u_set.katakana_sid() > 0) return false; if (u_set.thai_sid() > 0) return false; diff --git a/src/dict/dict.h b/src/dict/dict.h index 35cad5bb22..fa8c92a567 100644 --- a/src/dict/dict.h +++ b/src/dict/dict.h @@ -23,22 +23,22 @@ #include "dawg.h" #include "dawg_cache.h" #include "host.h" +#include "params_training_featdef.h" #include "ratngs.h" #include "stopper.h" #include "trie.h" #include "unicharset.h" -#include "params_training_featdef.h" class MATRIX; class WERD_RES; -#define MAX_WERD_LENGTH (int64_t) 128 -#define NO_RATING -1 +#define MAX_WERD_LENGTH (int64_t)128 +#define NO_RATING -1 /** Struct used to hold temporary information about fragments. */ struct CHAR_FRAGMENT_INFO { UNICHAR_ID unichar_id; - const CHAR_FRAGMENT *fragment; + const CHAR_FRAGMENT* fragment; int num_fragments; float rating; float certainty; @@ -46,14 +46,14 @@ struct CHAR_FRAGMENT_INFO { namespace tesseract { -using DawgVector = GenericVector; +using DawgVector = GenericVector; // // Constants // static const int kRatingPad = 4; -static const char kDictWildcard[] = "\u2606"; // WHITE STAR -static const int kDictMaxWildcards = 2; // max wildcards for a word +static const char kDictWildcard[] = "\u2606"; // WHITE STAR +static const int kDictMaxWildcards = 2; // max wildcards for a word // TODO(daria): If hyphens are different in different languages and can be // inferred from training data we should load their values dynamically. static const char kHyphenSymbol[] = "-"; @@ -62,7 +62,7 @@ static const char kQuestionSymbol[] = "?"; static const char kApostropheSymbol[] = "'"; static const float kSimCertaintyScale = -10.0; // similarity matcher scaling static const float kSimCertaintyOffset = -10.0; // similarity matcher offset -static const float kSimilarityFloor = 100.0; // worst E*L product to stop on +static const float kSimilarityFloor = 100.0; // worst E*L product to stop on static const int kDocDictMaxRepChars = 4; // Enum for describing whether the x-height for the word is consistent: @@ -71,14 +71,14 @@ static const int kDocDictMaxRepChars = 4; // [think subscript and superscript], or there is an oversized // first character. // 2 - the word is inconsistent. -enum XHeightConsistencyEnum {XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT}; +enum XHeightConsistencyEnum { XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT }; struct DawgArgs { - DawgArgs(DawgPositionVector *d, DawgPositionVector *up, PermuterType p) + DawgArgs(DawgPositionVector* d, DawgPositionVector* up, PermuterType p) : active_dawgs(d), updated_dawgs(up), permuter(p), valid_end(false) {} - DawgPositionVector *active_dawgs; - DawgPositionVector *updated_dawgs; + DawgPositionVector* active_dawgs; + DawgPositionVector* updated_dawgs; PermuterType permuter; // True if the current position is a valid word end. bool valid_end; @@ -88,19 +88,11 @@ class Dict { public: Dict(CCUtil* image_ptr); ~Dict(); - const CCUtil* getCCUtil() const { - return ccutil_; - } - CCUtil* getCCUtil() { - return ccutil_; - } - const UNICHARSET& getUnicharset() const { - return getCCUtil()->unicharset; - } - UNICHARSET& getUnicharset() { - return getCCUtil()->unicharset; - } - const UnicharAmbigs &getUnicharAmbigs() const { + const CCUtil* getCCUtil() const { return ccutil_; } + CCUtil* getCCUtil() { return ccutil_; } + const UNICHARSET& getUnicharset() const { return getCCUtil()->unicharset; } + UNICHARSET& getUnicharset() { return getCCUtil()->unicharset; } + const UnicharAmbigs& getUnicharAmbigs() const { return getCCUtil()->unichar_ambigs; } @@ -108,9 +100,8 @@ class Dict { inline bool compound_marker(UNICHAR_ID unichar_id) { const GenericVector& normed_ids = getUnicharset().normed_ids(unichar_id); - return normed_ids.size() == 1 && - (normed_ids[0] == hyphen_unichar_id_ || - normed_ids[0] == slash_unichar_id_); + return normed_ids.size() == 1 && (normed_ids[0] == hyphen_unichar_id_ || + normed_ids[0] == slash_unichar_id_); } // Returns true if unichar_id is an apostrophe-like character that may // separate prefix/suffix words from a main body word. @@ -123,9 +114,7 @@ class Dict { /* hyphen.cpp ************************************************************/ /// Returns true if we've recorded the beginning of a hyphenated word. - inline bool hyphenated() const { return - !last_word_on_line_ && hyphen_word_; - } + inline bool hyphenated() const { return !last_word_on_line_ && hyphen_word_; } /// Size of the base word (the part on the line before) of a hyphenated word. inline int hyphen_base_size() const { return this->hyphenated() ? hyphen_word_->length() : 0; @@ -133,7 +122,7 @@ class Dict { /// If this word is hyphenated copy the base word (the part on /// the line before) of a hyphenated word into the given word. /// This function assumes that word is not nullptr. - inline void copy_hyphen_info(WERD_CHOICE *word) const { + inline void copy_hyphen_info(WERD_CHOICE* word) const { if (this->hyphenated()) { *word = *hyphen_word_; if (hyphen_debug_level) word->print("copy_hyphen_info: "); @@ -141,14 +130,13 @@ class Dict { } /// Check whether the word has a hyphen at the end. inline bool has_hyphen_end(UNICHAR_ID unichar_id, bool first_pos) const { - if (!last_word_on_line_ || first_pos) - return false; + if (!last_word_on_line_ || first_pos) return false; const GenericVector& normed_ids = getUnicharset().normed_ids(unichar_id); return normed_ids.size() == 1 && normed_ids[0] == hyphen_unichar_id_; } /// Same as above, but check the unichar at the end of the word. - inline bool has_hyphen_end(const WERD_CHOICE &word) const { + inline bool has_hyphen_end(const WERD_CHOICE& word) const { int word_index = word.length() - 1; return has_hyphen_end(word.unichar_id(word_index), word_index == 0); } @@ -158,8 +146,8 @@ class Dict { void reset_hyphen_vars(bool last_word_on_line); /// Update hyphen_word_, and copy the given DawgPositionVectors into /// hyphen_active_dawgs_ . - void set_hyphen_word(const WERD_CHOICE &word, - const DawgPositionVector &active_dawgs); + void set_hyphen_word(const WERD_CHOICE& word, + const DawgPositionVector& active_dawgs); /* permdawg.cpp ************************************************************/ // Note: Functions in permdawg.cpp are only used by NoDangerousAmbig(). @@ -167,8 +155,8 @@ class Dict { /// Copies word into best_choice if its rating is smaller /// than that of best_choice. - inline void update_best_choice(const WERD_CHOICE &word, - WERD_CHOICE *best_choice) { + inline void update_best_choice(const WERD_CHOICE& word, + WERD_CHOICE* best_choice) { if (word.rating() < best_choice->rating()) { *best_choice = word; } @@ -176,78 +164,62 @@ class Dict { /// Fill the given active_dawgs vector with dawgs that could contain the /// beginning of the word. If hyphenated() returns true, copy the entries /// from hyphen_active_dawgs_ instead. - void init_active_dawgs(DawgPositionVector *active_dawgs, + void init_active_dawgs(DawgPositionVector* active_dawgs, bool ambigs_mode) const; // Fill the given vector with the default collection of any-length dawgs - void default_dawgs(DawgPositionVector *anylength_dawgs, - bool suppress_patterns) const; - + void default_dawgs(DawgPositionVector* anylength_dawgs, + bool suppress_patterns) const; /// Recursively explore all the possible character combinations in /// the given char_choices. Use go_deeper_dawg_fxn() to explore all the /// dawgs in the dawgs_ vector in parallel and discard invalid words. /// /// Allocate and return a WERD_CHOICE with the best valid word found. - WERD_CHOICE *dawg_permute_and_select( - const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit); + WERD_CHOICE* dawg_permute_and_select( + const BLOB_CHOICE_LIST_VECTOR& char_choices, float rating_limit); /// If the choice being composed so far could be a dictionary word /// and we have not reached the end of the word keep exploring the /// char_choices further. void go_deeper_dawg_fxn( - const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, - bool word_ending, WERD_CHOICE *word, float certainties[], - float *limit, WERD_CHOICE *best_choice, int *attempts_left, - void *void_more_args); + const char* debug, const BLOB_CHOICE_LIST_VECTOR& char_choices, + int char_choice_index, const CHAR_FRAGMENT_INFO* prev_char_frag_info, + bool word_ending, WERD_CHOICE* word, float certainties[], float* limit, + WERD_CHOICE* best_choice, int* attempts_left, void* void_more_args); /// Pointer to go_deeper function. - void (Dict::*go_deeper_fxn_)(const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - bool word_ending, WERD_CHOICE *word, - float certainties[], float *limit, - WERD_CHOICE *best_choice, int *attempts_left, - void *void_more_args); + void (Dict::*go_deeper_fxn_)( + const char* debug, const BLOB_CHOICE_LIST_VECTOR& char_choices, + int char_choice_index, const CHAR_FRAGMENT_INFO* prev_char_frag_info, + bool word_ending, WERD_CHOICE* word, float certainties[], float* limit, + WERD_CHOICE* best_choice, int* attempts_left, void* void_more_args); // // Helper functions for dawg_permute_and_select(). // - void permute_choices( - const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - WERD_CHOICE *word, - float certainties[], - float *limit, - WERD_CHOICE *best_choice, - int *attempts_left, - void *more_args); - - void append_choices( - const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - const BLOB_CHOICE &blob_choice, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - WERD_CHOICE *word, - float certainties[], - float *limit, - WERD_CHOICE *best_choice, - int *attempts_left, - void *more_args); - - bool fragment_state_okay(UNICHAR_ID curr_unichar_id, - float curr_rating, float curr_certainty, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - const char *debug, int word_ending, - CHAR_FRAGMENT_INFO *char_frag_info); + void permute_choices(const char* debug, + const BLOB_CHOICE_LIST_VECTOR& char_choices, + int char_choice_index, + const CHAR_FRAGMENT_INFO* prev_char_frag_info, + WERD_CHOICE* word, float certainties[], float* limit, + WERD_CHOICE* best_choice, int* attempts_left, + void* more_args); + + void append_choices(const char* debug, + const BLOB_CHOICE_LIST_VECTOR& char_choices, + const BLOB_CHOICE& blob_choice, int char_choice_index, + const CHAR_FRAGMENT_INFO* prev_char_frag_info, + WERD_CHOICE* word, float certainties[], float* limit, + WERD_CHOICE* best_choice, int* attempts_left, + void* more_args); + + bool fragment_state_okay(UNICHAR_ID curr_unichar_id, float curr_rating, + float curr_certainty, + const CHAR_FRAGMENT_INFO* prev_char_frag_info, + const char* debug, int word_ending, + CHAR_FRAGMENT_INFO* char_frag_info); /* stopper.cpp *************************************************************/ - bool NoDangerousAmbig(WERD_CHOICE *BestChoice, - DANGERR *fixpt, - bool fix_replaceable, - MATRIX* ratings); + bool NoDangerousAmbig(WERD_CHOICE* BestChoice, DANGERR* fixpt, + bool fix_replaceable, MATRIX* ratings); // Replaces the corresponding wrong ngram in werd_choice with the correct // one. The whole correct n-gram is inserted into the ratings matrix and // the werd_choice: no more fragments!. Rating and certainty of new entries @@ -256,11 +228,11 @@ class Dict { // E.g. for werd_choice mystring'' and ambiguity ''->": werd_choice becomes // mystring", with a new entry in the ratings matrix for ". void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, - UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, - MATRIX *ratings); + UNICHAR_ID correct_ngram_id, WERD_CHOICE* werd_choice, + MATRIX* ratings); /// Returns the length of the shortest alpha run in WordChoice. - int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const; + int LengthOfShortestAlphaRun(const WERD_CHOICE& WordChoice) const; /// Returns true if the certainty of the BestChoice word is within a /// reasonable range of the average certainties for the best choices for /// each character in the segmentation. This test is used to catch words @@ -275,7 +247,7 @@ class Dict { /// Returns false if the best choice for the current word is questionable /// and should be tried again on the second pass or should be flagged to /// the user. - bool AcceptableResult(WERD_RES *word) const; + bool AcceptableResult(WERD_RES* word) const; void EndDangerousAmbigs(); /// Prints the current choices for this word to stdout. void DebugWordChoices(); @@ -285,22 +257,22 @@ class Dict { void SettupStopperPass2(); /* context.cpp *************************************************************/ /// Check a string to see if it matches a set of lexical rules. - int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const; + int case_ok(const WERD_CHOICE& word, const UNICHARSET& unicharset) const; /// Returns true if the word looks like an absolute garbage /// (e.g. image mistakenly recognized as text). - bool absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharset); + bool absolute_garbage(const WERD_CHOICE& word, const UNICHARSET& unicharset); /* dict.cpp ****************************************************************/ /// Initialize Dict class - load dawgs from [lang].traineddata and /// user-specified wordlist and parttern list. - static DawgCache *GlobalDawgCache(); + static DawgCache* GlobalDawgCache(); // Sets up ready for a Load or LoadLSTM. - void SetupForLoad(DawgCache *dawg_cache); + void SetupForLoad(DawgCache* dawg_cache); // Loads the dawgs needed by Tesseract. Call FinishLoad() after. - void Load(const STRING &lang, TessdataManager *data_file); + void Load(const STRING& lang, TessdataManager* data_file); // Loads the dawgs needed by the LSTM model. Call FinishLoad() after. - void LoadLSTM(const STRING &lang, TessdataManager *data_file); + void LoadLSTM(const STRING& lang, TessdataManager* data_file); // Completes the loading process after Load() and/or LoadLSTM(). // Returns false if no dictionaries were loaded. bool FinishLoad(); @@ -308,10 +280,8 @@ class Dict { // Resets the document dictionary analogous to ResetAdaptiveClassifier. void ResetDocumentDictionary() { - if (pending_words_ != nullptr) - pending_words_->clear(); - if (document_words_ != nullptr) - document_words_->clear(); + if (pending_words_ != nullptr) pending_words_->clear(); + if (document_words_ != nullptr) document_words_->clear(); } /** @@ -350,39 +320,34 @@ class Dict { */ // - int def_letter_is_okay(void* void_dawg_args, - UNICHAR_ID unichar_id, bool word_end) const; + int def_letter_is_okay(void* void_dawg_args, UNICHAR_ID unichar_id, + bool word_end) const; - int (Dict::*letter_is_okay_)(void* void_dawg_args, - UNICHAR_ID unichar_id, bool word_end) const; + int (Dict::*letter_is_okay_)(void* void_dawg_args, UNICHAR_ID unichar_id, + bool word_end) const; /// Calls letter_is_okay_ member function. - int LetterIsOkay(void* void_dawg_args, - UNICHAR_ID unichar_id, bool word_end) const { + int LetterIsOkay(void* void_dawg_args, UNICHAR_ID unichar_id, + bool word_end) const { return (this->*letter_is_okay_)(void_dawg_args, unichar_id, word_end); } - /// Probability in context function used by the ngram permuter. - double (Dict::*probability_in_context_)(const char* lang, - const char* context, + double (Dict::*probability_in_context_)(const char* lang, const char* context, int context_bytes, const char* character, int character_bytes); /// Calls probability_in_context_ member function. - double ProbabilityInContext(const char* context, - int context_bytes, - const char* character, - int character_bytes) { - return (this->*probability_in_context_)( - getCCUtil()->lang.string(), - context, context_bytes, - character, character_bytes); + double ProbabilityInContext(const char* context, int context_bytes, + const char* character, int character_bytes) { + return (this->*probability_in_context_)(getCCUtil()->lang.string(), context, + context_bytes, character, + character_bytes); } /// Default (no-op) implementation of probability in context function. - double def_probability_in_context( - const char* lang, const char* context, int context_bytes, - const char* character, int character_bytes) { + double def_probability_in_context(const char* lang, const char* context, + int context_bytes, const char* character, + int character_bytes) { (void)lang; (void)context; (void)context_bytes; @@ -390,20 +355,17 @@ class Dict { (void)character_bytes; return 0.0; } - double ngram_probability_in_context(const char* lang, - const char* context, - int context_bytes, - const char* character, + double ngram_probability_in_context(const char* lang, const char* context, + int context_bytes, const char* character, int character_bytes); // Interface with params model. - float (Dict::*params_model_classify_)(const char *lang, void *path); - float ParamsModelClassify(const char *lang, void *path); + float (Dict::*params_model_classify_)(const char* lang, void* path); + float ParamsModelClassify(const char* lang, void* path); // Call params_model_classify_ member function. - float CallParamsModelClassify(void *path) { + float CallParamsModelClassify(void* path) { ASSERT_HOST(params_model_classify_ != nullptr); // ASSERT_HOST -> assert - return (this->*params_model_classify_)( - getCCUtil()->lang.string(), path); + return (this->*params_model_classify_)(getCCUtil()->lang.string(), path); } inline void SetWildcardID(UNICHAR_ID id) { wildcard_unichar_id_ = id; } @@ -411,13 +373,13 @@ class Dict { /// Return the number of dawgs in the dawgs_ vector. inline int NumDawgs() const { return dawgs_.size(); } /// Return i-th dawg pointer recorded in the dawgs_ vector. - inline const Dawg *GetDawg(int index) const { return dawgs_[index]; } + inline const Dawg* GetDawg(int index) const { return dawgs_[index]; } /// Return the points to the punctuation dawg. - inline const Dawg *GetPuncDawg() const { return punc_dawg_; } + inline const Dawg* GetPuncDawg() const { return punc_dawg_; } /// Return the points to the unambiguous words dawg. - inline const Dawg *GetUnambigDawg() const { return unambig_dawg_; } + inline const Dawg* GetUnambigDawg() const { return unambig_dawg_; } /// Returns the appropriate next node given the EDGE_REF. - static inline NODE_REF GetStartingNode(const Dawg *dawg, EDGE_REF edge_ref) { + static inline NODE_REF GetStartingNode(const Dawg* dawg, EDGE_REF edge_ref) { if (edge_ref == NO_EDGE) return 0; // beginning to explore the dawg NODE_REF node = dawg->next_node(edge_ref); if (node == 0) node = NO_EDGE; // end of word @@ -427,7 +389,7 @@ class Dict { // Given a unichar from a string and a given dawg, return the unichar // we should use to match in that dawg type. (for example, in the number // dawg, all numbers are transformed to kPatternUnicharId). - inline UNICHAR_ID char_for_dawg(UNICHAR_ID ch, const Dawg *dawg) const { + inline UNICHAR_ID char_for_dawg(UNICHAR_ID ch, const Dawg* dawg) const { if (!dawg) return ch; switch (dawg->type()) { case DAWG_TYPE_NUMBER: @@ -442,10 +404,10 @@ class Dict { /// in the given dawg and (after checking that it is valid) records it in /// dawg_args->updated_ative_dawgs. Updates current_permuter if any valid /// edges were found. - void ProcessPatternEdges(const Dawg *dawg, const DawgPosition &info, + void ProcessPatternEdges(const Dawg* dawg, const DawgPosition& info, UNICHAR_ID unichar_id, bool word_end, - DawgArgs *dawg_args, - PermuterType *current_permuter) const; + DawgArgs* dawg_args, + PermuterType* current_permuter) const; /// Read/Write/Access special purpose dawgs which contain words /// only of a certain length (used for phrase search for @@ -458,35 +420,33 @@ class Dict { perm == USER_PATTERN_PERM || perm == COMPOUND_PERM || (numbers_ok && perm == NUMBER_PERM)); } - int valid_word(const WERD_CHOICE &word, bool numbers_ok) const; - int valid_word(const WERD_CHOICE &word) const { + int valid_word(const WERD_CHOICE& word, bool numbers_ok) const; + int valid_word(const WERD_CHOICE& word) const { return valid_word(word, false); // return NO_PERM for words with digits } - int valid_word_or_number(const WERD_CHOICE &word) const { + int valid_word_or_number(const WERD_CHOICE& word) const { return valid_word(word, true); // return NUMBER_PERM for valid numbers } /// This function is used by api/tesseract_cube_combiner.cpp - int valid_word(const char *string) const { + int valid_word(const char* string) const { WERD_CHOICE word(string, getUnicharset()); return valid_word(word); } // Do the two WERD_CHOICEs form a meaningful bigram? - bool valid_bigram(const WERD_CHOICE &word1, const WERD_CHOICE &word2) const; + bool valid_bigram(const WERD_CHOICE& word1, const WERD_CHOICE& word2) const; /// Returns true if the word contains a valid punctuation pattern. /// Note: Since the domains of punctuation symbols and symblos /// used in numbers are not disjoint, a valid number might contain /// an invalid punctuation pattern (e.g. .99). - bool valid_punctuation(const WERD_CHOICE &word); + bool valid_punctuation(const WERD_CHOICE& word); /// Returns true if a good answer is found for the unknown blob rating. - int good_choice(const WERD_CHOICE &choice); + int good_choice(const WERD_CHOICE& choice); /// Adds a word found on this document to the document specific dictionary. - void add_document_word(const WERD_CHOICE &best_choice); + void add_document_word(const WERD_CHOICE& best_choice); /// Adjusts the rating of the given word. - void adjust_word(WERD_CHOICE *word, - bool nonword, XHeightConsistencyEnum xheight_consistency, - float additional_adjust, - bool modify_rating, - bool debug); + void adjust_word(WERD_CHOICE* word, bool nonword, + XHeightConsistencyEnum xheight_consistency, + float additional_adjust, bool modify_rating, bool debug); /// Set wordseg_rating_adjust_factor_ to the given value. inline void SetWordsegRatingAdjustFactor(float f) { wordseg_rating_adjust_factor_ = f; @@ -503,9 +463,9 @@ class Dict { * Each entry i in the table stores a set of amibiguities whose * wrong ngram starts with unichar id i. */ - UnicharAmbigs *dang_ambigs_table_; + UnicharAmbigs* dang_ambigs_table_; /** Same as above, but for ambiguities with replace flag set. */ - UnicharAmbigs *replace_ambigs_table_; + UnicharAmbigs* replace_ambigs_table_; /** Additional certainty padding allowed before a word is rejected. */ FLOAT32 reject_offset_; // Cached UNICHAR_IDs: @@ -515,21 +475,21 @@ class Dict { UNICHAR_ID slash_unichar_id_; // kSlashSymbol. UNICHAR_ID hyphen_unichar_id_; // kHyphenSymbol. // Hyphen-related variables. - WERD_CHOICE *hyphen_word_; + WERD_CHOICE* hyphen_word_; DawgPositionVector hyphen_active_dawgs_; bool last_word_on_line_; // List of lists of "equivalent" UNICHAR_IDs for the purposes of dictionary // matching. The first member of each list is taken as canonical. For // example, the first list contains hyphens and dashes with the first symbol // being the ASCII hyphen minus. - GenericVector > equivalent_symbols_; + GenericVector> equivalent_symbols_; // Dawg Cache reference - this is who we ask to allocate/deallocate dawgs. - DawgCache *dawg_cache_; + DawgCache* dawg_cache_; bool dawg_cache_is_ours_; // we should delete our own dawg_cache_ // Dawgs. DawgVector dawgs_; SuccessorListsVector successors_; - Trie *pending_words_; + Trie* pending_words_; /// The following pointers are only cached for convenience. /// The dawgs will be deleted when dawgs_ vector is destroyed. // bigram_dawg_ points to a dawg of two-word bigrams which always supercede if @@ -537,18 +497,18 @@ class Dict { // the bigrams are stored as space-separated words where: // (1) leading and trailing punctuation has been removed from each word and // (2) any digits have been replaced with '?' marks. - Dawg *bigram_dawg_; + Dawg* bigram_dawg_; // TODO(daria): need to support multiple languages in the future, // so maybe will need to maintain a list of dawgs of each kind. - Dawg *freq_dawg_; - Dawg *unambig_dawg_; - Dawg *punc_dawg_; - Trie *document_words_; + Dawg* freq_dawg_; + Dawg* unambig_dawg_; + Dawg* punc_dawg_; + Trie* document_words_; /// Current segmentation cost adjust factor for word rating. /// See comments in incorporate_segcost. float wordseg_rating_adjust_factor_; // File for recording ambiguities discovered during dictionary search. - FILE *output_ambig_words_file_; + FILE* output_ambig_words_file_; public: /// Variable members. @@ -557,18 +517,15 @@ class Dict { STRING_VAR_H(user_words_file, "", "A filename of user-provided words."); STRING_VAR_H(user_words_suffix, "", "A suffix of user-provided words located in tessdata."); - STRING_VAR_H(user_patterns_file, "", - "A filename of user-provided patterns."); + STRING_VAR_H(user_patterns_file, "", "A filename of user-provided patterns."); STRING_VAR_H(user_patterns_suffix, "", "A suffix of user-provided patterns located in tessdata."); BOOL_VAR_H(load_system_dawg, true, "Load system word dawg."); BOOL_VAR_H(load_freq_dawg, true, "Load frequent word dawg."); BOOL_VAR_H(load_unambig_dawg, true, "Load unambiguous word dawg."); - BOOL_VAR_H(load_punc_dawg, true, - "Load dawg with punctuation patterns."); + BOOL_VAR_H(load_punc_dawg, true, "Load dawg with punctuation patterns."); BOOL_VAR_H(load_number_dawg, true, "Load dawg with number patterns."); - BOOL_VAR_H(load_bigram_dawg, true, - "Load dawg with special word bigrams."); + BOOL_VAR_H(load_bigram_dawg, true, "Load dawg with special word bigrams."); double_VAR_H(xheight_penalty_subscripts, 0.125, "Score penalty (0.1 = 10%) added if there are subscripts " "or superscripts in a word, but it is otherwise OK."); @@ -597,7 +554,8 @@ class Dict { " better)."); STRING_VAR_H(output_ambig_words_file, "", "Output file for ambiguities found in the dictionary"); - INT_VAR_H(dawg_debug_level, 0, "Set to 1 for general debug info" + INT_VAR_H(dawg_debug_level, 0, + "Set to 1 for general debug info" ", to 2 for more details, to 3 to see all the debug messages"); INT_VAR_H(hyphen_debug_level, 0, "Debug level for hyphenated words."); INT_VAR_H(max_viterbi_list_size, 10, "Maximum size of viterbi list."); @@ -620,7 +578,8 @@ class Dict { "Make AcceptableChoice() always return false. Useful" " when there is a need to explore all segmentations"); INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list"); - STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information" + STRING_VAR_H(word_to_debug, "", + "Word for which stopper debug information" " should be printed to stdout"); STRING_VAR_H(word_to_debug_lengths, "", "Lengths of unichars in word_to_debug"); @@ -632,13 +591,15 @@ class Dict { BOOL_VAR_H(save_doc_words, 0, "Save Document Words"); double_VAR_H(doc_dict_pending_threshold, 0.0, "Worst certainty for using pending dictionary"); - double_VAR_H(doc_dict_certainty_threshold, -2.25, "Worst certainty" + double_VAR_H(doc_dict_certainty_threshold, -2.25, + "Worst certainty" " for words that can be inserted into the document dictionary"); - INT_VAR_H(max_permuter_attempts, 10000, "Maximum number of different" - " character choices to consider during permutation." - " This limit is especially useful when user patterns" - " are specified, since overly generic patterns can result in" - " dawg search exploring an overly large number of options."); + INT_VAR_H(max_permuter_attempts, 10000, + "Maximum number of different" + " character choices to consider during permutation." + " This limit is especially useful when user patterns" + " are specified, since overly generic patterns can result in" + " dawg search exploring an overly large number of options."); }; } // namespace tesseract diff --git a/src/dict/hyphen.cpp b/src/dict/hyphen.cpp index 829a177f2e..29ea81b59d 100644 --- a/src/dict/hyphen.cpp +++ b/src/dict/hyphen.cpp @@ -46,8 +46,8 @@ void Dict::reset_hyphen_vars(bool last_word_on_line) { // Update hyphen_word_, and copy the given DawgPositionVectors into // hyphen_active_dawgs_. -void Dict::set_hyphen_word(const WERD_CHOICE &word, - const DawgPositionVector &active_dawgs) { +void Dict::set_hyphen_word(const WERD_CHOICE& word, + const DawgPositionVector& active_dawgs) { if (hyphen_word_ == nullptr) { hyphen_word_ = new WERD_CHOICE(word.unicharset()); hyphen_word_->make_bad(); diff --git a/src/dict/matchdefs.h b/src/dict/matchdefs.h index 719b2a59e6..2c724b51be 100644 --- a/src/dict/matchdefs.h +++ b/src/dict/matchdefs.h @@ -15,41 +15,41 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef MATCHDEFS_H -#define MATCHDEFS_H +#ifndef MATCHDEFS_H +#define MATCHDEFS_H /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "host.h" #include +#include "host.h" #include "unichar.h" /* define the maximum number of classes defined for any matcher and the maximum class id for any matcher. This must be changed if more different classes need to be classified */ -#define MAX_NUM_CLASSES INT16_MAX -#define MAX_CLASS_ID (MAX_NUM_CLASSES - 1) +#define MAX_NUM_CLASSES INT16_MAX +#define MAX_CLASS_ID (MAX_NUM_CLASSES - 1) /** a CLASS_ID is the ascii character to be associated with a class */ using CLASS_ID = UNICHAR_ID; -#define NO_CLASS (0) +#define NO_CLASS (0) /** a PROTO_ID is the index of a prototype within it's class. Valid proto id's are 0 to N-1 where N is the number of prototypes that make up the class. */ using PROTO_ID = int16_t; -#define NO_PROTO (-1) +#define NO_PROTO (-1) /** FEATURE_ID is the index of a feature within a character description The feature id ranges from 0 to N-1 where N is the number of features in a character description. */ using FEATURE_ID = uint8_t; -#define NO_FEATURE 255 -#define NOISE_FEATURE 254 -#define MISSING_PROTO 254 -#define MAX_NUM_FEAT 40 -#define MAX_FEATURE_ID 250 +#define NO_FEATURE 255 +#define NOISE_FEATURE 254 +#define MISSING_PROTO 254 +#define MAX_NUM_FEAT 40 +#define MAX_FEATURE_ID 250 /** a RATING is the match rating returned by a classifier. Higher is better. */ @@ -62,14 +62,12 @@ using RATING = FLOAT32; using CERTAINTY = FLOAT32; /** define a data structure to hold a single match result */ -typedef struct -{ +typedef struct { CLASS_ID Class; RATING Rating; CERTAINTY Certainty; } - MATCH_RESULT; /** define a data structure for holding an array of match results */ @@ -91,19 +89,17 @@ typedef MATCH_RESULT SORTED_CLASSES[MAX_CLASS_ID + 1]; #define IsValidProto(Pid) ((Pid) >= 0) #if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s +#define _ARGS(s) s #else -# define _ARGS(s) () +#define _ARGS(s) () #endif /* matchdefs.c */ -int CompareMatchResults -_ARGS ((MATCH_RESULT * Result1, MATCH_RESULT * Result2)); +int CompareMatchResults _ARGS((MATCH_RESULT * Result1, MATCH_RESULT* Result2)); -void PrintMatchResult _ARGS ((FILE * File, MATCH_RESULT * MatchResult)); +void PrintMatchResult _ARGS((FILE * File, MATCH_RESULT* MatchResult)); -void PrintMatchResults -_ARGS ((FILE * File, int N, MATCH_RESULT MatchResults[])); +void PrintMatchResults _ARGS((FILE * File, int N, MATCH_RESULT MatchResults[])); #undef _ARGS diff --git a/src/dict/permdawg.cpp b/src/dict/permdawg.cpp index 7b5f1f6ec2..0fa3f77052 100644 --- a/src/dict/permdawg.cpp +++ b/src/dict/permdawg.cpp @@ -30,12 +30,12 @@ #include "dawg.h" #include "globals.h" #include "ndminx.h" +#include "params.h" #include "stopper.h" #include "tprintf.h" -#include "params.h" -#include #include +#include #include "dict.h" /*---------------------------------------------------------------------- @@ -50,12 +50,12 @@ namespace tesseract { * keep exploring choices. */ void Dict::go_deeper_dawg_fxn( - const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, - bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, - WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args) { - DawgArgs *more_args = static_cast(void_more_args); - word_ending = (char_choice_index == char_choices.size()-1); + const char* debug, const BLOB_CHOICE_LIST_VECTOR& char_choices, + int char_choice_index, const CHAR_FRAGMENT_INFO* prev_char_frag_info, + bool word_ending, WERD_CHOICE* word, float certainties[], float* limit, + WERD_CHOICE* best_choice, int* attempts_left, void* void_more_args) { + DawgArgs* more_args = static_cast(void_more_args); + word_ending = (char_choice_index == char_choices.size() - 1); int word_index = word->length() - 1; if (best_choice->rating() < *limit) return; // Look up char in DAWG @@ -72,16 +72,15 @@ void Dict::go_deeper_dawg_fxn( int num_unigrams = 0; word->remove_last_unichar_id(); GenericVector encoding; - const char *ngram_str = getUnicharset().id_to_unichar(orig_uch_id); + const char* ngram_str = getUnicharset().id_to_unichar(orig_uch_id); // Since the string came out of the unicharset, failure is impossible. - ASSERT_HOST(getUnicharset().encode_string(ngram_str, true, &encoding, nullptr, - nullptr)); + ASSERT_HOST(getUnicharset().encode_string(ngram_str, true, &encoding, + nullptr, nullptr)); bool unigrams_ok = true; // Construct DawgArgs that reflect the current state. DawgPositionVector unigram_active_dawgs = *(more_args->active_dawgs); DawgPositionVector unigram_updated_dawgs; - DawgArgs unigram_dawg_args(&unigram_active_dawgs, - &unigram_updated_dawgs, + DawgArgs unigram_dawg_args(&unigram_active_dawgs, &unigram_updated_dawgs, more_args->permuter); // Check unigrams in the ngram with letter_is_okay(). for (int i = 0; unigrams_ok && i < encoding.size(); ++i) { @@ -90,8 +89,7 @@ void Dict::go_deeper_dawg_fxn( ++num_unigrams; word->append_unichar_id(uch_id, 1, 0.0, 0.0); unigrams_ok = (this->*letter_is_okay_)( - &unigram_dawg_args, - word->unichar_id(word_index+num_unigrams-1), + &unigram_dawg_args, word->unichar_id(word_index + num_unigrams - 1), word_ending && i == encoding.size() - 1); (*unigram_dawg_args.active_dawgs) = *(unigram_dawg_args.updated_dawgs); if (dawg_debug_level) { @@ -112,8 +110,9 @@ void Dict::go_deeper_dawg_fxn( // Check which dawgs from the dawgs_ vector contain the word // up to and including the current unichar. - if (checked_unigrams || (this->*letter_is_okay_)( - more_args, word->unichar_id(word_index), word_ending)) { + if (checked_unigrams || + (this->*letter_is_okay_)(more_args, word->unichar_id(word_index), + word_ending)) { // Add a new word choice if (word_ending) { if (dawg_debug_level) { @@ -138,7 +137,7 @@ void Dict::go_deeper_dawg_fxn( word_str += " "; fprintf(output_ambig_words_file_, "%s", word_str.string()); } - WERD_CHOICE *adjusted_word = word; + WERD_CHOICE* adjusted_word = word; adjusted_word->set_permuter(more_args->permuter); update_best_choice(*adjusted_word, best_choice); } else { // search the next letter @@ -155,14 +154,13 @@ void Dict::go_deeper_dawg_fxn( --(more_args->active_dawgs); } } else { - if (dawg_debug_level) { - tprintf("last unichar not OK at index %d in %s\n", - word_index, word->debug_string().string()); + if (dawg_debug_level) { + tprintf("last unichar not OK at index %d in %s\n", word_index, + word->debug_string().string()); } } } - /** * dawg_permute_and_select * @@ -172,14 +170,14 @@ void Dict::go_deeper_dawg_fxn( * * Allocate and return a WERD_CHOICE with the best valid word found. */ -WERD_CHOICE *Dict::dawg_permute_and_select( - const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit) { - WERD_CHOICE *best_choice = new WERD_CHOICE(&getUnicharset()); +WERD_CHOICE* Dict::dawg_permute_and_select( + const BLOB_CHOICE_LIST_VECTOR& char_choices, float rating_limit) { + WERD_CHOICE* best_choice = new WERD_CHOICE(&getUnicharset()); best_choice->make_bad(); best_choice->set_rating(rating_limit); if (char_choices.length() == 0 || char_choices.length() > MAX_WERD_LENGTH) return best_choice; - DawgPositionVector *active_dawgs = + DawgPositionVector* active_dawgs = new DawgPositionVector[char_choices.length() + 1]; init_active_dawgs(&(active_dawgs[0]), true); DawgArgs dawg_args(&(active_dawgs[0]), &(active_dawgs[1]), NO_PERM); @@ -189,8 +187,8 @@ WERD_CHOICE *Dict::dawg_permute_and_select( this->go_deeper_fxn_ = &tesseract::Dict::go_deeper_dawg_fxn; int attempts_left = max_permuter_attempts; permute_choices((dawg_debug_level) ? "permute_dawg_debug" : nullptr, - char_choices, 0, nullptr, &word, certainties, &rating_limit, best_choice, - &attempts_left, &dawg_args); + char_choices, 0, nullptr, &word, certainties, &rating_limit, + best_choice, &attempts_left, &dawg_args); delete[] active_dawgs; return best_choice; } @@ -201,22 +199,19 @@ WERD_CHOICE *Dict::dawg_permute_and_select( * Call append_choices() for each BLOB_CHOICE in BLOB_CHOICE_LIST * with the given char_choice_index in char_choices. */ -void Dict::permute_choices( - const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - WERD_CHOICE *word, - float certainties[], - float *limit, - WERD_CHOICE *best_choice, - int *attempts_left, - void *more_args) { +void Dict::permute_choices(const char* debug, + const BLOB_CHOICE_LIST_VECTOR& char_choices, + int char_choice_index, + const CHAR_FRAGMENT_INFO* prev_char_frag_info, + WERD_CHOICE* word, float certainties[], float* limit, + WERD_CHOICE* best_choice, int* attempts_left, + void* more_args) { if (debug) { - tprintf("%s permute_choices: char_choice_index=%d" - " limit=%g rating=%g, certainty=%g word=%s\n", - debug, char_choice_index, *limit, word->rating(), - word->certainty(), word->debug_string().string()); + tprintf( + "%s permute_choices: char_choice_index=%d" + " limit=%g rating=%g, certainty=%g word=%s\n", + debug, char_choice_index, *limit, word->rating(), word->certainty(), + word->debug_string().string()); } if (char_choice_index < char_choices.length()) { BLOB_CHOICE_IT blob_choice_it; @@ -225,8 +220,8 @@ void Dict::permute_choices( blob_choice_it.forward()) { (*attempts_left)--; append_choices(debug, char_choices, *(blob_choice_it.data()), - char_choice_index, prev_char_frag_info, word, - certainties, limit, best_choice, attempts_left, more_args); + char_choice_index, prev_char_frag_info, word, certainties, + limit, best_choice, attempts_left, more_args); if (*attempts_left <= 0) { if (debug) tprintf("permute_choices(): attempts_left is 0\n"); break; @@ -243,20 +238,15 @@ void Dict::permute_choices( * * This function assumes that Dict::go_deeper_fxn_ is set. */ -void Dict::append_choices( - const char *debug, - const BLOB_CHOICE_LIST_VECTOR &char_choices, - const BLOB_CHOICE &blob_choice, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - WERD_CHOICE *word, - float certainties[], - float *limit, - WERD_CHOICE *best_choice, - int *attempts_left, - void *more_args) { +void Dict::append_choices(const char* debug, + const BLOB_CHOICE_LIST_VECTOR& char_choices, + const BLOB_CHOICE& blob_choice, int char_choice_index, + const CHAR_FRAGMENT_INFO* prev_char_frag_info, + WERD_CHOICE* word, float certainties[], float* limit, + WERD_CHOICE* best_choice, int* attempts_left, + void* more_args) { int word_ending = - (char_choice_index == char_choices.length() - 1) ? true : false; + (char_choice_index == char_choices.length() - 1) ? true : false; // Deal with fragments. CHAR_FRAGMENT_INFO char_frag_info; @@ -267,9 +257,9 @@ void Dict::append_choices( } // Search the next letter if this character is a fragment. if (char_frag_info.unichar_id == INVALID_UNICHAR_ID) { - permute_choices(debug, char_choices, char_choice_index + 1, - &char_frag_info, word, certainties, limit, - best_choice, attempts_left, more_args); + permute_choices(debug, char_choices, char_choice_index + 1, &char_frag_info, + word, certainties, limit, best_choice, attempts_left, + more_args); return; } @@ -319,21 +309,20 @@ void Dict::append_choices( * * @returns false if a non-matching fragment is discovered, true otherwise. */ -bool Dict::fragment_state_okay(UNICHAR_ID curr_unichar_id, - float curr_rating, float curr_certainty, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - const char *debug, int word_ending, - CHAR_FRAGMENT_INFO *char_frag_info) { - const CHAR_FRAGMENT *this_fragment = - getUnicharset().get_fragment(curr_unichar_id); - const CHAR_FRAGMENT *prev_fragment = - prev_char_frag_info != nullptr ? prev_char_frag_info->fragment : nullptr; +bool Dict::fragment_state_okay(UNICHAR_ID curr_unichar_id, float curr_rating, + float curr_certainty, + const CHAR_FRAGMENT_INFO* prev_char_frag_info, + const char* debug, int word_ending, + CHAR_FRAGMENT_INFO* char_frag_info) { + const CHAR_FRAGMENT* this_fragment = + getUnicharset().get_fragment(curr_unichar_id); + const CHAR_FRAGMENT* prev_fragment = + prev_char_frag_info != nullptr ? prev_char_frag_info->fragment : nullptr; // Print debug info for fragments. if (debug && (prev_fragment || this_fragment)) { tprintf("%s check fragments: choice=%s word_ending=%d\n", debug, - getUnicharset().debug_str(curr_unichar_id).string(), - word_ending); + getUnicharset().debug_str(curr_unichar_id).string(), word_ending); if (prev_fragment) { tprintf("prev_fragment %s\n", prev_fragment->to_string().string()); } @@ -361,23 +350,22 @@ bool Dict::fragment_state_okay(UNICHAR_ID curr_unichar_id, } if (this_fragment->is_ending()) { char_frag_info->unichar_id = - getUnicharset().unichar_to_id(this_fragment->get_unichar()); + getUnicharset().unichar_to_id(this_fragment->get_unichar()); char_frag_info->fragment = nullptr; if (debug) { - tprintf("Built character %s from fragments\n", - getUnicharset().debug_str( - char_frag_info->unichar_id).string()); + tprintf( + "Built character %s from fragments\n", + getUnicharset().debug_str(char_frag_info->unichar_id).string()); } } else { if (debug) tprintf("Record fragment continuation\n"); char_frag_info->fragment = this_fragment; } // Update certainty and rating. - char_frag_info->rating = - prev_char_frag_info->rating + curr_rating; + char_frag_info->rating = prev_char_frag_info->rating + curr_rating; char_frag_info->num_fragments = prev_char_frag_info->num_fragments + 1; char_frag_info->certainty = - std::min(curr_certainty, prev_char_frag_info->certainty); + std::min(curr_certainty, prev_char_frag_info->certainty); } else { if (this_fragment->is_beginning()) { if (debug) tprintf("Record fragment beginning\n"); diff --git a/src/dict/stopper.cpp b/src/dict/stopper.cpp index fa83b6d32b..4c2e1f1739 100644 --- a/src/dict/stopper.cpp +++ b/src/dict/stopper.cpp @@ -16,12 +16,11 @@ ** limitations under the License. ******************************************************************************/ -#include -#include #include #include +#include +#include -#include "stopper.h" #include "ambigs.h" #include "ccutil.h" #include "const.h" @@ -34,6 +33,7 @@ #include "params.h" #include "ratngs.h" #include "scanutils.h" +#include "stopper.h" #include "unichar.h" /*---------------------------------------------------------------------------- @@ -56,19 +56,23 @@ bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice, bool is_case_ok = case_ok(best_choice, getUnicharset()); if (stopper_debug_level >= 1) { - const char *xht = "UNKNOWN"; + const char* xht = "UNKNOWN"; switch (xheight_consistency) { - case XH_GOOD: xht = "NORMAL"; break; - case XH_SUBNORMAL: xht = "SUBNORMAL"; break; - case XH_INCONSISTENT: xht = "INCONSISTENT"; break; - default: xht = "UNKNOWN"; + case XH_GOOD: + xht = "NORMAL"; + break; + case XH_SUBNORMAL: + xht = "SUBNORMAL"; + break; + case XH_INCONSISTENT: + xht = "INCONSISTENT"; + break; + default: + xht = "UNKNOWN"; } tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n", - best_choice.unichar_string().string(), - (is_valid_word ? 'y' : 'n'), - (is_case_ok ? 'y' : 'n'), - xht, - best_choice.min_x_height(), + best_choice.unichar_string().string(), (is_valid_word ? 'y' : 'n'), + (is_case_ok ? 'y' : 'n'), xht, best_choice.min_x_height(), best_choice.max_x_height()); } // Do not accept invalid words in PASS1. @@ -76,8 +80,7 @@ bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice, if (is_valid_word && is_case_ok) { WordSize = LengthOfShortestAlphaRun(best_choice); WordSize -= stopper_smallword_size; - if (WordSize < 0) - WordSize = 0; + if (WordSize < 0) WordSize = 0; CertaintyThreshold += WordSize * stopper_certainty_per_char; } @@ -85,24 +88,23 @@ bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice, tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n", best_choice.rating(), best_choice.certainty(), CertaintyThreshold); - if (no_dang_ambigs && - best_choice.certainty() > CertaintyThreshold && + if (no_dang_ambigs && best_choice.certainty() > CertaintyThreshold && xheight_consistency < XH_INCONSISTENT && UniformCertainties(best_choice)) { return true; } else { if (stopper_debug_level >= 1) { - tprintf("AcceptableChoice() returned false" - " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n", - no_dang_ambigs, best_choice.certainty(), - CertaintyThreshold, - UniformCertainties(best_choice)); + tprintf( + "AcceptableChoice() returned false" + " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n", + no_dang_ambigs, best_choice.certainty(), CertaintyThreshold, + UniformCertainties(best_choice)); } return false; } } -bool Dict::AcceptableResult(WERD_RES *word) const { +bool Dict::AcceptableResult(WERD_RES* word) const { if (word->best_choice == nullptr) return false; float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_; int WordSize; @@ -122,8 +124,7 @@ bool Dict::AcceptableResult(WERD_RES *word) const { case_ok(*word->best_choice, getUnicharset())) { WordSize = LengthOfShortestAlphaRun(*word->best_choice); WordSize -= stopper_smallword_size; - if (WordSize < 0) - WordSize = 0; + if (WordSize < 0) WordSize = 0; CertaintyThreshold += WordSize * stopper_certainty_per_char; } @@ -133,20 +134,16 @@ bool Dict::AcceptableResult(WERD_RES *word) const { if (word->best_choice->certainty() > CertaintyThreshold && !stopper_no_acceptable_choices) { - if (stopper_debug_level >= 1) - tprintf("ACCEPTED\n"); + if (stopper_debug_level >= 1) tprintf("ACCEPTED\n"); return true; } else { - if (stopper_debug_level >= 1) - tprintf("REJECTED\n"); + if (stopper_debug_level >= 1) tprintf("REJECTED\n"); return false; } } -bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, - DANGERR *fixpt, - bool fix_replaceable, - MATRIX *ratings) { +bool Dict::NoDangerousAmbig(WERD_CHOICE* best_choice, DANGERR* fixpt, + bool fix_replaceable, MATRIX* ratings) { if (stopper_debug_level > 2) { tprintf("\nRunning NoDangerousAmbig() for %s\n", best_choice->debug_string().string()); @@ -174,19 +171,20 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, // if replacements are made the length of best_choice might change. for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) { bool replace = (fix_replaceable && pass == 0); - const UnicharAmbigsVector &table = replace ? - getUnicharAmbigs().replace_ambigs() : getUnicharAmbigs().dang_ambigs(); + const UnicharAmbigsVector& table = replace + ? getUnicharAmbigs().replace_ambigs() + : getUnicharAmbigs().dang_ambigs(); if (!replace) { // Initialize ambig_blob_choices with lists containing a single // unichar id for the correspoding position in best_choice. // best_choice consisting from only the original letters will // have a rating of 0.0. for (i = 0; i < best_choice->length(); ++i) { - BLOB_CHOICE_LIST *lst = new BLOB_CHOICE_LIST(); + BLOB_CHOICE_LIST* lst = new BLOB_CHOICE_LIST(); BLOB_CHOICE_IT lst_it(lst); // TODO(rays/antonova) Put real xheights and y shifts here. - lst_it.add_to_end(new BLOB_CHOICE(best_choice->unichar_id(i), - 0.0, 0.0, -1, 0, 1, 0, BCC_AMBIG)); + lst_it.add_to_end(new BLOB_CHOICE(best_choice->unichar_id(i), 0.0, 0.0, + -1, 0, 1, 0, BCC_AMBIG)); ambig_blob_choices.push_back(lst); } } @@ -194,8 +192,8 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, int wrong_ngram_index; int next_index; int blob_index = 0; - for (i = 0; i < best_choice->length(); blob_index += best_choice->state(i), - ++i) { + for (i = 0; i < best_choice->length(); + blob_index += best_choice->state(i), ++i) { UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i); if (stopper_debug_level > 2) { tprintf("Looking for %s ngrams starting with %s:\n", @@ -212,10 +210,10 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, } AmbigSpec_IT spec_it(table[curr_unichar_id]); for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) { - const AmbigSpec *ambig_spec = spec_it.data(); - wrong_ngram[wrong_ngram_index+1] = INVALID_UNICHAR_ID; - int compare = UnicharIdArrayUtils::compare(wrong_ngram, - ambig_spec->wrong_ngram); + const AmbigSpec* ambig_spec = spec_it.data(); + wrong_ngram[wrong_ngram_index + 1] = INVALID_UNICHAR_ID; + int compare = + UnicharIdArrayUtils::compare(wrong_ngram, ambig_spec->wrong_ngram); if (stopper_debug_level > 2) { tprintf("candidate ngram: "); UnicharIdArrayUtils::print(wrong_ngram, getUnicharset()); @@ -234,29 +232,27 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, if (stopper_debug_level > 1) { tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index, blob_index + num_wrong_blobs, false, - getUnicharset().get_isngram( - ambig_spec->correct_ngram_id), + getUnicharset().get_isngram(ambig_spec->correct_ngram_id), getUnicharset().id_to_unichar(leftmost_id)); } } if (replace) { if (stopper_debug_level > 2) { - tprintf("replace ambiguity with %s : ", - getUnicharset().id_to_unichar( - ambig_spec->correct_ngram_id)); - UnicharIdArrayUtils::print( - ambig_spec->correct_fragments, getUnicharset()); + tprintf( + "replace ambiguity with %s : ", + getUnicharset().id_to_unichar(ambig_spec->correct_ngram_id)); + UnicharIdArrayUtils::print(ambig_spec->correct_fragments, + getUnicharset()); } ReplaceAmbig(i, ambig_spec->wrong_ngram_size, - ambig_spec->correct_ngram_id, - best_choice, ratings); + ambig_spec->correct_ngram_id, best_choice, ratings); } else if (i > 0 || ambig_spec->type != CASE_AMBIG) { // We found dang ambig - update ambig_blob_choices. if (stopper_debug_level > 2) { tprintf("found ambiguity: "); - UnicharIdArrayUtils::print( - ambig_spec->correct_fragments, getUnicharset()); + UnicharIdArrayUtils::print(ambig_spec->correct_fragments, + getUnicharset()); } ambigs_found = true; for (int tmp_index = 0; tmp_index <= wrong_ngram_index; @@ -268,20 +264,21 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, // word not consisting of only the original letters a better // choice and stops searching for alternatives once such a // choice is found. - BLOB_CHOICE_IT bc_it(ambig_blob_choices[i+tmp_index]); - bc_it.add_to_end(new BLOB_CHOICE( - ambig_spec->correct_fragments[tmp_index], -1.0, 0.0, - -1, 0, 1, 0, BCC_AMBIG)); + BLOB_CHOICE_IT bc_it(ambig_blob_choices[i + tmp_index]); + bc_it.add_to_end( + new BLOB_CHOICE(ambig_spec->correct_fragments[tmp_index], + -1.0, 0.0, -1, 0, 1, 0, BCC_AMBIG)); } } spec_it.forward(); } else if (compare == -1) { - if (wrong_ngram_index+1 < ambig_spec->wrong_ngram_size && - ((next_index = wrong_ngram_index+1+i) < best_choice->length())) { + if (wrong_ngram_index + 1 < ambig_spec->wrong_ngram_size && + ((next_index = wrong_ngram_index + 1 + i) < + best_choice->length())) { // Add the next unichar id to wrong_ngram and keep looking for // more ambigs starting with curr_unichar_id in AMBIG_SPEC_LIST. wrong_ngram[++wrong_ngram_index] = - best_choice->unichar_id(next_index); + best_choice->unichar_id(next_index); num_wrong_blobs += best_choice->state(next_index); } else { break; // no more matching ambigs in this AMBIG_SPEC_LIST @@ -290,8 +287,8 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, spec_it.forward(); } } // end searching AmbigSpec_LIST - } // end searching best_choice - } // end searching replace and dangerous ambigs + } // end searching best_choice + } // end searching replace and dangerous ambigs // If any ambiguities were found permute the constructed ambig_blob_choices // to see if an alternative dictionary word can be found. @@ -303,12 +300,12 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, tprintf("\n"); } } - WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0); + WERD_CHOICE* alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0); ambigs_found = (alt_word->rating() < 0.0); if (ambigs_found) { if (stopper_debug_level >= 1) { - tprintf ("Stopper: Possible ambiguous word = %s\n", - alt_word->debug_string().string()); + tprintf("Stopper: Possible ambiguous word = %s\n", + alt_word->debug_string().string()); } if (fixpt != nullptr) { // Note: Currently character choices combined from fragments can only @@ -317,13 +314,13 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, // fragments is added to other functions. int orig_i = 0; for (i = 0; i < alt_word->length(); ++i) { - const UNICHARSET &uchset = getUnicharset(); + const UNICHARSET& uchset = getUnicharset(); bool replacement_is_ngram = uchset.get_isngram(alt_word->unichar_id(i)); UNICHAR_ID leftmost_id = alt_word->unichar_id(i); if (replacement_is_ngram) { // we have to extract the leftmost unichar from the ngram. - const char *str = uchset.id_to_unichar(leftmost_id); + const char* str = uchset.id_to_unichar(leftmost_id); int step = uchset.step(str); if (step) leftmost_id = uchset.unichar_to_id(str, step); } @@ -361,17 +358,15 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, void Dict::EndDangerousAmbigs() {} -void Dict::SettupStopperPass1() { - reject_offset_ = 0.0; -} +void Dict::SettupStopperPass1() { reject_offset_ = 0.0; } void Dict::SettupStopperPass2() { reject_offset_ = stopper_phase2_certainty_rejection_offset; } void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, - UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, - MATRIX *ratings) { + UNICHAR_ID correct_ngram_id, WERD_CHOICE* werd_choice, + MATRIX* ratings) { int num_blobs_to_replace = 0; int begin_blob_index = 0; int i; @@ -409,8 +404,7 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices); if (choice != nullptr) { // Already there. Upgrade if new rating better. - if (new_rating < choice->rating()) - choice->set_rating(new_rating); + if (new_rating < choice->rating()) choice->set_rating(new_rating); if (new_certainty < choice->certainty()) choice->set_certainty(new_certainty); // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState. @@ -422,7 +416,7 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, choice->set_certainty(new_certainty); choice->set_classifier(BCC_AMBIG); choice->set_matrix_cell(coord.col, coord.row); - BLOB_CHOICE_IT it (new_choices); + BLOB_CHOICE_IT it(new_choices); it.add_to_end(choice); } // Remove current unichar from werd_choice. On the last iteration @@ -437,13 +431,13 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, } } if (stopper_debug_level >= 1) { - werd_choice->print("ReplaceAmbig() "); - tprintf("Modified blob_choices: "); - print_ratings_list("\n", new_choices, getUnicharset()); + werd_choice->print("ReplaceAmbig() "); + tprintf("Modified blob_choices: "); + print_ratings_list("\n", new_choices, getUnicharset()); } } -int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const { +int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE& WordChoice) const { int shortest = INT32_MAX; int curr_len = 0; for (int w = 0; w < WordChoice.length(); ++w) { @@ -472,16 +466,14 @@ int Dict::UniformCertainties(const WERD_CHOICE& word) { FLOAT32 Mean, StdDev; int word_length = word.length(); - if (word_length < 3) - return true; + if (word_length < 3) return true; TotalCertainty = TotalCertaintySquared = 0.0; for (int i = 0; i < word_length; ++i) { Certainty = word.certainty(i); TotalCertainty += Certainty; TotalCertaintySquared += Certainty * Certainty; - if (Certainty < WorstCertainty) - WorstCertainty = Certainty; + if (Certainty < WorstCertainty) WorstCertainty = Certainty; } // Subtract off worst certainty from statistics. @@ -490,11 +482,10 @@ int Dict::UniformCertainties(const WERD_CHOICE& word) { TotalCertaintySquared -= WorstCertainty * WorstCertainty; Mean = TotalCertainty / word_length; - Variance = ((word_length * TotalCertaintySquared - - TotalCertainty * TotalCertainty) / - (word_length * (word_length - 1))); - if (Variance < 0.0) - Variance = 0.0; + Variance = + ((word_length * TotalCertaintySquared - TotalCertainty * TotalCertainty) / + (word_length * (word_length - 1))); + if (Variance < 0.0) Variance = 0.0; StdDev = sqrt(Variance); CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev; @@ -503,13 +494,14 @@ int Dict::UniformCertainties(const WERD_CHOICE& word) { if (word.certainty() < CertaintyThreshold) { if (stopper_debug_level >= 1) - tprintf("Stopper: Non-uniform certainty = %4.1f" - " (m=%4.1f, s=%4.1f, t=%4.1f)\n", - word.certainty(), Mean, StdDev, CertaintyThreshold); + tprintf( + "Stopper: Non-uniform certainty = %4.1f" + " (m=%4.1f, s=%4.1f, t=%4.1f)\n", + word.certainty(), Mean, StdDev, CertaintyThreshold); return false; } else { return true; } } -} // namespace tesseract +} // namespace tesseract diff --git a/src/dict/stopper.h b/src/dict/stopper.h index 6a4c5cd580..a6d5286e54 100644 --- a/src/dict/stopper.h +++ b/src/dict/stopper.h @@ -15,8 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#ifndef STOPPER_H -#define STOPPER_H +#ifndef STOPPER_H +#define STOPPER_H /*---------------------------------------------------------------------------- Include Files and Type Defines @@ -32,19 +32,21 @@ class WERD_CHOICE; using BLOB_WIDTH = uint8_t; struct DANGERR_INFO { - DANGERR_INFO() : - begin(-1), end(-1), dangerous(false), correct_is_ngram(false), - leftmost(INVALID_UNICHAR_ID) {} - DANGERR_INFO(int b, int e, bool d, bool n, UNICHAR_ID l) : - begin(b), end(e), dangerous(d), correct_is_ngram(n), leftmost(l) {} + DANGERR_INFO() + : begin(-1), + end(-1), + dangerous(false), + correct_is_ngram(false), + leftmost(INVALID_UNICHAR_ID) {} + DANGERR_INFO(int b, int e, bool d, bool n, UNICHAR_ID l) + : begin(b), end(e), dangerous(d), correct_is_ngram(n), leftmost(l) {} int begin; int end; bool dangerous; bool correct_is_ngram; - UNICHAR_ID leftmost; // in the replacement, what's the leftmost character? + UNICHAR_ID leftmost; // in the replacement, what's the leftmost character? }; using DANGERR = GenericVector; - #endif diff --git a/src/dict/trie.cpp b/src/dict/trie.cpp index decce380dc..9ce8814f84 100644 --- a/src/dict/trie.cpp +++ b/src/dict/trie.cpp @@ -41,11 +41,8 @@ const char kDoNotReverse[] = "RRP_DO_NO_REVERSE"; const char kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"; const char kForceReverse[] = "RRP_FORCE_REVERSE"; -const char * const RTLReversePolicyNames[] = { - kDoNotReverse, - kReverseIfHasRTL, - kForceReverse -}; +const char* const RTLReversePolicyNames[] = {kDoNotReverse, kReverseIfHasRTL, + kForceReverse}; const char Trie::kAlphaPatternUnicode[] = "\u2000"; const char Trie::kDigitPatternUnicode[] = "\u2001"; @@ -54,7 +51,7 @@ const char Trie::kPuncPatternUnicode[] = "\u2003"; const char Trie::kLowerPatternUnicode[] = "\u2004"; const char Trie::kUpperPatternUnicode[] = "\u2005"; -const char *Trie::get_reverse_policy_name(RTLReversePolicy reverse_policy) { +const char* Trie::get_reverse_policy_name(RTLReversePolicy reverse_policy) { return RTLReversePolicyNames[reverse_policy]; } @@ -67,9 +64,9 @@ void Trie::clear() { new_dawg_node(); // Need to allocate node 0. } -bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, - int direction, bool word_end, UNICHAR_ID unichar_id, - EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const { +bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, + bool word_end, UNICHAR_ID unichar_id, + EDGE_RECORD** edge_ptr, EDGE_INDEX* edge_index) const { if (debug_level_ == 3) { tprintf("edge_char_of() given node_ref " REFFORMAT " next_node " REFFORMAT " direction %d word_end %d unichar_id %d, exploring node:\n", @@ -80,8 +77,9 @@ bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, } if (node_ref == NO_EDGE) return false; assert(node_ref < nodes_.size()); - EDGE_VECTOR &vec = (direction == FORWARD_EDGE) ? - nodes_[node_ref]->forward_edges : nodes_[node_ref]->backward_edges; + EDGE_VECTOR& vec = (direction == FORWARD_EDGE) + ? nodes_[node_ref]->forward_edges + : nodes_[node_ref]->backward_edges; int vec_size = vec.size(); if (node_ref == 0 && direction == FORWARD_EDGE) { // binary search EDGE_INDEX start = 0; @@ -90,8 +88,8 @@ bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, int compare; while (start <= end) { k = (start + end) >> 1; // (start + end) / 2 - compare = given_greater_than_edge_rec(next_node, word_end, - unichar_id, vec[k]); + compare = + given_greater_than_edge_rec(next_node, word_end, unichar_id, vec[k]); if (compare == 0) { // given == vec[k] *edge_ptr = &(vec[k]); *edge_index = k; @@ -104,7 +102,7 @@ bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, } } else { // linear search for (int i = 0; i < vec_size; ++i) { - EDGE_RECORD &edge_rec = vec[i]; + EDGE_RECORD& edge_rec = vec[i]; if (edge_rec_match(next_node, word_end, unichar_id, next_node_from_edge_rec(edge_rec), end_of_word_from_edge_rec(edge_rec), @@ -121,8 +119,9 @@ bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, bool marker_flag, int direction, bool word_end, UNICHAR_ID unichar_id) { - EDGE_VECTOR *vec = (direction == FORWARD_EDGE) ? - &(nodes_[node1]->forward_edges) : &(nodes_[node1]->backward_edges); + EDGE_VECTOR* vec = (direction == FORWARD_EDGE) + ? &(nodes_[node1]->forward_edges) + : &(nodes_[node1]->backward_edges); int search_index; if (node1 == 0 && direction == FORWARD_EDGE) { search_index = 0; // find the index to make the add sorted @@ -154,11 +153,9 @@ bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, bool marker_flag, return true; } -void Trie::add_word_ending(EDGE_RECORD *edge_ptr, - NODE_REF the_next_node, - bool marker_flag, - UNICHAR_ID unichar_id) { - EDGE_RECORD *back_edge_ptr; +void Trie::add_word_ending(EDGE_RECORD* edge_ptr, NODE_REF the_next_node, + bool marker_flag, UNICHAR_ID unichar_id) { + EDGE_RECORD* back_edge_ptr; EDGE_INDEX back_edge_index; ASSERT_HOST(edge_char_of(the_next_node, NO_EDGE, BACKWARD_EDGE, false, unichar_id, &back_edge_ptr, &back_edge_index)); @@ -171,17 +168,17 @@ void Trie::add_word_ending(EDGE_RECORD *edge_ptr, *edge_ptr |= (WERD_END_FLAG << flag_start_bit_); } -bool Trie::add_word_to_dawg(const WERD_CHOICE &word, - const GenericVector *repetitions) { +bool Trie::add_word_to_dawg(const WERD_CHOICE& word, + const GenericVector* repetitions) { if (word.length() <= 0) return false; // can't add empty words if (repetitions != nullptr) ASSERT_HOST(repetitions->size() == word.length()); // Make sure the word does not contain invalid unchar ids. for (int i = 0; i < word.length(); ++i) { - if (word.unichar_id(i) < 0 || - word.unichar_id(i) >= unicharset_size_) return false; + if (word.unichar_id(i) < 0 || word.unichar_id(i) >= unicharset_size_) + return false; } - EDGE_RECORD *edge_ptr; + EDGE_RECORD* edge_ptr; NODE_REF last_node = 0; NODE_REF the_next_node; bool marker_flag = false; @@ -189,7 +186,7 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, int i; int32_t still_finding_chars = true; int32_t word_end = false; - bool add_failed = false; + bool add_failed = false; bool found; if (debug_level_ > 1) word.print("\nAdding word: "); @@ -233,8 +230,8 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, add_failed = true; break; } - if (!add_new_edge(last_node, the_next_node, - marker_flag, word_end, unichar_id)) { + if (!add_new_edge(last_node, the_next_node, marker_flag, word_end, + unichar_id)) { add_failed = true; break; } @@ -247,12 +244,12 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, marker_flag = (repetitions != nullptr) ? (*repetitions)[i] : false; if (debug_level_ > 1) tprintf("Adding letter %d\n", unichar_id); if (still_finding_chars && - edge_char_of(last_node, NO_EDGE, FORWARD_EDGE, false, - unichar_id, &edge_ptr, &edge_index)) { + edge_char_of(last_node, NO_EDGE, FORWARD_EDGE, false, unichar_id, + &edge_ptr, &edge_index)) { // An extension of this word already exists in the trie, so we // only have to add the ending flags in both directions. - add_word_ending(edge_ptr, next_node_from_edge_rec(*edge_ptr), - marker_flag, unichar_id); + add_word_ending(edge_ptr, next_node_from_edge_rec(*edge_ptr), marker_flag, + unichar_id); } else { // Add a link to node 0. All leaves connect to node 0 so the back links can // be used in reduction to a dawg. This root backward node has one edge @@ -270,21 +267,22 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, } } -NODE_REF Trie::new_dawg_node() { - TRIE_NODE_RECORD *node = new TRIE_NODE_RECORD(); +NODE_REF +Trie::new_dawg_node() { + TRIE_NODE_RECORD* node = new TRIE_NODE_RECORD(); nodes_.push_back(node); return nodes_.length() - 1; } // Sort function to sort words by decreasing order of length. static int sort_strings_by_dec_length(const void* v1, const void* v2) { - const STRING *s1 = static_cast(v1); - const STRING *s2 = static_cast(v2); + const STRING* s1 = static_cast(v1); + const STRING* s2 = static_cast(v2); return s2->length() - s1->length(); } -bool Trie::read_and_add_word_list(const char *filename, - const UNICHARSET &unicharset, +bool Trie::read_and_add_word_list(const char* filename, + const UNICHARSET& unicharset, Trie::RTLReversePolicy reverse_policy) { GenericVector word_list; if (!read_word_list(filename, &word_list)) return false; @@ -292,11 +290,10 @@ bool Trie::read_and_add_word_list(const char *filename, return add_word_list(word_list, unicharset, reverse_policy); } -bool Trie::read_word_list(const char *filename, - GenericVector* words) { - FILE *word_file; +bool Trie::read_word_list(const char* filename, GenericVector* words) { + FILE* word_file; char line_str[CHARS_PER_LINE]; - int word_count = 0; + int word_count = 0; word_file = fopen(filename, "rb"); if (word_file == nullptr) return false; @@ -309,14 +306,13 @@ bool Trie::read_word_list(const char *filename, tprintf("Read %d words so far\n", word_count); words->push_back(word_str); } - if (debug_level_) - tprintf("Read %d words total.\n", word_count); + if (debug_level_) tprintf("Read %d words total.\n", word_count); fclose(word_file); return true; } -bool Trie::add_word_list(const GenericVector &words, - const UNICHARSET &unicharset, +bool Trie::add_word_list(const GenericVector& words, + const UNICHARSET& unicharset, Trie::RTLReversePolicy reverse_policy) { for (int i = 0; i < words.size(); ++i) { WERD_CHOICE word(words[i].string(), unicharset); @@ -339,7 +335,7 @@ bool Trie::add_word_list(const GenericVector &words, return true; } -void Trie::initialize_patterns(UNICHARSET *unicharset) { +void Trie::initialize_patterns(UNICHARSET* unicharset) { unicharset->unichar_insert(kAlphaPatternUnicode); alpha_pattern_ = unicharset->unichar_to_id(kAlphaPatternUnicode); unicharset->unichar_insert(kDigitPatternUnicode); @@ -357,8 +353,8 @@ void Trie::initialize_patterns(UNICHARSET *unicharset) { } void Trie::unichar_id_to_patterns(UNICHAR_ID unichar_id, - const UNICHARSET &unicharset, - GenericVector *vec) const { + const UNICHARSET& unicharset, + GenericVector* vec) const { bool is_alpha = unicharset.get_isalpha(unichar_id); if (is_alpha) { vec->push_back(alpha_pattern_); @@ -378,7 +374,8 @@ void Trie::unichar_id_to_patterns(UNICHAR_ID unichar_id, } } -UNICHAR_ID Trie::character_class_to_pattern(char ch) { +UNICHAR_ID +Trie::character_class_to_pattern(char ch) { if (ch == 'c') { return alpha_pattern_; } else if (ch == 'd') { @@ -396,14 +393,14 @@ UNICHAR_ID Trie::character_class_to_pattern(char ch) { } } -bool Trie::read_pattern_list(const char *filename, - const UNICHARSET &unicharset) { +bool Trie::read_pattern_list(const char* filename, + const UNICHARSET& unicharset) { if (!initialized_patterns_) { tprintf("please call initialize_patterns() before read_pattern_list()\n"); return false; } - FILE *pattern_file = fopen(filename, "rb"); + FILE* pattern_file = fopen(filename, "rb"); if (pattern_file == nullptr) { tprintf("Error opening pattern file %s\n", filename); return false; @@ -417,7 +414,7 @@ bool Trie::read_pattern_list(const char *filename, // Record the number of repetitions of each unichar in the parallel vector. WERD_CHOICE word(&unicharset); GenericVector repetitions_vec; - const char *str_ptr = string; + const char* str_ptr = string; int step = unicharset.step(str_ptr); bool failed = false; while (step > 0) { @@ -428,8 +425,10 @@ bool Trie::read_pattern_list(const char *filename, curr_unichar_id = unicharset.unichar_to_id(str_ptr, step); } else { if (word.length() < kSaneNumConcreteChars) { - tprintf("Please provide at least %d concrete characters at the" - " beginning of the pattern\n", kSaneNumConcreteChars); + tprintf( + "Please provide at least %d concrete characters at the" + " beginning of the pattern\n", + kSaneNumConcreteChars); failed = true; break; } @@ -439,7 +438,7 @@ bool Trie::read_pattern_list(const char *filename, } else { curr_unichar_id = unicharset.unichar_to_id(str_ptr, step); } - if (curr_unichar_id == INVALID_UNICHAR_ID) { + if (curr_unichar_id == INVALID_UNICHAR_ID) { failed = true; break; // failed to parse this pattern } @@ -448,8 +447,8 @@ bool Trie::read_pattern_list(const char *filename, str_ptr += step; step = unicharset.step(str_ptr); // Check if there is a repetition pattern specified after this unichar. - if (step == 1 && *str_ptr == '\\' && *(str_ptr+1) == '*') { - repetitions_vec[repetitions_vec.size()-1] = true; + if (step == 1 && *str_ptr == '\\' && *(str_ptr + 1) == '*') { + repetitions_vec[repetitions_vec.size() - 1] = true; str_ptr += 2; step = unicharset.step(str_ptr); } @@ -480,10 +479,10 @@ bool Trie::read_pattern_list(const char *filename, void Trie::remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id) { - EDGE_RECORD *edge_ptr = nullptr; + EDGE_RECORD* edge_ptr = nullptr; EDGE_INDEX edge_index = 0; - ASSERT_HOST(edge_char_of(node1, node2, direction, word_end, - unichar_id, &edge_ptr, &edge_index)); + ASSERT_HOST(edge_char_of(node1, node2, direction, word_end, unichar_id, + &edge_ptr, &edge_index)); if (debug_level_ > 1) { tprintf("removed edge in nodes_[" REFFORMAT "]: ", node1); print_edge_rec(*edge_ptr); @@ -513,7 +512,7 @@ void Trie::remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, // a suffix is added to an existing word. Adding words by decreasing // length avoids this problem entirely. Words can still be added in // any order, but it is faster to add the longest first. -SquishedDawg *Trie::trie_to_dawg() { +SquishedDawg* Trie::trie_to_dawg() { root_back_freelist_.clear(); // Will be invalided by trie_to_dawg. if (debug_level_ > 2) { print_all("Before reduction:", MAX_NODE_EDGES_DISPLAY); @@ -528,11 +527,11 @@ SquishedDawg *Trie::trie_to_dawg() { } // Build a translation map from node indices in nodes_ vector to // their target indices in EDGE_ARRAY. - NODE_REF *node_ref_map = new NODE_REF[nodes_.size() + 1]; + NODE_REF* node_ref_map = new NODE_REF[nodes_.size() + 1]; int i, j; node_ref_map[0] = 0; for (i = 0; i < nodes_.size(); ++i) { - node_ref_map[i+1] = node_ref_map[i] + nodes_[i]->forward_edges.size(); + node_ref_map[i + 1] = node_ref_map[i] + nodes_[i]->forward_edges.size(); } int num_forward_edges = node_ref_map[i]; @@ -541,10 +540,10 @@ SquishedDawg *Trie::trie_to_dawg() { EDGE_ARRAY edge_array = new EDGE_RECORD[num_forward_edges]; EDGE_ARRAY edge_array_ptr = edge_array; for (i = 0; i < nodes_.size(); ++i) { - TRIE_NODE_RECORD *node_ptr = nodes_[i]; + TRIE_NODE_RECORD* node_ptr = nodes_[i]; int end = node_ptr->forward_edges.size(); for (j = 0; j < end; ++j) { - EDGE_RECORD &edge_rec = node_ptr->forward_edges[j]; + EDGE_RECORD& edge_rec = node_ptr->forward_edges[j]; NODE_REF node_ref = next_node_from_edge_rec(edge_rec); ASSERT_HOST(node_ref < nodes_.size()); UNICHAR_ID unichar_id = unichar_id_from_edge_rec(edge_rec); @@ -556,13 +555,12 @@ SquishedDawg *Trie::trie_to_dawg() { } delete[] node_ref_map; - return new SquishedDawg(edge_array, num_forward_edges, type_, lang_, - perm_, unicharset_size_, debug_level_); + return new SquishedDawg(edge_array, num_forward_edges, type_, lang_, perm_, + unicharset_size_, debug_level_); } -bool Trie::eliminate_redundant_edges(NODE_REF node, - const EDGE_RECORD &edge1, - const EDGE_RECORD &edge2) { +bool Trie::eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD& edge1, + const EDGE_RECORD& edge2) { if (debug_level_ > 1) { tprintf("\nCollapsing node %" PRIi64 ":\n", node); print_node(node, MAX_NODE_EDGES_DISPLAY); @@ -574,15 +572,15 @@ bool Trie::eliminate_redundant_edges(NODE_REF node, } NODE_REF next_node1 = next_node_from_edge_rec(edge1); NODE_REF next_node2 = next_node_from_edge_rec(edge2); - TRIE_NODE_RECORD *next_node2_ptr = nodes_[next_node2]; + TRIE_NODE_RECORD* next_node2_ptr = nodes_[next_node2]; // Translate all edges going to/from next_node2 to go to/from next_node1. - EDGE_RECORD *edge_ptr = nullptr; + EDGE_RECORD* edge_ptr = nullptr; EDGE_INDEX edge_index; int i; // The backward link in node to next_node2 will be zeroed out by the caller. // Copy all the backward links in next_node2 to node next_node1 for (i = 0; i < next_node2_ptr->backward_edges.size(); ++i) { - const EDGE_RECORD &bkw_edge = next_node2_ptr->backward_edges[i]; + const EDGE_RECORD& bkw_edge = next_node2_ptr->backward_edges[i]; NODE_REF curr_next_node = next_node_from_edge_rec(bkw_edge); UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(bkw_edge); int curr_word_end = end_of_word_from_edge_rec(bkw_edge); @@ -591,15 +589,15 @@ bool Trie::eliminate_redundant_edges(NODE_REF node, curr_word_end, curr_unichar_id); // Relocate the corresponding forward edge in curr_next_node ASSERT_HOST(edge_char_of(curr_next_node, next_node2, FORWARD_EDGE, - curr_word_end, curr_unichar_id, - &edge_ptr, &edge_index)); + curr_word_end, curr_unichar_id, &edge_ptr, + &edge_index)); set_next_node_in_edge_rec(edge_ptr, next_node1); } int next_node2_num_edges = (next_node2_ptr->forward_edges.size() + next_node2_ptr->backward_edges.size()); if (debug_level_ > 1) { - tprintf("removed %d edges from node " REFFORMAT "\n", - next_node2_num_edges, next_node2); + tprintf("removed %d edges from node " REFFORMAT "\n", next_node2_num_edges, + next_node2); } next_node2_ptr->forward_edges.clear(); next_node2_ptr->backward_edges.clear(); @@ -607,10 +605,8 @@ bool Trie::eliminate_redundant_edges(NODE_REF node, return true; } -bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, - UNICHAR_ID unichar_id, - NODE_REF node, - EDGE_VECTOR* backward_edges, +bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, + NODE_REF node, EDGE_VECTOR* backward_edges, NODE_MARKER reduced_nodes) { if (debug_level_ > 1) tprintf("reduce_lettered_edges(edge=" REFFORMAT ")\n", edge_index); @@ -628,15 +624,15 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, ++i; } if (i == backward_edges->size()) break; - const EDGE_RECORD &edge_rec = (*backward_edges)[i]; + const EDGE_RECORD& edge_rec = (*backward_edges)[i]; // Compare it to the rest of the edges with the given unichar_id. for (int j = i + 1; j < backward_edges->size(); ++j) { - const EDGE_RECORD &next_edge_rec = (*backward_edges)[j]; + const EDGE_RECORD& next_edge_rec = (*backward_edges)[j]; if (DeadEdge(next_edge_rec)) continue; UNICHAR_ID next_id = unichar_id_from_edge_rec(next_edge_rec); if (next_id != unichar_id) break; if (end_of_word_from_edge_rec(next_edge_rec) == - end_of_word_from_edge_rec(edge_rec) && + end_of_word_from_edge_rec(edge_rec) && can_be_eliminated(next_edge_rec) && eliminate_redundant_edges(node, edge_rec, next_edge_rec)) { reduced_nodes[next_node_from_edge_rec(edge_rec)] = 0; @@ -648,23 +644,21 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, return did_something; } -void Trie::sort_edges(EDGE_VECTOR *edges) { +void Trie::sort_edges(EDGE_VECTOR* edges) { int num_edges = edges->size(); if (num_edges <= 1) return; - GenericVector > sort_vec; + GenericVector> sort_vec; sort_vec.reserve(num_edges); for (int i = 0; i < num_edges; ++i) { sort_vec.push_back(KDPairInc( unichar_id_from_edge_rec((*edges)[i]), (*edges)[i])); } sort_vec.sort(); - for (int i = 0; i < num_edges; ++i) - (*edges)[i] = sort_vec[i].data; + for (int i = 0; i < num_edges; ++i) (*edges)[i] = sort_vec[i].data; } -void Trie::reduce_node_input(NODE_REF node, - NODE_MARKER reduced_nodes) { - EDGE_VECTOR &backward_edges = nodes_[node]->backward_edges; +void Trie::reduce_node_input(NODE_REF node, NODE_MARKER reduced_nodes) { + EDGE_VECTOR& backward_edges = nodes_[node]->backward_edges; sort_edges(&backward_edges); if (debug_level_ > 1) { tprintf("reduce_node_input(node=" REFFORMAT ")\n", node); @@ -675,9 +669,10 @@ void Trie::reduce_node_input(NODE_REF node, while (edge_index < backward_edges.size()) { if (DeadEdge(backward_edges[edge_index])) continue; UNICHAR_ID unichar_id = - unichar_id_from_edge_rec(backward_edges[edge_index]); - while (reduce_lettered_edges(edge_index, unichar_id, node, - &backward_edges, reduced_nodes)); + unichar_id_from_edge_rec(backward_edges[edge_index]); + while (reduce_lettered_edges(edge_index, unichar_id, node, &backward_edges, + reduced_nodes)) + ; while (++edge_index < backward_edges.size()) { UNICHAR_ID id = unichar_id_from_edge_rec(backward_edges[edge_index]); if (!DeadEdge(backward_edges[edge_index]) && id != unichar_id) break; @@ -701,10 +696,10 @@ void Trie::reduce_node_input(NODE_REF node, void Trie::print_node(NODE_REF node, int max_num_edges) const { if (node == NO_EDGE) return; // nothing to print - TRIE_NODE_RECORD *node_ptr = nodes_[node]; + TRIE_NODE_RECORD* node_ptr = nodes_[node]; int num_fwd = node_ptr->forward_edges.size(); int num_bkw = node_ptr->backward_edges.size(); - EDGE_VECTOR *vec; + EDGE_VECTOR* vec; for (int dir = 0; dir < 2; ++dir) { if (dir == 0) { vec = &(node_ptr->forward_edges); @@ -714,8 +709,8 @@ void Trie::print_node(NODE_REF node, int max_num_edges) const { tprintf("\t"); } int i; - for (i = 0; (dir == 0 ? i < num_fwd : i < num_bkw) && - i < max_num_edges; ++i) { + for (i = 0; (dir == 0 ? i < num_fwd : i < num_bkw) && i < max_num_edges; + ++i) { if (DeadEdge((*vec)[i])) continue; print_edge_rec((*vec)[i]); tprintf(" "); diff --git a/src/dict/trie.h b/src/dict/trie.h index 6ae151fae7..6037d89d40 100644 --- a/src/dict/trie.h +++ b/src/dict/trie.h @@ -25,8 +25,8 @@ #ifndef TRIE_H #define TRIE_H -#include "dawg.h" #include "cutil.h" +#include "dawg.h" #include "genericvector.h" class UNICHARSET; @@ -41,15 +41,15 @@ class UNICHARSET; // It might be cleanest to change the types of most of the Trie/Dawg related // typedefs to int and restrict the casts to extracting these values from // the 64 bit EDGE_RECORD. -using EDGE_INDEX = int64_t ; // index of an edge in a given node -using NODE_MARKER = bool *; -using EDGE_VECTOR = GenericVector ; +using EDGE_INDEX = int64_t; // index of an edge in a given node +using NODE_MARKER = bool*; +using EDGE_VECTOR = GenericVector; struct TRIE_NODE_RECORD { EDGE_VECTOR forward_edges; EDGE_VECTOR backward_edges; }; -using TRIE_NODES = GenericVector ; +using TRIE_NODES = GenericVector; namespace tesseract { @@ -79,14 +79,13 @@ class Trie : public Dawg { static const char kLowerPatternUnicode[]; static const char kUpperPatternUnicode[]; - static const char *get_reverse_policy_name( - RTLReversePolicy reverse_policy); + static const char* get_reverse_policy_name(RTLReversePolicy reverse_policy); // max_num_edges argument allows limiting the amount of memory this // Trie can consume (if a new word insert would cause the Trie to // contain more edges than max_num_edges, all the edges are cleared // so that new inserts can proceed). - Trie(DawgType type, const STRING &lang, PermuterType perm, + Trie(DawgType type, const STRING& lang, PermuterType perm, int unicharset_size, int debug_level) : Dawg(type, lang, perm, debug_level) { init(unicharset_size); @@ -103,10 +102,11 @@ class Trie : public Dawg { /** Returns the edge that corresponds to the letter out of this node. */ EDGE_REF edge_char_of(NODE_REF node_ref, UNICHAR_ID unichar_id, bool word_end) const { - EDGE_RECORD *edge_ptr; + EDGE_RECORD* edge_ptr; EDGE_INDEX edge_index; if (!edge_char_of(node_ref, NO_EDGE, FORWARD_EDGE, word_end, unichar_id, - &edge_ptr, &edge_index)) return NO_EDGE; + &edge_ptr, &edge_index)) + return NO_EDGE; return make_edge_ref(node_ref, edge_index); } @@ -114,10 +114,10 @@ class Trie : public Dawg { * Fills the given NodeChildVector with all the unichar ids (and the * corresponding EDGE_REFs) for which there is an edge out of this node. */ - void unichar_ids_of(NODE_REF node, NodeChildVector *vec, + void unichar_ids_of(NODE_REF node, NodeChildVector* vec, bool word_end) const { - const EDGE_VECTOR &forward_edges = - nodes_[static_cast(node)]->forward_edges; + const EDGE_VECTOR& forward_edges = + nodes_[static_cast(node)]->forward_edges; for (int i = 0; i < forward_edges.size(); ++i) { if (!word_end || end_of_word_from_edge_rec(forward_edges[i])) { vec->push_back(NodeChild(unichar_id_from_edge_rec(forward_edges[i]), @@ -167,25 +167,24 @@ class Trie : public Dawg { // Eliminates redundant edges and returns the pointer to the SquishedDawg. // Note: the caller is responsible for deallocating memory associated // with the returned SquishedDawg pointer. - SquishedDawg *trie_to_dawg(); + SquishedDawg* trie_to_dawg(); // Reads a list of words from the given file and adds into the Trie. // Calls WERD_CHOICE::reverse_unichar_ids_if_rtl() according to the reverse // policy and information in the unicharset. // Returns false on error. - bool read_and_add_word_list(const char *filename, - const UNICHARSET &unicharset, + bool read_and_add_word_list(const char* filename, + const UNICHARSET& unicharset, Trie::RTLReversePolicy reverse); // Reads a list of words from the given file. // Returns false on error. - bool read_word_list(const char *filename, - GenericVector* words); + bool read_word_list(const char* filename, GenericVector* words); // Adds a list of words previously read using read_word_list to the trie // using the given unicharset and reverse_policy to convert to unichar-ids. // Returns false on error. - bool add_word_list(const GenericVector &words, - const UNICHARSET &unicharset, + bool add_word_list(const GenericVector& words, + const UNICHARSET& unicharset, Trie::RTLReversePolicy reverse_policy); // Inserts the list of patterns from the given file into the Trie. @@ -229,30 +228,30 @@ class Trie : public Dawg { // Because of potential problems with speed that could be difficult to // identify, each user pattern has to have at least kSaneNumConcreteChars // concrete characters from the unicharset at the beginning. - bool read_pattern_list(const char *filename, const UNICHARSET &unicharset); + bool read_pattern_list(const char* filename, const UNICHARSET& unicharset); // Initializes the values of *_pattern_ unichar ids. // This function should be called before calling read_pattern_list(). - void initialize_patterns(UNICHARSET *unicharset); + void initialize_patterns(UNICHARSET* unicharset); // Fills in the given unichar id vector with the unichar ids that represent // the patterns of the character classes of the given unichar_id. void unichar_id_to_patterns(UNICHAR_ID unichar_id, - const UNICHARSET &unicharset, - GenericVector *vec) const; + const UNICHARSET& unicharset, + GenericVector* vec) const; // Returns the given EDGE_REF if the EDGE_RECORD that it points to has // a self loop and the given unichar_id matches the unichar_id stored in the // EDGE_RECORD, returns NO_EDGE otherwise. - virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, - UNICHAR_ID unichar_id, + virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const { if (edge_ref == NO_EDGE) return NO_EDGE; - EDGE_RECORD *edge_rec = deref_edge_ref(edge_ref); + EDGE_RECORD* edge_rec = deref_edge_ref(edge_ref); return (marker_flag_from_edge_rec(*edge_rec) && unichar_id == unichar_id_from_edge_rec(*edge_rec) && - word_end == end_of_word_from_edge_rec(*edge_rec)) ? - edge_ref : NO_EDGE; + word_end == end_of_word_from_edge_rec(*edge_rec)) + ? edge_ref + : NO_EDGE; } // Adds a word to the Trie (creates the necessary nodes and edges). @@ -264,9 +263,9 @@ class Trie : public Dawg { // // Return true if add succeeded, false otherwise (e.g. when a word contained // an invalid unichar id or the trie was getting too large and was cleared). - bool add_word_to_dawg(const WERD_CHOICE &word, - const GenericVector *repetitions); - bool add_word_to_dawg(const WERD_CHOICE &word) { + bool add_word_to_dawg(const WERD_CHOICE& word, + const GenericVector* repetitions); + bool add_word_to_dawg(const WERD_CHOICE& word) { return add_word_to_dawg(word, nullptr); } @@ -288,22 +287,21 @@ class Trie : public Dawg { // Returns the pointer to EDGE_RECORD after decoding the location // of the edge from the information in the given EDGE_REF. // This function assumes that EDGE_REF holds valid node/edge indices. - inline EDGE_RECORD *deref_edge_ref(EDGE_REF edge_ref) const { - int edge_index = static_cast( - (edge_ref & letter_mask_) >> LETTER_START_BIT); - int node_index = static_cast( - (edge_ref & deref_node_index_mask_) >> flag_start_bit_); - TRIE_NODE_RECORD *node_rec = nodes_[node_index]; + inline EDGE_RECORD* deref_edge_ref(EDGE_REF edge_ref) const { + int edge_index = + static_cast((edge_ref & letter_mask_) >> LETTER_START_BIT); + int node_index = static_cast((edge_ref & deref_node_index_mask_) >> + flag_start_bit_); + TRIE_NODE_RECORD* node_rec = nodes_[node_index]; return &(node_rec->forward_edges[edge_index]); } /** Constructs EDGE_REF from the given node_index and edge_index. */ inline EDGE_REF make_edge_ref(NODE_REF node_index, EDGE_INDEX edge_index) const { - return ((node_index << flag_start_bit_) | - (edge_index << LETTER_START_BIT)); + return ((node_index << flag_start_bit_) | (edge_index << LETTER_START_BIT)); } /** Sets up this edge record to the requested values. */ - inline void link_edge(EDGE_RECORD *edge, NODE_REF nxt, bool repeats, + inline void link_edge(EDGE_RECORD* edge, NODE_REF nxt, bool repeats, int direction, bool word_end, UNICHAR_ID unichar_id) { EDGE_RECORD flags = 0; if (repeats) flags |= MARKER_FLAG; @@ -314,7 +312,7 @@ class Trie : public Dawg { (static_cast(unichar_id) << LETTER_START_BIT)); } /** Prints the given EDGE_RECORD. */ - inline void print_edge_rec(const EDGE_RECORD &edge_rec) const { + inline void print_edge_rec(const EDGE_RECORD& edge_rec) const { tprintf("|" REFFORMAT "|%s%s%s|%d|", next_node_from_edge_rec(edge_rec), marker_flag_from_edge_rec(edge_rec) ? "R," : "", (direction_from_edge_rec(edge_rec) == FORWARD_EDGE) ? "F" : "B", @@ -323,7 +321,7 @@ class Trie : public Dawg { } // Returns true if the next node in recorded the given EDGE_RECORD // has exactly one forward edge. - inline bool can_be_eliminated(const EDGE_RECORD &edge_rec) { + inline bool can_be_eliminated(const EDGE_RECORD& edge_rec) { NODE_REF node_ref = next_node_from_edge_rec(edge_rec); return (node_ref != NO_EDGE && nodes_[static_cast(node_ref)]->forward_edges.size() == 1); @@ -341,31 +339,28 @@ class Trie : public Dawg { // in the node indicated by node_ref. Fills in the pointer to the // EDGE_RECORD and the index of the edge with the the values // corresponding to the edge found. Returns true if an edge was found. - bool edge_char_of(NODE_REF node_ref, NODE_REF next_node, - int direction, bool word_end, UNICHAR_ID unichar_id, - EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const; + bool edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, + bool word_end, UNICHAR_ID unichar_id, + EDGE_RECORD** edge_ptr, EDGE_INDEX* edge_index) const; // Adds an single edge linkage between node1 and node2 in the direction // indicated by direction argument. bool add_edge_linkage(NODE_REF node1, NODE_REF node2, bool repeats, - int direction, bool word_end, - UNICHAR_ID unichar_id); + int direction, bool word_end, UNICHAR_ID unichar_id); // Adds forward edge linkage from node1 to node2 and the corresponding // backward edge linkage in the other direction. - bool add_new_edge(NODE_REF node1, NODE_REF node2, - bool repeats, bool word_end, UNICHAR_ID unichar_id) { - return (add_edge_linkage(node1, node2, repeats, FORWARD_EDGE, - word_end, unichar_id) && - add_edge_linkage(node2, node1, repeats, BACKWARD_EDGE, - word_end, unichar_id)); + bool add_new_edge(NODE_REF node1, NODE_REF node2, bool repeats, bool word_end, + UNICHAR_ID unichar_id) { + return (add_edge_linkage(node1, node2, repeats, FORWARD_EDGE, word_end, + unichar_id) && + add_edge_linkage(node2, node1, repeats, BACKWARD_EDGE, word_end, + unichar_id)); } // Sets the word ending flags in an already existing edge pair. // Returns true on success. - void add_word_ending(EDGE_RECORD *edge, - NODE_REF the_next_node, - bool repeats, + void add_word_ending(EDGE_RECORD* edge, NODE_REF the_next_node, bool repeats, UNICHAR_ID unichar_id); // Allocates space for a new node in the Trie. @@ -378,8 +373,8 @@ class Trie : public Dawg { // Removes forward edge linkage from node1 to node2 and the corresponding // backward edge linkage in the other direction. - void remove_edge(NODE_REF node1, NODE_REF node2, - bool word_end, UNICHAR_ID unichar_id) { + void remove_edge(NODE_REF node1, NODE_REF node2, bool word_end, + UNICHAR_ID unichar_id) { remove_edge_linkage(node1, node2, FORWARD_EDGE, word_end, unichar_id); remove_edge_linkage(node2, node1, BACKWARD_EDGE, word_end, unichar_id); } @@ -387,18 +382,16 @@ class Trie : public Dawg { // Compares edge1 and edge2 in the given node to see if they point to two // next nodes that could be collapsed. If they do, performs the reduction // and returns true. - bool eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1, - const EDGE_RECORD &edge2); + bool eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD& edge1, + const EDGE_RECORD& edge2); // Assuming that edge_index indicates the first edge in a group of edges // in this node with a particular letter value, looks through these edges // to see if any of them can be collapsed. If so does it. Returns to the // caller when all edges with this letter have been reduced. // Returns true if further reduction is possible with this same letter. - bool reduce_lettered_edges(EDGE_INDEX edge_index, - UNICHAR_ID unichar_id, - NODE_REF node, - EDGE_VECTOR* backward_edges, + bool reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, + NODE_REF node, EDGE_VECTOR* backward_edges, NODE_MARKER reduced_nodes); /** @@ -407,7 +400,7 @@ class Trie : public Dawg { * for all edges in a single node, and since number of edges in each node * is usually quite small, selection sort is used. */ - void sort_edges(EDGE_VECTOR *edges); + void sort_edges(EDGE_VECTOR* edges); /** Eliminates any redundant edges from this node in the Trie. */ void reduce_node_input(NODE_REF node, NODE_MARKER reduced_nodes); @@ -416,7 +409,7 @@ class Trie : public Dawg { UNICHAR_ID character_class_to_pattern(char ch); // Member variables - TRIE_NODES nodes_; // vector of nodes in the Trie + TRIE_NODES nodes_; // vector of nodes in the Trie uint64_t num_edges_; // sum of all edges (forward and backward) uint64_t deref_direction_mask_; // mask for EDGE_REF to extract direction uint64_t deref_node_index_mask_; // mask for EDGE_REF to extract node index diff --git a/src/lstm/convolve.cpp b/src/lstm/convolve.cpp index 04d0afeb69..0932cb4aff 100644 --- a/src/lstm/convolve.cpp +++ b/src/lstm/convolve.cpp @@ -26,9 +26,9 @@ namespace tesseract { Convolve::Convolve(const STRING& name, int ni, int half_x, int half_y) - : Network(NT_CONVOLVE, name, ni, ni * (2*half_x + 1) * (2*half_y + 1)), - half_x_(half_x), half_y_(half_y) { -} + : Network(NT_CONVOLVE, name, ni, ni * (2 * half_x + 1) * (2 * half_y + 1)), + half_x_(half_x), + half_y_(half_y) {} // Writes to the given file. Returns false in case of error. bool Convolve::Serialize(TFile* fp) const { @@ -42,7 +42,7 @@ bool Convolve::Serialize(TFile* fp) const { bool Convolve::DeSerialize(TFile* fp) { if (fp->FReadEndian(&half_x_, sizeof(half_x_), 1) != 1) return false; if (fp->FReadEndian(&half_y_, sizeof(half_y_), 1) != 1) return false; - no_ = ni_ * (2*half_x_ + 1) * (2*half_y_ + 1); + no_ = ni_ * (2 * half_x_ + 1) * (2 * half_y_ + 1); return true; } @@ -83,8 +83,7 @@ void Convolve::Forward(bool debug, const NetworkIO& input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool Convolve::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { back_deltas->Resize(fwd_deltas, ni_); NetworkScratch::IO delta_sum; delta_sum.ResizeFloat(fwd_deltas, ni_, scratch); diff --git a/src/lstm/convolve.h b/src/lstm/convolve.h index fcf5ccf029..85ad267426 100644 --- a/src/lstm/convolve.h +++ b/src/lstm/convolve.h @@ -52,14 +52,13 @@ class Convolve : public Network { // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; + const TransposedArray* input_transpose, NetworkScratch* scratch, + NetworkIO* output) override; // Runs backward propagation of errors on the deltas line. // See Network for a detailed discussion of the arguments. bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; + NetworkScratch* scratch, NetworkIO* back_deltas) override; protected: // Serialized data. @@ -69,5 +68,4 @@ class Convolve : public Network { } // namespace tesseract. - #endif // TESSERACT_LSTM_SUBSAMPLE_H_ diff --git a/src/lstm/fullyconnected.cpp b/src/lstm/fullyconnected.cpp index 0c4b6da1a2..1e7b4c1855 100644 --- a/src/lstm/fullyconnected.cpp +++ b/src/lstm/fullyconnected.cpp @@ -38,8 +38,9 @@ namespace tesseract { FullyConnected::FullyConnected(const STRING& name, int ni, int no, NetworkType type) - : Network(type, name, ni, no), external_source_(nullptr), int_mode_(false) { -} + : Network(type, name, ni, no), + external_source_(nullptr), + int_mode_(false) {} // Returns the shape output from the network given an input shape (which may // be partially unknown ie zero). @@ -66,8 +67,7 @@ void FullyConnected::SetEnableTraining(TrainingState state) { // Temp disable only from enabled. if (training_ == TS_ENABLED) training_ = state; } else { - if (state == TS_ENABLED && training_ != TS_ENABLED) - weights_.InitBackward(); + if (state == TS_ENABLED && training_ != TS_ENABLED) weights_.InitBackward(); training_ = state; } } @@ -93,14 +93,10 @@ int FullyConnected::RemapOutputs(int old_no, const std::vector& code_map) { } // Converts a float network to an int network. -void FullyConnected::ConvertToInt() { - weights_.ConvertToInt(); -} +void FullyConnected::ConvertToInt() { weights_.ConvertToInt(); } // Provides debug output on the weights. -void FullyConnected::DebugWeights() { - weights_.Debug2D(name_.string()); -} +void FullyConnected::DebugWeights() { weights_.Debug2D(name_.string()); } // Writes to the given file. Returns false in case of error. bool FullyConnected::Serialize(TFile* fp) const { @@ -200,8 +196,8 @@ void FullyConnected::ForwardTimeStep(int t, double* output_line) { } } -void FullyConnected::ForwardTimeStep(const double* d_input, - int t, double* output_line) { +void FullyConnected::ForwardTimeStep(const double* d_input, int t, + double* output_line) { // input is copied to source_ line-by-line for cache coherency. if (IsTraining() && external_source_ == nullptr) source_t_.WriteStrided(t, d_input); @@ -209,8 +205,8 @@ void FullyConnected::ForwardTimeStep(const double* d_input, ForwardTimeStep(t, output_line); } -void FullyConnected::ForwardTimeStep(const int8_t* i_input, - int t, double* output_line) { +void FullyConnected::ForwardTimeStep(const int8_t* i_input, int t, + double* output_line) { // input is copied to source_ line-by-line for cache coherency. weights_.MatrixDotVector(i_input, output_line); ForwardTimeStep(t, output_line); @@ -219,8 +215,7 @@ void FullyConnected::ForwardTimeStep(const int8_t* i_input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool FullyConnected::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { if (debug) DisplayBackward(fwd_deltas); back_deltas->Resize(fwd_deltas, ni_); GenericVector errors; diff --git a/src/lstm/fullyconnected.h b/src/lstm/fullyconnected.h index 771830e454..1ab7da1b04 100644 --- a/src/lstm/fullyconnected.h +++ b/src/lstm/fullyconnected.h @@ -57,9 +57,7 @@ class FullyConnected : public Network { // Changes the type to the given type. Used to commute a softmax to a // non-output type for adding on other networks. - void ChangeType(NetworkType type) { - type_ = type; - } + void ChangeType(NetworkType type) { type_ = type; } // Suspends/Enables training by setting the training_ flag. Serialize and // DeSerialize only operate on the run-time data if state is false. @@ -131,6 +129,4 @@ class FullyConnected : public Network { } // namespace tesseract. - - #endif // TESSERACT_LSTM_FULLYCONNECTED_H_ diff --git a/src/lstm/input.cpp b/src/lstm/input.cpp index a104198cff..5885774f9c 100644 --- a/src/lstm/input.cpp +++ b/src/lstm/input.cpp @@ -43,22 +43,16 @@ bool Input::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -bool Input::DeSerialize(TFile* fp) { - return shape_.DeSerialize(fp); -} +bool Input::DeSerialize(TFile* fp) { return shape_.DeSerialize(fp); } // Returns an integer reduction factor that the network applies to the // time sequence. Assumes that any 2-d is already eliminated. Used for // scaling bounding boxes of truth data. -int Input::XScaleFactor() const { - return 1; -} +int Input::XScaleFactor() const { return 1; } // Provides the (minimum) x scale factor to the network (of interest only to // input units) so they can determine how to scale bounding boxes. -void Input::CacheXScaleFactor(int factor) { - cached_x_scale_ = factor; -} +void Input::CacheXScaleFactor(int factor) { cached_x_scale_ = factor; } // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. @@ -71,8 +65,7 @@ void Input::Forward(bool debug, const NetworkIO& input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool Input::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { tprintf("Input::Backward should not be called!!\n"); return false; } diff --git a/src/lstm/input.h b/src/lstm/input.h index cec2241496..419aeb4f04 100644 --- a/src/lstm/input.h +++ b/src/lstm/input.h @@ -68,14 +68,13 @@ class Input : public Network { // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; + const TransposedArray* input_transpose, NetworkScratch* scratch, + NetworkIO* output) override; // Runs backward propagation of errors on the deltas line. // See Network for a detailed discussion of the arguments. bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; + NetworkScratch* scratch, NetworkIO* back_deltas) override; // Creates and returns a Pix of appropriate size for the network from the // image_data. If non-null, *image_scale returns the image scale factor used. // Returns nullptr on error. diff --git a/src/lstm/lstm.cpp b/src/lstm/lstm.cpp index 979c66b48b..3106aef5ba 100644 --- a/src/lstm/lstm.cpp +++ b/src/lstm/lstm.cpp @@ -25,7 +25,7 @@ #include #if !defined(__GNUC__) && defined(_MSC_VER) -#include // _BitScanReverse +#include // _BitScanReverse #endif #include "fullyconnected.h" @@ -38,16 +38,11 @@ #define PARALLEL_IF_OPENMP(__num_threads) \ PRAGMA(omp parallel if (__num_threads > 1) num_threads(__num_threads)) { \ PRAGMA(omp sections nowait) { \ - PRAGMA(omp section) { -#define SECTION_IF_OPENMP \ - } \ - PRAGMA(omp section) \ - { - -#define END_PARALLEL_IF_OPENMP \ - } \ - } /* end of sections */ \ - } /* end of parallel section */ + PRAGMA(omp section){#define SECTION_IF_OPENMP} PRAGMA(omp section) { \ +#define END_PARALLEL_IF_OPENMP \ + } \ + } /* end of sections */ \ + } /* end of parallel section */ // Define the portable PRAGMA macro. #ifdef _MSC_VER // Different _Pragma @@ -62,7 +57,6 @@ #define END_PARALLEL_IF_OPENMP #endif // _OPENMP - namespace tesseract { // Max absolute value of state_. It is reasonably high to enable the state @@ -72,8 +66,7 @@ const double kStateClip = 100.0; const double kErrClip = 1.0f; // Calculate ceil(log2(n)). -static inline uint32_t ceil_log2(uint32_t n) -{ +static inline uint32_t ceil_log2(uint32_t n) { // l2 = (unsigned)log2(n). #if defined(__GNUC__) // Use fast inline assembler code for gcc or clang. @@ -88,8 +81,8 @@ static inline uint32_t ceil_log2(uint32_t n) uint32_t val = n; uint32_t l2 = 0; while (val > 1) { - val >>= 1; - l2++; + val >>= 1; + l2++; } #endif // Round up if n is not a power of 2. @@ -308,7 +301,7 @@ void LSTM::Forward(bool debug, const NetworkIO& input, if (!dim_index.AddOffset(-1, FD_HEIGHT)) valid_2d = false; } // Index of the 2-D revolving buffers (outputs, states). - int mod_t = Modulo(t, buf_width); // Current timestep. + int mod_t = Modulo(t, buf_width); // Current timestep. // Setup the padded input in source. source_.CopyTimeStepGeneral(t, 0, ni_, input, t, 0); if (softmax_ != nullptr) { @@ -439,8 +432,7 @@ void LSTM::Forward(bool debug, const NetworkIO& input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool LSTM::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { if (debug) DisplayBackward(fwd_deltas); back_deltas->ResizeToMap(fwd_deltas.int_mode(), input_map_, ni_); // ======Scratch space.====== @@ -472,8 +464,7 @@ bool LSTM::Backward(bool debug, const NetworkIO& fwd_deltas, } // Parallel-generated sourceerr from each of the gates. NetworkScratch::FloatVec sourceerr_temps[WT_COUNT]; - for (int w = 0; w < WT_COUNT; ++w) - sourceerr_temps[w].Init(na_, scratch); + for (int w = 0; w < WT_COUNT; ++w) sourceerr_temps[w].Init(na_, scratch); int width = input_width_; // Transposed gate errors stored over all timesteps for sum outer. NetworkScratch::GradientStore gate_errors_t[WT_COUNT]; @@ -515,7 +506,7 @@ bool LSTM::Backward(bool debug, const NetworkIO& fwd_deltas, } } // Index of the 2-D revolving buffers (sourceerr, stateerr). - int mod_t = Modulo(t, buf_width); // Current timestep. + int mod_t = Modulo(t, buf_width); // Current timestep. // Zero the state in the major direction only at the end of every row. if (at_last_x) { ZeroVector(na_, curr_sourceerr); @@ -763,5 +754,4 @@ void LSTM::ResizeForward(const NetworkIO& input) { } } - } // namespace tesseract. diff --git a/src/lstm/lstm.h b/src/lstm/lstm.h index bf73affebe..6583f180ad 100644 --- a/src/lstm/lstm.h +++ b/src/lstm/lstm.h @@ -19,8 +19,8 @@ #ifndef TESSERACT_LSTM_LSTM_H_ #define TESSERACT_LSTM_LSTM_H_ -#include "network.h" #include "fullyconnected.h" +#include "network.h" namespace tesseract { @@ -116,9 +116,7 @@ class LSTM : public Network { void PrintDW(); // Returns true of this is a 2-d lstm. - bool Is2D() const { - return is_2d_; - } + bool Is2D() const { return is_2d_; } private: // Resizes forward data to cope with an input image of the given width. @@ -158,5 +156,4 @@ class LSTM : public Network { } // namespace tesseract. - #endif // TESSERACT_LSTM_LSTM_H_ diff --git a/src/lstm/lstmrecognizer.cpp b/src/lstm/lstmrecognizer.cpp index c49dca16f2..fca774c07c 100644 --- a/src/lstm/lstmrecognizer.cpp +++ b/src/lstm/lstmrecognizer.cpp @@ -265,7 +265,8 @@ bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert, pixInvert(pix, pix); Input::PreparePixInput(network_->InputShape(), pix, &randomizer_, &inv_inputs); - network_->Forward(debug, inv_inputs, nullptr, &scratch_space_, &inv_outputs); + network_->Forward(debug, inv_inputs, nullptr, &scratch_space_, + &inv_outputs); float inv_min, inv_mean, inv_sd; OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd); if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) { @@ -295,7 +296,8 @@ bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert, // Converts an array of labels to utf-8, whether or not the labels are // augmented with character boundaries. -STRING LSTMRecognizer::DecodeLabels(const GenericVector& labels) { +STRING +LSTMRecognizer::DecodeLabels(const GenericVector& labels) { STRING result; int end = 1; for (int start = 0; start < labels.size(); start = end) { @@ -409,8 +411,8 @@ void LSTMRecognizer::DebugActivationRange(const NetworkIO& outputs, // Helper returns true if the null_char is the winner at t, and it beats the // null_threshold, or the next choice is space, in which case we will use the // null anyway. -static bool NullIsBest(const NetworkIO& output, float null_thr, - int null_char, int t) { +static bool NullIsBest(const NetworkIO& output, float null_thr, int null_char, + int t) { if (output.f(t)[null_char] >= null_thr) return true; if (output.BestLabel(t, null_char, null_char, nullptr) != UNICHAR_SPACE) return false; diff --git a/src/lstm/lstmrecognizer.h b/src/lstm/lstmrecognizer.h index f1377740b8..b811bae6a9 100644 --- a/src/lstm/lstmrecognizer.h +++ b/src/lstm/lstmrecognizer.h @@ -55,18 +55,10 @@ class LSTMRecognizer { LSTMRecognizer(); ~LSTMRecognizer(); - int NumOutputs() const { - return network_->NumOutputs(); - } - int training_iteration() const { - return training_iteration_; - } - int sample_iteration() const { - return sample_iteration_; - } - double learning_rate() const { - return learning_rate_; - } + int NumOutputs() const { return network_->NumOutputs(); } + int training_iteration() const { return training_iteration_; } + int sample_iteration() const { return sample_iteration_; } + double learning_rate() const { return learning_rate_; } LossType OutputLossType() const { if (network_ == nullptr) return LT_NONE; StaticShape shape; @@ -144,13 +136,9 @@ class LSTMRecognizer { // Sets the sample iteration to the given value. The sample_iteration_ // determines the seed for the random number generator. The training // iteration is incremented only by a successful training iteration. - void SetIteration(int iteration) { - sample_iteration_ = iteration; - } + void SetIteration(int iteration) { sample_iteration_ = iteration; } // Accessors for textline image normalization. - int NumInputs() const { - return network_->NumInputs(); - } + int NumInputs() const { return network_->NumInputs(); } int null_char() const { return null_char_; } // Loads a model from mgr, including the dictionary only if lang is not null. @@ -187,8 +175,8 @@ class LSTMRecognizer { PointerVector* words); // Helper computes min and mean best results in the output. - void OutputStats(const NetworkIO& outputs, - float* min_output, float* mean_output, float* sd); + void OutputStats(const NetworkIO& outputs, float* min_output, + float* mean_output, float* sd); // Recognizes the image_data, returning the labels, // scores, and corresponding pairs of start, end x-coords in coords. // Returned in scale_factor is the reduction factor @@ -208,11 +196,9 @@ class LSTMRecognizer { // Displays the forward results in a window with the characters and // boundaries as determined by the labels and label_coords. - void DisplayForward(const NetworkIO& inputs, - const GenericVector& labels, + void DisplayForward(const NetworkIO& inputs, const GenericVector& labels, const GenericVector& label_coords, - const char* window_name, - ScrollView** window); + const char* window_name, ScrollView** window); // Converts the network output to a sequence of labels. Outputs labels, scores // and start xcoords of each char, and each null_char_, with an additional // final xcoord for the end of the output. @@ -231,8 +217,8 @@ class LSTMRecognizer { // Displays the labels and cuts at the corresponding xcoords. // Size of labels should match xcoords. void DisplayLSTMOutput(const GenericVector& labels, - const GenericVector& xcoords, - int height, ScrollView* window); + const GenericVector& xcoords, int height, + ScrollView* window); // Prints debug output detailing the activation path that is implied by the // xcoords. @@ -252,8 +238,7 @@ class LSTMRecognizer { // Converts the network output to a sequence of labels, with scores, using // the simple character model (each position is a char, and the null_char_ is // mainly intended for tail padding.) - void LabelsViaSimpleText(const NetworkIO& output, - GenericVector* labels, + void LabelsViaSimpleText(const NetworkIO& output, GenericVector* labels, GenericVector* xcoords); // Returns a string corresponding to the label starting at start. Sets *end diff --git a/src/lstm/lstmtrainer.cpp b/src/lstm/lstmtrainer.cpp index dfabb3058a..d5125bfac3 100644 --- a/src/lstm/lstmtrainer.cpp +++ b/src/lstm/lstmtrainer.cpp @@ -21,8 +21,8 @@ #include "config_auto.h" #endif -#include "lstmtrainer.h" #include +#include "lstmtrainer.h" #include "allheaders.h" #include "boxread.h" @@ -183,8 +183,8 @@ bool LSTMTrainer::InitNetwork(const STRING& network_spec, int append_index, return false; } network_str_ += network_spec; - tprintf("Built network:%s from request %s\n", - network_->spec().string(), network_spec.string()); + tprintf("Built network:%s from request %s\n", network_->spec().string(), + network_spec.string()); tprintf( "Training parameters:\n Debug interval = %d," " weights = %g, learning rate = %g, momentum=%g\n", @@ -255,7 +255,8 @@ Trainability LSTMTrainer::GridSearchDictParams( GenericVector truth_labels, ocr_labels, xcoords; ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels)); // NO-dict error. - RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr); + RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), + nullptr); base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr); base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords); @@ -268,7 +269,8 @@ Trainability LSTMTrainer::GridSearchDictParams( for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) { for (double c = min_cert_offset; c < max_cert_offset; c += cert_offset_step) { - search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, nullptr); + search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, + nullptr); search.ExtractBestPathAsLabels(&ocr_labels, &xcoords); truth_text = DecodeLabels(truth_labels); ocr_text = DecodeLabels(ocr_labels); @@ -290,9 +292,7 @@ Trainability LSTMTrainer::GridSearchDictParams( } // Provides output on the distribution of weight values. -void LSTMTrainer::DebugNetwork() { - network_->DebugWeights(); -} +void LSTMTrainer::DebugNetwork() { network_->DebugWeights(); } // Loads a set of lstmf files that were created using the lstm.train config to // tesseract into memory ready for training. Returns false if nothing was @@ -468,7 +468,8 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, if (serialize_amount != NO_BEST_TRAINER && !best_trainer_.Serialize(fp)) return false; GenericVector sub_data; - if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data)) + if (sub_trainer_ != nullptr && + !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data)) return false; if (!sub_data.Serialize(fp)) return false; if (!best_error_history_.Serialize(fp)) return false; @@ -801,8 +802,8 @@ Trainability LSTMTrainer::TrainOnLine(const ImageData* trainingdata, if (trainable == UNENCODABLE || trainable == NOT_BOXED) { return trainable; // Sample was unusable. } - bool debug = debug_interval_ > 0 && - training_iteration() % debug_interval_ == 0; + bool debug = + debug_interval_ > 0 && training_iteration() % debug_interval_ == 0; // Run backprop on the output. NetworkIO bp_deltas; if (network_->IsTraining() && @@ -833,8 +834,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData* trainingdata, return UNENCODABLE; } // Ensure repeatability of random elements even across checkpoints. - bool debug = debug_interval_ > 0 && - training_iteration() % debug_interval_ == 0; + bool debug = + debug_interval_ > 0 && training_iteration() % debug_interval_ == 0; GenericVector truth_labels; if (!EncodeString(trainingdata->transcription(), &truth_labels)) { tprintf("Can't encode transcription: '%s' in language '%s'\n", @@ -967,7 +968,8 @@ void LSTMTrainer::SaveRecognitionDump(GenericVector* data) const { // Returns a suitable filename for a training dump, based on the model_base_, // the iteration and the error rates. -STRING LSTMTrainer::DumpFilename() const { +STRING +LSTMTrainer::DumpFilename() const { STRING filename; filename.add_str_double(model_base_.string(), best_error_rate_); filename.add_str_int("_", best_iteration_); @@ -1072,8 +1074,8 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO& inputs, GenericVector xcoords; LabelsFromOutputs(outputs, &labels, &xcoords); STRING text = DecodeLabels(labels); - tprintf("Iteration %d: ALIGNED TRUTH : %s\n", - training_iteration(), text.string()); + tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(), + text.string()); if (debug_interval_ > 0 && training_iteration() % debug_interval_ == 0) { tprintf("TRAINING activation path for truth string %s\n", truth_text.string()); @@ -1206,8 +1208,7 @@ double LSTMTrainer::ComputeWinnerError(const NetworkIO& deltas) { float abs_delta = fabs(class_errs[c]); // TODO(rays) Filtering cases where the delta is very large to cut out // GT errors doesn't work. Find a better way or get better truth. - if (0.5 <= abs_delta) - ++num_errors; + if (0.5 <= abs_delta) ++num_errors; } } return static_cast(num_errors) / width; @@ -1279,7 +1280,8 @@ void LSTMTrainer::UpdateErrorBuffer(double new_error, ErrorTypes type) { int index = training_iteration_ % kRollingBufferSize_; error_buffers_[type][index] = new_error; // Compute the mean error. - int mean_count = std::min(training_iteration_ + 1, error_buffers_[type].size()); + int mean_count = + std::min(training_iteration_ + 1, error_buffers_[type].size()); double buffer_sum = 0.0; for (int i = 0; i < mean_count; ++i) buffer_sum += error_buffers_[type][i]; double mean = buffer_sum / mean_count; @@ -1307,16 +1309,18 @@ void LSTMTrainer::RollErrorBuffers() { // error rates, and record of progress. // Tester is an externally supplied callback function that tests on some // data set with a given model and records the error rates in a graph. -STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, - const GenericVector& model_data, - TestCallback tester) { - if (error_rate > best_error_rate_ - && iteration < best_iteration_ + kErrorGraphInterval) { +STRING +LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, + const GenericVector& model_data, + TestCallback tester) { + if (error_rate > best_error_rate_ && + iteration < best_iteration_ + kErrorGraphInterval) { // Too soon to record a new point. if (tester != nullptr && !worst_model_data_.empty()) { mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size()); - return tester->Run(worst_iteration_, nullptr, mgr_, CurrentTrainingStage()); + return tester->Run(worst_iteration_, nullptr, mgr_, + CurrentTrainingStage()); } else { return ""; } @@ -1369,8 +1373,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, result = tester->Run(worst_iteration_, worst_error_rates_, mgr_, CurrentTrainingStage()); } - if (result.length() > 0) - best_model_data_.truncate(0); + if (result.length() > 0) best_model_data_.truncate(0); worst_model_data_ = model_data; } } diff --git a/src/lstm/lstmtrainer.h b/src/lstm/lstmtrainer.h index 0fb152f4c9..bc8a6901f1 100644 --- a/src/lstm/lstmtrainer.h +++ b/src/lstm/lstmtrainer.h @@ -92,9 +92,9 @@ class LSTMTrainer : public LSTMRecognizer { // Callbacks may be null, in which case defaults are used. LSTMTrainer(FileReader file_reader, FileWriter file_writer, CheckPointReader checkpoint_reader, - CheckPointWriter checkpoint_writer, - const char* model_base, const char* checkpoint_name, - int debug_interval, int64_t max_memory); + CheckPointWriter checkpoint_writer, const char* model_base, + const char* checkpoint_name, int debug_interval, + int64_t max_memory); virtual ~LSTMTrainer(); // Tries to deserialize a trainer from the given file and silently returns @@ -133,19 +133,11 @@ class LSTMTrainer : public LSTMRecognizer { void InitIterations(); // Accessors. - double ActivationError() const { - return error_rates_[ET_DELTA]; - } + double ActivationError() const { return error_rates_[ET_DELTA]; } double CharError() const { return error_rates_[ET_CHAR_ERROR]; } - const double* error_rates() const { - return error_rates_; - } - double best_error_rate() const { - return best_error_rate_; - } - int best_iteration() const { - return best_iteration_; - } + const double* error_rates() const { return error_rates_; } + double best_error_rate() const { return best_error_rate_; } + int best_iteration() const { return best_iteration_; } int learning_iteration() const { return learning_iteration_; } int improvement_steps() const { return improvement_steps_; } void set_perfect_delay(int delay) { perfect_delay_ = delay; } @@ -162,9 +154,7 @@ class LSTMTrainer : public LSTMRecognizer { [(training_iteration() + kRollingBufferSize_ - 1) % kRollingBufferSize_]; } - const DocumentCache& training_data() const { - return training_data_; - } + const DocumentCache& training_data() const { return training_data_; } DocumentCache* mutable_training_data() { return &training_data_; } // If the training sample is usable, grid searches for the optimal @@ -244,8 +234,9 @@ class LSTMTrainer : public LSTMRecognizer { // Converts the string to integer class labels, with appropriate null_char_s // in between if not in SimpleTextOutput mode. Returns false on failure. bool EncodeString(const STRING& str, GenericVector* labels) const { - return EncodeString(str, GetUnicharset(), IsRecoding() ? &recoder_ : nullptr, - SimpleTextOutput(), null_char_, labels); + return EncodeString(str, GetUnicharset(), + IsRecoding() ? &recoder_ : nullptr, SimpleTextOutput(), + null_char_, labels); } // Static version operates on supplied unicharset, encoder, simple_text. static bool EncodeString(const STRING& str, const UNICHARSET& unicharset, @@ -335,8 +326,7 @@ class LSTMTrainer : public LSTMRecognizer { // as an image in the given window, and the corresponding labels at the // corresponding x_starts. // Returns false if the truth string is empty. - bool DebugLSTMTraining(const NetworkIO& inputs, - const ImageData& trainingdata, + bool DebugLSTMTraining(const NetworkIO& inputs, const ImageData& trainingdata, const NetworkIO& fwd_outputs, const GenericVector& truth_labels, const NetworkIO& outputs); @@ -478,7 +468,7 @@ class LSTMTrainer : public LSTMRecognizer { static const int kRollingBufferSize_ = 1000; GenericVector error_buffers_[ET_COUNT]; // Rounded mean percent trailing training errors in the buffers. - double error_rates_[ET_COUNT]; // RMS training error. + double error_rates_[ET_COUNT]; // RMS training error. // Traineddata file with optional dawgs + UNICHARSET and recoder. TessdataManager mgr_; }; diff --git a/src/lstm/maxpool.cpp b/src/lstm/maxpool.cpp index 5ac2620228..ebf47e38b4 100644 --- a/src/lstm/maxpool.cpp +++ b/src/lstm/maxpool.cpp @@ -22,7 +22,7 @@ namespace tesseract { Maxpool::Maxpool(const STRING& name, int ni, int x_scale, int y_scale) - : Reconfig(name, ni, x_scale, y_scale) { + : Reconfig(name, ni, x_scale, y_scale) { type_ = NT_MAXPOOL; no_ = ni; } @@ -71,12 +71,10 @@ void Maxpool::Forward(bool debug, const NetworkIO& input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool Maxpool::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { back_deltas->ResizeToMap(fwd_deltas.int_mode(), back_map_, ni_); back_deltas->MaxpoolBackward(fwd_deltas, maxes_); return true; } - } // namespace tesseract. diff --git a/src/lstm/maxpool.h b/src/lstm/maxpool.h index 90ddc70800..85ebc9be82 100644 --- a/src/lstm/maxpool.h +++ b/src/lstm/maxpool.h @@ -45,25 +45,19 @@ class Maxpool : public Reconfig { // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; + const TransposedArray* input_transpose, NetworkScratch* scratch, + NetworkIO* output) override; // Runs backward propagation of errors on the deltas line. // See Network for a detailed discussion of the arguments. bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; + NetworkScratch* scratch, NetworkIO* back_deltas) override; private: // Memory of which input was the max. GENERIC_2D_ARRAY maxes_; }; - } // namespace tesseract. - - - - #endif // TESSERACT_LSTM_MAXPOOL_H_ diff --git a/src/lstm/network.cpp b/src/lstm/network.cpp index db0c707869..55ab9cdae3 100644 --- a/src/lstm/network.cpp +++ b/src/lstm/network.cpp @@ -97,7 +97,6 @@ Network::Network(NetworkType type, const STRING& name, int ni, int no) backward_win_(nullptr), randomizer_(nullptr) {} - // Suspends/Enables/Permanently disables training by setting the training_ // flag. Serialize and DeSerialize only operate on the run-time data if state // is TS_DISABLED or TS_TEMP_DISABLE. Specifying TS_TEMP_DISABLE will @@ -121,9 +120,7 @@ void Network::SetEnableTraining(TrainingState state) { // Sets flags that control the action of the network. See NetworkFlags enum // for bit values. -void Network::SetNetworkFlags(uint32_t flags) { - network_flags_ = flags; -} +void Network::SetNetworkFlags(uint32_t flags) { network_flags_ = flags; } // Sets up the network for training. Initializes weights using weights of // scale `range` picked according to the random number generator `randomizer`. @@ -135,9 +132,7 @@ int Network::InitWeights(float range, TRand* randomizer) { // Provides a pointer to a TRand for any networks that care to use it. // Note that randomizer is a borrowed pointer that should outlive the network // and should not be deleted by any of the networks. -void Network::SetRandomizer(TRand* randomizer) { - randomizer_ = randomizer; -} +void Network::SetRandomizer(TRand* randomizer) { randomizer_ = randomizer; } // Sets needs_to_backprop_ to needs_backprop and returns true if // needs_backprop || any weights in this network so the next layer forward @@ -283,8 +278,8 @@ double Network::Random(double range) { void Network::DisplayForward(const NetworkIO& matrix) { #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics Pix* image = matrix.ToPix(); - ClearWindow(false, name_.string(), pixGetWidth(image), - pixGetHeight(image), &forward_win_); + ClearWindow(false, name_.string(), pixGetWidth(image), pixGetHeight(image), + &forward_win_); DisplayImage(image, forward_win_); forward_win_->Update(); #endif // GRAPHICS_DISABLED @@ -304,8 +299,8 @@ void Network::DisplayBackward(const NetworkIO& matrix) { #ifndef GRAPHICS_DISABLED // Creates the window if needed, otherwise clears it. -void Network::ClearWindow(bool tess_coords, const char* window_name, - int width, int height, ScrollView** window) { +void Network::ClearWindow(bool tess_coords, const char* window_name, int width, + int height, ScrollView** window) { if (*window == nullptr) { int min_size = std::min(width, height); if (min_size < kMinWinSize) { diff --git a/src/lstm/network.h b/src/lstm/network.h index ba528f11d1..4d464f1443 100644 --- a/src/lstm/network.h +++ b/src/lstm/network.h @@ -19,8 +19,8 @@ #ifndef TESSERACT_LSTM_NETWORK_H_ #define TESSERACT_LSTM_NETWORK_H_ -#include #include +#include #include "genericvector.h" #include "helpers.h" @@ -95,7 +95,7 @@ enum TrainingState { TS_ENABLED, // Enabled for backprop and to write a training dump. // Re-enable from ANY disabled state. TS_TEMP_DISABLE, // Temporarily disabled to write a recognition dump. - // Valid only for SetEnableTraining. + // Valid only for SetEnableTraining. TS_RE_ENABLE, // Re-Enable from TS_TEMP_DISABLE, but not TS_DISABLED. }; @@ -109,20 +109,12 @@ class Network { virtual ~Network() = default; // Accessors. - NetworkType type() const { - return type_; - } + NetworkType type() const { return type_; } bool IsTraining() const { return training_ == TS_ENABLED; } - bool needs_to_backprop() const { - return needs_to_backprop_; - } + bool needs_to_backprop() const { return needs_to_backprop_; } int num_weights() const { return num_weights_; } - int NumInputs() const { - return ni_; - } - int NumOutputs() const { - return no_; - } + int NumInputs() const { return ni_; } + int NumOutputs() const { return no_; } // Returns the required shape input to the network. virtual StaticShape InputShape() const { StaticShape result; @@ -135,12 +127,8 @@ class Network { result.set_depth(no_); return result; } - const STRING& name() const { - return name_; - } - virtual STRING spec() const { - return "?"; - } + const STRING& name() const { return name_; } + virtual STRING spec() const { return "?"; } bool TestFlag(NetworkFlags flag) const { return (network_flags_ & flag) != 0; } @@ -206,9 +194,7 @@ class Network { // WARNING: if GlobalMinimax is used to vary the scale, this will return // the last used scale factor. Call it before any forward, and it will return // the minimum scale factor of the paths through the GlobalMinimax. - virtual int XScaleFactor() const { - return 1; - } + virtual int XScaleFactor() const { return 1; } // Provides the (minimum) x scale factor to the network (of interest only to // input units) so they can determine how to scale bounding boxes. @@ -271,8 +257,7 @@ class Network { // propagating further backwards. Thus most complete networks will always // return false from Backward! virtual bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { tprintf("Must override Network::Backward for type %d\n", type_); return false; } @@ -284,8 +269,8 @@ class Network { void DisplayBackward(const NetworkIO& matrix); // Creates the window if needed, otherwise clears it. - static void ClearWindow(bool tess_coords, const char* window_name, - int width, int height, ScrollView** window); + static void ClearWindow(bool tess_coords, const char* window_name, int width, + int height, ScrollView** window); // Displays the pix in the given window. and returns the height of the pix. // The pix is pixDestroyed. @@ -296,14 +281,14 @@ class Network { double Random(double range); protected: - NetworkType type_; // Type of the derived network class. - TrainingState training_; // Are we currently training? - bool needs_to_backprop_; // This network needs to output back_deltas. - int32_t network_flags_; // Behavior control flags in NetworkFlags. - int32_t ni_; // Number of input values. - int32_t no_; // Number of output values. - int32_t num_weights_; // Number of weights in this and sub-network. - STRING name_; // A unique name for this layer. + NetworkType type_; // Type of the derived network class. + TrainingState training_; // Are we currently training? + bool needs_to_backprop_; // This network needs to output back_deltas. + int32_t network_flags_; // Behavior control flags in NetworkFlags. + int32_t ni_; // Number of input values. + int32_t no_; // Number of output values. + int32_t num_weights_; // Number of weights in this and sub-network. + STRING name_; // A unique name for this layer. // NOT-serialized debug data. ScrollView* forward_win_; // Recognition debug display window. @@ -314,7 +299,6 @@ class Network { static char const* const kTypeNames[NT_COUNT]; }; - } // namespace tesseract. #endif // TESSERACT_LSTM_NETWORK_H_ diff --git a/src/lstm/networkio.cpp b/src/lstm/networkio.cpp index 20621e0bc6..6b322b67ca 100644 --- a/src/lstm/networkio.cpp +++ b/src/lstm/networkio.cpp @@ -64,8 +64,8 @@ void NetworkIO::ResizeToMap(bool int_mode, const StrideMap& stride_map, } // Shrinks image size by x_scale,y_scale, and use given number of features. -void NetworkIO::ResizeScaled(const NetworkIO& src, - int x_scale, int y_scale, int num_features) { +void NetworkIO::ResizeScaled(const NetworkIO& src, int x_scale, int y_scale, + int num_features) { StrideMap stride_map = src.stride_map_; stride_map.ScaleXY(x_scale, y_scale); ResizeToMap(src.int_mode_, stride_map, num_features); @@ -281,7 +281,7 @@ void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) { float float_pixel = (pixel - black) / contrast - 1.0f; if (int_mode_) { i_[t][f] = ClipToRange(IntCastRounded((INT8_MAX + 1) * float_pixel), - -INT8_MAX, INT8_MAX); + -INT8_MAX, INT8_MAX); } else { f_[t][f] = float_pixel; } @@ -315,8 +315,10 @@ Pix* NetworkIO::ToPix() const { int green = red, blue = red; if (feature_factor == 3) { // With 3 features assume RGB color. - green = ClipToRange(features[y * feature_factor + 1] + 128, 0, 255); - blue = ClipToRange(features[y * feature_factor + 2] + 128, 0, 255); + green = + ClipToRange(features[y * feature_factor + 1] + 128, 0, 255); + blue = + ClipToRange(features[y * feature_factor + 2] + 128, 0, 255); } else if (num_features > 3) { // More than 3 features use false yellow/blue color, assuming a signed // input in the range [-1,1]. @@ -329,23 +331,26 @@ Pix* NetworkIO::ToPix() const { green = red = 0; } } - pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) | - (green << L_GREEN_SHIFT) | - (blue << L_BLUE_SHIFT)); + pixSetPixel(pix, im_x, im_y, + (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | + (blue << L_BLUE_SHIFT)); } } else { const float* features = f_[t]; for (int y = 0; y < num_features; ++y, im_y += im_height) { float pixel = features[y * feature_factor]; // 1 or 2 features use greyscale. - int red = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); + int red = + ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); int green = red, blue = red; if (feature_factor == 3) { // With 3 features assume RGB color. pixel = features[y * feature_factor + 1]; - green = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); + green = + ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); pixel = features[y * feature_factor + 2]; - blue = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); + blue = + ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); } else if (num_features > 3) { // More than 3 features use false yellow/blue color, assuming a signed // input in the range [-1,1]. @@ -358,9 +363,9 @@ Pix* NetworkIO::ToPix() const { green = red = 0; } } - pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) | - (green << L_GREEN_SHIFT) | - (blue << L_BLUE_SHIFT)); + pixSetPixel(pix, im_x, im_y, + (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | + (blue << L_BLUE_SHIFT)); } } } while (index.Increment()); @@ -544,8 +549,7 @@ void NetworkIO::SetActivations(int t, int label, float ok_score) { int num_classes = NumFeatures(); float bad_score = (1.0f - ok_score) / (num_classes - 1); float* targets = f_[t]; - for (int i = 0; i < num_classes; ++i) - targets[i] = bad_score; + for (int i = 0; i < num_classes; ++i) targets[i] = bad_score; targets[label] = ok_score; } @@ -658,8 +662,8 @@ void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, if (int_mode_) { int8_t* line = i_[t] + offset; for (int i = 0; i < num_features; ++i) { - line[i] = ClipToRange(IntCastRounded(input[i] * INT8_MAX), - -INT8_MAX, INT8_MAX); + line[i] = ClipToRange(IntCastRounded(input[i] * INT8_MAX), -INT8_MAX, + INT8_MAX); } } else { float* line = f_[t] + offset; @@ -977,8 +981,7 @@ void NetworkIO::ClipVector(int t, float range) { ASSERT_HOST(!int_mode_); float* v = f_[t]; int dim = f_.dim2(); - for (int i = 0; i < dim; ++i) - v[i] = ClipToRange(v[i], -range, range); + for (int i = 0; i < dim; ++i) v[i] = ClipToRange(v[i], -range, range); } // Returns the padding required for the given number of features in order diff --git a/src/lstm/networkio.h b/src/lstm/networkio.h index b850afdcb6..3a064c848f 100644 --- a/src/lstm/networkio.h +++ b/src/lstm/networkio.h @@ -104,13 +104,9 @@ class NetworkIO { void Print(int num) const; // Returns the timestep width. - int Width() const { - return int_mode_ ? i_.dim1() : f_.dim1(); - } + int Width() const { return int_mode_ ? i_.dim1() : f_.dim1(); } // Returns the number of features. - int NumFeatures() const { - return int_mode_ ? i_.dim2() : f_.dim2(); - } + int NumFeatures() const { return int_mode_ ? i_.dim2() : f_.dim2(); } // Accessor to a timestep of the float matrix. float* f(int t) { ASSERT_HOST(!int_mode_); @@ -124,18 +120,10 @@ class NetworkIO { ASSERT_HOST(int_mode_); return i_[t]; } - bool int_mode() const { - return int_mode_; - } - void set_int_mode(bool is_quantized) { - int_mode_ = is_quantized; - } - const StrideMap& stride_map() const { - return stride_map_; - } - void set_stride_map(const StrideMap& map) { - stride_map_ = map; - } + bool int_mode() const { return int_mode_; } + void set_int_mode(bool is_quantized) { int_mode_ = is_quantized; } + const StrideMap& stride_map() const { return stride_map_; } + void set_stride_map(const StrideMap& map) { stride_map_ = map; } const GENERIC_2D_ARRAY& float_array() const { return f_; } GENERIC_2D_ARRAY* mutable_float_array() { return &f_; } diff --git a/src/lstm/networkscratch.h b/src/lstm/networkscratch.h index c92a9fe2c8..c79bce0318 100644 --- a/src/lstm/networkscratch.h +++ b/src/lstm/networkscratch.h @@ -43,9 +43,7 @@ class NetworkScratch { // This saves memory by having separate int-specific and float-specific // stacks. If the network representation is float, then all NetworkIOs go // to the float stack. - void set_int_mode(bool int_mode) { - int_mode_ = int_mode; - } + void set_int_mode(bool int_mode) { int_mode_ = int_mode; } // Class that acts like a NetworkIO (by having an implicit cast operator), // yet actually holds a pointer to NetworkIOs in the source NetworkScratch, @@ -54,8 +52,8 @@ class NetworkScratch { public: // The NetworkIO should be sized after construction. IO(const NetworkIO& src, NetworkScratch* scratch) - : int_mode_(scratch->int_mode_ && src.int_mode()), - scratch_space_(scratch) { + : int_mode_(scratch->int_mode_ && src.int_mode()), + scratch_space_(scratch) { network_io_ = int_mode_ ? scratch_space_->int_stack_.Borrow() : scratch_space_->float_stack_.Borrow(); } @@ -110,15 +108,9 @@ class NetworkScratch { // Returns a ref to a NetworkIO that enables *this to be treated as if // it were just a NetworkIO*. - NetworkIO& operator*() { - return *network_io_; - } - NetworkIO* operator->() { - return network_io_; - } - operator NetworkIO*() { - return network_io_; - } + NetworkIO& operator*() { return *network_io_; } + NetworkIO* operator->() { return network_io_; } + operator NetworkIO*() { return network_io_; } private: // True if this is from the always-float stack, otherwise the default stack. @@ -137,7 +129,7 @@ class NetworkScratch { public: // The array will have size elements in it, uninitialized. FloatVec(int size, NetworkScratch* scratch) - : vec_(nullptr), scratch_space_(scratch) { + : vec_(nullptr), scratch_space_(scratch) { Init(size, scratch); } // Default constructor is for arrays. Use Init to setup. @@ -178,7 +170,8 @@ class NetworkScratch { // Default constructor is for arrays. Use Init to setup. GradientStore() : array_(nullptr), scratch_space_(nullptr) {} ~GradientStore() { - if (scratch_space_ != nullptr) scratch_space_->array_stack_.Return(array_); + if (scratch_space_ != nullptr) + scratch_space_->array_stack_.Return(array_); } void Init(int size1, int size2, NetworkScratch* scratch) { @@ -204,10 +197,10 @@ class NetworkScratch { // Class that does the work of holding a stack of objects, a stack pointer // and a vector of in-use flags, so objects can be returned out of order. // It is safe to attempt to Borrow/Return in multiple threads. - template class Stack { + template + class Stack { public: - Stack() : stack_top_(0) { - } + Stack() : stack_top_(0) {} // Lends out the next free item, creating one if none available, sets // the used flags and increments the stack top. @@ -248,7 +241,7 @@ class NetworkScratch { // deleted until the NetworkScratch is deleted. Stack int_stack_; Stack float_stack_; - Stack > vec_stack_; + Stack> vec_stack_; Stack array_stack_; }; diff --git a/src/lstm/parallel.cpp b/src/lstm/parallel.cpp index b4f45aa55f..aa64aa26e7 100644 --- a/src/lstm/parallel.cpp +++ b/src/lstm/parallel.cpp @@ -108,8 +108,7 @@ void Parallel::Forward(bool debug, const NetworkIO& input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool Parallel::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair, // or a 2-d LSTM quad, do debug locally, and don't pass the flag on. if (debug && type_ != NT_PARALLEL) { diff --git a/src/lstm/parallel.h b/src/lstm/parallel.h index 671de96c76..d512dde933 100644 --- a/src/lstm/parallel.h +++ b/src/lstm/parallel.h @@ -64,14 +64,13 @@ class Parallel : public Plumbing { // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; + const TransposedArray* input_transpose, NetworkScratch* scratch, + NetworkIO* output) override; // Runs backward propagation of errors on the deltas line. // See Network for a detailed discussion of the arguments. bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; + NetworkScratch* scratch, NetworkIO* back_deltas) override; private: // If *this is a NT_REPLICATED, then it feeds a replicated network with diff --git a/src/lstm/plumbing.cpp b/src/lstm/plumbing.cpp index 973643add6..3c26694df3 100644 --- a/src/lstm/plumbing.cpp +++ b/src/lstm/plumbing.cpp @@ -22,24 +22,20 @@ namespace tesseract { // ni_ and no_ will be set by AddToStack. -Plumbing::Plumbing(const STRING& name) - : Network(NT_PARALLEL, name, 0, 0) { -} +Plumbing::Plumbing(const STRING& name) : Network(NT_PARALLEL, name, 0, 0) {} // Suspends/Enables training by setting the training_ flag. Serialize and // DeSerialize only operate on the run-time data if state is false. void Plumbing::SetEnableTraining(TrainingState state) { Network::SetEnableTraining(state); - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->SetEnableTraining(state); + for (int i = 0; i < stack_.size(); ++i) stack_[i]->SetEnableTraining(state); } // Sets flags that control the action of the network. See NetworkFlags enum // for bit values. void Plumbing::SetNetworkFlags(uint32_t flags) { Network::SetNetworkFlags(flags); - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->SetNetworkFlags(flags); + for (int i = 0; i < stack_.size(); ++i) stack_[i]->SetNetworkFlags(flags); } // Sets up the network for training. Initializes weights using weights of @@ -66,16 +62,14 @@ int Plumbing::RemapOutputs(int old_no, const std::vector& code_map) { // Converts a float network to an int network. void Plumbing::ConvertToInt() { - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->ConvertToInt(); + for (int i = 0; i < stack_.size(); ++i) stack_[i]->ConvertToInt(); } // Provides a pointer to a TRand for any networks that care to use it. // Note that randomizer is a borrowed pointer that should outlive the network // and should not be deleted by any of the networks. void Plumbing::SetRandomizer(TRand* randomizer) { - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->SetRandomizer(randomizer); + for (int i = 0; i < stack_.size(); ++i) stack_[i]->SetRandomizer(randomizer); } // Adds the given network to the stack. @@ -117,9 +111,7 @@ bool Plumbing::SetupNeedsBackprop(bool needs_backprop) { // WARNING: if GlobalMinimax is used to vary the scale, this will return // the last used scale factor. Call it before any forward, and it will return // the minimum scale factor of the paths through the GlobalMinimax. -int Plumbing::XScaleFactor() const { - return stack_[0]->XScaleFactor(); -} +int Plumbing::XScaleFactor() const { return stack_[0]->XScaleFactor(); } // Provides the (minimum) x scale factor to the network (of interest only to // input units) so they can determine how to scale bounding boxes. @@ -131,8 +123,7 @@ void Plumbing::CacheXScaleFactor(int factor) { // Provides debug output on the weights. void Plumbing::DebugWeights() { - for (int i = 0; i < stack_.size(); ++i) - stack_[i]->DebugWeights(); + for (int i = 0; i < stack_.size(); ++i) stack_[i]->DebugWeights(); } // Returns a set of strings representing the layer-ids of all layers below. diff --git a/src/lstm/plumbing.h b/src/lstm/plumbing.h index 7c55e2e1c2..59e9889c61 100644 --- a/src/lstm/plumbing.h +++ b/src/lstm/plumbing.h @@ -93,9 +93,7 @@ class Plumbing : public Network { void DebugWeights() override; // Returns the current stack. - const PointerVector& stack() const { - return stack_; - } + const PointerVector& stack() const { return stack_; } // Returns a set of strings representing the layer-ids of all layers below. void EnumerateLayers(const STRING* prefix, GenericVector* layers) const; diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp index 9119f28e8e..730056aeee 100644 --- a/src/lstm/recodebeam.cpp +++ b/src/lstm/recodebeam.cpp @@ -71,7 +71,8 @@ RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress& recoder, space_delimited_(true), is_simple_text_(simple_text), null_char_(null_char) { - if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false; + if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) + space_delimited_ = false; } // Decodes the set of network outputs, storing the lattice internally. @@ -185,9 +186,10 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box, bool leading_space = word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE; // Create a WERD_RES for the output word. - WERD_RES* word_res = InitializeWord( - leading_space, line_box, word_start, word_end, - std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor); + WERD_RES* word_res = + InitializeWord(leading_space, line_box, word_start, word_end, + std::min(space_cert, prev_space_cert), unicharset, + xcoords, scale_factor); for (int i = word_start; i < word_end; ++i) { BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; BLOB_CHOICE_IT bc_it(choices); @@ -235,7 +237,8 @@ void RecodeBeamSearch::DebugBeamPos(const UNICHARSET& unicharset, for (int i = 0; i < heap_size; ++i) { const RecodeNode* node = &heap.get(i).data; if (node->unichar_id == INVALID_UNICHAR_ID) { - if (null_best == nullptr || null_best->score < node->score) null_best = node; + if (null_best == nullptr || null_best->score < node->score) + null_best = node; } else { if (unichar_bests[node->unichar_id] == nullptr || unichar_bests[node->unichar_id]->score < node->score) { @@ -672,7 +675,8 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true, cert, cont, prev, step); PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, - word_start, true, false, cert, prev, nullptr, nodawg_heap); + word_start, true, false, cert, prev, nullptr, + nodawg_heap); } } else { delete updated_dawgs; @@ -833,8 +837,9 @@ void RecodeBeamSearch::ExtractBestPaths( (dawg_node->unichar_id == INVALID_UNICHAR_ID || dawg_node->duplicate)) dawg_node = dawg_node->prev; - if (dawg_node == nullptr || (!dawg_node->end_of_word && - dawg_node->unichar_id != UNICHAR_SPACE)) { + if (dawg_node == nullptr || + (!dawg_node->end_of_word && + dawg_node->unichar_id != UNICHAR_SPACE)) { // Dawg node is not valid. continue; } diff --git a/src/lstm/reconfig.cpp b/src/lstm/reconfig.cpp index d4b0bb44ba..4975d974f4 100644 --- a/src/lstm/reconfig.cpp +++ b/src/lstm/reconfig.cpp @@ -22,9 +22,9 @@ namespace tesseract { Reconfig::Reconfig(const STRING& name, int ni, int x_scale, int y_scale) - : Network(NT_RECONFIG, name, ni, ni * x_scale * y_scale), - x_scale_(x_scale), y_scale_(y_scale) { -} + : Network(NT_RECONFIG, name, ni, ni * x_scale * y_scale), + x_scale_(x_scale), + y_scale_(y_scale) {} // Returns the shape output from the network given an input shape (which may // be partially unknown ie zero). @@ -43,9 +43,7 @@ StaticShape Reconfig::OutputShape(const StaticShape& input_shape) const { // WARNING: if GlobalMinimax is used to vary the scale, this will return // the last used scale factor. Call it before any forward, and it will return // the minimum scale factor of the paths through the GlobalMinimax. -int Reconfig::XScaleFactor() const { - return x_scale_; -} +int Reconfig::XScaleFactor() const { return x_scale_; } // Writes to the given file. Returns false in case of error. bool Reconfig::Serialize(TFile* fp) const { @@ -92,8 +90,7 @@ void Reconfig::Forward(bool debug, const NetworkIO& input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool Reconfig::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { back_deltas->ResizeToMap(fwd_deltas.int_mode(), back_map_, ni_); StrideMap::Index src_index(fwd_deltas.stride_map()); do { @@ -116,5 +113,4 @@ bool Reconfig::Backward(bool debug, const NetworkIO& fwd_deltas, return needs_to_backprop_; } - } // namespace tesseract. diff --git a/src/lstm/reconfig.h b/src/lstm/reconfig.h index 6e26399dbb..905e0fce1c 100644 --- a/src/lstm/reconfig.h +++ b/src/lstm/reconfig.h @@ -19,7 +19,6 @@ #ifndef TESSERACT_LSTM_RECONFIG_H_ #define TESSERACT_LSTM_RECONFIG_H_ - #include "genericvector.h" #include "matrix.h" #include "network.h" @@ -62,14 +61,13 @@ class Reconfig : public Network { // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; + const TransposedArray* input_transpose, NetworkScratch* scratch, + NetworkIO* output) override; // Runs backward propagation of errors on the deltas line. // See Network for a detailed discussion of the arguments. bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; + NetworkScratch* scratch, NetworkIO* back_deltas) override; protected: // Non-serialized data used to store parameters between forward and back. @@ -81,5 +79,4 @@ class Reconfig : public Network { } // namespace tesseract. - #endif // TESSERACT_LSTM_SUBSAMPLE_H_ diff --git a/src/lstm/reversed.cpp b/src/lstm/reversed.cpp index 7ef7006d39..9f73b6b918 100644 --- a/src/lstm/reversed.cpp +++ b/src/lstm/reversed.cpp @@ -64,8 +64,7 @@ void Reversed::Forward(bool debug, const NetworkIO& input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool Reversed::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { NetworkScratch::IO rev_input(fwd_deltas, scratch); ReverseData(fwd_deltas, rev_input); NetworkScratch::IO rev_output(fwd_deltas, scratch); diff --git a/src/lstm/reversed.h b/src/lstm/reversed.h index ec91e7e7ab..8093a82ef1 100644 --- a/src/lstm/reversed.h +++ b/src/lstm/reversed.h @@ -70,14 +70,13 @@ class Reversed : public Plumbing { // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; + const TransposedArray* input_transpose, NetworkScratch* scratch, + NetworkIO* output) override; // Runs backward propagation of errors on the deltas line. // See Network for a detailed discussion of the arguments. bool Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) override; + NetworkScratch* scratch, NetworkIO* back_deltas) override; private: // Copies src to *dest with the reversal according to type_. diff --git a/src/lstm/series.cpp b/src/lstm/series.cpp index 0c1599fb89..30ea054dbc 100644 --- a/src/lstm/series.cpp +++ b/src/lstm/series.cpp @@ -26,9 +26,7 @@ namespace tesseract { // ni_ and no_ will be set by AddToStack. -Series::Series(const STRING& name) : Plumbing(name) { - type_ = NT_SERIES; -} +Series::Series(const STRING& name) : Plumbing(name) { type_ = NT_SERIES; } // Returns the shape output from the network given an input shape (which may // be partially unknown ie zero). @@ -49,8 +47,8 @@ int Series::InitWeights(float range, TRand* randomizer) { tprintf("Num outputs,weights in Series:\n"); for (int i = 0; i < stack_.size(); ++i) { int weights = stack_[i]->InitWeights(range, randomizer); - tprintf(" %s:%d, %d\n", - stack_[i]->spec().string(), stack_[i]->NumOutputs(), weights); + tprintf(" %s:%d, %d\n", stack_[i]->spec().string(), + stack_[i]->NumOutputs(), weights); num_weights_ += weights; } tprintf("Total weights = %d\n", num_weights_); @@ -91,8 +89,7 @@ bool Series::SetupNeedsBackprop(bool needs_backprop) { // the minimum scale factor of the paths through the GlobalMinimax. int Series::XScaleFactor() const { int factor = 1; - for (int i = 0; i < stack_.size(); ++i) - factor *= stack_[i]->XScaleFactor(); + for (int i = 0; i < stack_.size(); ++i) factor *= stack_[i]->XScaleFactor(); return factor; } @@ -127,8 +124,7 @@ void Series::Forward(bool debug, const NetworkIO& input, // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool Series::Backward(bool debug, const NetworkIO& fwd_deltas, - NetworkScratch* scratch, - NetworkIO* back_deltas) { + NetworkScratch* scratch, NetworkIO* back_deltas) { if (!IsTraining()) return false; int stack_size = stack_.size(); ASSERT_HOST(stack_size > 1); @@ -161,8 +157,8 @@ void Series::SplitAt(int last_start, Series** start, Series** end) { *start = nullptr; *end = nullptr; if (last_start < 0 || last_start >= stack_.size()) { - tprintf("Invalid split index %d must be in range [0,%d]!\n", - last_start, stack_.size() - 1); + tprintf("Invalid split index %d must be in range [0,%d]!\n", last_start, + stack_.size() - 1); return; } Series* master_series = new Series("MasterSeries"); @@ -197,5 +193,4 @@ void Series::AppendSeries(Network* src) { delete src; } - } // namespace tesseract. diff --git a/src/lstm/series.h b/src/lstm/series.h index 1b381c3812..232e299349 100644 --- a/src/lstm/series.h +++ b/src/lstm/series.h @@ -36,8 +36,7 @@ class Series : public Plumbing { STRING spec() const override { STRING spec("["); - for (int i = 0; i < stack_.size(); ++i) - spec += stack_[i]->spec(); + for (int i = 0; i < stack_.size(); ++i) spec += stack_[i]->spec(); spec += "]"; return spec; } diff --git a/src/lstm/static_shape.h b/src/lstm/static_shape.h index 78b31a15c7..d9836f6c3e 100644 --- a/src/lstm/static_shape.h +++ b/src/lstm/static_shape.h @@ -59,26 +59,24 @@ class StaticShape { height_, width_, depth_, loss_type_); } - bool DeSerialize(TFile *fp) { + bool DeSerialize(TFile* fp) { int32_t tmp = LT_NONE; - bool result = - fp->FReadEndian(&batch_, sizeof(batch_), 1) == 1 && - fp->FReadEndian(&height_, sizeof(height_), 1) == 1 && - fp->FReadEndian(&width_, sizeof(width_), 1) == 1 && - fp->FReadEndian(&depth_, sizeof(depth_), 1) == 1 && - fp->FReadEndian(&tmp, sizeof(tmp), 1) == 1; + bool result = fp->FReadEndian(&batch_, sizeof(batch_), 1) == 1 && + fp->FReadEndian(&height_, sizeof(height_), 1) == 1 && + fp->FReadEndian(&width_, sizeof(width_), 1) == 1 && + fp->FReadEndian(&depth_, sizeof(depth_), 1) == 1 && + fp->FReadEndian(&tmp, sizeof(tmp), 1) == 1; loss_type_ = static_cast(tmp); return result; } - bool Serialize(TFile *fp) const { + bool Serialize(TFile* fp) const { int32_t tmp = loss_type_; - return - fp->FWrite(&batch_, sizeof(batch_), 1) == 1 && - fp->FWrite(&height_, sizeof(height_), 1) == 1 && - fp->FWrite(&width_, sizeof(width_), 1) == 1 && - fp->FWrite(&depth_, sizeof(depth_), 1) == 1 && - fp->FWrite(&tmp, sizeof(tmp), 1) == 1; + return fp->FWrite(&batch_, sizeof(batch_), 1) == 1 && + fp->FWrite(&height_, sizeof(height_), 1) == 1 && + fp->FWrite(&width_, sizeof(width_), 1) == 1 && + fp->FWrite(&depth_, sizeof(depth_), 1) == 1 && + fp->FWrite(&tmp, sizeof(tmp), 1) == 1; } private: diff --git a/src/lstm/tfnetwork.h b/src/lstm/tfnetwork.h index 1bccd233b0..eba1754656 100644 --- a/src/lstm/tfnetwork.h +++ b/src/lstm/tfnetwork.h @@ -65,8 +65,8 @@ class TFNetwork : public Network { // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. void Forward(bool debug, const NetworkIO& input, - const TransposedArray* input_transpose, - NetworkScratch* scratch, NetworkIO* output) override; + const TransposedArray* input_transpose, NetworkScratch* scratch, + NetworkIO* output) override; private: int InitFromProto(); diff --git a/src/lstm/weightmatrix.cpp b/src/lstm/weightmatrix.cpp index 2eb0dad147..af99898b2d 100644 --- a/src/lstm/weightmatrix.cpp +++ b/src/lstm/weightmatrix.cpp @@ -28,9 +28,7 @@ namespace tesseract { #if (defined(_MSC_VER) && _MSC_VER < 1900) || defined(ANDROID) -static inline double log2(double n) { - return log(n) / log(2.0); -} +static inline double log2(double n) { return log(n) / log(2.0); } #endif // _MSC_VER // Number of iterations after which the correction effectively becomes unity. diff --git a/src/lstm/weightmatrix.h b/src/lstm/weightmatrix.h index c077c7c0cb..455da7f6fb 100644 --- a/src/lstm/weightmatrix.h +++ b/src/lstm/weightmatrix.h @@ -95,9 +95,7 @@ class WeightMatrix { } // Accessors. - bool is_int_mode() const { - return int_mode_; - } + bool is_int_mode() const { return int_mode_; } int NumOutputs() const { return int_mode_ ? wi_.dim1() : wf_.dim1(); } // Provides one set of weights. Only used by peep weight maxpool. const double* GetWeights(int index) const { return wf_[index]; } diff --git a/src/opencl/oclkernels.h b/src/opencl/oclkernels.h index 1788baa9f9..cca7afc9a3 100644 --- a/src/opencl/oclkernels.h +++ b/src/opencl/oclkernels.h @@ -12,7 +12,7 @@ #define TESSERACT_OPENCL_OCLKERNELS_H_ #ifndef USE_EXTERNAL_KERNEL -#define KERNEL( ... )# __VA_ARGS__ "\n" +#define KERNEL(...) #__VA_ARGS__ "\n" // Double precision is a default of spreadsheets // cl_khr_fp64: Khronos extension // cl_amd_fp64: AMD extension diff --git a/src/opencl/opencl_device_selection.h b/src/opencl/opencl_device_selection.h index c44c2bafd1..17df446e8e 100644 --- a/src/opencl/opencl_device_selection.h +++ b/src/opencl/opencl_device_selection.h @@ -17,8 +17,8 @@ #define _CRT_SECURE_NO_WARNINGS #endif -#include #include +#include #include #ifdef __APPLE__ @@ -34,12 +34,12 @@ typedef enum { } ds_device_type; typedef struct { - ds_device_type type; - cl_device_id oclDeviceID; - char* oclDeviceName; - char* oclDriverVersion; + ds_device_type type; + cl_device_id oclDeviceID; + char* oclDeviceName; + char* oclDriverVersion; // a pointer to the score data, the content/format is application defined. - void* score; + void* score; } ds_device; #endif // USE_OPENCL diff --git a/src/opencl/openclwrapper.cpp b/src/opencl/openclwrapper.cpp index aa211e3fad..7f50840806 100644 --- a/src/opencl/openclwrapper.cpp +++ b/src/opencl/openclwrapper.cpp @@ -15,8 +15,8 @@ #endif #include -#include "openclwrapper.h" #include "oclkernels.h" +#include "openclwrapper.h" // for micro-benchmark #include "otsuthr.h" @@ -85,8 +85,8 @@ typedef enum { DS_EVALUATE_ALL, DS_EVALUATE_NEW_ONLY } ds_evaluation_type; typedef struct { unsigned int numDevices; - ds_device *devices; - const char *version; + ds_device* devices; + const char* version; } ds_profile; typedef enum { @@ -107,11 +107,11 @@ typedef enum { // device->score) update the data size of score. The encoding and the format // of the score data is implementation defined. The function should return // DS_SUCCESS if there's no error to be reported. -typedef ds_status (*ds_perf_evaluator)(ds_device *device, void *data); +typedef ds_status (*ds_perf_evaluator)(ds_device* device, void* data); // deallocate memory used by score -typedef ds_status (*ds_score_release)(void *score); -static ds_status releaseDSProfile(ds_profile *profile, ds_score_release sr) { +typedef ds_status (*ds_score_release)(void* score); +static ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) { ds_status status = DS_SUCCESS; if (profile != nullptr) { if (profile->devices != nullptr && sr != nullptr) { @@ -129,18 +129,18 @@ static ds_status releaseDSProfile(ds_profile *profile, ds_score_release sr) { return status; } -static ds_status initDSProfile(ds_profile **p, const char *version) { +static ds_status initDSProfile(ds_profile** p, const char* version) { int numDevices; cl_uint numPlatforms; - cl_platform_id *platforms = nullptr; - cl_device_id *devices = nullptr; + cl_platform_id* platforms = nullptr; + cl_device_id* devices = nullptr; ds_status status = DS_SUCCESS; unsigned int next; unsigned int i; if (p == nullptr) return DS_INVALID_PROFILE; - ds_profile *profile = (ds_profile *)malloc(sizeof(ds_profile)); + ds_profile* profile = (ds_profile*)malloc(sizeof(ds_profile)); if (profile == nullptr) return DS_MEMORY_ERROR; memset(profile, 0, sizeof(ds_profile)); @@ -148,7 +148,7 @@ static ds_status initDSProfile(ds_profile **p, const char *version) { clGetPlatformIDs(0, nullptr, &numPlatforms); if (numPlatforms > 0) { - platforms = (cl_platform_id *)malloc(numPlatforms * sizeof(cl_platform_id)); + platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); if (platforms == nullptr) { status = DS_MEMORY_ERROR; goto cleanup; @@ -164,7 +164,7 @@ static ds_status initDSProfile(ds_profile **p, const char *version) { } if (numDevices > 0) { - devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id)); + devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); if (devices == nullptr) { status = DS_MEMORY_ERROR; goto cleanup; @@ -174,7 +174,7 @@ static ds_status initDSProfile(ds_profile **p, const char *version) { profile->numDevices = numDevices + 1; // +1 to numDevices to include the native CPU profile->devices = - (ds_device *)malloc(profile->numDevices * sizeof(ds_device)); + (ds_device*)malloc(profile->numDevices * sizeof(ds_device)); if (profile->devices == nullptr) { profile->numDevices = 0; status = DS_MEMORY_ERROR; @@ -197,13 +197,13 @@ static ds_status initDSProfile(ds_profile **p, const char *version) { clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME, DS_DEVICE_NAME_LENGTH, &buffer, nullptr); length = strlen(buffer); - profile->devices[next].oclDeviceName = (char *)malloc(length + 1); + profile->devices[next].oclDeviceName = (char*)malloc(length + 1); memcpy(profile->devices[next].oclDeviceName, buffer, length + 1); clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION, DS_DEVICE_NAME_LENGTH, &buffer, nullptr); length = strlen(buffer); - profile->devices[next].oclDriverVersion = (char *)malloc(length + 1); + profile->devices[next].oclDriverVersion = (char*)malloc(length + 1); memcpy(profile->devices[next].oclDriverVersion, buffer, length + 1); } } @@ -224,10 +224,10 @@ static ds_status initDSProfile(ds_profile **p, const char *version) { return status; } -static ds_status profileDevices(ds_profile *profile, +static ds_status profileDevices(ds_profile* profile, const ds_evaluation_type type, ds_perf_evaluator evaluator, - void *evaluatorData, unsigned int *numUpdates) { + void* evaluatorData, unsigned int* numUpdates) { ds_status status = DS_SUCCESS; unsigned int i; unsigned int updates = 0; @@ -263,11 +263,11 @@ static ds_status profileDevices(ds_profile *profile, return status; } -static const char *findString(const char *contentStart, const char *contentEnd, - const char *string) { +static const char* findString(const char* contentStart, const char* contentEnd, + const char* string) { size_t stringLength; - const char *currentPosition; - const char *found = nullptr; + const char* currentPosition; + const char* found = nullptr; stringLength = strlen(string); currentPosition = contentStart; for (currentPosition = contentStart; currentPosition < contentEnd; @@ -284,14 +284,14 @@ static const char *findString(const char *contentStart, const char *contentEnd, return found; } -static ds_status readProFile(const char *fileName, char **content, - size_t *contentSize) { +static ds_status readProFile(const char* fileName, char** content, + size_t* contentSize) { size_t size = 0; *contentSize = 0; *content = nullptr; - FILE *input = fopen(fileName, "rb"); + FILE* input = fopen(fileName, "rb"); if (input == nullptr) { return DS_FILE_ERROR; } @@ -299,7 +299,7 @@ static ds_status readProFile(const char *fileName, char **content, fseek(input, 0L, SEEK_END); size = ftell(input); rewind(input); - char *binary = (char *)malloc(size); + char* binary = (char*)malloc(size); if (binary == nullptr) { fclose(input); return DS_FILE_ERROR; @@ -312,25 +312,25 @@ static ds_status readProFile(const char *fileName, char **content, return DS_SUCCESS; } -typedef ds_status (*ds_score_deserializer)(ds_device *device, - const unsigned char *serializedScore, +typedef ds_status (*ds_score_deserializer)(ds_device* device, + const unsigned char* serializedScore, unsigned int serializedScoreSize); -static ds_status readProfileFromFile(ds_profile *profile, +static ds_status readProfileFromFile(ds_profile* profile, ds_score_deserializer deserializer, - const char *file) { + const char* file) { ds_status status = DS_SUCCESS; - char *contentStart = nullptr; - const char *contentEnd = nullptr; + char* contentStart = nullptr; + const char* contentEnd = nullptr; size_t contentSize; if (profile == nullptr) return DS_INVALID_PROFILE; status = readProFile(file, &contentStart, &contentSize); if (status == DS_SUCCESS) { - const char *currentPosition; - const char *dataStart; - const char *dataEnd; + const char* currentPosition; + const char* dataStart; + const char* dataEnd; contentEnd = contentStart + contentSize; currentPosition = contentStart; @@ -362,18 +362,18 @@ static ds_status readProfileFromFile(ds_profile *profile, while (1) { unsigned int i; - const char *deviceTypeStart; - const char *deviceTypeEnd; + const char* deviceTypeStart; + const char* deviceTypeEnd; ds_device_type deviceType; - const char *deviceNameStart; - const char *deviceNameEnd; + const char* deviceNameStart; + const char* deviceNameEnd; - const char *deviceScoreStart; - const char *deviceScoreEnd; + const char* deviceScoreStart; + const char* deviceScoreEnd; - const char *deviceDriverStart; - const char *deviceDriverEnd; + const char* deviceDriverStart; + const char* deviceDriverEnd; dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE); if (dataStart == nullptr) { @@ -455,7 +455,7 @@ static ds_status readProfileFromFile(ds_profile *profile, deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); status = deserializer(profile->devices + i, - (const unsigned char *)deviceScoreStart, + (const unsigned char*)deviceScoreStart, deviceScoreEnd - deviceScoreStart); if (status != DS_SUCCESS) { goto cleanup; @@ -475,7 +475,7 @@ static ds_status readProfileFromFile(ds_profile *profile, deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); status = deserializer(profile->devices + i, - (const unsigned char *)deviceScoreStart, + (const unsigned char*)deviceScoreStart, deviceScoreEnd - deviceScoreStart); if (status != DS_SUCCESS) { goto cleanup; @@ -493,17 +493,17 @@ static ds_status readProfileFromFile(ds_profile *profile, return status; } -typedef ds_status (*ds_score_serializer)(ds_device *device, - void **serializedScore, - unsigned int *serializedScoreSize); -static ds_status writeProfileToFile(ds_profile *profile, +typedef ds_status (*ds_score_serializer)(ds_device* device, + void** serializedScore, + unsigned int* serializedScoreSize); +static ds_status writeProfileToFile(ds_profile* profile, ds_score_serializer serializer, - const char *file) { + const char* file) { ds_status status = DS_SUCCESS; if (profile == nullptr) return DS_INVALID_PROFILE; - FILE *profileFile = fopen(file, "wb"); + FILE* profileFile = fopen(file, "wb"); if (profileFile == nullptr) { status = DS_FILE_ERROR; } else { @@ -518,7 +518,7 @@ static ds_status writeProfileToFile(ds_profile *profile, fwrite("\n", sizeof(char), 1, profileFile); for (i = 0; i < profile->numDevices && status == DS_SUCCESS; i++) { - void *serializedScore; + void* serializedScore; unsigned int serializedScoreSize; fwrite(DS_TAG_DEVICE, sizeof(char), strlen(DS_TAG_DEVICE), profileFile); @@ -581,9 +581,9 @@ static ds_status writeProfileToFile(ds_profile *profile, } // substitute invalid characters in device name with _ -static void legalizeFileName(char *fileName) { +static void legalizeFileName(char* fileName) { // printf("fileName: %s\n", fileName); - const char *invalidChars = + const char* invalidChars = "/\?:*\"><| "; // space is valid but can cause headaches // for each invalid char for (unsigned i = 0; i < strlen(invalidChars); i++) { @@ -595,7 +595,7 @@ static void legalizeFileName(char *fileName) { // initial ./ is valid for present directory // if (*pos == '.') pos++; // if (*pos == '/') pos++; - for (char *pos = strstr(fileName, invalidStr); pos != nullptr; + for (char* pos = strstr(fileName, invalidStr); pos != nullptr; pos = strstr(pos + 1, invalidStr)) { // printf("\tfound: %s, ", pos); pos[0] = '_'; @@ -604,7 +604,7 @@ static void legalizeFileName(char *fileName) { } } -static void populateGPUEnvFromDevice(GPUEnv *gpuInfo, cl_device_id device) { +static void populateGPUEnvFromDevice(GPUEnv* gpuInfo, cl_device_id device) { // printf("[DS] populateGPUEnvFromDevice\n"); size_t size; gpuInfo->mnIsUserCreated = 1; @@ -635,11 +635,10 @@ static void populateGPUEnvFromDevice(GPUEnv *gpuInfo, cl_device_id device) { CHECK_OPENCL(clStatus, "populateGPUEnv::createCommandQueue"); } -int OpenclDevice::LoadOpencl() -{ +int OpenclDevice::LoadOpencl() { #ifdef WIN32 HINSTANCE HOpenclDll = nullptr; - void *OpenclDll = nullptr; + void* OpenclDll = nullptr; // fprintf(stderr, " LoadOpenclDllxx... \n"); OpenclDll = static_cast(HOpenclDll); OpenclDll = LoadLibrary("openCL.dll"); @@ -647,23 +646,22 @@ int OpenclDevice::LoadOpencl() fprintf(stderr, "[OD] Load opencl.dll failed!\n"); FreeLibrary(static_cast(OpenclDll)); return 0; - } - fprintf(stderr, "[OD] Load opencl.dll successful!\n"); + } + fprintf(stderr, "[OD] Load opencl.dll successful!\n"); #endif - return 1; + return 1; } -int OpenclDevice::SetKernelEnv( KernelEnv *envInfo ) -{ - envInfo->mpkContext = gpuEnv.mpContext; - envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue; - envInfo->mpkProgram = gpuEnv.mpArryPrograms[0]; +int OpenclDevice::SetKernelEnv(KernelEnv* envInfo) { + envInfo->mpkContext = gpuEnv.mpContext; + envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue; + envInfo->mpkProgram = gpuEnv.mpArryPrograms[0]; - return 1; + return 1; } -static cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer, +static cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32* hostbuffer, size_t nElements, cl_mem_flags flags, - cl_int *pStatus) { + cl_int* pStatus) { cl_mem membuffer = clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(flags), nElements * sizeof(l_uint32), hostbuffer, pStatus); @@ -671,8 +669,8 @@ static cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer, return membuffer; } -static Pix *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, Pix *pixd, - Pix *pixs, int elements, cl_mem_flags flags, +static Pix* mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, Pix* pixd, + Pix* pixs, int elements, cl_mem_flags flags, bool memcopy = false, bool sync = true) { PROCNAME("mapOutputCLBuffer"); if (!pixd) { @@ -685,7 +683,7 @@ static Pix *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, Pix *pixd, tprintf("pixd not made\n"); } } - l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer( + l_uint32* pValues = (l_uint32*)clEnqueueMapBuffer( rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0, elements * sizeof(l_uint32), 0, nullptr, nullptr, nullptr); @@ -705,8 +703,7 @@ static Pix *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, Pix *pixd, return pixd; } -void OpenclDevice::releaseMorphCLBuffers() -{ +void OpenclDevice::releaseMorphCLBuffers() { if (pixdCLIntermediate != nullptr) clReleaseMemObject(pixdCLIntermediate); if (pixsCLBuffer != nullptr) clReleaseMemObject(pixsCLBuffer); if (pixdCLBuffer != nullptr) clReleaseMemObject(pixdCLBuffer); @@ -714,7 +711,7 @@ void OpenclDevice::releaseMorphCLBuffers() pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr; } -int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs) { +int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs) { SetKernelEnv(&rEnv); if (pixThBuffer != nullptr) { @@ -727,8 +724,8 @@ int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs) { sizeof(l_uint32) * wpl * h, 0, nullptr, nullptr); } else { // Get data from the source image - l_uint32 *srcdata = - reinterpret_cast(malloc(wpl * h * sizeof(l_uint32))); + l_uint32* srcdata = + reinterpret_cast(malloc(wpl * h * sizeof(l_uint32))); memcpy(srcdata, pixGetData(pixs), wpl * h * sizeof(l_uint32)); pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl * h, @@ -744,516 +741,476 @@ int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs) { return (int)clStatus; } -int OpenclDevice::InitEnv() -{ -//PERF_COUNT_START("OD::InitEnv") +int OpenclDevice::InitEnv() { +// PERF_COUNT_START("OD::InitEnv") // printf("[OD] OpenclDevice::InitEnv()\n"); #ifdef SAL_WIN32 - while( 1 ) - { - if( 1 == LoadOpencl() ) - break; - } -PERF_COUNT_SUB("LoadOpencl") + while (1) { + if (1 == LoadOpencl()) break; + } + PERF_COUNT_SUB("LoadOpencl") #endif - // sets up environment, compiles programs + // sets up environment, compiles programs - InitOpenclRunEnv_DeviceSelection( 0 ); -//PERF_COUNT_SUB("called InitOpenclRunEnv_DS") -//PERF_COUNT_END - return 1; + InitOpenclRunEnv_DeviceSelection(0); + // PERF_COUNT_SUB("called InitOpenclRunEnv_DS") + // PERF_COUNT_END + return 1; } -int OpenclDevice::ReleaseOpenclRunEnv() -{ - ReleaseOpenclEnv( &gpuEnv ); +int OpenclDevice::ReleaseOpenclRunEnv() { + ReleaseOpenclEnv(&gpuEnv); #ifdef SAL_WIN32 - FreeOpenclDll(); + FreeOpenclDll(); #endif - return 1; + return 1; } -inline int OpenclDevice::AddKernelConfig( int kCount, const char *kName ) -{ - if ( kCount < 1 ) - fprintf(stderr,"Error: ( KCount < 1 ) AddKernelConfig\n" ); - strcpy( gpuEnv.mArrykernelNames[kCount-1], kName ); - gpuEnv.mnKernelCount++; - return 0; +inline int OpenclDevice::AddKernelConfig(int kCount, const char* kName) { + if (kCount < 1) fprintf(stderr, "Error: ( KCount < 1 ) AddKernelConfig\n"); + strcpy(gpuEnv.mArrykernelNames[kCount - 1], kName); + gpuEnv.mnKernelCount++; + return 0; } -int OpenclDevice::RegistOpenclKernel() -{ - if ( !gpuEnv.mnIsUserCreated ) - memset( &gpuEnv, 0, sizeof(gpuEnv) ); +int OpenclDevice::RegistOpenclKernel() { + if (!gpuEnv.mnIsUserCreated) memset(&gpuEnv, 0, sizeof(gpuEnv)); - gpuEnv.mnFileCount = 0; //argc; - gpuEnv.mnKernelCount = 0UL; + gpuEnv.mnFileCount = 0; // argc; + gpuEnv.mnKernelCount = 0UL; - AddKernelConfig( 1, (const char*) "oclAverageSub1" ); - return 0; + AddKernelConfig(1, (const char*)"oclAverageSub1"); + return 0; } -int OpenclDevice::InitOpenclRunEnv_DeviceSelection( int argc ) { -//PERF_COUNT_START("InitOpenclRunEnv_DS") - if (!isInited) { - // after programs compiled, selects best device - ds_device bestDevice_DS = getDeviceSelection( ); -//PERF_COUNT_SUB("called getDeviceSelection()") - cl_device_id bestDevice = bestDevice_DS.oclDeviceID; - // overwrite global static GPUEnv with new device - if (selectedDeviceIsOpenCL() ) { - //printf("[DS] InitOpenclRunEnv_DS::Calling populateGPUEnvFromDevice() for selected device\n"); - populateGPUEnvFromDevice( &gpuEnv, bestDevice ); - gpuEnv.mnFileCount = 0; //argc; - gpuEnv.mnKernelCount = 0UL; -//PERF_COUNT_SUB("populate gpuEnv") - CompileKernelFile(&gpuEnv, ""); -//PERF_COUNT_SUB("CompileKernelFile") - } else { - //printf("[DS] InitOpenclRunEnv_DS::Skipping populateGPUEnvFromDevice() b/c native cpu selected\n"); - } - isInited = 1; +int OpenclDevice::InitOpenclRunEnv_DeviceSelection(int argc) { + // PERF_COUNT_START("InitOpenclRunEnv_DS") + if (!isInited) { + // after programs compiled, selects best device + ds_device bestDevice_DS = getDeviceSelection(); + // PERF_COUNT_SUB("called getDeviceSelection()") + cl_device_id bestDevice = bestDevice_DS.oclDeviceID; + // overwrite global static GPUEnv with new device + if (selectedDeviceIsOpenCL()) { + // printf("[DS] InitOpenclRunEnv_DS::Calling populateGPUEnvFromDevice() + // for selected device\n"); + populateGPUEnvFromDevice(&gpuEnv, bestDevice); + gpuEnv.mnFileCount = 0; // argc; + gpuEnv.mnKernelCount = 0UL; + // PERF_COUNT_SUB("populate gpuEnv") + CompileKernelFile(&gpuEnv, ""); + // PERF_COUNT_SUB("CompileKernelFile") + } else { + // printf("[DS] InitOpenclRunEnv_DS::Skipping populateGPUEnvFromDevice() + // b/c native cpu selected\n"); } -//PERF_COUNT_END - return 0; + isInited = 1; + } + // PERF_COUNT_END + return 0; } - -OpenclDevice::OpenclDevice() -{ - //InitEnv(); +OpenclDevice::OpenclDevice() { + // InitEnv(); } -OpenclDevice::~OpenclDevice() -{ - //ReleaseOpenclRunEnv(); +OpenclDevice::~OpenclDevice() { + // ReleaseOpenclRunEnv(); } -int OpenclDevice::ReleaseOpenclEnv( GPUEnv *gpuInfo ) -{ - int i = 0; - int clStatus = 0; +int OpenclDevice::ReleaseOpenclEnv(GPUEnv* gpuInfo) { + int i = 0; + int clStatus = 0; - if ( !isInited ) - { - return 1; - } + if (!isInited) { + return 1; + } - for ( i = 0; i < gpuEnv.mnFileCount; i++ ) - { - if ( gpuEnv.mpArryPrograms[i] ) - { - clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] ); - CHECK_OPENCL( clStatus, "clReleaseProgram" ); - gpuEnv.mpArryPrograms[i] = nullptr; - } - } - if ( gpuEnv.mpCmdQueue ) - { - clReleaseCommandQueue( gpuEnv.mpCmdQueue ); - gpuEnv.mpCmdQueue = nullptr; + for (i = 0; i < gpuEnv.mnFileCount; i++) { + if (gpuEnv.mpArryPrograms[i]) { + clStatus = clReleaseProgram(gpuEnv.mpArryPrograms[i]); + CHECK_OPENCL(clStatus, "clReleaseProgram"); + gpuEnv.mpArryPrograms[i] = nullptr; } - if ( gpuEnv.mpContext ) - { - clReleaseContext( gpuEnv.mpContext ); - gpuEnv.mpContext = nullptr; - } - isInited = 0; - gpuInfo->mnIsUserCreated = 0; - delete[] gpuInfo->mpArryDevsID; - return 1; + } + if (gpuEnv.mpCmdQueue) { + clReleaseCommandQueue(gpuEnv.mpCmdQueue); + gpuEnv.mpCmdQueue = nullptr; + } + if (gpuEnv.mpContext) { + clReleaseContext(gpuEnv.mpContext); + gpuEnv.mpContext = nullptr; + } + isInited = 0; + gpuInfo->mnIsUserCreated = 0; + delete[] gpuInfo->mpArryDevsID; + return 1; } -int OpenclDevice::BinaryGenerated( const char * clFileName, FILE ** fhandle ) -{ - unsigned int i = 0; - cl_int clStatus; - int status = 0; - char *str = nullptr; - FILE *fd = nullptr; - char fileName[256] = {0}, cl_name[128] = {0}; - char deviceName[1024]; - clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, - sizeof(deviceName), deviceName, nullptr); - CHECK_OPENCL(clStatus, "clGetDeviceInfo"); - str = (char *)strstr(clFileName, (char *)".cl"); - memcpy(cl_name, clFileName, str - clFileName); - cl_name[str - clFileName] = '\0'; - sprintf(fileName, "%s-%s.bin", cl_name, deviceName); - legalizeFileName(fileName); - fd = fopen(fileName, "rb"); - status = (fd != nullptr) ? 1 : 0; - if (fd != nullptr) { - *fhandle = fd; - } - return status; - +int OpenclDevice::BinaryGenerated(const char* clFileName, FILE** fhandle) { + unsigned int i = 0; + cl_int clStatus; + int status = 0; + char* str = nullptr; + FILE* fd = nullptr; + char fileName[256] = {0}, cl_name[128] = {0}; + char deviceName[1024]; + clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, + sizeof(deviceName), deviceName, nullptr); + CHECK_OPENCL(clStatus, "clGetDeviceInfo"); + str = (char*)strstr(clFileName, (char*)".cl"); + memcpy(cl_name, clFileName, str - clFileName); + cl_name[str - clFileName] = '\0'; + sprintf(fileName, "%s-%s.bin", cl_name, deviceName); + legalizeFileName(fileName); + fd = fopen(fileName, "rb"); + status = (fd != nullptr) ? 1 : 0; + if (fd != nullptr) { + *fhandle = fd; + } + return status; } -int OpenclDevice::CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName ) -{ - int i; - for ( i = 0; i < gpuEnvCached->mnFileCount; i++ ) - { - if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 ) - { - if (gpuEnvCached->mpArryPrograms[i] != nullptr) { - return 1; - } - } +int OpenclDevice::CachedOfKernerPrg(const GPUEnv* gpuEnvCached, + const char* clFileName) { + int i; + for (i = 0; i < gpuEnvCached->mnFileCount; i++) { + if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) { + if (gpuEnvCached->mpArryPrograms[i] != nullptr) { + return 1; + } } + } - return 0; + return 0; } -int OpenclDevice::WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes ) -{ - FILE *output = nullptr; +int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary, + size_t numBytes) { + FILE* output = nullptr; output = fopen(fileName, "wb"); if (output == nullptr) { return 0; - } + } - fwrite( birary, sizeof(char), numBytes, output ); - fclose( output ); - - return 1; + fwrite(birary, sizeof(char), numBytes, output); + fclose(output); + return 1; } -int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * clFileName ) -{ - unsigned int i = 0; - cl_int clStatus; - size_t *binarySizes; - cl_uint numDevices; - cl_device_id *mpArryDevsID; - char **binaries, *str = nullptr; - - clStatus = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, - sizeof(numDevices), &numDevices, nullptr); - CHECK_OPENCL( clStatus, "clGetProgramInfo" ); - - mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) * numDevices ); - if (mpArryDevsID == nullptr) { - return 0; - } - /* grab the handles to all of the devices in the program. */ - clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES, - sizeof(cl_device_id) * numDevices, mpArryDevsID, - nullptr); - CHECK_OPENCL( clStatus, "clGetProgramInfo" ); +int OpenclDevice::GeneratBinFromKernelSource(cl_program program, + const char* clFileName) { + unsigned int i = 0; + cl_int clStatus; + size_t* binarySizes; + cl_uint numDevices; + cl_device_id* mpArryDevsID; + char **binaries, *str = nullptr; - /* figure out the sizes of each of the binaries. */ - binarySizes = (size_t*) malloc( sizeof(size_t) * numDevices ); + clStatus = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), &numDevices, nullptr); + CHECK_OPENCL(clStatus, "clGetProgramInfo"); - clStatus = - clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * numDevices, binarySizes, nullptr); - CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + mpArryDevsID = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices); + if (mpArryDevsID == nullptr) { + return 0; + } + /* grab the handles to all of the devices in the program. */ + clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * numDevices, mpArryDevsID, + nullptr); + CHECK_OPENCL(clStatus, "clGetProgramInfo"); - /* copy over all of the generated binaries. */ - binaries = (char**) malloc( sizeof(char *) * numDevices ); - if (binaries == nullptr) { - return 0; - } + /* figure out the sizes of each of the binaries. */ + binarySizes = (size_t*)malloc(sizeof(size_t) * numDevices); - for ( i = 0; i < numDevices; i++ ) - { - if ( binarySizes[i] != 0 ) - { - binaries[i] = (char*) malloc( sizeof(char) * binarySizes[i] ); - if (binaries[i] == nullptr) { - return 0; - } - } - else - { - binaries[i] = nullptr; - } - } + clStatus = + clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, binarySizes, nullptr); + CHECK_OPENCL(clStatus, "clGetProgramInfo"); - clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES, - sizeof(char *) * numDevices, binaries, nullptr); - CHECK_OPENCL(clStatus,"clGetProgramInfo"); - - /* dump out each binary into its own separate file. */ - for ( i = 0; i < numDevices; i++ ) - { - char fileName[256] = { 0 }, cl_name[128] = { 0 }; - - if ( binarySizes[i] != 0 ) - { - char deviceName[1024]; - clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME, - sizeof(deviceName), deviceName, nullptr); - CHECK_OPENCL( clStatus, "clGetDeviceInfo" ); - - str = (char*) strstr( clFileName, (char*) ".cl" ); - memcpy( cl_name, clFileName, str - clFileName ); - cl_name[str - clFileName] = '\0'; - sprintf( fileName, "%s-%s.bin", cl_name, deviceName ); - legalizeFileName(fileName); - if ( !WriteBinaryToFile( fileName, binaries[i], binarySizes[i] ) ) - { - printf("[OD] write binary[%s] failed\n", fileName); - return 0; - } //else - printf("[OD] write binary[%s] successfully\n", fileName); - } - } + /* copy over all of the generated binaries. */ + binaries = (char**)malloc(sizeof(char*) * numDevices); + if (binaries == nullptr) { + return 0; + } - // Release all resouces and memory - for ( i = 0; i < numDevices; i++ ) - { - free(binaries[i]); + for (i = 0; i < numDevices; i++) { + if (binarySizes[i] != 0) { + binaries[i] = (char*)malloc(sizeof(char) * binarySizes[i]); + if (binaries[i] == nullptr) { + return 0; + } + } else { binaries[i] = nullptr; } + } - free(binaries); - binaries = nullptr; + clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES, + sizeof(char*) * numDevices, binaries, nullptr); + CHECK_OPENCL(clStatus, "clGetProgramInfo"); - free(binarySizes); - binarySizes = nullptr; + /* dump out each binary into its own separate file. */ + for (i = 0; i < numDevices; i++) { + char fileName[256] = {0}, cl_name[128] = {0}; - free(mpArryDevsID); - mpArryDevsID = nullptr; + if (binarySizes[i] != 0) { + char deviceName[1024]; + clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME, + sizeof(deviceName), deviceName, nullptr); + CHECK_OPENCL(clStatus, "clGetDeviceInfo"); + + str = (char*)strstr(clFileName, (char*)".cl"); + memcpy(cl_name, clFileName, str - clFileName); + cl_name[str - clFileName] = '\0'; + sprintf(fileName, "%s-%s.bin", cl_name, deviceName); + legalizeFileName(fileName); + if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) { + printf("[OD] write binary[%s] failed\n", fileName); + return 0; + } // else + printf("[OD] write binary[%s] successfully\n", fileName); + } + } - return 1; -} + // Release all resouces and memory + for (i = 0; i < numDevices; i++) { + free(binaries[i]); + binaries[i] = nullptr; + } -int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ) -{ -//PERF_COUNT_START("CompileKernelFile") - cl_int clStatus = 0; - size_t length; - char *buildLog = nullptr, *binary; - const char *source; - size_t source_size[1]; - int b_error, binary_status, binaryExisted, idx; - cl_uint numDevices; - cl_device_id *mpArryDevsID; - FILE *fd, *fd1; - const char* filename = "kernel.cl"; - //fprintf(stderr, "[OD] CompileKernelFile ... \n"); - if ( CachedOfKernerPrg(gpuInfo, filename) == 1 ) - { - return 1; - } + free(binaries); + binaries = nullptr; - idx = gpuInfo->mnFileCount; + free(binarySizes); + binarySizes = nullptr; - source = kernel_src; + free(mpArryDevsID); + mpArryDevsID = nullptr; - source_size[0] = strlen( source ); - binaryExisted = 0; - binaryExisted = BinaryGenerated( filename, &fd ); // don't check for binary during microbenchmark -//PERF_COUNT_SUB("BinaryGenerated") - if ( binaryExisted == 1 ) - { - clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), &numDevices, nullptr); - CHECK_OPENCL(clStatus, "clGetContextInfo"); + return 1; +} - mpArryDevsID = (cl_device_id *)malloc(sizeof(cl_device_id) * numDevices); - if (mpArryDevsID == nullptr) { - return 0; - } -//PERF_COUNT_SUB("get numDevices") - b_error = 0; - length = 0; - b_error |= fseek( fd, 0, SEEK_END ) < 0; - b_error |= ( length = ftell(fd) ) <= 0; - b_error |= fseek( fd, 0, SEEK_SET ) < 0; - if ( b_error ) - { - return 0; - } +int OpenclDevice::CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption) { + // PERF_COUNT_START("CompileKernelFile") + cl_int clStatus = 0; + size_t length; + char *buildLog = nullptr, *binary; + const char* source; + size_t source_size[1]; + int b_error, binary_status, binaryExisted, idx; + cl_uint numDevices; + cl_device_id* mpArryDevsID; + FILE *fd, *fd1; + const char* filename = "kernel.cl"; + // fprintf(stderr, "[OD] CompileKernelFile ... \n"); + if (CachedOfKernerPrg(gpuInfo, filename) == 1) { + return 1; + } - binary = (char*) malloc( length + 2 ); - if ( !binary ) - { - return 0; - } + idx = gpuInfo->mnFileCount; + + source = kernel_src; - memset( binary, 0, length + 2 ); - b_error |= fread( binary, 1, length, fd ) != length; - - - fclose( fd ); -//PERF_COUNT_SUB("read file") - fd = nullptr; - // grab the handles to all of the devices in the context. - clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - mpArryDevsID, nullptr); - CHECK_OPENCL( clStatus, "clGetContextInfo" ); -//PERF_COUNT_SUB("get devices") - //fprintf(stderr, "[OD] Create kernel from binary\n"); - gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices, - mpArryDevsID, &length, (const unsigned char**) &binary, - &binary_status, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateProgramWithBinary" ); -//PERF_COUNT_SUB("clCreateProgramWithBinary") - free( binary ); - free( mpArryDevsID ); - mpArryDevsID = nullptr; - // PERF_COUNT_SUB("binaryExisted") + source_size[0] = strlen(source); + binaryExisted = 0; + binaryExisted = BinaryGenerated( + filename, &fd); // don't check for binary during microbenchmark + // PERF_COUNT_SUB("BinaryGenerated") + if (binaryExisted == 1) { + clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), &numDevices, nullptr); + CHECK_OPENCL(clStatus, "clGetContextInfo"); + + mpArryDevsID = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices); + if (mpArryDevsID == nullptr) { + return 0; } - else - { - // create a CL program using the kernel source - //fprintf(stderr, "[OD] Create kernel from source\n"); - gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &source, - source_size, &clStatus); - CHECK_OPENCL( clStatus, "clCreateProgramWithSource" ); -//PERF_COUNT_SUB("!binaryExisted") + // PERF_COUNT_SUB("get numDevices") + b_error = 0; + length = 0; + b_error |= fseek(fd, 0, SEEK_END) < 0; + b_error |= (length = ftell(fd)) <= 0; + b_error |= fseek(fd, 0, SEEK_SET) < 0; + if (b_error) { + return 0; } - if (gpuInfo->mpArryPrograms[idx] == (cl_program) nullptr) { + binary = (char*)malloc(length + 2); + if (!binary) { return 0; } - //char options[512]; - // create a cl program executable for all the devices specified - //printf("[OD] BuildProgram.\n"); -PERF_COUNT_START("OD::CompileKernel::clBuildProgram") - if (!gpuInfo->mnIsUserCreated) - { + memset(binary, 0, length + 2); + b_error |= fread(binary, 1, length, fd) != length; + + fclose(fd); + // PERF_COUNT_SUB("read file") + fd = nullptr; + // grab the handles to all of the devices in the context. + clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, mpArryDevsID, + nullptr); + CHECK_OPENCL(clStatus, "clGetContextInfo"); + // PERF_COUNT_SUB("get devices") + // fprintf(stderr, "[OD] Create kernel from binary\n"); + gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( + gpuInfo->mpContext, numDevices, mpArryDevsID, &length, + (const unsigned char**)&binary, &binary_status, &clStatus); + CHECK_OPENCL(clStatus, "clCreateProgramWithBinary"); + // PERF_COUNT_SUB("clCreateProgramWithBinary") + free(binary); + free(mpArryDevsID); + mpArryDevsID = nullptr; + // PERF_COUNT_SUB("binaryExisted") + } else { + // create a CL program using the kernel source + // fprintf(stderr, "[OD] Create kernel from source\n"); + gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( + gpuInfo->mpContext, 1, &source, source_size, &clStatus); + CHECK_OPENCL(clStatus, "clCreateProgramWithSource"); + // PERF_COUNT_SUB("!binaryExisted") + } + + if (gpuInfo->mpArryPrograms[idx] == (cl_program) nullptr) { + return 0; + } + + // char options[512]; + // create a cl program executable for all the devices specified + // printf("[OD] BuildProgram.\n"); + PERF_COUNT_START("OD::CompileKernel::clBuildProgram") + if (!gpuInfo->mnIsUserCreated) { + clStatus = + clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, + buildOption, nullptr, nullptr); + // PERF_COUNT_SUB("clBuildProgram notUserCreated") + } else { + clStatus = + clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), + buildOption, nullptr, nullptr); + // PERF_COUNT_SUB("clBuildProgram isUserCreated") + } + PERF_COUNT_END + if (clStatus != CL_SUCCESS) { + printf("BuildProgram error!\n"); + if (!gpuInfo->mnIsUserCreated) { + clStatus = clGetProgramBuildInfo( + gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], + CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); + } else { clStatus = - clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, - buildOption, nullptr, nullptr); - // PERF_COUNT_SUB("clBuildProgram notUserCreated") + clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, + CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); } - else - { - clStatus = - clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), - buildOption, nullptr, nullptr); - // PERF_COUNT_SUB("clBuildProgram isUserCreated") + if (clStatus != CL_SUCCESS) { + printf("opencl create build log fail\n"); + return 0; } -PERF_COUNT_END - if ( clStatus != CL_SUCCESS ) - { - printf ("BuildProgram error!\n"); - if ( !gpuInfo->mnIsUserCreated ) - { - clStatus = clGetProgramBuildInfo( - gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], - CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); - } - else - { - clStatus = clGetProgramBuildInfo( - gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, - CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); - } - if ( clStatus != CL_SUCCESS ) - { - printf("opencl create build log fail\n"); - return 0; - } - buildLog = (char*) malloc( length ); - if (buildLog == (char *)nullptr) { - return 0; - } - if ( !gpuInfo->mnIsUserCreated ) - { - clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], - CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); - } - else - { - clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, - CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); - } - if ( clStatus != CL_SUCCESS ) - { - printf("opencl program build info fail\n"); - return 0; - } - - fd1 = fopen( "kernel-build.log", "w+" ); - if (fd1 != nullptr) { - fwrite(buildLog, sizeof(char), length, fd1); - fclose(fd1); - } - - free( buildLog ); -//PERF_COUNT_SUB("build error log") - return 0; + buildLog = (char*)malloc(length); + if (buildLog == (char*)nullptr) { + return 0; + } + if (!gpuInfo->mnIsUserCreated) { + clStatus = clGetProgramBuildInfo( + gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], + CL_PROGRAM_BUILD_LOG, length, buildLog, &length); + } else { + clStatus = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], + gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG, + length, buildLog, &length); + } + if (clStatus != CL_SUCCESS) { + printf("opencl program build info fail\n"); + return 0; } - strcpy( gpuInfo->mArryKnelSrcFile[idx], filename ); -//PERF_COUNT_SUB("strcpy") - if ( binaryExisted == 0 ) { - GeneratBinFromKernelSource( gpuInfo->mpArryPrograms[idx], filename ); - PERF_COUNT_SUB("GenerateBinFromKernelSource") + fd1 = fopen("kernel-build.log", "w+"); + if (fd1 != nullptr) { + fwrite(buildLog, sizeof(char), length, fd1); + fclose(fd1); } - gpuInfo->mnFileCount += 1; -//PERF_COUNT_END - return 1; + free(buildLog); + // PERF_COUNT_SUB("build error log") + return 0; + } + + strcpy(gpuInfo->mArryKnelSrcFile[idx], filename); + // PERF_COUNT_SUB("strcpy") + if (binaryExisted == 0) { + GeneratBinFromKernelSource(gpuInfo->mpArryPrograms[idx], filename); + PERF_COUNT_SUB("GenerateBinFromKernelSource") + } + + gpuInfo->mnFileCount += 1; + // PERF_COUNT_END + return 1; } -l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl,l_uint32 *line) -{ -PERF_COUNT_START("pixReadFromTiffKernel") - cl_int clStatus; - KernelEnv rEnv; - size_t globalThreads[2]; - size_t localThreads[2]; - int gsize; - cl_mem valuesCl; - cl_mem outputCl; - - //global and local work dimensions for Horizontal pass - gsize = (w + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - SetKernelEnv( &rEnv ); - - l_uint32 *pResult = (l_uint32 *)malloc(w*h * sizeof(l_uint32)); - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "composeRGBPixel", &clStatus ); - CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel"); - - //Allocate input and output OCL buffers - valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); - outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); - - //Kernel arguments - clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl); - CHECK_OPENCL( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(w), &w); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(h), &h); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl); - CHECK_OPENCL( clStatus, "clSetKernelArg"); - - //Kernel enqueue -PERF_COUNT_SUB("before") -clStatus = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, - globalThreads, localThreads, 0, nullptr, nullptr); -CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel"); - -/* map results back from gpu */ -void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ, - 0, w * h * sizeof(l_uint32), 0, nullptr, nullptr, - &clStatus); -CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl"); -clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, nullptr, nullptr); - -// Sync -clFinish(rEnv.mpkCmdQueue); -PERF_COUNT_SUB("kernel & map") -PERF_COUNT_END - return pResult; +l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w, + l_int32 h, l_int32 wpl, + l_uint32* line) { + PERF_COUNT_START("pixReadFromTiffKernel") + cl_int clStatus; + KernelEnv rEnv; + size_t globalThreads[2]; + size_t localThreads[2]; + int gsize; + cl_mem valuesCl; + cl_mem outputCl; + + // global and local work dimensions for Horizontal pass + gsize = (w + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; + globalThreads[0] = gsize; + gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; + globalThreads[1] = gsize; + localThreads[0] = GROUPSIZE_X; + localThreads[1] = GROUPSIZE_Y; + + SetKernelEnv(&rEnv); + + l_uint32* pResult = (l_uint32*)malloc(w * h * sizeof(l_uint32)); + rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "composeRGBPixel", &clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel"); + + // Allocate input and output OCL buffers + valuesCl = allocateZeroCopyBuffer( + rEnv, tiffdata, w * h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); + outputCl = allocateZeroCopyBuffer( + rEnv, pResult, w * h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); + + // Kernel arguments + clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl); + CHECK_OPENCL(clStatus, "clSetKernelArg"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(w), &w); + CHECK_OPENCL(clStatus, "clSetKernelArg"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(h), &h); + CHECK_OPENCL(clStatus, "clSetKernelArg"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); + CHECK_OPENCL(clStatus, "clSetKernelArg"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl); + CHECK_OPENCL(clStatus, "clSetKernelArg"); + + // Kernel enqueue + PERF_COUNT_SUB("before") + clStatus = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, + globalThreads, localThreads, 0, nullptr, nullptr); + CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel"); + + /* map results back from gpu */ + void* ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, + CL_MAP_READ, 0, w * h * sizeof(l_uint32), 0, + nullptr, nullptr, &clStatus); + CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl"); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, nullptr, nullptr); + + // Sync + clFinish(rEnv.mpkCmdQueue); + PERF_COUNT_SUB("kernel & map") + PERF_COUNT_END + return pResult; } -//Morphology Dilate operation for 5x5 structuring element. Invokes the relevant OpenCL kernels +// Morphology Dilate operation for 5x5 structuring element. Invokes the relevant +// OpenCL kernels static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h) { size_t globalThreads[2]; cl_mem pixtemp; @@ -1309,7 +1266,8 @@ static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h) { return status; } -//Morphology Erode operation for 5x5 structuring element. Invokes the relevant OpenCL kernels +// Morphology Erode operation for 5x5 structuring element. Invokes the relevant +// OpenCL kernels static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h) { size_t globalThreads[2]; cl_mem pixtemp; @@ -1371,11 +1329,11 @@ static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h) { return status; } -//Morphology Dilate operation. Invokes the relevant OpenCL kernels +// Morphology Dilate operation. Invokes the relevant OpenCL kernels static cl_int pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { l_int32 xp, yp, xn, yn; - SEL *sel; + SEL* sel; size_t globalThreads[2]; cl_mem pixtemp; cl_int status; @@ -1467,11 +1425,11 @@ static cl_int pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, return status; } -//Morphology Erode operation. Invokes the relevant OpenCL kernels +// Morphology Erode operation. Invokes the relevant OpenCL kernels static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { l_int32 xp, yp, xn, yn; - SEL *sel; + SEL* sel; size_t globalThreads[2]; size_t localThreads[2]; cl_mem pixtemp; @@ -1575,7 +1533,7 @@ static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, return status; } -//Morphology Open operation. Invokes the relevant OpenCL kernels +// Morphology Open operation. Invokes the relevant OpenCL kernels static cl_int pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { cl_int status; cl_mem pixtemp; @@ -1592,7 +1550,7 @@ static cl_int pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { return status; } -//Morphology Close operation. Invokes the relevant OpenCL kernels +// Morphology Close operation. Invokes the relevant OpenCL kernels static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { cl_int status; cl_mem pixtemp; @@ -1609,7 +1567,7 @@ static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { return status; } -//output = buffer1 & ~(buffer2) +// output = buffer1 & ~(buffer2) static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outBuffer = nullptr) { cl_int status; @@ -1647,9 +1605,10 @@ static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, } // OpenCL implementation of Get Lines from pix function -//Note: Assumes the source and dest opencl buffer are initialized. No check done -void OpenclDevice::pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, - Pix **pix_hline, Pix **pixClosed, +// Note: Assumes the source and dest opencl buffer are initialized. No check +// done +void OpenclDevice::pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline, + Pix** pix_hline, Pix** pixClosed, bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize, @@ -1729,12 +1688,12 @@ void OpenclDevice::pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, * histogramAllChannels is laid out as all channel 0, then all channel 1... * only supports 1 or 4 channels (bytes_per_pixel) ************************************************************************/ -int OpenclDevice::HistogramRectOCL(unsigned char *imageData, +int OpenclDevice::HistogramRectOCL(unsigned char* imageData, int bytes_per_pixel, int bytes_per_line, int left, // always 0 int top, // always 0 int width, int height, int kHistogramSize, - int *histogramAllChannels) { + int* histogramAllChannels) { PERF_COUNT_START("HistogramRectOCL") cl_int clStatus; int retVal = 0; @@ -1785,7 +1744,7 @@ int OpenclDevice::HistogramRectOCL(unsigned char *imageData, CHECK_OPENCL(clStatus, "clCreateBuffer tmpHistogramBuffer"); /* atomic sync buffer */ - int *zeroBuffer = new int[1]; + int* zeroBuffer = new int[1]; zeroBuffer[0] = 0; cl_mem atomicSyncBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, @@ -1804,87 +1763,92 @@ int OpenclDevice::HistogramRectOCL(unsigned char *imageData, CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectOneChannelReduction"); } else { - histKern.mpkKernel = clCreateKernel( histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectAllChannels"); + histKern.mpkKernel = clCreateKernel( + histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectAllChannels"); - histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram, "kernel_HistogramRectAllChannelsReduction", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectAllChannelsReduction"); - } + histRedKern.mpkKernel = + clCreateKernel(histRedKern.mpkProgram, + "kernel_HistogramRectAllChannelsReduction", &clStatus); + CHECK_OPENCL(clStatus, + "clCreateKernel kernel_HistogramRectAllChannelsReduction"); + } - void *ptr; + void* ptr; - //Initialize tmpHistogramBuffer buffer - ptr = clEnqueueMapBuffer( - histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0, - tmpHistogramBins * sizeof(cl_uint), 0, nullptr, nullptr, &clStatus); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer tmpHistogramBuffer"); + // Initialize tmpHistogramBuffer buffer + ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, + CL_MAP_WRITE, 0, tmpHistogramBins * sizeof(cl_uint), + 0, nullptr, nullptr, &clStatus); + CHECK_OPENCL(clStatus, "clEnqueueMapBuffer tmpHistogramBuffer"); - memset(ptr, 0, tmpHistogramBins*sizeof(cl_uint)); - clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, - nullptr, nullptr); + memset(ptr, 0, tmpHistogramBins * sizeof(cl_uint)); + clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, + nullptr, nullptr); - /* set kernel 1 arguments */ - clStatus = - clSetKernelArg(histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); - CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer"); - cl_uint numPixels = width*height; - clStatus = - clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels); - CHECK_OPENCL( clStatus, "clSetKernelArg numPixels" ); - clStatus = clSetKernelArg(histKern.mpkKernel, 2, sizeof(cl_mem), - &tmpHistogramBuffer); - CHECK_OPENCL( clStatus, "clSetKernelArg tmpHistogramBuffer"); - - /* set kernel 2 arguments */ - int n = numThreads/bytes_per_pixel; - clStatus = clSetKernelArg(histRedKern.mpkKernel, 0, sizeof(cl_int), &n); - CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer"); - clStatus = clSetKernelArg(histRedKern.mpkKernel, 1, sizeof(cl_mem), - &tmpHistogramBuffer); - CHECK_OPENCL( clStatus, "clSetKernelArg tmpHistogramBuffer"); - clStatus = clSetKernelArg(histRedKern.mpkKernel, 2, sizeof(cl_mem), - &histogramBuffer); - CHECK_OPENCL( clStatus, "clSetKernelArg histogramBuffer"); - - /* launch histogram */ -PERF_COUNT_SUB("before") -clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1, - nullptr, global_work_size, local_work_size, 0, - nullptr, nullptr); -CHECK_OPENCL(clStatus, - "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels"); -clFinish(histKern.mpkCmdQueue); -if (clStatus != 0) { - retVal = -1; - } - /* launch histogram */ - clStatus = clEnqueueNDRangeKernel( - histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, nullptr, - red_global_work_size, local_work_size, 0, nullptr, nullptr); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" ); - clFinish( histRedKern.mpkCmdQueue ); - if (clStatus != 0) { - retVal = -1; - } - PERF_COUNT_SUB("redKernel") - - /* map results back from gpu */ - ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, - CL_MAP_READ, 0, - kHistogramSize * bytes_per_pixel * sizeof(int), 0, - nullptr, nullptr, &clStatus); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer"); - if (clStatus != 0) { - retVal = -1; - } - clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, - nullptr, nullptr); - - clReleaseMemObject(histogramBuffer); - clReleaseMemObject(imageBuffer); -PERF_COUNT_SUB("after") -PERF_COUNT_END -return retVal; + /* set kernel 1 arguments */ + clStatus = + clSetKernelArg(histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); + cl_uint numPixels = width * height; + clStatus = clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels); + CHECK_OPENCL(clStatus, "clSetKernelArg numPixels"); + clStatus = clSetKernelArg(histKern.mpkKernel, 2, sizeof(cl_mem), + &tmpHistogramBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg tmpHistogramBuffer"); + + /* set kernel 2 arguments */ + int n = numThreads / bytes_per_pixel; + clStatus = clSetKernelArg(histRedKern.mpkKernel, 0, sizeof(cl_int), &n); + CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); + clStatus = clSetKernelArg(histRedKern.mpkKernel, 1, sizeof(cl_mem), + &tmpHistogramBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg tmpHistogramBuffer"); + clStatus = clSetKernelArg(histRedKern.mpkKernel, 2, sizeof(cl_mem), + &histogramBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg histogramBuffer"); + + /* launch histogram */ + PERF_COUNT_SUB("before") + clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1, + nullptr, global_work_size, local_work_size, + 0, nullptr, nullptr); + CHECK_OPENCL(clStatus, + "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels"); + clFinish(histKern.mpkCmdQueue); + if (clStatus != 0) { + retVal = -1; + } + /* launch histogram */ + clStatus = clEnqueueNDRangeKernel( + histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, nullptr, + red_global_work_size, local_work_size, 0, nullptr, nullptr); + CHECK_OPENCL( + clStatus, + "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction"); + clFinish(histRedKern.mpkCmdQueue); + if (clStatus != 0) { + retVal = -1; + } + PERF_COUNT_SUB("redKernel") + + /* map results back from gpu */ + ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, + CL_MAP_READ, 0, + kHistogramSize * bytes_per_pixel * sizeof(int), 0, + nullptr, nullptr, &clStatus); + CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); + if (clStatus != 0) { + retVal = -1; + } + clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, + nullptr, nullptr); + + clReleaseMemObject(histogramBuffer); + clReleaseMemObject(imageBuffer); + PERF_COUNT_SUB("after") + PERF_COUNT_END + return retVal; } /************************************************************************* @@ -1892,16 +1856,16 @@ return retVal; * from the class, using thresholds/hi_values to the output IMAGE. * only supports 1 or 4 channels ************************************************************************/ -int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, +int OpenclDevice::ThresholdRectToPixOCL(unsigned char* imageData, int bytes_per_pixel, int bytes_per_line, - int *thresholds, int *hi_values, - Pix **pix, int height, int width, + int* thresholds, int* hi_values, + Pix** pix, int height, int width, int top, int left) { PERF_COUNT_START("ThresholdRectToPixOCL") int retVal = 0; /* create pix result buffer */ *pix = pixCreate(width, height, 1); - uint32_t *pixData = pixGetData(*pix); + uint32_t* pixData = pixGetData(*pix); int wpl = pixGetWpl(*pix); int pixSize = wpl * height * sizeof(uint32_t); // number of pixels @@ -1989,7 +1953,7 @@ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, retVal = -1; } /* map results back from gpu */ - void *ptr = + void* ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, pixSize, 0, nullptr, nullptr, &clStatus); CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); @@ -2005,22 +1969,20 @@ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, return retVal; } - - /****************************************************************************** * Data Types for Device Selection *****************************************************************************/ typedef struct _TessScoreEvaluationInputData { - int height; - int width; - int numChannels; - unsigned char *imageData; - Pix *pix; + int height; + int width; + int numChannels; + unsigned char* imageData; + Pix* pix; } TessScoreEvaluationInputData; static void populateTessScoreEvaluationInputData( - TessScoreEvaluationInputData *input) { + TessScoreEvaluationInputData* input) { srand(1); // 8.5x11 inches @ 300dpi rounded to clean multiples int height = 3328; // %256 @@ -2032,7 +1994,7 @@ static void populateTessScoreEvaluationInputData( unsigned char(*imageData4)[4] = (unsigned char(*)[4])malloc( height * width * numChannels * sizeof(unsigned char)); // new unsigned char[4][height*width]; - input->imageData = (unsigned char *)&imageData4[0]; + input->imageData = (unsigned char*)&imageData4[0]; // zero out image unsigned char pixelWhite[4] = {0, 0, 0, 255}; @@ -2107,197 +2069,206 @@ static void populateTessScoreEvaluationInputData( } typedef struct _TessDeviceScore { - float time; // small time means faster device - bool clError; // were there any opencl errors - bool valid; // was the correct response generated + float time; // small time means faster device + bool clError; // were there any opencl errors + bool valid; // was the correct response generated } TessDeviceScore; /****************************************************************************** * Micro Benchmarks for Device Selection *****************************************************************************/ -static double composeRGBPixelMicroBench(GPUEnv *env, +static double composeRGBPixelMicroBench(GPUEnv* env, TessScoreEvaluationInputData input, ds_device_type type) { double time = 0; #if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); + LARGE_INTEGER freq, time_funct_start, time_funct_end; + QueryPerformanceFrequency(&freq); #elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; + mach_timebase_info_data_t info = {0, 0}; + mach_timebase_info(&info); + long long start, stop; #else - timespec time_funct_start, time_funct_end; + timespec time_funct_start, time_funct_end; #endif - // input data - l_uint32 *tiffdata = (l_uint32 *)input.imageData;// same size and random data; data doesn't change workload + // input data + l_uint32* tiffdata = + (l_uint32*)input.imageData; // same size and random data; data doesn't + // change workload - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { + // function call + if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); + QueryPerformanceCounter(&time_funct_start); #elif ON_APPLE - start = mach_absolute_time(); + start = mach_absolute_time(); #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); #endif - OpenclDevice::gpuEnv = *env; - int wpl = pixGetWpl(input.pix); - OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, - wpl, nullptr); + OpenclDevice::gpuEnv = *env; + int wpl = pixGetWpl(input.pix); + OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, + wpl, nullptr); #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); + QueryPerformanceCounter(&time_funct_end); + time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / + (double)(freq.QuadPart); #elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); + time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; #endif - } else { + } else { #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); + QueryPerformanceCounter(&time_funct_start); #elif ON_APPLE - start = mach_absolute_time(); + start = mach_absolute_time(); #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); #endif - Pix *pix = pixCreate(input.width, input.height, 32); - l_uint32 *pixData = pixGetData(pix); - int i, j; - int idx = 0; - for (i = 0; i < input.height ; i++) { - for (j = 0; j < input.width; j++) { - l_uint32 tiffword = tiffdata[i * input.width + j]; - l_int32 rval = ((tiffword) & 0xff); - l_int32 gval = (((tiffword) >> 8) & 0xff); - l_int32 bval = (((tiffword) >> 16) & 0xff); - l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8); - pixData[idx] = value; - idx++; - } - } + Pix* pix = pixCreate(input.width, input.height, 32); + l_uint32* pixData = pixGetData(pix); + int i, j; + int idx = 0; + for (i = 0; i < input.height; i++) { + for (j = 0; j < input.width; j++) { + l_uint32 tiffword = tiffdata[i * input.width + j]; + l_int32 rval = ((tiffword)&0xff); + l_int32 gval = (((tiffword) >> 8) & 0xff); + l_int32 bval = (((tiffword) >> 16) & 0xff); + l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8); + pixData[idx] = value; + idx++; + } + } #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); + QueryPerformanceCounter(&time_funct_end); + time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / + (double)(freq.QuadPart); #elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); + time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; #endif - pixDestroy(&pix); - } - + pixDestroy(&pix); + } - // cleanup + // cleanup - return time; + return time; } -static double histogramRectMicroBench(GPUEnv *env, +static double histogramRectMicroBench(GPUEnv* env, TessScoreEvaluationInputData input, ds_device_type type) { double time; #if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); + LARGE_INTEGER freq, time_funct_start, time_funct_end; + QueryPerformanceFrequency(&freq); #elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; + mach_timebase_info_data_t info = {0, 0}; + mach_timebase_info(&info); + long long start, stop; #else - timespec time_funct_start, time_funct_end; + timespec time_funct_start, time_funct_end; #endif - int left = 0; - int top = 0; - int kHistogramSize = 256; - int bytes_per_line = input.width*input.numChannels; - int *histogramAllChannels = new int[kHistogramSize*input.numChannels]; - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { + int left = 0; + int top = 0; + int kHistogramSize = 256; + int bytes_per_line = input.width * input.numChannels; + int* histogramAllChannels = new int[kHistogramSize * input.numChannels]; + // function call + if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); + QueryPerformanceCounter(&time_funct_start); #elif ON_APPLE - start = mach_absolute_time(); + start = mach_absolute_time(); #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); #endif - OpenclDevice::gpuEnv = *env; - int retVal = OpenclDevice::HistogramRectOCL( - input.imageData, input.numChannels, bytes_per_line, top, left, - input.width, input.height, kHistogramSize, histogramAllChannels); + OpenclDevice::gpuEnv = *env; + int retVal = OpenclDevice::HistogramRectOCL( + input.imageData, input.numChannels, bytes_per_line, top, left, + input.width, input.height, kHistogramSize, histogramAllChannels); #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); + QueryPerformanceCounter(&time_funct_end); + time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / + (double)(freq.QuadPart); #elif ON_APPLE - stop = mach_absolute_time(); - if (retVal == 0) { - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; - } else { - time = FLT_MAX; - } + stop = mach_absolute_time(); + if (retVal == 0) { + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + } else { + time = FLT_MAX; + } #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); + time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; #endif - } else { - int *histogram = new int[kHistogramSize]; + } else { + int* histogram = new int[kHistogramSize]; #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); + QueryPerformanceCounter(&time_funct_start); #elif ON_APPLE - start = mach_absolute_time(); + start = mach_absolute_time(); #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); #endif - for (int ch = 0; ch < input.numChannels; ++ch) { - tesseract::HistogramRect(input.pix, input.numChannels, left, top, - input.width, input.height, histogram); - } + for (int ch = 0; ch < input.numChannels; ++ch) { + tesseract::HistogramRect(input.pix, input.numChannels, left, top, + input.width, input.height, histogram); + } #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); + QueryPerformanceCounter(&time_funct_end); + time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / + (double)(freq.QuadPart); #elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); + time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; #endif - delete[] histogram; - } + delete[] histogram; + } - // cleanup - delete[] histogramAllChannels; - return time; + // cleanup + delete[] histogramAllChannels; + return time; } -//Reproducing the ThresholdRectToPix native version -static void ThresholdRectToPix_Native(const unsigned char *imagedata, +// Reproducing the ThresholdRectToPix native version +static void ThresholdRectToPix_Native(const unsigned char* imagedata, int bytes_per_pixel, int bytes_per_line, - const int *thresholds, - const int *hi_values, Pix **pix) { + const int* thresholds, + const int* hi_values, Pix** pix) { int top = 0; int left = 0; int width = pixGetWidth(*pix); int height = pixGetHeight(*pix); *pix = pixCreate(width, height, 1); - uint32_t *pixdata = pixGetData(*pix); + uint32_t* pixdata = pixGetData(*pix); int wpl = pixGetWpl(*pix); - const unsigned char* srcdata = imagedata + top * bytes_per_line + - left * bytes_per_pixel; + const unsigned char* srcdata = + imagedata + top * bytes_per_line + left * bytes_per_pixel; for (int y = 0; y < height; ++y) { - const uint8_t *linedata = srcdata; - uint32_t *pixline = pixdata + y * wpl; + const uint8_t* linedata = srcdata; + uint32_t* pixline = pixdata + y * wpl; for (int x = 0; x < width; ++x, linedata += bytes_per_pixel) { bool white_result = true; for (int ch = 0; ch < bytes_per_pixel; ++ch) { @@ -2316,193 +2287,198 @@ static void ThresholdRectToPix_Native(const unsigned char *imagedata, } } -static double thresholdRectToPixMicroBench(GPUEnv *env, +static double thresholdRectToPixMicroBench(GPUEnv* env, TessScoreEvaluationInputData input, ds_device_type type) { double time; #if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); + LARGE_INTEGER freq, time_funct_start, time_funct_end; + QueryPerformanceFrequency(&freq); #elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; + mach_timebase_info_data_t info = {0, 0}; + mach_timebase_info(&info); + long long start, stop; #else - timespec time_funct_start, time_funct_end; + timespec time_funct_start, time_funct_end; #endif - // input data - unsigned char pixelHi = (unsigned char)255; - int* thresholds = new int[4]; - thresholds[0] = pixelHi/2; - thresholds[1] = pixelHi/2; - thresholds[2] = pixelHi/2; - thresholds[3] = pixelHi/2; - int *hi_values = new int[4]; - thresholds[0] = pixelHi; - thresholds[1] = pixelHi; - thresholds[2] = pixelHi; - thresholds[3] = pixelHi; - //Pix* pix = pixCreate(width, height, 1); - int top = 0; - int left = 0; - int bytes_per_line = input.width*input.numChannels; - - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { + // input data + unsigned char pixelHi = (unsigned char)255; + int* thresholds = new int[4]; + thresholds[0] = pixelHi / 2; + thresholds[1] = pixelHi / 2; + thresholds[2] = pixelHi / 2; + thresholds[3] = pixelHi / 2; + int* hi_values = new int[4]; + thresholds[0] = pixelHi; + thresholds[1] = pixelHi; + thresholds[2] = pixelHi; + thresholds[3] = pixelHi; + // Pix* pix = pixCreate(width, height, 1); + int top = 0; + int left = 0; + int bytes_per_line = input.width * input.numChannels; + + // function call + if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); + QueryPerformanceCounter(&time_funct_start); #elif ON_APPLE - start = mach_absolute_time(); + start = mach_absolute_time(); #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); #endif - OpenclDevice::gpuEnv = *env; - int retVal = OpenclDevice::ThresholdRectToPixOCL( - input.imageData, input.numChannels, bytes_per_line, thresholds, - hi_values, &input.pix, input.height, input.width, top, left); + OpenclDevice::gpuEnv = *env; + int retVal = OpenclDevice::ThresholdRectToPixOCL( + input.imageData, input.numChannels, bytes_per_line, thresholds, + hi_values, &input.pix, input.height, input.width, top, left); #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); + QueryPerformanceCounter(&time_funct_end); + time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / + (double)(freq.QuadPart); #elif ON_APPLE - stop = mach_absolute_time(); - if (retVal == 0) { - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; - ; - } else { - time = FLT_MAX; - } + stop = mach_absolute_time(); + if (retVal == 0) { + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + ; + } else { + time = FLT_MAX; + } #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); + time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; #endif - } else { - - - tesseract::ImageThresholder thresholder; - thresholder.SetImage( input.pix ); + } else { + tesseract::ImageThresholder thresholder; + thresholder.SetImage(input.pix); #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); + QueryPerformanceCounter(&time_funct_start); #elif ON_APPLE - start = mach_absolute_time(); + start = mach_absolute_time(); #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); #endif - ThresholdRectToPix_Native( input.imageData, input.numChannels, bytes_per_line, - thresholds, hi_values, &input.pix ); + ThresholdRectToPix_Native(input.imageData, input.numChannels, + bytes_per_line, thresholds, hi_values, + &input.pix); #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); + QueryPerformanceCounter(&time_funct_end); + time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / + (double)(freq.QuadPart); #elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); + time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; #endif - } + } - // cleanup - delete[] thresholds; - delete[] hi_values; - return time; + // cleanup + delete[] thresholds; + delete[] hi_values; + return time; } -static double getLineMasksMorphMicroBench(GPUEnv *env, +static double getLineMasksMorphMicroBench(GPUEnv* env, TessScoreEvaluationInputData input, ds_device_type type) { double time = 0; #if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); + LARGE_INTEGER freq, time_funct_start, time_funct_end; + QueryPerformanceFrequency(&freq); #elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; + mach_timebase_info_data_t info = {0, 0}; + mach_timebase_info(&info); + long long start, stop; #else - timespec time_funct_start, time_funct_end; + timespec time_funct_start, time_funct_end; #endif - // input data - int resolution = 300; - int wpl = pixGetWpl(input.pix); - int kThinLineFraction = 20; // tess constant - int kMinLineLengthFraction = 4; // tess constant - int max_line_width = resolution / kThinLineFraction; - int min_line_length = resolution / kMinLineLengthFraction; - int closing_brick = max_line_width / 3; - - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { + // input data + int resolution = 300; + int wpl = pixGetWpl(input.pix); + int kThinLineFraction = 20; // tess constant + int kMinLineLengthFraction = 4; // tess constant + int max_line_width = resolution / kThinLineFraction; + int min_line_length = resolution / kMinLineLengthFraction; + int closing_brick = max_line_width / 3; + + // function call + if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); + QueryPerformanceCounter(&time_funct_start); #elif ON_APPLE - start = mach_absolute_time(); + start = mach_absolute_time(); #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); #endif - OpenclDevice::gpuEnv = *env; - OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix); - Pix *pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr; - OpenclDevice::pixGetLinesCL( - nullptr, input.pix, &pix_vline, &pix_hline, &pix_closed, true, - closing_brick, closing_brick, max_line_width, max_line_width, - min_line_length, min_line_length); + OpenclDevice::gpuEnv = *env; + OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix); + Pix *pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr; + OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline, + &pix_closed, true, closing_brick, closing_brick, + max_line_width, max_line_width, min_line_length, + min_line_length); - OpenclDevice::releaseMorphCLBuffers(); + OpenclDevice::releaseMorphCLBuffers(); #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); + QueryPerformanceCounter(&time_funct_end); + time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / + (double)(freq.QuadPart); #elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); + time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; #endif - } else { + } else { #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); + QueryPerformanceCounter(&time_funct_start); #elif ON_APPLE - start = mach_absolute_time(); + start = mach_absolute_time(); #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); #endif - // native serial code - Pix *src_pix = input.pix; - Pix *pix_closed = - pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); - Pix *pix_solid = - pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); - Pix *pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); - pixDestroy(&pix_solid); - Pix *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - Pix *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - pixDestroy(&pix_hollow); + // native serial code + Pix* src_pix = input.pix; + Pix* pix_closed = + pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); + Pix* pix_solid = + pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); + Pix* pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); + pixDestroy(&pix_solid); + Pix* pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); + Pix* pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); + pixDestroy(&pix_hollow); #if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); + QueryPerformanceCounter(&time_funct_end); + time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / + (double)(freq.QuadPart); #elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); + time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; #endif - } + } - return time; + return time; } - - /****************************************************************************** * Device Selection *****************************************************************************/ @@ -2510,8 +2486,8 @@ static double getLineMasksMorphMicroBench(GPUEnv *env, #include "stdlib.h" // encode score object as byte string -static ds_status serializeScore(ds_device *device, void **serializedScore, - unsigned int *serializedScoreSize) { +static ds_status serializeScore(ds_device* device, void** serializedScore, + unsigned int* serializedScoreSize) { *serializedScoreSize = sizeof(TessDeviceScore); *serializedScore = new unsigned char[*serializedScoreSize]; memcpy(*serializedScore, device->score, *serializedScoreSize); @@ -2519,8 +2495,8 @@ static ds_status serializeScore(ds_device *device, void **serializedScore, } // parses byte string and stores in score object -static ds_status deserializeScore(ds_device *device, - const unsigned char *serializedScore, +static ds_status deserializeScore(ds_device* device, + const unsigned char* serializedScore, unsigned int serializedScoreSize) { // check that serializedScoreSize == sizeof(TessDeviceScore); device->score = new TessDeviceScore; @@ -2528,18 +2504,18 @@ static ds_status deserializeScore(ds_device *device, return DS_SUCCESS; } -static ds_status releaseScore(void *score) { - delete (TessDeviceScore *)score; +static ds_status releaseScore(void* score) { + delete (TessDeviceScore*)score; return DS_SUCCESS; } // evaluate devices -static ds_status evaluateScoreForDevice(ds_device *device, void *inputData) { +static ds_status evaluateScoreForDevice(ds_device* device, void* inputData) { // overwrite statuc gpuEnv w/ current device // so native opencl calls can be used; they use static gpuEnv printf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - GPUEnv *env = nullptr; + GPUEnv* env = nullptr; if (device->type == DS_DEVICE_OPENCL_DEVICE) { env = new GPUEnv; // printf("[DS] populating tmp GPUEnv from device\n"); @@ -2551,8 +2527,8 @@ static ds_status evaluateScoreForDevice(ds_device *device, void *inputData) { OpenclDevice::CompileKernelFile(env, ""); } - TessScoreEvaluationInputData *input = - static_cast(inputData); + TessScoreEvaluationInputData* input = + static_cast(inputData); // pixReadTiff double composeRGBPixelTime = @@ -2581,7 +2557,7 @@ static ds_status evaluateScoreForDevice(ds_device *device, void *inputData) { thresholdRectToPixWeight * thresholdRectToPixTime + getLineMasksMorphWeight * getLineMasksMorphTime; device->score = new TessDeviceScore; - ((TessDeviceScore *)device->score)->time = weightedTime; + ((TessDeviceScore*)device->score)->time = weightedTime; printf("[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); @@ -2594,12 +2570,12 @@ static ds_status evaluateScoreForDevice(ds_device *device, void *inputData) { printf("[DS]%25s: %f (w=%.1f)\n", "getLineMasksMorph", getLineMasksMorphTime, getLineMasksMorphWeight); printf("[DS]%25s: %f\n", "Score", - static_cast(device->score)->time); + static_cast(device->score)->time); return DS_SUCCESS; } // initial call to select device -ds_device OpenclDevice::getDeviceSelection( ) { +ds_device OpenclDevice::getDeviceSelection() { if (!deviceIsSelected) { PERF_COUNT_START("getDeviceSelection") // check if opencl is available at runtime @@ -2608,11 +2584,11 @@ ds_device OpenclDevice::getDeviceSelection( ) { // PERF_COUNT_SUB("LoadOpencl") // setup devices ds_status status; - ds_profile *profile; + ds_profile* profile; status = initDSProfile(&profile, "v0.1"); PERF_COUNT_SUB("initDSProfile") // try reading scores from file - const char *fileName = "tesseract_opencl_profile_devices.dat"; + const char* fileName = "tesseract_opencl_profile_devices.dat"; status = readProfileFromFile(profile, deserializeScore, fileName); if (status != DS_SUCCESS) { // need to run evaluation @@ -2656,7 +2632,7 @@ ds_device OpenclDevice::getDeviceSelection( ) { int bestDeviceIdx = -1; for (unsigned d = 0; d < profile->numDevices; d++) { ds_device device = profile->devices[d]; - TessDeviceScore score = *(TessDeviceScore *)device.score; + TessDeviceScore score = *(TessDeviceScore*)device.score; float time = score.time; printf("[DS] Device[%u] %i:%s score is %f\n", d + 1, device.type, @@ -2675,7 +2651,7 @@ ds_device OpenclDevice::getDeviceSelection( ) { // TODO: call destructor for profile object? bool overridden = false; - char *overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE"); + char* overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE"); if (overrideDeviceStr != nullptr) { int overrideDeviceIdx = atoi(overrideDeviceStr); if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) { @@ -2720,7 +2696,6 @@ ds_device OpenclDevice::getDeviceSelection( ) { return selectedDevice; } - bool OpenclDevice::selectedDeviceIsOpenCL() { ds_device device = getDeviceSelection(); return (device.type == DS_DEVICE_OPENCL_DEVICE); diff --git a/src/opencl/openclwrapper.h b/src/opencl/openclwrapper.h index 6d03008e55..0af1ab5836 100644 --- a/src/opencl/openclwrapper.h +++ b/src/opencl/openclwrapper.h @@ -22,29 +22,29 @@ #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \ defined(__CYGWIN__) || defined(__MINGW32__) #define ON_WINDOWS 1 -#define ON_LINUX 0 -#define ON_APPLE 0 -#define ON_OTHER 0 +#define ON_LINUX 0 +#define ON_APPLE 0 +#define ON_OTHER 0 #define IF_WINDOWS(X) X #define IF_LINUX(X) #define IF_APPLE(X) #define IF_OTHER(X) #define NOT_WINDOWS(X) -#elif defined( __linux__ ) +#elif defined(__linux__) #define ON_WINDOWS 0 -#define ON_LINUX 1 -#define ON_APPLE 0 -#define ON_OTHER 0 +#define ON_LINUX 1 +#define ON_APPLE 0 +#define ON_OTHER 0 #define IF_WINDOWS(X) #define IF_LINUX(X) X #define IF_APPLE(X) #define IF_OTHER(X) #define NOT_WINDOWS(X) X -#elif defined( __APPLE__ ) +#elif defined(__APPLE__) #define ON_WINDOWS 0 -#define ON_LINUX 0 -#define ON_APPLE 1 -#define ON_OTHER 0 +#define ON_LINUX 0 +#define ON_APPLE 1 +#define ON_OTHER 0 #define IF_WINDOWS(X) #define IF_LINUX(X) #define IF_APPLE(X) X @@ -52,9 +52,9 @@ #define NOT_WINDOWS(X) X #else #define ON_WINDOWS 0 -#define ON_LINUX 0 -#define ON_APPLE 0 -#define ON_OTHER 1 +#define ON_LINUX 0 +#define ON_APPLE 0 +#define ON_OTHER 1 #define IF_WINDOWS(X) #define IF_LINUX(X) #define IF_APPLE(X) @@ -72,23 +72,24 @@ * 0 - no reporting * 1 - no reporting * 2 - report total function call time for functions we're tracking - * 3 - optionally report breakdown of function calls (kernel launch, kernel time, data copies) + * 3 - optionally report breakdown of function calls (kernel launch, kernel + *time, data copies) ************************************************************************************/ #define PERF_COUNT_VERBOSE 1 #define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n" - #if ON_WINDOWS #if PERF_COUNT_VERBOSE >= 2 -#define PERF_COUNT_START(FUNCT_NAME) \ - char *funct_name = FUNCT_NAME; \ - double elapsed_time_sec; \ - LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ - QueryPerformanceFrequency(&freq); \ - QueryPerformanceCounter(&time_funct_start); \ - time_sub_start = time_funct_start; \ - time_sub_end = time_funct_start; +#define PERF_COUNT_START(FUNCT_NAME) \ + char* funct_name = FUNCT_NAME; \ + double elapsed_time_sec; \ + LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, \ + time_sub_end; \ + QueryPerformanceFrequency(&freq); \ + QueryPerformanceCounter(&time_funct_start); \ + time_sub_start = time_funct_start; \ + time_sub_end = time_funct_start; #define PERF_COUNT_END \ QueryPerformanceCounter(&time_funct_end); \ @@ -111,18 +112,17 @@ #define PERF_COUNT_SUB(SUB) #endif - // not on windows #else #if PERF_COUNT_VERBOSE >= 2 -#define PERF_COUNT_START(FUNCT_NAME) \ - char *funct_name = FUNCT_NAME; \ - double elapsed_time_sec; \ - timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ - clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \ - time_sub_start = time_funct_start; \ - time_sub_end = time_funct_start; +#define PERF_COUNT_START(FUNCT_NAME) \ + char* funct_name = FUNCT_NAME; \ + double elapsed_time_sec; \ + timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ + clock_gettime(CLOCK_MONOTONIC, &time_funct_start); \ + time_sub_start = time_funct_start; \ + time_sub_end = time_funct_start; #define PERF_COUNT_END \ clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \ @@ -169,124 +169,127 @@ #define GROUPSIZE_HMORX 256 #define GROUPSIZE_HMORY 1 -typedef struct _KernelEnv -{ - cl_context mpkContext; - cl_command_queue mpkCmdQueue; - cl_program mpkProgram; - cl_kernel mpkKernel; - char mckKernelName[150]; +typedef struct _KernelEnv { + cl_context mpkContext; + cl_command_queue mpkCmdQueue; + cl_program mpkProgram; + cl_kernel mpkKernel; + char mckKernelName[150]; } KernelEnv; -typedef struct _OpenCLEnv -{ - cl_platform_id mpOclPlatformID; - cl_context mpOclContext; - cl_device_id mpOclDevsID; - cl_command_queue mpOclCmdQueue; +typedef struct _OpenCLEnv { + cl_platform_id mpOclPlatformID; + cl_context mpOclContext; + cl_device_id mpOclDevsID; + cl_command_queue mpOclCmdQueue; } OpenCLEnv; -typedef int ( *cl_kernel_function )( void **userdata, KernelEnv *kenv ); - -#define CHECK_OPENCL(status,name) \ -if( status != CL_SUCCESS ) \ -{ \ - printf ("OpenCL error code is %d at when %s .\n", status, name); \ -} - - -typedef struct _GPUEnv -{ - //share vb in all modules in hb library - cl_platform_id mpPlatformID; - cl_device_type mDevType; - cl_context mpContext; - cl_device_id *mpArryDevsID; - cl_device_id mpDevID; - cl_command_queue mpCmdQueue; - cl_kernel mpArryKernels[MAX_CLFILE_NUM]; - cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file - char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256 - mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1]; - cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM]; - int mnKernelCount, mnFileCount, // only one kernel file - mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper - int mnKhrFp64Flag; - int mnAmdFp64Flag; +typedef int (*cl_kernel_function)(void** userdata, KernelEnv* kenv); + +#define CHECK_OPENCL(status, name) \ + if (status != CL_SUCCESS) { \ + printf("OpenCL error code is %d at when %s .\n", status, name); \ + } + +typedef struct _GPUEnv { + // share vb in all modules in hb library + cl_platform_id mpPlatformID; + cl_device_type mDevType; + cl_context mpContext; + cl_device_id* mpArryDevsID; + cl_device_id mpDevID; + cl_command_queue mpCmdQueue; + cl_kernel mpArryKernels[MAX_CLFILE_NUM]; + cl_program mpArryPrograms[MAX_CLFILE_NUM]; // one program object maps one + // kernel source file + char mArryKnelSrcFile[MAX_CLFILE_NUM] + [256], // the max len of kernel file name is 256 + mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1]; + cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM]; + int mnKernelCount, mnFileCount, // only one kernel file + mnIsUserCreated; // 1: created , 0:no create and needed to create by + // opencl wrapper + int mnKhrFp64Flag; + int mnAmdFp64Flag; } GPUEnv; - -class OpenclDevice -{ - -public: - static GPUEnv gpuEnv; - static int isInited; - OpenclDevice(); - ~OpenclDevice(); - static int InitEnv(); // load dll, call InitOpenclRunEnv(0) - static int InitOpenclRunEnv( int argc ); // RegistOpenclKernel, double flags, compile kernels - static int InitOpenclRunEnv_DeviceSelection( int argc ); // RegistOpenclKernel, double flags, compile kernels - static int RegistOpenclKernel(); - static int ReleaseOpenclRunEnv(); - static int ReleaseOpenclEnv( GPUEnv *gpuInfo ); - static int CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ); - static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName ); - static int GeneratBinFromKernelSource( cl_program program, const char * clFileName ); - static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes ); - static int BinaryGenerated( const char * clFileName, FILE ** fhandle ); - //static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption ); - static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line); - static int composeRGBPixelCl(int *tiffdata,int *line,int h,int w); - -/* OpenCL implementations of Morphological operations*/ - - //Initialiation of OCL buffers used in Morph operations - static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs); - static void releaseMorphCLBuffers(); - - static void pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, - Pix **pix_hline, Pix **pixClosed, - bool getpixClosed, l_int32 close_hsize, - l_int32 close_vsize, l_int32 open_hsize, - l_int32 open_vsize, l_int32 line_hsize, - l_int32 line_vsize); - - //int InitOpenclAttr( OpenCLEnv * env ); - //int ReleaseKernel( KernelEnv * env ); - static int SetKernelEnv( KernelEnv *envInfo ); - //int CreateKernel( char * kernelname, KernelEnv * env ); - //int RunKernel( const char *kernelName, void **userdata ); - //int ConvertToString( const char *filename, char **source ); - //int CheckKernelName( KernelEnv *envInfo, const char *kernelName ); - //int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function ); - //int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata ); - //int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function ); - - static int LoadOpencl(); +class OpenclDevice { + public: + static GPUEnv gpuEnv; + static int isInited; + OpenclDevice(); + ~OpenclDevice(); + static int InitEnv(); // load dll, call InitOpenclRunEnv(0) + static int InitOpenclRunEnv( + int argc); // RegistOpenclKernel, double flags, compile kernels + static int InitOpenclRunEnv_DeviceSelection( + int argc); // RegistOpenclKernel, double flags, compile kernels + static int RegistOpenclKernel(); + static int ReleaseOpenclRunEnv(); + static int ReleaseOpenclEnv(GPUEnv* gpuInfo); + static int CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption); + static int CachedOfKernerPrg(const GPUEnv* gpuEnvCached, + const char* clFileName); + static int GeneratBinFromKernelSource(cl_program program, + const char* clFileName); + static int WriteBinaryToFile(const char* fileName, const char* birary, + size_t numBytes); + static int BinaryGenerated(const char* clFileName, FILE** fhandle); + // static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const + // char *buildOption ); + static l_uint32* pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w, + l_int32 h, l_int32 wpl, + l_uint32* line); + static int composeRGBPixelCl(int* tiffdata, int* line, int h, int w); + + /* OpenCL implementations of Morphological operations*/ + + // Initialiation of OCL buffers used in Morph operations + static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs); + static void releaseMorphCLBuffers(); + + static void pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline, + Pix** pix_hline, Pix** pixClosed, bool getpixClosed, + l_int32 close_hsize, l_int32 close_vsize, + l_int32 open_hsize, l_int32 open_vsize, + l_int32 line_hsize, l_int32 line_vsize); + + // int InitOpenclAttr( OpenCLEnv * env ); + // int ReleaseKernel( KernelEnv * env ); + static int SetKernelEnv(KernelEnv* envInfo); + // int CreateKernel( char * kernelname, KernelEnv * env ); + // int RunKernel( const char *kernelName, void **userdata ); + // int ConvertToString( const char *filename, char **source ); + // int CheckKernelName( KernelEnv *envInfo, const char *kernelName ); + // int RegisterKernelWrapper( const char *kernelName, cl_kernel_function + // function ); int RunKernelWrapper( cl_kernel_function function, const char * + // kernelName, void **usrdata ); int GetKernelEnvAndFunc( const char + // *kernelName, KernelEnv *env, cl_kernel_function *function ); + + static int LoadOpencl(); #ifdef WIN32 - //static int OpenclInite(); - static void FreeOpenclDll(); + // static int OpenclInite(); + static void FreeOpenclDll(); #endif - inline static int AddKernelConfig( int kCount, const char *kName ); + inline static int AddKernelConfig(int kCount, const char* kName); - /* for binarization */ - static int HistogramRectOCL(unsigned char *imagedata, int bytes_per_pixel, - int bytes_per_line, int left, int top, - int width, int height, int kHistogramSize, - int *histogramAllChannels); + /* for binarization */ + static int HistogramRectOCL(unsigned char* imagedata, int bytes_per_pixel, + int bytes_per_line, int left, int top, int width, + int height, int kHistogramSize, + int* histogramAllChannels); - static int ThresholdRectToPixOCL(unsigned char *imagedata, - int bytes_per_pixel, int bytes_per_line, - int *thresholds, int *hi_values, Pix **pix, - int rect_height, int rect_width, - int rect_top, int rect_left); + static int ThresholdRectToPixOCL(unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int* thresholds, int* hi_values, Pix** pix, + int rect_height, int rect_width, + int rect_top, int rect_left); - static ds_device getDeviceSelection(); - static ds_device selectedDevice; - static bool deviceIsSelected; - static bool selectedDeviceIsOpenCL(); + static ds_device getDeviceSelection(); + static ds_device selectedDevice; + static bool deviceIsSelected; + static bool selectedDeviceIsOpenCL(); }; #endif // USE_OPENCL diff --git a/src/textord/alignedblob.cpp b/src/textord/alignedblob.cpp index 494758dadb..31ce493272 100644 --- a/src/textord/alignedblob.cpp +++ b/src/textord/alignedblob.cpp @@ -72,15 +72,14 @@ const int kMaxSkewFactor = 15; // on vertical gap before giving up and calling the line ended. // resolution is the original image resolution, and align0 indicates the // type of tab stop to be found. -AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, - int height, int v_gap_multiple, - int min_gutter_width, +AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, int height, + int v_gap_multiple, int min_gutter_width, int resolution, TabAlignment align0) - : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED), - ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED), - alignment(align0), - confirmed_type(TT_CONFIRMED), - min_length(0) { + : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED), + ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED), + alignment(align0), + confirmed_type(TT_CONFIRMED), + min_length(0) { // Set the tolerances according to the type of line sought. // For tab search, these are based on the image resolution for most, or // the height of the starting blob for the maximum vertical gap. @@ -104,8 +103,7 @@ AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, min_points = kMinAlignedTabs; } min_gutter = static_cast(height * gutter_fraction + 0.5); - if (min_gutter < min_gutter_width) - min_gutter = min_gutter_width; + if (min_gutter < min_gutter_width) min_gutter = min_gutter_width; // Fit the vertical vector into an ICOORD, which is 16 bit. set_vertical(vertical_x, vertical_y); } @@ -113,17 +111,16 @@ AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, // Constructor to set the parameters for finding vertical lines. // Vertical_x and vertical_y are the current estimates of the true vertical // direction (up) in the image. Width is the width of the starter blob. -AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, - int width) - : gutter_fraction(0.0), - right_tab(false), - ragged(false), - alignment(TA_SEPARATOR), - confirmed_type(TT_VLINE), - max_v_gap(kVLineSearchSize), - min_gutter(kVLineGutter), - min_points(1), - min_length(kVLineMinLength) { +AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, int width) + : gutter_fraction(0.0), + right_tab(false), + ragged(false), + alignment(TA_SEPARATOR), + confirmed_type(TT_VLINE), + max_v_gap(kVLineSearchSize), + min_gutter(kVLineGutter), + min_points(1), + min_length(kVLineMinLength) { // Compute threshold for left and right alignment. l_align_tolerance = std::max(kVLineAlignment, width); r_align_tolerance = std::max(kVLineAlignment, width); @@ -135,23 +132,19 @@ AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, // Fit the vertical vector into an ICOORD, which is 16 bit. void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) { int factor = 1; - if (vertical_y > INT16_MAX) - factor = vertical_y / INT16_MAX + 1; + if (vertical_y > INT16_MAX) factor = vertical_y / INT16_MAX + 1; vertical.set_x(vertical_x / factor); vertical.set_y(vertical_y / factor); } - -AlignedBlob::AlignedBlob(int gridsize, - const ICOORD& bleft, const ICOORD& tright) - : BlobGrid(gridsize, bleft, tright) { -} +AlignedBlob::AlignedBlob(int gridsize, const ICOORD& bleft, + const ICOORD& tright) + : BlobGrid(gridsize, bleft, tright) {} // Return true if the given coordinates are within the test rectangle // and the debug level is at least the given detail level. bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { - if (textord_debug_tabfind < detail_level) - return false; + if (textord_debug_tabfind < detail_level) return false; return x >= textord_testregion_left && x <= textord_testregion_right && y <= textord_testregion_top && y >= textord_testregion_bottom; } @@ -160,8 +153,7 @@ bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { ScrollView* AlignedBlob::DisplayTabs(const char* window_name, ScrollView* tab_win) { #ifndef GRAPHICS_DISABLED - if (tab_win == nullptr) - tab_win = MakeWindow(0, 50, window_name); + if (tab_win == nullptr) tab_win = MakeWindow(0, 50, window_name); // For every tab in the grid, display it. GridSearch gsearch(this); gsearch.StartFullSearch(); @@ -220,8 +212,7 @@ static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) { // vertical direction. (skew finding.) // Returns nullptr if no decent vector can be found. TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, - BLOBNBOX* bbox, - int* vertical_x, + BLOBNBOX* bbox, int* vertical_x, int* vertical_y) { int ext_start_y, ext_end_y; BLOBNBOX_CLIST good_points; @@ -246,9 +237,9 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, // will always end up parallel to the vertical direction. bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); if ((pt_count >= align_params.min_points && - end_y - start_y >= align_params.min_length && - (align_params.ragged || - end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || + end_y - start_y >= align_params.min_length && + (align_params.ragged || + end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || at_least_2_crossings) { int confirmed_points = 0; // Count existing confirmed points to see if vector is acceptable. @@ -267,8 +258,8 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, confirmed_points + confirmed_points < pt_count) { const TBOX& box = bbox->bounding_box(); if (debug) { - tprintf("Confirming tab vector of %d pts starting at %d,%d\n", - pt_count, box.left(), box.bottom()); + tprintf("Confirming tab vector of %d pts starting at %d,%d\n", pt_count, + box.left(), box.bottom()); } // Flag all the aligned neighbours as confirmed . for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { @@ -283,11 +274,9 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, } } // Now make the vector and return it. - TabVector* result = TabVector::FitVector(align_params.alignment, - align_params.vertical, - ext_start_y, ext_end_y, - &good_points, - vertical_x, vertical_y); + TabVector* result = TabVector::FitVector( + align_params.alignment, align_params.vertical, ext_start_y, ext_end_y, + &good_points, vertical_x, vertical_y); result->set_intersects_other_lines(at_least_2_crossings); if (debug) { tprintf("Box was %d, %d\n", box.left(), box.bottom()); @@ -299,10 +288,11 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, confirmed_points, pt_count); } } else if (debug) { - tprintf("Tab vector failed basic tests: pt count %d vs min %d, " - "length %d vs min %d, min grad %g\n", - pt_count, align_params.min_points, end_y - start_y, - align_params.min_length, abs(end_x - start_x) * kMinTabGradient); + tprintf( + "Tab vector failed basic tests: pt count %d vs min %d, " + "length %d vs min %d, min grad %g\n", + pt_count, align_params.min_points, end_y - start_y, + align_params.min_length, abs(end_x - start_x) * kMinTabGradient); } return nullptr; } @@ -311,9 +301,9 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, // direction with the given blob. Returns a list of aligned // blobs and the number in the list. // For other parameters see FindAlignedBlob below. -int AlignedBlob::AlignTabs(const AlignedBlobParams& params, - bool top_to_bottom, BLOBNBOX* bbox, - BLOBNBOX_CLIST* good_points, int* end_y) { +int AlignedBlob::AlignTabs(const AlignedBlobParams& params, bool top_to_bottom, + BLOBNBOX* bbox, BLOBNBOX_CLIST* good_points, + int* end_y) { int ptcount = 0; BLOBNBOX_C_IT it(good_points); @@ -327,8 +317,8 @@ int AlignedBlob::AlignTabs(const AlignedBlobParams& params, while (bbox != nullptr) { // Add the blob to the list if the appropriate side is a tab candidate, // or if we are working on a ragged tab. - TabType type = params.right_tab ? bbox->right_tab_type() - : bbox->left_tab_type(); + TabType type = + params.right_tab ? bbox->right_tab_type() : bbox->left_tab_type(); if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && (it.empty() || it.data() != bbox)) { if (top_to_bottom) @@ -344,8 +334,7 @@ int AlignedBlob::AlignTabs(const AlignedBlobParams& params, bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); if (bbox != nullptr) { box = bbox->bounding_box(); - if (!params.ragged) - x_start = params.right_tab ? box.right() : box.left(); + if (!params.ragged) x_start = params.right_tab ? box.right() : box.left(); } } if (debug) { @@ -375,8 +364,8 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, int start_y = top_to_bottom ? box.bottom() : box.top(); if (WithinTestRegion(2, x_start, start_y)) { tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", - box.left(), box.top(), box.right(), box.bottom(), - left_column_edge, right_column_edge); + box.left(), box.top(), box.right(), box.bottom(), left_column_edge, + right_column_edge); } // Compute skew tolerance. int skew_tolerance = p.max_v_gap / kMaxSkewFactor; @@ -384,7 +373,7 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, // all possibly relevant boxes up to p.max_v_gap above or below accoording // to top_to_bottom. // Start with a notion of vertical with the current estimate. - int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y(); + int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y() / 2) / p.vertical.y(); if (top_to_bottom) { x2 = x_start - x2; *end_y = start_y - p.max_v_gap; @@ -418,15 +407,14 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, // neighbour is the blob that is currently being investigated. BLOBNBOX* neighbour = nullptr; while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) { - if (neighbour == bbox) - continue; + if (neighbour == bbox) continue; TBOX nbox = neighbour->bounding_box(); int n_y = (nbox.top() + nbox.bottom()) / 2; if ((!top_to_bottom && n_y > start_y + p.max_v_gap) || (top_to_bottom && n_y < start_y - p.max_v_gap)) { if (WithinTestRegion(2, x_start, start_y)) - tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", - nbox.left(), nbox.bottom(), nbox.right(), nbox.top()); + tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", nbox.left(), + nbox.bottom(), nbox.right(), nbox.top()); break; // Gone far enough. } // It is CRITICAL to ensure that forward progress is made, (strictly @@ -453,30 +441,26 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, int n_x = p.right_tab ? n_right : n_left; if (WithinTestRegion(2, x_start, start_y)) tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n", - nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), - n_x, n_y, x_at_n_y); - if (p.right_tab && - n_left < x_at_n_y + p.min_gutter && + nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), n_x, n_y, + x_at_n_y); + if (p.right_tab && n_left < x_at_n_y + p.min_gutter && n_right > x_at_n_y + p.r_align_tolerance && (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) { // In the gutter so end of line. if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED) bbox->set_right_tab_type(TT_DELETED); *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); - if (WithinTestRegion(2, x_start, start_y)) - tprintf("gutter\n"); + if (WithinTestRegion(2, x_start, start_y)) tprintf("gutter\n"); return nullptr; } - if (!p.right_tab && - n_left < x_at_n_y - p.l_align_tolerance && + if (!p.right_tab && n_left < x_at_n_y - p.l_align_tolerance && n_right > x_at_n_y - p.min_gutter && (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) { // In the gutter so end of line. if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED) bbox->set_left_tab_type(TT_DELETED); *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); - if (WithinTestRegion(2, x_start, start_y)) - tprintf("gutter\n"); + if (WithinTestRegion(2, x_start, start_y)) tprintf("gutter\n"); return nullptr; } if ((p.right_tab && neighbour->leader_on_right()) || @@ -487,9 +471,8 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, // Aligned so keep it. If it is a marked tab save it as result, // otherwise keep it as backup_result to return in case of later failure. if (WithinTestRegion(2, x_start, start_y)) - tprintf("aligned, seeking%d, l=%d, r=%d\n", - p.right_tab, neighbour->left_tab_type(), - neighbour->right_tab_type()); + tprintf("aligned, seeking%d, l=%d, r=%d\n", p.right_tab, + neighbour->left_tab_type(), neighbour->right_tab_type()); TabType n_type = p.right_tab ? neighbour->right_tab_type() : neighbour->left_tab_type(); if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) { @@ -506,19 +489,16 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, x_diff = n_x - x_at_n_y; y_diff = n_y - start_y; int new_dist = x_diff * x_diff + y_diff * y_diff; - if (new_dist < old_dist) - result = neighbour; + if (new_dist < old_dist) result = neighbour; } } else if (backup_result == nullptr) { - if (WithinTestRegion(2, x_start, start_y)) - tprintf("Backup\n"); + if (WithinTestRegion(2, x_start, start_y)) tprintf("Backup\n"); backup_result = neighbour; } else { TBOX backup_box = backup_result->bounding_box(); if ((p.right_tab && backup_box.right() < nbox.right()) || (!p.right_tab && backup_box.left() > nbox.left())) { - if (WithinTestRegion(2, x_start, start_y)) - tprintf("Better backup\n"); + if (WithinTestRegion(2, x_start, start_y)) tprintf("Better backup\n"); backup_result = neighbour; } } diff --git a/src/textord/alignedblob.h b/src/textord/alignedblob.h index 0ba222ef74..275f4b8845 100644 --- a/src/textord/alignedblob.h +++ b/src/textord/alignedblob.h @@ -66,13 +66,13 @@ struct AlignedBlobParams { int max_v_gap; // Max vertical gap to be tolerated. int min_gutter; // Minimum gutter between columns. // Tolerances allowed on horizontal alignment of aligned edges. - int l_align_tolerance; // Left edges. - int r_align_tolerance; // Right edges. + int l_align_tolerance; // Left edges. + int r_align_tolerance; // Right edges. // Conditions for accepting a line. - int min_points; // Minimum number of points to be OK. - int min_length; // Min length of completed line. + int min_points; // Minimum number of points to be OK. + int min_length; // Min length of completed line. - ICOORD vertical; // Current estimate of logical vertical. + ICOORD vertical; // Current estimate of logical vertical. }; // The AlignedBlob class contains code to find vertically aligned blobs. @@ -97,17 +97,16 @@ class AlignedBlob : public BlobGrid { // vertical direction. (skew finding.) // Returns nullptr if no decent vector can be found. TabVector* FindVerticalAlignment(AlignedBlobParams align_params, - BLOBNBOX* bbox, - int* vertical_x, int* vertical_y); + BLOBNBOX* bbox, int* vertical_x, + int* vertical_y); private: // Find a set of blobs that are aligned in the given vertical // direction with the given blob. Returns a list of aligned // blobs and the number in the list. // For other parameters see FindAlignedBlob below. - int AlignTabs(const AlignedBlobParams& params, - bool top_to_bottom, BLOBNBOX* bbox, - BLOBNBOX_CLIST* good_points, int* end_y); + int AlignTabs(const AlignedBlobParams& params, bool top_to_bottom, + BLOBNBOX* bbox, BLOBNBOX_CLIST* good_points, int* end_y); // Search vertically for a blob that is aligned with the input bbox. // The search parameters are determined by AlignedBlobParams. @@ -116,9 +115,8 @@ class AlignedBlob : public BlobGrid { // or if a blob was found in the gutter. On a nullptr return, end_y // is set to the edge of the search box or the leading edge of the // gutter blob if one was found. - BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p, - bool top_to_bottom, BLOBNBOX* bbox, - int x_start, int* end_y); + BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p, bool top_to_bottom, + BLOBNBOX* bbox, int x_start, int* end_y); }; } // namespace tesseract. diff --git a/src/textord/baselinedetect.cpp b/src/textord/baselinedetect.cpp index 760ddb8a73..43633b210e 100644 --- a/src/textord/baselinedetect.cpp +++ b/src/textord/baselinedetect.cpp @@ -50,7 +50,8 @@ const double kMaxSkewDeviation = 1.0 / 64; const double kOffsetQuantizationFactor = 3.0 / 64; // Fraction of line spacing estimate for computing blob fit error. const double kFitHalfrangeFactor = 6.0 / 64; -// Max fraction of line spacing allowed before a baseline counts as badly fitting. +// Max fraction of line spacing allowed before a baseline counts as badly +// fitting. const double kMaxBaselineError = 3.0 / 64; // Multiple of linespacing that sets max_blob_size in TO_BLOCK. // Copied from textord_excess_blobsize. @@ -64,9 +65,11 @@ const double kMinFittingLinespacings = 0.25; namespace tesseract { BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row) - : blobs_(to_row->blob_list()), - baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f), - baseline_error_(0.0), good_baseline_(false) { + : blobs_(to_row->blob_list()), + baseline_pt1_(0.0f, 0.0f), + baseline_pt2_(0.0f, 0.0f), + baseline_error_(0.0), + good_baseline_(false) { ComputeBoundingBox(); // Compute a scale factor for rounding to ints. disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing; @@ -87,11 +90,10 @@ void BaselineRow::SetupOldLineParameters(TO_ROW* row) const { // Outputs diagnostic information. void BaselineRow::Print() const { tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", - baseline_pt1_.x(), baseline_pt1_.y(), - baseline_pt2_.x(), baseline_pt2_.y(), - BaselineAngle(), StraightYAtX(0.0)); - tprintf("Quant factor=%g, error=%g, good=%d, box:", - disp_quant_factor_, baseline_error_, good_baseline_); + baseline_pt1_.x(), baseline_pt1_.y(), baseline_pt2_.x(), + baseline_pt2_.y(), BaselineAngle(), StraightYAtX(0.0)); + tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_, + baseline_error_, good_baseline_); bounding_box_.print(); } @@ -109,7 +111,8 @@ double BaselineRow::BaselineAngle() const { double BaselineRow::SpaceBetween(const BaselineRow& other) const { // Find the x-centre of overlap of the lines. float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) + - std::min(bounding_box_.right(), other.bounding_box_.right())) / 2.0f; + std::min(bounding_box_.right(), other.bounding_box_.right())) / + 2.0f; // Find the vertical centre between them. float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f; // Find the perpendicular distance of (x,y) from each line. @@ -129,11 +132,10 @@ double BaselineRow::PerpDisp(const FCOORD& direction) const { // defined by baseline_pt1_ and baseline_pt2__. double BaselineRow::StraightYAtX(double x) const { double denominator = baseline_pt2_.x() - baseline_pt1_.x(); - if (denominator == 0.0) - return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0; - return baseline_pt1_.y() + - (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) / - denominator; + if (denominator == 0.0) return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0; + return baseline_pt1_.y() + (x - baseline_pt1_.x()) * + (baseline_pt2_.y() - baseline_pt1_.y()) / + denominator; } // Fits a straight baseline to the points. Returns true if it had enough @@ -154,8 +156,8 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) { int x_middle = (box.left() + box.right()) / 2; #ifdef kDebugYCoord if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) { - tprintf("Box bottom = %d, baseline pos=%d for box at:", - box.bottom(), blob->baseline_position()); + tprintf("Box bottom = %d, baseline pos=%d for box at:", box.bottom(), + blob->baseline_position()); box.print(); } #endif @@ -183,8 +185,9 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) { #ifdef kDebugYCoord Print(); debug = bounding_box_.bottom() < kDebugYCoord && - bounding_box_.top() > kDebugYCoord - ? 3 : 2; + bounding_box_.top() > kDebugYCoord + ? 3 + : 2; #endif // Now we obtained a direction from that fit, see if we can improve the // fit using the same direction and some other start point. @@ -212,11 +215,9 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) { // Modifies an existing result of FitBaseline to be parallel to the given // direction vector if that produces a better result. -void BaselineRow::AdjustBaselineToParallel(int debug, - const FCOORD& direction) { +void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD& direction) { SetupBlobDisplacements(direction); - if (displacement_modes_.empty()) - return; + if (displacement_modes_.empty()) return; #ifdef kDebugYCoord if (bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord && debug < 3) @@ -227,8 +228,7 @@ void BaselineRow::AdjustBaselineToParallel(int debug, // Modifies the baseline to snap to the textline grid if the existing // result is not good enough. -double BaselineRow::AdjustBaselineToGrid(int debug, - const FCOORD& direction, +double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD& direction, double line_spacing, double line_offset) { if (blobs_->empty()) { @@ -243,8 +243,8 @@ double BaselineRow::AdjustBaselineToGrid(int debug, int best_index = -1; for (int i = 0; i < displacement_modes_.size(); ++i) { double blob_y = displacement_modes_[i]; - double error = BaselineBlock::SpacingModelError(blob_y, line_spacing, - line_offset); + double error = + BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset); if (debug > 1) { tprintf("Mode at %g has error %g from model \n", blob_y, error); } @@ -319,13 +319,13 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) { for (int i = 0; i < perp_blob_dists.size(); ++i) { dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1); } - GenericVector > scaled_modes; + GenericVector> scaled_modes; dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes); #ifdef kDebugYCoord if (debug) { for (int i = 0; i < scaled_modes.size(); ++i) { - tprintf("Top mode = %g * %d\n", - scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data); + tprintf("Top mode = %g * %d\n", scaled_modes[i].key * disp_quant_factor_, + scaled_modes[i].data); } } #endif @@ -343,8 +343,7 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) { // Otherwise the new fit will only replace the old if it is really better, // or the old fit is marked bad and the new fit has sufficient points, as // well as being within the max_baseline_error_. -void BaselineRow::FitConstrainedIfBetter(int debug, - const FCOORD& direction, +void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD& direction, double cheat_allowance, double target_offset) { double halfrange = fit_halfrange_ * direction.length(); @@ -358,21 +357,20 @@ void BaselineRow::FitConstrainedIfBetter(int debug, double old_angle = BaselineAngle(); double new_angle = direction.angle(); if (debug > 1) { - tprintf("Constrained error = %g, original = %g", - new_error, baseline_error_); - tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", - old_angle, new_angle, - new_angle - old_angle, kMaxSkewDeviation); + tprintf("Constrained error = %g, original = %g", new_error, + baseline_error_); + tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle, + new_angle, new_angle - old_angle, kMaxSkewDeviation); } - bool new_good_baseline = new_error <= max_baseline_error_ && + bool new_good_baseline = + new_error <= max_baseline_error_ && (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit()); // The new will replace the old if any are true: // 1. the new error is better // 2. the old is NOT good, but the new is // 3. there is a wild angular difference between them (assuming that the new // is a better guess at the angle.) - if (new_error <= baseline_error_ || - (!good_baseline_ && new_good_baseline) || + if (new_error <= baseline_error_ || (!good_baseline_ && new_good_baseline) || fabs(new_angle - old_angle) > kMaxSkewDeviation) { baseline_error_ = new_error; baseline_pt1_ = line_pt; @@ -406,11 +404,15 @@ void BaselineRow::ComputeBoundingBox() { bounding_box_ = box; } - BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block) - : block_(block), debug_level_(debug_level), non_text_block_(non_text), - good_skew_angle_(false), skew_angle_(0.0), - line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) { + : block_(block), + debug_level_(debug_level), + non_text_block_(non_text), + good_skew_angle_(false), + skew_angle_(0.0), + line_spacing_(block->line_spacing), + line_offset_(0.0), + model_error_(0.0) { TO_ROW_IT row_it(block_->get_rows()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { // Sort the blobs on the rows. @@ -442,8 +444,7 @@ bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) { double angle = row->BaselineAngle(); angles.push_back(angle); } - if (debug_level_ > 1) - row->Print(); + if (debug_level_ > 1) row->Print(); } if (!angles.empty()) { @@ -454,8 +455,8 @@ bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) { good_skew_angle_ = false; } if (debug_level_ > 0) { - tprintf("Initial block skew angle = %g, good = %d\n", - skew_angle_, good_skew_angle_); + tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_, + good_skew_angle_); } return good_skew_angle_; } @@ -471,11 +472,9 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) { for (int r = 0; r < rows_.size(); ++r) { BaselineRow* row = rows_[r]; row->AdjustBaselineToParallel(debug_level_, direction); - if (debug_level_ > 1) - row->Print(); + if (debug_level_ > 1) row->Print(); } - if (rows_.size() < 3 || !ComputeLineSpacing()) - return; + if (rows_.size() < 3 || !ComputeLineSpacing()) return; // Enforce the line spacing model on all lines that don't yet have a good // baseline. // Start by finding the row that is best fitted to the model. @@ -507,9 +506,9 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) { void BaselineBlock::SetupBlockParameters() const { if (line_spacing_ > 0.0) { // Where was block_line_spacing set before? - float min_spacing = std::min(block_->line_spacing, static_cast(line_spacing_)); - if (min_spacing < block_->line_size) - block_->line_size = min_spacing; + float min_spacing = + std::min(block_->line_spacing, static_cast(line_spacing_)); + if (min_spacing < block_->line_size) block_->line_size = min_spacing; block_->line_spacing = line_spacing_; block_->baseline_offset = line_offset_; block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple; @@ -548,8 +547,7 @@ void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) { // Although x-height estimation is conceptually separate, it is part of // detecting perspective distortion and therefore baseline fitting. void BaselineBlock::FitBaselineSplines(bool enable_splines, - bool show_final_rows, - Textord* textord) { + bool show_final_rows, Textord* textord) { double gradient = tan(skew_angle_); FCOORD rotation(1.0f, 0.0f); @@ -557,12 +555,12 @@ void BaselineBlock::FitBaselineSplines(bool enable_splines, textord->make_spline_rows(block_, gradient, show_final_rows); } else { // Make a fake spline from the existing line. - TBOX block_box= block_->block->pdblk.bounding_box(); + TBOX block_box = block_->block->pdblk.bounding_box(); TO_ROW_IT row_it = block_->get_rows(); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { TO_ROW* row = row_it.data(); - int32_t xstarts[2] = { block_box.left(), block_box.right() }; - double coeffs[3] = { 0.0, row->line_m(), row->line_c() }; + int32_t xstarts[2] = {block_box.left(), block_box.right()}; + double coeffs[3] = {0.0, row->line_m(), row->line_c()}; row->baseline = QSPLINE(1, xstarts, coeffs); textord->compute_row_xheight(row, block_->block->classify_rotation(), row->line_m(), block_->line_size); @@ -588,13 +586,12 @@ void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) { for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation); colour = static_cast(colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; + if (colour > ScrollView::MAGENTA) colour = ScrollView::RED; } plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE); // Show discarded blobs. - plot_blob_list(win, &block_->underlines, - ScrollView::YELLOW, ScrollView::CORAL); + plot_blob_list(win, &block_->underlines, ScrollView::YELLOW, + ScrollView::CORAL); if (block_->blobs.length() > 0) tprintf("%d blobs discarded as noise\n", block_->blobs.length()); draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation); @@ -630,8 +627,7 @@ bool BaselineBlock::ComputeLineSpacing() { double row_gap = fabs(row_positions[i - 1] - row_positions[i]); if (row_gap > max_baseline_error) { ++non_trivial_gaps; - if (fabs(row_gap - line_spacing_) <= max_baseline_error) - ++fitting_gaps; + if (fabs(row_gap - line_spacing_) <= max_baseline_error) ++fitting_gaps; } } if (debug_level_ > 0) { @@ -675,8 +671,9 @@ void BaselineBlock::EstimateLineSpacing() { const TBOX& row_box = row->bounding_box(); int r2; for (r2 = r + 1; r2 < rows_.size() && - !row_box.major_x_overlap(rows_[r2]->bounding_box()); - ++r2); + !row_box.major_x_overlap(rows_[r2]->bounding_box()); + ++r2) + ; if (r2 < rows_.size()) { BaselineRow* row2 = rows_[r2]; // Exclude silly lines. @@ -701,16 +698,16 @@ void BaselineBlock::EstimateLineSpacing() { void BaselineBlock::RefineLineSpacing(const GenericVector& positions) { double spacings[3], offsets[3], errors[3]; int index_range; - errors[0] = FitLineSpacingModel(positions, line_spacing_, - &spacings[0], &offsets[0], &index_range); + errors[0] = FitLineSpacingModel(positions, line_spacing_, &spacings[0], + &offsets[0], &index_range); if (index_range > 1) { double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range); // Try the hypotheses that there might be index_range +/- 1 line spaces. - errors[1] = FitLineSpacingModel(positions, spacing_plus, - &spacings[1], &offsets[1], nullptr); + errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1], + &offsets[1], nullptr); double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range); - errors[2] = FitLineSpacingModel(positions, spacing_minus, - &spacings[2], &offsets[2], nullptr); + errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2], + &offsets[2], nullptr); for (int i = 1; i <= 2; ++i) { if (errors[i] < errors[0]) { spacings[0] = spacings[i]; @@ -736,8 +733,8 @@ void BaselineBlock::RefineLineSpacing(const GenericVector& positions) { // in index_delta. Returns the error of fit to the line spacing model. // Uses a simple linear regression, but optimized the offset using the median. double BaselineBlock::FitLineSpacingModel( - const GenericVector& positions, double m_in, - double* m_out, double* c_out, int* index_delta) { + const GenericVector& positions, double m_in, double* m_out, + double* c_out, int* index_delta) { if (m_in == 0.0f || positions.size() < 2) { *m_out = m_in; *c_out = 0.0; @@ -768,23 +765,21 @@ double BaselineBlock::FitLineSpacingModel( offsets.push_back(fmod(positions[i], *m_out)); // Get the median offset. if (debug_level_ > 2) { - for (int i = 0; i < offsets.size(); ++i) - tprintf("%d: %g\n", i, offsets[i]); + for (int i = 0; i < offsets.size(); ++i) tprintf("%d: %g\n", i, offsets[i]); } *c_out = MedianOfCircularValues(*m_out, &offsets); if (debug_level_ > 1) { - tprintf("Median offset = %g, compared to mean of %g.\n", - *c_out, llsq.c(*m_out)); + tprintf("Median offset = %g, compared to mean of %g.\n", *c_out, + llsq.c(*m_out)); } // Index_delta is the number of hypothesized line gaps present. - if (index_delta != nullptr) - *index_delta = max_index - min_index; + if (index_delta != nullptr) *index_delta = max_index - min_index; // Use the regression model's intercept to compute the error, as it may be // a full line-spacing in disagreement with the median. double rms_error = llsq.rms(*m_out, llsq.c(*m_out)); if (debug_level_ > 1) { - tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", - m_in, median_offset, *m_out, *c_out, rms_error); + tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in, + median_offset, *m_out, *c_out, rms_error); } return rms_error; } @@ -815,8 +810,7 @@ void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) { GenericVector block_skew_angles; for (int i = 0; i < blocks_.size(); ++i) { BaselineBlock* bl_block = blocks_[i]; - if (debug_level_ > 0) - tprintf("Fitting initial baselines...\n"); + if (debug_level_ > 0) tprintf("Fitting initial baselines...\n"); if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) { block_skew_angles.push_back(bl_block->skew_angle()); } @@ -847,7 +841,7 @@ void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, - Textord* textord) { + Textord* textord) { for (int i = 0; i < blocks_.size(); ++i) { BaselineBlock* bl_block = blocks_[i]; if (enable_splines) diff --git a/src/textord/baselinedetect.h b/src/textord/baselinedetect.h index 325922e10d..a2e850b976 100644 --- a/src/textord/baselinedetect.h +++ b/src/textord/baselinedetect.h @@ -41,9 +41,7 @@ class BaselineRow { public: BaselineRow(double line_size, TO_ROW* to_row); - const TBOX& bounding_box() const { - return bounding_box_; - } + const TBOX& bounding_box() const { return bounding_box_; } // Sets the TO_ROW with the output straight line. void SetupOldLineParameters(TO_ROW* row) const; @@ -91,8 +89,7 @@ class BaselineRow { // or the old fit is marked bad and the new fit has sufficient points, as // well as being within the max_baseline_error_. void FitConstrainedIfBetter(int debug, const FCOORD& direction, - double cheat_allowance, - double target_offset); + double cheat_allowance, double target_offset); // Returns the perpendicular distance of the point from the straight // baseline. double PerpDistanceFromBaseline(const FCOORD& pt) const; @@ -130,12 +127,8 @@ class BaselineBlock { public: BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block); - TO_BLOCK* block() const { - return block_; - } - double skew_angle() const { - return skew_angle_; - } + TO_BLOCK* block() const { return block_; } + double skew_angle() const { return skew_angle_; } // Computes and returns the absolute error of the given perp_disp from the // given linespacing model. @@ -212,7 +205,6 @@ class BaselineBlock { double m_in, double* m_out, double* c_out, int* index_delta); - // The block to which this class adds extra information used during baseline // calculation. TO_BLOCK* block_; @@ -257,8 +249,7 @@ class BaselineDetect { // NOTE that ComputeStraightBaselines must have been called first as this // sets up data in the TO_ROWs upon which this function depends. void ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, - bool enable_splines, - bool remove_noise, + bool enable_splines, bool remove_noise, bool show_final_rows, Textord* textord); diff --git a/src/textord/bbgrid.cpp b/src/textord/bbgrid.cpp index fb65e27c96..547266f9b4 100644 --- a/src/textord/bbgrid.cpp +++ b/src/textord/bbgrid.cpp @@ -37,8 +37,7 @@ void GridBase::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { gridsize_ = gridsize; bleft_ = bleft; tright_ = tright; - if (gridsize_ == 0) - gridsize_ = 1; + if (gridsize_ == 0) gridsize_ = 1; gridwidth_ = (tright.x() - bleft.x() + gridsize_ - 1) / gridsize_; gridheight_ = (tright.y() - bleft.y() + gridsize_ - 1) / gridsize_; gridbuckets_ = gridwidth_ * gridheight_; @@ -57,24 +56,20 @@ void GridBase::ClipGridCoords(int* x, int* y) const { *y = ClipToRange(*y, 0, gridheight_ - 1); } -IntGrid::IntGrid() { - grid_ = nullptr; -} +IntGrid::IntGrid() { grid_ = nullptr; } IntGrid::IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright) - : grid_(nullptr) { + : grid_(nullptr) { Init(gridsize, bleft, tright); } -IntGrid::~IntGrid() { - delete [] grid_; -} +IntGrid::~IntGrid() { delete[] grid_; } // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, // and bleft, tright are the bounding box of everything to go in it. void IntGrid::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { GridBase::Init(gridsize, bleft, tright); - delete [] grid_; + delete[] grid_; grid_ = new int[gridbuckets_]; Clear(); } @@ -113,12 +108,11 @@ void IntGrid::Rotate(const FCOORD& rotation) { for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) { int grid_x, grid_y; GridCoords(static_cast(line_pos.x() + 0.5), - static_cast(line_pos.y() + 0.5), - &grid_x, &grid_y); + static_cast(line_pos.y() + 0.5), &grid_x, &grid_y); grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi]; } } - delete [] old_grid; + delete[] old_grid; } // Returns a new IntGrid containing values equal to the sum of all the @@ -138,8 +132,7 @@ IntGrid* IntGrid::NeighbourhoodSum() const { cell_count += GridCellValue(grid_x, grid_y); } } - if (GridCellValue(x, y) > 1) - sumgrid->SetGridCell(x, y, cell_count); + if (GridCellValue(x, y) > 1) sumgrid->SetGridCell(x, y, cell_count); } } return sumgrid; @@ -156,8 +149,8 @@ bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const { for (int x = min_x; x <= max_x; ++x) { int value = GridCellValue(x, y); if (value > threshold) { - TBOX cell_box(x * gridsize_, y * gridsize_, - (x + 1) * gridsize_, (y + 1) * gridsize_); + TBOX cell_box(x * gridsize_, y * gridsize_, (x + 1) * gridsize_, + (y + 1) * gridsize_); cell_box &= rect; // This is in-place box intersection. total_area += cell_box.area(); } @@ -173,8 +166,7 @@ bool IntGrid::AnyZeroInRect(const TBOX& rect) const { GridCoords(rect.right(), rect.top(), &max_x, &max_y); for (int y = min_y; y <= max_y; ++y) { for (int x = min_x; x <= max_x; ++x) { - if (GridCellValue(x, y) == 0) - return true; + if (GridCellValue(x, y) == 0) return true; } } return false; @@ -184,14 +176,14 @@ bool IntGrid::AnyZeroInRect(const TBOX& rect) const { // threshold is filled as a black square. pixDestroy after use. // Edge cells, which have a zero 4-neighbour, are not marked. Pix* IntGrid::ThresholdToPix(int threshold) const { - Pix* pix = pixCreate(tright().x() - bleft().x(), - tright().y() - bleft().y(), 1); + Pix* pix = + pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1); int cellsize = gridsize(); for (int y = 0; y < gridheight(); ++y) { for (int x = 0; x < gridwidth(); ++x) { - if (GridCellValue(x, y) > threshold && - GridCellValue(x - 1, y) > 0 && GridCellValue(x + 1, y) > 0 && - GridCellValue(x, y - 1) > 0 && GridCellValue(x, y + 1) > 0) { + if (GridCellValue(x, y) > threshold && GridCellValue(x - 1, y) > 0 && + GridCellValue(x + 1, y) > 0 && GridCellValue(x, y - 1) > 0 && + GridCellValue(x, y + 1) > 0) { pixRasterop(pix, x * cellsize, tright().y() - ((y + 1) * cellsize), cellsize, cellsize, PIX_SET, nullptr, 0, 0); } @@ -201,8 +193,8 @@ Pix* IntGrid::ThresholdToPix(int threshold) const { } // Make a Pix of the correct scaled size for the TraceOutline functions. -Pix* GridReducedPix(const TBOX& box, int gridsize, - ICOORD bleft, int* left, int* bottom) { +Pix* GridReducedPix(const TBOX& box, int gridsize, ICOORD bleft, int* left, + int* bottom) { // Compute grid bounds of the outline and pad all round by 1. int grid_left = (box.left() - bleft.x()) / gridsize - 1; int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1; @@ -210,9 +202,7 @@ Pix* GridReducedPix(const TBOX& box, int gridsize, int grid_top = (box.top() - bleft.y()) / gridsize + 1; *left = grid_left; *bottom = grid_bottom; - return pixCreate(grid_right - grid_left + 1, - grid_top - grid_bottom + 1, - 1); + return pixCreate(grid_right - grid_left + 1, grid_top - grid_bottom + 1, 1); } // Helper function to return a scaled Pix with one pixel per grid cell, @@ -221,8 +211,8 @@ Pix* GridReducedPix(const TBOX& box, int gridsize, // Also returns the grid coords of the bottom-left of the Pix, in *left // and *bottom, which corresponds to (0, 0) on the Pix. // Note that the Pix is used upside-down, with (0, 0) being the bottom-left. -Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, - ICOORD bleft, int* left, int* bottom) { +Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, ICOORD bleft, + int* left, int* bottom) { const TBOX& box = outline->bounding_box(); Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom); int wpl = pixGetWpl(pix); @@ -247,8 +237,8 @@ Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, #endif // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. -Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, - ICOORD bleft, int* left, int* bottom) { +Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, ICOORD bleft, int* left, + int* bottom) { const TBOX& box = block->pdblk.bounding_box(); Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom); int wpl = pixGetWpl(pix); diff --git a/src/textord/bbgrid.h b/src/textord/bbgrid.h index c7fc21e46b..688edb4c49 100644 --- a/src/textord/bbgrid.h +++ b/src/textord/bbgrid.h @@ -40,13 +40,14 @@ namespace tesseract { // Also returns the grid coords of the bottom-left of the Pix, in *left // and *bottom, which corresponds to (0, 0) on the Pix. // Note that the Pix is used upside-down, with (0, 0) being the bottom-left. -Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, - ICOORD bleft, int* left, int* bottom); +Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, ICOORD bleft, + int* left, int* bottom); // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. -Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, - ICOORD bleft, int* left, int* bottom); +Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, ICOORD bleft, int* left, + int* bottom); -template class GridSearch; +template +class GridSearch; // The GridBase class is the base class for BBGrid and IntGrid. // It holds the geometry and scale of the grid. @@ -61,21 +62,11 @@ class GridBase { void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); // Simple accessors. - int gridsize() const { - return gridsize_; - } - int gridwidth() const { - return gridwidth_; - } - int gridheight() const { - return gridheight_; - } - const ICOORD& bleft() const { - return bleft_; - } - const ICOORD& tright() const { - return tright_; - } + int gridsize() const { return gridsize_; } + int gridwidth() const { return gridwidth_; } + int gridheight() const { return gridheight_; } + const ICOORD& bleft() const { return bleft_; } + const ICOORD& tright() const { return tright_; } // Compute the given grid coordinates from image coords. void GridCoords(int x, int y, int* grid_x, int* grid_y) const; @@ -84,8 +75,8 @@ class GridBase { protected: // TODO(rays) Make these private and migrate to the accessors in subclasses. - int gridsize_; // Pixel size of each grid cell. - int gridwidth_; // Size of the grid in cells. + int gridsize_; // Pixel size of each grid cell. + int gridwidth_; // Size of the grid in cells. int gridheight_; int gridbuckets_; // Total cells in grid. ICOORD bleft_; // Pixel coords of bottom-left of grid. @@ -156,9 +147,10 @@ class IntGrid : public GridBase { // thereby making most of the ugly template notation go away. // The friend class GridSearch, with the same template arguments, is // used to search a grid efficiently in one of several search patterns. -template class BBGrid - : public GridBase { +template +class BBGrid : public GridBase { friend class GridSearch; + public: BBGrid(); BBGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); @@ -225,36 +217,32 @@ template class BBGrid }; // Hash functor for generic pointers. -template struct PtrHash { +template +struct PtrHash { size_t operator()(const T* ptr) const { return reinterpret_cast(ptr) / sizeof(T); } }; - // The GridSearch class enables neighbourhood searching on a BBGrid. -template class GridSearch { +template +class GridSearch { public: GridSearch(BBGrid* grid) - : grid_(grid), unique_mode_(false), - previous_return_(nullptr), next_return_(nullptr) { - } + : grid_(grid), + unique_mode_(false), + previous_return_(nullptr), + next_return_(nullptr) {} // Get the grid x, y coords of the most recently returned BBC. - int GridX() const { - return x_; - } - int GridY() const { - return y_; - } + int GridX() const { return x_; } + int GridY() const { return y_; } // Sets the search mode to return a box only once. // Efficiency warning: Implementation currently uses a squared-order // search in the number of returned elements. Use only where a small // number of elements are spread over a wide area, eg ColPartitions. - void SetUniqueMode(bool mode) { - unique_mode_ = mode; - } + void SetUniqueMode(bool mode) { unique_mode_ = mode; } // TODO(rays) Replace calls to ReturnedSeedElement with SetUniqueMode. // It only works if the search includes the bottom-left corner. // Apart from full search, all other searches return a box several @@ -265,8 +253,8 @@ template class GridSearch { // both h_spread=true and v_spread=true bool ReturnedSeedElement() const { TBOX box = previous_return_->bounding_box(); - int x_center = (box.left()+box.right())/2; - int y_center = (box.top()+box.bottom())/2; + int x_center = (box.left() + box.right()) / 2; + int y_center = (box.top() + box.bottom()) / 2; int grid_x, grid_y; grid_->GridCoords(x_center, y_center, &grid_x, &grid_y); return (x_ == grid_x) && (y_ == grid_y); @@ -361,99 +349,88 @@ template class GridSearch { int y_; bool unique_mode_; BBC* previous_return_; // Previous return from Next*. - BBC* next_return_; // Current value of it_.data() used for repositioning. + BBC* next_return_; // Current value of it_.data() used for repositioning. // An iterator over the list at (x_, y_) in the grid_. BBC_C_IT it_; // Set of unique returned elements used when unique_mode_ is true. - std::unordered_set > returns_; + std::unordered_set> returns_; }; // Sort function to sort a BBC by bounding_box().left(). -template +template int SortByBoxLeft(const void* void1, const void* void2) { // The void*s are actually doubly indirected, so get rid of one level. const BBC* p1 = *static_cast(void1); const BBC* p2 = *static_cast(void2); int result = p1->bounding_box().left() - p2->bounding_box().left(); - if (result != 0) - return result; + if (result != 0) return result; result = p1->bounding_box().right() - p2->bounding_box().right(); - if (result != 0) - return result; + if (result != 0) return result; result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); - if (result != 0) - return result; + if (result != 0) return result; return p1->bounding_box().top() - p2->bounding_box().top(); } // Sort function to sort a BBC by bounding_box().right() in right-to-left order. -template +template int SortRightToLeft(const void* void1, const void* void2) { // The void*s are actually doubly indirected, so get rid of one level. const BBC* p1 = *static_cast(void1); const BBC* p2 = *static_cast(void2); int result = p2->bounding_box().right() - p1->bounding_box().right(); - if (result != 0) - return result; + if (result != 0) return result; result = p2->bounding_box().left() - p1->bounding_box().left(); - if (result != 0) - return result; + if (result != 0) return result; result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); - if (result != 0) - return result; + if (result != 0) return result; return p1->bounding_box().top() - p2->bounding_box().top(); } // Sort function to sort a BBC by bounding_box().bottom(). -template +template int SortByBoxBottom(const void* void1, const void* void2) { // The void*s are actually doubly indirected, so get rid of one level. const BBC* p1 = *static_cast(void1); const BBC* p2 = *static_cast(void2); int result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); - if (result != 0) - return result; - result = p1->bounding_box().top() - p2->bounding_box().top(); - if (result != 0) - return result; + if (result != 0) return result; + result = p1->bounding_box().top() - p2->bounding_box().top(); + if (result != 0) return result; result = p1->bounding_box().left() - p2->bounding_box().left(); - if (result != 0) - return result; + if (result != 0) return result; return p1->bounding_box().right() - p2->bounding_box().right(); } /////////////////////////////////////////////////////////////////////// // BBGrid IMPLEMENTATION. /////////////////////////////////////////////////////////////////////// -template -BBGrid::BBGrid() : grid_(nullptr) { -} +template +BBGrid::BBGrid() : grid_(nullptr) {} -template -BBGrid::BBGrid( - int gridsize, const ICOORD& bleft, const ICOORD& tright) +template +BBGrid::BBGrid(int gridsize, const ICOORD& bleft, + const ICOORD& tright) : grid_(nullptr) { Init(gridsize, bleft, tright); } -template +template BBGrid::~BBGrid() { - delete [] grid_; + delete[] grid_; } // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, // and bleft, tright are the bounding box of everything to go in it. -template -void BBGrid::Init(int gridsize, - const ICOORD& bleft, +template +void BBGrid::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { GridBase::Init(gridsize, bleft, tright); - delete [] grid_; + delete[] grid_; grid_ = new BBC_CLIST[gridbuckets_]; } // Clear all lists, but leave the array of lists present. -template +template void BBGrid::Clear() { for (int i = 0; i < gridbuckets_; ++i) { grid_[i].shallow_clear(); @@ -462,7 +439,7 @@ void BBGrid::Clear() { // Deallocate the data in the lists but otherwise leave the lists and the grid // intact. -template +template void BBGrid::ClearGridData( void (*free_method)(BBC*)) { if (grid_ == nullptr) return; @@ -484,17 +461,15 @@ void BBGrid::ClearGridData( // used, otherwise, just the bottom-left. Similarly for v_spread. // WARNING: InsertBBox may invalidate an active GridSearch. Call // RepositionIterator() on any GridSearches that are active on this grid. -template +template void BBGrid::InsertBBox(bool h_spread, bool v_spread, BBC* bbox) { TBOX box = bbox->bounding_box(); int start_x, start_y, end_x, end_y; GridCoords(box.left(), box.bottom(), &start_x, &start_y); GridCoords(box.right(), box.top(), &end_x, &end_y); - if (!h_spread) - end_x = start_x; - if (!v_spread) - end_y = start_y; + if (!h_spread) end_x = start_x; + if (!v_spread) end_y = start_y; int grid_index = start_y * gridwidth_; for (int y = start_y; y <= end_y; ++y, grid_index += gridwidth_) { for (int x = start_x; x <= end_x; ++x) { @@ -512,7 +487,7 @@ void BBGrid::InsertBBox(bool h_spread, bool v_spread, // grid (in grid coords), and the pix works up the grid from there. // WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call // RepositionIterator() on any GridSearches that are active on this grid. -template +template void BBGrid::InsertPixPtBBox(int left, int bottom, Pix* pix, BBC* bbox) { int width = pixGetWidth(pix); @@ -521,8 +496,8 @@ void BBGrid::InsertPixPtBBox(int left, int bottom, l_uint32* data = pixGetData(pix) + y * pixGetWpl(pix); for (int x = 0; x < width; ++x) { if (GET_DATA_BIT(data, x)) { - grid_[(bottom + y) * gridwidth_ + x + left]. - add_sorted(SortByBoxLeft, true, bbox); + grid_[(bottom + y) * gridwidth_ + x + left].add_sorted( + SortByBoxLeft, true, bbox); } } } @@ -531,7 +506,7 @@ void BBGrid::InsertPixPtBBox(int left, int bottom, // Remove the bbox from the grid. // WARNING: Any GridSearch operating on this grid could be invalidated! // If a GridSearch is operating, call GridSearch::RemoveBBox() instead. -template +template void BBGrid::RemoveBBox(BBC* bbox) { TBOX box = bbox->bounding_box(); int start_x, start_y, end_x, end_y; @@ -542,15 +517,14 @@ void BBGrid::RemoveBBox(BBC* bbox) { for (int x = start_x; x <= end_x; ++x) { BBC_C_IT it(&grid_[grid_index + x]); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - if (it.data() == bbox) - it.extract(); + if (it.data() == bbox) it.extract(); } } } } // Returns true if the given rectangle has no overlapping elements. -template +template bool BBGrid::RectangleEmpty(const TBOX& rect) { GridSearch rsearch(this); rsearch.StartRectSearch(rect); @@ -559,7 +533,7 @@ bool BBGrid::RectangleEmpty(const TBOX& rect) { // Returns an IntGrid showing the number of elements in each cell. // Returned IntGrid must be deleted after use. -template +template IntGrid* BBGrid::CountCellElements() { IntGrid* intgrid = new IntGrid(gridsize(), bleft(), tright()); for (int y = 0; y < gridheight(); ++y) { @@ -571,35 +545,32 @@ IntGrid* BBGrid::CountCellElements() { return intgrid; } - -template class TabEventHandler : public SVEventHandler { +template +class TabEventHandler : public SVEventHandler { public: - explicit TabEventHandler(G* grid) : grid_(grid) { - } + explicit TabEventHandler(G* grid) : grid_(grid) {} void Notify(const SVEvent* sv_event) { if (sv_event->type == SVET_CLICK) { grid_->HandleClick(sv_event->x, sv_event->y); } } + private: G* grid_; }; // Make a window of an appropriate size to display things in the grid. // Position the window at the given x,y. -template +template ScrollView* BBGrid::MakeWindow( int x, int y, const char* window_name) { ScrollView* tab_win = nullptr; #ifndef GRAPHICS_DISABLED - tab_win = new ScrollView(window_name, x, y, - tright_.x() - bleft_.x(), - tright_.y() - bleft_.y(), - tright_.x() - bleft_.x(), - tright_.y() - bleft_.y(), - true); - TabEventHandler >* handler = - new TabEventHandler >(this); + tab_win = new ScrollView(window_name, x, y, tright_.x() - bleft_.x(), + tright_.y() - bleft_.y(), tright_.x() - bleft_.x(), + tright_.y() - bleft_.y(), true); + TabEventHandler>* handler = + new TabEventHandler>(this); tab_win->AddEventHandler(handler); tab_win->Pen(ScrollView::GREY); tab_win->Rectangle(0, 0, tright_.x() - bleft_.x(), tright_.y() - bleft_.y()); @@ -611,7 +582,7 @@ ScrollView* BBGrid::MakeWindow( // BLOBNBOXes in this grid. // Use of this function requires an additional member of the BBC class: // ScrollView::Color BBC::BoxColor() const. -template +template void BBGrid::DisplayBoxes(ScrollView* tab_win) { #ifndef GRAPHICS_DISABLED tab_win->Pen(ScrollView::BLUE); @@ -636,7 +607,7 @@ void BBGrid::DisplayBoxes(ScrollView* tab_win) { } // ASSERT_HOST that every cell contains no more than one copy of each entry. -template +template void BBGrid::AssertNoDuplicates() { // Process all grid cells. for (int i = gridwidth_ * gridheight_ - 1; i >= 0; --i) { @@ -653,7 +624,7 @@ void BBGrid::AssertNoDuplicates() { } // Handle a click event in a display window. -template +template void BBGrid::HandleClick(int x, int y) { tprintf("Click at (%d, %d)\n", x, y); } @@ -663,7 +634,7 @@ void BBGrid::HandleClick(int x, int y) { /////////////////////////////////////////////////////////////////////// // Start a new full search. Will iterate all stored blobs. -template +template void GridSearch::StartFullSearch() { // Full search uses x_ and y_ as the current grid // cell being searched. @@ -673,7 +644,7 @@ void GridSearch::StartFullSearch() { // Return the next bbox in the search or nullptr if done. // The other searches will return a box that overlaps the grid cell // thereby duplicating boxes, but NextFullSearch only returns each box once. -template +template BBC* GridSearch::NextFullSearch() { int x; int y; @@ -682,8 +653,7 @@ BBC* GridSearch::NextFullSearch() { ++x_; if (x_ >= grid_->gridwidth_) { --y_; - if (y_ < 0) - return CommonEnd(); + if (y_ < 0) return CommonEnd(); x_ = 0; } SetIterator(); @@ -696,7 +666,7 @@ BBC* GridSearch::NextFullSearch() { } // Start a new radius search. -template +template void GridSearch::StartRadSearch(int x, int y, int max_radius) { // Rad search uses x_origin_ and y_origin_ as the center of the circle. @@ -711,7 +681,7 @@ void GridSearch::StartRadSearch(int x, int y, // Return the next bbox in the radius search or nullptr if the // maximum radius has been reached. -template +template BBC* GridSearch::NextRadSearch() { do { while (it_.cycled_list()) { @@ -721,8 +691,7 @@ BBC* GridSearch::NextRadSearch() { rad_index_ = 0; if (rad_dir_ >= 4) { ++radius_; - if (radius_ > max_radius_) - return CommonEnd(); + if (radius_ > max_radius_) return CommonEnd(); rad_dir_ = 0; } } @@ -731,22 +700,21 @@ BBC* GridSearch::NextRadSearch() { offset += C_OUTLINE::chain_step(rad_dir_ + 1) * rad_index_; x_ = x_origin_ + offset.x(); y_ = y_origin_ + offset.y(); - if (x_ >= 0 && x_ < grid_->gridwidth_ && - y_ >= 0 && y_ < grid_->gridheight_) + if (x_ >= 0 && x_ < grid_->gridwidth_ && y_ >= 0 && + y_ < grid_->gridheight_) SetIterator(); } CommonNext(); } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); - if (unique_mode_) - returns_.insert(previous_return_); + if (unique_mode_) returns_.insert(previous_return_); return previous_return_; } // Start a new left or right-looking search. Will search to the side // for a box that vertically overlaps the given vertical line segment. -template -void GridSearch::StartSideSearch(int x, - int ymin, int ymax) { +template +void GridSearch::StartSideSearch(int x, int ymin, + int ymax) { // Right search records the x in x_origin_, the ymax in y_origin_ // and the size of the vertical strip to search in radius_. // To guarantee finding overlapping objects of up to twice the @@ -759,7 +727,7 @@ void GridSearch::StartSideSearch(int x, // Return the next bbox in the side search or nullptr if the // edge has been reached. Searches left to right or right to left // according to the flag. -template +template BBC* GridSearch::NextSideSearch(bool right_to_left) { do { while (it_.cycled_list()) { @@ -770,23 +738,20 @@ BBC* GridSearch::NextSideSearch(bool right_to_left) { else ++x_; rad_index_ = 0; - if (x_ < 0 || x_ >= grid_->gridwidth_) - return CommonEnd(); + if (x_ < 0 || x_ >= grid_->gridwidth_) return CommonEnd(); } y_ = y_origin_ - rad_index_; - if (y_ >= 0 && y_ < grid_->gridheight_) - SetIterator(); + if (y_ >= 0 && y_ < grid_->gridheight_) SetIterator(); } CommonNext(); } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); - if (unique_mode_) - returns_.insert(previous_return_); + if (unique_mode_) returns_.insert(previous_return_); return previous_return_; } // Start a vertical-looking search. Will search up or down // for a box that horizontally overlaps the given line segment. -template +template void GridSearch::StartVerticalSearch(int xmin, int xmax, int y) { @@ -800,7 +765,7 @@ void GridSearch::StartVerticalSearch(int xmin, // Return the next bbox in the vertical search or nullptr if the // edge has been reached. Searches top to bottom or bottom to top // according to the flag. -template +template BBC* GridSearch::NextVerticalSearch( bool top_to_bottom) { do { @@ -812,35 +777,33 @@ BBC* GridSearch::NextVerticalSearch( else ++y_; rad_index_ = 0; - if (y_ < 0 || y_ >= grid_->gridheight_) - return CommonEnd(); + if (y_ < 0 || y_ >= grid_->gridheight_) return CommonEnd(); } x_ = x_origin_ + rad_index_; - if (x_ >= 0 && x_ < grid_->gridwidth_) - SetIterator(); + if (x_ >= 0 && x_ < grid_->gridwidth_) SetIterator(); } CommonNext(); } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); - if (unique_mode_) - returns_.insert(previous_return_); + if (unique_mode_) returns_.insert(previous_return_); return previous_return_; } // Start a rectangular search. Will search for a box that overlaps the // given rectangle. -template +template void GridSearch::StartRectSearch(const TBOX& rect) { // Rect search records the xmin in x_origin_, the ymin in y_origin_ // and the xmax in max_radius_. // The search proceeds left to right, top to bottom. rect_ = rect; CommonStart(rect.left(), rect.top()); - grid_->GridCoords(rect.right(), rect.bottom(), // - rect.height(), + grid_->GridCoords(rect.right(), + rect.bottom(), // - rect.height(), &max_radius_, &y_origin_); } // Return the next bbox in the rectangular search or nullptr if complete. -template +template BBC* GridSearch::NextRectSearch() { do { while (it_.cycled_list()) { @@ -848,23 +811,21 @@ BBC* GridSearch::NextRectSearch() { if (x_ > max_radius_) { --y_; x_ = x_origin_; - if (y_ < y_origin_) - return CommonEnd(); + if (y_ < y_origin_) return CommonEnd(); } SetIterator(); } CommonNext(); } while (!rect_.overlap(previous_return_->bounding_box()) || (unique_mode_ && returns_.find(previous_return_) != returns_.end())); - if (unique_mode_) - returns_.insert(previous_return_); + if (unique_mode_) returns_.insert(previous_return_); return previous_return_; } // Remove the last returned BBC. Will not invalidate this. May invalidate // any other concurrent GridSearch on the same grid. If any others are // in use, call RepositionIterator on those, to continue without harm. -template +template void GridSearch::RemoveBBox() { if (previous_return_ != nullptr) { // Remove all instances of previous_return_ from the list, so the iterator @@ -874,7 +835,7 @@ void GridSearch::RemoveBBox() { BBC* new_previous_return = nullptr; it_.move_to_first(); for (it_.mark_cycle_pt(); !it_.cycled_list();) { - if (it_.data() == previous_return_) { + if (it_.data() == previous_return_) { new_previous_return = prev_data; it_.extract(); it_.forward(); @@ -890,7 +851,7 @@ void GridSearch::RemoveBBox() { } } -template +template void GridSearch::RepositionIterator() { // Something was deleted, so we have little choice but to clear the // returns list. @@ -919,7 +880,7 @@ void GridSearch::RepositionIterator() { } // Factored out helper to start a search. -template +template void GridSearch::CommonStart(int x, int y) { grid_->GridCoords(x, y, &x_origin_, &y_origin_); x_ = x_origin_; @@ -931,7 +892,7 @@ void GridSearch::CommonStart(int x, int y) { } // Factored out helper to complete a next search. -template +template BBC* GridSearch::CommonNext() { previous_return_ = it_.data(); it_.forward(); @@ -940,7 +901,7 @@ BBC* GridSearch::CommonNext() { } // Factored out final return when search is exhausted. -template +template BBC* GridSearch::CommonEnd() { previous_return_ = nullptr; next_return_ = nullptr; @@ -949,9 +910,9 @@ BBC* GridSearch::CommonEnd() { // Factored out function to set the iterator to the current x_, y_ // grid coords and mark the cycle pt. -template +template void GridSearch::SetIterator() { - it_= &(grid_->grid_[y_ * grid_->gridwidth_ + x_]); + it_ = &(grid_->grid_[y_ * grid_->gridwidth_ + x_]); it_.mark_cycle_pt(); } diff --git a/src/textord/blkocc.cpp b/src/textord/blkocc.cpp index 7178540887..8561246e5c 100644 --- a/src/textord/blkocc.cpp +++ b/src/textord/blkocc.cpp @@ -28,20 +28,20 @@ ---------------------------------------------------------------------- */ +#include "blkocc.h" #include #include -#include "errcode.h" #include "drawtord.h" -#include "blkocc.h" +#include "errcode.h" #include "helpers.h" double_VAR(textord_underline_threshold, 0.5, "Fraction of width occupied"); // Forward declarations of static functions -static void horizontal_cblob_projection(C_BLOB *blob, // blob to project - STATS *stats); // output -static void horizontal_coutline_projection(C_OUTLINE *outline, - STATS *stats); // output +static void horizontal_cblob_projection(C_BLOB* blob, // blob to project + STATS* stats); // output +static void horizontal_coutline_projection(C_OUTLINE* outline, + STATS* stats); // output /** * test_underline @@ -50,23 +50,23 @@ static void horizontal_coutline_projection(C_OUTLINE *outline, * Return TRUE if it is. */ -bool test_underline( //look for underlines - bool testing_on, //< drawing blob - C_BLOB* blob, //< blob to test - int16_t baseline, //< coords of baseline - int16_t xheight //< height of line +bool test_underline( // look for underlines + bool testing_on, //< drawing blob + C_BLOB* blob, //< blob to test + int16_t baseline, //< coords of baseline + int16_t xheight //< height of line ) { int16_t occ; - int16_t blob_width; //width of blob - TBOX blob_box; //bounding box + int16_t blob_width; // width of blob + TBOX blob_box; // bounding box int32_t desc_occ; int32_t x_occ; int32_t asc_occ; STATS projection; - blob_box = blob->bounding_box (); - blob_width = blob->bounding_box ().width (); - projection.set_range (blob_box.bottom (), blob_box.top () + 1); + blob_box = blob->bounding_box(); + blob_width = blob->bounding_box().width(); + projection.set_range(blob_box.bottom(), blob_box.top() + 1); if (testing_on) { // blob->plot(to_win,GOLDENROD,GOLDENROD); // line_color_index(to_win,GOLDENROD); @@ -74,45 +74,42 @@ bool test_underline( //look for underlines // draw2d(to_win,blob_box.right(),baseline); // move2d(to_win,blob_box.left(),baseline+xheight); // draw2d(to_win,blob_box.right(),baseline+xheight); - tprintf - ("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:", - blob->bounding_box ().left (), blob->bounding_box ().bottom (), - blob->bounding_box ().right (), blob->bounding_box ().top (), - baseline); + tprintf("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:", + blob->bounding_box().left(), blob->bounding_box().bottom(), + blob->bounding_box().right(), blob->bounding_box().top(), baseline); } horizontal_cblob_projection(blob, &projection); desc_occ = 0; - for (occ = blob_box.bottom (); occ < baseline; occ++) - if (occ <= blob_box.top () && projection.pile_count (occ) > desc_occ) - //max in region - desc_occ = projection.pile_count (occ); + for (occ = blob_box.bottom(); occ < baseline; occ++) + if (occ <= blob_box.top() && projection.pile_count(occ) > desc_occ) + // max in region + desc_occ = projection.pile_count(occ); x_occ = 0; for (occ = baseline; occ <= baseline + xheight; occ++) - if (occ >= blob_box.bottom () && occ <= blob_box.top () - && projection.pile_count (occ) > x_occ) - //max in region - x_occ = projection.pile_count (occ); + if (occ >= blob_box.bottom() && occ <= blob_box.top() && + projection.pile_count(occ) > x_occ) + // max in region + x_occ = projection.pile_count(occ); asc_occ = 0; - for (occ = baseline + xheight + 1; occ <= blob_box.top (); occ++) - if (occ >= blob_box.bottom () && projection.pile_count (occ) > asc_occ) - asc_occ = projection.pile_count (occ); + for (occ = baseline + xheight + 1; occ <= blob_box.top(); occ++) + if (occ >= blob_box.bottom() && projection.pile_count(occ) > asc_occ) + asc_occ = projection.pile_count(occ); if (testing_on) { - tprintf ("%d %d %d\n", desc_occ, x_occ, asc_occ); + tprintf("%d %d %d\n", desc_occ, x_occ, asc_occ); } if (desc_occ == 0 && x_occ == 0 && asc_occ == 0) { - tprintf ("Bottom=%d, top=%d, base=%d, x=%d\n", - blob_box.bottom (), blob_box.top (), baseline, xheight); + tprintf("Bottom=%d, top=%d, base=%d, x=%d\n", blob_box.bottom(), + blob_box.top(), baseline, xheight); projection.print(); } - if (desc_occ > x_occ + x_occ - && desc_occ > blob_width * textord_underline_threshold) - return true; //real underline + if (desc_occ > x_occ + x_occ && + desc_occ > blob_width * textord_underline_threshold) + return true; // real underline return asc_occ > x_occ + x_occ && - asc_occ > blob_width * textord_underline_threshold; //overline - //neither + asc_occ > blob_width * textord_underline_threshold; // overline + // neither } - /** * horizontal_cblob_projection * @@ -120,19 +117,18 @@ bool test_underline( //look for underlines * and add to the given STATS. */ -static void horizontal_cblob_projection( //project outlines - C_BLOB *blob, //< blob to project - STATS *stats //< output - ) { - //outlines of blob - C_OUTLINE_IT out_it = blob->out_list (); +static void horizontal_cblob_projection( // project outlines + C_BLOB* blob, //< blob to project + STATS* stats //< output +) { + // outlines of blob + C_OUTLINE_IT out_it = blob->out_list(); - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - horizontal_coutline_projection (out_it.data (), stats); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + horizontal_coutline_projection(out_it.data(), stats); } } - /** * horizontal_coutline_projection * @@ -140,30 +136,29 @@ static void horizontal_cblob_projection( //project outlines * and add to the given STATS. */ -static void horizontal_coutline_projection( //project outlines - C_OUTLINE *outline, //< outline to project - STATS *stats //< output - ) { - ICOORD pos; //current point - ICOORD step; //edge step - int32_t length; //of outline - int16_t stepindex; //current step - C_OUTLINE_IT out_it = outline->child (); - - pos = outline->start_pos (); - length = outline->pathlength (); +static void horizontal_coutline_projection( // project outlines + C_OUTLINE* outline, //< outline to project + STATS* stats //< output +) { + ICOORD pos; // current point + ICOORD step; // edge step + int32_t length; // of outline + int16_t stepindex; // current step + C_OUTLINE_IT out_it = outline->child(); + + pos = outline->start_pos(); + length = outline->pathlength(); for (stepindex = 0; stepindex < length; stepindex++) { - step = outline->step (stepindex); - if (step.y () > 0) { - stats->add (pos.y (), pos.x ()); - } - else if (step.y () < 0) { - stats->add (pos.y () - 1, -pos.x ()); + step = outline->step(stepindex); + if (step.y() > 0) { + stats->add(pos.y(), pos.x()); + } else if (step.y() < 0) { + stats->add(pos.y() - 1, -pos.x()); } pos += step; } - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - horizontal_coutline_projection (out_it.data (), stats); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + horizontal_coutline_projection(out_it.data(), stats); } } diff --git a/src/textord/blkocc.h b/src/textord/blkocc.h index 126a12023d..95553cbf14 100644 --- a/src/textord/blkocc.h +++ b/src/textord/blkocc.h @@ -22,11 +22,12 @@ * ******************************************************************************/ -#ifndef BLKOCC_H -#define BLKOCC_H +#ifndef BLKOCC_H +#define BLKOCC_H -#include "params.h" -#include "elst.h" +#include "elst.h" +#include "params.h" +#include "stepblob.h" /*************************************************************************** CLASS REGION_OCC @@ -45,28 +46,25 @@ CLASS REGION_OCC ****************************************************************************/ -class REGION_OCC:public ELIST_LINK -{ - public: - float min_x; //Lowest x in region - float max_x; //Highest x in region - int16_t region_type; //Type of crossing - - REGION_OCC() = default; // constructor used - // only in COPIER etc - REGION_OCC( //constructor - float min, - float max, - int16_t region) { - min_x = min; - max_x = max; - region_type = region; - } +class REGION_OCC : public ELIST_LINK { + public: + float min_x; // Lowest x in region + float max_x; // Highest x in region + int16_t region_type; // Type of crossing + + REGION_OCC() = default; // constructor used + // only in COPIER etc + REGION_OCC( // constructor + float min, float max, int16_t region) { + min_x = min; + max_x = max; + region_type = region; + } }; -ELISTIZEH (REGION_OCC) -#define RANGE_IN_BAND( band_max, band_min, range_max, range_min ) \ -( ((range_min) >= (band_min)) && ((range_max) < (band_max)) ) ? true : false +ELISTIZEH(REGION_OCC) +#define RANGE_IN_BAND(band_max, band_min, range_max, range_min) \ + (((range_min) >= (band_min)) && ((range_max) < (band_max))) ? true : false /************************************************************************ Adapted from the following procedure so that it can be used in the bands class in an include file... @@ -84,8 +82,8 @@ int16_t range_min] return FALSE; } ***********************************************************************/ -#define RANGE_OVERLAPS_BAND( band_max, band_min, range_max, range_min ) \ -( ((range_max) >= (band_min)) && ((range_min) < (band_max)) ) ? true : false +#define RANGE_OVERLAPS_BAND(band_max, band_min, range_max, range_min) \ + (((range_max) >= (band_min)) && ((range_min) < (band_max))) ? true : false /************************************************************************ Adapted from the following procedure so that it can be used in the bands class in an include file... @@ -123,101 +121,100 @@ Band 0 is the dot band Each band has an error margin above and below. An outline is not considered to have significantly changed bands until it has moved out of the error margin. *************************************************************************/ -class BAND -{ - public: - int16_t max_max; //upper max - int16_t max; //nominal max - int16_t min_max; //lower max - int16_t max_min; //upper min - int16_t min; //nominal min - int16_t min_min; //lower min - - BAND() = default; // constructor - - void set( // initialise a band - int16_t new_max_max, // upper max - int16_t new_max, // new nominal max - int16_t new_min_max, // new lower max - int16_t new_max_min, // new upper min - int16_t new_min, // new nominal min - int16_t new_min_min) { // new lower min - max_max = new_max_max; - max = new_max; - min_max = new_min_max; - max_min = new_max_min; - min = new_min; - min_min = new_min_min; - } - - bool in_minimal( //in minimal limits? - float y) { //y value - return (y >= max_min) && (y < min_max); - } - - bool in_nominal( //in nominal limits? - float y) { //y value - return (y >= min) && (y < max); - } - - bool in_maximal( //in maximal limits? - float y) { //y value - return (y >= min_min) && (y < max_max); - } - - //overlaps min limits? - bool range_overlaps_minimal(float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_OVERLAPS_BAND (min_max, max_min, y1, y2); - else - return RANGE_OVERLAPS_BAND (min_max, max_min, y2, y1); - } - - //overlaps nom limits? - bool range_overlaps_nominal(float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_OVERLAPS_BAND (max, min, y1, y2); - else - return RANGE_OVERLAPS_BAND (max, min, y2, y1); - } - - //overlaps max limits? - bool range_overlaps_maximal(float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_OVERLAPS_BAND (max_max, min_min, y1, y2); - else - return RANGE_OVERLAPS_BAND (max_max, min_min, y2, y1); - } - - bool range_in_minimal( //within min limits? - float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_IN_BAND (min_max, max_min, y1, y2); - else - return RANGE_IN_BAND (min_max, max_min, y2, y1); - } - - bool range_in_nominal( //within nom limits? - float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_IN_BAND (max, min, y1, y2); - else - return RANGE_IN_BAND (max, min, y2, y1); - } - - bool range_in_maximal( //within max limits? - float y1, //one range limit - float y2) { //other range limit - if (y1 > y2) - return RANGE_IN_BAND (max_max, min_min, y1, y2); - else - return RANGE_IN_BAND (max_max, min_min, y2, y1); - } +class BAND { + public: + int16_t max_max; // upper max + int16_t max; // nominal max + int16_t min_max; // lower max + int16_t max_min; // upper min + int16_t min; // nominal min + int16_t min_min; // lower min + + BAND() = default; // constructor + + void set( // initialise a band + int16_t new_max_max, // upper max + int16_t new_max, // new nominal max + int16_t new_min_max, // new lower max + int16_t new_max_min, // new upper min + int16_t new_min, // new nominal min + int16_t new_min_min) { // new lower min + max_max = new_max_max; + max = new_max; + min_max = new_min_max; + max_min = new_max_min; + min = new_min; + min_min = new_min_min; + } + + bool in_minimal( // in minimal limits? + float y) { // y value + return (y >= max_min) && (y < min_max); + } + + bool in_nominal( // in nominal limits? + float y) { // y value + return (y >= min) && (y < max); + } + + bool in_maximal( // in maximal limits? + float y) { // y value + return (y >= min_min) && (y < max_max); + } + + // overlaps min limits? + bool range_overlaps_minimal(float y1, // one range limit + float y2) { // other range limit + if (y1 > y2) + return RANGE_OVERLAPS_BAND(min_max, max_min, y1, y2); + else + return RANGE_OVERLAPS_BAND(min_max, max_min, y2, y1); + } + + // overlaps nom limits? + bool range_overlaps_nominal(float y1, // one range limit + float y2) { // other range limit + if (y1 > y2) + return RANGE_OVERLAPS_BAND(max, min, y1, y2); + else + return RANGE_OVERLAPS_BAND(max, min, y2, y1); + } + + // overlaps max limits? + bool range_overlaps_maximal(float y1, // one range limit + float y2) { // other range limit + if (y1 > y2) + return RANGE_OVERLAPS_BAND(max_max, min_min, y1, y2); + else + return RANGE_OVERLAPS_BAND(max_max, min_min, y2, y1); + } + + bool range_in_minimal( // within min limits? + float y1, // one range limit + float y2) { // other range limit + if (y1 > y2) + return RANGE_IN_BAND(min_max, max_min, y1, y2); + else + return RANGE_IN_BAND(min_max, max_min, y2, y1); + } + + bool range_in_nominal( // within nom limits? + float y1, // one range limit + float y2) { // other range limit + if (y1 > y2) + return RANGE_IN_BAND(max, min, y1, y2); + else + return RANGE_IN_BAND(max, min, y2, y1); + } + + bool range_in_maximal( // within max limits? + float y1, // one range limit + float y2) { // other range limit + if (y1 > y2) + return RANGE_IN_BAND(max_max, min_min, y1, y2); + else + return RANGE_IN_BAND(max_max, min_min, y2, y1); + } }; /* Standard positions */ @@ -233,20 +230,20 @@ class BAND #define END_OF_WERD_CODE 255 -extern BOOL_VAR_H (blockocc_show_result, FALSE, "Show intermediate results"); -extern INT_VAR_H (blockocc_desc_height, 0, -"Descender height after normalisation"); -extern INT_VAR_H (blockocc_asc_height, 255, -"Ascender height after normalisation"); -extern INT_VAR_H (blockocc_band_count, 4, "Number of bands used"); -extern double_VAR_H (textord_underline_threshold, 0.9, -"Fraction of width occupied"); - -bool test_underline( //look for underlines - bool testing_on, //drawing blob - C_BLOB* blob, //blob to test - int16_t baseline, //coords of baseline - int16_t xheight //height of line +extern BOOL_VAR_H(blockocc_show_result, FALSE, "Show intermediate results"); +extern INT_VAR_H(blockocc_desc_height, 0, + "Descender height after normalisation"); +extern INT_VAR_H(blockocc_asc_height, 255, + "Ascender height after normalisation"); +extern INT_VAR_H(blockocc_band_count, 4, "Number of bands used"); +extern double_VAR_H(textord_underline_threshold, 0.9, + "Fraction of width occupied"); + +bool test_underline( // look for underlines + bool testing_on, // drawing blob + C_BLOB* blob, // blob to test + int16_t baseline, // coords of baseline + int16_t xheight // height of line ); #endif diff --git a/src/textord/blobgrid.cpp b/src/textord/blobgrid.cpp index 4adc637427..d2374f03ab 100644 --- a/src/textord/blobgrid.cpp +++ b/src/textord/blobgrid.cpp @@ -22,7 +22,7 @@ namespace tesseract { BlobGrid::BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright) - : BBGrid(gridsize, bleft, tright) { + : BBGrid(gridsize, bleft, tright) { } // Inserts all the blobs from the given list, with x and y spreading, @@ -32,10 +32,8 @@ void BlobGrid::InsertBlobList(BLOBNBOX_LIST* blobs) { BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); - if (!blob->joined_to_prev()) - InsertBBox(true, true, blob); + if (!blob->joined_to_prev()) InsertBBox(true, true, blob); } } - } // namespace tesseract. diff --git a/src/textord/blobgrid.h b/src/textord/blobgrid.h index c5ec2c2316..a848dde469 100644 --- a/src/textord/blobgrid.h +++ b/src/textord/blobgrid.h @@ -17,7 +17,6 @@ // /////////////////////////////////////////////////////////////////////// - #ifndef TESSERACT_TEXTORD_BLOBGRID_H_ #define TESSERACT_TEXTORD_BLOBGRID_H_ diff --git a/src/textord/ccnontextdetect.cpp b/src/textord/ccnontextdetect.cpp index 734c75fdd9..8544685870 100644 --- a/src/textord/ccnontextdetect.cpp +++ b/src/textord/ccnontextdetect.cpp @@ -59,20 +59,18 @@ const double kPhotoOffsetFraction = 0.375; // the 1/(4pi) that you would expect. const double kMinGoodTextPARatio = 1.5; -CCNonTextDetect::CCNonTextDetect(int gridsize, - const ICOORD& bleft, const ICOORD& tright) - : BlobGrid(gridsize, bleft, tright), - max_noise_count_(static_cast(kMaxSmallNeighboursPerPix * - gridsize * gridsize)), - noise_density_(nullptr) { +CCNonTextDetect::CCNonTextDetect(int gridsize, const ICOORD& bleft, + const ICOORD& tright) + : BlobGrid(gridsize, bleft, tright), + max_noise_count_( + static_cast(kMaxSmallNeighboursPerPix * gridsize * gridsize)), + noise_density_(nullptr) { // TODO(rays) break max_noise_count_ out into an area-proportional // value, as now plus an additive constant for the number of text blobs // in the 3x3 neighbourhood - maybe 9. } -CCNonTextDetect::~CCNonTextDetect() { - delete noise_density_; -} +CCNonTextDetect::~CCNonTextDetect() { delete noise_density_; } // Creates and returns a Pix with the same resolution as the original // in which 1 (black) pixels represent likely non text (photo, line drawing) @@ -109,41 +107,41 @@ Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map, pixWrite("junknoisemask.png", pix, IFF_PNG); } ScrollView* win = nullptr; - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (debug) { win = MakeWindow(0, 400, "Photo Mask Blobs"); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED // Large and medium blobs are not text if they overlap with "a lot" of small // blobs. MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, - kMaxLargeOverlapsWithSmall, - win, ScrollView::DARK_GREEN, pix); + kMaxLargeOverlapsWithSmall, win, + ScrollView::DARK_GREEN, pix); MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, - win, ScrollView::WHITE, pix); + win, ScrollView::WHITE, pix); // Clear the grid of small blobs and insert the medium blobs. Clear(); InsertBlobList(&blob_block->blobs); MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, - kMaxLargeOverlapsWithMedium, - win, ScrollView::DARK_GREEN, pix); + kMaxLargeOverlapsWithMedium, win, + ScrollView::DARK_GREEN, pix); // Clear again before we start deleting the blobs in the grid. Clear(); - MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, - win, ScrollView::CORAL, pix); - MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, - win, ScrollView::GOLDENROD, pix); - MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, - win, ScrollView::WHITE, pix); + MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, win, + ScrollView::CORAL, pix); + MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, win, + ScrollView::GOLDENROD, pix); + MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, win, ScrollView::WHITE, + pix); if (debug) { - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED win->Update(); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED pixWrite("junkccphotomask.png", pix, IFF_PNG); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED delete win->AwaitEvent(SVET_DESTROY); delete win; - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED } return pix; } @@ -189,8 +187,7 @@ IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map, good_counts->GridCellValue(x, y), noise_counts->GridCellValue(x, y), max_noise_count_); } - if (noise > max_noise_count_ && - good_counts->GridCellValue(x, y) > 0 && + if (noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0 && noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= max_noise_count_) { noise_density->SetGridCell(x, y, 0); @@ -209,24 +206,19 @@ static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density, int pad) { TBOX expanded_box(box); expanded_box.set_right(box.right() + pad); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; + if (!noise_density.AnyZeroInRect(expanded_box)) return expanded_box; expanded_box = box; expanded_box.set_left(box.left() - pad); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; + if (!noise_density.AnyZeroInRect(expanded_box)) return expanded_box; expanded_box = box; expanded_box.set_top(box.top() + pad); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; + if (!noise_density.AnyZeroInRect(expanded_box)) return expanded_box; expanded_box = box; expanded_box.set_bottom(box.bottom() + pad); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; + if (!noise_density.AnyZeroInRect(expanded_box)) return expanded_box; expanded_box = box; expanded_box.pad(kNoisePadding, kNoisePadding); - if (!noise_density.AnyZeroInRect(expanded_box)) - return expanded_box; + if (!noise_density.AnyZeroInRect(expanded_box)) return expanded_box; return box; } @@ -258,20 +250,19 @@ void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, TBOX box = blob->bounding_box(); if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && (max_blob_overlaps < 0 || - !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { + !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { blob->ClearNeighbours(); - #ifndef GRAPHICS_DISABLED - if (win != nullptr) - blob->plot(win, ok_color, ok_color); - #endif // GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED + if (win != nullptr) blob->plot(win, ok_color, ok_color); +#endif // GRAPHICS_DISABLED } else { if (noise_density_->AnyZeroInRect(box)) { // There is a danger that the bounding box may overlap real text, so // we need to render the outline. Pix* blob_pix = blob->cblob()->render_outline(); pixRasterop(nontext_mask, box.left(), imageheight - box.top(), - box.width(), box.height(), PIX_SRC | PIX_DST, - blob_pix, 0, 0); + box.width(), box.height(), PIX_SRC | PIX_DST, blob_pix, 0, + 0); pixDestroy(&blob_pix); } else { if (box.area() < gridsize() * gridsize()) { @@ -284,10 +275,9 @@ void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SET, nullptr, 0, 0); } - #ifndef GRAPHICS_DISABLED - if (win != nullptr) - blob->plot(win, ScrollView::RED, ScrollView::RED); - #endif // GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED + if (win != nullptr) blob->plot(win, ScrollView::RED, ScrollView::RED); +#endif // GRAPHICS_DISABLED // It is safe to delete the cblob now, as it isn't used by the grid // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the // dead_blobs list. @@ -314,8 +304,7 @@ bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) { (neighbour = rsearch.NextRectSearch()) != nullptr) { if (box.major_overlap(neighbour->bounding_box())) { ++overlap_count; - if (overlap_count > max_overlaps) - return true; + if (overlap_count > max_overlaps) return true; } } return false; diff --git a/src/textord/ccnontextdetect.h b/src/textord/ccnontextdetect.h index 6f536ef274..ed899ba2e1 100644 --- a/src/textord/ccnontextdetect.h +++ b/src/textord/ccnontextdetect.h @@ -67,8 +67,7 @@ class CCNonTextDetect : public BlobGrid { // not removed. If any deleted blobs might be in *this, then this must be // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. // If the win is not nullptr, deleted blobs are drawn on it in red, and kept - void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, - int max_blob_overlaps, + void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, int max_blob_overlaps, ScrollView* win, ScrollView::Color ok_color, Pix* nontext_mask); // Returns true if the given blob overlaps more than max_overlaps blobs diff --git a/src/textord/cjkpitch.cpp b/src/textord/cjkpitch.cpp index e713191d84..ee2e75e06d 100644 --- a/src/textord/cjkpitch.cpp +++ b/src/textord/cjkpitch.cpp @@ -41,8 +41,8 @@ static const float kFixedPitchThreshold = 0.35; // rank statistics for a small collection of float values. class SimpleStats { public: - SimpleStats(): finalized_(false), values_() { } - ~SimpleStats() { } + SimpleStats() : finalized_(false), values_() {} + ~SimpleStats() {} void Clear() { values_.clear(); @@ -67,13 +67,10 @@ class SimpleStats { int index = static_cast((values_.size() - 1) * frac); float reminder = (values_.size() - 1) * frac - index; - return values_[index] * (1.0 - reminder) + - values_[index + 1] * reminder; + return values_[index] * (1.0 - reminder) + values_[index + 1] * reminder; } - float median() { - return ile(0.5); - } + float median() { return ile(0.5); } float maximum() { if (!finalized_) Finish(); @@ -87,9 +84,7 @@ class SimpleStats { return values_[0]; } - int size() const { - return values_.size(); - } + int size() const { return values_.size(); } private: static int float_compare(const void* a, const void* b) { @@ -112,17 +107,15 @@ class LocalCorrelation { int vote; }; - LocalCorrelation(): finalized_(false) { } - ~LocalCorrelation() { } + LocalCorrelation() : finalized_(false) {} + ~LocalCorrelation() {} void Finish() { values_.sort(float_pair_compare); finalized_ = true; } - void Clear() { - finalized_ = false; - } + void Clear() { finalized_ = false; } void Add(float x, float y, int v) { struct float_pair value; @@ -174,18 +167,22 @@ class LocalCorrelation { // consist of multiple blobs (BLOBNBOX's). class FPChar { public: - enum Alignment { - ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD - }; - - FPChar(): box_(), real_body_(), - from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0), - final_(false), alignment_(ALIGN_UNKNOWN), - merge_to_prev_(false), delete_flag_(false) { - } + enum Alignment { ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD }; + + FPChar() + : box_(), + real_body_(), + from_(nullptr), + to_(nullptr), + num_blobs_(0), + max_gap_(0), + final_(false), + alignment_(ALIGN_UNKNOWN), + merge_to_prev_(false), + delete_flag_(false) {} // Initialize from blob. - void Init(BLOBNBOX *blob) { + void Init(BLOBNBOX* blob) { box_ = blob->bounding_box(); real_body_ = box_; from_ = to_ = blob; @@ -194,7 +191,7 @@ class FPChar { // Merge this character with "next". The "next" character should // consist of succeeding blobs on the same row. - void Merge(const FPChar &next) { + void Merge(const FPChar& next) { int gap = real_body_.x_gap(next.real_body_); if (gap > max_gap_) max_gap_ = gap; @@ -205,60 +202,40 @@ class FPChar { } // Accessors. - const TBOX &box() const { return box_; } - void set_box(const TBOX &box) { - box_ = box; - } - const TBOX &real_body() const { return real_body_; } + const TBOX& box() const { return box_; } + void set_box(const TBOX& box) { box_ = box; } + const TBOX& real_body() const { return real_body_; } bool is_final() const { return final_; } - void set_final(bool flag) { - final_ = flag; - } + void set_final(bool flag) { final_ = flag; } - const Alignment& alignment() const { - return alignment_; - } - void set_alignment(Alignment alignment) { - alignment_ = alignment; - } + const Alignment& alignment() const { return alignment_; } + void set_alignment(Alignment alignment) { alignment_ = alignment; } - bool merge_to_prev() const { - return merge_to_prev_; - } - void set_merge_to_prev(bool flag) { - merge_to_prev_ = flag; - } + bool merge_to_prev() const { return merge_to_prev_; } + void set_merge_to_prev(bool flag) { merge_to_prev_ = flag; } - bool delete_flag() const { - return delete_flag_; - } - void set_delete_flag(bool flag) { - delete_flag_ = flag; - } + bool delete_flag() const { return delete_flag_; } + void set_delete_flag(bool flag) { delete_flag_ = flag; } - int max_gap() const { - return max_gap_; - } + int max_gap() const { return max_gap_; } - int num_blobs() const { - return num_blobs_; - } + int num_blobs() const { return num_blobs_; } private: TBOX box_; // Rectangle region considered to be occupied by this // character. It could be bigger than the bounding box. TBOX real_body_; // Real bounding box of this character. - BLOBNBOX *from_; // The first blob of this character. - BLOBNBOX *to_; // The last blob of this character. - int num_blobs_; // Number of blobs that belong to this character. - int max_gap_; // Maximum x gap between the blobs. + BLOBNBOX* from_; // The first blob of this character. + BLOBNBOX* to_; // The last blob of this character. + int num_blobs_; // Number of blobs that belong to this character. + int max_gap_; // Maximum x gap between the blobs. bool final_; // True if alignment/fragmentation decision for this // character is finalized. Alignment alignment_; // Alignment status. - bool merge_to_prev_; // True if this is a fragmented blob that + bool merge_to_prev_; // True if this is a fragmented blob that // needs to be merged to the previous // character. @@ -270,15 +247,21 @@ class FPChar { // FPChar's. class FPRow { public: - FPRow() : pitch_(0.0f), estimated_pitch_(0.0f), - all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), - heights_(), characters_(), real_row_(nullptr) { - } - - ~FPRow() { } + FPRow() + : pitch_(0.0f), + estimated_pitch_(0.0f), + all_pitches_(), + all_gaps_(), + good_pitches_(), + good_gaps_(), + heights_(), + characters_(), + real_row_(nullptr) {} + + ~FPRow() {} // Initialize from TD_ROW. - void Init(TO_ROW *row); + void Init(TO_ROW* row); // Estimate character pitch of this row, based on current alignment // status of underlying FPChar's. The argument pass1 can be set to @@ -311,53 +294,31 @@ class FPRow { void DebugOutputResult(int row_index); - int good_pitches() { - return good_pitches_.size(); - } + int good_pitches() { return good_pitches_.size(); } - int good_gaps() { - return good_gaps_.size(); - } + int good_gaps() { return good_gaps_.size(); } - float pitch() { - return pitch_; - } + float pitch() { return pitch_; } - float estimated_pitch() { - return estimated_pitch_; - } + float estimated_pitch() { return estimated_pitch_; } - void set_estimated_pitch(float v) { - estimated_pitch_ = v; - } + void set_estimated_pitch(float v) { estimated_pitch_ = v; } - float height() { - return height_; - } + float height() { return height_; } float height_pitch_ratio() { if (good_pitches_.size() < 2) return -1.0; return height_ / good_pitches_.median(); } - float gap() { - return gap_; - } + float gap() { return gap_; } - int num_chars() { - return characters_.size(); - } - FPChar *character(int i) { - return &characters_[i]; - } + int num_chars() { return characters_.size(); } + FPChar* character(int i) { return &characters_[i]; } - const TBOX &box(int i) { - return characters_[i].box(); - } + const TBOX& box(int i) { return characters_[i].box(); } - const TBOX &real_body(int i) { - return characters_[i].real_body(); - } + const TBOX& real_body(int i) { return characters_[i].real_body(); } bool is_box_modified(int i) { return !(characters_[i].box() == characters_[i].real_body()); @@ -367,33 +328,23 @@ class FPRow { return (characters_[i].box().left() + characters_[i].box().right()) / 2.0; } - bool is_final(int i) { - return characters_[i].is_final(); - } + bool is_final(int i) { return characters_[i].is_final(); } - void finalize(int i) { - characters_[i].set_final(true); - } + void finalize(int i) { characters_[i].set_final(true); } bool is_good(int i) { return characters_[i].alignment() == FPChar::ALIGN_GOOD; } - bool is_bad(int i) { - return characters_[i].alignment() == FPChar::ALIGN_BAD; - } + bool is_bad(int i) { return characters_[i].alignment() == FPChar::ALIGN_BAD; } bool is_unknown(int i) { return characters_[i].alignment() == FPChar::ALIGN_UNKNOWN; } - void mark_good(int i) { - characters_[i].set_alignment(FPChar::ALIGN_GOOD); - } + void mark_good(int i) { characters_[i].set_alignment(FPChar::ALIGN_GOOD); } - void mark_bad(int i) { - characters_[i].set_alignment(FPChar::ALIGN_BAD); - } + void mark_bad(int i) { characters_[i].set_alignment(FPChar::ALIGN_BAD); } void clear_alignment(int i) { characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN); @@ -425,7 +376,8 @@ class FPRow { if (box1.width() >= pitch * (1.0 + kFPTolerance) || box2.width() >= pitch * (1.0 + kFPTolerance) || box1.height() >= pitch * (1.0 + kFPTolerance) || - box2.height() >= pitch * (1.0 + kFPTolerance)) return false; + box2.height() >= pitch * (1.0 + kFPTolerance)) + return false; const float real_pitch = box_pitch(box1, box2); if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true; @@ -441,7 +393,7 @@ class FPRow { return false; } - static bool is_interesting_blob(const BLOBNBOX *blob) { + static bool is_interesting_blob(const BLOBNBOX* blob) { return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER; } @@ -457,11 +409,11 @@ class FPRow { characters_.truncate(index); } - float pitch_; // Character pitch. + float pitch_; // Character pitch. float estimated_pitch_; // equal to pitch_ if pitch_ is considered // to be good enough. float height_; // Character height. - float gap_; // Minimum gap between characters. + float gap_; // Minimum gap between characters. // Pitches between any two successive characters. SimpleStats all_pitches_; @@ -477,10 +429,10 @@ class FPRow { SimpleStats heights_; GenericVector characters_; - TO_ROW *real_row_; // Underlying TD_ROW for this row. + TO_ROW* real_row_; // Underlying TD_ROW for this row. }; -void FPRow::Init(TO_ROW *row) { +void FPRow::Init(TO_ROW* row) { ASSERT_HOST(row != nullptr); ASSERT_HOST(row->xheight > 0); real_row_ = row; @@ -523,7 +475,7 @@ void FPRow::OutputEstimations() { // are skinny. Use pitch_ - height_ instead if it's smaller, but // positive. real_row_->kern_size = real_row_->pr_nonsp = - std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f)); + std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f)); real_row_->body_size = pitch_ - real_row_->kern_size; if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) { @@ -549,11 +501,11 @@ void FPRow::OutputEstimations() { // Don't consider a quarter space as a real space, because it's used // for line justification in traditional Japanese books. real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(), - (double)good_gaps_.ile(0.875)); + (double)good_gaps_.ile(0.875)); int space_threshold = - std::min((real_row_->max_nonspace + real_row_->min_space) / 2, - static_cast(real_row_->xheight)); + std::min((real_row_->max_nonspace + real_row_->min_space) / 2, + static_cast(real_row_->xheight)); // Make max_nonspace larger than any intra-character gap so that // make_prop_words() won't break a row at the middle of a character. @@ -563,13 +515,13 @@ void FPRow::OutputEstimations() { } } real_row_->space_threshold = - std::min((real_row_->max_nonspace + real_row_->min_space) / 2, - static_cast(real_row_->xheight)); + std::min((real_row_->max_nonspace + real_row_->min_space) / 2, + static_cast(real_row_->xheight)); real_row_->used_dm_model = false; // Setup char_cells. ICOORDELT_IT cell_it = &real_row_->char_cells; - ICOORDELT *cell = new ICOORDELT(real_body(0).left(), 0); + ICOORDELT* cell = new ICOORDELT(real_body(0).left(), 0); cell_it.add_after_then_move(cell); int right = real_body(0).right(); @@ -633,9 +585,8 @@ void FPRow::EstimatePitch(bool pass1) { // character may have a good pitch only between its successor. // So we collect only pitch values between two good // characters. and within tolerance in pass2. - if (pass1 || (prev_was_good && - fabs(estimated_pitch_ - pitch) < - kFPTolerance * estimated_pitch_)) { + if (pass1 || (prev_was_good && fabs(estimated_pitch_ - pitch) < + kFPTolerance * estimated_pitch_)) { good_pitches_.Add(pitch); if (!is_box_modified(i - 1) && !is_box_modified(i)) { good_gaps_.Add(gap); @@ -674,16 +625,16 @@ void FPRow::EstimatePitch(bool pass1) { void FPRow::DebugOutputResult(int row_index) { if (num_chars() > 0) { - tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, " - "space_size=%f, space_threshold=%d, xheight=%f\n", - row_index, (int)(real_row_->pitch_decision), - real_row_->fixed_pitch, real_row_->max_nonspace, - real_row_->space_size, real_row_->space_threshold, - real_row_->xheight); + tprintf( + "Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, " + "space_size=%f, space_threshold=%d, xheight=%f\n", + row_index, (int)(real_row_->pitch_decision), real_row_->fixed_pitch, + real_row_->max_nonspace, real_row_->space_size, + real_row_->space_threshold, real_row_->xheight); for (int i = 0; i < num_chars(); i++) { - tprintf("Char %d: is_final=%d is_good=%d num_blobs=%d: ", - i, is_final(i), is_good(i), character(i)->num_blobs()); + tprintf("Char %d: is_final=%d is_good=%d num_blobs=%d: ", i, is_final(i), + is_good(i), character(i)->num_blobs()); box(i).print(); } } @@ -694,21 +645,22 @@ void FPRow::Pass1Analyze() { if (estimated_pitch_ > 0.0f) { for (int i = 2; i < num_chars(); i++) { - if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) && + if (is_good_pitch(estimated_pitch_, box(i - 2), box(i - 1)) && is_good_pitch(estimated_pitch_, box(i - 1), box(i))) { mark_good(i - 1); } } } else { for (int i = 2; i < num_chars(); i++) { - if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) { + if (is_good_pitch(box_pitch(box(i - 2), box(i - 1)), box(i - 1), + box(i))) { mark_good(i - 1); } } } character(0)->set_alignment(character(1)->alignment()); - character(num_chars() - 1)->set_alignment( - character(num_chars() - 2)->alignment()); + character(num_chars() - 1) + ->set_alignment(character(num_chars() - 2)->alignment()); } bool FPRow::Pass2Analyze() { @@ -728,7 +680,7 @@ bool FPRow::Pass2Analyze() { // body including this character based on the character. Skip // whitespace if necessary. bool skipped_whitespaces = false; - float c1 = center_x(i + 1) - 1.5 * estimated_pitch_; + float c1 = center_x(i + 1) - 1.5 * estimated_pitch_; while (c1 > box(i).right()) { skipped_whitespaces = true; c1 -= estimated_pitch_; @@ -741,7 +693,7 @@ bool FPRow::Pass2Analyze() { TBOX merged; while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) && merged.bounding_union(box(j)).height() < - estimated_pitch_ * (1 + kFPTolerance)) { + estimated_pitch_ * (1 + kFPTolerance)) { merged += box(j); j--; } @@ -791,7 +743,7 @@ bool FPRow::Pass2Analyze() { TBOX merged; while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) && merged.bounding_union(box(j)).height() < - estimated_pitch_ * (1 + kFPTolerance)) { + estimated_pitch_ * (1 + kFPTolerance)) { merged += box(j); j++; } @@ -838,7 +790,7 @@ void FPRow::MergeFragments() { character(last_char)->Merge(*character(j)); character(j)->set_delete_flag(true); clear_alignment(last_char); - character(j-1)->set_merge_to_prev(false); + character(j - 1)->set_merge_to_prev(false); } else { last_char = j; } @@ -907,17 +859,19 @@ void FPRow::FinalizeLargeChars() { bad_pitch = true; } } - if (good_pitch && !bad_pitch) mark_good(i); - else if (!good_pitch && bad_pitch) mark_bad(i); + if (good_pitch && !bad_pitch) + mark_good(i); + else if (!good_pitch && bad_pitch) + mark_bad(i); } } class FPAnalyzer { public: - FPAnalyzer(): page_tr_(), rows_() { } - ~FPAnalyzer() { } + FPAnalyzer() : page_tr_(), rows_() {} + ~FPAnalyzer() {} - void Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks); + void Init(ICOORD page_tr, TO_BLOCK_LIST* port_blocks); void Pass1Analyze() { for (int i = 0; i < rows_.size(); i++) rows_[i].Pass1Analyze(); @@ -929,8 +883,8 @@ class FPAnalyzer { void EstimatePitch(bool pass1); bool maybe_fixed_pitch() { - if (rows_.empty() || - rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false; + if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) + return false; return true; } @@ -962,9 +916,7 @@ class FPAnalyzer { for (int i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i); } - int num_rows() { - return rows_.size(); - } + int num_rows() { return rows_.size(); } // Returns the upper limit for pass2 loop iteration. int max_iteration() { @@ -982,15 +934,14 @@ class FPAnalyzer { int max_chars_per_row_; }; -void FPAnalyzer::Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { +void FPAnalyzer::Init(ICOORD page_tr, TO_BLOCK_LIST* port_blocks) { page_tr_ = page_tr; TO_BLOCK_IT block_it; - block_it.set_to_list (port_blocks); + block_it.set_to_list(port_blocks); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - TO_BLOCK *block = block_it.data(); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + TO_BLOCK* block = block_it.data(); if (!block->get_rows()->empty()) { ASSERT_HOST(block->xheight > 0); find_repeated_chars(block, FALSE); @@ -999,8 +950,7 @@ void FPAnalyzer::Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { num_empty_rows_ = 0; max_chars_per_row_ = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { TO_ROW_IT row_it = block_it.data()->get_rows(); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { FPRow row; @@ -1037,9 +987,8 @@ void FPAnalyzer::EstimatePitch(bool pass1) { // from this row. rows_[i].set_estimated_pitch(rows_[i].pitch()); } else if (rows_[i].num_chars() > 1) { - float estimated_pitch = - pitch_height_stats.EstimateYFor(rows_[i].height() + rows_[i].gap(), - 0.1); + float estimated_pitch = pitch_height_stats.EstimateYFor( + rows_[i].height() + rows_[i].gap(), 0.1); // CJK characters are more likely to be fragmented than poorly // chopped. So trust the page-level estimation of character // pitch only if it's larger than row-level estimation or @@ -1056,8 +1005,7 @@ void FPAnalyzer::EstimatePitch(bool pass1) { } // namespace -void compute_fixed_pitch_cjk(ICOORD page_tr, - TO_BLOCK_LIST *port_blocks) { +void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST* port_blocks) { FPAnalyzer analyzer; analyzer.Init(page_tr, port_blocks); if (analyzer.num_rows() == 0) return; diff --git a/src/textord/cjkpitch.h b/src/textord/cjkpitch.h index 601ec21371..c636888588 100644 --- a/src/textord/cjkpitch.h +++ b/src/textord/cjkpitch.h @@ -20,7 +20,7 @@ #ifndef CJKPITCH_H_ #define CJKPITCH_H_ -#include "blobbox.h" +#include "blobbox.h" // Function to test "fixed-pitchness" of the input text and estimating // character pitch parameters for it, based on CJK fixed-pitch layout @@ -66,6 +66,6 @@ // fixed_pitch_words() and the rows need to be processed with // make_prop_words() even if they are fixed pitched. void compute_fixed_pitch_cjk(ICOORD page_tr, // top right - TO_BLOCK_LIST *port_blocks); // input list + TO_BLOCK_LIST* port_blocks); // input list #endif // CJKPITCH_H_ diff --git a/src/textord/colfind.cpp b/src/textord/colfind.cpp index bf630d3562..06631f9a72 100644 --- a/src/textord/colfind.cpp +++ b/src/textord/colfind.cpp @@ -25,17 +25,17 @@ #include "colfind.h" +#include "blobbox.h" #include "ccnontextdetect.h" #include "colpartition.h" #include "colpartitionset.h" #include "equationdetectbase.h" #include "linefind.h" #include "normalis.h" -#include "strokewidth.h" -#include "blobbox.h" +#include "params.h" #include "scrollview.h" +#include "strokewidth.h" #include "tablefind.h" -#include "params.h" #include "workingpartset.h" #include @@ -54,10 +54,10 @@ const double kMinGutterWidthGrid = 0.5; // adding noise blobs. const double kMaxDistToPartSizeRatio = 1.5; -BOOL_VAR(textord_tabfind_show_initial_partitions, - false, "Show partition bounds"); -BOOL_VAR(textord_tabfind_show_reject_blobs, - false, "Show blobs rejected as noise"); +BOOL_VAR(textord_tabfind_show_initial_partitions, false, + "Show partition bounds"); +BOOL_VAR(textord_tabfind_show_reject_blobs, false, + "Show blobs rejected as noise"); INT_VAR(textord_tabfind_show_partitions, 0, "Show partition bounds, waiting if >1"); BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds"); @@ -72,30 +72,35 @@ ScrollView* ColumnFinder::blocks_win_ = nullptr; // bleft and tright are the bounds of the image (or rectangle) being processed. // vlines is a (possibly empty) list of TabVector and vertical_x and y are // the sum logical vertical vector produced by LineFinder::FindVerticalLines. -ColumnFinder::ColumnFinder(int gridsize, - const ICOORD& bleft, const ICOORD& tright, - int resolution, bool cjk_script, - double aligned_gap_fraction, +ColumnFinder::ColumnFinder(int gridsize, const ICOORD& bleft, + const ICOORD& tright, int resolution, + bool cjk_script, double aligned_gap_fraction, TabVector_LIST* vlines, TabVector_LIST* hlines, int vertical_x, int vertical_y) - : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, - resolution), - cjk_script_(cjk_script), - min_gutter_width_(static_cast(kMinGutterWidthGrid * gridsize)), - mean_column_gap_(tright.x() - bleft.x()), - tabfind_aligned_gap_fraction_(aligned_gap_fraction), - reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f), - best_columns_(nullptr), stroke_width_(nullptr), - part_grid_(gridsize, bleft, tright), nontext_map_(nullptr), - projection_(resolution), - denorm_(nullptr), input_blobs_win_(nullptr), equation_detect_(nullptr) { + : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, + resolution), + cjk_script_(cjk_script), + min_gutter_width_(static_cast(kMinGutterWidthGrid * gridsize)), + mean_column_gap_(tright.x() - bleft.x()), + tabfind_aligned_gap_fraction_(aligned_gap_fraction), + reskew_(1.0f, 0.0f), + rotation_(1.0f, 0.0f), + rerotate_(1.0f, 0.0f), + best_columns_(nullptr), + stroke_width_(nullptr), + part_grid_(gridsize, bleft, tright), + nontext_map_(nullptr), + projection_(resolution), + denorm_(nullptr), + input_blobs_win_(nullptr), + equation_detect_(nullptr) { TabVector_IT h_it(&horizontal_lines_); h_it.add_list_after(hlines); } ColumnFinder::~ColumnFinder() { column_sets_.delete_data_pointers(); - delete [] best_columns_; + delete[] best_columns_; delete stroke_width_; delete input_blobs_win_; pixDestroy(&nontext_map_); @@ -146,12 +151,12 @@ void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); min_gutter_width_ = static_cast(kMinGutterWidthGrid * gridsize()); input_block->ReSetAndReFilterBlobs(); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (textord_tabfind_show_blocks) { input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); input_block->plot_graded_blobs(input_blobs_win_); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED SetBlockRuleEdges(input_block); pixDestroy(&nontext_map_); // Run a preliminary strokewidth neighbour detection on the medium blobs. @@ -193,8 +198,7 @@ bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio, // vertical_text_lines true if the text lines are vertical. // recognition_rotation [0..3] is the number of anti-clockwise 90 degree // rotations from osd required for the text to be upright and readable. -void ColumnFinder::CorrectOrientation(TO_BLOCK* block, - bool vertical_text_lines, +void ColumnFinder::CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines, int recognition_rotation) { const FCOORD anticlockwise90(0.0f, 1.0f); const FCOORD clockwise90(0.0f, -1.0f); @@ -246,15 +250,14 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK* block, } if (textord_debug_tabfind) { tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", - vertical_text_lines, recognition_rotation, - rotation_.x(), rotation_.y(), - text_rotation_.x(), text_rotation_.y()); + vertical_text_lines, recognition_rotation, rotation_.x(), + rotation_.y(), text_rotation_.x(), text_rotation_.y()); } // Setup the denormalization. ASSERT_HOST(denorm_ == nullptr); denorm_ = new DENORM; - denorm_->SetupNormalization(nullptr, &rotation_, nullptr, - 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); + denorm_->SetupNormalization(nullptr, &rotation_, nullptr, 0.0f, 0.0f, 1.0f, + 1.0f, 0.0f, 0.0f); } // Finds blocks of text, image, rule line, table etc, returning them in the @@ -290,8 +293,8 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); stroke_width_->FindLeaderPartitions(input_block, &part_grid_); stroke_width_->RemoveLineResidue(&big_parts_); - FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_, - input_block); + FindInitialTabVectors(nullptr, min_gutter_width_, + tabfind_aligned_gap_fraction_, input_block); SetBlockRuleEdges(input_block); stroke_width_->GradeBlobsIntoPartitions( pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_, @@ -358,8 +361,8 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, &part_grid_, &deskew_, &reskew_); // Add the deskew to the denorm_. DENORM* new_denorm = new DENORM; - new_denorm->SetupNormalization(nullptr, &deskew_, denorm_, - 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); + new_denorm->SetupNormalization(nullptr, &deskew_, denorm_, 0.0f, 0.0f, + 1.0f, 1.0f, 0.0f, 0.0f); denorm_ = new_denorm; } SetBlockRuleEdges(input_block); @@ -375,12 +378,12 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, // Refill the grid using rectangular spreading, and get the benefit // of the completed tab vectors marking the rule edges of each blob. Clear(); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (textord_tabfind_show_reject_blobs) { ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs"); input_block->plot_graded_blobs(rej_win); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED InsertBlobsToGrid(false, false, &image_bblobs_, this); InsertBlobsToGrid(true, true, &input_block->blobs, this); @@ -433,19 +436,18 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, part_grid_.RefinePartitionPartners(true); SmoothPartnerRuns(); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (textord_tabfind_show_partitions) { ScrollView* window = MakeWindow(400, 300, "Partitions"); if (window != nullptr) { part_grid_.DisplayBoxes(window); - if (!textord_debug_printable) - DisplayTabVectors(window); + if (!textord_debug_printable) DisplayTabVectors(window); if (window != nullptr && textord_tabfind_show_partitions > 1) { delete window->AwaitEvent(SVET_DESTROY); } } } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED part_grid_.AssertNoDuplicates(); } // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here, @@ -461,14 +463,14 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, else TransformToBlocks(blocks, to_blocks); if (textord_debug_tabfind) { - tprintf("Found %d blocks, %d to_blocks\n", - blocks->length(), to_blocks->length()); + tprintf("Found %d blocks, %d to_blocks\n", blocks->length(), + to_blocks->length()); } DisplayBlocks(blocks); RotateAndReskewBlocks(input_is_rtl, to_blocks); int result = 0; - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (blocks_win_ != nullptr) { bool waiting = false; do { @@ -487,7 +489,7 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, delete event; } while (waiting); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED return result; } @@ -518,9 +520,9 @@ void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) { for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { BLOCK* block = block_it.data(); - block->pdblk.plot(blocks_win_, serial++, - textord_debug_printable ? ScrollView::BLUE - : ScrollView::GREEN); + block->pdblk.plot( + blocks_win_, serial++, + textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); } blocks_win_->Update(); } @@ -550,8 +552,7 @@ bool ColumnFinder::MakeColumns(bool single_column) { // at horizontal slices through the page. PartSetVector part_sets; if (!single_column) { - if (!part_grid_.MakeColPartSets(&part_sets)) - return false; // Empty page. + if (!part_grid_.MakeColPartSets(&part_sets)) return false; // Empty page. ASSERT_HOST(part_grid_.gridheight() == gridheight_); // Try using only the good parts first. bool good_only = true; @@ -566,12 +567,10 @@ bool ColumnFinder::MakeColumns(bool single_column) { } good_only = !good_only; } while (column_sets_.empty() && !good_only); - if (textord_debug_tabfind) - PrintColumnCandidates("Column candidates"); + if (textord_debug_tabfind) PrintColumnCandidates("Column candidates"); // Improve the column candidates against themselves. ImproveColumnCandidates(&column_sets_, &column_sets_); - if (textord_debug_tabfind) - PrintColumnCandidates("Improved columns"); + if (textord_debug_tabfind) PrintColumnCandidates("Improved columns"); // Improve the column candidates using the part_sets_. ImproveColumnCandidates(&part_sets, &column_sets_); } @@ -582,8 +581,7 @@ bool ColumnFinder::MakeColumns(bool single_column) { // single column mode. single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); } - if (textord_debug_tabfind) - PrintColumnCandidates("Final Columns"); + if (textord_debug_tabfind) PrintColumnCandidates("Final Columns"); bool has_columns = !column_sets_.empty(); if (has_columns) { // Divide the page into sections of uniform column layout. @@ -611,8 +609,7 @@ void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets, PartSetVector* column_sets) { PartSetVector temp_cols; temp_cols.move(column_sets); - if (src_sets == column_sets) - src_sets = &temp_cols; + if (src_sets == column_sets) src_sets = &temp_cols; int set_size = temp_cols.size(); // Try using only the good parts first. bool good_only = true; @@ -636,7 +633,7 @@ void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets, // Prints debug information on the column candidates. void ColumnFinder::PrintColumnCandidates(const char* title) { - int set_size = column_sets_.size(); + int set_size = column_sets_.size(); tprintf("Found %d %s:\n", set_size, title); if (textord_debug_tabfind >= 3) { for (int i = 0; i < set_size; ++i) { @@ -660,8 +657,7 @@ bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) { ASSERT_HOST(set_count == gridheight()); // Allocate and init the best_columns_. best_columns_ = new ColPartitionSet*[set_count]; - for (int y = 0; y < set_count; ++y) - best_columns_[y] = nullptr; + for (int y = 0; y < set_count; ++y) best_columns_[y] = nullptr; int column_count = column_sets_.size(); // column_set_costs[part_sets_ index][column_sets_ index] is // < INT32_MAX if the partition set is compatible with the column set, @@ -686,17 +682,16 @@ bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) { any_columns_possible[part_i] = false; assigned_costs[part_i] = INT32_MAX; for (int col_i = 0; col_i < column_count; ++col_i) { - if (line_set != nullptr && - column_sets_.get(col_i)->CompatibleColumns(debug, line_set, - WidthCB())) { + if (line_set != nullptr && column_sets_.get(col_i)->CompatibleColumns( + debug, line_set, WidthCB())) { column_set_costs[part_i][col_i] = column_sets_.get(col_i)->UnmatchedWidth(line_set); any_columns_possible[part_i] = true; } else { column_set_costs[part_i][col_i] = INT32_MAX; if (debug) - tprintf("Set id %d did not match at y=%d, lineset =%p\n", - col_i, part_i, line_set); + tprintf("Set id %d did not match at y=%d, lineset =%p\n", col_i, + part_i, line_set); } } } @@ -704,37 +699,36 @@ bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) { // Assign a column set to each vertical grid position. // While there is an unassigned range, find its mode. int start, end; - while (BiggestUnassignedRange(set_count, any_columns_possible, - &start, &end)) { + while ( + BiggestUnassignedRange(set_count, any_columns_possible, &start, &end)) { if (textord_debug_tabfind >= 2) tprintf("Biggest unassigned range = %d- %d\n", start, end); // Find the modal column_set_id in the range. - int column_set_id = RangeModalColumnSet(column_set_costs, - assigned_costs, start, end); + int column_set_id = + RangeModalColumnSet(column_set_costs, assigned_costs, start, end); if (textord_debug_tabfind >= 2) { tprintf("Range modal column id = %d\n", column_set_id); column_sets_.get(column_set_id)->Print(); } // Now find the longest run of the column_set_id in the range. ShrinkRangeToLongestRun(column_set_costs, assigned_costs, - any_columns_possible, - column_set_id, &start, &end); + any_columns_possible, column_set_id, &start, &end); if (textord_debug_tabfind >= 2) tprintf("Shrunk range = %d- %d\n", start, end); // Extend the start and end past the longest run, while there are // only small gaps in compatibility that can be overcome by larger // regions of compatibility beyond. ExtendRangePastSmallGaps(column_set_costs, assigned_costs, - any_columns_possible, - column_set_id, -1, -1, &start); + any_columns_possible, column_set_id, -1, -1, + &start); --end; ExtendRangePastSmallGaps(column_set_costs, assigned_costs, - any_columns_possible, - column_set_id, 1, set_count, &end); + any_columns_possible, column_set_id, 1, set_count, + &end); ++end; if (textord_debug_tabfind) - tprintf("Column id %d applies to range = %d - %d\n", - column_set_id, start, end); + tprintf("Column id %d applies to range = %d - %d\n", column_set_id, start, + end); // Assign the column to the range, which now may overlap with other ranges. AssignColumnToRange(column_set_id, start, end, column_set_costs, assigned_costs); @@ -748,11 +742,11 @@ bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) { } // Free memory. for (int i = 0; i < set_count; ++i) { - delete [] column_set_costs[i]; + delete[] column_set_costs[i]; } - delete [] assigned_costs; - delete [] any_columns_possible; - delete [] column_set_costs; + delete[] assigned_costs; + delete[] any_columns_possible; + delete[] column_set_costs; return any_multi_column; } @@ -768,18 +762,15 @@ bool ColumnFinder::BiggestUnassignedRange(int set_count, for (int start = 0; start < gridheight_; start = end) { // Find the first unassigned index in start. while (start < set_count) { - if (best_columns_[start] == nullptr && any_columns_possible[start]) - break; + if (best_columns_[start] == nullptr && any_columns_possible[start]) break; ++start; } // Find the first past the end and count the good ones in between. int range_size = 1; // Number of non-null, but unassigned line sets. end = start + 1; while (end < set_count) { - if (best_columns_[end] != nullptr) - break; - if (any_columns_possible[end]) - ++range_size; + if (best_columns_[end] != nullptr) break; + if (any_columns_possible[end]) ++range_size; ++end; } if (start < set_count && range_size > best_range_size) { @@ -793,8 +784,8 @@ bool ColumnFinder::BiggestUnassignedRange(int set_count, // Finds the modal compatible column_set_ index within the given range. int ColumnFinder::RangeModalColumnSet(int** column_set_costs, - const int* assigned_costs, - int start, int end) { + const int* assigned_costs, int start, + int end) { int column_count = column_sets_.size(); STATS column_stats(0, column_count); for (int part_i = start; part_i < end; ++part_i) { @@ -814,8 +805,8 @@ int ColumnFinder::RangeModalColumnSet(int** column_set_costs, void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, const int* assigned_costs, const bool* any_columns_possible, - int column_set_id, - int* best_start, int* best_end) { + int column_set_id, int* best_start, + int* best_end) { // orig_start and orig_end are the maximum range we will look at. int orig_start = *best_start; int orig_end = *best_end; @@ -836,7 +827,7 @@ void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, while (end < orig_end) { if (column_set_costs[end][column_set_id] >= assigned_costs[start] && any_columns_possible[end]) - break; + break; ++end; } if (start < orig_end && end - start > best_range_size) { @@ -853,13 +844,11 @@ void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, const int* assigned_costs, const bool* any_columns_possible, - int column_set_id, - int step, int end, int* start) { + int column_set_id, int step, + int end, int* start) { if (textord_debug_tabfind > 2) - tprintf("Starting expansion at %d, step=%d, limit=%d\n", - *start, step, end); - if (*start == end) - return; // Cannot be expanded. + tprintf("Starting expansion at %d, step=%d, limit=%d\n", *start, step, end); + if (*start == end) return; // Cannot be expanded. int barrier_size = 0; int good_size = 0; @@ -871,13 +860,11 @@ void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, if (column_set_costs[i][column_set_id] < assigned_costs[i]) break; // We are back on. // Locations where none are possible don't count. - if (any_columns_possible[i]) - ++barrier_size; + if (any_columns_possible[i]) ++barrier_size; } if (textord_debug_tabfind > 2) tprintf("At %d, Barrier size=%d\n", i, barrier_size); - if (barrier_size > kMaxIncompatibleColumnCount) - return; // Barrier too big. + if (barrier_size > kMaxIncompatibleColumnCount) return; // Barrier too big. if (i == end) { // We can't go any further, but the barrier was small, so go to the end. *start = i - step; @@ -894,8 +881,7 @@ void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, if (textord_debug_tabfind > 2) tprintf("At %d, good size = %d\n", i, good_size); // If we had enough good ones we can extend the start and keep looking. - if (good_size >= barrier_size) - *start = i - step; + if (good_size >= barrier_size) *start = i - step; } while (good_size >= barrier_size); } @@ -918,13 +904,12 @@ void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) { int width_samples = 0; for (int i = 0; i < gridheight_; ++i) { ASSERT_HOST(best_columns_[i] != nullptr); - best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width, - &width_samples, - &total_gap, - &gap_samples); + best_columns_[i]->AccumulateColumnWidthsAndGaps( + &total_width, &width_samples, &total_gap, &gap_samples); } mean_column_gap_ = any_multi_column && gap_samples > 0 - ? total_gap / gap_samples : total_width / width_samples; + ? total_gap / gap_samples + : total_width / width_samples; } //////// Functions that manipulate ColPartitions in the part_grid_ ///// @@ -956,8 +941,8 @@ void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) { // Splits partitions that cross columns where they have nothing in the gap. void ColumnFinder::GridSplitPartitions() { // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&part_grid_); + GridSearch gsearch( + &part_grid_); gsearch.StartFullSearch(); ColPartition* dont_repeat = nullptr; ColPartition* part; @@ -969,16 +954,14 @@ void ColumnFinder::GridSplitPartitions() { int last_col = -1; // Find which columns the partition spans. part->ColumnRange(resolution_, column_set, &first_col, &last_col); - if (first_col > 0) - --first_col; + if (first_col > 0) --first_col; // Convert output column indices to physical column indices. first_col /= 2; last_col /= 2; // We will only consider cases where a partition spans two columns, // since a heading that spans more columns than that is most likely // genuine. - if (last_col != first_col + 1) - continue; + if (last_col != first_col + 1) continue; // Set up a rectangle search x-bounded by the column gap and y by the part. int y = part->MidY(); TBOX margin_box = part->bounding_box(); @@ -989,28 +972,24 @@ void ColumnFinder::GridSplitPartitions() { part->Print(); } ColPartition* column = column_set->GetColumnByIndex(first_col); - if (column == nullptr) - continue; + if (column == nullptr) continue; margin_box.set_left(column->RightAtY(y) + 2); column = column_set->GetColumnByIndex(last_col); - if (column == nullptr) - continue; + if (column == nullptr) continue; margin_box.set_right(column->LeftAtY(y) - 2); // TODO(rays) Decide whether to keep rectangular filling or not in the // main grid and therefore whether we need a fancier search here. // Now run the rect search on the main blob grid. GridSearch rectsearch(this); if (debug) { - tprintf("Searching box (%d,%d)->(%d,%d)\n", - margin_box.left(), margin_box.bottom(), - margin_box.right(), margin_box.top()); + tprintf("Searching box (%d,%d)->(%d,%d)\n", margin_box.left(), + margin_box.bottom(), margin_box.right(), margin_box.top()); part->Print(); } rectsearch.StartRectSearch(margin_box); BLOBNBOX* bbox; while ((bbox = rectsearch.NextRectSearch()) != nullptr) { - if (bbox->bounding_box().overlap(margin_box)) - break; + if (bbox->bounding_box().overlap(margin_box)) break; } if (bbox == nullptr) { // There seems to be nothing in the hole, so split the partition. @@ -1030,8 +1009,7 @@ void ColumnFinder::GridSplitPartitions() { part_grid_.InsertBBox(true, true, split_part); } else { // Split had no effect - if (debug) - tprintf("Split had no effect\n"); + if (debug) tprintf("Split had no effect\n"); dont_repeat = part; } part_grid_.InsertBBox(true, true, part); @@ -1048,13 +1026,12 @@ void ColumnFinder::GridSplitPartitions() { // and the horizontal gap is small enough. void ColumnFinder::GridMergePartitions() { // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&part_grid_); + GridSearch gsearch( + &part_grid_); gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->IsUnMergeableType()) - continue; + if (part->IsUnMergeableType()) continue; // Set up a rectangle search x-bounded by the column and y by the part. ColPartitionSet* columns = best_columns_[gsearch.GridY()]; TBOX box = part->bounding_box(); @@ -1067,23 +1044,21 @@ void ColumnFinder::GridMergePartitions() { ColPartition* left_column = columns->ColumnContaining(box.left(), y); ColPartition* right_column = columns->ColumnContaining(box.right(), y); if (left_column == nullptr || right_column != left_column) { - if (debug) - tprintf("In different columns\n"); + if (debug) tprintf("In different columns\n"); continue; } box.set_left(left_column->LeftAtY(y)); box.set_right(right_column->RightAtY(y)); // Now run the rect search. bool modified_box = false; - GridSearch - rsearch(&part_grid_); + GridSearch rsearch( + &part_grid_); rsearch.SetUniqueMode(true); rsearch.StartRectSearch(box); ColPartition* neighbour; while ((neighbour = rsearch.NextRectSearch()) != nullptr) { - if (neighbour == part || neighbour->IsUnMergeableType()) - continue; + if (neighbour == part || neighbour->IsUnMergeableType()) continue; const TBOX& neighbour_box = neighbour->bounding_box(); if (debug) { tprintf("Considering merge with neighbour at:"); @@ -1109,7 +1084,7 @@ void ColumnFinder::GridMergePartitions() { part_box.left() > neighbour->right_margin()) continue; // Neighbour is too far to the left. int h_gap = std::max(part_box.left(), neighbour_box.left()) - - std::min(part_box.right(), neighbour_box.right()); + std::min(part_box.right(), neighbour_box.right()); if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction || part_box.width() < mean_column_gap_ || neighbour_box.width() < mean_column_gap_) { @@ -1164,8 +1139,7 @@ void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) { ColPartition* best_part = nullptr; int best_distance = 0; while ((part = rsearch.NextRectSearch()) != nullptr) { - if (part->IsUnMergeableType()) - continue; + if (part->IsUnMergeableType()) continue; int distance = projection_.DistanceOfBoxFromPartition( blob->bounding_box(), *part, denorm_, debug); if (best_part == nullptr || distance < best_distance) { @@ -1177,9 +1151,9 @@ void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) { best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) { // Close enough to merge. if (debug) { - tprintf("Adding noise blob with distance %d, thr=%g:box:", - best_distance, - kMaxDistToPartSizeRatio * best_part->median_size()); + tprintf( + "Adding noise blob with distance %d, thr=%g:box:", best_distance, + kMaxDistToPartSizeRatio * best_part->median_size()); blob->bounding_box().print(); tprintf("To partition:"); best_part->Print(); @@ -1219,8 +1193,7 @@ void ColumnFinder::GridRemoveUnderlinePartitions() { TabVector_IT hline_it(&horizontal_lines_); for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { TabVector* hline = hline_it.data(); - if (hline->intersects_other_lines()) - continue; + if (hline->intersects_other_lines()) continue; TBOX line_box = BoxFromHLine(hline); TBOX search_box = line_box; search_box.pad(0, line_box.height()); @@ -1241,7 +1214,7 @@ void ColumnFinder::GridRemoveUnderlinePartitions() { if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top()) touched_text = true; } else if (covered->blob_type() == BRT_HLINE && - line_box.contains(covered->bounding_box())) { + line_box.contains(covered->bounding_box())) { line_part = covered; } } @@ -1259,8 +1232,8 @@ void ColumnFinder::GridInsertHLinePartitions() { TabVector* hline = hline_it.data(); TBOX line_box = BoxFromHLine(hline); ColPartition* part = ColPartition::MakeLinePartition( - BRT_HLINE, vertical_skew_, - line_box.left(), line_box.bottom(), line_box.right(), line_box.top()); + BRT_HLINE, vertical_skew_, line_box.left(), line_box.bottom(), + line_box.right(), line_box.top()); part->set_type(PT_HORZ_LINE); bool any_image = false; ColPartitionGridSearch part_search(&part_grid_); @@ -1285,8 +1258,7 @@ void ColumnFinder::GridInsertVLinePartitions() { TabVector_IT vline_it(dead_vectors()); for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { TabVector* vline = vline_it.data(); - if (!vline->IsSeparator()) - continue; + if (!vline->IsSeparator()) continue; int left = std::min(vline->startpt().x(), vline->endpt().x()); int right = std::max(vline->startpt().x(), vline->endpt().x()); right += vline->mean_width(); @@ -1297,8 +1269,8 @@ void ColumnFinder::GridInsertVLinePartitions() { ++right; } ColPartition* part = ColPartition::MakeLinePartition( - BRT_VLINE, vertical_skew_, - left, vline->startpt().y(), right, vline->endpt().y()); + BRT_VLINE, vertical_skew_, left, vline->startpt().y(), right, + vline->endpt().y()); part->set_type(PT_VERT_LINE); bool any_image = false; ColPartitionGridSearch part_search(&part_grid_); @@ -1321,8 +1293,8 @@ void ColumnFinder::GridInsertVLinePartitions() { // For every ColPartition in the grid, sets its type based on position // in the columns. void ColumnFinder::SetPartitionTypes() { - GridSearch - gsearch(&part_grid_); + GridSearch gsearch( + &part_grid_); gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -1334,8 +1306,8 @@ void ColumnFinder::SetPartitionTypes() { // Sets the type of all in the group to the maximum of the group. void ColumnFinder::SmoothPartnerRuns() { // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&part_grid_); + GridSearch gsearch( + &part_grid_); gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -1378,8 +1350,7 @@ void ColumnFinder::AddToTempPartList(ColPartition* part, int neighbour_bottom = test_part->median_bottom(); int neighbour_top = test_part->median_top(); int neighbour_y = (neighbour_bottom + neighbour_top) / 2; - if (neighbour_y < mid_y) - break; // part is above test_part so insert it. + if (neighbour_y < mid_y) break; // part is above test_part so insert it. if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part)) continue; // Incompatibles stay in order } @@ -1395,8 +1366,8 @@ void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list, WorkingPartSet_LIST* work_set) { ColPartition_C_IT it(temp_list); while (!it.empty()) { - it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, - &good_parts_, work_set); + it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, &good_parts_, + work_set); it.forward(); } } @@ -1412,8 +1383,8 @@ void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks, // like horizontal lines going before the text lines above them. ColPartition_CLIST temp_part_list; // Iterate the ColPartitions in the grid. It starts at the top - GridSearch - gsearch(&part_grid_); + GridSearch gsearch( + &part_grid_); gsearch.StartFullSearch(); int prev_grid_y = -1; ColPartition* part; @@ -1427,8 +1398,8 @@ void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks, column_set = best_columns_[grid_y]; // Every line should have a non-null best column. ASSERT_HOST(column_set != nullptr); - column_set->ChangeWorkColumns(bleft_, tright_, resolution_, - &good_parts_, &work_set); + column_set->ChangeWorkColumns(bleft_, tright_, resolution_, &good_parts_, + &work_set); if (textord_debug_tabfind) tprintf("Changed column groups at grid index %d, y=%d\n", gsearch.GridY(), gsearch.GridY() * gridsize()); @@ -1474,8 +1445,8 @@ void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) { ReflectBlobList(&input_block->large_blobs); // Update the denorm with the reflection. DENORM* new_denorm = new DENORM; - new_denorm->SetupNormalization(nullptr, nullptr, denorm_, - 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); + new_denorm->SetupNormalization(nullptr, nullptr, denorm_, 0.0f, 0.0f, -1.0f, + 1.0f, 0.0f, 0.0f); denorm_ = new_denorm; } @@ -1483,8 +1454,7 @@ void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) { // exploding multi-outline blobs back to single blobs and accumulating // the bounding box widths and heights. static void RotateAndExplodeBlobList(const FCOORD& blob_rotation, - BLOBNBOX_LIST* bblobs, - STATS* widths, + BLOBNBOX_LIST* bblobs, STATS* widths, STATS* heights) { BLOBNBOX_IT it(bblobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { @@ -1495,7 +1465,7 @@ static void RotateAndExplodeBlobList(const FCOORD& blob_rotation, if (!outlines->singleton()) { // This blob has multiple outlines from CJK repair. // Explode the blob back into individual outlines. - for (;!ol_it.empty(); ol_it.forward()) { + for (; !ol_it.empty(); ol_it.forward()) { C_OUTLINE* outline = ol_it.extract(); BLOBNBOX* new_blob = BLOBNBOX::RealBlob(outline); // This blob will be revisited later since we add_after_stay_put here. @@ -1558,19 +1528,19 @@ void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, // Compute the block median blob width and height as we go. STATS widths(0, block->pdblk.bounding_box().width()); STATS heights(0, block->pdblk.bounding_box().height()); - RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, - &widths, &heights); + RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, &widths, + &heights); TO_ROW_IT row_it(to_block->get_rows()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { TO_ROW* row = row_it.data(); - RotateAndExplodeBlobList(blob_rotation, row->blob_list(), - &widths, &heights); + RotateAndExplodeBlobList(blob_rotation, row->blob_list(), &widths, + &heights); } block->set_median_size(static_cast(widths.median() + 0.5), static_cast(heights.median() + 0.5)); if (textord_debug_tabfind >= 2) - tprintf("Block median size = (%d, %d)\n", - block->median_size().x(), block->median_size().y()); + tprintf("Block median size = (%d, %d)\n", block->median_size().x(), + block->median_size().y()); } } @@ -1579,7 +1549,8 @@ void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, // of the given block. // Returns the rotation that needs to be applied to the blobs to make // them sit in the rotated block. -FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) { +FCOORD +ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) { // The text_rotation_ tells us the gross page text rotation that needs // to be applied for classification // TODO(rays) find block-level classify rotation by orientation detection. diff --git a/src/textord/colfind.h b/src/textord/colfind.h index 9c537104b7..44e1de68f0 100644 --- a/src/textord/colfind.h +++ b/src/textord/colfind.h @@ -63,20 +63,14 @@ class ColumnFinder : public TabFind { // textlines. ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright, int resolution, bool cjk_script, double aligned_gap_fraction, - TabVector_LIST* vlines, TabVector_LIST* hlines, - int vertical_x, int vertical_y); + TabVector_LIST* vlines, TabVector_LIST* hlines, int vertical_x, + int vertical_y); virtual ~ColumnFinder(); // Accessors for testing - const DENORM* denorm() const { - return denorm_; - } - const TextlineProjection* projection() const { - return &projection_; - } - void set_cjk_script(bool is_cjk) { - cjk_script_ = is_cjk; - } + const DENORM* denorm() const { return denorm_; } + const TextlineProjection* projection() const { return &projection_; } + void set_cjk_script(bool is_cjk) { cjk_script_ = is_cjk; } // ====================================================================== // The main function of ColumnFinder is broken into pieces to facilitate @@ -122,8 +116,8 @@ class ColumnFinder : public TabFind { // horizontal but whose text appears vertically aligned because the image is // not the right way up. // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. - bool IsVerticallyAlignedText(double find_vertical_text_ratio, - TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs); + bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK* block, + BLOBNBOX_CLIST* osd_blobs); // Rotates the blobs and the TabVectors so that the gross writing direction // (text lines) are horizontal and lines are read down the page. @@ -211,16 +205,16 @@ class ColumnFinder : public TabFind { void ShrinkRangeToLongestRun(int** column_set_costs, const int* assigned_costs, const bool* any_columns_possible, - int column_set_id, - int* best_start, int* best_end); + int column_set_id, int* best_start, + int* best_end); // Moves start in the direction of step, up to, but not including end while // the only incompatible regions are no more than kMaxIncompatibleColumnCount // in size, and the compatible regions beyond are bigger. void ExtendRangePastSmallGaps(int** column_set_costs, const int* assigned_costs, const bool* any_columns_possible, - int column_set_id, - int step, int end, int* start); + int column_set_id, int step, int end, + int* start); // Assigns the given column_set_id to the part_sets_ in the given range. void AssignColumnToRange(int column_set_id, int start, int end, int** column_set_costs, int* assigned_costs); diff --git a/src/textord/colpartition.cpp b/src/textord/colpartition.cpp index 587d8aa7f6..e215fe17de 100644 --- a/src/textord/colpartition.cpp +++ b/src/textord/colpartition.cpp @@ -79,21 +79,44 @@ const int kMaxColorDistance = 900; // blob_type is the blob_region_type_ of the blobs in this partition. // Vertical is the direction of logical vertical on the possibly skewed image. ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical) - : left_margin_(-INT32_MAX), right_margin_(INT32_MAX), - median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_size_(0), - median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0), - blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0), - good_width_(false), good_column_(false), - left_key_tab_(false), right_key_tab_(false), - left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical), - working_set_(nullptr), last_add_was_vertical_(false), block_owned_(false), - desperately_merged_(false), - first_column_(-1), last_column_(-1), column_set_(nullptr), - side_step_(0), top_spacing_(0), bottom_spacing_(0), - type_before_table_(PT_UNKNOWN), inside_table_column_(false), - nearest_neighbor_above_(nullptr), nearest_neighbor_below_(nullptr), - space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0), - owns_blobs_(true) { + : left_margin_(-INT32_MAX), + right_margin_(INT32_MAX), + median_bottom_(INT32_MAX), + median_top_(-INT32_MAX), + median_size_(0), + median_left_(INT32_MAX), + median_right_(-INT32_MAX), + median_width_(0), + blob_type_(blob_type), + flow_(BTFT_NONE), + good_blob_score_(0), + good_width_(false), + good_column_(false), + left_key_tab_(false), + right_key_tab_(false), + left_key_(0), + right_key_(0), + type_(PT_UNKNOWN), + vertical_(vertical), + working_set_(nullptr), + last_add_was_vertical_(false), + block_owned_(false), + desperately_merged_(false), + first_column_(-1), + last_column_(-1), + column_set_(nullptr), + side_step_(0), + top_spacing_(0), + bottom_spacing_(0), + type_before_table_(PT_UNKNOWN), + inside_table_column_(false), + nearest_neighbor_above_(nullptr), + nearest_neighbor_below_(nullptr), + space_above_(0), + space_below_(0), + space_to_left_(0), + space_to_right_(0), + owns_blobs_(true) { memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); } @@ -156,9 +179,8 @@ ColPartition::~ColPartition() { // Constructs a fake ColPartition with no BLOBNBOXes to represent a // horizontal or vertical line, given a type and a bounding box. ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type, - const ICOORD& vertical, - int left, int bottom, - int right, int top) { + const ICOORD& vertical, int left, + int bottom, int right, int top) { ColPartition* part = new ColPartition(blob_type, vertical); part->bounding_box_ = TBOX(left, bottom, right, top); part->median_bottom_ = bottom; @@ -172,7 +194,6 @@ ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type, return part; } - // Adds the given box to the partition, updating the partition bounds. // The list of boxes in the partition is updated, ensuring that no box is // recorded twice, and the boxes are kept in increasing left position. @@ -198,10 +219,8 @@ void ColPartition::AddBox(BLOBNBOX* bbox) { } boxes_.add_sorted(SortByBoxLeft, true, bbox); } - if (!left_key_tab_) - left_key_ = BoxLeftKey(); - if (!right_key_tab_) - right_key_ = BoxRightKey(); + if (!left_key_tab_) left_key_ = BoxLeftKey(); + if (!right_key_tab_) right_key_ = BoxRightKey(); if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n", box.left(), box.bottom(), box.right(), box.top(), @@ -287,8 +306,7 @@ void ColPartition::DisownBoxesNoAssert() { BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.data(); - if (bblob->owner() == this) - bblob->set_owner(nullptr); + if (bblob->owner() == this) bblob->set_owner(nullptr); } } @@ -367,8 +385,8 @@ bool ColPartition::IsLegal() { } if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) { if (textord_debug_bugs) { - tprintf("Key inside box: %d v %d or %d v %d\n", - left_key_, BoxLeftKey(), right_key_, BoxRightKey()); + tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(), + right_key_, BoxRightKey()); Print(); } return false; // Keys inside the box. @@ -395,17 +413,15 @@ bool ColPartition::MatchingTextColor(const ColPartition& other) const { return false; // Too noisy. // Colors must match for other to count. - double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_, - other.color2_, - color1_); - double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_, - other.color2_, - color2_); - double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_, - other.color1_); - double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_, - other.color2_); -// All 4 distances must be small enough. + double d_this1_o = + ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_); + double d_this2_o = + ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_); + double d_o1_this = + ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_); + double d_o2_this = + ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_); + // All 4 distances must be small enough. return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance && d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance; } @@ -447,9 +463,8 @@ bool ColPartition::MatchingStrokeWidth(const ColPartition& other, box_it.mark_cycle_pt(); other_it.mark_cycle_pt(); while (!box_it.cycled_list() && !other_it.cycled_list()) { - if (box_it.data()->MatchingStrokeWidth(*other_it.data(), - fractional_tolerance, - constant_tolerance)) + if (box_it.data()->MatchingStrokeWidth( + *other_it.data(), fractional_tolerance, constant_tolerance)) ++match_count; else ++nonmatch_count; @@ -479,21 +494,20 @@ bool ColPartition::OKDiacriticMerge(const ColPartition& candidate, } return false; // All blobs must have diacritic bases. } - if (blob->base_char_top() < min_top) - min_top = blob->base_char_top(); + if (blob->base_char_top() < min_top) min_top = blob->base_char_top(); if (blob->base_char_bottom() > max_bottom) max_bottom = blob->base_char_bottom(); } // If the intersection of all vertical ranges of all base characters // overlaps the median range of this, then it is OK. - bool result = min_top > candidate.median_bottom_ && - max_bottom < candidate.median_top_; + bool result = + min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_; if (debug) { if (result) tprintf("OKDiacritic!\n"); else - tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", - max_bottom, min_top, median_bottom_, median_top_); + tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top, + median_bottom_, median_top_); } return result; } @@ -508,8 +522,7 @@ void ColPartition::SetLeftTab(const TabVector* tab_vector) { } else { left_key_tab_ = false; } - if (!left_key_tab_) - left_key_ = BoxLeftKey(); + if (!left_key_tab_) left_key_ = BoxLeftKey(); } // As SetLeftTab, but with the right. @@ -520,8 +533,7 @@ void ColPartition::SetRightTab(const TabVector* tab_vector) { } else { right_key_tab_ = false; } - if (!right_key_tab_) - right_key_ = BoxRightKey(); + if (!right_key_tab_) right_key_ = BoxRightKey(); } // Copies the left/right tab from the src partition, but if take_box is @@ -534,8 +546,7 @@ void ColPartition::CopyLeftTab(const ColPartition& src, bool take_box) { bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY())); left_key_ = BoxLeftKey(); } - if (left_margin_ > bounding_box_.left()) - left_margin_ = src.left_margin_; + if (left_margin_ > bounding_box_.left()) left_margin_ = src.left_margin_; } // As CopyLeftTab, but with the right. @@ -547,8 +558,7 @@ void ColPartition::CopyRightTab(const ColPartition& src, bool take_box) { bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY())); right_key_ = BoxRightKey(); } - if (right_margin_ < bounding_box_.right()) - right_margin_ = src.right_margin_; + if (right_margin_ < bounding_box_.right()) right_margin_ = src.right_margin_; } // Returns the left rule line x coord of the leftmost blob. @@ -583,8 +593,8 @@ int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) { return count; } -void ColPartition::SetSpecialBlobsDensity( - const BlobSpecialTextType type, const float density) { +void ColPartition::SetSpecialBlobsDensity(const BlobSpecialTextType type, + const float density) { ASSERT_HOST(type < BSTT_COUNT); special_blobs_densities_[type] = density; } @@ -612,12 +622,12 @@ void ColPartition::ComputeSpecialBlobsDensity() { // Partnerships are added symmetrically to partner and this. void ColPartition::AddPartner(bool upper, ColPartition* partner) { if (upper) { - partner->lower_partners_.add_sorted(SortByBoxLeft, - true, this); + partner->lower_partners_.add_sorted(SortByBoxLeft, true, + this); upper_partners_.add_sorted(SortByBoxLeft, true, partner); } else { - partner->upper_partners_.add_sorted(SortByBoxLeft, - true, this); + partner->upper_partners_.add_sorted(SortByBoxLeft, true, + this); lower_partners_.add_sorted(SortByBoxLeft, true, partner); } } @@ -638,8 +648,7 @@ void ColPartition::RemovePartner(bool upper, ColPartition* partner) { // Returns the partner if the given partner is a singleton, otherwise nullptr. ColPartition* ColPartition::SingletonPartner(bool upper) { ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_; - if (!partners->singleton()) - return nullptr; + if (!partners->singleton()) return nullptr; ColPartition_C_IT it(partners); return it.data(); } @@ -665,7 +674,7 @@ void ColPartition::Absorb(ColPartition* other, WidthCallback* cb) { for (int type = 0; type < BSTT_COUNT; ++type) { int w1 = boxes_.length(), w2 = other->boxes_.length(); float new_val = special_blobs_densities_[type] * w1 + - other->special_blobs_densities_[type] * w2; + other->special_blobs_densities_[type] * w2; if (!w1 || !w2) { special_blobs_densities_[type] = new_val / (w1 + w2); } @@ -682,8 +691,7 @@ void ColPartition::Absorb(ColPartition* other, WidthCallback* cb) { continue; } ASSERT_HOST(prev_owner == other || prev_owner == nullptr); - if (prev_owner == other) - bbox2->set_owner(this); + if (prev_owner == other) bbox2->set_owner(this); it.add_to_end(bbox2); } left_margin_ = std::min(left_margin_, other->left_margin_); @@ -746,15 +754,13 @@ bool ColPartition::OKMergeOverlap(const ColPartition& merge1, int ok_box_overlap, bool debug) { // Vertical partitions are not allowed to be involved. if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) { - if (debug) - tprintf("Vertical partition\n"); + if (debug) tprintf("Vertical partition\n"); return false; } // The merging partitions must strongly overlap each other. if (!merge1.VSignificantCoreOverlap(merge2)) { if (debug) - tprintf("Voverlap %d (%d)\n", - merge1.VCoreOverlap(merge2), + tprintf("Voverlap %d (%d)\n", merge1.VCoreOverlap(merge2), merge1.VSignificantCoreOverlap(merge2)); return false; } @@ -764,8 +770,7 @@ bool ColPartition::OKMergeOverlap(const ColPartition& merge1, if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ && merged_box.bottom() < bounding_box_.top() - ok_box_overlap && merged_box.top() > bounding_box_.bottom() + ok_box_overlap) { - if (debug) - tprintf("Excessive box overlap\n"); + if (debug) tprintf("Excessive box overlap\n"); return false; } // Looks OK! @@ -775,15 +780,13 @@ bool ColPartition::OKMergeOverlap(const ColPartition& merge1, // Find the blob at which to split this to minimize the overlap with the // given box. Returns the first blob to go in the second partition. BLOBNBOX* ColPartition::OverlapSplitBlob(const TBOX& box) { - if (boxes_.empty() || boxes_.singleton()) - return nullptr; + if (boxes_.empty() || boxes_.singleton()) return nullptr; BLOBNBOX_C_IT it(&boxes_); TBOX left_box(it.data()->bounding_box()); for (it.forward(); !it.at_first(); it.forward()) { BLOBNBOX* bbox = it.data(); left_box += bbox->bounding_box(); - if (left_box.overlap(box)) - return bbox; + if (left_box.overlap(box)) return bbox; } return nullptr; } @@ -802,8 +805,7 @@ ColPartition* ColPartition::SplitAtBlob(BLOBNBOX* split_blob) { ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr); if (bbox == split_blob || !split_part->boxes_.empty()) { split_part->AddBox(it.extract()); - if (owns_blobs() && prev_owner != nullptr) - bbox->set_owner(split_part); + if (owns_blobs() && prev_owner != nullptr) bbox->set_owner(split_part); } } ASSERT_HOST(!it.empty()); @@ -841,8 +843,7 @@ ColPartition* ColPartition::SplitAt(int split_x) { const TBOX& box = bbox->bounding_box(); if (box.left() >= split_x) { split_part->AddBox(it.extract()); - if (owns_blobs() && prev_owner != nullptr) - bbox->set_owner(split_part); + if (owns_blobs() && prev_owner != nullptr) bbox->set_owner(split_part); } } if (it.empty()) { @@ -880,26 +881,22 @@ void ColPartition::ComputeLimits() { for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); bounding_box_ += bbox->bounding_box(); - if (bbox->flow() != BTFT_LEADER) - ++non_leader_count; + if (bbox->flow() != BTFT_LEADER) ++non_leader_count; } } - if (!left_key_tab_) - left_key_ = BoxLeftKey(); + if (!left_key_tab_) left_key_ = BoxLeftKey(); if (left_key_ > BoxLeftKey() && textord_debug_bugs) { // TODO(rays) investigate the causes of these error messages, to find // out if they are genuinely harmful, or just indicative of junk input. tprintf("Computed left-illegal partition\n"); Print(); } - if (!right_key_tab_) - right_key_ = BoxRightKey(); + if (!right_key_tab_) right_key_ = BoxRightKey(); if (right_key_ < BoxRightKey() && textord_debug_bugs) { tprintf("Computed right-illegal partition\n"); Print(); } - if (it.empty()) - return; + if (it.empty()) return; if (IsImageType() || blob_type() == BRT_RECTIMAGE || blob_type() == BRT_POLYIMAGE) { median_top_ = bounding_box_.top(); @@ -970,8 +967,7 @@ int ColPartition::CountOverlappingBoxes(const TBOX& box) { int overlap_count = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); - if (box.overlap(bbox->bounding_box())) - ++overlap_count; + if (box.overlap(bbox->bounding_box())) ++overlap_count; } return overlap_count; } @@ -980,13 +976,11 @@ int ColPartition::CountOverlappingBoxes(const TBOX& box) { // resolution refers to the ppi resolution of the image. void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) { int first_spanned_col = -1; - ColumnSpanningType span_type = - columns->SpanningType(resolution, - bounding_box_.left(), bounding_box_.right(), - std::min(bounding_box_.height(), bounding_box_.width()), - MidY(), left_margin_, right_margin_, - &first_column_, &last_column_, - &first_spanned_col); + ColumnSpanningType span_type = columns->SpanningType( + resolution, bounding_box_.left(), bounding_box_.right(), + std::min(bounding_box_.height(), bounding_box_.width()), MidY(), + left_margin_, right_margin_, &first_column_, &last_column_, + &first_spanned_col); column_set_ = columns; if (first_column_ < last_column_ && span_type == CST_PULLOUT && !IsLineType()) { @@ -1064,13 +1058,10 @@ PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const { void ColPartition::ColumnRange(int resolution, ColPartitionSet* columns, int* first_col, int* last_col) { int first_spanned_col = -1; - ColumnSpanningType span_type = - columns->SpanningType(resolution, - bounding_box_.left(), bounding_box_.right(), - std::min(bounding_box_.height(), bounding_box_.width()), - MidY(), left_margin_, right_margin_, - first_col, last_col, - &first_spanned_col); + ColumnSpanningType span_type = columns->SpanningType( + resolution, bounding_box_.left(), bounding_box_.right(), + std::min(bounding_box_.height(), bounding_box_.width()), MidY(), + left_margin_, right_margin_, first_col, last_col, &first_spanned_col); type_ = PartitionType(span_type); } @@ -1115,8 +1106,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { double min_width = std::min(median_gap, median_width); double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f); if (textord_debug_tabfind >= 4) { - tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", - gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax, + tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", gap_iqr, blob_count, + max_width * kMaxLeaderGapFractionOfMax, min_width * kMaxLeaderGapFractionOfMin); } if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax && @@ -1142,9 +1133,9 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { projection[left - part_left].AddLocalCost(height); } } - DPPoint* best_end = DPPoint::Solve(min_step, max_step, false, - &DPPoint::CostWithVariance, - part_width, projection); + DPPoint* best_end = + DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance, + part_width, projection); if (best_end != nullptr && best_end->total_cost() < blob_count) { // Good enough. Call it a leader. result = true; @@ -1155,7 +1146,7 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { // If the first or last blob is spaced too much, don't mark it. if (it.at_first()) { int gap = it.data_relative(1)->bounding_box().left() - - blob->bounding_box().right(); + blob->bounding_box().right(); if (blob->bounding_box().width() + gap > max_step) { it.extract(); modified_blob_list = true; @@ -1164,7 +1155,7 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { } if (it.at_last()) { int gap = blob->bounding_box().left() - - it.data_relative(-1)->bounding_box().right(); + it.data_relative(-1)->bounding_box().right(); if (blob->bounding_box().width() + gap > max_step) { it.extract(); modified_blob_list = true; @@ -1185,7 +1176,7 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { blob_count); } } - delete [] projection; + delete[] projection; } return result; } @@ -1245,8 +1236,7 @@ void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) { else flow_ = BTFT_NEIGHBOURS; // Upgrade chain to strong chain if the other indicators are good - if (flow_ == BTFT_CHAIN && strong_score == 3) - flow_ = BTFT_STRONG_CHAIN; + if (flow_ == BTFT_CHAIN && strong_score == 3) flow_ = BTFT_STRONG_CHAIN; // Downgrade strong vertical text to chain if the indicators are bad. if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2) flow_ = BTFT_CHAIN; @@ -1255,15 +1245,15 @@ void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) { // Check for noisy neighbours. if (noisy_count >= blob_count) { flow_ = BTFT_NONTEXT; - blob_type_= BRT_NOISE; + blob_type_ = BRT_NOISE; } } if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) { tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,", blob_count, noisy_count, good_blob_score_); - tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", - value, flow_, blob_type_); + tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_, + blob_type_); Print(); } SetBlobTypes(); @@ -1272,13 +1262,11 @@ void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) { // Sets all blobs with the partition blob type and flow, but never overwrite // leader blobs, as we need to be able to identify them later. void ColPartition::SetBlobTypes() { - if (!owns_blobs()) - return; + if (!owns_blobs()) return; BLOBNBOX_C_IT it(&boxes_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); - if (blob->flow() != BTFT_LEADER) - blob->set_flow(flow_); + if (blob->flow() != BTFT_LEADER) blob->set_flow(flow_); blob->set_region_type(blob_type_); ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this); } @@ -1343,8 +1331,7 @@ bool ColPartition::HasGoodBaseline() { width = last_pt.x() - first_pt.x(); } // Maximum median error allowed to be a good text line. - if (height_count == 0) - return false; + if (height_count == 0) return false; double max_error = kMaxBaselineError * total_height / height_count; ICOORD start_pt, end_pt; double error = linepoints.Fit(&start_pt, &end_pt); @@ -1357,8 +1344,7 @@ void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, int resolution, ColPartition_LIST* used_parts, WorkingPartSet_LIST* working_sets) { - if (block_owned_) - return; // Done it already. + if (block_owned_) return; // Done it already. block_owned_ = true; WorkingPartSet_IT it(working_sets); // If there is an upper partner use its working_set_ directly. @@ -1377,9 +1363,9 @@ void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, WorkingPartSet* work_set = nullptr; it.move_to_first(); int col_index = 0; - for (it.mark_cycle_pt(); !it.cycled_list() && - col_index != first_column_; - it.forward(), ++col_index); + for (it.mark_cycle_pt(); !it.cycled_list() && col_index != first_column_; + it.forward(), ++col_index) + ; if (textord_debug_tabfind >= 2) { tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between"); Print(); @@ -1441,8 +1427,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, BLOBNBOX* blob = blob_it.data(); int bottom = blob->bounding_box().bottom(); int step = bottom - prev_bottom; - if (step < 0) - step = -step; + if (step < 0) step = -step; side_steps.add(step, 1); prev_bottom = bottom; } @@ -1463,8 +1448,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, } ++part_count; } - if (part_count == 0) - return; + if (part_count == 0) return; SmoothSpacings(resolution, page_height, block_parts); @@ -1488,12 +1472,13 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, // its matched size, otherwise it goes with the smallest spacing. ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1); if (textord_debug_tabfind) { - tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d," - " sizes %d %d %d\n", - part->top_spacing(), part->bottom_spacing(), - next_part->top_spacing(), next_part->bottom_spacing(), - part->median_size(), next_part->median_size(), - third_part != nullptr ? third_part->median_size() : 0); + tprintf( + "Spacings unequal: upper:%d/%d, lower:%d/%d," + " sizes %d %d %d\n", + part->top_spacing(), part->bottom_spacing(), + next_part->top_spacing(), next_part->bottom_spacing(), + part->median_size(), next_part->median_size(), + third_part != nullptr ? third_part->median_size() : 0); } // We can only consider adding the next line to the block if the sizes // match and the lines are close enough for their size. @@ -1504,13 +1489,12 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, part->top_spacing()) { // Even now, we can only add it as long as the third line doesn't // match in the same way and have a smaller bottom spacing. - if (third_part == nullptr || - !next_part->SizesSimilar(*third_part) || + if (third_part == nullptr || !next_part->SizesSimilar(*third_part) || third_part->median_size() * kMaxSameBlockLineSpacing <= next_part->bottom_spacing() || next_part->median_size() * kMaxSameBlockLineSpacing <= next_part->top_spacing() || - next_part->bottom_spacing() > part->bottom_spacing()) { + next_part->bottom_spacing() > part->bottom_spacing()) { // Add to the current block. sp_block_it.add_to_end(it.extract()); it.forward(); @@ -1540,14 +1524,10 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, // Helper function to clip the input pos to the given bleft, tright bounds. static void ClipCoord(const ICOORD& bleft, const ICOORD& tright, ICOORD* pos) { - if (pos->x() < bleft.x()) - pos->set_x(bleft.x()); - if (pos->x() > tright.x()) - pos->set_x(tright.x()); - if (pos->y() < bleft.y()) - pos->set_y(bleft.y()); - if (pos->y() > tright.y()) - pos->set_y(tright.y()); + if (pos->x() < bleft.x()) pos->set_x(bleft.x()); + if (pos->x() > tright.x()) pos->set_x(tright.x()); + if (pos->y() < bleft.y()) pos->set_y(bleft.y()); + if (pos->y() > tright.y()) pos->set_y(tright.y()); } // Helper moves the blobs from the given list of block_parts into the block @@ -1555,8 +1535,7 @@ static void ClipCoord(const ICOORD& bleft, const ICOORD& tright, ICOORD* pos) { // vertical and horizontal text. The partitions are moved to used_parts // afterwards, as they cannot be deleted yet. static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing, - BLOCK* block, - ColPartition_LIST* block_parts, + BLOCK* block, ColPartition_LIST* block_parts, ColPartition_LIST* used_parts) { // Make a matching TO_BLOCK and put all the BLOBNBOXes from the parts in it. // Move all the parts to a done list as they are no longer needed, except @@ -1574,8 +1553,7 @@ static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing, // Transfer blobs from all regions to the output blocks. // Blobs for non-text regions will be used to define the polygonal // bounds of the region. - for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty(); - bb_it.forward()) { + for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.extract(); if (bblob->owner() != part) { tprintf("Ownership incorrect for blob:"); @@ -1613,14 +1591,12 @@ static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing, to_block->line_size = sizes.median(); if (vertical_text) { int block_width = block->pdblk.bounding_box().width(); - if (block_width < line_spacing) - line_spacing = block_width; + if (block_width < line_spacing) line_spacing = block_width; to_block->line_spacing = static_cast(line_spacing); to_block->max_blob_size = static_cast(block_width + 1); } else { int block_height = block->pdblk.bounding_box().height(); - if (block_height < line_spacing) - line_spacing = block_height; + if (block_height < line_spacing) line_spacing = block_height; to_block->line_spacing = static_cast(line_spacing); to_block->max_blob_size = static_cast(block_height + 1); } @@ -1632,8 +1608,7 @@ static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing, TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, ColPartition_LIST* block_parts, ColPartition_LIST* used_parts) { - if (block_parts->empty()) - return nullptr; // Nothing to do. + if (block_parts->empty()) return nullptr; // Nothing to do. // If the block_parts are not in reading order, then it will make an invalid // block polygon and bounding_box, so sort by bounding box now just to make // sure. @@ -1670,15 +1645,13 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, UpdateRange(end.x(), &min_x, &max_x); UpdateRange(start.y(), &min_y, &max_y); UpdateRange(end.y(), &min_y, &max_y); - if ((iteration == 0 && it.at_first()) || - (iteration == 1 && it.at_last())) { + if ((iteration == 0 && it.at_first()) || (iteration == 1 && it.at_last())) { ++iteration; it.move_to_last(); } } while (iteration < 2); if (textord_debug_tabfind) - tprintf("Making block at (%d,%d)->(%d,%d)\n", - min_x, min_y, max_x, max_y); + tprintf("Making block at (%d,%d)->(%d,%d)\n", min_x, min_y, max_x, max_y); BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y); block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type)); return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts); @@ -1690,8 +1663,7 @@ TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft, const ICOORD& tright, ColPartition_LIST* block_parts, ColPartition_LIST* used_parts) { - if (block_parts->empty()) - return nullptr; // Nothing to do. + if (block_parts->empty()) return nullptr; // Nothing to do. ColPartition_IT it(block_parts); ColPartition* part = it.data(); TBOX block_box = part->bounding_box(); @@ -1719,16 +1691,15 @@ TO_ROW* ColPartition::MakeToRow() { // Add all the blobs to a single TO_ROW. for (; !blob_it.empty(); blob_it.forward()) { BLOBNBOX* blob = blob_it.extract(); -// blob->compute_bounding_box(); + // blob->compute_bounding_box(); int top = blob->bounding_box().top(); int bottom = blob->bounding_box().bottom(); if (row == nullptr) { - row = new TO_ROW(blob, static_cast(top), - static_cast(bottom), - static_cast(line_size)); + row = + new TO_ROW(blob, static_cast(top), static_cast(bottom), + static_cast(line_size)); } else { - row->add_blob(blob, static_cast(top), - static_cast(bottom), + row->add_blob(blob, static_cast(top), static_cast(bottom), static_cast(line_size)); } } @@ -1777,9 +1748,8 @@ ColPartition* ColPartition::CopyButDontOwnBlobs() { #ifndef GRAPHICS_DISABLED // Provides a color for BBGrid to draw the rectangle. // Must be kept in sync with PolyBlockType. -ScrollView::Color ColPartition::BoxColor() const { - if (type_ == PT_UNKNOWN) - return BLOBNBOX::TextlineColor(blob_type_, flow_); +ScrollView::Color ColPartition::BoxColor() const { + if (type_ == PT_UNKNOWN) return BLOBNBOX::TextlineColor(blob_type_, flow_); return POLY_BLOCK::ColorForPolyBlockType(type_); } #endif // GRAPHICS_DISABLED @@ -1790,26 +1760,24 @@ static char kBlobTypes[BRT_COUNT + 1] = "NHSRIUVT"; // Prints debug information on this. void ColPartition::Print() const { int y = MidY(); - tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" - " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" - " ts=%d bs=%d ls=%d rs=%d\n", - boxes_.empty() ? 'E' : ' ', - left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y), - bounding_box_.left(), median_left_, - bounding_box_.bottom(), median_bottom_, - bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B', - right_margin_, median_right_, bounding_box_.top(), median_top_, - good_width_, good_column_, type_, - kBlobTypes[blob_type_], flow_, - first_column_, last_column_, boxes_.length(), - space_above_, space_below_, space_to_left_, space_to_right_); + tprintf( + "ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" + " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" + " ts=%d bs=%d ls=%d rs=%d\n", + boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B', + LeftAtY(y), bounding_box_.left(), median_left_, bounding_box_.bottom(), + median_bottom_, bounding_box_.right(), RightAtY(y), + right_key_tab_ ? 'T' : 'B', right_margin_, median_right_, + bounding_box_.top(), median_top_, good_width_, good_column_, type_, + kBlobTypes[blob_type_], flow_, first_column_, last_column_, + boxes_.length(), space_above_, space_below_, space_to_left_, + space_to_right_); } // Prints debug information on the colors. void ColPartition::PrintColors() { - tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", - color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE], - color1_[L_ALPHA_CHANNEL], + tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED], + color1_[COLOR_GREEN], color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL], color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]); } @@ -1821,8 +1789,7 @@ void ColPartition::SmoothPartnerRun(int working_set_count) { ColPartition* partner; for (partner = SingletonPartner(false); partner != nullptr; partner = partner->SingletonPartner(false)) { - if (partner->type_ > max_type) - max_type = partner->type_; + if (partner->type_ > max_type) max_type = partner->type_; if (column_set_ == partner->column_set_) { left_stats.add(partner->first_column_, 1); right_stats.add(partner->last_column_, 1); @@ -1940,8 +1907,8 @@ void ColPartition::RefinePartnersByType(bool upper, bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); if (debug) { - tprintf("Refining %d %s partners by type for:\n", - partners->length(), upper ? "Upper" : "Lower"); + tprintf("Refining %d %s partners by type for:\n", partners->length(), + upper ? "Upper" : "Lower"); Print(); } ColPartition_C_IT it(partners); @@ -2021,11 +1988,9 @@ void ColPartition::RefinePartnerShortcuts(bool upper, break; } } - if (done_any) - break; + if (done_any) break; } - if (done_any) - break; + if (done_any) break; } } while (done_any && !partners->empty() && !partners->singleton()); } @@ -2044,8 +2009,8 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); if (debug) { - tprintf("Refining %d %s partners by merge for:\n", - partners->length(), upper ? "Upper" : "Lower"); + tprintf("Refining %d %s partners by merge for:\n", partners->length(), + upper ? "Upper" : "Lower"); Print(); } while (!partners->empty() && !partners->singleton()) { @@ -2064,8 +2029,8 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, cand_it.add_after_then_move(it.data()); } int overlap_increase; - ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug, - nullptr, &overlap_increase); + ColPartition* candidate = grid->BestMergeCandidate( + part, &candidates, debug, nullptr, &overlap_increase); if (candidate != nullptr && (overlap_increase <= 0 || desperate)) { if (debug) { tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", @@ -2078,8 +2043,7 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, part->Absorb(candidate, nullptr); // We modified the box of part, so re-insert it into the grid. grid->InsertBBox(true, true, part); - if (overlap_increase > 0) - part->desperately_merged_ = true; + if (overlap_increase > 0) part->desperately_merged_ = true; } else { break; // Can't merge. } @@ -2093,8 +2057,8 @@ void ColPartition::RefinePartnersByOverlap(bool upper, bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); if (debug) { - tprintf("Refining %d %s partners by overlap for:\n", - partners->length(), upper ? "Upper" : "Lower"); + tprintf("Refining %d %s partners by overlap for:\n", partners->length(), + upper ? "Upper" : "Lower"); Print(); } ColPartition_C_IT it(partners); @@ -2103,8 +2067,9 @@ void ColPartition::RefinePartnersByOverlap(bool upper, int best_overlap = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* partner = it.data(); - int overlap = std::min(bounding_box_.right(), partner->bounding_box_.right()) - - std::max(bounding_box_.left(), partner->bounding_box_.left()); + int overlap = + std::min(bounding_box_.right(), partner->bounding_box_.right()) - + std::max(bounding_box_.left(), partner->bounding_box_.left()); if (overlap > best_overlap) { best_overlap = overlap; best_partner = partner; @@ -2131,31 +2096,26 @@ bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox, // Margins take priority. int left = box.left(); int right = box.right(); - if (left < left_margin_ || right > right_margin_) - return false; - if (left < other.left_margin_ || right > other.right_margin_) - return true; + if (left < left_margin_ || right > right_margin_) return false; + if (left < other.left_margin_ || right > other.right_margin_) return true; int top = box.top(); int bottom = box.bottom(); - int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_); - int other_overlap = std::min(top, other.median_top_) - - std::max(bottom, other.median_bottom_); + int this_overlap = + std::min(top, median_top_) - std::max(bottom, median_bottom_); + int other_overlap = + std::min(top, other.median_top_) - std::max(bottom, other.median_bottom_); int this_miss = median_top_ - median_bottom_ - this_overlap; int other_miss = other.median_top_ - other.median_bottom_ - other_overlap; if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) { tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n", - box.left(), box.bottom(), box.right(), box.top(), - this_overlap, other_overlap, this_miss, other_miss, - median_top_, other.median_top_); - } - if (this_miss < other_miss) - return true; - if (this_miss > other_miss) - return false; - if (this_overlap > other_overlap) - return true; - if (this_overlap < other_overlap) - return false; + box.left(), box.bottom(), box.right(), box.top(), this_overlap, + other_overlap, this_miss, other_miss, median_top_, + other.median_top_); + } + if (this_miss < other_miss) return true; + if (this_miss > other_miss) return false; + if (this_overlap > other_overlap) return true; + if (this_overlap < other_overlap) return false; return median_top_ >= other.median_top_; } @@ -2215,8 +2175,7 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, if (i < PN_UPPER || it.cycled_list()) { neighbourhood[i] = nullptr; } else { - if (i == PN_LOWER) - end_it = it; + if (i == PN_LOWER) end_it = it; neighbourhood[i] = it.data(); it.forward(); } @@ -2281,7 +2240,8 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, if (neighbourhood[i] == nullptr) { tprintf("NULL"); if (i > 0 && neighbourhood[i - 1] != nullptr) { - if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) { + if (neighbourhood[i - 1]->SingletonPartner(false) != + nullptr) { tprintf(" Lower partner:"); neighbourhood[i - 1]->SingletonPartner(false)->Print(); } else { @@ -2337,12 +2297,11 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, // and how it is used. bool ColPartition::OKSpacingBlip(int resolution, int median_spacing, ColPartition** parts) { - if (parts[PN_UPPER] == nullptr || parts[PN_LOWER] == nullptr) - return false; + if (parts[PN_UPPER] == nullptr || parts[PN_LOWER] == nullptr) return false; // The blip is OK if upper and lower sum to an OK value and at least // one of above1 and below1 is equal to the median. - return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], - median_spacing, resolution) && + return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], median_spacing, + resolution) && ((parts[PN_ABOVE1] != nullptr && parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) || (parts[PN_BELOW1] != nullptr && @@ -2363,9 +2322,9 @@ bool ColPartition::SpacingEqual(int spacing, int resolution) const { bool ColPartition::SpacingsEqual(const ColPartition& other, int resolution) const { int bottom_error = std::max(BottomSpacingMargin(resolution), - other.BottomSpacingMargin(resolution)); + other.BottomSpacingMargin(resolution)); int top_error = std::max(TopSpacingMargin(resolution), - other.TopSpacingMargin(resolution)); + other.TopSpacingMargin(resolution)); return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) && (NearlyEqual(top_spacing_, other.top_spacing_, top_error) || NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2, @@ -2375,12 +2334,12 @@ bool ColPartition::SpacingsEqual(const ColPartition& other, // Returns true if the sum spacing of this and other match the given // spacing (or twice the given spacing) to within a suitable margin dictated // by the image resolution. -bool ColPartition::SummedSpacingOK(const ColPartition& other, - int spacing, int resolution) const { +bool ColPartition::SummedSpacingOK(const ColPartition& other, int spacing, + int resolution) const { int bottom_error = std::max(BottomSpacingMargin(resolution), - other.BottomSpacingMargin(resolution)); + other.BottomSpacingMargin(resolution)); int top_error = std::max(TopSpacingMargin(resolution), - other.TopSpacingMargin(resolution)); + other.TopSpacingMargin(resolution)); int bottom_total = bottom_spacing_ + other.bottom_spacing_; int top_total = top_spacing_ + other.top_spacing_; return (NearlyEqual(spacing, bottom_total, bottom_error) && @@ -2412,8 +2371,8 @@ bool ColPartition::SizesSimilar(const ColPartition& other) const { // Helper updates margin_left and margin_right, being the bounds of the left // margin of part of a block. Returns false and does not update the bounds if // this partition has a disjoint margin with the established margin. -static bool UpdateLeftMargin(const ColPartition& part, - int* margin_left, int* margin_right) { +static bool UpdateLeftMargin(const ColPartition& part, int* margin_left, + int* margin_right) { const TBOX& part_box = part.bounding_box(); int top = part_box.top(); int bottom = part_box.bottom(); @@ -2437,8 +2396,8 @@ static bool UpdateLeftMargin(const ColPartition& part, // condition that the intersection of the left margins is non-empty, ie the // rightmost left margin is to the left of the leftmost left bounding box edge. // On return the iterator is set to the start of the next run. -void ColPartition::LeftEdgeRun(ColPartition_IT* part_it, - ICOORD* start, ICOORD* end) { +void ColPartition::LeftEdgeRun(ColPartition_IT* part_it, ICOORD* start, + ICOORD* end) { ColPartition* part = part_it->data(); ColPartition* start_part = part; int start_y = part->bounding_box_.top(); @@ -2491,15 +2450,15 @@ void ColPartition::LeftEdgeRun(ColPartition_IT* part_it, end->set_x(part->XAtY(margin_right, end_y)); if (textord_debug_tabfind && !part_it->at_first()) tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", - start_y, end_y, part->XAtY(margin_left, end_y), - end->x(), part->left_margin_, part->bounding_box_.left()); + start_y, end_y, part->XAtY(margin_left, end_y), end->x(), + part->left_margin_, part->bounding_box_.left()); } // Helper updates margin_left and margin_right, being the bounds of the right // margin of part of a block. Returns false and does not update the bounds if // this partition has a disjoint margin with the established margin. -static bool UpdateRightMargin(const ColPartition& part, - int* margin_left, int* margin_right) { +static bool UpdateRightMargin(const ColPartition& part, int* margin_left, + int* margin_right) { const TBOX& part_box = part.bounding_box(); int top = part_box.top(); int bottom = part_box.bottom(); @@ -2524,8 +2483,8 @@ static bool UpdateRightMargin(const ColPartition& part, // leftmost right margin is to the right of the rightmost right bounding box // edge. // On return the iterator is set to the start of the next run. -void ColPartition::RightEdgeRun(ColPartition_IT* part_it, - ICOORD* start, ICOORD* end) { +void ColPartition::RightEdgeRun(ColPartition_IT* part_it, ICOORD* start, + ICOORD* end) { ColPartition* part = part_it->data(); ColPartition* start_part = part; int start_y = part->bounding_box_.bottom(); @@ -2556,23 +2515,20 @@ void ColPartition::RightEdgeRun(ColPartition_IT* part_it, next_it.backward(); part = next_it.data(); } while (!next_it.at_last() && - UpdateRightMargin(*part, &next_margin_left, - &next_margin_right)); + UpdateRightMargin(*part, &next_margin_left, &next_margin_right)); // Now extend the next run forwards into the original run to get the // tightest fit. do { part_it->forward(); part = part_it->data(); } while (part != start_part && - UpdateRightMargin(*part, &next_margin_left, - &next_margin_right)); + UpdateRightMargin(*part, &next_margin_left, &next_margin_right)); part_it->backward(); } // Now calculate the end_y. part = part_it->data_relative(1); end_y = part->bounding_box().top(); - if (!part_it->at_last() && - part_it->data()->bounding_box_.bottom() > end_y) + if (!part_it->at_last() && part_it->data()->bounding_box_.bottom() > end_y) end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2; start->set_y(start_y); start->set_x(part->XAtY(margin_left, start_y)); diff --git a/src/textord/colpartition.h b/src/textord/colpartition.h index 2f0b201d81..44f01bccf2 100644 --- a/src/textord/colpartition.h +++ b/src/textord/colpartition.h @@ -22,13 +22,13 @@ #define TESSERACT_TEXTORD_COLPARTITION_H_ #include "bbgrid.h" -#include "blobbox.h" // For BlobRegionType. +#include "blobbox.h" // For BlobRegionType. #include "ndminx.h" #include "ocrblock.h" -#include "rect.h" // For TBOX. +#include "rect.h" // For TBOX. #include "scrollview.h" -#include "tabfind.h" // For WidthCallback. -#include "tabvector.h" // For BLOBNBOX_CLIST. +#include "tabfind.h" // For WidthCallback. +#include "tabvector.h" // For BLOBNBOX_CLIST. #include @@ -47,11 +47,11 @@ class WorkingPartSet_LIST; // The order of flowing/heading/pullout must be kept consistent with // PolyBlockType. enum ColumnSpanningType { - CST_NOISE, // Strictly between columns. - CST_FLOWING, // Strictly within a single column. - CST_HEADING, // Spans multiple columns. - CST_PULLOUT, // Touches multiple columns, but doesn't span them. - CST_COUNT // Number of entries. + CST_NOISE, // Strictly between columns. + CST_FLOWING, // Strictly within a single column. + CST_HEADING, // Spans multiple columns. + CST_PULLOUT, // Touches multiple columns, but doesn't span them. + CST_COUNT // Number of entries. }; ELIST2IZEH(ColPartition) @@ -75,7 +75,8 @@ class ColPartition : public ELIST2_LINK { /** * @param blob_type is the blob_region_type_ of the blobs in this partition. - * @param vertical is the direction of logical vertical on the possibly skewed image. + * @param vertical is the direction of logical vertical on the possibly skewed + * image. */ ColPartition(BlobRegionType blob_type, const ICOORD& vertical); /** @@ -83,17 +84,15 @@ class ColPartition : public ELIST2_LINK { * horizontal or vertical line, given a type and a bounding box. */ static ColPartition* MakeLinePartition(BlobRegionType blob_type, - const ICOORD& vertical, - int left, int bottom, - int right, int top); + const ICOORD& vertical, int left, + int bottom, int right, int top); // Constructs and returns a fake ColPartition with a single fake BLOBNBOX, // all made from a single TBOX. // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and // the ColPartition owns the BLOBNBOX!!! // Call DeleteBoxes before deleting the ColPartition. - static ColPartition* FakePartition(const TBOX& box, - PolyBlockType block_type, + static ColPartition* FakePartition(const TBOX& box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow); @@ -108,129 +107,49 @@ class ColPartition : public ELIST2_LINK { ~ColPartition(); // Simple accessors. - const TBOX& bounding_box() const { - return bounding_box_; - } - int left_margin() const { - return left_margin_; - } - void set_left_margin(int margin) { - left_margin_ = margin; - } - int right_margin() const { - return right_margin_; - } - void set_right_margin(int margin) { - right_margin_ = margin; - } - int median_top() const { - return median_top_; - } - int median_bottom() const { - return median_bottom_; - } - int median_left() const { - return median_left_; - } - int median_right() const { - return median_right_; - } - int median_size() const { - return median_size_; - } - void set_median_size(int size) { - median_size_ = size; - } - int median_width() const { - return median_width_; - } - void set_median_width(int width) { - median_width_ = width; - } - BlobRegionType blob_type() const { - return blob_type_; - } - void set_blob_type(BlobRegionType t) { - blob_type_ = t; - } - BlobTextFlowType flow() const { - return flow_; - } - void set_flow(BlobTextFlowType f) { - flow_ = f; - } - int good_blob_score() const { - return good_blob_score_; - } - bool good_width() const { - return good_width_; - } - bool good_column() const { - return good_column_; - } - bool left_key_tab() const { - return left_key_tab_; - } - int left_key() const { - return left_key_; - } - bool right_key_tab() const { - return right_key_tab_; - } - int right_key() const { - return right_key_; - } - PolyBlockType type() const { - return type_; - } - void set_type(PolyBlockType t) { - type_ = t; - } - BLOBNBOX_CLIST* boxes() { - return &boxes_; - } - int boxes_count() const { - return boxes_.length(); - } - void set_vertical(const ICOORD& v) { - vertical_ = v; - } - ColPartition_CLIST* upper_partners() { - return &upper_partners_; - } - ColPartition_CLIST* lower_partners() { - return &lower_partners_; - } + const TBOX& bounding_box() const { return bounding_box_; } + int left_margin() const { return left_margin_; } + void set_left_margin(int margin) { left_margin_ = margin; } + int right_margin() const { return right_margin_; } + void set_right_margin(int margin) { right_margin_ = margin; } + int median_top() const { return median_top_; } + int median_bottom() const { return median_bottom_; } + int median_left() const { return median_left_; } + int median_right() const { return median_right_; } + int median_size() const { return median_size_; } + void set_median_size(int size) { median_size_ = size; } + int median_width() const { return median_width_; } + void set_median_width(int width) { median_width_ = width; } + BlobRegionType blob_type() const { return blob_type_; } + void set_blob_type(BlobRegionType t) { blob_type_ = t; } + BlobTextFlowType flow() const { return flow_; } + void set_flow(BlobTextFlowType f) { flow_ = f; } + int good_blob_score() const { return good_blob_score_; } + bool good_width() const { return good_width_; } + bool good_column() const { return good_column_; } + bool left_key_tab() const { return left_key_tab_; } + int left_key() const { return left_key_; } + bool right_key_tab() const { return right_key_tab_; } + int right_key() const { return right_key_; } + PolyBlockType type() const { return type_; } + void set_type(PolyBlockType t) { type_ = t; } + BLOBNBOX_CLIST* boxes() { return &boxes_; } + int boxes_count() const { return boxes_.length(); } + void set_vertical(const ICOORD& v) { vertical_ = v; } + ColPartition_CLIST* upper_partners() { return &upper_partners_; } + ColPartition_CLIST* lower_partners() { return &lower_partners_; } void set_working_set(WorkingPartSet* working_set) { working_set_ = working_set; } - bool block_owned() const { - return block_owned_; - } - void set_block_owned(bool owned) { - block_owned_ = owned; - } - bool desperately_merged() const { - return desperately_merged_; - } - ColPartitionSet* column_set() const { - return column_set_; - } - void set_side_step(int step) { - side_step_ = step; - } - int bottom_spacing() const { - return bottom_spacing_; - } - void set_bottom_spacing(int spacing) { - bottom_spacing_ = spacing; - } - int top_spacing() const { - return top_spacing_; - } - void set_top_spacing(int spacing) { - top_spacing_ = spacing; - } + bool block_owned() const { return block_owned_; } + void set_block_owned(bool owned) { block_owned_ = owned; } + bool desperately_merged() const { return desperately_merged_; } + ColPartitionSet* column_set() const { return column_set_; } + void set_side_step(int step) { side_step_ = step; } + int bottom_spacing() const { return bottom_spacing_; } + void set_bottom_spacing(int spacing) { bottom_spacing_ = spacing; } + int top_spacing() const { return top_spacing_; } + void set_top_spacing(int spacing) { top_spacing_ = spacing; } void set_table_type() { if (type_ != PT_TABLE) { @@ -239,15 +158,10 @@ class ColPartition : public ELIST2_LINK { } } void clear_table_type() { - if (type_ == PT_TABLE) - type_ = type_before_table_; - } - bool inside_table_column() { - return inside_table_column_; - } - void set_inside_table_column(bool val) { - inside_table_column_ = val; + if (type_ == PT_TABLE) type_ = type_before_table_; } + bool inside_table_column() { return inside_table_column_; } + void set_inside_table_column(bool val) { inside_table_column_ = val; } ColPartition* nearest_neighbor_above() const { return nearest_neighbor_above_; } @@ -260,39 +174,17 @@ class ColPartition : public ELIST2_LINK { void set_nearest_neighbor_below(ColPartition* part) { nearest_neighbor_below_ = part; } - int space_above() const { - return space_above_; - } - void set_space_above(int space) { - space_above_ = space; - } - int space_below() const { - return space_below_; - } - void set_space_below(int space) { - space_below_ = space; - } - int space_to_left() const { - return space_to_left_; - } - void set_space_to_left(int space) { - space_to_left_ = space; - } - int space_to_right() const { - return space_to_right_; - } - void set_space_to_right(int space) { - space_to_right_ = space; - } - uint8_t* color1() { - return color1_; - } - uint8_t* color2() { - return color2_; - } - bool owns_blobs() const { - return owns_blobs_; - } + int space_above() const { return space_above_; } + void set_space_above(int space) { space_above_ = space; } + int space_below() const { return space_below_; } + void set_space_below(int space) { space_below_ = space; } + int space_to_left() const { return space_to_left_; } + void set_space_to_left(int space) { space_to_left_ = space; } + int space_to_right() const { return space_to_right_; } + void set_space_to_right(int space) { space_to_right_ = space; } + uint8_t* color1() { return color1_; } + uint8_t* color2() { return color2_; } + bool owns_blobs() const { return owns_blobs_; } void set_owns_blobs(bool owns_blobs) { // Do NOT change ownership flag when there are blobs in the list. // Immediately set the ownership flag when creating copies. @@ -307,9 +199,7 @@ class ColPartition : public ELIST2_LINK { return (bounding_box_.top() + bounding_box_.bottom()) / 2; } // Returns the middle y-coord of the median top and bottom. - int MedianY() const { - return (median_top_ + median_bottom_) / 2; - } + int MedianY() const { return (median_top_ + median_bottom_) / 2; } // Returns the middle x-coord of the bounding box. int MidX() const { return (bounding_box_.left() + bounding_box_.right()) / 2; @@ -327,25 +217,15 @@ class ColPartition : public ELIST2_LINK { return (right_key - left_key) / vertical_.y(); } // Returns the column width between the left and right keys. - int ColumnWidth() const { - return KeyWidth(left_key_, right_key_); - } + int ColumnWidth() const { return KeyWidth(left_key_, right_key_); } // Returns the sort key of the box left edge. - int BoxLeftKey() const { - return SortKey(bounding_box_.left(), MidY()); - } + int BoxLeftKey() const { return SortKey(bounding_box_.left(), MidY()); } // Returns the sort key of the box right edge. - int BoxRightKey() const { - return SortKey(bounding_box_.right(), MidY()); - } + int BoxRightKey() const { return SortKey(bounding_box_.right(), MidY()); } // Returns the left edge at the given y, using the sort key. - int LeftAtY(int y) const { - return XAtY(left_key_, y); - } + int LeftAtY(int y) const { return XAtY(left_key_, y); } // Returns the right edge at the given y, using the sort key. - int RightAtY(int y) const { - return XAtY(right_key_, y); - } + int RightAtY(int y) const { return XAtY(right_key_, y); } // Returns true if the right edge of this is to the left of the right // edge of other. bool IsLeftOf(const ColPartition& other) const { @@ -356,13 +236,9 @@ class ColPartition : public ELIST2_LINK { return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1; } // Returns true if there are no blobs in the list. - bool IsEmpty() const { - return boxes_.empty(); - } + bool IsEmpty() const { return boxes_.empty(); } // Returns true if there is a single blob in the list. - bool IsSingleton() const { - return boxes_.singleton(); - } + bool IsSingleton() const { return boxes_.singleton(); } // Returns true if this and other overlap horizontally by bounding box. bool HOverlaps(const ColPartition& other) const { return bounding_box_.x_overlap(other.bounding_box_); @@ -376,20 +252,20 @@ class ColPartition : public ELIST2_LINK { // WARNING! Only makes sense on horizontal partitions! int VCoreOverlap(const ColPartition& other) const { return std::min(median_top_, other.median_top_) - - std::max(median_bottom_, other.median_bottom_); + std::max(median_bottom_, other.median_bottom_); } // Returns the horizontal overlap (by median) of this and other. // WARNING! Only makes sense on vertical partitions! int HCoreOverlap(const ColPartition& other) const { return std::min(median_right_, other.median_right_) - - std::max(median_left_, other.median_left_); + std::max(median_left_, other.median_left_); } // Returns true if this and other overlap significantly vertically. // WARNING! Only makes sense on horizontal partitions! bool VSignificantCoreOverlap(const ColPartition& other) const { int overlap = VCoreOverlap(other); int height = std::min(median_top_ - median_bottom_, - other.median_top_ - other.median_bottom_); + other.median_top_ - other.median_bottom_); return overlap * 3 > height; } // Returns true if this and other can be combined without putting a @@ -418,21 +294,13 @@ class ColPartition : public ELIST2_LINK { } // Returns true if partitions is of horizontal line type - bool IsLineType() const { - return PTIsLineType(type_); - } + bool IsLineType() const { return PTIsLineType(type_); } // Returns true if partitions is of image type - bool IsImageType() const { - return PTIsImageType(type_); - } + bool IsImageType() const { return PTIsImageType(type_); } // Returns true if partitions is of text type - bool IsTextType() const { - return PTIsTextType(type_); - } + bool IsTextType() const { return PTIsTextType(type_); } // Returns true if partitions is of pullout(inter-column) type - bool IsPulloutType() const { - return PTIsPulloutType(type_); - } + bool IsPulloutType() const { return PTIsPulloutType(type_); } // Returns true if the partition is of an exclusively vertical type. bool IsVerticalType() const { return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE; @@ -447,14 +315,10 @@ class ColPartition : public ELIST2_LINK { } // Returns true if this partition is a vertical line // TODO(nbeato): Use PartitionType enum when Ray's code is submitted. - bool IsVerticalLine() const { - return IsVerticalType() && IsLineType(); - } + bool IsVerticalLine() const { return IsVerticalType() && IsLineType(); } // Returns true if this partition is a horizontal line // TODO(nbeato): Use PartitionType enum when Ray's code is submitted. - bool IsHorizontalLine() const { - return IsHorizontalType() && IsLineType(); - } + bool IsHorizontalLine() const { return IsHorizontalType() && IsLineType(); } // Adds the given box to the partition, updating the partition bounds. // The list of boxes in the partition is updated, ensuring that no box is @@ -550,8 +414,8 @@ class ColPartition : public ELIST2_LINK { // Set the density value for a particular BlobSpecialTextType, should ONLY be // used for debugging or testing. In production code, use // ComputeSpecialBlobsDensity instead. - void SetSpecialBlobsDensity( - const BlobSpecialTextType type, const float density); + void SetSpecialBlobsDensity(const BlobSpecialTextType type, + const float density); // Compute the SpecialTextType density of blobs, where we assume // that the SpecialTextType in the boxes_ has been set. void ComputeSpecialBlobsDensity(); @@ -609,8 +473,8 @@ class ColPartition : public ELIST2_LINK { // Returns the first and last column touched by this partition. // resolution refers to the ppi resolution of the image. - void ColumnRange(int resolution, ColPartitionSet* columns, - int* first_col, int* last_col); + void ColumnRange(int resolution, ColPartitionSet* columns, int* first_col, + int* last_col); // Sets the internal flags good_width_ and good_column_. void SetColumnGoodness(WidthCallback* cb); @@ -649,8 +513,7 @@ class ColPartition : public ELIST2_LINK { // to in the partition grid. bleft, tright and resolution are the bounds // and resolution of the original image. static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, - int resolution, - ColPartition_LIST* block_parts, + int resolution, ColPartition_LIST* block_parts, ColPartition_LIST* used_parts, BLOCK_LIST* completed_blocks, TO_BLOCK_LIST* to_blocks); @@ -671,7 +534,6 @@ class ColPartition : public ELIST2_LINK { // ownership to to returned TO_ROW. TO_ROW* MakeToRow(); - // Returns a copy of everything except the list of boxes. The resulting // ColPartition is only suitable for keeping in a column candidate list. ColPartition* ShallowCopy() const; @@ -680,10 +542,10 @@ class ColPartition : public ELIST2_LINK { // treated as read-only. ColPartition* CopyButDontOwnBlobs(); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED // Provides a color for BBGrid to draw the rectangle. - ScrollView::Color BoxColor() const; - #endif // GRAPHICS_DISABLED + ScrollView::Color BoxColor() const; +#endif // GRAPHICS_DISABLED // Prints debug information on this. void Print() const; @@ -724,12 +586,8 @@ class ColPartition : public ELIST2_LINK { } // Sets the column bounds. Primarily used in testing. - void set_first_column(int column) { - first_column_ = column; - } - void set_last_column(int column) { - last_column_ = column; - } + void set_first_column(int column) { first_column_ = column; } + void set_last_column(int column) { last_column_ = column; } private: // enum to refer to the entries in a neighbourhood of lines. @@ -795,8 +653,8 @@ class ColPartition : public ELIST2_LINK { // Returns true if the sum spacing of this and other match the given // spacing (or twice the given spacing) to within a suitable margin dictated // by the image resolution. - bool SummedSpacingOK(const ColPartition& other, - int spacing, int resolution) const; + bool SummedSpacingOK(const ColPartition& other, int spacing, + int resolution) const; // Returns a suitable spacing margin that can be applied to bottoms of // text lines, based on the resolution and the stored side_step_. @@ -816,16 +674,15 @@ class ColPartition : public ELIST2_LINK { // leftmost left bounding box edge. // TODO(rays) Not good enough. Needs improving to tightly wrap text in both // directions, and to loosely wrap images. - static void LeftEdgeRun(ColPartition_IT* part_it, - ICOORD* start, ICOORD* end); + static void LeftEdgeRun(ColPartition_IT* part_it, ICOORD* start, ICOORD* end); // Computes and returns in start, end a line segment formed from a // backwards-iterated group of right edges of partitions that satisfy the // condition that the leftmost right margin is to the right of the // rightmost right bounding box edge. // TODO(rays) Not good enough. Needs improving to tightly wrap text in both // directions, and to loosely wrap images. - static void RightEdgeRun(ColPartition_IT* part_it, - ICOORD* start, ICOORD* end); + static void RightEdgeRun(ColPartition_IT* part_it, ICOORD* start, + ICOORD* end); // The margins are determined by the position of the nearest vertically // overlapping neighbour to the side. They indicate the maximum extent @@ -914,10 +771,10 @@ class ColPartition : public ELIST2_LINK { ColPartition* nearest_neighbor_above_; // Nearest neighbor below with major x-overlap ColPartition* nearest_neighbor_below_; - int space_above_; // Distance from nearest_neighbor_above - int space_below_; // Distance from nearest_neighbor_below - int space_to_left_; // Distance from the left edge of the column - int space_to_right_; // Distance from the right edge of the column + int space_above_; // Distance from nearest_neighbor_above + int space_below_; // Distance from nearest_neighbor_below + int space_to_left_; // Distance from the left edge of the column + int space_to_right_; // Distance from the right edge of the column // Color foreground/background data. uint8_t color1_[kRGBRMSColors]; uint8_t color2_[kRGBRMSColors]; @@ -927,9 +784,8 @@ class ColPartition : public ELIST2_LINK { }; // Typedef it now in case it becomes a class later. -using ColPartitionGridSearch = GridSearch ; +using ColPartitionGridSearch = + GridSearch; } // namespace tesseract. diff --git a/src/textord/colpartitiongrid.cpp b/src/textord/colpartitiongrid.cpp index e06f4a6675..7dba70c596 100644 --- a/src/textord/colpartitiongrid.cpp +++ b/src/textord/colpartitiongrid.cpp @@ -66,16 +66,15 @@ const double kMaxPartitionSpacing = 1.75; // decision in GridSmoothNeighbour. const int kSmoothDecisionMargin = 4; -ColPartitionGrid::ColPartitionGrid(int gridsize, - const ICOORD& bleft, const ICOORD& tright) - : BBGrid(gridsize, - bleft, tright) { -} +ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD& bleft, + const ICOORD& tright) + : BBGrid( + gridsize, bleft, tright) {} // Handles a click event in a display window. void ColPartitionGrid::HandleClick(int x, int y) { - BBGrid::HandleClick(x, y); + BBGrid::HandleClick(x, + y); // Run a radial search for partitions that overlap. ColPartitionGridSearch radsearch(this); radsearch.SetUniqueMode(true); @@ -101,15 +100,14 @@ void ColPartitionGrid::HandleClick(int x, int y) { // Both callbacks are deleted before returning. void ColPartitionGrid::Merges( TessResultCallback2* box_cb, - TessResultCallback2* confirm_cb) { + TessResultCallback2* + confirm_cb) { // Iterate the ColPartitions in the grid. ColPartitionGridSearch gsearch(this); gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { - if (MergePart(box_cb, confirm_cb, part)) - gsearch.RepositionIterator(); + if (MergePart(box_cb, confirm_cb, part)) gsearch.RepositionIterator(); } delete box_cb; delete confirm_cb; @@ -122,11 +120,10 @@ void ColPartitionGrid::Merges( // Returns true if the partition is consumed by one or more merges. bool ColPartitionGrid::MergePart( TessResultCallback2* box_cb, - TessResultCallback2* confirm_cb, + TessResultCallback2* + confirm_cb, ColPartition* part) { - if (part->IsUnMergeableType()) - return false; + if (part->IsUnMergeableType()) return false; bool any_done = false; // Repeatedly merge part while we find a best merge candidate that works. bool merge_done = false; @@ -139,16 +136,14 @@ bool ColPartitionGrid::MergePart( box.print(); } // Set up a rectangle search bounded by the part. - if (!box_cb->Run(part, &box)) - continue; + if (!box_cb->Run(part, &box)) continue; // Create a list of merge candidates. ColPartition_CLIST merge_candidates; FindMergeCandidates(part, box, debug, &merge_candidates); // Find the best merge candidate based on minimal overlap increase. int overlap_increase; ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug, - confirm_cb, - &overlap_increase); + confirm_cb, &overlap_increase); if (neighbour != nullptr && overlap_increase <= 0) { if (debug) { tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", @@ -182,11 +177,9 @@ bool ColPartitionGrid::MergePart( // are on the same text line, ie their median limits overlap, but we have // to make exceptions for diacritics and stray punctuation. static bool OKMergeCandidate(const ColPartition* part, - const ColPartition* candidate, - bool debug) { + const ColPartition* candidate, bool debug) { const TBOX& part_box = part->bounding_box(); - if (candidate == part) - return false; // Ignore itself. + if (candidate == part) return false; // Ignore itself. if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType()) return false; // Don't mix inappropriate types. @@ -199,16 +192,14 @@ static bool OKMergeCandidate(const ColPartition* part, if (candidate->IsVerticalType() || part->IsVerticalType()) { int h_dist = -part->HCoreOverlap(*candidate); if (h_dist >= std::max(part_box.width(), c_box.width()) / 2) { - if (debug) - tprintf("Too far away: h_dist = %d\n", h_dist); + if (debug) tprintf("Too far away: h_dist = %d\n", h_dist); return false; } } else { // Coarse filter by vertical distance between partitions. int v_dist = -part->VCoreOverlap(*candidate); if (v_dist >= std::max(part_box.height(), c_box.height()) / 2) { - if (debug) - tprintf("Too far away: v_dist = %d\n", v_dist); + if (debug) tprintf("Too far away: v_dist = %d\n", v_dist); return false; } // Candidates must either overlap in median y, @@ -216,8 +207,7 @@ static bool OKMergeCandidate(const ColPartition* part, if (!part->VSignificantCoreOverlap(*candidate) && !part->OKDiacriticMerge(*candidate, debug) && !candidate->OKDiacriticMerge(*part, debug)) { - if (debug) - tprintf("Candidate fails overlap and diacritic tests!\n"); + if (debug) tprintf("Candidate fails overlap and diacritic tests!\n"); return false; } } @@ -230,8 +220,7 @@ static bool OKMergeCandidate(const ColPartition* part, // An overlap is not counted if passes the OKMergeOverlap test with ok_overlap // as the pixel overlap limit. merge1 and merge2 must both be non-nullptr. static int IncreaseInOverlap(const ColPartition* merge1, - const ColPartition* merge2, - int ok_overlap, + const ColPartition* merge2, int ok_overlap, ColPartition_CLIST* parts) { ASSERT_HOST(merge1 != nullptr && merge2 != nullptr); int total_area = 0; @@ -240,18 +229,16 @@ static int IncreaseInOverlap(const ColPartition* merge1, merged_box += merge2->bounding_box(); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* part = it.data(); - if (part == merge1 || part == merge2) - continue; + if (part == merge1 || part == merge2) continue; TBOX part_box = part->bounding_box(); // Compute the overlap of the merged box with part. int overlap_area = part_box.intersection(merged_box).area(); - if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, - ok_overlap, false)) { + if (overlap_area > 0 && + !part->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) { total_area += overlap_area; // Subtract the overlap of merge1 and merge2 individually. overlap_area = part_box.intersection(merge1->bounding_box()).area(); - if (overlap_area > 0) - total_area -= overlap_area; + if (overlap_area > 0) total_area -= overlap_area; TBOX intersection_box = part_box.intersection(merge2->bounding_box()); overlap_area = intersection_box.area(); if (overlap_area > 0) { @@ -259,8 +246,7 @@ static int IncreaseInOverlap(const ColPartition* merge1, // Add back the 3-way area. intersection_box &= merge1->bounding_box(); // In-place intersection. overlap_area = intersection_box.area(); - if (overlap_area > 0) - total_area += overlap_area; + if (overlap_area > 0) total_area += overlap_area; } } } @@ -405,12 +391,11 @@ void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box, // in overlap, or tightly spaced text would end up in bits. ColPartition* ColPartitionGrid::BestMergeCandidate( const ColPartition* part, ColPartition_CLIST* candidates, bool debug, - TessResultCallback2* confirm_cb, + TessResultCallback2* + confirm_cb, int* overlap_increase) { - if (overlap_increase != nullptr) - *overlap_increase = 0; - if (candidates->empty()) - return nullptr; + if (overlap_increase != nullptr) *overlap_increase = 0; + if (candidates->empty()) return nullptr; int ok_overlap = static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); // The best neighbour to merge with is the one that causes least @@ -477,8 +462,7 @@ ColPartition* ColPartitionGrid::BestMergeCandidate( } increase = IncreaseInOverlap(part, candidate, ok_overlap, &non_candidate_neighbours); - if (increase > worst_nc_increase) - worst_nc_increase = increase; + if (increase > worst_nc_increase) worst_nc_increase = increase; } if (best_increase > 0) { // If the worst non-candidate increase is less than the best increase @@ -492,8 +476,7 @@ ColPartition* ColPartitionGrid::BestMergeCandidate( best_increase = worst_nc_increase; } } - if (overlap_increase != nullptr) - *overlap_increase = best_increase; + if (overlap_increase != nullptr) *overlap_increase = best_increase; return best_candidate; } @@ -505,7 +488,6 @@ static void RemoveBadBox(BLOBNBOX* box, ColPartition* part, ColPartition::MakeBigPartition(box, part_list); } - // Split partitions where it reduces overlap between their bounding boxes. // ColPartitions are after all supposed to be a partitioning of the blobs // AND of the space on the page! @@ -530,8 +512,7 @@ void ColPartitionGrid::SplitOverlappingPartitions( ColPartition* neighbour; while ((neighbour = rsearch.NextRectSearch()) != nullptr) { - if (neighbour == part) - continue; + if (neighbour == part) continue; const TBOX& neighbour_box = neighbour->bounding_box(); if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) && part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false)) @@ -546,7 +527,7 @@ void ColPartitionGrid::SplitOverlappingPartitions( TBOX shrunken = part->BoundsWithoutBox(excluded); if (!shrunken.overlap(neighbour_box) && excluded->bounding_box().height() > - kBigPartSizeRatio * shrunken.height()) { + kBigPartSizeRatio * shrunken.height()) { // Removing the biggest box fixes the overlap, so do it! gsearch.RemoveBBox(); RemoveBadBox(excluded, part, big_parts); @@ -563,7 +544,7 @@ void ColPartitionGrid::SplitOverlappingPartitions( TBOX shrunken = neighbour->BoundsWithoutBox(excluded); if (!shrunken.overlap(box) && excluded->bounding_box().height() > - kBigPartSizeRatio * shrunken.height()) { + kBigPartSizeRatio * shrunken.height()) { // Removing the biggest box fixes the overlap, so do it! rsearch.RemoveBBox(); RemoveBadBox(excluded, neighbour, big_parts); @@ -689,8 +670,8 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, BlobRegionType blob_type = part->blob_type(); if (BLOBNBOX::IsTextType(blob_type) || (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) { - PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT - : PT_FLOWING_TEXT; + PolyBlockType type = + blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT; // Get metrics from the row that will be used for the block. TBOX box = part->bounding_box(); int median_width = part->median_width(); @@ -810,7 +791,7 @@ bool ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets) { part_sets->push_back(line_set); } } - delete [] part_lists; + delete[] part_lists; return any_parts_found; } @@ -832,8 +813,8 @@ ColPartitionSet* ColPartitionGrid::MakeSingleColumnSet(WidthCallback* cb) { // Consider for single column. BlobTextFlowType flow = part->flow(); if ((blob_type == BRT_TEXT && - (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || - flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || + (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || + flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) { if (single_column_part == nullptr) { single_column_part = part->ShallowCopy(); @@ -905,8 +886,7 @@ void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) { any_blobs_moved = true; } else { blob->set_region_type(blob_type); - if (blob->flow() != BTFT_LEADER) - blob->set_flow(flow); + if (blob->flow() != BTFT_LEADER) blob->set_flow(flow); } } } @@ -934,8 +914,7 @@ void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) { // The boxes within the partitions have changed (by deskew) so recompute // the bounds of all the partitions and reinsert them into the grid. -void ColPartitionGrid::RecomputeBounds(int gridsize, - const ICOORD& bleft, +void ColPartitionGrid::RecomputeBounds(int gridsize, const ICOORD& bleft, const ICOORD& tright, const ICOORD& vertical) { ColPartition_LIST saved_parts; @@ -970,9 +949,8 @@ void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) { ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { // Set up a rectangle search x-bounded by the column and y by the part. - ColPartitionSet* columns = best_columns != nullptr - ? best_columns[gsearch.GridY()] - : nullptr; + ColPartitionSet* columns = + best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr; FindPartitionMargins(columns, part); const TBOX& box = part->bounding_box(); if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { @@ -1066,8 +1044,8 @@ void ColPartitionGrid::FindFigureCaptions() { while ((part = gsearch.NextFullSearch()) != nullptr) { if (part->IsImageType()) { const TBOX& part_box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(), - part_box.bottom()); + bool debug = + AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom()); ColPartition* best_caption = nullptr; int best_dist = 0; // Distance to best_caption. int best_upper = 0; // Direction of best_caption. @@ -1122,8 +1100,8 @@ void ColPartitionGrid::FindFigureCaptions() { int mean_height = 0; ColPartition* end_partner = nullptr; ColPartition* next_partner = nullptr; - for (ColPartition* partner = best_caption; partner != nullptr && - line_count <= kMaxCaptionLines; + for (ColPartition* partner = best_caption; + partner != nullptr && line_count <= kMaxCaptionLines; partner = next_partner) { if (!partner->IsTextType()) { end_partner = partner; @@ -1133,8 +1111,8 @@ void ColPartitionGrid::FindFigureCaptions() { total_height += partner->bounding_box().height(); next_partner = partner->SingletonPartner(best_upper); if (next_partner != nullptr) { - int gap = partner->bounding_box().y_gap( - next_partner->bounding_box()); + int gap = + partner->bounding_box().y_gap(next_partner->bounding_box()); if (gap > biggest_gap) { biggest_gap = gap; end_partner = next_partner; @@ -1161,8 +1139,8 @@ void ColPartitionGrid::FindFigureCaptions() { end_partner = nullptr; // No gap, but line count is small. if (line_count <= kMaxCaptionLines) { // This is a qualified caption. Mark the text as caption. - for (ColPartition* partner = best_caption; partner != nullptr && - partner != end_partner; + for (ColPartition* partner = best_caption; + partner != nullptr && partner != end_partner; partner = next_partner) { partner->set_type(PT_CAPTION_TEXT); partner->SetBlobTypes(); @@ -1219,13 +1197,11 @@ void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) { int neighbour_bottom = neighbour->median_bottom(); int neighbour_top = neighbour->median_top(); int neighbour_y = (neighbour_bottom + neighbour_top) / 2; - if (upper != (neighbour_y > mid_y)) - continue; + if (upper != (neighbour_y > mid_y)) continue; if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour)) continue; if (!part->TypesMatch(*neighbour)) { - if (best_neighbour == nullptr) - best_neighbour = neighbour; + if (best_neighbour == nullptr) best_neighbour = neighbour; continue; } int dist = upper ? neighbour_bottom - top : bottom - neighbour_top; @@ -1238,8 +1214,7 @@ void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) { break; } } - if (best_neighbour != nullptr) - part->AddPartner(upper, best_neighbour); + if (best_neighbour != nullptr) part->AddPartner(upper, best_neighbour); } // Finds the best partner in the given direction for the given partition. @@ -1265,10 +1240,8 @@ void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, int neighbour_left = neighbour->median_left(); int neighbour_right = neighbour->median_right(); int neighbour_x = (neighbour_left + neighbour_right) / 2; - if (to_the_left != (neighbour_x < mid_x)) - continue; - if (!part->VOverlaps(*neighbour)) - continue; + if (to_the_left != (neighbour_x < mid_x)) continue; + if (!part->VOverlaps(*neighbour)) continue; if (!part->TypesMatch(*neighbour)) continue; // Only match to other vertical text. int dist = to_the_left ? left - neighbour_right : neighbour_left - right; @@ -1283,8 +1256,7 @@ void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, } // For vertical partitions, the upper partner is to the left, and lower is // to the right. - if (best_neighbour != nullptr) - part->AddPartner(to_the_left, best_neighbour); + if (best_neighbour != nullptr) part->AddPartner(to_the_left, best_neighbour); } // For every ColPartition with multiple partners in the grid, reduces the @@ -1299,15 +1271,14 @@ void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) { gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { - part->RefinePartners(static_cast(type), - get_desperate, this); + part->RefinePartners(static_cast(type), get_desperate, + this); // Iterator may have been messed up by a merge. gsearch.RepositionIterator(); } } } - // ========================== PRIVATE CODE ======================== // Finds and returns a list of candidate ColPartitions to merge with part. @@ -1325,8 +1296,7 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition* part, rsearch.StartRectSearch(search_box); ColPartition* candidate; while ((candidate = rsearch.NextRectSearch()) != nullptr) { - if (!OKMergeCandidate(part, candidate, debug)) - continue; + if (!OKMergeCandidate(part, candidate, debug)) continue; const TBOX& c_box = candidate->bounding_box(); // Candidate seems to be a potential merge with part. If one contains // the other, then the merge is a no-brainer. Otherwise, search the @@ -1364,8 +1334,10 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition* part, } if (neighbour != nullptr) { if (debug) { - tprintf("Combined box overlaps another that is not OK despite" - " allowance of %d:", ok_overlap); + tprintf( + "Combined box overlaps another that is not OK despite" + " allowance of %d:", + ok_overlap); neighbour->bounding_box().print(); tprintf("Reason:"); OKMergeCandidate(part, neighbour, true); @@ -1397,10 +1369,8 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition* part, // nontext_map, which is used to prevent the spread of text neighbourhoods // into images. // Returns true if the partition was changed. -bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map, - const TBOX& im_box, - const FCOORD& rerotation, - bool debug, +bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map, const TBOX& im_box, + const FCOORD& rerotation, bool debug, ColPartition* part) { const TBOX& part_box = part->bounding_box(); if (debug) { @@ -1418,8 +1388,7 @@ bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map, int dist; BlobNeighbourDir dir = static_cast(d); BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box, - rerotation, debug, *part, - &dist); + rerotation, debug, *part, &dist); if (debug) { tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist); } @@ -1435,7 +1404,7 @@ bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map, if (best_dist > max_dist) return false; // Too far away to set the type with it. if (part->flow() == BTFT_STRONG_CHAIN && !all_image) { - return false; // We are not modifying it. + return false; // We are not modifying it. } BlobRegionType new_type = part->blob_type(); BlobTextFlowType new_flow = part->flow(); @@ -1467,10 +1436,8 @@ bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map, // except direction. Also setup dist_scaling to weight x,y distances according // to the given direction. static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, - const TBOX& part_box, - int min_padding, - TBOX* search_box, - ICOORD* dist_scaling) { + const TBOX& part_box, int min_padding, + TBOX* search_box, ICOORD* dist_scaling) { *search_box = part_box; // Generate a pad value based on the min dimension of part_box, but at least // min_padding and then scaled by kMaxPadFactor. @@ -1522,21 +1489,20 @@ enum NeighbourPartitionType { // and the distance of the collection. If there are any pixels in the // nontext_map, then the decision is biased towards image. BlobRegionType ColPartitionGrid::SmoothInOneDirection( - BlobNeighbourDir direction, Pix* nontext_map, - const TBOX& im_box, const FCOORD& rerotation, - bool debug, const ColPartition& part, int* best_distance) { + BlobNeighbourDir direction, Pix* nontext_map, const TBOX& im_box, + const FCOORD& rerotation, bool debug, const ColPartition& part, + int* best_distance) { // Set up a rectangle search bounded by the part. const TBOX& part_box = part.bounding_box(); TBOX search_box; ICOORD dist_scaling; - ComputeSearchBoxAndScaling(direction, part_box, gridsize(), - &search_box, &dist_scaling); - bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box, - rerotation, - nontext_map) > 0; + ComputeSearchBoxAndScaling(direction, part_box, gridsize(), &search_box, + &dist_scaling); + bool image_region = ImageFind::CountPixelsInRotatedBox( + search_box, im_box, rerotation, nontext_map) > 0; GenericVector dists[NPT_COUNT]; - AccumulatePartDistances(part, dist_scaling, search_box, - nontext_map, im_box, rerotation, debug, dists); + AccumulatePartDistances(part, dist_scaling, search_box, nontext_map, im_box, + rerotation, debug, dists); // By iteratively including the next smallest distance across the vectors, // (as in a merge sort) we can use the vector indices as counts of each type // and find the nearest set of objects that give us a definite decision. @@ -1562,18 +1528,16 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection( *best_distance = min_dist; if (debug) { tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n", - counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], - counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], - counts[NPT_IMAGE], image_bias, min_dist); + counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT], + counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE], image_bias, min_dist); } // See if we have a decision yet. int image_count = counts[NPT_IMAGE]; int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - - (image_count + counts[NPT_WEAK_VTEXT]); + (image_count + counts[NPT_WEAK_VTEXT]); int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - - (image_count + counts[NPT_WEAK_HTEXT]); - if (image_count > 0 && - image_bias - htext_score >= kSmoothDecisionMargin && + (image_count + counts[NPT_WEAK_HTEXT]); + if (image_count > 0 && image_bias - htext_score >= kSmoothDecisionMargin && image_bias - vtext_score >= kSmoothDecisionMargin) { *best_distance = dists[NPT_IMAGE][0]; if (!dists[NPT_WEAK_VTEXT].empty() && @@ -1589,7 +1553,7 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection( *best_distance = dists[NPT_HTEXT][0]; return BRT_TEXT; } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && - counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) { + counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) { *best_distance = dists[NPT_VTEXT][0]; return BRT_VERT_TEXT; } @@ -1604,14 +1568,10 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection( // The nontext_map (+im_box, rerotation) is used to make text invisible if // there is non-text in between. // dists must be an array of GenericVectors of size NPT_COUNT. -void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part, - const ICOORD& dist_scaling, - const TBOX& search_box, - Pix* nontext_map, - const TBOX& im_box, - const FCOORD& rerotation, - bool debug, - GenericVector* dists) { +void ColPartitionGrid::AccumulatePartDistances( + const ColPartition& base_part, const ICOORD& dist_scaling, + const TBOX& search_box, Pix* nontext_map, const TBOX& im_box, + const FCOORD& rerotation, bool debug, GenericVector* dists) { const TBOX& part_box = base_part.bounding_box(); ColPartitionGridSearch rsearch(this); rsearch.SetUniqueMode(true); @@ -1621,8 +1581,7 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part, // on the other side of a tab vector. while ((neighbour = rsearch.NextRectSearch()) != nullptr) { if (neighbour->IsUnMergeableType() || - !base_part.ConfirmNoTabViolation(*neighbour) || - neighbour == &base_part) + !base_part.ConfirmNoTabViolation(*neighbour) || neighbour == &base_part) continue; TBOX nbox = neighbour->bounding_box(); BlobRegionType n_type = neighbour->blob_type(); @@ -1634,10 +1593,9 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part, continue; // Don't use horizontal lines as neighbours. int x_gap = std::max(part_box.x_gap(nbox), 0); int y_gap = std::max(part_box.y_gap(nbox), 0); - int n_dist = x_gap * dist_scaling.x() + y_gap* dist_scaling.y(); + int n_dist = x_gap * dist_scaling.x() + y_gap * dist_scaling.y(); if (debug) { - tprintf("Part has x-gap=%d, y=%d, dist=%d at:", - x_gap, y_gap, n_dist); + tprintf("Part has x-gap=%d, y=%d, dist=%d at:", x_gap, y_gap, n_dist); nbox.print(); } // Truncate the number of boxes, so text doesn't get too much advantage. @@ -1665,15 +1623,13 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part, if (debug) tprintf("Image %d\n", n_boxes); } if (count_vector != nullptr) { - for (int i = 0; i < n_boxes; ++i) - count_vector->push_back(n_dist); + for (int i = 0; i < n_boxes; ++i) count_vector->push_back(n_dist); } if (debug) { neighbour->Print(); } } - for (int i = 0; i < NPT_COUNT; ++i) - dists[i].sort(); + for (int i = 0; i < NPT_COUNT; ++i) dists[i].sort(); } // Improves the margins of the part ColPartition by searching for @@ -1690,11 +1646,9 @@ void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns, int right_margin = tright().x(); if (columns != nullptr) { ColPartition* column = columns->ColumnContaining(box.left(), y); - if (column != nullptr) - left_margin = column->LeftAtY(y); + if (column != nullptr) left_margin = column->LeftAtY(y); column = columns->ColumnContaining(box.right(), y); - if (column != nullptr) - right_margin = column->RightAtY(y); + if (column != nullptr) right_margin = column->RightAtY(y); } left_margin -= kColumnWidthFactor; right_margin += kColumnWidthFactor; @@ -1729,21 +1683,18 @@ int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, TBOX box = part->bounding_box(); int min_overlap = std::min(height, static_cast(box.height())); min_overlap = static_cast(min_overlap * kMarginOverlapFraction + 0.5); - int y_overlap = std::min(y_top, static_cast(box.top())) - std::max(y_bottom, static_cast(box.bottom())); - if (y_overlap < min_overlap) - continue; + int y_overlap = std::min(y_top, static_cast(box.top())) - + std::max(y_bottom, static_cast(box.bottom())); + if (y_overlap < min_overlap) continue; // Must be going the right way. int x_edge = right_to_left ? box.right() : box.left(); - if ((x_edge < x) != right_to_left) - continue; + if ((x_edge < x) != right_to_left) continue; // If we have gone past x_limit, then x_limit will do. - if ((x_edge < x_limit) == right_to_left) - break; + if ((x_edge < x_limit) == right_to_left) break; // It reduces x limit, so save the new one. x_limit = x_edge; } return x_limit; } - } // namespace tesseract. diff --git a/src/textord/colpartitiongrid.h b/src/textord/colpartitiongrid.h index 5b70702d2b..0f2f220c68 100644 --- a/src/textord/colpartitiongrid.h +++ b/src/textord/colpartitiongrid.h @@ -30,9 +30,8 @@ class TabFind; // ColPartitionGrid is a BBGrid of ColPartition. // It collects functions that work on the grid. -class ColPartitionGrid : public BBGrid { +class ColPartitionGrid + : public BBGrid { public: ColPartitionGrid() = default; ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); @@ -82,8 +81,8 @@ class ColPartitionGrid : public BBGrid* confirm_cb, + TessResultCallback2* + confirm_cb, int* overlap_increase); // Split partitions where it reduces overlap between their bounding boxes. @@ -146,8 +145,8 @@ class ColPartitionGrid : public BBGrid* dists); + const TBOX& search_box, Pix* nontext_map, + const TBOX& im_box, const FCOORD& rerotation, + bool debug, GenericVector* dists); // Improves the margins of the ColPartition by searching for // neighbours that vertically overlap significantly. @@ -244,8 +236,8 @@ class ColPartitionGrid : public BBGridColumnContains(x, y)) - return part; + if (part->ColumnContains(x, y)) return part; } return nullptr; } @@ -95,8 +94,7 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb, // to improve this. for (int i = 0; i < set_size; ++i) { ColPartitionSet* column_set = src_sets->get(i); - if (column_set == nullptr) - continue; + if (column_set == nullptr) continue; // Iterate over the parts in this and column_set, adding bigger or // new parts in column_set to this. ColPartition_IT part_it(&parts_); @@ -132,7 +130,7 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb, int col_box_left = col_part->BoxLeftKey(); bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right)); bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right)); - if (tab_width_ok || (!part_width_ok )) { + if (tab_width_ok || (!part_width_ok)) { // The tab is leaving the good column metric at least as good as // it was before, so use the tab. part->CopyLeftTab(*col_part, false); @@ -153,7 +151,7 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb, int col_box_right = col_part->BoxRightKey(); bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right)); bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right)); - if (tab_width_ok || (!part_width_ok )) { + if (tab_width_ok || (!part_width_ok)) { // The tab is leaving the good column metric at least as good as // it was before, so use the tab. part->CopyRightTab(*col_part, false); @@ -197,25 +195,22 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets, if (good_coverage_ == columns->good_coverage_) { better = good_column_count_ > columns->good_column_count_; if (good_column_count_ == columns->good_column_count_) { - better = bad_coverage_ > columns->bad_coverage_; + better = bad_coverage_ > columns->bad_coverage_; } } if (better) { // The new one is better so add it. - if (debug) - tprintf("Good one\n"); + if (debug) tprintf("Good one\n"); column_sets->insert(this, i); return; } if (columns->CompatibleColumns(false, this, cb)) { - if (debug) - tprintf("Duplicate\n"); + if (debug) tprintf("Duplicate\n"); delete this; return; // It is not unique. } } - if (debug) - tprintf("Added to end\n"); + if (debug) tprintf("Added to end\n"); column_sets->push_back(this); } @@ -229,8 +224,7 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, other->Print(); } if (other->parts_.empty()) { - if (debug) - tprintf("CompatibleColumns true due to empty other\n"); + if (debug) tprintf("CompatibleColumns true due to empty other\n"); return true; } ColPartition_IT it(&other->parts_); @@ -263,7 +257,7 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, return false; // Partition with a good width must be in a single column. } - ColPartition_IT it2= it; + ColPartition_IT it2 = it; while (!it2.at_last()) { it2.forward(); ColPartition* next_part = it2.data(); @@ -286,8 +280,8 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, if (debug) { int next_right = next_part->bounding_box().right(); tprintf("CompatibleColumns false due to 2 parts of good width\n"); - tprintf("part1 %d-%d, part2 %d-%d\n", - left, right, next_left, next_right); + tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left, + next_right); right_col->Print(); } return false; @@ -296,8 +290,7 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, break; } } - if (debug) - tprintf("CompatibleColumns true!\n"); + if (debug) tprintf("CompatibleColumns true!\n"); return true; } @@ -320,8 +313,7 @@ int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) { // is outside. int x = (box.left() + box.right()) / 2; ColPartition* col = ColumnContaining(x, y); - if (col == nullptr) - total_width += box.width(); + if (col == nullptr) total_width += box.width(); } } return total_width; @@ -331,14 +323,12 @@ int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) { // having legal individual partitions and non-overlapping adjacent pairs. bool ColPartitionSet::LegalColumnCandidate() { ColPartition_IT it(&parts_); - if (it.empty()) - return false; + if (it.empty()) return false; bool any_text_parts = false; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* part = it.data(); if (BLOBNBOX::IsTextType(part->blob_type())) { - if (!part->IsLegal()) - return false; // Individual partition is illegal. + if (!part->IsLegal()) return false; // Individual partition is illegal. any_text_parts = true; } if (!it.at_last()) { @@ -362,14 +352,13 @@ ColPartitionSet* ColPartitionSet::Copy(bool good_only) { (!good_only || part->good_width() || part->good_column())) dest_it.add_after_then_move(part->ShallowCopy()); } - if (dest_it.empty()) - return nullptr; + if (dest_it.empty()) return nullptr; return new ColPartitionSet(©_parts); } // Return the bounding boxes of columns at the given y-range void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, - ColSegment_LIST *segments) { + ColSegment_LIST* segments) { ColPartition_IT it(&parts_); ColSegment_IT col_it(segments); col_it.move_to_last(); @@ -377,7 +366,7 @@ void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, ColPartition* part = it.data(); ICOORD bot_left(part->LeftAtY(y_top), y_bottom); ICOORD top_right(part->RightAtY(y_bottom), y_top); - ColSegment *col_seg = new ColSegment(); + ColSegment* col_seg = new ColSegment(); col_seg->InsertBox(TBOX(bot_left, top_right)); col_it.add_after_then_move(col_seg); } @@ -403,14 +392,9 @@ void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, // Column indices are 2n + 1 for real columns (0 based) and even values // represent the gaps in between columns, with 0 being left of the leftmost. // resolution refers to the ppi resolution of the image. -ColumnSpanningType ColPartitionSet::SpanningType(int resolution, - int left, int right, - int height, int y, - int left_margin, - int right_margin, - int* first_col, - int* last_col, - int* first_spanned_col) { +ColumnSpanningType ColPartitionSet::SpanningType( + int resolution, int left, int right, int height, int y, int left_margin, + int right_margin, int* first_col, int* last_col, int* first_spanned_col) { *first_col = -1; *last_col = -1; *first_spanned_col = -1; @@ -445,8 +429,7 @@ ColumnSpanningType ColPartitionSet::SpanningType(int resolution, } if (right_margin >= part->RightAtY(y)) { // It completely spans this column. - if (margin_columns == 0) - *first_spanned_col = col_index; + if (margin_columns == 0) *first_spanned_col = col_index; ++margin_columns; } *last_col = col_index; @@ -458,8 +441,7 @@ ColumnSpanningType ColPartitionSet::SpanningType(int resolution, // It started in between the previous column and the current column. *first_col = col_index - 1; } - if (margin_columns == 0) - *first_spanned_col = col_index; + if (margin_columns == 0) *first_spanned_col = col_index; *last_col = col_index; } else if (right < part->LeftAtY(y)) { // We have gone past the end. @@ -471,10 +453,8 @@ ColumnSpanningType ColPartitionSet::SpanningType(int resolution, break; } } - if (*first_col < 0) - *first_col = col_index - 1; // The last in-between. - if (*last_col < 0) - *last_col = col_index - 1; // The last in-between. + if (*first_col < 0) *first_col = col_index - 1; // The last in-between. + if (*last_col < 0) *last_col = col_index - 1; // The last in-between. ASSERT_HOST(*first_col >= 0 && *last_col >= 0); ASSERT_HOST(*first_col <= *last_col); if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) { @@ -499,8 +479,7 @@ ColumnSpanningType ColPartitionSet::SpanningType(int resolution, // As ColPartitions are turned into BLOCKs, the used ones are put in // used_parts, as they still need to be referenced in the grid. void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft, - const ICOORD& tright, - int resolution, + const ICOORD& tright, int resolution, ColPartition_LIST* used_parts, WorkingPartSet_LIST* working_set_list) { // Move the input list to a temporary location so we can delete its elements @@ -524,17 +503,15 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft, ((working_set = src_it.data())->column() == nullptr || working_set->column()->right_key() <= column->left_key())) { src_it.extract(); - working_set->ExtractCompletedBlocks(bleft, tright, resolution, - used_parts, &completed_blocks, - &to_blocks); + working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, + &completed_blocks, &to_blocks); delete working_set; src_it.forward(); } // Make a new between-column WorkingSet for before the current column. working_set = new WorkingPartSet(nullptr); dest_it.add_after_then_move(working_set); - if (first_new_set == nullptr) - first_new_set = working_set; + if (first_new_set == nullptr) first_new_set = working_set; // A matching column gets to stay, and first_new_set gets all the // completed_sets. working_set = src_it.empty() ? nullptr : src_it.data(); @@ -554,17 +531,15 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft, // Complete any remaining src working sets. while (!src_it.empty()) { working_set = src_it.extract(); - working_set->ExtractCompletedBlocks(bleft, tright, resolution, - used_parts, &completed_blocks, - &to_blocks); + working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, + &completed_blocks, &to_blocks); delete working_set; src_it.forward(); } // Make a new between-column WorkingSet for after the last column. working_set = new WorkingPartSet(nullptr); dest_it.add_after_then_move(working_set); - if (first_new_set == nullptr) - first_new_set = working_set; + if (first_new_set == nullptr) first_new_set = working_set; // The first_new_set now gets any accumulated completed_parts/blocks. first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); } @@ -593,11 +568,12 @@ void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width, // Provide debug output for this ColPartitionSet and all the ColPartitions. void ColPartitionSet::Print() { ColPartition_IT it(&parts_); - tprintf("Partition set of %d parts, %d good, coverage=%d+%d" - " (%d,%d)->(%d,%d)\n", - it.length(), good_column_count_, good_coverage_, bad_coverage_, - bounding_box_.left(), bounding_box_.bottom(), - bounding_box_.right(), bounding_box_.top()); + tprintf( + "Partition set of %d parts, %d good, coverage=%d+%d" + " (%d,%d)->(%d,%d)\n", + it.length(), good_column_count_, good_coverage_, bad_coverage_, + bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(), + bounding_box_.top()); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* part = it.data(); part->Print(); @@ -656,10 +632,8 @@ void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) { good_coverage_ += coverage; good_column_count_ += 2; } else { - if (part.blob_type() < BRT_UNKNOWN) - coverage /= 2; - if (part.good_column()) - ++good_column_count_; + if (part.blob_type() < BRT_UNKNOWN) coverage /= 2; + if (part.good_column()) ++good_column_count_; bad_coverage_ += coverage; } } diff --git a/src/textord/colpartitionset.h b/src/textord/colpartitionset.h index ccf3dc277f..caaf3634a0 100644 --- a/src/textord/colpartitionset.h +++ b/src/textord/colpartitionset.h @@ -46,15 +46,9 @@ class ColPartitionSet : public ELIST_LINK { ~ColPartitionSet() = default; // Simple accessors. - const TBOX& bounding_box() const { - return bounding_box_; - } - bool Empty() const { - return parts_.empty(); - } - int ColumnCount() const { - return parts_.length(); - } + const TBOX& bounding_box() const { return bounding_box_; } + bool Empty() const { return parts_.empty(); } + int ColumnCount() const { return parts_.length(); } // Returns the number of columns of good width. int GoodColumnCount() const; @@ -62,11 +56,12 @@ class ColPartitionSet : public ELIST_LINK { // Return an element of the parts_ list from its index. ColPartition* GetColumnByIndex(int index); - // Return the ColPartition that contains the given coords, if any, else nullptr. + // Return the ColPartition that contains the given coords, if any, else + // nullptr. ColPartition* ColumnContaining(int x, int y); // Return the bounding boxes of columns at the given y-range - void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments); + void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST* segments); // Extract all the parts from the list, relinquishing ownership. void RelinquishParts(); @@ -105,11 +100,10 @@ class ColPartitionSet : public ELIST_LINK { // represent the gaps in between columns, with 0 being left of the leftmost. // resolution refers to the ppi resolution of the image. It may be 0 if only // the first_col and last_col are required. - ColumnSpanningType SpanningType(int resolution, - int left, int right, int height, int y, - int left_margin, int right_margin, - int* first_col, int* last_col, - int* first_spanned_col); + ColumnSpanningType SpanningType(int resolution, int left, int right, + int height, int y, int left_margin, + int right_margin, int* first_col, + int* last_col, int* first_spanned_col); // The column_set has changed. Close down all in-progress WorkingPartSets in // columns that do not match and start new ones for the new columns in this. diff --git a/src/textord/devanagari_processing.cpp b/src/textord/devanagari_processing.cpp index 7b2c722455..507ffb9093 100644 --- a/src/textord/devanagari_processing.cpp +++ b/src/textord/devanagari_processing.cpp @@ -49,9 +49,7 @@ ShiroRekhaSplitter::ShiroRekhaSplitter() { ocr_split_strategy_ = NO_SPLIT; } -ShiroRekhaSplitter::~ShiroRekhaSplitter() { - Clear(); -} +ShiroRekhaSplitter::~ShiroRekhaSplitter() { Clear(); } void ShiroRekhaSplitter::Clear() { pixDestroy(&orig_pix_); @@ -78,8 +76,8 @@ void ShiroRekhaSplitter::set_orig_pix(Pix* pix) { // page segmentation. This mode uses the flag // pageseg_devanagari_split_strategy to determine the splitting strategy. bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa* pixa_debug) { - SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : - ocr_split_strategy_; + SplitStrategy split_strategy = + split_for_pageseg ? pageseg_split_strategy_ : ocr_split_strategy_; if (split_strategy == NO_SPLIT) { return false; // Nothing to do. } @@ -143,11 +141,11 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa* pixa_debug) { // larger graphemes. if (xheight == kUnspecifiedXheight || (box->w > xheight / 3 && box->h > xheight / 2)) { - SplitWordShiroRekha(split_strategy, word_pix, xheight, - box->x, box->y, regions_to_clear); + SplitWordShiroRekha(split_strategy, word_pix, xheight, box->x, box->y, + regions_to_clear); } else if (devanagari_split_debuglevel > 0) { - tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", - box->x, box->y, box->w, box->h); + tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", box->x, box->y, + box->w, box->h); } pixDestroy(&word_pix); } @@ -179,10 +177,8 @@ int ShiroRekhaSplitter::GetXheightForCC(Box* cc_bbox) { return global_xheight_; } // Compute the box coordinates in Tesseract's coordinate system. - TBOX bbox(cc_bbox->x, - pixGetHeight(orig_pix_) - cc_bbox->y - cc_bbox->h - 1, - cc_bbox->x + cc_bbox->w, - pixGetHeight(orig_pix_) - cc_bbox->y - 1); + TBOX bbox(cc_bbox->x, pixGetHeight(orig_pix_) - cc_bbox->y - cc_bbox->h - 1, + cc_bbox->x + cc_bbox->w, pixGetHeight(orig_pix_) - cc_bbox->y - 1); // Iterate over all blocks. BLOCK_IT block_it(segmentation_block_list_); for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { @@ -202,8 +198,7 @@ int ShiroRekhaSplitter::GetXheightForCC(Box* cc_bbox) { // row. In that case, return the xheight for this row. float box_middle = 0.5 * (bbox.left() + bbox.right()); int baseline = static_cast(row->base_line(box_middle) + 0.5); - TBOX test_box(box_middle - row->x_height() / 2, - baseline, + TBOX test_box(box_middle - row->x_height() / 2, baseline, box_middle + row->x_height() / 2, static_cast(baseline + row->x_height())); // Compute overlap. If it is is a major overlap, this is the right row. @@ -233,10 +228,8 @@ int ShiroRekhaSplitter::GetXheightForCC(Box* cc_bbox) { // conservative multiplier on approximate stroke width is used (which may lead // to over-splitting). void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, - Pix* pix, - int xheight, - int word_left, - int word_top, + Pix* pix, int xheight, + int word_left, int word_top, Boxa* regions_to_clear) { if (split_strategy == NO_SPLIT) { return; @@ -273,8 +266,8 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, // Clear the ascender and descender regions of the word. // Obtain a vertical projection histogram for the resulting image. - Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, - width, 5 * stroke_width / 3); + Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, + 5 * stroke_width / 3); Pix* word_in_xheight = pixCopy(nullptr, pix); pixClearInRect(word_in_xheight, box_to_clear); // Also clear any pixels which are below shirorekha_bottom + some leeway. @@ -313,8 +306,7 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, while (i < width) { if (!vert_hist.hist()[i]) { int j = 0; - while (i + j < width && !vert_hist.hist()[i+j]) - ++j; + while (i + j < width && !vert_hist.hist()[i + j]) ++j; if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) { // Perform a shiro-rekha split. The intervening region lies from i to // i+j-1. @@ -329,8 +321,7 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Box* box_to_clear = boxCreate(word_left + split_left, word_top + shirorekha_top - stroke_width / 3, - split_width, - 5 * stroke_width / 3); + split_width, 5 * stroke_width / 3); if (box_to_clear) { boxaAddBox(regions_to_clear, box_to_clear, L_CLONE); // Mark this in the debug image if needed. @@ -364,10 +355,10 @@ void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs( } C_BLOB_LIST not_found_blobs; - RefreshWordBlobsFromNewBlobs(segmentation_block_list_, - new_blobs, - ((devanagari_split_debugimage && debug_image_) ? - ¬_found_blobs : nullptr)); + RefreshWordBlobsFromNewBlobs( + segmentation_block_list_, new_blobs, + ((devanagari_split_debugimage && debug_image_) ? ¬_found_blobs + : nullptr)); if (devanagari_split_debuglevel > 0) { tprintf("After refreshing blobs:\n"); @@ -440,8 +431,7 @@ void ShiroRekhaSplitter::GetShiroRekhaYExtents(Pix* word_pix, int thresh = (topline_onpixel_count * 70) / 100; int ulimit = topline_ylevel; int llimit = topline_ylevel; - while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) - --ulimit; + while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) --ulimit; while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) ++llimit; @@ -473,14 +463,12 @@ void PixelHistogram::ConstructVerticalCountHist(Pix* pix) { hist_ = new int[width]; length_ = width; int wpl = pixGetWpl(pix); - l_uint32 *data = pixGetData(pix); - for (int i = 0; i < width; ++i) - hist_[i] = 0; + l_uint32* data = pixGetData(pix); + for (int i = 0; i < width; ++i) hist_[i] = 0; for (int i = 0; i < height; ++i) { - l_uint32 *line = data + i * wpl; + l_uint32* line = data + i * wpl; for (int j = 0; j < width; ++j) - if (GET_DATA_BIT(line, j)) - ++(hist_[j]); + if (GET_DATA_BIT(line, j)) ++(hist_[j]); } } diff --git a/src/textord/devanagari_processing.h b/src/textord/devanagari_processing.h index cca0bd625a..ebf03c97d0 100644 --- a/src/textord/devanagari_processing.h +++ b/src/textord/devanagari_processing.h @@ -22,13 +22,12 @@ struct Pix; struct Box; struct Boxa; -extern -INT_VAR_H(devanagari_split_debuglevel, 0, - "Debug level for split shiro-rekha process."); +extern INT_VAR_H(devanagari_split_debuglevel, 0, + "Debug level for split shiro-rekha process."); -extern -BOOL_VAR_H(devanagari_split_debugimage, 0, - "Whether to create a debug image for split shiro-rekha process."); +extern BOOL_VAR_H( + devanagari_split_debugimage, 0, + "Whether to create a debug image for split shiro-rekha process."); class TBOX; @@ -41,9 +40,7 @@ class PixelHistogram { length_ = 0; } - ~PixelHistogram() { - Clear(); - } + ~PixelHistogram() { Clear(); } void Clear() { delete[] hist_; @@ -52,9 +49,7 @@ class PixelHistogram { int* hist() const { return hist_; } - int length() const { - return length_; - } + int length() const { return length_; } // Methods to construct histograms from images. These clear any existing data. void ConstructVerticalCountHist(Pix* pix); @@ -108,33 +103,23 @@ class ShiroRekhaSplitter { static const int kUnspecifiedXheight = -1; - void set_global_xheight(int xheight) { - global_xheight_ = xheight; - } + void set_global_xheight(int xheight) { global_xheight_ = xheight; } - void set_perform_close(bool perform) { - perform_close_ = perform; - } + void set_perform_close(bool perform) { perform_close_ = perform; } // Returns the image obtained from shiro-rekha splitting. The returned object // is owned by this class. Callers may want to clone the returned pix to keep // it alive beyond the life of ShiroRekhaSplitter object. - Pix* splitted_image() { - return splitted_image_; - } + Pix* splitted_image() { return splitted_image_; } // On setting the input image, a clone of it is owned by this class. void set_orig_pix(Pix* pix); // Returns the input image provided to the object. This object is owned by // this class. Callers may want to clone the returned pix to work with it. - Pix* orig_pix() { - return orig_pix_; - } + Pix* orig_pix() { return orig_pix_; } - SplitStrategy ocr_split_strategy() const { - return ocr_split_strategy_; - } + SplitStrategy ocr_split_strategy() const { return ocr_split_strategy_; } void set_ocr_split_strategy(SplitStrategy strategy) { ocr_split_strategy_ = strategy; @@ -148,9 +133,7 @@ class ShiroRekhaSplitter { pageseg_split_strategy_ = strategy; } - BLOCK_LIST* segmentation_block_list() { - return segmentation_block_list_; - } + BLOCK_LIST* segmentation_block_list() { return segmentation_block_list_; } // This method returns the computed mode-height of blobs in the pix. // It also prunes very small blobs from calculation. Could be used to provide @@ -174,12 +157,8 @@ class ShiroRekhaSplitter { // conservative estimate of stroke width along with an associated multiplier // is used in its place. It is advisable to have a specified xheight when // splitting for classification/training. - void SplitWordShiroRekha(SplitStrategy split_strategy, - Pix* pix, - int xheight, - int word_left, - int word_top, - Boxa* regions_to_clear); + void SplitWordShiroRekha(SplitStrategy split_strategy, Pix* pix, int xheight, + int word_left, int word_top, Boxa* regions_to_clear); // Returns a new box object for the corresponding TBOX, based on the original // image's coordinate system. @@ -187,14 +166,13 @@ class ShiroRekhaSplitter { // This method returns y-extents of the shiro-rekha computed from the input // word image. - static void GetShiroRekhaYExtents(Pix* word_pix, - int* shirorekha_top, + static void GetShiroRekhaYExtents(Pix* word_pix, int* shirorekha_top, int* shirorekha_bottom, int* shirorekha_ylevel); - Pix* orig_pix_; // Just a clone of the input image passed. - Pix* splitted_image_; // Image produced after the last splitting round. The - // object is owned by this class. + Pix* orig_pix_; // Just a clone of the input image passed. + Pix* splitted_image_; // Image produced after the last splitting round. The + // object is owned by this class. SplitStrategy pageseg_split_strategy_; SplitStrategy ocr_split_strategy_; Pix* debug_image_; diff --git a/src/textord/drawedg.cpp b/src/textord/drawedg.cpp index 0a429b5483..bdbbdb6d5b 100644 --- a/src/textord/drawedg.cpp +++ b/src/textord/drawedg.cpp @@ -18,7 +18,7 @@ * **********************************************************************/ -#include "drawedg.h" +#include "drawedg.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -28,10 +28,10 @@ #ifndef GRAPHICS_DISABLED /** title of window */ -#define IMAGE_WIN_NAME "Edges" -#define IMAGE_XPOS 250 +#define IMAGE_WIN_NAME "Edges" +#define IMAGE_XPOS 250 /** default position */ -#define IMAGE_YPOS 0 +#define IMAGE_YPOS 0 #define EXTERN @@ -43,14 +43,14 @@ */ ScrollView* create_edges_window(ICOORD page_tr) { - ScrollView* image_win; //image window + ScrollView* image_win; // image window - //create the window - image_win = new ScrollView (IMAGE_WIN_NAME, IMAGE_XPOS, IMAGE_YPOS, 0, 0, page_tr.x (), page_tr.y ()); - return image_win; //window + // create the window + image_win = new ScrollView(IMAGE_WIN_NAME, IMAGE_XPOS, IMAGE_YPOS, 0, 0, + page_tr.x(), page_tr.y()); + return image_win; // window } - /** * @name draw_raw_edge * @@ -60,24 +60,22 @@ ScrollView* create_edges_window(ICOORD page_tr) { * @param colour colour to draw in */ -void draw_raw_edge(ScrollView* fd, - CRACKEDGE *start, - ScrollView::Color colour) { - CRACKEDGE *edgept; //current point +void draw_raw_edge(ScrollView* fd, CRACKEDGE* start, ScrollView::Color colour) { + CRACKEDGE* edgept; // current point fd->Pen(colour); edgept = start; - fd->SetCursor(edgept->pos.x (), edgept->pos.y ()); + fd->SetCursor(edgept->pos.x(), edgept->pos.y()); do { do - edgept = edgept->next; - //merge straight lines - while (edgept != start && edgept->prev->stepx == edgept->stepx && edgept->prev->stepy == edgept->stepy); + edgept = edgept->next; + // merge straight lines + while (edgept != start && edgept->prev->stepx == edgept->stepx && + edgept->prev->stepy == edgept->stepy); - //draw lines - fd->DrawTo(edgept->pos.x (), edgept->pos.y ()); - } - while (edgept != start); + // draw lines + fd->DrawTo(edgept->pos.x(), edgept->pos.y()); + } while (edgept != start); } #endif // GRAPHICS_DISABLED diff --git a/src/textord/drawedg.h b/src/textord/drawedg.h index ef5ed5e202..6a65cdcb07 100644 --- a/src/textord/drawedg.h +++ b/src/textord/drawedg.h @@ -18,20 +18,20 @@ * **********************************************************************/ -#ifndef DRAWEDG_H -#define DRAWEDG_H +#ifndef DRAWEDG_H +#define DRAWEDG_H #ifndef GRAPHICS_DISABLED -#include "scrollview.h" -#include "crakedge.h" +#include "crakedge.h" +#include "scrollview.h" -ScrollView* create_edges_window( //make window - ICOORD page_tr //size of image - ); -void draw_raw_edge( //draw the cracks - ScrollView* fd, //window to draw in - CRACKEDGE *start, //start of loop - ScrollView::Color colour //colour to draw in - ); +ScrollView* create_edges_window( // make window + ICOORD page_tr // size of image +); +void draw_raw_edge( // draw the cracks + ScrollView* fd, // window to draw in + CRACKEDGE* start, // start of loop + ScrollView::Color colour // colour to draw in +); #endif #endif diff --git a/src/textord/drawtord.cpp b/src/textord/drawtord.cpp index c0d1382142..0ff14b7054 100644 --- a/src/textord/drawtord.cpp +++ b/src/textord/drawtord.cpp @@ -21,19 +21,19 @@ #include "config_auto.h" #endif -#include "pithsync.h" -#include "topitch.h" -#include "drawtord.h" +#include "drawtord.h" +#include "pithsync.h" +#include "topitch.h" -#define TO_WIN_XPOS 0 //default window pos -#define TO_WIN_YPOS 0 -#define TO_WIN_NAME "Textord" - //title of window +#define TO_WIN_XPOS 0 // default window pos +#define TO_WIN_YPOS 0 +#define TO_WIN_NAME "Textord" +// title of window #define EXTERN -EXTERN BOOL_VAR (textord_show_fixed_cuts, FALSE, -"Draw fixed pitch cell boundaries"); +EXTERN BOOL_VAR(textord_show_fixed_cuts, FALSE, + "Draw fixed pitch cell boundaries"); EXTERN ScrollView* to_win = nullptr; @@ -46,13 +46,12 @@ EXTERN ScrollView* to_win = nullptr; ScrollView* create_to_win(ICOORD page_tr) { if (to_win != nullptr) return to_win; - to_win = new ScrollView(TO_WIN_NAME, TO_WIN_XPOS, TO_WIN_YPOS, - page_tr.x() + 1, page_tr.y() + 1, - page_tr.x(), page_tr.y(), true); + to_win = + new ScrollView(TO_WIN_NAME, TO_WIN_XPOS, TO_WIN_YPOS, page_tr.x() + 1, + page_tr.y() + 1, page_tr.x(), page_tr.y(), true); return to_win; } - void close_to_win() { // to_win is leaked, but this enables the user to view the contents. if (to_win != nullptr) { @@ -60,101 +59,97 @@ void close_to_win() { } } - /********************************************************************** * plot_box_list * * Draw a list of blobs. **********************************************************************/ -void plot_box_list( //make gradients win - ScrollView* win, //window to draw in - BLOBNBOX_LIST *list, //blob list - ScrollView::Color body_colour //colour to draw - ) { - BLOBNBOX_IT it = list; //iterator +void plot_box_list( // make gradients win + ScrollView* win, // window to draw in + BLOBNBOX_LIST* list, // blob list + ScrollView::Color body_colour // colour to draw +) { + BLOBNBOX_IT it = list; // iterator win->Pen(body_colour); win->Brush(ScrollView::NONE); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - it.data ()->bounding_box ().plot (win); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->bounding_box().plot(win); } } - /********************************************************************** * plot_to_row * * Draw the blobs of a row in a given colour and draw the line fit. **********************************************************************/ -void plot_to_row( //draw a row - TO_ROW *row, //row to draw - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ) { - FCOORD plot_pt; //point to plot - //blobs - BLOBNBOX_IT it = row->blob_list (); - float left, right; //end of row - - if (it.empty ()) { - tprintf ("No blobs in row at %g\n", row->parallel_c ()); +void plot_to_row( // draw a row + TO_ROW* row, // row to draw + ScrollView::Color colour, // colour to draw in + FCOORD rotation // rotation for line +) { + FCOORD plot_pt; // point to plot + // blobs + BLOBNBOX_IT it = row->blob_list(); + float left, right; // end of row + + if (it.empty()) { + tprintf("No blobs in row at %g\n", row->parallel_c()); return; } - left = it.data ()->bounding_box ().left (); - it.move_to_last (); - right = it.data ()->bounding_box ().right (); - plot_blob_list (to_win, row->blob_list (), colour, ScrollView::BROWN); + left = it.data()->bounding_box().left(); + it.move_to_last(); + right = it.data()->bounding_box().right(); + plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN); to_win->Pen(colour); - plot_pt = FCOORD (left, row->line_m () * left + row->line_c ()); - plot_pt.rotate (rotation); - to_win->SetCursor(plot_pt.x (), plot_pt.y ()); - plot_pt = FCOORD (right, row->line_m () * right + row->line_c ()); - plot_pt.rotate (rotation); - to_win->DrawTo(plot_pt.x (), plot_pt.y ()); + plot_pt = FCOORD(left, row->line_m() * left + row->line_c()); + plot_pt.rotate(rotation); + to_win->SetCursor(plot_pt.x(), plot_pt.y()); + plot_pt = FCOORD(right, row->line_m() * right + row->line_c()); + plot_pt.rotate(rotation); + to_win->DrawTo(plot_pt.x(), plot_pt.y()); } - /********************************************************************** * plot_parallel_row * * Draw the blobs of a row in a given colour and draw the line fit. **********************************************************************/ -void plot_parallel_row( //draw a row - TO_ROW *row, //row to draw - float gradient, //gradients of lines - int32_t left, //edge of block - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ) { - FCOORD plot_pt; //point to plot - //blobs - BLOBNBOX_IT it = row->blob_list (); - float fleft = (float) left; //floating version - float right; //end of row +void plot_parallel_row( // draw a row + TO_ROW* row, // row to draw + float gradient, // gradients of lines + int32_t left, // edge of block + ScrollView::Color colour, // colour to draw in + FCOORD rotation // rotation for line +) { + FCOORD plot_pt; // point to plot + // blobs + BLOBNBOX_IT it = row->blob_list(); + float fleft = (float)left; // floating version + float right; // end of row // left=it.data()->bounding_box().left(); - it.move_to_last (); - right = it.data ()->bounding_box ().right (); - plot_blob_list (to_win, row->blob_list (), colour, ScrollView::BROWN); + it.move_to_last(); + right = it.data()->bounding_box().right(); + plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN); to_win->Pen(colour); - plot_pt = FCOORD (fleft, gradient * left + row->max_y ()); - plot_pt.rotate (rotation); - to_win->SetCursor(plot_pt.x (), plot_pt.y ()); - plot_pt = FCOORD (fleft, gradient * left + row->min_y ()); - plot_pt.rotate (rotation); - to_win->DrawTo(plot_pt.x (), plot_pt.y ()); - plot_pt = FCOORD (fleft, gradient * left + row->parallel_c ()); - plot_pt.rotate (rotation); - to_win->SetCursor(plot_pt.x (), plot_pt.y ()); - plot_pt = FCOORD (right, gradient * right + row->parallel_c ()); - plot_pt.rotate (rotation); - to_win->DrawTo(plot_pt.x (), plot_pt.y ()); + plot_pt = FCOORD(fleft, gradient * left + row->max_y()); + plot_pt.rotate(rotation); + to_win->SetCursor(plot_pt.x(), plot_pt.y()); + plot_pt = FCOORD(fleft, gradient * left + row->min_y()); + plot_pt.rotate(rotation); + to_win->DrawTo(plot_pt.x(), plot_pt.y()); + plot_pt = FCOORD(fleft, gradient * left + row->parallel_c()); + plot_pt.rotate(rotation); + to_win->SetCursor(plot_pt.x(), plot_pt.y()); + plot_pt = FCOORD(right, gradient * right + row->parallel_c()); + plot_pt.rotate(rotation); + to_win->DrawTo(plot_pt.x(), plot_pt.y()); } - /********************************************************************** * draw_occupation * @@ -162,84 +157,81 @@ void plot_parallel_row( //draw a row * and points below the threshold in black. **********************************************************************/ -void -draw_occupation ( //draw projection -int32_t xleft, //edge of block -int32_t ybottom, //bottom of block -int32_t min_y, //coordinate limits -int32_t max_y, int32_t occupation[], //projection counts -int32_t thresholds[] //for drop out +void draw_occupation( // draw projection + int32_t xleft, // edge of block + int32_t ybottom, // bottom of block + int32_t min_y, // coordinate limits + int32_t max_y, + int32_t occupation[], // projection counts + int32_t thresholds[] // for drop out ) { - int32_t line_index; //pixel coord - ScrollView::Color colour; //of histogram - float fleft = (float) xleft; //float version + int32_t line_index; // pixel coord + ScrollView::Color colour; // of histogram + float fleft = (float)xleft; // float version colour = ScrollView::WHITE; to_win->Pen(colour); - to_win->SetCursor(fleft, (float) ybottom); + to_win->SetCursor(fleft, (float)ybottom); for (line_index = min_y; line_index <= max_y; line_index++) { if (occupation[line_index - min_y] < thresholds[line_index - min_y]) { if (colour != ScrollView::BLUE) { colour = ScrollView::BLUE; - to_win->Pen(colour); + to_win->Pen(colour); } - } - else { + } else { if (colour != ScrollView::WHITE) { colour = ScrollView::WHITE; - to_win->Pen(colour); + to_win->Pen(colour); } } - to_win->DrawTo(fleft + occupation[line_index - min_y] / 10.0, (float) line_index); + to_win->DrawTo(fleft + occupation[line_index - min_y] / 10.0, + (float)line_index); } - colour=ScrollView::STEEL_BLUE; + colour = ScrollView::STEEL_BLUE; to_win->Pen(colour); - to_win->SetCursor(fleft, (float) ybottom); + to_win->SetCursor(fleft, (float)ybottom); for (line_index = min_y; line_index <= max_y; line_index++) { - to_win->DrawTo(fleft + thresholds[line_index - min_y] / 10.0, (float) line_index); + to_win->DrawTo(fleft + thresholds[line_index - min_y] / 10.0, + (float)line_index); } } - /********************************************************************** * draw_meanlines * * Draw the meanlines of the given block in the given colour. **********************************************************************/ -void draw_meanlines( //draw a block - TO_BLOCK *block, //block to draw - float gradient, //gradients of lines - int32_t left, //edge of block - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ) { - FCOORD plot_pt; //point to plot - //rows - TO_ROW_IT row_it = block->get_rows (); - TO_ROW *row; //current row - BLOBNBOX_IT blob_it; //blobs - float right; //end of row +void draw_meanlines( // draw a block + TO_BLOCK* block, // block to draw + float gradient, // gradients of lines + int32_t left, // edge of block + ScrollView::Color colour, // colour to draw in + FCOORD rotation // rotation for line +) { + FCOORD plot_pt; // point to plot + // rows + TO_ROW_IT row_it = block->get_rows(); + TO_ROW* row; // current row + BLOBNBOX_IT blob_it; // blobs + float right; // end of row to_win->Pen(colour); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - blob_it.set_to_list (row->blob_list ()); - blob_it.move_to_last (); - right = blob_it.data ()->bounding_box ().right (); - plot_pt = - FCOORD ((float) left, - gradient * left + row->parallel_c () + row->xheight); - plot_pt.rotate (rotation); - to_win->SetCursor(plot_pt.x (), plot_pt.y ()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + blob_it.set_to_list(row->blob_list()); + blob_it.move_to_last(); + right = blob_it.data()->bounding_box().right(); plot_pt = - FCOORD ((float) right, - gradient * right + row->parallel_c () + row->xheight); - plot_pt.rotate (rotation); - to_win->DrawTo (plot_pt.x (), plot_pt.y ()); + FCOORD((float)left, gradient * left + row->parallel_c() + row->xheight); + plot_pt.rotate(rotation); + to_win->SetCursor(plot_pt.x(), plot_pt.y()); + plot_pt = FCOORD((float)right, + gradient * right + row->parallel_c() + row->xheight); + plot_pt.rotate(rotation); + to_win->DrawTo(plot_pt.x(), plot_pt.y()); } } - /********************************************************************** * plot_word_decisions * @@ -247,173 +239,169 @@ void draw_meanlines( //draw a block * highlighted. **********************************************************************/ -void plot_word_decisions( //draw words - ScrollView* win, //window tro draw in - int16_t pitch, //of block - TO_ROW *row //row to draw - ) { - ScrollView::Color colour = ScrollView::MAGENTA; //current colour - ScrollView::Color rect_colour; //fuzzy colour - int32_t prev_x; //end of prev blob - int16_t blob_count; //blobs in word - BLOBNBOX *blob; //current blob - TBOX blob_box; //bounding box - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT start_it = blob_it;//word start +void plot_word_decisions( // draw words + ScrollView* win, // window tro draw in + int16_t pitch, // of block + TO_ROW* row // row to draw +) { + ScrollView::Color colour = ScrollView::MAGENTA; // current colour + ScrollView::Color rect_colour; // fuzzy colour + int32_t prev_x; // end of prev blob + int16_t blob_count; // blobs in word + BLOBNBOX* blob; // current blob + TBOX blob_box; // bounding box + // iterator + BLOBNBOX_IT blob_it = row->blob_list(); + BLOBNBOX_IT start_it = blob_it; // word start rect_colour = ScrollView::BLACK; prev_x = -INT16_MAX; blob_count = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - if (!blob->joined_to_prev () - && blob_box.left () - prev_x > row->max_nonspace) { - if ((blob_box.left () - prev_x >= row->min_space - || blob_box.left () - prev_x > row->space_threshold) - && blob_count > 0) { + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + if (!blob->joined_to_prev() && + blob_box.left() - prev_x > row->max_nonspace) { + if ((blob_box.left() - prev_x >= row->min_space || + blob_box.left() - prev_x > row->space_threshold) && + blob_count > 0) { if (pitch > 0 && textord_show_fixed_cuts) - plot_fp_cells (win, colour, &start_it, pitch, blob_count, - &row->projection, row->projection_left, - row->projection_right, - row->xheight * textord_projection_scale); + plot_fp_cells(win, colour, &start_it, pitch, blob_count, + &row->projection, row->projection_left, + row->projection_right, + row->xheight * textord_projection_scale); blob_count = 0; start_it = blob_it; } if (colour == ScrollView::MAGENTA) colour = ScrollView::RED; else - colour = (ScrollView::Color) (colour + 1); - if (blob_box.left () - prev_x < row->min_space) { - if (blob_box.left () - prev_x > row->space_threshold) + colour = (ScrollView::Color)(colour + 1); + if (blob_box.left() - prev_x < row->min_space) { + if (blob_box.left() - prev_x > row->space_threshold) rect_colour = ScrollView::GOLDENROD; else rect_colour = ScrollView::CORAL; - //fill_color_index(win, rect_colour); + // fill_color_index(win, rect_colour); win->Brush(rect_colour); - win->Rectangle (prev_x, blob_box.bottom (), - blob_box.left (), blob_box.top ()); + win->Rectangle(prev_x, blob_box.bottom(), blob_box.left(), + blob_box.top()); } } - if (!blob->joined_to_prev()) - prev_x = blob_box.right(); - if (blob->cblob () != nullptr) - blob->cblob ()->plot (win, colour, colour); - if (!blob->joined_to_prev() && blob->cblob() != nullptr) - blob_count++; + if (!blob->joined_to_prev()) prev_x = blob_box.right(); + if (blob->cblob() != nullptr) blob->cblob()->plot(win, colour, colour); + if (!blob->joined_to_prev() && blob->cblob() != nullptr) blob_count++; } if (pitch > 0 && textord_show_fixed_cuts && blob_count > 0) - plot_fp_cells (win, colour, &start_it, pitch, blob_count, - &row->projection, row->projection_left, - row->projection_right, - row->xheight * textord_projection_scale); + plot_fp_cells(win, colour, &start_it, pitch, blob_count, &row->projection, + row->projection_left, row->projection_right, + row->xheight * textord_projection_scale); } - /********************************************************************** * plot_fp_cells * * Make a list of fixed pitch cuts and draw them. **********************************************************************/ -void plot_fp_cells( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - BLOBNBOX_IT *blob_it, //blobs - int16_t pitch, //of block - int16_t blob_count, //no of real blobs - STATS *projection, //vertical - int16_t projection_left, //edges //scale factor - int16_t projection_right, - float projection_scale) { - int16_t occupation; //occupied cells - TBOX word_box; //bounding box - FPSEGPT_LIST seg_list; //list of cuts +void plot_fp_cells( // draw words + ScrollView* win, // window tro draw in + ScrollView::Color colour, // colour of lines + BLOBNBOX_IT* blob_it, // blobs + int16_t pitch, // of block + int16_t blob_count, // no of real blobs + STATS* projection, // vertical + int16_t projection_left, // edges //scale factor + int16_t projection_right, float projection_scale) { + int16_t occupation; // occupied cells + TBOX word_box; // bounding box + FPSEGPT_LIST seg_list; // list of cuts FPSEGPT_IT seg_it; - FPSEGPT *segpt; //current point + FPSEGPT* segpt; // current point if (pitsync_linear_version) - check_pitch_sync2 (blob_it, blob_count, pitch, 2, projection, - projection_left, projection_right, - projection_scale, occupation, &seg_list, 0, 0); + check_pitch_sync2(blob_it, blob_count, pitch, 2, projection, + projection_left, projection_right, projection_scale, + occupation, &seg_list, 0, 0); else - check_pitch_sync (blob_it, blob_count, pitch, 2, projection, &seg_list); - word_box = blob_it->data ()->bounding_box (); - for (; blob_count > 0; blob_count--) - word_box += box_next (blob_it); - seg_it.set_to_list (&seg_list); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - segpt = seg_it.data (); + check_pitch_sync(blob_it, blob_count, pitch, 2, projection, &seg_list); + word_box = blob_it->data()->bounding_box(); + for (; blob_count > 0; blob_count--) word_box += box_next(blob_it); + seg_it.set_to_list(&seg_list); + for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { + segpt = seg_it.data(); if (segpt->faked) { - colour = ScrollView::WHITE; - win->Pen(colour); } - else { - win->Pen(colour); } - win->Line(segpt->position (), word_box.bottom (),segpt->position (), word_box.top ()); + colour = ScrollView::WHITE; + win->Pen(colour); + } else { + win->Pen(colour); + } + win->Line(segpt->position(), word_box.bottom(), segpt->position(), + word_box.top()); } } - /********************************************************************** * plot_fp_cells2 * * Make a list of fixed pitch cuts and draw them. **********************************************************************/ -void plot_fp_cells2( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - TO_ROW *row, //for location - FPSEGPT_LIST *seg_list //segments to plot - ) { - TBOX word_box; //bounding box +void plot_fp_cells2( // draw words + ScrollView* win, // window tro draw in + ScrollView::Color colour, // colour of lines + TO_ROW* row, // for location + FPSEGPT_LIST* seg_list // segments to plot +) { + TBOX word_box; // bounding box FPSEGPT_IT seg_it = seg_list; - //blobs in row - BLOBNBOX_IT blob_it = row->blob_list (); - FPSEGPT *segpt; //current point - - word_box = blob_it.data ()->bounding_box (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();) - word_box += box_next (&blob_it); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - segpt = seg_it.data (); + // blobs in row + BLOBNBOX_IT blob_it = row->blob_list(); + FPSEGPT* segpt; // current point + + word_box = blob_it.data()->bounding_box(); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();) + word_box += box_next(&blob_it); + for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { + segpt = seg_it.data(); if (segpt->faked) { - colour = ScrollView::WHITE; - win->Pen(colour); } - else { - win->Pen(colour); } - win->Line(segpt->position (), word_box.bottom (),segpt->position (), word_box.top ()); + colour = ScrollView::WHITE; + win->Pen(colour); + } else { + win->Pen(colour); + } + win->Line(segpt->position(), word_box.bottom(), segpt->position(), + word_box.top()); } } - /********************************************************************** * plot_row_cells * * Make a list of fixed pitch cuts and draw them. **********************************************************************/ -void plot_row_cells( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - TO_ROW *row, //for location - float xshift, //amount of shift - ICOORDELT_LIST *cells //cells to draw - ) { - TBOX word_box; //bounding box +void plot_row_cells( // draw words + ScrollView* win, // window tro draw in + ScrollView::Color colour, // colour of lines + TO_ROW* row, // for location + float xshift, // amount of shift + ICOORDELT_LIST* cells // cells to draw +) { + TBOX word_box; // bounding box ICOORDELT_IT cell_it = cells; - //blobs in row - BLOBNBOX_IT blob_it = row->blob_list (); - ICOORDELT *cell; //current cell + // blobs in row + BLOBNBOX_IT blob_it = row->blob_list(); + ICOORDELT* cell; // current cell - word_box = blob_it.data ()->bounding_box (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();) - word_box += box_next (&blob_it); + word_box = blob_it.data()->bounding_box(); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();) + word_box += box_next(&blob_it); win->Pen(colour); - for (cell_it.mark_cycle_pt (); !cell_it.cycled_list (); cell_it.forward ()) { - cell = cell_it.data (); - win->Line(cell->x () + xshift, word_box.bottom (), cell->x () + xshift, word_box.top ()); + for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) { + cell = cell_it.data(); + win->Line(cell->x() + xshift, word_box.bottom(), cell->x() + xshift, + word_box.top()); } } diff --git a/src/textord/drawtord.h b/src/textord/drawtord.h index 1e79252d0f..83f03a974f 100644 --- a/src/textord/drawtord.h +++ b/src/textord/drawtord.h @@ -17,83 +17,83 @@ * **********************************************************************/ -#ifndef DRAWTORD_H -#define DRAWTORD_H +#ifndef DRAWTORD_H +#define DRAWTORD_H -#include "params.h" -#include "scrollview.h" -#include "pitsync1.h" -#include "blobbox.h" +#include "blobbox.h" +#include "params.h" +#include "pitsync1.h" +#include "scrollview.h" -#define NO_SMD "none" +#define NO_SMD "none" -extern BOOL_VAR_H (textord_show_fixed_cuts, FALSE, -"Draw fixed pitch cell boundaries"); -extern STRING_VAR_H (to_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); -extern STRING_VAR_H (to_smdfile, NO_SMD, "Name of SMD file"); +extern BOOL_VAR_H(textord_show_fixed_cuts, FALSE, + "Draw fixed pitch cell boundaries"); +extern STRING_VAR_H(to_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); +extern STRING_VAR_H(to_smdfile, NO_SMD, "Name of SMD file"); extern ScrollView* to_win; -extern FILE *to_debug; +extern FILE* to_debug; // Creates a static display window for textord, and returns a pointer to it. ScrollView* create_to_win(ICOORD page_tr); -void close_to_win(); // Destroy the textord window. -void create_todebug_win(); //make gradients win -void plot_box_list( //make gradients win - ScrollView* win, //window to draw in - BLOBNBOX_LIST *list, //blob list - ScrollView::Color body_colour //colour to draw - ); -void plot_to_row( //draw a row - TO_ROW *row, //row to draw - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ); -void plot_parallel_row( //draw a row - TO_ROW *row, //row to draw - float gradient, //gradients of lines - int32_t left, //edge of block - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ); -void draw_occupation ( //draw projection -int32_t xleft, //edge of block -int32_t ybottom, //bottom of block -int32_t min_y, //coordinate limits -int32_t max_y, int32_t occupation[], //projection counts -int32_t thresholds[] //for drop out +void close_to_win(); // Destroy the textord window. +void create_todebug_win(); // make gradients win +void plot_box_list( // make gradients win + ScrollView* win, // window to draw in + BLOBNBOX_LIST* list, // blob list + ScrollView::Color body_colour // colour to draw +); +void plot_to_row( // draw a row + TO_ROW* row, // row to draw + ScrollView::Color colour, // colour to draw in + FCOORD rotation // rotation for line +); +void plot_parallel_row( // draw a row + TO_ROW* row, // row to draw + float gradient, // gradients of lines + int32_t left, // edge of block + ScrollView::Color colour, // colour to draw in + FCOORD rotation // rotation for line +); +void draw_occupation( // draw projection + int32_t xleft, // edge of block + int32_t ybottom, // bottom of block + int32_t min_y, // coordinate limits + int32_t max_y, + int32_t occupation[], // projection counts + int32_t thresholds[] // for drop out +); +void draw_meanlines( // draw a block + TO_BLOCK* block, // block to draw + float gradient, // gradients of lines + int32_t left, // edge of block + ScrollView::Color colour, // colour to draw in + FCOORD rotation // rotation for line +); +void plot_word_decisions( // draw words + ScrollView* win, // window tro draw in + int16_t pitch, // of block + TO_ROW* row // row to draw +); +void plot_fp_cells( // draw words + ScrollView* win, // window tro draw in + ScrollView::Color colour, // colour of lines + BLOBNBOX_IT* blob_it, // blobs + int16_t pitch, // of block + int16_t blob_count, // no of real blobs + STATS* projection, // vertical + int16_t projection_left, // edges //scale factor + int16_t projection_right, float projection_scale); +void plot_fp_cells2( // draw words + ScrollView* win, // window tro draw in + ScrollView::Color colour, // colour of lines + TO_ROW* row, // for location + FPSEGPT_LIST* seg_list // segments to plot +); +void plot_row_cells( // draw words + ScrollView* win, // window tro draw in + ScrollView::Color colour, // colour of lines + TO_ROW* row, // for location + float xshift, // amount of shift + ICOORDELT_LIST* cells // cells to draw ); -void draw_meanlines( //draw a block - TO_BLOCK *block, //block to draw - float gradient, //gradients of lines - int32_t left, //edge of block - ScrollView::Color colour, //colour to draw in - FCOORD rotation //rotation for line - ); -void plot_word_decisions( //draw words - ScrollView* win, //window tro draw in - int16_t pitch, //of block - TO_ROW *row //row to draw - ); -void plot_fp_cells( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - BLOBNBOX_IT *blob_it, //blobs - int16_t pitch, //of block - int16_t blob_count, //no of real blobs - STATS *projection, //vertical - int16_t projection_left, //edges //scale factor - int16_t projection_right, - float projection_scale); -void plot_fp_cells2( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - TO_ROW *row, //for location - FPSEGPT_LIST *seg_list //segments to plot - ); -void plot_row_cells( //draw words - ScrollView* win, //window tro draw in - ScrollView::Color colour, //colour of lines - TO_ROW *row, //for location - float xshift, //amount of shift - ICOORDELT_LIST *cells //cells to draw - ); #endif diff --git a/src/textord/edgblob.cpp b/src/textord/edgblob.cpp index 57bb56f89d..746662e55a 100644 --- a/src/textord/edgblob.cpp +++ b/src/textord/edgblob.cpp @@ -17,10 +17,10 @@ * **********************************************************************/ -#include "scanedg.h" +#include "edgblob.h" #include "drawedg.h" #include "edgloop.h" -#include "edgblob.h" +#include "scanedg.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -40,22 +40,17 @@ EXTERN INT_VAR(edges_max_children_per_outline, 10, "Max number of children inside a character outline"); EXTERN INT_VAR(edges_max_children_layers, 5, "Max layers of nested children inside a character outline"); -EXTERN BOOL_VAR(edges_debug, FALSE, - "turn on debugging for this module"); - +EXTERN BOOL_VAR(edges_debug, FALSE, "turn on debugging for this module"); EXTERN INT_VAR(edges_children_per_grandchild, 10, "Importance ratio for chucking outlines"); -EXTERN INT_VAR(edges_children_count_limit, 45, - "Max holes allowed in blob"); +EXTERN INT_VAR(edges_children_count_limit, 45, "Max holes allowed in blob"); EXTERN BOOL_VAR(edges_children_fix, FALSE, "Remove boxy parents of char-like children"); -EXTERN INT_VAR(edges_min_nonhole, 12, - "Min pixels for potential char in box"); +EXTERN INT_VAR(edges_min_nonhole, 12, "Min pixels for potential char in box"); EXTERN INT_VAR(edges_patharea_ratio, 40, "Max lensq/area for acceptable child outline"); -EXTERN double_VAR(edges_childarea, 0.5, - "Min area fraction of child outline"); +EXTERN double_VAR(edges_childarea, 0.5, "Min area fraction of child outline"); EXTERN double_VAR(edges_boxarea, 0.875, "Min area fraction of grandchild for box"); @@ -65,17 +60,16 @@ EXTERN double_VAR(edges_boxarea, 0.875, * Construct an array of buckets for associating outlines into blobs. */ -OL_BUCKETS::OL_BUCKETS( -ICOORD bleft, // corners -ICOORD tright): bl(bleft), tr(tright) { - bxdim =(tright.x() - bleft.x()) / BUCKETSIZE + 1; - bydim =(tright.y() - bleft.y()) / BUCKETSIZE + 1; - // make array +OL_BUCKETS::OL_BUCKETS(ICOORD bleft, // corners + ICOORD tright) + : bl(bleft), tr(tright) { + bxdim = (tright.x() - bleft.x()) / BUCKETSIZE + 1; + bydim = (tright.y() - bleft.y()) / BUCKETSIZE + 1; + // make array buckets = new C_OUTLINE_LIST[bxdim * bydim]; index = 0; } - /** * @name OL_BUCKETS::operator( * @@ -83,14 +77,13 @@ ICOORD tright): bl(bleft), tr(tright) { * given pixel coordinates. */ -C_OUTLINE_LIST * -OL_BUCKETS::operator()( // array access -int16_t x, // image coords -int16_t y) { - return &buckets[(y-bl.y()) / BUCKETSIZE * bxdim + (x-bl.x()) / BUCKETSIZE]; +C_OUTLINE_LIST* OL_BUCKETS::operator()( // array access + int16_t x, // image coords + int16_t y) { + return &buckets[(y - bl.y()) / BUCKETSIZE * bxdim + + (x - bl.x()) / BUCKETSIZE]; } - /** * @name OL_BUCKETS::outline_complexity * @@ -111,24 +104,23 @@ int16_t y) { * flattening out boxed or reversed video text regions. */ -int32_t OL_BUCKETS::outline_complexity( - C_OUTLINE *outline, // parent outline - int32_t max_count, // max output - int16_t depth // recurion depth - ) { - int16_t xmin, xmax; // coord limits +int32_t OL_BUCKETS::outline_complexity(C_OUTLINE* outline, // parent outline + int32_t max_count, // max output + int16_t depth // recurion depth +) { + int16_t xmin, xmax; // coord limits int16_t ymin, ymax; - int16_t xindex, yindex; // current bucket - C_OUTLINE *child; // current child - int32_t child_count; // no of children - int32_t grandchild_count; // no of grandchildren - C_OUTLINE_IT child_it; // search iterator + int16_t xindex, yindex; // current bucket + C_OUTLINE* child; // current child + int32_t child_count; // no of children + int32_t grandchild_count; // no of grandchildren + C_OUTLINE_IT child_it; // search iterator TBOX olbox = outline->bounding_box(); - xmin =(olbox.left() - bl.x()) / BUCKETSIZE; - xmax =(olbox.right() - bl.x()) / BUCKETSIZE; - ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; - ymax =(olbox.top() - bl.y()) / BUCKETSIZE; + xmin = (olbox.left() - bl.x()) / BUCKETSIZE; + xmax = (olbox.right() - bl.x()) / BUCKETSIZE; + ymin = (olbox.bottom() - bl.y()) / BUCKETSIZE; + ymax = (olbox.top() - bl.y()) / BUCKETSIZE; child_count = 0; grandchild_count = 0; if (++depth > edges_max_children_layers) // nested loops are too deep @@ -137,21 +129,20 @@ int32_t OL_BUCKETS::outline_complexity( for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list(&buckets[yindex * bxdim + xindex]); - if (child_it.empty()) - continue; + if (child_it.empty()) continue; for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { child = child_it.data(); - if (child == outline || !(*child < *outline)) - continue; + if (child == outline || !(*child < *outline)) continue; child_count++; - if (child_count > edges_max_children_per_outline) { // too fragmented + if (child_count > edges_max_children_per_outline) { // too fragmented if (edges_debug) - tprintf("Discard outline on child_count=%d > " - "max_children_per_outline=%d\n", - child_count, - static_cast(edges_max_children_per_outline)); + tprintf( + "Discard outline on child_count=%d > " + "max_children_per_outline=%d\n", + child_count, + static_cast(edges_max_children_per_outline)); return max_count + child_count; } @@ -162,9 +153,10 @@ int32_t OL_BUCKETS::outline_complexity( outline_complexity(child, remaining_count, depth); if (child_count + grandchild_count > max_count) { // too complex if (edges_debug) - tprintf("Disgard outline on child_count=%d + grandchild_count=%d " - "> max_count=%d\n", - child_count, grandchild_count, max_count); + tprintf( + "Disgard outline on child_count=%d + grandchild_count=%d " + "> max_count=%d\n", + child_count, grandchild_count, max_count); return child_count + grandchild_count; } } @@ -173,36 +165,35 @@ int32_t OL_BUCKETS::outline_complexity( return child_count + grandchild_count; } - /** * @name OL_BUCKETS::count_children * * Find number of descendants of this outline. */ // TODO(rays) Merge with outline_complexity. -int32_t OL_BUCKETS::count_children( // recursive count - C_OUTLINE *outline, // parent outline - int32_t max_count // max output - ) { - bool parent_box; // could it be boxy - int16_t xmin, xmax; // coord limits +int32_t OL_BUCKETS::count_children( // recursive count + C_OUTLINE* outline, // parent outline + int32_t max_count // max output +) { + bool parent_box; // could it be boxy + int16_t xmin, xmax; // coord limits int16_t ymin, ymax; - int16_t xindex, yindex; // current bucket - C_OUTLINE *child; // current child - int32_t child_count; // no of children - int32_t grandchild_count; // no of grandchildren - int32_t parent_area; // potential box - FLOAT32 max_parent_area; // potential box - int32_t child_area; // current child - int32_t child_length; // current child + int16_t xindex, yindex; // current bucket + C_OUTLINE* child; // current child + int32_t child_count; // no of children + int32_t grandchild_count; // no of grandchildren + int32_t parent_area; // potential box + FLOAT32 max_parent_area; // potential box + int32_t child_area; // current child + int32_t child_length; // current child TBOX olbox; - C_OUTLINE_IT child_it; // search iterator + C_OUTLINE_IT child_it; // search iterator olbox = outline->bounding_box(); - xmin =(olbox.left() - bl.x()) / BUCKETSIZE; - xmax =(olbox.right() - bl.x()) / BUCKETSIZE; - ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; - ymax =(olbox.top() - bl.y()) / BUCKETSIZE; + xmin = (olbox.left() - bl.x()) / BUCKETSIZE; + xmax = (olbox.right() - bl.x()) / BUCKETSIZE; + ymin = (olbox.bottom() - bl.y()) / BUCKETSIZE; + ymax = (olbox.top() - bl.y()) / BUCKETSIZE; child_count = 0; grandchild_count = 0; parent_area = 0; @@ -211,16 +202,15 @@ int32_t OL_BUCKETS::count_children( // recursive count for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list(&buckets[yindex * bxdim + xindex]); - if (child_it.empty()) - continue; + if (child_it.empty()) continue; for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { child = child_it.data(); if (child != outline && *child < *outline) { child_count++; if (child_count <= max_count) { - int max_grand =(max_count - child_count) / - edges_children_per_grandchild; + int max_grand = + (max_count - child_count) / edges_children_per_grandchild; if (max_grand > 0) grandchild_count += count_children(child, max_grand) * edges_children_per_grandchild; @@ -230,23 +220,20 @@ int32_t OL_BUCKETS::count_children( // recursive count if (child_count + grandchild_count > max_count) { if (edges_debug) tprintf("Discarding parent with child count=%d, gc=%d\n", - child_count,grandchild_count); + child_count, grandchild_count); return child_count + grandchild_count; } if (parent_area == 0) { parent_area = outline->outer_area(); - if (parent_area < 0) - parent_area = -parent_area; + if (parent_area < 0) parent_area = -parent_area; max_parent_area = outline->bounding_box().area() * edges_boxarea; - if (parent_area < max_parent_area) - parent_box = false; + if (parent_area < max_parent_area) parent_box = false; } if (parent_box && (!edges_children_fix || child->bounding_box().height() > edges_min_nonhole)) { child_area = child->outer_area(); - if (child_area < 0) - child_area = -child_area; + if (child_area < 0) child_area = -child_area; if (edges_children_fix) { if (parent_area - child_area < max_parent_area) { parent_box = false; @@ -254,29 +241,31 @@ int32_t OL_BUCKETS::count_children( // recursive count } if (grandchild_count > 0) { if (edges_debug) - tprintf("Discarding parent of area %d, child area=%d, max%g " - "with gc=%d\n", - parent_area, child_area, max_parent_area, - grandchild_count); + tprintf( + "Discarding parent of area %d, child area=%d, max%g " + "with gc=%d\n", + parent_area, child_area, max_parent_area, + grandchild_count); return max_count + 1; } child_length = child->pathlength(); if (child_length * child_length > child_area * edges_patharea_ratio) { if (edges_debug) - tprintf("Discarding parent of area %d, child area=%d, max%g " - "with child length=%d\n", - parent_area, child_area, max_parent_area, - child_length); + tprintf( + "Discarding parent of area %d, child area=%d, max%g " + "with child length=%d\n", + parent_area, child_area, max_parent_area, child_length); return max_count + 1; } } if (child_area < child->bounding_box().area() * edges_childarea) { if (edges_debug) - tprintf("Discarding parent of area %d, child area=%d, max%g " - "with child rect=%d\n", - parent_area, child_area, max_parent_area, - child->bounding_box().area()); + tprintf( + "Discarding parent of area %d, child area=%d, max%g " + "with child rect=%d\n", + parent_area, child_area, max_parent_area, + child->bounding_box().area()); return max_count + 1; } } @@ -287,30 +276,27 @@ int32_t OL_BUCKETS::count_children( // recursive count return child_count + grandchild_count; } - - - /** * @name OL_BUCKETS::extract_children * * Find number of descendants of this outline. */ -void OL_BUCKETS::extract_children( // recursive count - C_OUTLINE *outline, // parent outline - C_OUTLINE_IT *it // destination iterator - ) { - int16_t xmin, xmax; // coord limits +void OL_BUCKETS::extract_children( // recursive count + C_OUTLINE* outline, // parent outline + C_OUTLINE_IT* it // destination iterator +) { + int16_t xmin, xmax; // coord limits int16_t ymin, ymax; - int16_t xindex, yindex; // current bucket + int16_t xindex, yindex; // current bucket TBOX olbox; - C_OUTLINE_IT child_it; // search iterator + C_OUTLINE_IT child_it; // search iterator olbox = outline->bounding_box(); - xmin =(olbox.left() - bl.x()) / BUCKETSIZE; - xmax =(olbox.right() - bl.x()) / BUCKETSIZE; - ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; - ymax =(olbox.top() - bl.y()) / BUCKETSIZE; + xmin = (olbox.left() - bl.x()) / BUCKETSIZE; + xmax = (olbox.right() - bl.x()) / BUCKETSIZE; + ymin = (olbox.bottom() - bl.y()) / BUCKETSIZE; + ymax = (olbox.top() - bl.y()) / BUCKETSIZE; for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list(&buckets[yindex * bxdim + xindex]); @@ -324,101 +310,95 @@ void OL_BUCKETS::extract_children( // recursive count } } - /** * @name extract_edges * * Run the edge detector over the block and return a list of blobs. */ -void extract_edges(Pix* pix, // thresholded image - BLOCK *block) { // block to scan - C_OUTLINE_LIST outlines; // outlines in block +void extract_edges(Pix* pix, // thresholded image + BLOCK* block) { // block to scan + C_OUTLINE_LIST outlines; // outlines in block C_OUTLINE_IT out_it = &outlines; block_edges(pix, &(block->pdblk), &out_it); - ICOORD bleft; // block box + ICOORD bleft; // block box ICOORD tright; block->pdblk.bounding_box(bleft, tright); - // make blobs + // make blobs outlines_to_blobs(block, bleft, tright, &outlines); } - /** * @name outlines_to_blobs * * Gather together outlines into blobs using the usual bucket sort. */ -void outlines_to_blobs( // find blobs - BLOCK *block, // block to scan - ICOORD bleft, - ICOORD tright, - C_OUTLINE_LIST *outlines) { - // make buckets +void outlines_to_blobs( // find blobs + BLOCK* block, // block to scan + ICOORD bleft, ICOORD tright, C_OUTLINE_LIST* outlines) { + // make buckets OL_BUCKETS buckets(bleft, tright); fill_buckets(outlines, &buckets); empty_buckets(block, &buckets); } - /** * @name fill_buckets * * Run the edge detector over the block and return a list of blobs. */ -void fill_buckets( // find blobs - C_OUTLINE_LIST *outlines, // outlines in block - OL_BUCKETS *buckets // output buckets - ) { +void fill_buckets( // find blobs + C_OUTLINE_LIST* outlines, // outlines in block + OL_BUCKETS* buckets // output buckets +) { TBOX ol_box; // outline box C_OUTLINE_IT out_it = outlines; // iterator C_OUTLINE_IT bucket_it; // iterator in bucket - C_OUTLINE *outline; // current outline + C_OUTLINE* outline; // current outline for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { outline = out_it.extract(); // take off list // get box ol_box = outline->bounding_box(); - bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom())); + bucket_it.set_to_list((*buckets)(ol_box.left(), ol_box.bottom())); bucket_it.add_to_end(outline); } } - /** * @name empty_buckets * * Run the edge detector over the block and return a list of blobs. */ -void empty_buckets( // find blobs - BLOCK *block, // block to scan - OL_BUCKETS *buckets // output buckets - ) { - bool good_blob; // healthy blob - C_OUTLINE_LIST outlines; // outlines in block - // iterator +void empty_buckets( // find blobs + BLOCK* block, // block to scan + OL_BUCKETS* buckets // output buckets +) { + bool good_blob; // healthy blob + C_OUTLINE_LIST outlines; // outlines in block + // iterator C_OUTLINE_IT out_it = &outlines; C_OUTLINE_IT bucket_it = buckets->start_scan(); - C_OUTLINE_IT parent_it; // parent outline + C_OUTLINE_IT parent_it; // parent outline C_BLOB_IT good_blobs = block->blob_list(); C_BLOB_IT junk_blobs = block->reject_blobs(); while (!bucket_it.empty()) { out_it.set_to_list(&outlines); do { - parent_it = bucket_it; // find outermost + parent_it = bucket_it; // find outermost do { bucket_it.forward(); } while (!bucket_it.at_first() && !(*parent_it.data() < *bucket_it.data())); } while (!bucket_it.at_first()); - // move to new list + // move to new list out_it.add_after_then_move(parent_it.extract()); good_blob = capture_children(buckets, &junk_blobs, &out_it); C_BLOB::ConstructBlobsFromOutlines(good_blob, &outlines, &good_blobs, @@ -428,7 +408,6 @@ void empty_buckets( // find blobs } } - /** * @name capture_children * @@ -437,26 +416,22 @@ void empty_buckets( // find blobs * illegal and return FALSE. */ -bool capture_children( // find children - OL_BUCKETS* buckets, // bucket sort clanss - C_BLOB_IT* reject_it, // dead grandchildren - C_OUTLINE_IT* blob_it // output outlines +bool capture_children( // find children + OL_BUCKETS* buckets, // bucket sort clanss + C_BLOB_IT* reject_it, // dead grandchildren + C_OUTLINE_IT* blob_it // output outlines ) { - C_OUTLINE *outline; // master outline - int32_t child_count; // no of children + C_OUTLINE* outline; // master outline + int32_t child_count; // no of children outline = blob_it->data(); if (edges_use_new_outline_complexity) - child_count = buckets->outline_complexity(outline, - edges_children_count_limit, - 0); + child_count = + buckets->outline_complexity(outline, edges_children_count_limit, 0); else - child_count = buckets->count_children(outline, - edges_children_count_limit); - if (child_count > edges_children_count_limit) - return false; + child_count = buckets->count_children(outline, edges_children_count_limit); + if (child_count > edges_children_count_limit) return false; - if (child_count > 0) - buckets->extract_children(outline, blob_it); + if (child_count > 0) buckets->extract_children(outline, blob_it); return true; } diff --git a/src/textord/edgblob.h b/src/textord/edgblob.h index b85505c13e..f84fc8c9bf 100644 --- a/src/textord/edgblob.h +++ b/src/textord/edgblob.h @@ -17,79 +17,79 @@ * **********************************************************************/ -#ifndef EDGBLOB_H -#define EDGBLOB_H +#ifndef EDGBLOB_H +#define EDGBLOB_H -#include "scrollview.h" -#include "params.h" -#include "ocrblock.h" -#include "coutln.h" -#include "crakedge.h" +#include "coutln.h" +#include "crakedge.h" +#include "ocrblock.h" +#include "params.h" +#include "scrollview.h" -#define BUCKETSIZE 16 +#define BUCKETSIZE 16 -class OL_BUCKETS -{ - public: - OL_BUCKETS( //constructor - ICOORD bleft, //corners - ICOORD tright); +class OL_BUCKETS { + public: + OL_BUCKETS( // constructor + ICOORD bleft, // corners + ICOORD tright); - ~OL_BUCKETS () { //cleanup - delete[]buckets; - } - C_OUTLINE_LIST *operator () (//array access - int16_t x, //image coords + ~OL_BUCKETS() { // cleanup + delete[] buckets; + } + C_OUTLINE_LIST* operator()( // array access + int16_t x, // image coords int16_t y); - //first non-empty bucket - C_OUTLINE_LIST *start_scan() { - for (index = 0; buckets[index].empty () && index < bxdim * bydim - 1; - index++); - return &buckets[index]; - } - //next non-empty bucket - C_OUTLINE_LIST *scan_next() { - for (; buckets[index].empty () && index < bxdim * bydim - 1; index++); - return &buckets[index]; - } - int32_t count_children( //recursive sum - C_OUTLINE *outline, //parent outline - int32_t max_count); // max output - int32_t outline_complexity( // new version of count_children - C_OUTLINE *outline, // parent outline - int32_t max_count, // max output - int16_t depth); // level of recursion - void extract_children( //single level get - C_OUTLINE *outline, //parent outline - C_OUTLINE_IT *it); //destination iterator + // first non-empty bucket + C_OUTLINE_LIST* start_scan() { + for (index = 0; buckets[index].empty() && index < bxdim * bydim - 1; + index++) + ; + return &buckets[index]; + } + // next non-empty bucket + C_OUTLINE_LIST* scan_next() { + for (; buckets[index].empty() && index < bxdim * bydim - 1; index++) + ; + return &buckets[index]; + } + int32_t count_children( // recursive sum + C_OUTLINE* outline, // parent outline + int32_t max_count); // max output + int32_t outline_complexity( // new version of count_children + C_OUTLINE* outline, // parent outline + int32_t max_count, // max output + int16_t depth); // level of recursion + void extract_children( // single level get + C_OUTLINE* outline, // parent outline + C_OUTLINE_IT* it); // destination iterator - private: - C_OUTLINE_LIST * buckets; //array of buckets - int16_t bxdim; //size of array - int16_t bydim; - ICOORD bl; //corners - ICOORD tr; - int32_t index; //for extraction scan + private: + C_OUTLINE_LIST* buckets; // array of buckets + int16_t bxdim; // size of array + int16_t bydim; + ICOORD bl; // corners + ICOORD tr; + int32_t index; // for extraction scan }; -void extract_edges(Pix* pix, // thresholded image - BLOCK* block); // block to scan -void outlines_to_blobs( //find blobs - BLOCK *block, //block to scan - ICOORD bleft, //block box //outlines in block - ICOORD tright, - C_OUTLINE_LIST *outlines); -void fill_buckets( //find blobs - C_OUTLINE_LIST *outlines, //outlines in block - OL_BUCKETS *buckets //output buckets - ); -void empty_buckets( //find blobs - BLOCK *block, //block to scan - OL_BUCKETS *buckets //output buckets - ); -bool capture_children( //find children - OL_BUCKETS* buckets, //bucket sort clanss - C_BLOB_IT* reject_it, //dead grandchildren - C_OUTLINE_IT* blob_it //output outlines +void extract_edges(Pix* pix, // thresholded image + BLOCK* block); // block to scan +void outlines_to_blobs( // find blobs + BLOCK* block, // block to scan + ICOORD bleft, // block box //outlines in block + ICOORD tright, C_OUTLINE_LIST* outlines); +void fill_buckets( // find blobs + C_OUTLINE_LIST* outlines, // outlines in block + OL_BUCKETS* buckets // output buckets +); +void empty_buckets( // find blobs + BLOCK* block, // block to scan + OL_BUCKETS* buckets // output buckets +); +bool capture_children( // find children + OL_BUCKETS* buckets, // bucket sort clanss + C_BLOB_IT* reject_it, // dead grandchildren + C_OUTLINE_IT* blob_it // output outlines ); #endif diff --git a/src/textord/edgloop.cpp b/src/textord/edgloop.cpp index 2c42e174d1..cd26e92bdc 100644 --- a/src/textord/edgloop.cpp +++ b/src/textord/edgloop.cpp @@ -17,16 +17,16 @@ * **********************************************************************/ -#include "scanedg.h" -#include "drawedg.h" -#include "edgloop.h" +#include "edgloop.h" +#include "drawedg.h" +#include "scanedg.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif -#define MINEDGELENGTH 8 // min decent length +#define MINEDGELENGTH 8 // min decent length /********************************************************************** * complete_edge @@ -34,26 +34,25 @@ * Complete the edge by cleaning it up. **********************************************************************/ -void complete_edge(CRACKEDGE *start, //start of loop +void complete_edge(CRACKEDGE* start, // start of loop C_OUTLINE_IT* outline_it) { - ScrollView::Color colour; //colour to draw in - int16_t looplength; //steps in loop - ICOORD botleft; //bounding box + ScrollView::Color colour; // colour to draw in + int16_t looplength; // steps in loop + ICOORD botleft; // bounding box ICOORD topright; - C_OUTLINE *outline; //new outline + C_OUTLINE* outline; // new outline - //check length etc. - colour = check_path_legal (start); + // check length etc. + colour = check_path_legal(start); if (colour == ScrollView::RED || colour == ScrollView::BLUE) { - looplength = loop_bounding_box (start, botleft, topright); - outline = new C_OUTLINE (start, botleft, topright, looplength); - //add to list - outline_it->add_after_then_move (outline); + looplength = loop_bounding_box(start, botleft, topright); + outline = new C_OUTLINE(start, botleft, topright, looplength); + // add to list + outline_it->add_after_then_move(outline); } } - /********************************************************************** * check_path_legal * @@ -64,49 +63,48 @@ void complete_edge(CRACKEDGE *start, //start of loop * These colours are used to draw the raw outline. **********************************************************************/ -ScrollView::Color check_path_legal( //certify outline - CRACKEDGE *start //start of loop - ) { - int lastchain; //last chain code - int chaindiff; //chain code diff - int32_t length; //length of loop - int32_t chainsum; //sum of chain diffs - CRACKEDGE *edgept; //current point +ScrollView::Color check_path_legal( // certify outline + CRACKEDGE* start // start of loop +) { + int lastchain; // last chain code + int chaindiff; // chain code diff + int32_t length; // length of loop + int32_t chainsum; // sum of chain diffs + CRACKEDGE* edgept; // current point const ERRCODE ED_ILLEGAL_SUM = "Illegal sum of chain codes"; length = 0; - chainsum = 0; //sum of chain codes + chainsum = 0; // sum of chain codes edgept = start; - lastchain = edgept->prev->stepdir; //previous chain code + lastchain = edgept->prev->stepdir; // previous chain code do { length++; if (edgept->stepdir != lastchain) { - //chain code difference + // chain code difference chaindiff = edgept->stepdir - lastchain; if (chaindiff > 2) chaindiff -= 4; else if (chaindiff < -2) chaindiff += 4; - chainsum += chaindiff; //sum differences + chainsum += chaindiff; // sum differences lastchain = edgept->stepdir; } edgept = edgept->next; - } - while (edgept != start && length < C_OUTLINE::kMaxOutlineLength); + } while (edgept != start && length < C_OUTLINE::kMaxOutlineLength); - if ((chainsum != 4 && chainsum != -4) - || edgept != start || length < MINEDGELENGTH) { + if ((chainsum != 4 && chainsum != -4) || edgept != start || + length < MINEDGELENGTH) { if (edgept != start) { - return ScrollView::YELLOW; + return ScrollView::YELLOW; } else if (length < MINEDGELENGTH) { - return ScrollView::MAGENTA; + return ScrollView::MAGENTA; } else { - ED_ILLEGAL_SUM.error ("check_path_legal", TESSLOG, "chainsum=%d", - chainsum); + ED_ILLEGAL_SUM.error("check_path_legal", TESSLOG, "chainsum=%d", + chainsum); return ScrollView::GREEN; } } - //colour on inside + // colour on inside return chainsum < 0 ? ScrollView::BLUE : ScrollView::RED; } @@ -116,44 +114,41 @@ ScrollView::Color check_path_legal( //certify outline * Find the bounding box of the edge loop. **********************************************************************/ -int16_t loop_bounding_box( //get bounding box - CRACKEDGE *&start, //edge loop - ICOORD &botleft, //bounding box - ICOORD &topright) { - int16_t length; //length of loop - int16_t leftmost; //on top row - CRACKEDGE *edgept; //current point - CRACKEDGE *realstart; //topleft start +int16_t loop_bounding_box( // get bounding box + CRACKEDGE*& start, // edge loop + ICOORD& botleft, // bounding box + ICOORD& topright) { + int16_t length; // length of loop + int16_t leftmost; // on top row + CRACKEDGE* edgept; // current point + CRACKEDGE* realstart; // topleft start edgept = start; realstart = start; - botleft = topright = ICOORD (edgept->pos.x (), edgept->pos.y ()); - leftmost = edgept->pos.x (); - length = 0; //coutn length + botleft = topright = ICOORD(edgept->pos.x(), edgept->pos.y()); + leftmost = edgept->pos.x(); + length = 0; // coutn length do { edgept = edgept->next; - if (edgept->pos.x () < botleft.x ()) - //get bounding box - botleft.set_x (edgept->pos.x ()); - else if (edgept->pos.x () > topright.x ()) - topright.set_x (edgept->pos.x ()); - if (edgept->pos.y () < botleft.y ()) - //get bounding box - botleft.set_y (edgept->pos.y ()); - else if (edgept->pos.y () > topright.y ()) { + if (edgept->pos.x() < botleft.x()) + // get bounding box + botleft.set_x(edgept->pos.x()); + else if (edgept->pos.x() > topright.x()) + topright.set_x(edgept->pos.x()); + if (edgept->pos.y() < botleft.y()) + // get bounding box + botleft.set_y(edgept->pos.y()); + else if (edgept->pos.y() > topright.y()) { realstart = edgept; - leftmost = edgept->pos.x (); - topright.set_y (edgept->pos.y ()); - } - else if (edgept->pos.y () == topright.y () - && edgept->pos.x () < leftmost) { - //leftmost on line - leftmost = edgept->pos.x (); + leftmost = edgept->pos.x(); + topright.set_y(edgept->pos.y()); + } else if (edgept->pos.y() == topright.y() && edgept->pos.x() < leftmost) { + // leftmost on line + leftmost = edgept->pos.x(); realstart = edgept; } - length++; //count elements - } - while (edgept != start); - start = realstart; //shift it to topleft + length++; // count elements + } while (edgept != start); + start = realstart; // shift it to topleft return length; } diff --git a/src/textord/edgloop.h b/src/textord/edgloop.h index c3686cbc02..4f31c5482f 100644 --- a/src/textord/edgloop.h +++ b/src/textord/edgloop.h @@ -17,33 +17,30 @@ * **********************************************************************/ -#ifndef EDGLOOP_H -#define EDGLOOP_H +#ifndef EDGLOOP_H +#define EDGLOOP_H -#include "scrollview.h" -#include "params.h" -#include "pdblock.h" -#include "coutln.h" -#include "crakedge.h" +#include "coutln.h" +#include "crakedge.h" +#include "params.h" +#include "pdblock.h" +#include "scrollview.h" -#define BUCKETSIZE 16 +#define BUCKETSIZE 16 - -extern INT_VAR_H (edges_children_per_grandchild, 10, -"Importance ratio for chucking outlines"); -extern INT_VAR_H (edges_children_count_limit, 45, -"Max holes allowed in blob"); -extern double_VAR_H (edges_childarea, 0.5, -"Max area fraction of child outline"); -extern double_VAR_H (edges_boxarea, 0.8, -"Min area fraction of grandchild for box"); -void complete_edge(CRACKEDGE *start, //start of loop +extern INT_VAR_H(edges_children_per_grandchild, 10, + "Importance ratio for chucking outlines"); +extern INT_VAR_H(edges_children_count_limit, 45, "Max holes allowed in blob"); +extern double_VAR_H(edges_childarea, 0.5, "Max area fraction of child outline"); +extern double_VAR_H(edges_boxarea, 0.8, + "Min area fraction of grandchild for box"); +void complete_edge(CRACKEDGE* start, // start of loop C_OUTLINE_IT* outline_it); -ScrollView::Color check_path_legal( //certify outline - CRACKEDGE *start //start of loop - ); -int16_t loop_bounding_box( //get bounding box - CRACKEDGE *&start, //edge loop - ICOORD &botleft, //bounding box - ICOORD &topright); +ScrollView::Color check_path_legal( // certify outline + CRACKEDGE* start // start of loop +); +int16_t loop_bounding_box( // get bounding box + CRACKEDGE*& start, // edge loop + ICOORD& botleft, // bounding box + ICOORD& topright); #endif diff --git a/src/textord/equationdetectbase.cpp b/src/textord/equationdetectbase.cpp index d3c2d02a43..78ea8486bb 100644 --- a/src/textord/equationdetectbase.cpp +++ b/src/textord/equationdetectbase.cpp @@ -17,14 +17,13 @@ // /////////////////////////////////////////////////////////////////////// +#include "equationdetectbase.h" #include "allheaders.h" #include "blobbox.h" -#include "equationdetectbase.h" namespace tesseract { -void EquationDetectBase::RenderSpecialText(Pix* pix, - BLOBNBOX* blob) { +void EquationDetectBase::RenderSpecialText(Pix* pix, BLOBNBOX* blob) { ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr); const TBOX& tbox = blob->bounding_box(); int height = pixGetHeight(pix); @@ -32,8 +31,8 @@ void EquationDetectBase::RenderSpecialText(Pix* pix, // Coordinate translation: tesseract use left bottom as the original, while // leptonica uses left top as the original. - Box *box = boxCreate(tbox.left(), height - tbox.top(), - tbox.width(), tbox.height()); + Box* box = + boxCreate(tbox.left(), height - tbox.top(), tbox.width(), tbox.height()); switch (blob->special_text_type()) { case BSTT_MATH: // Red box. pixRenderBoxArb(pix, box, box_width, 255, 0, 0); diff --git a/src/textord/equationdetectbase.h b/src/textord/equationdetectbase.h index 1a05a98ca2..098171d685 100644 --- a/src/textord/equationdetectbase.h +++ b/src/textord/equationdetectbase.h @@ -20,6 +20,8 @@ #ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ #define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ +#include "blobbox.h" + class BLOBNBOX_LIST; class TO_BLOCK; struct Pix; diff --git a/src/textord/fpchop.cpp b/src/textord/fpchop.cpp index 25999f43aa..a881dc52bc 100644 --- a/src/textord/fpchop.cpp +++ b/src/textord/fpchop.cpp @@ -18,15 +18,15 @@ **********************************************************************/ #ifdef __UNIX__ -#include +#include #endif -#include "stderr.h" -#include "blobbox.h" -#include "statistc.h" -#include "drawtord.h" -#include "tovars.h" -#include "topitch.h" -#include "fpchop.h" +#include "blobbox.h" +#include "drawtord.h" +#include "fpchop.h" +#include "statistc.h" +#include "stderr.h" +#include "topitch.h" +#include "tovars.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -35,10 +35,9 @@ #define EXTERN -EXTERN INT_VAR (textord_fp_chop_error, 2, -"Max allowed bending of chop cells"); -EXTERN double_VAR (textord_fp_chop_snap, 0.5, -"Max distance of chop pt from vertex"); +EXTERN INT_VAR(textord_fp_chop_error, 2, "Max allowed bending of chop cells"); +EXTERN double_VAR(textord_fp_chop_snap, 0.5, + "Max distance of chop pt from vertex"); ELISTIZE(C_OUTLINE_FRAG) //#undef ASSERT_HOST @@ -48,191 +47,178 @@ ELISTIZE(C_OUTLINE_FRAG) * * Make a ROW from a fixed pitch TO_ROW. **********************************************************************/ -ROW *fixed_pitch_words( //find lines - TO_ROW *row, //row to do - FCOORD rotation //for drawing - ) { - bool bol; //start of line - uint8_t blanks; //in front of word - uint8_t new_blanks; //blanks in empty cell - int16_t chop_coord; //chop boundary - int16_t prev_chop_coord; //start of cell - int16_t rep_left; //left edge of rep word - ROW *real_row; //output row +ROW* fixed_pitch_words( // find lines + TO_ROW* row, // row to do + FCOORD rotation // for drawing +) { + bool bol; // start of line + uint8_t blanks; // in front of word + uint8_t new_blanks; // blanks in empty cell + int16_t chop_coord; // chop boundary + int16_t prev_chop_coord; // start of cell + int16_t rep_left; // left edge of rep word + ROW* real_row; // output row C_OUTLINE_LIST left_coutlines; C_OUTLINE_LIST right_coutlines; C_BLOB_LIST cblobs; C_BLOB_IT cblob_it = &cblobs; WERD_LIST words; - WERD_IT word_it = &words; //new words - //repeated blobs + WERD_IT word_it = &words; // new words + // repeated blobs WERD_IT rep_it = &row->rep_words; - WERD *word; //new word - int32_t xstarts[2]; //row ends - int32_t prev_x; //end of prev blob - //iterator - BLOBNBOX_IT box_it = row->blob_list (); - //boundaries + WERD* word; // new word + int32_t xstarts[2]; // row ends + int32_t prev_x; // end of prev blob + // iterator + BLOBNBOX_IT box_it = row->blob_list(); + // boundaries ICOORDELT_IT cell_it = &row->char_cells; #ifndef GRAPHICS_DISABLED if (textord_show_page_cuts && to_win != nullptr) { - plot_row_cells (to_win, ScrollView::RED, row, 0, &row->char_cells); + plot_row_cells(to_win, ScrollView::RED, row, 0, &row->char_cells); } #endif prev_x = -INT16_MAX; bol = true; blanks = 0; - if (rep_it.empty ()) + if (rep_it.empty()) rep_left = INT16_MAX; else - rep_left = rep_it.data ()->bounding_box ().left (); - if (box_it.empty ()) - return nullptr; //empty row - xstarts[0] = box_it.data ()->bounding_box ().left (); + rep_left = rep_it.data()->bounding_box().left(); + if (box_it.empty()) return nullptr; // empty row + xstarts[0] = box_it.data()->bounding_box().left(); if (rep_left < xstarts[0]) { xstarts[0] = rep_left; } - if (cell_it.empty () || row->char_cells.singleton ()) { - tprintf ("Row without enough char cells!\n"); - tprintf ("Leftmost blob is at (%d,%d)\n", - box_it.data ()->bounding_box ().left (), - box_it.data ()->bounding_box ().bottom ()); + if (cell_it.empty() || row->char_cells.singleton()) { + tprintf("Row without enough char cells!\n"); + tprintf("Leftmost blob is at (%d,%d)\n", + box_it.data()->bounding_box().left(), + box_it.data()->bounding_box().bottom()); return nullptr; } - ASSERT_HOST (!cell_it.empty () && !row->char_cells.singleton ()); - prev_chop_coord = cell_it.data ()->x (); + ASSERT_HOST(!cell_it.empty() && !row->char_cells.singleton()); + prev_chop_coord = cell_it.data()->x(); word = nullptr; - while (rep_left < cell_it.data ()->x ()) { - word = add_repeated_word (&rep_it, rep_left, prev_chop_coord, - blanks, row->fixed_pitch, &word_it); + while (rep_left < cell_it.data()->x()) { + word = add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, + row->fixed_pitch, &word_it); } - cell_it.mark_cycle_pt (); - if (prev_chop_coord >= cell_it.data ()->x ()) - cell_it.forward (); - for (; !cell_it.cycled_list (); cell_it.forward ()) { - chop_coord = cell_it.data ()->x (); - while (!box_it.empty () - && box_it.data ()->bounding_box ().left () <= chop_coord) { - if (box_it.data ()->bounding_box ().right () > prev_x) - prev_x = box_it.data ()->bounding_box ().right (); - split_to_blob (box_it.extract (), chop_coord, - textord_fp_chop_error + 0.5f, - &left_coutlines, - &right_coutlines); - box_it.forward (); + cell_it.mark_cycle_pt(); + if (prev_chop_coord >= cell_it.data()->x()) cell_it.forward(); + for (; !cell_it.cycled_list(); cell_it.forward()) { + chop_coord = cell_it.data()->x(); + while (!box_it.empty() && + box_it.data()->bounding_box().left() <= chop_coord) { + if (box_it.data()->bounding_box().right() > prev_x) + prev_x = box_it.data()->bounding_box().right(); + split_to_blob(box_it.extract(), chop_coord, textord_fp_chop_error + 0.5f, + &left_coutlines, &right_coutlines); + box_it.forward(); while (!box_it.empty() && box_it.data()->cblob() == nullptr) { delete box_it.extract(); box_it.forward(); } } if (!right_coutlines.empty() && left_coutlines.empty()) - split_to_blob (nullptr, chop_coord, - textord_fp_chop_error + 0.5f, - &left_coutlines, - &right_coutlines); + split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5f, + &left_coutlines, &right_coutlines); if (!left_coutlines.empty()) { cblob_it.add_after_then_move(new C_BLOB(&left_coutlines)); } else { if (rep_left < chop_coord) { if (rep_left > prev_chop_coord) - new_blanks = (uint8_t) floor ((rep_left - prev_chop_coord) - / row->fixed_pitch + 0.5); + new_blanks = (uint8_t)floor( + (rep_left - prev_chop_coord) / row->fixed_pitch + 0.5); else new_blanks = 0; - } - else { + } else { if (chop_coord > prev_chop_coord) - new_blanks = (uint8_t) floor ((chop_coord - prev_chop_coord) - / row->fixed_pitch + 0.5); + new_blanks = (uint8_t)floor( + (chop_coord - prev_chop_coord) / row->fixed_pitch + 0.5); else new_blanks = 0; } if (!cblob_it.empty()) { - if (blanks < 1 && word != nullptr && !word->flag (W_REP_CHAR)) + if (blanks < 1 && word != nullptr && !word->flag(W_REP_CHAR)) blanks = 1; - word = new WERD (&cblobs, blanks, nullptr); - cblob_it.set_to_list (&cblobs); - word->set_flag (W_DONT_CHOP, TRUE); - word_it.add_after_then_move (word); + word = new WERD(&cblobs, blanks, nullptr); + cblob_it.set_to_list(&cblobs); + word->set_flag(W_DONT_CHOP, TRUE); + word_it.add_after_then_move(word); if (bol) { - word->set_flag (W_BOL, TRUE); + word->set_flag(W_BOL, TRUE); bol = false; } blanks = new_blanks; - } - else + } else blanks += new_blanks; while (rep_left < chop_coord) { - word = add_repeated_word (&rep_it, rep_left, prev_chop_coord, - blanks, row->fixed_pitch, &word_it); + word = add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, + row->fixed_pitch, &word_it); } } - if (prev_chop_coord < chop_coord) - prev_chop_coord = chop_coord; + if (prev_chop_coord < chop_coord) prev_chop_coord = chop_coord; } if (!cblob_it.empty()) { word = new WERD(&cblobs, blanks, nullptr); - word->set_flag (W_DONT_CHOP, TRUE); - word_it.add_after_then_move (word); - if (bol) - word->set_flag (W_BOL, TRUE); + word->set_flag(W_DONT_CHOP, TRUE); + word_it.add_after_then_move(word); + if (bol) word->set_flag(W_BOL, TRUE); } - ASSERT_HOST (word != nullptr); - while (!rep_it.empty ()) { - add_repeated_word (&rep_it, rep_left, prev_chop_coord, - blanks, row->fixed_pitch, &word_it); + ASSERT_HOST(word != nullptr); + while (!rep_it.empty()) { + add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, + row->fixed_pitch, &word_it); } - //at end of line - word_it.data ()->set_flag (W_EOL, TRUE); - if (prev_chop_coord > prev_x) - prev_x = prev_chop_coord; + // at end of line + word_it.data()->set_flag(W_EOL, TRUE); + if (prev_chop_coord > prev_x) prev_x = prev_chop_coord; xstarts[1] = prev_x + 1; - real_row = new ROW (row, (int16_t) row->kern_size, (int16_t) row->space_size); - word_it.set_to_list (real_row->word_list ()); - //put words in row - word_it.add_list_after (&words); - real_row->recalc_bounding_box (); + real_row = new ROW(row, (int16_t)row->kern_size, (int16_t)row->space_size); + word_it.set_to_list(real_row->word_list()); + // put words in row + word_it.add_list_after(&words); + real_row->recalc_bounding_box(); return real_row; } - /********************************************************************** * add_repeated_word * * Add repeated word into the row at the given point. **********************************************************************/ -WERD *add_repeated_word( //move repeated word - WERD_IT *rep_it, //repeated words - int16_t &rep_left, //left edge of word - int16_t &prev_chop_coord, //previous word end - uint8_t &blanks, //no of blanks - float pitch, //char cell size - WERD_IT *word_it //list of words - ) { - WERD *word; //word to move - int16_t new_blanks; //extra blanks +WERD* add_repeated_word( // move repeated word + WERD_IT* rep_it, // repeated words + int16_t& rep_left, // left edge of word + int16_t& prev_chop_coord, // previous word end + uint8_t& blanks, // no of blanks + float pitch, // char cell size + WERD_IT* word_it // list of words +) { + WERD* word; // word to move + int16_t new_blanks; // extra blanks if (rep_left > prev_chop_coord) { - new_blanks = (uint8_t) floor ((rep_left - prev_chop_coord) / pitch + 0.5); + new_blanks = (uint8_t)floor((rep_left - prev_chop_coord) / pitch + 0.5); blanks += new_blanks; } - word = rep_it->extract (); - prev_chop_coord = word->bounding_box ().right (); - word_it->add_after_then_move (word); - word->set_blanks (blanks); - rep_it->forward (); - if (rep_it->empty ()) + word = rep_it->extract(); + prev_chop_coord = word->bounding_box().right(); + word_it->add_after_then_move(word); + word->set_blanks(blanks); + rep_it->forward(); + if (rep_it->empty()) rep_left = INT16_MAX; else - rep_left = rep_it->data ()->bounding_box ().left (); + rep_left = rep_it->data()->bounding_box().left(); blanks = 0; return word; } - /********************************************************************** * split_to_blob * @@ -240,13 +226,13 @@ WERD *add_repeated_word( //move repeated word * into a left outline list and a right outline list. **********************************************************************/ -void split_to_blob( //split the blob - BLOBNBOX *blob, //blob to split - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_LIST *left_coutlines, //for cblobs - C_OUTLINE_LIST *right_coutlines) { - C_BLOB *real_cblob; //cblob to chop +void split_to_blob( // split the blob + BLOBNBOX* blob, // blob to split + int16_t chop_coord, // place to chop + float pitch_error, // allowed deviation + C_OUTLINE_LIST* left_coutlines, // for cblobs + C_OUTLINE_LIST* right_coutlines) { + C_BLOB* real_cblob; // cblob to chop if (blob != nullptr) { real_cblob = blob->cblob(); @@ -254,10 +240,7 @@ void split_to_blob( //split the blob real_cblob = nullptr; } if (!right_coutlines->empty() || real_cblob != nullptr) - fixed_chop_cblob(real_cblob, - chop_coord, - pitch_error, - left_coutlines, + fixed_chop_cblob(real_cblob, chop_coord, pitch_error, left_coutlines, right_coutlines); delete blob; @@ -270,45 +253,40 @@ void split_to_blob( //split the blob * produce a list of outlines left of the chop point and more to the right. **********************************************************************/ -void fixed_chop_cblob( //split the blob - C_BLOB *blob, //blob to split - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_LIST *left_outlines, //left half of chop - C_OUTLINE_LIST *right_outlines //right half of chop - ) { - C_OUTLINE *old_right; //already there - C_OUTLINE_LIST new_outlines; //new right ones - //ouput iterator +void fixed_chop_cblob( // split the blob + C_BLOB* blob, // blob to split + int16_t chop_coord, // place to chop + float pitch_error, // allowed deviation + C_OUTLINE_LIST* left_outlines, // left half of chop + C_OUTLINE_LIST* right_outlines // right half of chop +) { + C_OUTLINE* old_right; // already there + C_OUTLINE_LIST new_outlines; // new right ones + // ouput iterator C_OUTLINE_IT left_it = left_outlines; - //in/out iterator + // in/out iterator C_OUTLINE_IT right_it = right_outlines; C_OUTLINE_IT new_it = &new_outlines; - C_OUTLINE_IT blob_it; //outlines in blob - - if (!right_it.empty ()) { - while (!right_it.empty ()) { - old_right = right_it.extract (); - right_it.forward (); - fixed_split_coutline(old_right, - chop_coord, - pitch_error, - &left_it, + C_OUTLINE_IT blob_it; // outlines in blob + + if (!right_it.empty()) { + while (!right_it.empty()) { + old_right = right_it.extract(); + right_it.forward(); + fixed_split_coutline(old_right, chop_coord, pitch_error, &left_it, &new_it); } - right_it.add_list_before (&new_outlines); + right_it.add_list_before(&new_outlines); } if (blob != nullptr) { - blob_it.set_to_list (blob->out_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) - fixed_split_coutline (blob_it.extract (), chop_coord, pitch_error, - &left_it, &right_it); + blob_it.set_to_list(blob->out_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) + fixed_split_coutline(blob_it.extract(), chop_coord, pitch_error, &left_it, + &right_it); delete blob; } } - /********************************************************************** * fixed_split_outline * @@ -316,40 +294,41 @@ void fixed_chop_cblob( //split the blob * fall either side of the chop line into the appropriate list. **********************************************************************/ -void fixed_split_coutline( //chop the outline - C_OUTLINE *srcline, //source outline - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_IT *left_it, //left half of chop - C_OUTLINE_IT *right_it //right half of chop - ) { - C_OUTLINE *child; //child outline - TBOX srcbox; //box of outline - C_OUTLINE_LIST left_ch; //left children - C_OUTLINE_LIST right_ch; //right children - C_OUTLINE_FRAG_LIST left_frags;//chopped fragments - C_OUTLINE_FRAG_LIST right_frags;; +void fixed_split_coutline( // chop the outline + C_OUTLINE* srcline, // source outline + int16_t chop_coord, // place to chop + float pitch_error, // allowed deviation + C_OUTLINE_IT* left_it, // left half of chop + C_OUTLINE_IT* right_it // right half of chop +) { + C_OUTLINE* child; // child outline + TBOX srcbox; // box of outline + C_OUTLINE_LIST left_ch; // left children + C_OUTLINE_LIST right_ch; // right children + C_OUTLINE_FRAG_LIST left_frags; // chopped fragments + C_OUTLINE_FRAG_LIST right_frags; + ; C_OUTLINE_IT left_ch_it = &left_ch; - //for whole children + // for whole children C_OUTLINE_IT right_ch_it = &right_ch; - //for holes - C_OUTLINE_IT child_it = srcline->child (); + // for holes + C_OUTLINE_IT child_it = srcline->child(); srcbox = srcline->bounding_box(); - if (srcbox.left() + srcbox.right() <= chop_coord * 2 - && srcbox.right() < chop_coord + pitch_error) { + if (srcbox.left() + srcbox.right() <= chop_coord * 2 && + srcbox.right() < chop_coord + pitch_error) { // Whole outline is in the left side or not far over the chop_coord, // so put the whole thing on the left. left_it->add_after_then_move(srcline); - } else if (srcbox.left() + srcbox.right() > chop_coord * 2 - && srcbox.left () > chop_coord - pitch_error) { + } else if (srcbox.left() + srcbox.right() > chop_coord * 2 && + srcbox.left() > chop_coord - pitch_error) { // Whole outline is in the right side or not far over the chop_coord, // so put the whole thing on the right. - right_it->add_before_stay_put(srcline); + right_it->add_before_stay_put(srcline); } else { // Needs real chopping. - if (fixed_chop_coutline(srcline, chop_coord, pitch_error, - &left_frags, &right_frags)) { + if (fixed_chop_coutline(srcline, chop_coord, pitch_error, &left_frags, + &right_frags)) { for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { child = child_it.extract(); @@ -359,12 +338,12 @@ void fixed_split_coutline( //chop the outline left_ch_it.add_after_then_move(child); } else if (srcbox.left() > chop_coord) { // Whole child is on the right. - right_ch_it.add_after_then_move (child); + right_ch_it.add_after_then_move(child); } else { // No pitch_error is allowed when chopping children to prevent // impossible outlines from being created. - if (fixed_chop_coutline(child, chop_coord, 0.0f, - &left_frags, &right_frags)) { + if (fixed_chop_coutline(child, chop_coord, 0.0f, &left_frags, + &right_frags)) { delete child; } else { if (srcbox.left() + srcbox.right() <= chop_coord * 2) @@ -378,7 +357,7 @@ void fixed_split_coutline( //chop the outline close_chopped_cfragments(&right_frags, &right_ch, pitch_error, right_it); ASSERT_HOST(left_ch.empty() && right_ch.empty()); // No children left. - delete srcline; // Smashed up. + delete srcline; // Smashed up. } else { // Chop failed. Just use middle coord. if (srcbox.left() + srcbox.right() <= chop_coord * 2) @@ -389,7 +368,6 @@ void fixed_split_coutline( //chop the outline } } - /********************************************************************** * fixed_chop_coutline * @@ -398,41 +376,40 @@ void fixed_split_coutline( //chop the outline * If the coutline lies too heavily to one side to chop, FALSE is returned. **********************************************************************/ -bool fixed_chop_coutline( //chop the outline - C_OUTLINE* srcline, //source outline - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_FRAG_LIST* left_frags, //left half of chop - C_OUTLINE_FRAG_LIST* right_frags //right half of chop +bool fixed_chop_coutline( // chop the outline + C_OUTLINE* srcline, // source outline + int16_t chop_coord, // place to chop + float pitch_error, // allowed deviation + C_OUTLINE_FRAG_LIST* left_frags, // left half of chop + C_OUTLINE_FRAG_LIST* right_frags // right half of chop ) { - bool first_frag; //fragment - int16_t left_edge; //of outline - int16_t startindex; //in first fragment - int32_t length; //of outline - int16_t stepindex; //into outline - int16_t head_index; //start of fragment - ICOORD head_pos; //start of fragment - int16_t tail_index; //end of fragment - ICOORD tail_pos; //end of fragment - ICOORD pos; //current point - int16_t first_index = 0; //first tail - ICOORD first_pos; //first tail - - length = srcline->pathlength (); - pos = srcline->start_pos (); - left_edge = pos.x (); + bool first_frag; // fragment + int16_t left_edge; // of outline + int16_t startindex; // in first fragment + int32_t length; // of outline + int16_t stepindex; // into outline + int16_t head_index; // start of fragment + ICOORD head_pos; // start of fragment + int16_t tail_index; // end of fragment + ICOORD tail_pos; // end of fragment + ICOORD pos; // current point + int16_t first_index = 0; // first tail + ICOORD first_pos; // first tail + + length = srcline->pathlength(); + pos = srcline->start_pos(); + left_edge = pos.x(); tail_index = 0; tail_pos = pos; for (stepindex = 0; stepindex < length; stepindex++) { - if (pos.x () < left_edge) { - left_edge = pos.x (); + if (pos.x() < left_edge) { + left_edge = pos.x(); tail_index = stepindex; tail_pos = pos; } - pos += srcline->step (stepindex); + pos += srcline->step(stepindex); } - if (left_edge >= chop_coord - pitch_error) - return false; //not worth it + if (left_edge >= chop_coord - pitch_error) return false; // not worth it startindex = tail_index; first_frag = true; @@ -440,77 +417,57 @@ bool fixed_chop_coutline( //chop the outline head_pos = tail_pos; do { do { - tail_pos += srcline->step (tail_index); + tail_pos += srcline->step(tail_index); tail_index++; - if (tail_index == length) - tail_index = 0; - } - while (tail_pos.x () != chop_coord && tail_index != startindex); + if (tail_index == length) tail_index = 0; + } while (tail_pos.x() != chop_coord && tail_index != startindex); if (tail_index == startindex) { if (first_frag) - return false; //doesn't cross line + return false; // doesn't cross line else break; } //#ifdef __UNIX__ - ASSERT_HOST (head_index != tail_index); + ASSERT_HOST(head_index != tail_index); //#endif if (!first_frag) { - save_chop_cfragment(head_index, - head_pos, - tail_index, - tail_pos, - srcline, + save_chop_cfragment(head_index, head_pos, tail_index, tail_pos, srcline, left_frags); - } - else { + } else { first_index = tail_index; first_pos = tail_pos; first_frag = false; } - while (srcline->step (tail_index).x () == 0) { - tail_pos += srcline->step (tail_index); + while (srcline->step(tail_index).x() == 0) { + tail_pos += srcline->step(tail_index); tail_index++; - if (tail_index == length) - tail_index = 0; + if (tail_index == length) tail_index = 0; } head_index = tail_index; head_pos = tail_pos; - while (srcline->step (tail_index).x () > 0) { + while (srcline->step(tail_index).x() > 0) { do { - tail_pos += srcline->step (tail_index); + tail_pos += srcline->step(tail_index); tail_index++; - if (tail_index == length) - tail_index = 0; - } - while (tail_pos.x () != chop_coord); + if (tail_index == length) tail_index = 0; + } while (tail_pos.x() != chop_coord); //#ifdef __UNIX__ - ASSERT_HOST (head_index != tail_index); + ASSERT_HOST(head_index != tail_index); //#endif - save_chop_cfragment(head_index, - head_pos, - tail_index, - tail_pos, - srcline, + save_chop_cfragment(head_index, head_pos, tail_index, tail_pos, srcline, right_frags); - while (srcline->step (tail_index).x () == 0) { - tail_pos += srcline->step (tail_index); + while (srcline->step(tail_index).x() == 0) { + tail_pos += srcline->step(tail_index); tail_index++; - if (tail_index == length) - tail_index = 0; + if (tail_index == length) tail_index = 0; } head_index = tail_index; head_pos = tail_pos; } - } - while (tail_index != startindex); - save_chop_cfragment(head_index, - head_pos, - first_index, - first_pos, - srcline, + } while (tail_index != startindex); + save_chop_cfragment(head_index, head_pos, first_index, first_pos, srcline, left_frags); - return true; //did some chopping + return true; // did some chopping } /********************************************************************** @@ -519,69 +476,61 @@ bool fixed_chop_coutline( //chop the outline * Store the given fragment in the given fragment list. **********************************************************************/ -void save_chop_cfragment( //chop the outline - int16_t head_index, //head of fragment - ICOORD head_pos, //head of fragment - int16_t tail_index, //tail of fragment - ICOORD tail_pos, //tail of fragment - C_OUTLINE *srcline, //source of edgesteps - C_OUTLINE_FRAG_LIST *frags //fragment list - ) { - int16_t jump; //gap across end - int16_t stepcount; //total steps - C_OUTLINE_FRAG *head; //head of fragment - C_OUTLINE_FRAG *tail; //tail of fragment - int16_t tail_y; //ycoord of tail - - ASSERT_HOST (tail_pos.x () == head_pos.x ()); - ASSERT_HOST (tail_index != head_index); +void save_chop_cfragment( // chop the outline + int16_t head_index, // head of fragment + ICOORD head_pos, // head of fragment + int16_t tail_index, // tail of fragment + ICOORD tail_pos, // tail of fragment + C_OUTLINE* srcline, // source of edgesteps + C_OUTLINE_FRAG_LIST* frags // fragment list +) { + int16_t jump; // gap across end + int16_t stepcount; // total steps + C_OUTLINE_FRAG* head; // head of fragment + C_OUTLINE_FRAG* tail; // tail of fragment + int16_t tail_y; // ycoord of tail + + ASSERT_HOST(tail_pos.x() == head_pos.x()); + ASSERT_HOST(tail_index != head_index); stepcount = tail_index - head_index; - if (stepcount < 0) - stepcount += srcline->pathlength (); - jump = tail_pos.y () - head_pos.y (); - if (jump < 0) - jump = -jump; - if (jump == stepcount) - return; //its a nop - tail_y = tail_pos.y (); - head = new C_OUTLINE_FRAG (head_pos, tail_pos, srcline, - head_index, tail_index); - tail = new C_OUTLINE_FRAG (head, tail_y); + if (stepcount < 0) stepcount += srcline->pathlength(); + jump = tail_pos.y() - head_pos.y(); + if (jump < 0) jump = -jump; + if (jump == stepcount) return; // its a nop + tail_y = tail_pos.y(); + head = + new C_OUTLINE_FRAG(head_pos, tail_pos, srcline, head_index, tail_index); + tail = new C_OUTLINE_FRAG(head, tail_y); head->other_end = tail; add_frag_to_list(head, frags); add_frag_to_list(tail, frags); } - /********************************************************************** * C_OUTLINE_FRAG::C_OUTLINE_FRAG * * Constructors for C_OUTLINE_FRAG. **********************************************************************/ -C_OUTLINE_FRAG::C_OUTLINE_FRAG( //record fragment - ICOORD start_pt, //start coord - ICOORD end_pt, //end coord - C_OUTLINE *outline, //source of steps - int16_t start_index, - int16_t end_index) { +C_OUTLINE_FRAG::C_OUTLINE_FRAG( // record fragment + ICOORD start_pt, // start coord + ICOORD end_pt, // end coord + C_OUTLINE* outline, // source of steps + int16_t start_index, int16_t end_index) { start = start_pt; end = end_pt; - ycoord = start_pt.y (); + ycoord = start_pt.y(); stepcount = end_index - start_index; - if (stepcount < 0) - stepcount += outline->pathlength (); - ASSERT_HOST (stepcount > 0); + if (stepcount < 0) stepcount += outline->pathlength(); + ASSERT_HOST(stepcount > 0); steps = new DIR128[stepcount]; if (end_index > start_index) { for (int i = start_index; i < end_index; ++i) steps[i - start_index] = outline->step_dir(i); - } - else { + } else { int len = outline->pathlength(); int i = start_index; - for (; i < len; ++i) - steps[i - start_index] = outline->step_dir(i); + for (; i < len; ++i) steps[i - start_index] = outline->step_dir(i); if (end_index > 0) for (; i < end_index + len; ++i) steps[i - start_index] = outline->step_dir(i - len); @@ -590,10 +539,9 @@ C_OUTLINE_FRAG::C_OUTLINE_FRAG( //record fragment delete close(); } - -C_OUTLINE_FRAG::C_OUTLINE_FRAG( //record fragment - C_OUTLINE_FRAG *head, //other end - int16_t tail_y) { +C_OUTLINE_FRAG::C_OUTLINE_FRAG( // record fragment + C_OUTLINE_FRAG* head, // other end + int16_t tail_y) { ycoord = tail_y; other_end = head; start = head->start; @@ -602,7 +550,6 @@ C_OUTLINE_FRAG::C_OUTLINE_FRAG( //record fragment stepcount = 0; } - /********************************************************************** * add_frag_to_list * @@ -610,28 +557,26 @@ C_OUTLINE_FRAG::C_OUTLINE_FRAG( //record fragment * them in ascending ycoord order. **********************************************************************/ -void add_frag_to_list( //ordered add - C_OUTLINE_FRAG *frag, //fragment to add - C_OUTLINE_FRAG_LIST *frags //fragment list - ) { - //output list +void add_frag_to_list( // ordered add + C_OUTLINE_FRAG* frag, // fragment to add + C_OUTLINE_FRAG_LIST* frags // fragment list +) { + // output list C_OUTLINE_FRAG_IT frag_it = frags; - if (!frags->empty ()) { - for (frag_it.mark_cycle_pt (); !frag_it.cycled_list (); - frag_it.forward ()) { - if (frag_it.data ()->ycoord > frag->ycoord - || (frag_it.data ()->ycoord == frag->ycoord - && frag->other_end->ycoord < frag->ycoord)) { - frag_it.add_before_then_move (frag); + if (!frags->empty()) { + for (frag_it.mark_cycle_pt(); !frag_it.cycled_list(); frag_it.forward()) { + if (frag_it.data()->ycoord > frag->ycoord || + (frag_it.data()->ycoord == frag->ycoord && + frag->other_end->ycoord < frag->ycoord)) { + frag_it.add_before_then_move(frag); return; } } } - frag_it.add_to_end (frag); + frag_it.add_to_end(frag); } - /********************************************************************** * close_chopped_cfragments * @@ -639,29 +584,29 @@ void add_frag_to_list( //ordered add * Each outline made soaks up any of the child outlines which it encloses. **********************************************************************/ -void close_chopped_cfragments( //chop the outline - C_OUTLINE_FRAG_LIST *frags, //list to clear - C_OUTLINE_LIST *children, //potential children - float pitch_error, //allowed shrinkage - C_OUTLINE_IT *dest_it //output list - ) { - //iterator +void close_chopped_cfragments( // chop the outline + C_OUTLINE_FRAG_LIST* frags, // list to clear + C_OUTLINE_LIST* children, // potential children + float pitch_error, // allowed shrinkage + C_OUTLINE_IT* dest_it // output list +) { + // iterator C_OUTLINE_FRAG_IT frag_it = frags; - C_OUTLINE_FRAG *bottom_frag; //bottom of cut - C_OUTLINE_FRAG *top_frag; //top of cut - C_OUTLINE *outline; //new outline - C_OUTLINE *child; //current child + C_OUTLINE_FRAG* bottom_frag; // bottom of cut + C_OUTLINE_FRAG* top_frag; // top of cut + C_OUTLINE* outline; // new outline + C_OUTLINE* child; // current child C_OUTLINE_IT child_it = children; - C_OUTLINE_IT olchild_it; //children of outline + C_OUTLINE_IT olchild_it; // children of outline while (!frag_it.empty()) { frag_it.move_to_first(); - // get bottom one + // get bottom one bottom_frag = frag_it.extract(); frag_it.forward(); top_frag = frag_it.data(); // look at next - if ((bottom_frag->steps == 0 && top_frag->steps == 0) - || (bottom_frag->steps != 0 && top_frag->steps != 0)) { + if ((bottom_frag->steps == 0 && top_frag->steps == 0) || + (bottom_frag->steps != 0 && top_frag->steps != 0)) { if (frag_it.data_relative(1)->ycoord == top_frag->ycoord) frag_it.forward(); } @@ -676,23 +621,21 @@ void close_chopped_cfragments( //chop the outline for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { child = child_it.data(); - if (*child < *outline) - olchild_it.add_to_end(child_it.extract()); + if (*child < *outline) olchild_it.add_to_end(child_it.extract()); } if (outline->bounding_box().width() > pitch_error) dest_it->add_after_then_move(outline); else - delete outline; // Make it disappear. + delete outline; // Make it disappear. } } } - while (!child_it.empty ()) { - dest_it->add_after_then_move (child_it.extract ()); - child_it.forward (); + while (!child_it.empty()) { + dest_it->add_after_then_move(child_it.extract()); + child_it.forward(); } } - /********************************************************************** * join_chopped_fragments * @@ -700,28 +643,27 @@ void close_chopped_cfragments( //chop the outline * operand keeps responsibility for the fragment. **********************************************************************/ -C_OUTLINE *join_chopped_fragments( //join pieces - C_OUTLINE_FRAG *bottom, //bottom of cut - C_OUTLINE_FRAG *top //top of cut - ) { - C_OUTLINE *outline; //closed loop +C_OUTLINE* join_chopped_fragments( // join pieces + C_OUTLINE_FRAG* bottom, // bottom of cut + C_OUTLINE_FRAG* top // top of cut +) { + C_OUTLINE* outline; // closed loop if (bottom->other_end == top) { if (bottom->steps == 0) - outline = top->close (); //turn to outline + outline = top->close(); // turn to outline else - outline = bottom->close (); + outline = bottom->close(); delete top; delete bottom; return outline; } if (bottom->steps == 0) { - ASSERT_HOST (top->steps != 0); - join_segments (bottom->other_end, top); - } - else { - ASSERT_HOST (top->steps == 0); - join_segments (top->other_end, bottom); + ASSERT_HOST(top->steps != 0); + join_segments(bottom->other_end, top); + } else { + ASSERT_HOST(top->steps == 0); + join_segments(top->other_end, bottom); } top->other_end->other_end = bottom->other_end; bottom->other_end->other_end = top->other_end; @@ -737,57 +679,53 @@ C_OUTLINE *join_chopped_fragments( //join pieces * the first and the gap between them is closed. **********************************************************************/ -void join_segments( //join pieces - C_OUTLINE_FRAG *bottom, //bottom of cut - C_OUTLINE_FRAG *top //top of cut - ) { - DIR128 *steps; //new steps - int32_t stepcount; //no of steps - int16_t fake_count; //fake steps - DIR128 fake_step; //step entry - - ASSERT_HOST (bottom->end.x () == top->start.x ()); - fake_count = top->start.y () - bottom->end.y (); +void join_segments( // join pieces + C_OUTLINE_FRAG* bottom, // bottom of cut + C_OUTLINE_FRAG* top // top of cut +) { + DIR128* steps; // new steps + int32_t stepcount; // no of steps + int16_t fake_count; // fake steps + DIR128 fake_step; // step entry + + ASSERT_HOST(bottom->end.x() == top->start.x()); + fake_count = top->start.y() - bottom->end.y(); if (fake_count < 0) { fake_count = -fake_count; fake_step = 32; - } - else + } else fake_step = 96; stepcount = bottom->stepcount + fake_count + top->stepcount; steps = new DIR128[stepcount]; - memmove (steps, bottom->steps, bottom->stepcount); - memset (steps + bottom->stepcount, fake_step.get_dir(), fake_count); - memmove (steps + bottom->stepcount + fake_count, top->steps, - top->stepcount); - delete [] bottom->steps; + memmove(steps, bottom->steps, bottom->stepcount); + memset(steps + bottom->stepcount, fake_step.get_dir(), fake_count); + memmove(steps + bottom->stepcount + fake_count, top->steps, top->stepcount); + delete[] bottom->steps; bottom->steps = steps; bottom->stepcount = stepcount; bottom->end = top->end; bottom->other_end->end = top->end; } - /********************************************************************** * C_OUTLINE_FRAG::close * * Join the ends of this fragment and turn it into an outline. **********************************************************************/ -C_OUTLINE *C_OUTLINE_FRAG::close() { //join pieces - DIR128 *new_steps; //new steps - int32_t new_stepcount; //no of steps - int16_t fake_count; //fake steps - DIR128 fake_step; //step entry +C_OUTLINE* C_OUTLINE_FRAG::close() { // join pieces + DIR128* new_steps; // new steps + int32_t new_stepcount; // no of steps + int16_t fake_count; // fake steps + DIR128 fake_step; // step entry - ASSERT_HOST (start.x () == end.x ()); - fake_count = start.y () - end.y (); + ASSERT_HOST(start.x() == end.x()); + fake_count = start.y() - end.y(); if (fake_count < 0) { fake_count = -fake_count; fake_step = 32; - } - else + } else fake_step = 96; new_stepcount = stepcount + fake_count; @@ -795,28 +733,27 @@ C_OUTLINE *C_OUTLINE_FRAG::close() { //join pieces return nullptr; // Can't join them new_steps = new DIR128[new_stepcount]; memmove(new_steps, steps, stepcount); - memset (new_steps + stepcount, fake_step.get_dir(), fake_count); - C_OUTLINE* result = new C_OUTLINE (start, new_steps, new_stepcount); - delete [] new_steps; + memset(new_steps + stepcount, fake_step.get_dir(), fake_count); + C_OUTLINE* result = new C_OUTLINE(start, new_steps, new_stepcount); + delete[] new_steps; return result; } - /********************************************************************** * C_OUTLINE_FRAG::operator= * * Copy this fragment. **********************************************************************/ - //join pieces -C_OUTLINE_FRAG & C_OUTLINE_FRAG::operator= ( -const C_OUTLINE_FRAG & src //fragment to copy +// join pieces +C_OUTLINE_FRAG& C_OUTLINE_FRAG::operator=( + const C_OUTLINE_FRAG& src // fragment to copy ) { - delete [] steps; + delete[] steps; stepcount = src.stepcount; steps = new DIR128[stepcount]; - memmove (steps, src.steps, stepcount); + memmove(steps, src.steps, stepcount); start = src.start; end = src.end; ycoord = src.ycoord; diff --git a/src/textord/fpchop.h b/src/textord/fpchop.h index 6f7c488552..0d385bbeb4 100644 --- a/src/textord/fpchop.h +++ b/src/textord/fpchop.h @@ -17,115 +17,108 @@ * **********************************************************************/ -#ifndef FPCHOP_H -#define FPCHOP_H +#ifndef FPCHOP_H +#define FPCHOP_H -#include "params.h" -#include "blobbox.h" +#include "blobbox.h" +#include "params.h" -class C_OUTLINE_FRAG:public ELIST_LINK -{ - public: - C_OUTLINE_FRAG() { //empty constructor - steps = nullptr; - stepcount = 0; - } - ~C_OUTLINE_FRAG () { - delete [] steps; - } - //start coord - C_OUTLINE_FRAG(ICOORD start_pt, - ICOORD end_pt, //end coord - C_OUTLINE *outline, //source of steps - int16_t start_index, - int16_t end_index); - //other end - C_OUTLINE_FRAG(C_OUTLINE_FRAG *head, int16_t tail_y); - C_OUTLINE *close(); //copy to outline - C_OUTLINE_FRAG & operator= ( //assign - const C_OUTLINE_FRAG & src); +class C_OUTLINE_FRAG : public ELIST_LINK { + public: + C_OUTLINE_FRAG() { // empty constructor + steps = nullptr; + stepcount = 0; + } + ~C_OUTLINE_FRAG() { delete[] steps; } + // start coord + C_OUTLINE_FRAG(ICOORD start_pt, + ICOORD end_pt, // end coord + C_OUTLINE* outline, // source of steps + int16_t start_index, int16_t end_index); + // other end + C_OUTLINE_FRAG(C_OUTLINE_FRAG* head, int16_t tail_y); + C_OUTLINE* close(); // copy to outline + C_OUTLINE_FRAG& operator=( // assign + const C_OUTLINE_FRAG& src); - ICOORD start; //start coord - ICOORD end; //end coord - DIR128 *steps; //step array - int32_t stepcount; //no of steps - C_OUTLINE_FRAG *other_end; //head if a tail - int16_t ycoord; //coord of cut pt + ICOORD start; // start coord + ICOORD end; // end coord + DIR128* steps; // step array + int32_t stepcount; // no of steps + C_OUTLINE_FRAG* other_end; // head if a tail + int16_t ycoord; // coord of cut pt - private: + private: }; ELISTIZEH(C_OUTLINE_FRAG) -extern -INT_VAR_H (textord_fp_chop_error, 2, -"Max allowed bending of chop cells"); -extern -double_VAR_H (textord_fp_chop_snap, 0.5, -"Max distance of chop pt from vertex"); -ROW *fixed_pitch_words( //find lines - TO_ROW *row, //row to do - FCOORD rotation //for drawing - ); -WERD *add_repeated_word( //move repeated word - WERD_IT *rep_it, //repeated words - int16_t &rep_left, //left edge of word - int16_t &prev_chop_coord, //previous word end - uint8_t &blanks, //no of blanks - float pitch, //char cell size - WERD_IT *word_it //list of words - ); -void split_to_blob( //split the blob - BLOBNBOX *blob, //blob to split - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_LIST *left_coutlines, //for cblobs - C_OUTLINE_LIST *right_coutlines); -void fixed_chop_cblob( //split the blob - C_BLOB *blob, //blob to split - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_LIST *left_outlines, //left half of chop - C_OUTLINE_LIST *right_outlines //right half of chop - ); -void fixed_split_coutline( //chop the outline - C_OUTLINE *srcline, //source outline - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_IT *left_it, //left half of chop - C_OUTLINE_IT *right_it //right half of chop - ); -bool fixed_chop_coutline( //chop the outline - C_OUTLINE* srcline, //source outline - int16_t chop_coord, //place to chop - float pitch_error, //allowed deviation - C_OUTLINE_FRAG_LIST* left_frags, //left half of chop - C_OUTLINE_FRAG_LIST* right_frags //right half of chop +extern INT_VAR_H(textord_fp_chop_error, 2, "Max allowed bending of chop cells"); +extern double_VAR_H(textord_fp_chop_snap, 0.5, + "Max distance of chop pt from vertex"); +ROW* fixed_pitch_words( // find lines + TO_ROW* row, // row to do + FCOORD rotation // for drawing +); +WERD* add_repeated_word( // move repeated word + WERD_IT* rep_it, // repeated words + int16_t& rep_left, // left edge of word + int16_t& prev_chop_coord, // previous word end + uint8_t& blanks, // no of blanks + float pitch, // char cell size + WERD_IT* word_it // list of words +); +void split_to_blob( // split the blob + BLOBNBOX* blob, // blob to split + int16_t chop_coord, // place to chop + float pitch_error, // allowed deviation + C_OUTLINE_LIST* left_coutlines, // for cblobs + C_OUTLINE_LIST* right_coutlines); +void fixed_chop_cblob( // split the blob + C_BLOB* blob, // blob to split + int16_t chop_coord, // place to chop + float pitch_error, // allowed deviation + C_OUTLINE_LIST* left_outlines, // left half of chop + C_OUTLINE_LIST* right_outlines // right half of chop +); +void fixed_split_coutline( // chop the outline + C_OUTLINE* srcline, // source outline + int16_t chop_coord, // place to chop + float pitch_error, // allowed deviation + C_OUTLINE_IT* left_it, // left half of chop + C_OUTLINE_IT* right_it // right half of chop +); +bool fixed_chop_coutline( // chop the outline + C_OUTLINE* srcline, // source outline + int16_t chop_coord, // place to chop + float pitch_error, // allowed deviation + C_OUTLINE_FRAG_LIST* left_frags, // left half of chop + C_OUTLINE_FRAG_LIST* right_frags // right half of chop +); +void save_chop_cfragment( // chop the outline + int16_t head_index, // head of fragment + ICOORD head_pos, // head of fragment + int16_t tail_index, // tail of fragment + ICOORD tail_pos, // tail of fragment + C_OUTLINE* srcline, // source of edgesteps + C_OUTLINE_FRAG_LIST* frags // fragment list +); +void add_frag_to_list( // ordered add + C_OUTLINE_FRAG* frag, // fragment to add + C_OUTLINE_FRAG_LIST* frags // fragment list +); +void close_chopped_cfragments( // chop the outline + C_OUTLINE_FRAG_LIST* frags, // list to clear + C_OUTLINE_LIST* children, // potential children + float pitch_error, // allowed shrinkage + C_OUTLINE_IT* dest_it // output list +); +C_OUTLINE* join_chopped_fragments( // join pieces + C_OUTLINE_FRAG* bottom, // bottom of cut + C_OUTLINE_FRAG* top // top of cut +); +void join_segments( // join pieces + C_OUTLINE_FRAG* bottom, // bottom of cut + C_OUTLINE_FRAG* top // top of cut ); -void save_chop_cfragment( //chop the outline - int16_t head_index, //head of fragment - ICOORD head_pos, //head of fragment - int16_t tail_index, //tail of fragment - ICOORD tail_pos, //tail of fragment - C_OUTLINE *srcline, //source of edgesteps - C_OUTLINE_FRAG_LIST *frags //fragment list - ); -void add_frag_to_list( //ordered add - C_OUTLINE_FRAG *frag, //fragment to add - C_OUTLINE_FRAG_LIST *frags //fragment list - ); -void close_chopped_cfragments( //chop the outline - C_OUTLINE_FRAG_LIST *frags, //list to clear - C_OUTLINE_LIST *children, //potential children - float pitch_error, //allowed shrinkage - C_OUTLINE_IT *dest_it //output list - ); -C_OUTLINE *join_chopped_fragments( //join pieces - C_OUTLINE_FRAG *bottom, //bottom of cut - C_OUTLINE_FRAG *top //top of cut - ); -void join_segments( //join pieces - C_OUTLINE_FRAG *bottom, //bottom of cut - C_OUTLINE_FRAG *top //top of cut - ); #endif diff --git a/src/textord/gap_map.cpp b/src/textord/gap_map.cpp index c4ba63d094..1e1e83d714 100644 --- a/src/textord/gap_map.cpp +++ b/src/textord/gap_map.cpp @@ -7,16 +7,16 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "statistc.h" -#include "gap_map.h" +#include "gap_map.h" +#include "statistc.h" #define EXTERN -EXTERN BOOL_VAR (gapmap_debug, FALSE, "Say which blocks have tables"); -EXTERN BOOL_VAR (gapmap_use_ends, FALSE, -"Use large space at start and end of rows"); -EXTERN BOOL_VAR (gapmap_no_isolated_quanta, FALSE, -"Ensure gaps not less than 2quanta wide"); -EXTERN double_VAR (gapmap_big_gaps, 1.75, "xht multiplier"); +EXTERN BOOL_VAR(gapmap_debug, FALSE, "Say which blocks have tables"); +EXTERN BOOL_VAR(gapmap_use_ends, FALSE, + "Use large space at start and end of rows"); +EXTERN BOOL_VAR(gapmap_no_isolated_quanta, FALSE, + "Ensure gaps not less than 2quanta wide"); +EXTERN double_VAR(gapmap_big_gaps, 1.75, "xht multiplier"); /************************************************************************* * A block gap map is a quantised histogram of whitespace regions in the @@ -32,17 +32,17 @@ EXTERN double_VAR (gapmap_big_gaps, 1.75, "xht multiplier"); * *************************************************************************/ -GAPMAP::GAPMAP( //Constructor - TO_BLOCK *block //block - ) { - TO_ROW *row; //current row - BLOBNBOX_IT blob_it; //iterator +GAPMAP::GAPMAP( // Constructor + TO_BLOCK* block // block +) { + TO_ROW* row; // current row + BLOBNBOX_IT blob_it; // iterator TBOX blob_box; TBOX prev_blob_box; int16_t gap_width; int16_t start_of_row; int16_t end_of_row; - STATS xht_stats (0, 128); + STATS xht_stats(0, 128); int16_t min_quantum; int16_t max_quantum; int16_t i; @@ -58,18 +58,16 @@ GAPMAP::GAPMAP( //Constructor // row iterator TO_ROW_IT row_it(block->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row->blob_list ()->empty ()) { + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + if (!row->blob_list()->empty()) { total_rows++; - xht_stats.add ((int16_t) floor (row->xheight + 0.5), 1); - blob_it.set_to_list (row->blob_list ()); - start_of_row = blob_it.data ()->bounding_box ().left (); - end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); - if (min_left > start_of_row) - min_left = start_of_row; - if (max_right < end_of_row) - max_right = end_of_row; + xht_stats.add((int16_t)floor(row->xheight + 0.5), 1); + blob_it.set_to_list(row->blob_list()); + start_of_row = blob_it.data()->bounding_box().left(); + end_of_row = blob_it.data_relative(-1)->bounding_box().right(); + if (min_left > start_of_row) min_left = start_of_row; + if (max_right < end_of_row) max_right = end_of_row; } } if ((total_rows < 3) || (min_left >= max_right)) { @@ -77,54 +75,45 @@ GAPMAP::GAPMAP( //Constructor min_left = max_right = 0; return; } - bucket_size = (int16_t) floor (xht_stats.median () + 0.5) / 2; + bucket_size = (int16_t)floor(xht_stats.median() + 0.5) / 2; map_max = (max_right - min_left) / bucket_size; - map = (int16_t *) alloc_mem ((map_max + 1) * sizeof (int16_t)); - for (i = 0; i <= map_max; i++) - map[i] = 0; + map = (int16_t*)alloc_mem((map_max + 1) * sizeof(int16_t)); + for (i = 0; i <= map_max; i++) map[i] = 0; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row->blob_list ()->empty ()) { - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - blob_box = box_next (&blob_it); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + if (!row->blob_list()->empty()) { + blob_it.set_to_list(row->blob_list()); + blob_it.mark_cycle_pt(); + blob_box = box_next(&blob_it); prev_blob_box = blob_box; if (gapmap_use_ends) { /* Leading space */ - gap_width = blob_box.left () - min_left; - if ((gap_width > gapmap_big_gaps * row->xheight) - && gap_width > 2) { - max_quantum = (blob_box.left () - min_left) / bucket_size; + gap_width = blob_box.left() - min_left; + if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) { + max_quantum = (blob_box.left() - min_left) / bucket_size; if (max_quantum > map_max) max_quantum = map_max; - for (i = 0; i <= max_quantum; i++) - map[i]++; + for (i = 0; i <= max_quantum; i++) map[i]++; } } - while (!blob_it.cycled_list ()) { - blob_box = box_next (&blob_it); - gap_width = blob_box.left () - prev_blob_box.right (); - if ((gap_width > gapmap_big_gaps * row->xheight) - && gap_width > 2) { - min_quantum = - (prev_blob_box.right () - min_left) / bucket_size; - max_quantum = (blob_box.left () - min_left) / bucket_size; + while (!blob_it.cycled_list()) { + blob_box = box_next(&blob_it); + gap_width = blob_box.left() - prev_blob_box.right(); + if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) { + min_quantum = (prev_blob_box.right() - min_left) / bucket_size; + max_quantum = (blob_box.left() - min_left) / bucket_size; if (max_quantum > map_max) max_quantum = map_max; - for (i = min_quantum; i <= max_quantum; i++) - map[i]++; + for (i = min_quantum; i <= max_quantum; i++) map[i]++; } prev_blob_box = blob_box; } if (gapmap_use_ends) { /* Trailing space */ - gap_width = max_right - prev_blob_box.right (); - if ((gap_width > gapmap_big_gaps * row->xheight) - && gap_width > 2) { - min_quantum = - (prev_blob_box.right () - min_left) / bucket_size; + gap_width = max_right - prev_blob_box.right(); + if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) { + min_quantum = (prev_blob_box.right() - min_left) / bucket_size; if (min_quantum < 0) min_quantum = 0; - for (i = min_quantum; i <= map_max; i++) - map[i]++; + for (i = min_quantum; i <= map_max; i++) map[i]++; } } } @@ -132,42 +121,34 @@ GAPMAP::GAPMAP( //Constructor for (i = 0; i <= map_max; i++) { if (map[i] > total_rows / 2) { if (gapmap_no_isolated_quanta && - (((i == 0) && - (map[i + 1] <= total_rows / 2)) || - ((i == map_max) && - (map[i - 1] <= total_rows / 2)) || - ((i > 0) && - (i < map_max) && - (map[i - 1] <= total_rows / 2) && - (map[i + 1] <= total_rows / 2)))) { - map[i] = 0; //prevent isolated quantum - } - else + (((i == 0) && (map[i + 1] <= total_rows / 2)) || + ((i == map_max) && (map[i - 1] <= total_rows / 2)) || + ((i > 0) && (i < map_max) && (map[i - 1] <= total_rows / 2) && + (map[i + 1] <= total_rows / 2)))) { + map[i] = 0; // prevent isolated quantum + } else any_tabs = true; } } - if (gapmap_debug && any_tabs) - tprintf ("Table found\n"); + if (gapmap_debug && any_tabs) tprintf("Table found\n"); } - /************************************************************************* * GAPMAP::table_gap() * Is there a bucket in the specified range where more than half the rows in the * block have a wide gap? *************************************************************************/ -bool GAPMAP::table_gap( //Is gap a table? - int16_t left, //From here - int16_t right //To here +bool GAPMAP::table_gap( // Is gap a table? + int16_t left, // From here + int16_t right // To here ) { int16_t min_quantum; int16_t max_quantum; int16_t i; bool tab_found = false; - if (!any_tabs) - return false; + if (!any_tabs) return false; min_quantum = (left - min_left) / bucket_size; max_quantum = (right - min_left) / bucket_size; @@ -177,7 +158,6 @@ bool GAPMAP::table_gap( //Is gap a table? if (min_quantum < 0) min_quantum = 0; if (max_quantum > map_max) max_quantum = map_max; for (i = min_quantum; (!tab_found && (i <= max_quantum)); i++) - if (map[i] > total_rows / 2) - tab_found = true; + if (map[i] > total_rows / 2) tab_found = true; return tab_found; } diff --git a/src/textord/gap_map.h b/src/textord/gap_map.h index 4a04edcf03..0c9c623064 100644 --- a/src/textord/gap_map.h +++ b/src/textord/gap_map.h @@ -7,42 +7,40 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef GAP_MAP_H -#define GAP_MAP_H +#ifndef GAP_MAP_H +#define GAP_MAP_H -#include "blobbox.h" +#include "blobbox.h" -class GAPMAP -{ - public: - GAPMAP( //constructor - TO_BLOCK *block); +class GAPMAP { + public: + GAPMAP( // constructor + TO_BLOCK* block); - ~GAPMAP () { //destructor - if (map != nullptr) - free_mem(map); - } + ~GAPMAP() { // destructor + if (map != nullptr) free_mem(map); + } - bool table_gap( //Is gap a table? - int16_t left, //From here - int16_t right); //To here + bool table_gap( // Is gap a table? + int16_t left, // From here + int16_t right); // To here - private: - int16_t total_rows; //in block - int16_t min_left; //Left extreme - int16_t max_right; //Right extreme - int16_t bucket_size; // half an x ht - int16_t *map; //empty counts - int16_t map_max; //map[0..max_map] defind - bool any_tabs; + private: + int16_t total_rows; // in block + int16_t min_left; // Left extreme + int16_t max_right; // Right extreme + int16_t bucket_size; // half an x ht + int16_t* map; // empty counts + int16_t map_max; // map[0..max_map] defind + bool any_tabs; }; /*-----------------------------*/ -extern BOOL_VAR_H (gapmap_debug, FALSE, "Say which blocks have tables"); -extern BOOL_VAR_H (gapmap_use_ends, FALSE, -"Use large space at start and end of rows"); -extern BOOL_VAR_H (gapmap_no_isolated_quanta, FALSE, -"Ensure gaps not less than 2quanta wide"); -extern double_VAR_H (gapmap_big_gaps, 1.75, "xht multiplier"); +extern BOOL_VAR_H(gapmap_debug, FALSE, "Say which blocks have tables"); +extern BOOL_VAR_H(gapmap_use_ends, FALSE, + "Use large space at start and end of rows"); +extern BOOL_VAR_H(gapmap_no_isolated_quanta, FALSE, + "Ensure gaps not less than 2quanta wide"); +extern double_VAR_H(gapmap_big_gaps, 1.75, "xht multiplier"); #endif diff --git a/src/textord/imagefind.cpp b/src/textord/imagefind.cpp index c60f1b0e70..51fc3c8396 100644 --- a/src/textord/imagefind.cpp +++ b/src/textord/imagefind.cpp @@ -22,12 +22,12 @@ #include "config_auto.h" #endif -#include "imagefind.h" #include "colpartitiongrid.h" +#include "imagefind.h" #include "linlsq.h" #include "ndminx.h" -#include "statistc.h" #include "params.h" +#include "statistc.h" #include "allheaders.h" @@ -68,7 +68,7 @@ Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) { return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); // Reduce by factor 2. - Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); + Pix* pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); if (textord_tabfind_show_images && pixa_debug != nullptr) pixa_debug->AddPix(pixr, "CascadeReduced"); @@ -96,19 +96,18 @@ Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) { pixaDestroy(&pixadb); } pixDestroy(&pixr); - if (!ht_found && pixht2 != nullptr) - pixDestroy(&pixht2); + if (!ht_found && pixht2 != nullptr) pixDestroy(&pixht2); if (pixht2 == nullptr) return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); // Expand back up again. - Pix *pixht = pixExpandReplicate(pixht2, 2); + Pix* pixht = pixExpandReplicate(pixht2, 2); if (textord_tabfind_show_images && pixa_debug != nullptr) pixa_debug->AddPix(pixht, "HalftoneReplicated"); pixDestroy(&pixht2); // Fill to capture pixels near the mask edges that were missed - Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8); + Pix* pixt = pixSeedfillBinary(nullptr, pixht, pix, 8); pixOr(pixht, pixht, pixt); pixDestroy(&pixt); @@ -173,8 +172,7 @@ void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug, if (textord_tabfind_show_images && pixa_debug != nullptr) pixa_debug->AddPix(img_pix, "A component"); if (pixNearlyRectangular(img_pix, kMinRectangularFraction, - kMaxRectangularFraction, - kMaxRectangularGradient, + kMaxRectangularFraction, kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) { Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1); pixSetAll(simple_pix); @@ -185,8 +183,8 @@ void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug, // Fix the box to match the new pix. l_int32 x, y, width, height; boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height); - Box* simple_box = boxCreate(x + x_start, y + y_start, - x_end - x_start, y_end - y_start); + Box* simple_box = + boxCreate(x + x_start, y + y_start, x_end - x_start, y_end - y_start); boxaReplaceBox(*boxa, i, simple_box); } pixDestroy(&img_pix); @@ -201,28 +199,25 @@ void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug, // a row with pix_count > max_count then // true is returned, and *y_start = the first y with pix_count >= min_count. static bool HScanForEdge(uint32_t* data, int wpl, int x_start, int x_end, - int min_count, int mid_width, int max_count, - int y_end, int y_step, int* y_start) { + int min_count, int mid_width, int max_count, int y_end, + int y_step, int* y_start) { int mid_rows = 0; for (int y = *y_start; y != y_end; y += y_step) { - // Need pixCountPixelsInRow(pix, y, &pix_count, nullptr) to count in a subset. + // Need pixCountPixelsInRow(pix, y, &pix_count, nullptr) to count in a + // subset. int pix_count = 0; uint32_t* line = data + wpl * y; for (int x = x_start; x < x_end; ++x) { - if (GET_DATA_BIT(line, x)) - ++pix_count; + if (GET_DATA_BIT(line, x)) ++pix_count; } - if (mid_rows == 0 && pix_count < min_count) - continue; // In the min phase. + if (mid_rows == 0 && pix_count < min_count) continue; // In the min phase. if (mid_rows == 0) *y_start = y; // Save the y_start where we came out of the min phase. - if (pix_count > max_count) - return true; // Found the pattern. + if (pix_count > max_count) return true; // Found the pattern. ++mid_rows; - if (mid_rows > mid_width) - break; // Middle too big. + if (mid_rows > mid_width) break; // Middle too big. } - return false; // Never found max_count. + return false; // Never found max_count. } // Scans vertically on y=[y_start,y_end), starting with x=*x_start, @@ -233,27 +228,23 @@ static bool HScanForEdge(uint32_t* data, int wpl, int x_start, int x_end, // a column with pix_count > max_count then // true is returned, and *x_start = the first x with pix_count >= min_count. static bool VScanForEdge(uint32_t* data, int wpl, int y_start, int y_end, - int min_count, int mid_width, int max_count, - int x_end, int x_step, int* x_start) { + int min_count, int mid_width, int max_count, int x_end, + int x_step, int* x_start) { int mid_cols = 0; for (int x = *x_start; x != x_end; x += x_step) { int pix_count = 0; uint32_t* line = data + y_start * wpl; for (int y = y_start; y < y_end; ++y, line += wpl) { - if (GET_DATA_BIT(line, x)) - ++pix_count; + if (GET_DATA_BIT(line, x)) ++pix_count; } - if (mid_cols == 0 && pix_count < min_count) - continue; // In the min phase. + if (mid_cols == 0 && pix_count < min_count) continue; // In the min phase. if (mid_cols == 0) *x_start = x; // Save the place where we came out of the min phase. - if (pix_count > max_count) - return true; // found the pattern. + if (pix_count > max_count) return true; // found the pattern. ++mid_cols; - if (mid_cols > mid_width) - break; // Middle too big. + if (mid_cols > mid_width) break; // Middle too big. } - return false; // Never found max_count. + return false; // Never found max_count. } // Returns true if there is a rectangle in the source pix, such that all @@ -265,11 +256,10 @@ static bool VScanForEdge(uint32_t* data, int wpl, int y_start, int y_end, // On return, the rectangle is defined by x_start, y_start, x_end and y_end. // Note: the algorithm is iterative, allowing it to slice off pixels from // one edge, allowing it to then slice off more pixels from another edge. -bool ImageFind::pixNearlyRectangular(Pix* pix, - double min_fraction, double max_fraction, - double max_skew_gradient, - int* x_start, int* y_start, - int* x_end, int* y_end) { +bool ImageFind::pixNearlyRectangular(Pix* pix, double min_fraction, + double max_fraction, + double max_skew_gradient, int* x_start, + int* y_start, int* x_end, int* y_end) { ASSERT_HOST(pix != nullptr); *x_start = 0; *x_end = pixGetWidth(pix); @@ -291,13 +281,15 @@ bool ImageFind::pixNearlyRectangular(Pix* pix, int max_count = static_cast(width * max_fraction); int edge_width = static_cast(width * max_skew_gradient); if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, - max_count, *y_end, 1, y_start) && !top_done) { + max_count, *y_end, 1, y_start) && + !top_done) { top_done = true; any_cut = true; } --(*y_end); if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, - max_count, *y_start, -1, y_end) && !bottom_done) { + max_count, *y_start, -1, y_end) && + !bottom_done) { bottom_done = true; any_cut = true; } @@ -309,13 +301,15 @@ bool ImageFind::pixNearlyRectangular(Pix* pix, max_count = static_cast(height * max_fraction); edge_width = static_cast(height * max_skew_gradient); if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, - max_count, *x_end, 1, x_start) && !left_done) { + max_count, *x_end, 1, x_start) && + !left_done) { left_done = true; any_cut = true; } --(*x_end); if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, - max_count, *x_start, -1, x_end) && !right_done) { + max_count, *x_start, -1, x_end) && + !right_done) { right_done = true; any_cut = true; } @@ -333,8 +327,8 @@ bool ImageFind::pixNearlyRectangular(Pix* pix, // pixels at all. bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start, int* x_end, int* y_end) { - Box* input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, - *y_end - *y_start); + Box* input_box = + boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start); Box* output_box = nullptr; pixClipBoxToForeground(pix, input_box, nullptr, &output_box); bool result = output_box != nullptr; @@ -366,12 +360,12 @@ double ImageFind::ColorDistanceFromLine(const uint8_t* line1, line_vector[L_ALPHA_CHANNEL] = 0; // Now the cross product in 3d. int cross[kRGBRMSColors]; - cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE] - - line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN]; - cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED] - - line_vector[COLOR_RED] * point_vector[COLOR_BLUE]; - cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN] - - line_vector[COLOR_GREEN] * point_vector[COLOR_RED]; + cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE] - + line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN]; + cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED] - + line_vector[COLOR_RED] * point_vector[COLOR_BLUE]; + cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN] - + line_vector[COLOR_GREEN] * point_vector[COLOR_RED]; cross[L_ALPHA_CHANNEL] = 0; // Now the sums of the squares. double cross_sq = 0.0; @@ -386,7 +380,6 @@ double ImageFind::ColorDistanceFromLine(const uint8_t* line1, return cross_sq / line_sq; // This is the squared distance. } - // Returns the leptonica combined code for the given RGB triplet. uint32_t ImageFind::ComposeRGB(uint32_t r, uint32_t g, uint32_t b) { l_uint32 result; @@ -415,8 +408,8 @@ uint8_t ImageFind::ClipToByte(double pixel) { // color_map1, color_map2 and rms_map are assumed to be the same scale as pix. void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor, Pix* color_map1, Pix* color_map2, - Pix* rms_map, - uint8_t* color1, uint8_t* color2) { + Pix* rms_map, uint8_t* color1, + uint8_t* color2) { ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32); // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more // background. @@ -430,11 +423,10 @@ void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor, int bottom_pad = std::max(rect.bottom() - 2 * factor, 0) / factor; int width_pad = right_pad - left_pad; int height_pad = top_pad - bottom_pad; - if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4) - return; + if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4) return; // Now crop the pix to the rectangle. - Box* scaled_box = boxCreate(left_pad, height - top_pad, - width_pad, height_pad); + Box* scaled_box = + boxCreate(left_pad, height - top_pad, width_pad, height_pad); Pix* scaled = pixClipRectangle(pix, scaled_box, nullptr); // Compute stats over the whole image. @@ -519,14 +511,12 @@ void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor, memcpy(color2, color1, 4); } if (color_map1 != nullptr) { - pixSetInRectArbitrary(color_map1, scaled_box, - ComposeRGB(color1[COLOR_RED], - color1[COLOR_GREEN], - color1[COLOR_BLUE])); - pixSetInRectArbitrary(color_map2, scaled_box, - ComposeRGB(color2[COLOR_RED], - color2[COLOR_GREEN], - color2[COLOR_BLUE])); + pixSetInRectArbitrary( + color_map1, scaled_box, + ComposeRGB(color1[COLOR_RED], color1[COLOR_GREEN], color1[COLOR_BLUE])); + pixSetInRectArbitrary( + color_map2, scaled_box, + ComposeRGB(color2[COLOR_RED], color2[COLOR_GREEN], color2[COLOR_BLUE])); pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]); } pixDestroy(&scaled); @@ -581,13 +571,11 @@ bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2, TBOX search_box(box1); search_box += box2; if (box1.x_gap(box2) >= box1.y_gap(box2)) { - if (box1.x_gap(box2) <= 0) - return true; + if (box1.x_gap(box2) <= 0) return true; search_box.set_left(std::min(box1.right(), box2.right())); search_box.set_right(std::max(box1.left(), box2.left())); } else { - if (box1.y_gap(box2) <= 0) - return true; + if (box1.y_gap(box2) <= 0) return true; search_box.set_top(std::max(box1.bottom(), box2.bottom())); search_box.set_bottom(std::min(box1.top(), box2.top())); } @@ -600,14 +588,13 @@ int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX& im_box, const FCOORD& rotation, Pix* pix) { // Intersect it with the image box. box &= im_box; // This is in-place box intersection. - if (box.null_box()) - return 0; + if (box.null_box()) return 0; box.rotate(rotation); TBOX rotated_im_box(im_box); rotated_im_box.rotate(rotation); Pix* rect_pix = pixCreate(box.width(), box.height(), 1); - pixRasterop(rect_pix, 0, 0, box.width(), box.height(), - PIX_SRC, pix, box.left() - rotated_im_box.left(), + pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix, + box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top()); l_int32 result; pixCountPixels(rect_pix, &result, nullptr); @@ -681,56 +668,48 @@ static void CutChunkFromParts(const TBOX& box, const TBOX& im_box, if (box.top() < part_box.top()) { TBOX slice(part_box); slice.set_bottom(box.top()); - if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, - pix) > 0) { + if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > + 0) { AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); - part_it.add_before_stay_put( - ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, - BTFT_NONTEXT)); + part_it.add_before_stay_put(ColPartition::FakePartition( + slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT)); } } // Left of box. if (box.left() > part_box.left()) { TBOX slice(part_box); slice.set_right(box.left()); - if (box.top() < part_box.top()) - slice.set_top(box.top()); - if (box.bottom() > part_box.bottom()) - slice.set_bottom(box.bottom()); - if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, - pix) > 0) { + if (box.top() < part_box.top()) slice.set_top(box.top()); + if (box.bottom() > part_box.bottom()) slice.set_bottom(box.bottom()); + if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > + 0) { AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); - part_it.add_before_stay_put( - ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, - BTFT_NONTEXT)); + part_it.add_before_stay_put(ColPartition::FakePartition( + slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT)); } } // Right of box. if (box.right() < part_box.right()) { TBOX slice(part_box); slice.set_left(box.right()); - if (box.top() < part_box.top()) - slice.set_top(box.top()); - if (box.bottom() > part_box.bottom()) - slice.set_bottom(box.bottom()); - if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, - pix) > 0) { + if (box.top() < part_box.top()) slice.set_top(box.top()); + if (box.bottom() > part_box.bottom()) slice.set_bottom(box.bottom()); + if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > + 0) { AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); - part_it.add_before_stay_put( - ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, - BTFT_NONTEXT)); + part_it.add_before_stay_put(ColPartition::FakePartition( + slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT)); } } // Below box. if (box.bottom() > part_box.bottom()) { TBOX slice(part_box); slice.set_top(box.bottom()); - if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, - pix) > 0) { + if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > + 0) { AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); - part_it.add_before_stay_put( - ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, - BTFT_NONTEXT)); + part_it.add_before_stay_put(ColPartition::FakePartition( + slice, PT_UNKNOWN, BRT_POLYIMAGE, BTFT_NONTEXT)); } } part->DeleteBoxes(); @@ -752,9 +731,8 @@ static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation, ColPartitionGridSearch* rectsearch, ColPartition_LIST* part_list) { // Add the full im_box partition to the list to begin with. - ColPartition* pix_part = ColPartition::FakePartition(im_box, PT_UNKNOWN, - BRT_RECTIMAGE, - BTFT_NONTEXT); + ColPartition* pix_part = ColPartition::FakePartition( + im_box, PT_UNKNOWN, BRT_RECTIMAGE, BTFT_NONTEXT); ColPartition_IT part_it(part_list); part_it.add_after_then_move(pix_part); @@ -778,12 +756,12 @@ static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation, if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) { // Eat a piece out of the image. // Pad it so that pieces eaten out look decent. - int padding = part->blob_type() == BRT_VERT_TEXT - ? part_box.width() : part_box.height(); + int padding = part->blob_type() == BRT_VERT_TEXT ? part_box.width() + : part_box.height(); part_box.set_top(part_box.top() + padding / 2); part_box.set_bottom(part_box.bottom() - padding / 2); - CutChunkFromParts(part_box, im_box, rotation, rerotation, - pix, part_list); + CutChunkFromParts(part_box, im_box, rotation, rerotation, pix, + part_list); } else { // Strong overlap with the black area, so call it text on image. part->set_flow(BTFT_TEXT_ON_IMAGE); @@ -941,24 +919,24 @@ static int ExpandImageTop(const TBOX& box, int top_limit, // in the expanded box, and // returning the increase in area resulting from the expansion. static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box, - const TBOX& limit_box, - ColPartitionGrid* part_grid, TBOX* expanded_box) { + const TBOX& limit_box, ColPartitionGrid* part_grid, + TBOX* expanded_box) { *expanded_box = im_box; switch (dir) { case BND_LEFT: - expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(), - part_grid)); + expanded_box->set_left( + ExpandImageLeft(im_box, limit_box.left(), part_grid)); break; case BND_RIGHT: - expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(), - part_grid)); + expanded_box->set_right( + ExpandImageRight(im_box, limit_box.right(), part_grid)); break; case BND_ABOVE: expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid)); break; case BND_BELOW: - expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(), - part_grid)); + expanded_box->set_bottom( + ExpandImageBottom(im_box, limit_box.bottom(), part_grid)); break; default: return 0; @@ -1095,8 +1073,7 @@ static bool ExpandImageIntoParts(const TBOX& max_image_box, } im_part_box += box; *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN, - BRT_RECTIMAGE, - BTFT_NONTEXT); + BRT_RECTIMAGE, BTFT_NONTEXT); DeletePartition(image_part); part_grid->RemoveBBox(best_part); DeletePartition(best_part); @@ -1112,8 +1089,7 @@ static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) { int intersect_area = 0; ColPartition_IT part_it(part_list); // Iterate the parts and subtract intersecting area. - for (part_it.mark_cycle_pt(); !part_it.cycled_list(); - part_it.forward()) { + for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { ColPartition* image_part = part_it.data(); TBOX intersect = box.intersection(image_part->bounding_box()); intersect_area += intersect.area(); @@ -1148,8 +1124,7 @@ static bool TestWeakIntersectedPart(const TBOX& im_box, // (basically anything that is not BRT_STRONG_CHAIN or better) from both the // part_grid and the big_parts list that are contained within im_box and // overlapped enough by the possibly polygonal image. -static void EliminateWeakParts(const TBOX& im_box, - ColPartitionGrid* part_grid, +static void EliminateWeakParts(const TBOX& im_box, ColPartitionGrid* part_grid, ColPartition_LIST* big_parts, ColPartition_LIST* part_list) { ColPartitionGridSearch rectsearch(part_grid); @@ -1192,8 +1167,7 @@ static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) { ColPartition* part; bool any_text_in_padded_rect = false; while ((part = rectsearch.NextRectSearch()) != nullptr) { - if (part->flow() == BTFT_CHAIN || - part->flow() == BTFT_STRONG_CHAIN) { + if (part->flow() == BTFT_CHAIN || part->flow() == BTFT_STRONG_CHAIN) { // Text intersects the box. any_text_in_padded_rect = true; const TBOX& part_box = part->bounding_box(); @@ -1202,8 +1176,7 @@ static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) { } } } - if (!any_text_in_padded_rect) - *box = padded_box; + if (!any_text_in_padded_rect) *box = padded_box; return false; } @@ -1215,8 +1188,7 @@ static void MarkAndDeleteImageParts(const FCOORD& rerotate, ColPartitionGrid* part_grid, ColPartition_LIST* image_parts, Pix* image_pix) { - if (image_pix == nullptr) - return; + if (image_pix == nullptr) return; int imageheight = pixGetHeight(image_pix); ColPartition_IT part_it(image_parts); for (; !part_it.empty(); part_it.forward()) { @@ -1230,8 +1202,8 @@ static void MarkAndDeleteImageParts(const FCOORD& rerotate, part_box.rotate(rerotate); int left = part_box.left(); int top = part_box.top(); - pixRasterop(image_pix, left, imageheight - top, - part_box.width(), part_box.height(), PIX_SET, nullptr, 0, 0); + pixRasterop(image_pix, left, imageheight - top, part_box.width(), + part_box.height(), PIX_SET, nullptr, 0, 0); } DeletePartition(part); } @@ -1255,7 +1227,7 @@ void ImageFind::TransferImagePartsToImageMask(const FCOORD& rerotation, ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { BlobRegionType type = part->blob_type(); - if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { + if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { part_it.add_after_then_move(part); gsearch.RemoveBBox(); } @@ -1313,13 +1285,13 @@ void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation, l_int32 x, y, width, height; boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height); Pix* pix = pixaGetPix(pixa, i, L_CLONE); - TBOX im_box(x, imageheight -y - height, x + width, imageheight - y); + TBOX im_box(x, imageheight - y - height, x + width, imageheight - y); im_box.rotate(rotation); // Now matches all partitions and blobs. ColPartitionGridSearch rectsearch(part_grid); rectsearch.SetUniqueMode(true); ColPartition_LIST part_list; - DivideImageIntoParts(im_box, rotation, rerotation, pix, - &rectsearch, &part_list); + DivideImageIntoParts(im_box, rotation, rerotation, pix, &rectsearch, + &part_list); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(pix, "ImageComponent"); tprintf("Component has %d parts\n", part_list.length()); @@ -1334,7 +1306,8 @@ void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation, ColPartition* part = part_it.extract(); TBOX text_box(im_box); MaximalImageBoundingBox(part_grid, &text_box); - while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)); + while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)) + ; part_it.set_to_list(&part_list); part_it.add_after_then_move(part); im_box = part->bounding_box(); @@ -1362,5 +1335,4 @@ void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation, } } - } // namespace tesseract. diff --git a/src/textord/imagefind.h b/src/textord/imagefind.h index 5732a54051..140052996c 100644 --- a/src/textord/imagefind.h +++ b/src/textord/imagefind.h @@ -68,23 +68,23 @@ class ImageFind { // On return, the rectangle is defined by x_start, y_start, x_end and y_end. // Note: the algorithm is iterative, allowing it to slice off pixels from // one edge, allowing it to then slice off more pixels from another edge. - static bool pixNearlyRectangular(Pix* pix, - double min_fraction, double max_fraction, - double max_skew_gradient, - int* x_start, int* y_start, - int* x_end, int* y_end); + static bool pixNearlyRectangular(Pix* pix, double min_fraction, + double max_fraction, + double max_skew_gradient, int* x_start, + int* y_start, int* x_end, int* y_end); // Given an input pix, and a bounding rectangle, the sides of the rectangle // are shrunk inwards until they bound any black pixels found within the // original rectangle. Returns false if the rectangle contains no black // pixels at all. - static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start, - int* x_end, int* y_end); + static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start, int* x_end, + int* y_end); // Given a point in 3-D (RGB) space, returns the squared Euclidean distance // of the point from the given line, defined by a pair of points in the 3-D // (RGB) space, line1 and line2. - static double ColorDistanceFromLine(const uint8_t* line1, const uint8_t* line2, + static double ColorDistanceFromLine(const uint8_t* line1, + const uint8_t* line2, const uint8_t* point); // Returns the leptonica combined code for the given RGB triplet. @@ -105,8 +105,8 @@ class ImageFind { // color_map1, color_map2 and rms_map are assumed to be the same scale as pix. static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor, Pix* color_map1, Pix* color_map2, - Pix* rms_map, - uint8_t* color1, uint8_t* color2); + Pix* rms_map, uint8_t* color1, + uint8_t* color2); // Returns true if there are no black pixels in between the boxes. // The im_box must represent the bounding box of the pix in tesseract @@ -125,7 +125,6 @@ class ImageFind { static int CountPixelsInRotatedBox(TBOX box, const TBOX& im_box, const FCOORD& rotation, Pix* pix); - // Locates all the image partitions in the part_grid, that were found by a // previous call to FindImagePartitions, marks them in the image_mask, // removes them from the grid, and deletes them. This makes it possble to diff --git a/src/textord/linefind.cpp b/src/textord/linefind.cpp index 7370bb9e05..9088f8745a 100644 --- a/src/textord/linefind.cpp +++ b/src/textord/linefind.cpp @@ -22,12 +22,12 @@ #include "config_auto.h" #endif -#include "linefind.h" #include "alignedblob.h" -#include "tabvector.h" #include "blobbox.h" #include "edgblob.h" +#include "linefind.h" #include "openclwrapper.h" +#include "tabvector.h" #include "allheaders.h" @@ -76,14 +76,14 @@ static void RemoveUnusedLineSegments(bool horizontal_lines, // (to use FindVerticalAlignment) so we have to flip x and y and then // convert to Leptonica by height - flipped x (ie the right edge). // See GetLineBoxes for more explanation. - pixbox = boxCreate(box.bottom(), height - box.right(), - box.height(), box.width()); + pixbox = boxCreate(box.bottom(), height - box.right(), box.height(), + box.width()); } else { // For vertical lines, just flip upside-down to convert to Leptonica. // The y position of the box in Leptonica terms is the distance from // the top of the image to the top of the box. - pixbox = boxCreate(box.left(), height - box.top(), - box.width(), box.height()); + pixbox = boxCreate(box.left(), height - box.top(), box.width(), + box.height()); } pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); @@ -124,8 +124,7 @@ static int MaxStrokeWidth(Pix* pix) { for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { int pixel = GET_DATA_BYTE(data, x); - if (pixel > max_dist) - max_dist = pixel; + if (pixel > max_dist) max_dist = pixel; } data += wpl; } @@ -154,7 +153,8 @@ static int CountPixelsAdjacentToLine(int line_width, Box* line_box, boxGetGeometry(line_box, &x, &y, &box_width, &box_height); if (box_width > box_height) { // horizontal line. - int bottom = std::min(pixGetHeight(nonline_pix), y + box_height + line_width); + int bottom = + std::min(pixGetHeight(nonline_pix), y + box_height + line_width); y = std::max(0, y - line_width); box_height = bottom - y; } else { @@ -205,12 +205,11 @@ static int FilterFalsePositives(int resolution, Pix* nonline_pix, // Too thick for the length. bad_line = true; } - if (!bad_line && - (intersection_pix == nullptr || - NumTouchingIntersections(box, intersection_pix) < 2)) { + if (!bad_line && (intersection_pix == nullptr || + NumTouchingIntersections(box, intersection_pix) < 2)) { // Test non-line density near the line. - int nonline_count = CountPixelsAdjacentToLine(max_width, box, - nonline_pix); + int nonline_count = + CountPixelsAdjacentToLine(max_width, box, nonline_pix); if (nonline_count > box_height * box_width * kMaxNonLineDensity) bad_line = true; } @@ -295,8 +294,7 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix, pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); pixSubtract(pix, pix, *pix_music_mask); } - if (pixa_display != nullptr) - pixaAddPix(pixa_display, pix, L_CLONE); + if (pixa_display != nullptr) pixaAddPix(pixa_display, pix, L_CLONE); pixDestroy(&pix_vline); pixDestroy(&pix_non_vline); @@ -359,16 +357,15 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, // If no good lines are found, pix_vline is destroyed. // None of the input pointers may be nullptr, and if *pix_vline is nullptr then // the function does nothing. -void LineFinder::FindAndRemoveVLines(int resolution, - Pix* pix_intersections, +void LineFinder::FindAndRemoveVLines(int resolution, Pix* pix_intersections, int* vertical_x, int* vertical_y, Pix** pix_vline, Pix* pix_non_vline, Pix* src_pix, TabVector_LIST* vectors) { if (pix_vline == nullptr || *pix_vline == nullptr) return; C_BLOB_LIST line_cblobs; BLOBNBOX_LIST line_bblobs; - GetLineBoxes(false, *pix_vline, pix_intersections, - &line_cblobs, &line_bblobs); + GetLineBoxes(false, *pix_vline, pix_intersections, &line_cblobs, + &line_bblobs); int width = pixGetWidth(src_pix); int height = pixGetHeight(src_pix); ICOORD bleft(0, 0); @@ -395,8 +392,7 @@ void LineFinder::FindAndRemoveVLines(int resolution, // If no good lines are found, pix_hline is destroyed. // None of the input pointers may be nullptr, and if *pix_hline is nullptr then // the function does nothing. -void LineFinder::FindAndRemoveHLines(int resolution, - Pix* pix_intersections, +void LineFinder::FindAndRemoveHLines(int resolution, Pix* pix_intersections, int vertical_x, int vertical_y, Pix** pix_hline, Pix* pix_non_hline, Pix* src_pix, TabVector_LIST* vectors) { @@ -434,9 +430,8 @@ void LineFinder::FindAndRemoveHLines(int resolution, // The output vertical_x and vertical_y are the total of all the vectors. // The output list of TabVector makes no reference to the input BLOBNBOXes. void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, - BLOBNBOX_LIST* line_bblobs, - int* vertical_x, int* vertical_y, - TabVector_LIST* vectors) { + BLOBNBOX_LIST* line_bblobs, int* vertical_x, + int* vertical_y, TabVector_LIST* vectors) { BLOBNBOX_IT bbox_it(line_bblobs); int b_count = 0; // Put all the blobs into the grid to find the lines, and move the blobs @@ -452,8 +447,7 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, blob_grid.InsertBBox(false, true, bblob); ++b_count; } - if (b_count == 0) - return; + if (b_count == 0) return; // Search the entire grid, looking for vertical line vectors. BlobGridSearch lsearch(&blob_grid); @@ -466,12 +460,11 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) { const TBOX& box = bbox->bounding_box(); if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) - tprintf("Finding line vector starting at bbox (%d,%d)\n", - box.left(), box.bottom()); + tprintf("Finding line vector starting at bbox (%d,%d)\n", box.left(), + box.bottom()); AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); - TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox, - vertical_x, - vertical_y); + TabVector* vector = blob_grid.FindVerticalAlignment( + align_params, bbox, vertical_x, vertical_y); if (vector != nullptr) { vector->Freeze(); vector_it.add_to_end(vector); @@ -485,9 +478,8 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, // is taken to be a bar. Bars are used as a seed and the entire touching // component is added to the output music mask and subtracted from the lines. // Returns nullptr and does minimal work if no music is found. -static Pix* FilterMusic(int resolution, Pix* pix_closed, - Pix* pix_vline, Pix* pix_hline, - l_int32* v_empty, l_int32* h_empty) { +static Pix* FilterMusic(int resolution, Pix* pix_closed, Pix* pix_vline, + Pix* pix_hline, l_int32* v_empty, l_int32* h_empty) { int max_stave_height = static_cast(resolution * kMaxStaveHeight); Pix* intersection_pix = pixAnd(nullptr, pix_vline, pix_hline); Boxa* boxa = pixConnComp(pix_vline, nullptr, 8); @@ -504,8 +496,8 @@ static Pix* FilterMusic(int resolution, Pix* pix_closed, if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) { // This is a music bar. Add to the mask. if (music_mask == nullptr) - music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), - 1); + music_mask = + pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), 1); pixSetInRect(music_mask, box); } boxDestroy(&box); @@ -567,11 +559,10 @@ static Pix* FilterMusic(int resolution, Pix* pix_closed, // but any of the returns that are empty will be nullptr on output. // None of the input (1st level) pointers may be nullptr except pix_music_mask, // which will disable music detection, and pixa_display. -void LineFinder::GetLineMasks(int resolution, Pix* src_pix, - Pix** pix_vline, Pix** pix_non_vline, - Pix** pix_hline, Pix** pix_non_hline, - Pix** pix_intersections, Pix** pix_music_mask, - Pixa* pixa_display) { +void LineFinder::GetLineMasks(int resolution, Pix* src_pix, Pix** pix_vline, + Pix** pix_non_vline, Pix** pix_hline, + Pix** pix_non_hline, Pix** pix_intersections, + Pix** pix_music_mask, Pixa* pixa_display) { Pix* pix_closed = nullptr; Pix* pix_hollow = nullptr; @@ -588,9 +579,8 @@ void LineFinder::GetLineMasks(int resolution, Pix* src_pix, #ifdef USE_OPENCL if (OpenclDevice::selectedDeviceIsOpenCL()) { // OpenCL pixGetLines Operation - int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), - pixGetHeight(src_pix), - src_pix); + int clStatus = OpenclDevice::initMorphCLAllocations( + pixGetWpl(src_pix), pixGetHeight(src_pix), src_pix); bool getpixclosed = pix_music_mask != nullptr ? true : false; OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline, &pix_closed, getpixclosed, closing_brick, @@ -598,30 +588,28 @@ void LineFinder::GetLineMasks(int resolution, Pix* src_pix, min_line_length, min_line_length); } else { #endif - // Close up small holes, making it less likely that false alarms are found - // in thickened text (as it will become more solid) and also smoothing over - // some line breaks and nicks in the edges of the lines. - pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); - if (pixa_display != nullptr) - pixaAddPix(pixa_display, pix_closed, L_CLONE); - // Open up with a big box to detect solid areas, which can then be subtracted. - // This is very generous and will leave in even quite wide lines. - Pix* pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, - max_line_width); - if (pixa_display != nullptr) - pixaAddPix(pixa_display, pix_solid, L_CLONE); - pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); + // Close up small holes, making it less likely that false alarms are found + // in thickened text (as it will become more solid) and also smoothing over + // some line breaks and nicks in the edges of the lines. + pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); + if (pixa_display != nullptr) pixaAddPix(pixa_display, pix_closed, L_CLONE); + // Open up with a big box to detect solid areas, which can then be + // subtracted. This is very generous and will leave in even quite wide + // lines. + Pix* pix_solid = + pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); + if (pixa_display != nullptr) pixaAddPix(pixa_display, pix_solid, L_CLONE); + pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); - pixDestroy(&pix_solid); + pixDestroy(&pix_solid); - // Now open up in both directions independently to find lines of at least - // 1 inch/kMinLineLengthFraction in length. - if (pixa_display != nullptr) - pixaAddPix(pixa_display, pix_hollow, L_CLONE); - *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); + // Now open up in both directions independently to find lines of at least + // 1 inch/kMinLineLengthFraction in length. + if (pixa_display != nullptr) pixaAddPix(pixa_display, pix_hollow, L_CLONE); + *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); + *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - pixDestroy(&pix_hollow); + pixDestroy(&pix_hollow); #ifdef USE_OPENCL } #endif @@ -634,9 +622,8 @@ void LineFinder::GetLineMasks(int resolution, Pix* src_pix, pixZero(*pix_hline, &h_empty); if (pix_music_mask != nullptr) { if (!v_empty && !h_empty) { - *pix_music_mask = FilterMusic(resolution, pix_closed, - *pix_vline, *pix_hline, - &v_empty, &h_empty); + *pix_music_mask = FilterMusic(resolution, pix_closed, *pix_vline, + *pix_hline, &v_empty, &h_empty); } else { *pix_music_mask = nullptr; } @@ -694,7 +681,8 @@ void LineFinder::GetLineMasks(int resolution, Pix* src_pix, if (pixa_display != nullptr) { if (*pix_vline != nullptr) pixaAddPix(pixa_display, *pix_vline, L_CLONE); if (*pix_hline != nullptr) pixaAddPix(pixa_display, *pix_hline, L_CLONE); - if (pix_nonlines != nullptr) pixaAddPix(pixa_display, pix_nonlines, L_CLONE); + if (pix_nonlines != nullptr) + pixaAddPix(pixa_display, pix_nonlines, L_CLONE); if (*pix_non_vline != nullptr) pixaAddPix(pixa_display, *pix_non_vline, L_CLONE); if (*pix_non_hline != nullptr) @@ -710,9 +698,8 @@ void LineFinder::GetLineMasks(int resolution, Pix* src_pix, // Returns a list of boxes corresponding to the candidate line segments. Sets // the line_crossings member of the boxes so we can later determin the number // of intersections touched by a full line. -void LineFinder::GetLineBoxes(bool horizontal_lines, - Pix* pix_lines, Pix* pix_intersections, - C_BLOB_LIST* line_cblobs, +void LineFinder::GetLineBoxes(bool horizontal_lines, Pix* pix_lines, + Pix* pix_intersections, C_BLOB_LIST* line_cblobs, BLOBNBOX_LIST* line_bblobs) { // Put a single pixel crack in every line at an arbitrary spacing, // so they break up and the bounding boxes can be used to get the @@ -744,8 +731,8 @@ void LineFinder::GetLineBoxes(bool horizontal_lines, bbox_it.add_to_end(bblob); // Determine whether the line segment touches two intersections. const TBOX& bbox = bblob->bounding_box(); - Box* box = boxCreate(bbox.left(), bbox.bottom(), - bbox.width(), bbox.height()); + Box* box = + boxCreate(bbox.left(), bbox.bottom(), bbox.width(), bbox.height()); bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections)); boxDestroy(&box); // Transform the bounding box prior to finding lines. To save writing @@ -757,12 +744,12 @@ void LineFinder::GetLineBoxes(bool horizontal_lines, // bbox.bottom(), being the MIN y coord, is actually the top, so to get // back to Leptonica coords in RemoveUnusedLineSegments, we have to // use height - box.right() as the top, which looks very odd. - TBOX new_box(height - bbox.top(), bbox.left(), - height - bbox.bottom(), bbox.right()); + TBOX new_box(height - bbox.top(), bbox.left(), height - bbox.bottom(), + bbox.right()); bblob->set_bounding_box(new_box); } else { - TBOX new_box(bbox.left(), height - bbox.top(), - bbox.right(), height - bbox.bottom()); + TBOX new_box(bbox.left(), height - bbox.top(), bbox.right(), + height - bbox.bottom()); bblob->set_bounding_box(new_box); } } diff --git a/src/textord/linefind.h b/src/textord/linefind.h index 264850f342..7d210b7615 100644 --- a/src/textord/linefind.h +++ b/src/textord/linefind.h @@ -58,10 +58,9 @@ class LineFinder { * * The detected lines are removed from the pix. */ - static void FindAndRemoveLines(int resolution, bool debug, Pix* pix, + static void FindAndRemoveLines(int resolution, bool debug, Pix* pix, int* vertical_x, int* vertical_y, - Pix** pix_music_mask, - TabVector_LIST* v_lines, + Pix** pix_music_mask, TabVector_LIST* v_lines, TabVector_LIST* h_lines); /** @@ -85,13 +84,11 @@ class LineFinder { // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. // If no good lines are found, pix_vline is destroyed. - static void FindAndRemoveVLines(int resolution, - Pix* pix_intersections, + static void FindAndRemoveVLines(int resolution, Pix* pix_intersections, int* vertical_x, int* vertical_y, Pix** pix_vline, Pix* pix_non_vline, Pix* src_pix, TabVector_LIST* vectors); - // Finds horizontal line objects in pix_vline and removes them from src_pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. @@ -100,8 +97,7 @@ class LineFinder { // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. // If no good lines are found, pix_hline is destroyed. - static void FindAndRemoveHLines(int resolution, - Pix* pix_intersections, + static void FindAndRemoveHLines(int resolution, Pix* pix_intersections, int vertical_x, int vertical_y, Pix** pix_hline, Pix* pix_non_hline, Pix* src_pix, TabVector_LIST* vectors); @@ -112,9 +108,8 @@ class LineFinder { // The output vertical_x and vertical_y are the total of all the vectors. // The output list of TabVector makes no reference to the input BLOBNBOXes. static void FindLineVectors(const ICOORD& bleft, const ICOORD& tright, - BLOBNBOX_LIST* line_bblobs, - int* vertical_x, int* vertical_y, - TabVector_LIST* vectors); + BLOBNBOX_LIST* line_bblobs, int* vertical_x, + int* vertical_y, TabVector_LIST* vectors); // Most of the heavy lifting of line finding. Given src_pix and its separate // resolution, returns image masks: @@ -127,20 +122,19 @@ class LineFinder { // pix_music_mask candidate music staves. // This function promises to initialize all the output (2nd level) pointers, // but any of the returns that are empty will be nullptr on output. - // None of the input (1st level) pointers may be nullptr except pix_music_mask, - // which will disable music detection, and pixa_display, which is for debug. - static void GetLineMasks(int resolution, Pix* src_pix, - Pix** pix_vline, Pix** pix_non_vline, - Pix** pix_hline, Pix** pix_non_hline, - Pix** pix_intersections, Pix** pix_music_mask, - Pixa* pixa_display); + // None of the input (1st level) pointers may be nullptr except + // pix_music_mask, which will disable music detection, and pixa_display, which + // is for debug. + static void GetLineMasks(int resolution, Pix* src_pix, Pix** pix_vline, + Pix** pix_non_vline, Pix** pix_hline, + Pix** pix_non_hline, Pix** pix_intersections, + Pix** pix_music_mask, Pixa* pixa_display); // Returns a list of boxes corresponding to the candidate line segments. Sets // the line_crossings member of the boxes so we can later determin the number // of intersections touched by a full line. - static void GetLineBoxes(bool horizontal_lines, - Pix* pix_lines, Pix* pix_intersections, - C_BLOB_LIST* line_cblobs, + static void GetLineBoxes(bool horizontal_lines, Pix* pix_lines, + Pix* pix_intersections, C_BLOB_LIST* line_cblobs, BLOBNBOX_LIST* line_bblobs); }; diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp index cc753b8dc7..983cc8a495 100644 --- a/src/textord/makerow.cpp +++ b/src/textord/makerow.cpp @@ -18,23 +18,23 @@ **********************************************************************/ #ifdef __UNIX__ -#include +#include #endif -#include "stderr.h" -#include "blobbox.h" -#include "ccstruct.h" -#include "detlinefit.h" -#include "statistc.h" -#include "drawtord.h" -#include "blkocc.h" -#include "sortflts.h" -#include "oldbasel.h" -#include "textord.h" -#include "tordmain.h" -#include "underlin.h" -#include "makerow.h" -#include "tprintf.h" -#include "tovars.h" +#include "blkocc.h" +#include "blobbox.h" +#include "ccstruct.h" +#include "detlinefit.h" +#include "drawtord.h" +#include "makerow.h" +#include "oldbasel.h" +#include "sortflts.h" +#include "statistc.h" +#include "stderr.h" +#include "textord.h" +#include "tordmain.h" +#include "tovars.h" +#include "tprintf.h" +#include "underlin.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -104,14 +104,14 @@ INT_VAR(textord_lms_line_trials, 12, "Number of linew fits to do"); BOOL_VAR(textord_new_initial_xheight, TRUE, "Use test xheight mechanism"); BOOL_VAR(textord_debug_blob, FALSE, "Print test blob information"); -#define MAX_HEIGHT_MODES 12 +#define MAX_HEIGHT_MODES 12 const int kMinLeaderCount = 5; // Factored-out helper to build a single row from a list of blobs. // Returns the mean blob size. -static float MakeRowFromBlobs(float line_size, - BLOBNBOX_IT* blob_it, TO_ROW_IT* row_it) { +static float MakeRowFromBlobs(float line_size, BLOBNBOX_IT* blob_it, + TO_ROW_IT* row_it) { blob_it->sort(blob_x_order); blob_it->move_to_first(); TO_ROW* row = nullptr; @@ -142,8 +142,7 @@ float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob, TO_ROW_IT* row_it) { C_OUTLINE_IT ol_it(blob->out_list()); // Get the children. ol_it.set_to_list(ol_it.data()->child()); - if (ol_it.empty()) - return 0.0f; + if (ol_it.empty()) return 0.0f; for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { // Deep copy the child outline and use that to make a blob. C_BLOB* blob = new C_BLOB(C_OUTLINE::deep_copy(ol_it.data())); @@ -163,8 +162,8 @@ float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob, TO_ROW_IT* row_it) { * only a single blob, it makes 2 rows, in case the top-level blob * is a container of the real blobs to recognize. */ -float make_single_row(ICOORD page_tr, bool allow_sub_blobs, - TO_BLOCK* block, TO_BLOCK_LIST* blocks) { +float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block, + TO_BLOCK_LIST* blocks) { BLOBNBOX_IT blob_it = &block->blobs; TO_ROW_IT row_it = block->get_rows(); @@ -175,8 +174,7 @@ float make_single_row(ICOORD page_tr, bool allow_sub_blobs, if (block->blobs.singleton() && allow_sub_blobs) { blob_it.move_to_first(); float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it); - if (size > block->line_size) - block->line_size = size; + if (size > block->line_size) block->line_size = size; } else if (block->blobs.empty()) { // Make a fake blob. C_BLOB* blob = C_BLOB::FakeBlob(block->block->pdblk.bounding_box()); @@ -200,25 +198,24 @@ float make_single_row(ICOORD page_tr, bool allow_sub_blobs, * * Arrange the blobs into rows. */ -float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { - float port_m; // global skew - float port_err; // global noise - TO_BLOCK_IT block_it; // iterator +float make_rows(ICOORD page_tr, TO_BLOCK_LIST* port_blocks) { + float port_m; // global skew + float port_err; // global noise + TO_BLOCK_IT block_it; // iterator block_it.set_to_list(port_blocks); - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) - make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f), - !textord_test_landscape); - // compute globally + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) + make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f), + !textord_test_landscape); + // compute globally compute_page_skew(port_blocks, port_m, port_err); block_it.set_to_list(port_blocks); for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f), - block_it.data()->block->pdblk.bounding_box().left(), - !textord_test_landscape); + block_it.data()->block->pdblk.bounding_box().left(), + !textord_test_landscape); } - return port_m; // global skew + return port_m; // global skew } /** @@ -226,48 +223,46 @@ float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { * * Arrange the good blobs into rows of text. */ -void make_initial_textrows( //find lines - ICOORD page_tr, - TO_BLOCK* block, //block to do - FCOORD rotation, //for drawing - bool testing_on //correct orientation +void make_initial_textrows( // find lines + ICOORD page_tr, + TO_BLOCK* block, // block to do + FCOORD rotation, // for drawing + bool testing_on // correct orientation ) { - TO_ROW_IT row_it = block->get_rows (); + TO_ROW_IT row_it = block->get_rows(); #ifndef GRAPHICS_DISABLED - ScrollView::Color colour; //of row + ScrollView::Color colour; // of row if (textord_show_initial_rows && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); + if (to_win == nullptr) create_to_win(page_tr); } #endif - //guess skew - assign_blobs_to_rows (block, nullptr, 0, TRUE, TRUE, textord_show_initial_rows && testing_on); - row_it.move_to_first (); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - fit_lms_line (row_it.data ()); + // guess skew + assign_blobs_to_rows(block, nullptr, 0, TRUE, TRUE, + textord_show_initial_rows && testing_on); + row_it.move_to_first(); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) + fit_lms_line(row_it.data()); #ifndef GRAPHICS_DISABLED if (textord_show_initial_rows && testing_on) { colour = ScrollView::RED; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - plot_to_row (row_it.data (), colour, rotation); - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + plot_to_row(row_it.data(), colour, rotation); + colour = (ScrollView::Color)(colour + 1); + if (colour > ScrollView::MAGENTA) colour = ScrollView::RED; } } #endif } - /** * @name fit_lms_line * * Fit an LMS line to a row. */ -void fit_lms_line(TO_ROW *row) { - float m, c; // fitted line +void fit_lms_line(TO_ROW* row) { + float m, c; // fitted line tesseract::DetLineFit lms; BLOBNBOX_IT blob_it = row->blob_list(); @@ -279,110 +274,104 @@ void fit_lms_line(TO_ROW *row) { row->set_line(m, c, error); } - /** * @name compute_page_skew * * Compute the skew over a full page by averaging the gradients over * all the lines. Get the error of the same row. */ -void compute_page_skew( //get average gradient - TO_BLOCK_LIST *blocks, //list of blocks - float &page_m, //average gradient - float &page_err //average error - ) { - int32_t row_count; //total rows - int32_t blob_count; //total_blobs - int32_t row_err; //integer error - float *gradients; //of rows - float *errors; //of rows - int32_t row_index; //of total - TO_ROW *row; //current row - TO_BLOCK_IT block_it = blocks; //iterator +void compute_page_skew( // get average gradient + TO_BLOCK_LIST* blocks, // list of blocks + float& page_m, // average gradient + float& page_err // average error +) { + int32_t row_count; // total rows + int32_t blob_count; // total_blobs + int32_t row_err; // integer error + float* gradients; // of rows + float* errors; // of rows + int32_t row_index; // of total + TO_ROW* row; // current row + TO_BLOCK_IT block_it = blocks; // iterator row_count = 0; blob_count = 0; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); if (pb != nullptr && !pb->IsText()) continue; // Pretend non-text blocks don't exist. - row_count += block_it.data ()->get_rows ()->length (); - //count up rows + row_count += block_it.data()->get_rows()->length(); + // count up rows TO_ROW_IT row_it(block_it.data()->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - blob_count += row_it.data ()->blob_list ()->length (); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) + blob_count += row_it.data()->blob_list()->length(); } if (row_count == 0) { page_m = 0.0f; page_err = 0.0f; return; } - gradients = (float *) alloc_mem (blob_count * sizeof (float)); - //get mem - errors = (float *) alloc_mem (blob_count * sizeof (float)); + gradients = (float*)alloc_mem(blob_count * sizeof(float)); + // get mem + errors = (float*)alloc_mem(blob_count * sizeof(float)); if (gradients == nullptr || errors == nullptr) - MEMORY_OUT.error ("compute_page_skew", ABORT, nullptr); + MEMORY_OUT.error("compute_page_skew", ABORT, nullptr); row_index = 0; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); if (pb != nullptr && !pb->IsText()) continue; // Pretend non-text blocks don't exist. - TO_ROW_IT row_it(block_it.data ()->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - blob_count = row->blob_list ()->length (); - row_err = (int32_t) ceil (row->line_error ()); - if (row_err <= 0) - row_err = 1; + TO_ROW_IT row_it(block_it.data()->get_rows()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + blob_count = row->blob_list()->length(); + row_err = (int32_t)ceil(row->line_error()); + if (row_err <= 0) row_err = 1; if (textord_biased_skewcalc) { blob_count /= row_err; for (blob_count /= row_err; blob_count > 0; blob_count--) { - gradients[row_index] = row->line_m (); - errors[row_index] = row->line_error (); + gradients[row_index] = row->line_m(); + errors[row_index] = row->line_error(); row_index++; } - } - else if (blob_count >= textord_min_blobs_in_row) { - //get gradient - gradients[row_index] = row->line_m (); - errors[row_index] = row->line_error (); + } else if (blob_count >= textord_min_blobs_in_row) { + // get gradient + gradients[row_index] = row->line_m(); + errors[row_index] = row->line_error(); row_index++; } } } if (row_index == 0) { - //desperate - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { + // desperate + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); if (pb != nullptr && !pb->IsText()) continue; // Pretend non-text blocks don't exist. TO_ROW_IT row_it(block_it.data()->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); - row_it.forward ()) { - row = row_it.data (); - gradients[row_index] = row->line_m (); - errors[row_index] = row->line_error (); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + gradients[row_index] = row->line_m(); + errors[row_index] = row->line_error(); row_index++; } } } row_count = row_index; - row_index = choose_nth_item ((int32_t) (row_count * textord_skew_ile), - gradients, row_count); + row_index = choose_nth_item((int32_t)(row_count * textord_skew_ile), + gradients, row_count); page_m = gradients[row_index]; - row_index = choose_nth_item ((int32_t) (row_count * textord_skew_ile), - errors, row_count); + row_index = choose_nth_item((int32_t)(row_count * textord_skew_ile), errors, + row_count); page_err = errors[row_index]; free_mem(gradients); free_mem(errors); } const double kNoiseSize = 0.5; // Fraction of xheight. -const int kMinSize = 8; // Min pixels to be xheight. +const int kMinSize = 8; // Min pixels to be xheight. /** * Return true if the dot looks like it is part of the i. @@ -394,7 +383,7 @@ static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) { // Must overlap horizontally by enough and be high enough. int overlap = std::min(dotbox.right(), ibox.right()) - - std::max(dotbox.left(), ibox.left()); + std::max(dotbox.left(), ibox.left()); if (ibox.height() <= 2 * dotbox.height() || (overlap * 2 < ibox.width() && overlap < dotbox.width())) return false; @@ -408,10 +397,10 @@ static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) { // of the dot. const double kHeightFraction = 0.6; double target_height = std::min(dotbox.bottom(), ibox.top()); - target_height -= row->line_m()*dotbox.left() + row->line_c(); + target_height -= row->line_m() * dotbox.left() + row->line_c(); target_height *= kHeightFraction; int left_min = dotbox.left() - dotbox.width(); - int middle = (dotbox.left() + dotbox.right())/2; + int middle = (dotbox.left() + dotbox.right()) / 2; int right_max = dotbox.right() + dotbox.width(); int left_miny = 0; int left_maxy = 0; @@ -442,8 +431,7 @@ static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) { } else if (in_left) { // We just left the left so look for size. if (left_maxy - left_miny > target_height) { - if (found_right) - return true; + if (found_right) return true; found_left = true; } in_left = false; @@ -460,8 +448,7 @@ static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) { } else if (in_right) { // We just left the right so look for size. if (right_maxy - right_miny > target_height) { - if (found_left) - return true; + if (found_left) return true; found_right = true; } in_right = false; @@ -472,8 +459,8 @@ static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) { } void vigorous_noise_removal(TO_BLOCK* block) { - TO_ROW_IT row_it = block->get_rows (); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + TO_ROW_IT row_it = block->get_rows(); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { TO_ROW* row = row_it.data(); BLOBNBOX_IT b_it = row->blob_list(); // Estimate the xheight on the row. @@ -487,8 +474,7 @@ void vigorous_noise_removal(TO_BLOCK* block) { for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOBNBOX* blob = b_it.data(); int height = blob->bounding_box().height(); - if (height >= kMinSize) - hstats.add(blob->bounding_box().height(), 1); + if (height >= kMinSize) hstats.add(blob->bounding_box().height(), 1); } float xheight = hstats.median(); // Delete small objects. @@ -499,13 +485,11 @@ void vigorous_noise_removal(TO_BLOCK* block) { if (box.height() < kNoiseSize * xheight) { // Small so delete unless it looks like an i dot. if (prev != nullptr) { - if (dot_of_i(blob, prev, row)) - continue; // Looks OK. + if (dot_of_i(blob, prev, row)) continue; // Looks OK. } if (!b_it.at_last()) { BLOBNBOX* next = b_it.data_relative(1); - if (dot_of_i(blob, next, row)) - continue; // Looks OK. + if (dot_of_i(blob, next, row)) continue; // Looks OK. } // It might be noise so get rid of it. delete blob->cblob(); @@ -522,52 +506,45 @@ void vigorous_noise_removal(TO_BLOCK* block) { * * Remove overlapping rows and fit all the blobs to what's left. */ -void cleanup_rows_making( //find lines - ICOORD page_tr, //top right - TO_BLOCK* block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - int32_t block_edge, //edge of block - bool testing_on //correct orientation +void cleanup_rows_making( // find lines + ICOORD page_tr, // top right + TO_BLOCK* block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on // correct orientation ) { - //iterators + // iterators BLOBNBOX_IT blob_it = &block->blobs; - TO_ROW_IT row_it = block->get_rows (); + TO_ROW_IT row_it = block->get_rows(); #ifndef GRAPHICS_DISABLED if (textord_show_parallel_rows && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); + if (to_win == nullptr) create_to_win(page_tr); } #endif - //get row coords - fit_parallel_rows(block, - gradient, - rotation, - block_edge, + // get row coords + fit_parallel_rows(block, gradient, rotation, block_edge, textord_show_parallel_rows && testing_on); - delete_non_dropout_rows(block, - gradient, - rotation, - block_edge, + delete_non_dropout_rows(block, gradient, rotation, block_edge, textord_show_parallel_rows && testing_on); expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on); - blob_it.set_to_list (&block->blobs); - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - blob_it.add_list_after (row_it.data ()->blob_list ()); - //give blobs back - assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE); - //now new rows must be genuine - blob_it.set_to_list (&block->blobs); - blob_it.add_list_after (&block->large_blobs); - assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE); - //safe to use big ones now - blob_it.set_to_list (&block->blobs); - //throw all blobs in - blob_it.add_list_after (&block->noise_blobs); - blob_it.add_list_after (&block->small_blobs); - assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE); + blob_it.set_to_list(&block->blobs); + row_it.set_to_list(block->get_rows()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) + blob_it.add_list_after(row_it.data()->blob_list()); + // give blobs back + assign_blobs_to_rows(block, &gradient, 1, FALSE, FALSE, FALSE); + // now new rows must be genuine + blob_it.set_to_list(&block->blobs); + blob_it.add_list_after(&block->large_blobs); + assign_blobs_to_rows(block, &gradient, 2, TRUE, TRUE, FALSE); + // safe to use big ones now + blob_it.set_to_list(&block->blobs); + // throw all blobs in + blob_it.add_list_after(&block->noise_blobs); + blob_it.add_list_after(&block->small_blobs); + assign_blobs_to_rows(block, &gradient, 3, FALSE, FALSE, FALSE); } /** @@ -575,242 +552,225 @@ void cleanup_rows_making( //find lines * * Compute the linespacing and offset. */ -void delete_non_dropout_rows( //find lines - TO_BLOCK* block, //block to do - float gradient, //global skew - FCOORD rotation, //deskew vector - int32_t block_edge, //left edge - bool testing_on //correct orientation +void delete_non_dropout_rows( // find lines + TO_BLOCK* block, // block to do + float gradient, // global skew + FCOORD rotation, // deskew vector + int32_t block_edge, // left edge + bool testing_on // correct orientation ) { - TBOX block_box; //deskewed block - int32_t *deltas; //change in occupation - int32_t *occupation; //of pixel coords - int32_t max_y; //in block + TBOX block_box; // deskewed block + int32_t* deltas; // change in occupation + int32_t* occupation; // of pixel coords + int32_t max_y; // in block int32_t min_y; - int32_t line_index; //of scan line - int32_t line_count; //no of scan lines - int32_t distance; //to drop-out - int32_t xleft; //of block - int32_t ybottom; //of block - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); + int32_t line_index; // of scan line + int32_t line_count; // no of scan lines + int32_t distance; // to drop-out + int32_t xleft; // of block + int32_t ybottom; // of block + TO_ROW* row; // current row + TO_ROW_IT row_it = block->get_rows(); BLOBNBOX_IT blob_it = &block->blobs; - if (row_it.length () == 0) - return; //empty block - block_box = deskew_block_coords (block, gradient); - xleft = block->block->pdblk.bounding_box ().left (); - ybottom = block->block->pdblk.bounding_box ().bottom (); - min_y = block_box.bottom () - 1; - max_y = block_box.top () + 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - line_index = (int32_t) floor (row_it.data ()->intercept ()); - if (line_index <= min_y) - min_y = line_index - 1; - if (line_index >= max_y) - max_y = line_index + 1; + if (row_it.length() == 0) return; // empty block + block_box = deskew_block_coords(block, gradient); + xleft = block->block->pdblk.bounding_box().left(); + ybottom = block->block->pdblk.bounding_box().bottom(); + min_y = block_box.bottom() - 1; + max_y = block_box.top() + 1; + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + line_index = (int32_t)floor(row_it.data()->intercept()); + if (line_index <= min_y) min_y = line_index - 1; + if (line_index >= max_y) max_y = line_index + 1; } line_count = max_y - min_y + 1; - if (line_count <= 0) - return; //empty block - deltas = (int32_t *) alloc_mem (line_count * sizeof (int32_t)); - occupation = (int32_t *) alloc_mem (line_count * sizeof (int32_t)); + if (line_count <= 0) return; // empty block + deltas = (int32_t*)alloc_mem(line_count * sizeof(int32_t)); + occupation = (int32_t*)alloc_mem(line_count * sizeof(int32_t)); if (deltas == nullptr || occupation == nullptr) - MEMORY_OUT.error ("compute_line_spacing", ABORT, nullptr); + MEMORY_OUT.error("compute_line_spacing", ABORT, nullptr); compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas); - compute_occupation_threshold ((int32_t) - ceil (block->line_spacing * - (tesseract::CCStruct::kDescenderFraction + - tesseract::CCStruct::kAscenderFraction)), - (int32_t) ceil (block->line_spacing * - (tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction)), - max_y - min_y + 1, occupation, deltas); + compute_occupation_threshold( + (int32_t)ceil(block->line_spacing * + (tesseract::CCStruct::kDescenderFraction + + tesseract::CCStruct::kAscenderFraction)), + (int32_t)ceil(block->line_spacing * + (tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction)), + max_y - min_y + 1, occupation, deltas); #ifndef GRAPHICS_DISABLED if (testing_on) { draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas); } #endif compute_dropout_distances(occupation, deltas, line_count); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - line_index = (int32_t) floor (row->intercept ()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + line_index = (int32_t)floor(row->intercept()); distance = deltas[line_index - min_y]; - if (find_best_dropout_row (row, distance, block->line_spacing / 2, - line_index, &row_it, testing_on)) { + if (find_best_dropout_row(row, distance, block->line_spacing / 2, + line_index, &row_it, testing_on)) { #ifndef GRAPHICS_DISABLED if (testing_on) - plot_parallel_row(row, gradient, block_edge, - ScrollView::WHITE, rotation); + plot_parallel_row(row, gradient, block_edge, ScrollView::WHITE, + rotation); #endif - blob_it.add_list_after (row_it.data ()->blob_list ()); - delete row_it.extract (); //too far away + blob_it.add_list_after(row_it.data()->blob_list()); + delete row_it.extract(); // too far away } } - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - blob_it.add_list_after (row_it.data ()->blob_list ()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + blob_it.add_list_after(row_it.data()->blob_list()); } free_mem(deltas); free_mem(occupation); } - /** * @name find_best_dropout_row * * Delete this row if it has a neighbour with better dropout characteristics. * TRUE is returned if the row should be deleted. */ -bool find_best_dropout_row( //find neighbours - TO_ROW* row, //row to test - int32_t distance, //dropout dist - float dist_limit, //threshold distance - int32_t line_index, //index of row - TO_ROW_IT* row_it, //current position - bool testing_on //correct orientation +bool find_best_dropout_row( // find neighbours + TO_ROW* row, // row to test + int32_t distance, // dropout dist + float dist_limit, // threshold distance + int32_t line_index, // index of row + TO_ROW_IT* row_it, // current position + bool testing_on // correct orientation ) { - int32_t next_index; // of neighbouring row - int32_t row_offset; //from current row - int32_t abs_dist; //absolute distance - int8_t row_inc; //increment to row_index - TO_ROW *next_row; //nextious row + int32_t next_index; // of neighbouring row + int32_t row_offset; // from current row + int32_t abs_dist; // absolute distance + int8_t row_inc; // increment to row_index + TO_ROW* next_row; // nextious row if (testing_on) - tprintf ("Row at %g(%g), dropout dist=%d,", - row->intercept (), row->parallel_c (), distance); + tprintf("Row at %g(%g), dropout dist=%d,", row->intercept(), + row->parallel_c(), distance); if (distance < 0) { row_inc = 1; abs_dist = -distance; - } - else { + } else { row_inc = -1; abs_dist = distance; } if (abs_dist > dist_limit) { if (testing_on) { - tprintf (" too far - deleting\n"); + tprintf(" too far - deleting\n"); } return true; } - if ((distance < 0 && !row_it->at_last ()) - || (distance >= 0 && !row_it->at_first ())) { + if ((distance < 0 && !row_it->at_last()) || + (distance >= 0 && !row_it->at_first())) { row_offset = row_inc; do { - next_row = row_it->data_relative (row_offset); - next_index = (int32_t) floor (next_row->intercept ()); - if ((distance < 0 - && next_index < line_index - && next_index > line_index + distance + distance) - || (distance >= 0 - && next_index > line_index - && next_index < line_index + distance + distance)) { + next_row = row_it->data_relative(row_offset); + next_index = (int32_t)floor(next_row->intercept()); + if ((distance < 0 && next_index < line_index && + next_index > line_index + distance + distance) || + (distance >= 0 && next_index > line_index && + next_index < line_index + distance + distance)) { if (testing_on) { - tprintf (" nearer neighbour (%d) at %g\n", - line_index + distance - next_index, - next_row->intercept ()); + tprintf(" nearer neighbour (%d) at %g\n", + line_index + distance - next_index, next_row->intercept()); } - return true; //other is nearer - } - else if (next_index == line_index - || next_index == line_index + distance + distance) { - if (row->believability () <= next_row->believability ()) { + return true; // other is nearer + } else if (next_index == line_index || + next_index == line_index + distance + distance) { + if (row->believability() <= next_row->believability()) { if (testing_on) { - tprintf (" equal but more believable at %g (%g/%g)\n", - next_row->intercept (), - row->believability (), - next_row->believability ()); + tprintf(" equal but more believable at %g (%g/%g)\n", + next_row->intercept(), row->believability(), + next_row->believability()); } - return true; //other is more believable + return true; // other is more believable } } row_offset += row_inc; - } - while ((next_index == line_index - || next_index == line_index + distance + distance) - && row_offset < row_it->length ()); - if (testing_on) - tprintf (" keeping\n"); + } while ((next_index == line_index || + next_index == line_index + distance + distance) && + row_offset < row_it->length()); + if (testing_on) tprintf(" keeping\n"); } return false; } - /** * @name deskew_block_coords * * Compute the bounding box of all the blobs in the block * if they were deskewed without actually doing it. */ -TBOX deskew_block_coords( //block box - TO_BLOCK *block, //block to do - float gradient //global skew - ) { - TBOX result; //block bounds - TBOX blob_box; //of block - FCOORD rotation; //deskew vector - float length; //of gradient vector - TO_ROW_IT row_it = block->get_rows (); - TO_ROW *row; //current row - BLOBNBOX *blob; //current blob - BLOBNBOX_IT blob_it; //iterator - - length = sqrt (gradient * gradient + 1); - rotation = FCOORD (1 / length, -gradient / length); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - blob_it.set_to_list (row->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_box.rotate (rotation);//de-skew it +TBOX deskew_block_coords( // block box + TO_BLOCK* block, // block to do + float gradient // global skew +) { + TBOX result; // block bounds + TBOX blob_box; // of block + FCOORD rotation; // deskew vector + float length; // of gradient vector + TO_ROW_IT row_it = block->get_rows(); + TO_ROW* row; // current row + BLOBNBOX* blob; // current blob + BLOBNBOX_IT blob_it; // iterator + + length = sqrt(gradient * gradient + 1); + rotation = FCOORD(1 / length, -gradient / length); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + blob_it.set_to_list(row->blob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + blob_box.rotate(rotation); // de-skew it result += blob_box; } } return result; } - /** * @name compute_line_occupation * * Compute the pixel projection back on the y axis given the global * skew. Also compute the 1st derivative. */ -void compute_line_occupation( //project blobs - TO_BLOCK *block, //block to do - float gradient, //global skew - int32_t min_y, //min coord in block - int32_t max_y, //in block - int32_t *occupation, //output projection - int32_t *deltas //derivative - ) { - int32_t line_count; //maxy-miny+1 - int32_t line_index; //of scan line - int index; //array index for daft compilers - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - BLOBNBOX *blob; //current blob - BLOBNBOX_IT blob_it; //iterator - float length; //of skew vector - TBOX blob_box; //bounding box - FCOORD rotation; //inverse of skew +void compute_line_occupation( // project blobs + TO_BLOCK* block, // block to do + float gradient, // global skew + int32_t min_y, // min coord in block + int32_t max_y, // in block + int32_t* occupation, // output projection + int32_t* deltas // derivative +) { + int32_t line_count; // maxy-miny+1 + int32_t line_index; // of scan line + int index; // array index for daft compilers + TO_ROW* row; // current row + TO_ROW_IT row_it = block->get_rows(); + BLOBNBOX* blob; // current blob + BLOBNBOX_IT blob_it; // iterator + float length; // of skew vector + TBOX blob_box; // bounding box + FCOORD rotation; // inverse of skew line_count = max_y - min_y + 1; - length = sqrt (gradient * gradient + 1); - rotation = FCOORD (1 / length, -gradient / length); + length = sqrt(gradient * gradient + 1); + rotation = FCOORD(1 / length, -gradient / length); for (line_index = 0; line_index < line_count; line_index++) deltas[line_index] = 0; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - blob_it.set_to_list (row->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_box.rotate (rotation);//de-skew it + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + blob_it.set_to_list(row->blob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + blob_box.rotate(rotation); // de-skew it int32_t width = blob_box.right() - blob_box.left(); index = blob_box.bottom() - min_y; ASSERT_HOST(index >= 0 && index < line_count); @@ -826,30 +786,29 @@ void compute_line_occupation( //project blobs occupation[line_index] = occupation[line_index - 1] + deltas[line_index]; } - /** * compute_occupation_threshold * * Compute thresholds for textline or not for the occupation array. */ -void compute_occupation_threshold( //project blobs - int32_t low_window, //below result point - int32_t high_window, //above result point - int32_t line_count, //array sizes - int32_t *occupation, //input projection - int32_t *thresholds //output thresholds - ) { - int32_t line_index; //of thresholds line - int32_t low_index; //in occupation - int32_t high_index; //in occupation - int32_t sum; //current average - int32_t divisor; //to get thresholds - int32_t min_index; //of min occ - int32_t min_occ; //min in locality - int32_t test_index; //for finding min +void compute_occupation_threshold( // project blobs + int32_t low_window, // below result point + int32_t high_window, // above result point + int32_t line_count, // array sizes + int32_t* occupation, // input projection + int32_t* thresholds // output thresholds +) { + int32_t line_index; // of thresholds line + int32_t low_index; // in occupation + int32_t high_index; // in occupation + int32_t sum; // current average + int32_t divisor; // to get thresholds + int32_t min_index; // of min occ + int32_t min_occ; // min in locality + int32_t test_index; // for finding min divisor = - (int32_t) ceil ((low_window + high_window) / textord_occupancy_threshold); + (int32_t)ceil((low_window + high_window) / textord_occupancy_threshold); if (low_window + high_window < line_count) { for (sum = 0, high_index = 0; high_index < low_window; high_index++) sum += occupation[high_index]; @@ -860,37 +819,36 @@ void compute_occupation_threshold( //project blobs for (test_index = 1; test_index < high_index; test_index++) { if (occupation[test_index] <= min_occ) { min_occ = occupation[test_index]; - min_index = test_index; //find min in region + min_index = test_index; // find min in region } } for (line_index = 0; line_index < low_window; line_index++) thresholds[line_index] = (sum - min_occ) / divisor + min_occ; - //same out to end + // same out to end for (low_index = 0; high_index < line_count; low_index++, high_index++) { sum -= occupation[low_index]; sum += occupation[high_index]; if (occupation[high_index] <= min_occ) { - //find min in region + // find min in region min_occ = occupation[high_index]; min_index = high_index; } - //lost min from region + // lost min from region if (min_index <= low_index) { min_occ = occupation[low_index + 1]; min_index = low_index + 1; for (test_index = low_index + 2; test_index <= high_index; - test_index++) { + test_index++) { if (occupation[test_index] <= min_occ) { min_occ = occupation[test_index]; - //find min in region + // find min in region min_index = test_index; } } } thresholds[line_index++] = (sum - min_occ) / divisor + min_occ; } - } - else { + } else { min_occ = occupation[0]; min_index = 0; for (sum = 0, low_index = 0; low_index < line_count; low_index++) { @@ -904,25 +862,24 @@ void compute_occupation_threshold( //project blobs } for (; line_index < line_count; line_index++) thresholds[line_index] = (sum - min_occ) / divisor + min_occ; - //same out to end + // same out to end } - /** * @name compute_dropout_distances * * Compute the distance from each coordinate to the nearest dropout. */ -void compute_dropout_distances( //project blobs - int32_t *occupation, //input projection - int32_t *thresholds, //output thresholds - int32_t line_count //array sizes - ) { - int32_t line_index; //of thresholds line - int32_t distance; //from prev dropout - int32_t next_dist; //to next dropout - int32_t back_index; //for back filling - int32_t prev_threshold; //before overwrite +void compute_dropout_distances( // project blobs + int32_t* occupation, // input projection + int32_t* thresholds, // output thresholds + int32_t line_count // array sizes +) { + int32_t line_index; // of thresholds line + int32_t distance; // from prev dropout + int32_t next_dist; // to next dropout + int32_t back_index; // for back filling + int32_t prev_threshold; // before overwrite distance = -line_count; line_index = 0; @@ -930,13 +887,12 @@ void compute_dropout_distances( //project blobs do { distance--; prev_threshold = thresholds[line_index]; - //distance from prev + // distance from prev thresholds[line_index] = distance; line_index++; - } - while (line_index < line_count - && (occupation[line_index] < thresholds[line_index] - || occupation[line_index - 1] >= prev_threshold)); + } while (line_index < line_count && + (occupation[line_index] < thresholds[line_index] || + occupation[line_index - 1] >= prev_threshold)); if (line_index < line_count) { back_index = line_index - 1; next_dist = 1; @@ -948,11 +904,9 @@ void compute_dropout_distances( //project blobs } distance = 1; } - } - while (line_index < line_count); + } while (line_index < line_count); } - /** * @name expand_rows * @@ -960,285 +914,261 @@ void compute_dropout_distances( //project blobs * neighbours. If the expansion would entirely swallow a neighbouring row * then do so. */ -void expand_rows( //find lines - ICOORD page_tr, //top right - TO_BLOCK* block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - int32_t block_edge, //edge of block - bool testing_on //correct orientation +void expand_rows( // find lines + ICOORD page_tr, // top right + TO_BLOCK* block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on // correct orientation ) { - bool swallowed_row; //eaten a neighbour - float y_max, y_min; //new row limits - float y_bottom, y_top; //allowed limits - TO_ROW *test_row; //next row - TO_ROW *row; //current row - //iterators + bool swallowed_row; // eaten a neighbour + float y_max, y_min; // new row limits + float y_bottom, y_top; // allowed limits + TO_ROW* test_row; // next row + TO_ROW* row; // current row + // iterators BLOBNBOX_IT blob_it = &block->blobs; - TO_ROW_IT row_it = block->get_rows (); + TO_ROW_IT row_it = block->get_rows(); #ifndef GRAPHICS_DISABLED if (textord_show_expanded_rows && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); + if (to_win == nullptr) create_to_win(page_tr); } #endif - adjust_row_limits(block); //shift min,max. + adjust_row_limits(block); // shift min,max. if (textord_new_initial_xheight) { - if (block->get_rows ()->length () == 0) - return; + if (block->get_rows()->length() == 0) return; compute_row_stats(block, textord_show_expanded_rows && testing_on); } - assign_blobs_to_rows (block, &gradient, 4, true, false, false); - //get real membership - if (block->get_rows ()->length () == 0) - return; - fit_parallel_rows(block, - gradient, - rotation, - block_edge, + assign_blobs_to_rows(block, &gradient, 4, true, false, false); + // get real membership + if (block->get_rows()->length() == 0) return; + fit_parallel_rows(block, gradient, rotation, block_edge, textord_show_expanded_rows && testing_on); if (!textord_new_initial_xheight) compute_row_stats(block, textord_show_expanded_rows && testing_on); - row_it.move_to_last (); + row_it.move_to_last(); do { - row = row_it.data (); - y_max = row->max_y (); //get current limits - y_min = row->min_y (); - y_bottom = row->intercept () - block->line_size * textord_expansion_factor * - tesseract::CCStruct::kDescenderFraction; - y_top = row->intercept () + block->line_size * textord_expansion_factor * - (tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction); - if (y_min > y_bottom) { //expansion allowed + row = row_it.data(); + y_max = row->max_y(); // get current limits + y_min = row->min_y(); + y_bottom = row->intercept() - block->line_size * textord_expansion_factor * + tesseract::CCStruct::kDescenderFraction; + y_top = row->intercept() + block->line_size * textord_expansion_factor * + (tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction); + if (y_min > y_bottom) { // expansion allowed if (textord_show_expanded_rows && testing_on) tprintf("Expanding bottom of row at %f from %f to %f\n", row->intercept(), y_min, y_bottom); - //expandable + // expandable swallowed_row = true; - while (swallowed_row && !row_it.at_last ()) { + while (swallowed_row && !row_it.at_last()) { swallowed_row = false; - //get next one - test_row = row_it.data_relative (1); - //overlaps space - if (test_row->max_y () > y_bottom) { - if (test_row->min_y () > y_bottom) { + // get next one + test_row = row_it.data_relative(1); + // overlaps space + if (test_row->max_y() > y_bottom) { + if (test_row->min_y() > y_bottom) { if (textord_show_expanded_rows && testing_on) tprintf("Eating row below at %f\n", test_row->intercept()); - row_it.forward (); + row_it.forward(); #ifndef GRAPHICS_DISABLED if (textord_show_expanded_rows && testing_on) - plot_parallel_row(test_row, - gradient, - block_edge, - ScrollView::WHITE, - rotation); + plot_parallel_row(test_row, gradient, block_edge, + ScrollView::WHITE, rotation); #endif - blob_it.set_to_list (row->blob_list ()); - blob_it.add_list_after (test_row->blob_list ()); - //swallow complete row - delete row_it.extract (); - row_it.backward (); + blob_it.set_to_list(row->blob_list()); + blob_it.add_list_after(test_row->blob_list()); + // swallow complete row + delete row_it.extract(); + row_it.backward(); swallowed_row = true; - } - else if (test_row->max_y () < y_min) { - //shorter limit - y_bottom = test_row->max_y (); + } else if (test_row->max_y() < y_min) { + // shorter limit + y_bottom = test_row->max_y(); if (textord_show_expanded_rows && testing_on) tprintf("Truncating limit to %f due to touching row at %f\n", y_bottom, test_row->intercept()); - } - else { - y_bottom = y_min; //can't expand it + } else { + y_bottom = y_min; // can't expand it if (textord_show_expanded_rows && testing_on) - tprintf("Not expanding limit beyond %f due to touching row at %f\n", - y_bottom, test_row->intercept()); + tprintf( + "Not expanding limit beyond %f due to touching row at %f\n", + y_bottom, test_row->intercept()); } } } - y_min = y_bottom; //expand it + y_min = y_bottom; // expand it } - if (y_max < y_top) { //expansion allowed + if (y_max < y_top) { // expansion allowed if (textord_show_expanded_rows && testing_on) - tprintf("Expanding top of row at %f from %f to %f\n", - row->intercept(), y_max, y_top); + tprintf("Expanding top of row at %f from %f to %f\n", row->intercept(), + y_max, y_top); swallowed_row = true; - while (swallowed_row && !row_it.at_first ()) { + while (swallowed_row && !row_it.at_first()) { swallowed_row = false; - //get one above - test_row = row_it.data_relative (-1); - if (test_row->min_y () < y_top) { - if (test_row->max_y () < y_top) { + // get one above + test_row = row_it.data_relative(-1); + if (test_row->min_y() < y_top) { + if (test_row->max_y() < y_top) { if (textord_show_expanded_rows && testing_on) tprintf("Eating row above at %f\n", test_row->intercept()); - row_it.backward (); - blob_it.set_to_list (row->blob_list ()); + row_it.backward(); + blob_it.set_to_list(row->blob_list()); #ifndef GRAPHICS_DISABLED if (textord_show_expanded_rows && testing_on) - plot_parallel_row(test_row, - gradient, - block_edge, - ScrollView::WHITE, - rotation); + plot_parallel_row(test_row, gradient, block_edge, + ScrollView::WHITE, rotation); #endif - blob_it.add_list_after (test_row->blob_list ()); - //swallow complete row - delete row_it.extract (); - row_it.forward (); + blob_it.add_list_after(test_row->blob_list()); + // swallow complete row + delete row_it.extract(); + row_it.forward(); swallowed_row = true; - } - else if (test_row->min_y () < y_max) { - //shorter limit - y_top = test_row->min_y (); + } else if (test_row->min_y() < y_max) { + // shorter limit + y_top = test_row->min_y(); if (textord_show_expanded_rows && testing_on) tprintf("Truncating limit to %f due to touching row at %f\n", y_top, test_row->intercept()); - } - else { - y_top = y_max; //can't expand it + } else { + y_top = y_max; // can't expand it if (textord_show_expanded_rows && testing_on) - tprintf("Not expanding limit beyond %f due to touching row at %f\n", - y_top, test_row->intercept()); + tprintf( + "Not expanding limit beyond %f due to touching row at %f\n", + y_top, test_row->intercept()); } } } y_max = y_top; } - //new limits - row->set_limits (y_min, y_max); - row_it.backward (); - } - while (!row_it.at_last ()); + // new limits + row->set_limits(y_min, y_max); + row_it.backward(); + } while (!row_it.at_last()); } - /** * adjust_row_limits * * Change the limits of rows to suit the default fractions. */ -void adjust_row_limits( //tidy limits - TO_BLOCK *block //block to do - ) { - TO_ROW *row; //current row - float size; //size of row - float ymax; //top of row - float ymin; //bottom of row - TO_ROW_IT row_it = block->get_rows (); +void adjust_row_limits( // tidy limits + TO_BLOCK* block // block to do +) { + TO_ROW* row; // current row + float size; // size of row + float ymax; // top of row + float ymin; // bottom of row + TO_ROW_IT row_it = block->get_rows(); if (textord_show_expanded_rows) tprintf("Adjusting row limits for block(%d,%d)\n", block->block->pdblk.bounding_box().left(), block->block->pdblk.bounding_box().top()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - size = row->max_y () - row->min_y (); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + size = row->max_y() - row->min_y(); if (textord_show_expanded_rows) - tprintf("Row at %f has min %f, max %f, size %f\n", - row->intercept(), row->min_y(), row->max_y(), size); + tprintf("Row at %f has min %f, max %f, size %f\n", row->intercept(), + row->min_y(), row->max_y(), size); size /= tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction + - tesseract::CCStruct::kDescenderFraction; + tesseract::CCStruct::kAscenderFraction + + tesseract::CCStruct::kDescenderFraction; ymax = size * (tesseract::CCStruct::kXHeightFraction + tesseract::CCStruct::kAscenderFraction); ymin = -size * tesseract::CCStruct::kDescenderFraction; - row->set_limits (row->intercept () + ymin, row->intercept () + ymax); + row->set_limits(row->intercept() + ymin, row->intercept() + ymax); row->merged = false; } } - /** * @name compute_row_stats * * Compute the linespacing and offset. */ -void compute_row_stats( //find lines - TO_BLOCK* block, //block to do - bool testing_on //correct orientation +void compute_row_stats( // find lines + TO_BLOCK* block, // block to do + bool testing_on // correct orientation ) { - int32_t row_index; //of median - TO_ROW *row; //current row - TO_ROW *prev_row; //previous row - float iqr; //inter quartile range - TO_ROW_IT row_it = block->get_rows (); - //number of rows - int16_t rowcount = row_it.length (); - TO_ROW **rows; //for choose nth - - rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *)); - if (rows == nullptr) - MEMORY_OUT.error ("compute_row_stats", ABORT, nullptr); + int32_t row_index; // of median + TO_ROW* row; // current row + TO_ROW* prev_row; // previous row + float iqr; // inter quartile range + TO_ROW_IT row_it = block->get_rows(); + // number of rows + int16_t rowcount = row_it.length(); + TO_ROW** rows; // for choose nth + + rows = (TO_ROW**)alloc_mem(rowcount * sizeof(TO_ROW*)); + if (rows == nullptr) MEMORY_OUT.error("compute_row_stats", ABORT, nullptr); rowcount = 0; prev_row = nullptr; - row_it.move_to_last (); //start at bottom + row_it.move_to_last(); // start at bottom do { - row = row_it.data (); + row = row_it.data(); if (prev_row != nullptr) { rows[rowcount++] = prev_row; - prev_row->spacing = row->intercept () - prev_row->intercept (); + prev_row->spacing = row->intercept() - prev_row->intercept(); if (testing_on) - tprintf ("Row at %g yields spacing of %g\n", - row->intercept (), prev_row->spacing); + tprintf("Row at %g yields spacing of %g\n", row->intercept(), + prev_row->spacing); } prev_row = row; - row_it.backward (); - } - while (!row_it.at_last ()); + row_it.backward(); + } while (!row_it.at_last()); block->key_row = prev_row; - block->baseline_offset = - fmod (prev_row->parallel_c (), block->line_spacing); + block->baseline_offset = fmod(prev_row->parallel_c(), block->line_spacing); if (testing_on) - tprintf ("Blob based spacing=(%g,%g), offset=%g", - block->line_size, block->line_spacing, block->baseline_offset); + tprintf("Blob based spacing=(%g,%g), offset=%g", block->line_size, + block->line_spacing, block->baseline_offset); if (rowcount > 0) { - row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount, - sizeof (TO_ROW *), row_spacing_order); + row_index = choose_nth_item(rowcount * 3 / 4, rows, rowcount, + sizeof(TO_ROW*), row_spacing_order); iqr = rows[row_index]->spacing; - row_index = choose_nth_item (rowcount / 4, rows, rowcount, - sizeof (TO_ROW *), row_spacing_order); + row_index = choose_nth_item(rowcount / 4, rows, rowcount, sizeof(TO_ROW*), + row_spacing_order); iqr -= rows[row_index]->spacing; - row_index = choose_nth_item (rowcount / 2, rows, rowcount, - sizeof (TO_ROW *), row_spacing_order); + row_index = choose_nth_item(rowcount / 2, rows, rowcount, sizeof(TO_ROW*), + row_spacing_order); block->key_row = rows[row_index]; - if (testing_on) - tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr); - if (rowcount > 2 - && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) { + if (testing_on) tprintf(" row based=%g(%g)", rows[row_index]->spacing, iqr); + if (rowcount > 2 && + iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) { if (!textord_new_initial_xheight) { - if (rows[row_index]->spacing < block->line_spacing - && rows[row_index]->spacing > block->line_size) - //within range + if (rows[row_index]->spacing < block->line_spacing && + rows[row_index]->spacing > block->line_size) + // within range block->line_size = rows[row_index]->spacing; - //spacing=size + // spacing=size else if (rows[row_index]->spacing > block->line_spacing) block->line_size = block->line_spacing; - //too big so use max - } - else { + // too big so use max + } else { if (rows[row_index]->spacing < block->line_spacing) block->line_size = rows[row_index]->spacing; else block->line_size = block->line_spacing; - //too big so use max + // too big so use max } if (block->line_size < textord_min_xheight) - block->line_size = (float) textord_min_xheight; + block->line_size = (float)textord_min_xheight; block->line_spacing = rows[row_index]->spacing; - block->max_blob_size = - block->line_spacing * textord_excess_blobsize; + block->max_blob_size = block->line_spacing * textord_excess_blobsize; } - block->baseline_offset = fmod (rows[row_index]->intercept (), - block->line_spacing); + block->baseline_offset = + fmod(rows[row_index]->intercept(), block->line_spacing); } if (testing_on) - tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n", - block->line_size, block->line_spacing, block->baseline_offset); + tprintf("\nEstimate line size=%g, spacing=%g, offset=%g\n", + block->line_size, block->line_spacing, block->baseline_offset); free_mem(rows); } - /** * @name compute_block_xheight * @@ -1269,13 +1199,13 @@ void compute_row_stats( //find lines * */ namespace tesseract { -void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { - TO_ROW *row; // current row - float asc_frac_xheight = CCStruct::kAscenderFraction / - CCStruct::kXHeightFraction; - float desc_frac_xheight = CCStruct::kDescenderFraction / - CCStruct::kXHeightFraction; - int32_t min_height, max_height; // limits on xheight +void Textord::compute_block_xheight(TO_BLOCK* block, float gradient) { + TO_ROW* row; // current row + float asc_frac_xheight = + CCStruct::kAscenderFraction / CCStruct::kXHeightFraction; + float desc_frac_xheight = + CCStruct::kDescenderFraction / CCStruct::kXHeightFraction; + int32_t min_height, max_height; // limits on xheight TO_ROW_IT row_it = block->get_rows(); if (row_it.empty()) return; // no rows @@ -1296,8 +1226,8 @@ void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { row = row_it.data(); // Compute the xheight of this row if it has not been computed before. if (row->xheight <= 0.0) { - compute_row_xheight(row, block->block->classify_rotation(), - gradient, block->line_size); + compute_row_xheight(row, block->block->classify_rotation(), gradient, + block->line_size); } ROW_CATEGORY row_category = get_row_category(row); if (row_category == ROW_ASCENDERS_FOUND) { @@ -1313,8 +1243,8 @@ void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { row_desc_descdrop.add(static_cast(-row->descdrop), row->xheight_evidence); } else if (row_category == ROW_UNKNOWN) { - fill_heights(row, gradient, min_height, max_height, - &row_cap_xheights, &row_cap_floating_xheights); + fill_heights(row, gradient, min_height, max_height, &row_cap_xheights, + &row_cap_floating_xheights); } } @@ -1341,7 +1271,7 @@ void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { // still works as intended). compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights, textord_single_height_mode && - block->block->classify_rotation().y() == 0.0, + block->block->classify_rotation().y() == 0.0, min_height, max_height, &(xheight), &(ascrise)); if (ascrise == 0) { // assume only caps in the whole block xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio; @@ -1381,9 +1311,9 @@ void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { * Set xheigh_evidence to the number of blobs with the chosen xheight * that appear in this row. */ -void Textord::compute_row_xheight(TO_ROW *row, // row to do +void Textord::compute_row_xheight(TO_ROW* row, // row to do const FCOORD& rotation, - float gradient, // global skew + float gradient, // global skew int block_line_size) { // Find blobs representing repeated characters in rows and mark them. // This information is used for computing row xheight and at a later @@ -1396,16 +1326,14 @@ void Textord::compute_row_xheight(TO_ROW *row, // row to do get_min_max_xheight(block_line_size, &min_height, &max_height); STATS heights(min_height, max_height + 1); STATS floating_heights(min_height, max_height + 1); - fill_heights(row, gradient, min_height, max_height, - &heights, &floating_heights); + fill_heights(row, gradient, min_height, max_height, &heights, + &floating_heights); row->ascrise = 0.0f; row->xheight = 0.0f; - row->xheight_evidence = - compute_xheight_from_modes(&heights, &floating_heights, - textord_single_height_mode && - rotation.y() == 0.0, - min_height, max_height, - &(row->xheight), &(row->ascrise)); + row->xheight_evidence = compute_xheight_from_modes( + &heights, &floating_heights, + textord_single_height_mode && rotation.y() == 0.0, min_height, max_height, + &(row->xheight), &(row->ascrise)); row->descdrop = 0.0f; if (row->xheight > 0.0) { row->descdrop = static_cast( @@ -1421,22 +1349,21 @@ void Textord::compute_row_xheight(TO_ROW *row, // row to do * Fill the given heights with heights of the blobs that are legal * candidates for estimating xheight. */ -void fill_heights(TO_ROW *row, float gradient, int min_height, - int max_height, STATS *heights, STATS *floating_heights) { - float xcentre; // centre of blob - float top; // top y coord of blob - float height; // height of blob - BLOBNBOX *blob; // current blob +void fill_heights(TO_ROW* row, float gradient, int min_height, int max_height, + STATS* heights, STATS* floating_heights) { + float xcentre; // centre of blob + float top; // top y coord of blob + float height; // height of blob + BLOBNBOX* blob; // current blob int repeated_set; BLOBNBOX_IT blob_it = row->blob_list(); if (blob_it.empty()) return; // no blobs in this row - bool has_rep_chars = - row->rep_chars_marked() && row->num_repeated_sets() > 0; + bool has_rep_chars = row->rep_chars_marked() && row->num_repeated_sets() > 0; do { blob = blob_it.data(); if (!blob->joined_to_prev()) { - xcentre = (blob->bounding_box().left() + - blob->bounding_box().right()) / 2.0f; + xcentre = + (blob->bounding_box().left() + blob->bounding_box().right()) / 2.0f; top = blob->bounding_box().top(); height = blob->bounding_box().height(); if (textord_fix_xheight_bug) @@ -1482,10 +1409,10 @@ void fill_heights(TO_ROW *row, float gradient, int min_height, * (e.g. -, ', =, ^, `, ", ', etc) * If cap_only, then force finding of only the top mode. */ -int compute_xheight_from_modes( - STATS *heights, STATS *floating_heights, bool cap_only, int min_height, - int max_height, float *xheight, float *ascrise) { - int blob_index = heights->mode(); // find mode +int compute_xheight_from_modes(STATS* heights, STATS* floating_heights, + bool cap_only, int min_height, int max_height, + float* xheight, float* ascrise) { + int blob_index = heights->mode(); // find mode int blob_count = heights->pile_count(blob_index); // get count of mode if (textord_debug_xheights) { tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n", @@ -1499,10 +1426,9 @@ int compute_xheight_from_modes( bool in_best_pile = FALSE; int prev_size = -INT32_MAX; int best_count = 0; - int mode_count = compute_height_modes(heights, min_height, max_height, - modes, MAX_HEIGHT_MODES); - if (cap_only && mode_count > 1) - mode_count = 1; + int mode_count = compute_height_modes(heights, min_height, max_height, modes, + MAX_HEIGHT_MODES); + if (cap_only && mode_count > 1) mode_count = 1; int x; if (textord_debug_xheights) { tprintf("found %d modes: ", mode_count); @@ -1511,17 +1437,15 @@ int compute_xheight_from_modes( } for (x = 0; x < mode_count - 1; x++) { - if (modes[x] != prev_size + 1) - in_best_pile = FALSE; // had empty height - int modes_x_count = heights->pile_count(modes[x]) - - floating_heights->pile_count(modes[x]); + if (modes[x] != prev_size + 1) in_best_pile = FALSE; // had empty height + int modes_x_count = + heights->pile_count(modes[x]) - floating_heights->pile_count(modes[x]); if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) && (in_best_pile || modes_x_count > best_count)) { for (int asc = x + 1; asc < mode_count; asc++) { float ratio = - static_cast(modes[asc]) / static_cast(modes[x]); - if (textord_ascx_ratio_min < ratio && - ratio < textord_ascx_ratio_max && + static_cast(modes[asc]) / static_cast(modes[x]); + if (textord_ascx_ratio_min < ratio && ratio < textord_ascx_ratio_max && (heights->pile_count(modes[asc]) >= blob_count * textord_ascheight_mode_fraction)) { if (modes_x_count > best_count) { @@ -1529,8 +1453,8 @@ int compute_xheight_from_modes( best_count = modes_x_count; } if (textord_debug_xheights) { - tprintf("X=%d, asc=%d, count=%d, ratio=%g\n", - modes[x], modes[asc]-modes[x], modes_x_count, ratio); + tprintf("X=%d, asc=%d, count=%d, ratio=%g\n", modes[x], + modes[asc] - modes[x], modes_x_count, ratio); } prev_size = modes[x]; *xheight = static_cast(modes[x]); @@ -1578,13 +1502,13 @@ int compute_xheight_from_modes( * number of blobs in the row, the function returns the descender * height, returns 0 otherwise. */ -int32_t compute_row_descdrop(TO_ROW *row, float gradient, - int xheight_blob_count, STATS *asc_heights) { +int32_t compute_row_descdrop(TO_ROW* row, float gradient, + int xheight_blob_count, STATS* asc_heights) { // Count how many potential ascenders are in this row. int i_min = asc_heights->min_bucket(); if ((i_min / row->xheight) < textord_ascx_ratio_min) { - i_min = static_cast( - floor(row->xheight * textord_ascx_ratio_min + 0.5)); + i_min = + static_cast(floor(row->xheight * textord_ascx_ratio_min + 0.5)); } int i_max = asc_heights->max_bucket(); if ((i_max / row->xheight) > textord_ascx_ratio_max) { @@ -1595,60 +1519,59 @@ int32_t compute_row_descdrop(TO_ROW *row, float gradient, num_potential_asc += asc_heights->pile_count(i); } int32_t min_height = - static_cast(floor(row->xheight * textord_descx_ratio_min + 0.5)); + static_cast(floor(row->xheight * textord_descx_ratio_min + 0.5)); int32_t max_height = - static_cast(floor(row->xheight * textord_descx_ratio_max)); - float xcentre; // centre of blob - float height; // height of blob + static_cast(floor(row->xheight * textord_descx_ratio_max)); + float xcentre; // centre of blob + float height; // height of blob BLOBNBOX_IT blob_it = row->blob_list(); - BLOBNBOX *blob; // current blob - STATS heights (min_height, max_height + 1); + BLOBNBOX* blob; // current blob + STATS heights(min_height, max_height + 1); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { blob = blob_it.data(); if (!blob->joined_to_prev()) { - xcentre = (blob->bounding_box().left() + - blob->bounding_box().right()) / 2.0f; + xcentre = + (blob->bounding_box().left() + blob->bounding_box().right()) / 2.0f; height = (gradient * xcentre + row->parallel_c() - blob->bounding_box().bottom()); if (height >= min_height && height <= max_height) heights.add(static_cast(floor(height + 0.5)), 1); } } - int blob_index = heights.mode(); // find mode + int blob_index = heights.mode(); // find mode int blob_count = heights.pile_count(blob_index); // get count of mode float total_fraction = - (textord_descheight_mode_fraction + textord_ascheight_mode_fraction); + (textord_descheight_mode_fraction + textord_ascheight_mode_fraction); if (static_cast(blob_count + num_potential_asc) < xheight_blob_count * total_fraction) { blob_count = 0; } int descdrop = blob_count > 0 ? -blob_index : 0; if (textord_debug_xheights) { - tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n", - descdrop, num_potential_asc, blob_count); + tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n", descdrop, + num_potential_asc, blob_count); heights.print(); } return descdrop; } - /** * @name compute_height_modes * * Find the top maxmodes values in the input array and put their * indices in the output in the order in which they occurred. */ -int32_t compute_height_modes(STATS *heights, // stats to search - int32_t min_height, // bottom of range - int32_t max_height, // top of range - int32_t *modes, // output array - int32_t maxmodes) { // size of modes - int32_t pile_count; // no in source pile - int32_t src_count; // no of source entries - int32_t src_index; // current entry - int32_t least_count; // height of smalllest - int32_t least_index; // index of least - int32_t dest_count; // index in modes +int32_t compute_height_modes(STATS* heights, // stats to search + int32_t min_height, // bottom of range + int32_t max_height, // top of range + int32_t* modes, // output array + int32_t maxmodes) { // size of modes + int32_t pile_count; // no in source pile + int32_t src_count; // no of source entries + int32_t src_index; // current entry + int32_t least_count; // height of smalllest + int32_t least_index; // index of least + int32_t dest_count; // index in modes src_count = max_height + 1 - min_height; dest_count = 0; @@ -1693,26 +1616,25 @@ int32_t compute_height_modes(STATS *heights, // stats to search return dest_count; } - /** * @name correct_row_xheight * * Adjust the xheight etc of this row if not within reasonable limits * of the average for the block. */ -void correct_row_xheight(TO_ROW *row, float xheight, - float ascrise, float descdrop) { +void correct_row_xheight(TO_ROW* row, float xheight, float ascrise, + float descdrop) { ROW_CATEGORY row_category = get_row_category(row); if (textord_debug_xheights) { - tprintf("correcting row xheight: row->xheight %.4f" - ", row->acrise %.4f row->descdrop %.4f\n", - row->xheight, row->ascrise, row->descdrop); + tprintf( + "correcting row xheight: row->xheight %.4f" + ", row->acrise %.4f row->descdrop %.4f\n", + row->xheight, row->ascrise, row->descdrop); } bool normal_xheight = - within_error_margin(row->xheight, xheight, textord_xheight_error_margin); - bool cap_xheight = - within_error_margin(row->xheight, xheight + ascrise, - textord_xheight_error_margin); + within_error_margin(row->xheight, xheight, textord_xheight_error_margin); + bool cap_xheight = within_error_margin(row->xheight, xheight + ascrise, + textord_xheight_error_margin); // Use the average xheight/ascrise for the following cases: // -- the xheight of the row could not be determined at all // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3") @@ -1726,7 +1648,7 @@ void correct_row_xheight(TO_ROW *row, float xheight, } else if (row_category == ROW_INVALID || (row_category == ROW_DESCENDERS_FOUND && (normal_xheight || cap_xheight)) || - (row_category == ROW_UNKNOWN && normal_xheight)) { + (row_category == ROW_UNKNOWN && normal_xheight)) { if (textord_debug_xheights) tprintf("using average xheight\n"); row->xheight = xheight; row->ascrise = ascrise; @@ -1739,11 +1661,11 @@ void correct_row_xheight(TO_ROW *row, float xheight, if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n"); row->ascrise = row->xheight * (ascrise / xheight); } else if (row_category == ROW_UNKNOWN) { - // Otherwise assume this row is an all-caps or small-caps row - // and adjust xheight and ascrise of the row. + // Otherwise assume this row is an all-caps or small-caps row + // and adjust xheight and ascrise of the row. row->all_caps = true; - if (cap_xheight) { // regular all caps + if (cap_xheight) { // regular all caps if (textord_debug_xheights) tprintf("all caps\n"); row->xheight = xheight; row->ascrise = ascrise; @@ -1762,8 +1684,10 @@ void correct_row_xheight(TO_ROW *row, float xheight, } } if (textord_debug_xheights) { - tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop" - " = %.4f\n", row->xheight, row->ascrise, row->descdrop); + tprintf( + "corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop" + " = %.4f\n", + row->xheight, row->ascrise, row->descdrop); } } @@ -1773,7 +1697,7 @@ static int CountOverlaps(const TBOX& box, int min_height, BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); - const TBOX &blob_box = blob->bounding_box(); + const TBOX& blob_box = blob->bounding_box(); if (blob_box.height() >= min_height && box.major_overlap(blob_box)) { ++overlaps; } @@ -1787,55 +1711,53 @@ static int CountOverlaps(const TBOX& box, int min_height, * Test wide objects for being potential underlines. If they are then * put them in a separate list in the block. */ -void separate_underlines(TO_BLOCK* block, // block to do - float gradient, // skew angle - FCOORD rotation, // inverse landscape +void separate_underlines(TO_BLOCK* block, // block to do + float gradient, // skew angle + FCOORD rotation, // inverse landscape bool testing_on) { // correct orientation - BLOBNBOX *blob; // current blob - C_BLOB *rotated_blob; // rotated blob - TO_ROW *row; // current row - float length; // of g_vec + BLOBNBOX* blob; // current blob + C_BLOB* rotated_blob; // rotated blob + TO_ROW* row; // current row + float length; // of g_vec TBOX blob_box; - FCOORD blob_rotation; // inverse of rotation - FCOORD g_vec; // skew rotation - BLOBNBOX_IT blob_it; // iterator - // iterator + FCOORD blob_rotation; // inverse of rotation + FCOORD g_vec; // skew rotation + BLOBNBOX_IT blob_it; // iterator + // iterator BLOBNBOX_IT under_it = &block->underlines; BLOBNBOX_IT large_it = &block->large_blobs; TO_ROW_IT row_it = block->get_rows(); - int min_blob_height = static_cast(textord_min_blob_height_fraction * - block->line_size + 0.5); + int min_blob_height = static_cast( + textord_min_blob_height_fraction * block->line_size + 0.5); - // length of vector + // length of vector length = sqrt(1 + gradient * gradient); g_vec = FCOORD(1 / length, -gradient / length); blob_rotation = FCOORD(rotation.x(), -rotation.y()); blob_rotation.rotate(g_vec); // undoing everything for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); - // get blobs + // get blobs blob_it.set_to_list(row->blob_list()); - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); - blob_it.forward()) { + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { blob = blob_it.data(); blob_box = blob->bounding_box(); if (blob_box.width() > block->line_size * textord_underline_width) { ASSERT_HOST(blob->cblob() != nullptr); - rotated_blob = crotate_cblob (blob->cblob(), - blob_rotation); + rotated_blob = crotate_cblob(blob->cblob(), blob_rotation); if (test_underline( - testing_on && textord_show_final_rows, - rotated_blob, static_cast(row->intercept()), - static_cast( - block->line_size * - (tesseract::CCStruct::kXHeightFraction + - tesseract::CCStruct::kAscenderFraction / 2.0f)))) { + testing_on && textord_show_final_rows, rotated_blob, + static_cast(row->intercept()), + static_cast( + block->line_size * + (tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction / 2.0f)))) { under_it.add_after_then_move(blob_it.extract()); if (testing_on && textord_show_final_rows) { tprintf("Underlined blob at:"); - rotated_blob->bounding_box().print(); + rotated_blob->bounding_box().print(); tprintf("Was:"); - blob_box.print(); + blob_box.print(); } } else if (CountOverlaps(blob->bounding_box(), min_blob_height, row->blob_list()) > @@ -1843,8 +1765,7 @@ void separate_underlines(TO_BLOCK* block, // block to do large_it.add_after_then_move(blob_it.extract()); if (testing_on && textord_show_final_rows) { tprintf("Large blob overlaps %d blobs at:", - CountOverlaps(blob_box, min_blob_height, - row->blob_list())); + CountOverlaps(blob_box, min_blob_height, row->blob_list())); blob_box.print(); } } @@ -1854,45 +1775,44 @@ void separate_underlines(TO_BLOCK* block, // block to do } } - /** * @name pre_associate_blobs * * Associate overlapping blobs and fake chop wide blobs. */ -void pre_associate_blobs( //make rough chars - ICOORD page_tr, //top right - TO_BLOCK* block, //block to do - FCOORD rotation, //inverse landscape - bool testing_on //correct orientation +void pre_associate_blobs( // make rough chars + ICOORD page_tr, // top right + TO_BLOCK* block, // block to do + FCOORD rotation, // inverse landscape + bool testing_on // correct orientation ) { #ifndef GRAPHICS_DISABLED - ScrollView::Color colour; //of boxes + ScrollView::Color colour; // of boxes #endif - BLOBNBOX *blob; //current blob - BLOBNBOX *nextblob; //next in list + BLOBNBOX* blob; // current blob + BLOBNBOX* nextblob; // next in list TBOX blob_box; - FCOORD blob_rotation; //inverse of rotation - BLOBNBOX_IT blob_it; //iterator - BLOBNBOX_IT start_it; //iterator - TO_ROW_IT row_it = block->get_rows (); + FCOORD blob_rotation; // inverse of rotation + BLOBNBOX_IT blob_it; // iterator + BLOBNBOX_IT start_it; // iterator + TO_ROW_IT row_it = block->get_rows(); #ifndef GRAPHICS_DISABLED colour = ScrollView::RED; #endif - blob_rotation = FCOORD (rotation.x (), -rotation.y ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - //get blobs - blob_it.set_to_list (row_it.data ()->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - start_it = blob_it; //save start point + blob_rotation = FCOORD(rotation.x(), -rotation.y()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + // get blobs + blob_it.set_to_list(row_it.data()->blob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + start_it = blob_it; // save start point // if (testing_on && textord_show_final_blobs) // { - // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n", + // tprintf("Blob at (%d,%d)->(%d,%d), + // addr=%x, count=%d\n", // blob_box.left(),blob_box.bottom(), // blob_box.right(),blob_box.top(), // (void*)blob,blob_it.length()); @@ -1900,87 +1820,78 @@ void pre_associate_blobs( //make rough chars bool overlap; do { overlap = false; - if (!blob_it.at_last ()) { + if (!blob_it.at_last()) { nextblob = blob_it.data_relative(1); overlap = blob_box.major_x_overlap(nextblob->bounding_box()); if (overlap) { - blob->merge(nextblob); // merge new blob - blob_box = blob->bounding_box(); // get bigger box + blob->merge(nextblob); // merge new blob + blob_box = blob->bounding_box(); // get bigger box blob_it.forward(); } } - } - while (overlap); - blob->chop (&start_it, &blob_it, - blob_rotation, - block->line_size * tesseract::CCStruct::kXHeightFraction * - textord_chop_width); - //attempt chop + } while (overlap); + blob->chop(&start_it, &blob_it, blob_rotation, + block->line_size * tesseract::CCStruct::kXHeightFraction * + textord_chop_width); + // attempt chop } #ifndef GRAPHICS_DISABLED if (testing_on && textord_show_final_blobs) { - if (to_win == nullptr) - create_to_win(page_tr); + if (to_win == nullptr) create_to_win(page_tr); to_win->Pen(colour); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_box.rotate (rotation); - if (!blob->joined_to_prev ()) { - to_win->Rectangle (blob_box.left (), blob_box.bottom (), - blob_box.right (), blob_box.top ()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + blob_box.rotate(rotation); + if (!blob->joined_to_prev()) { + to_win->Rectangle(blob_box.left(), blob_box.bottom(), + blob_box.right(), blob_box.top()); } } - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; + colour = (ScrollView::Color)(colour + 1); + if (colour > ScrollView::MAGENTA) colour = ScrollView::RED; } #endif } } - /** * @name fit_parallel_rows * * Re-fit the rows in the block to the given gradient. */ -void fit_parallel_rows( //find lines - TO_BLOCK* block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - int32_t block_edge, //edge of block - bool testing_on //correct orientation +void fit_parallel_rows( // find lines + TO_BLOCK* block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on // correct orientation ) { #ifndef GRAPHICS_DISABLED - ScrollView::Color colour; //of row + ScrollView::Color colour; // of row #endif - TO_ROW_IT row_it = block->get_rows (); + TO_ROW_IT row_it = block->get_rows(); - row_it.move_to_first (); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - if (row_it.data ()->blob_list ()->empty ()) - delete row_it.extract (); //nothing in it + row_it.move_to_first(); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + if (row_it.data()->blob_list()->empty()) + delete row_it.extract(); // nothing in it else - fit_parallel_lms (gradient, row_it.data ()); + fit_parallel_lms(gradient, row_it.data()); } #ifndef GRAPHICS_DISABLED if (testing_on) { colour = ScrollView::RED; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - plot_parallel_row (row_it.data (), gradient, - block_edge, colour, rotation); - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + plot_parallel_row(row_it.data(), gradient, block_edge, colour, rotation); + colour = (ScrollView::Color)(colour + 1); + if (colour > ScrollView::MAGENTA) colour = ScrollView::RED; } } #endif - row_it.sort (row_y_order); //may have gone out of order + row_it.sort(row_y_order); // may have gone out of order } - /** * @name fit_parallel_lms * @@ -1988,10 +1899,10 @@ void fit_parallel_rows( //find lines * Make the fit parallel to the given gradient and set the * row accordingly. */ -void fit_parallel_lms(float gradient, TO_ROW *row) { - float c; // fitted line - int blobcount; // no of blobs - tesseract::DetLineFit lms; +void fit_parallel_lms(float gradient, TO_ROW* row) { + float c; // fitted line + int blobcount; // no of blobs + tesseract::DetLineFit lms; BLOBNBOX_IT blob_it = row->blob_list(); blobcount = 0; @@ -2007,42 +1918,39 @@ void fit_parallel_lms(float gradient, TO_ROW *row) { if (textord_straight_baselines && blobcount > textord_lms_line_trials) { error = lms.Fit(&gradient, &c); } - //set the other too + // set the other too row->set_line(gradient, c, error); } - /** * @name make_spline_rows * * Re-fit the rows in the block to the given gradient. */ namespace tesseract { -void Textord::make_spline_rows(TO_BLOCK* block, // block to do - float gradient, // gradient to fit +void Textord::make_spline_rows(TO_BLOCK* block, // block to do + float gradient, // gradient to fit bool testing_on) { #ifndef GRAPHICS_DISABLED - ScrollView::Color colour; //of row + ScrollView::Color colour; // of row #endif - TO_ROW_IT row_it = block->get_rows (); + TO_ROW_IT row_it = block->get_rows(); - row_it.move_to_first (); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - if (row_it.data ()->blob_list ()->empty ()) - delete row_it.extract (); //nothing in it + row_it.move_to_first(); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + if (row_it.data()->blob_list()->empty()) + delete row_it.extract(); // nothing in it else - make_baseline_spline (row_it.data (), block); + make_baseline_spline(row_it.data(), block); } if (textord_old_baselines) { #ifndef GRAPHICS_DISABLED if (testing_on) { colour = ScrollView::RED; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); - row_it.forward ()) { - row_it.data ()->baseline.plot (to_win, colour); - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row_it.data()->baseline.plot(to_win, colour); + colour = (ScrollView::Color)(colour + 1); + if (colour > ScrollView::MAGENTA) colour = ScrollView::RED; } } #endif @@ -2051,11 +1959,10 @@ void Textord::make_spline_rows(TO_BLOCK* block, // block to do #ifndef GRAPHICS_DISABLED if (testing_on) { colour = ScrollView::RED; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row_it.data ()->baseline.plot (to_win, colour); - colour = (ScrollView::Color) (colour + 1); - if (colour > ScrollView::MAGENTA) - colour = ScrollView::RED; + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row_it.data()->baseline.plot(to_win, colour); + colour = (ScrollView::Color)(colour + 1); + if (colour > ScrollView::MAGENTA) colour = ScrollView::RED; } } #endif @@ -2063,7 +1970,6 @@ void Textord::make_spline_rows(TO_BLOCK* block, // block to do } // namespace tesseract. - /** * @name make_baseline_spline * @@ -2071,31 +1977,30 @@ void Textord::make_spline_rows(TO_BLOCK* block, // block to do * Make the fit parallel to the given gradient and set the * row accordingly. */ -void make_baseline_spline(TO_ROW *row, //row to fit - TO_BLOCK *block) { - int32_t *xstarts; // spline boundaries - double *coeffs; // quadratic coeffs - int32_t segments; // no of segments +void make_baseline_spline(TO_ROW* row, // row to fit + TO_BLOCK* block) { + int32_t* xstarts; // spline boundaries + double* coeffs; // quadratic coeffs + int32_t segments; // no of segments xstarts = - (int32_t *) alloc_mem((row->blob_list()->length() + 1) * sizeof(int32_t)); - if (segment_baseline(row, block, segments, xstarts) - && !textord_straight_baselines && !textord_parallel_baselines) { + (int32_t*)alloc_mem((row->blob_list()->length() + 1) * sizeof(int32_t)); + if (segment_baseline(row, block, segments, xstarts) && + !textord_straight_baselines && !textord_parallel_baselines) { coeffs = linear_spline_baseline(row, block, segments, xstarts); } else { xstarts[1] = xstarts[segments]; segments = 1; - coeffs = (double *) alloc_mem (3 * sizeof (double)); + coeffs = (double*)alloc_mem(3 * sizeof(double)); coeffs[0] = 0; - coeffs[1] = row->line_m (); - coeffs[2] = row->line_c (); + coeffs[1] = row->line_m(); + coeffs[2] = row->line_c(); } - row->baseline = QSPLINE (segments, xstarts, coeffs); + row->baseline = QSPLINE(segments, xstarts, coeffs); free_mem(coeffs); free_mem(xstarts); } - /** * @name segment_baseline * @@ -2103,60 +2008,59 @@ void make_baseline_spline(TO_ROW *row, //row to fit * quadratic fitted to them. * Return TRUE if enough blobs were far enough away to need a quadratic. */ -bool -segment_baseline( //split baseline - TO_ROW* row, //row to fit - TO_BLOCK* block, //block it came from - int32_t& segments, //no fo segments - int32_t* xstarts //coords of segments +bool segment_baseline( // split baseline + TO_ROW* row, // row to fit + TO_BLOCK* block, // block it came from + int32_t& segments, // no fo segments + int32_t* xstarts // coords of segments ) { - bool needs_curve; //needs curved line - int blobcount; //no of blobs - int blobindex; //current blob - int last_state; //above, on , below - int state; //of current blob - float yshift; //from baseline - TBOX box; //blob box - TBOX new_box; //new_it box - float middle; //xcentre of blob - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT new_it = blob_it; //front end - SORTED_FLOATS yshifts; //shifts from baseline + bool needs_curve; // needs curved line + int blobcount; // no of blobs + int blobindex; // current blob + int last_state; // above, on , below + int state; // of current blob + float yshift; // from baseline + TBOX box; // blob box + TBOX new_box; // new_it box + float middle; // xcentre of blob + // blobs + BLOBNBOX_IT blob_it = row->blob_list(); + BLOBNBOX_IT new_it = blob_it; // front end + SORTED_FLOATS yshifts; // shifts from baseline needs_curve = false; - box = box_next_pre_chopped (&blob_it); - xstarts[0] = box.left (); + box = box_next_pre_chopped(&blob_it); + xstarts[0] = box.left(); segments = 1; - blobcount = row->blob_list ()->length (); + blobcount = row->blob_list()->length(); if (textord_oldbl_debug) - tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n", - blobcount, box.left (), box.bottom ()); - if (blobcount <= textord_spline_medianwin - || blobcount < textord_spline_minblobs) { - blob_it.move_to_last (); - box = blob_it.data ()->bounding_box (); - xstarts[1] = box.right (); + tprintf("Segmenting baseline of %d blobs at (%d,%d)\n", blobcount, + box.left(), box.bottom()); + if (blobcount <= textord_spline_medianwin || + blobcount < textord_spline_minblobs) { + blob_it.move_to_last(); + box = blob_it.data()->bounding_box(); + xstarts[1] = box.right(); return false; } last_state = 0; - new_it.mark_cycle_pt (); + new_it.mark_cycle_pt(); for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) { - new_box = box_next_pre_chopped (&new_it); - middle = (new_box.left () + new_box.right ()) / 2.0; - yshift = new_box.bottom () - row->line_m () * middle - row->line_c (); - //record shift - yshifts.add (yshift, blobindex); - if (new_it.cycled_list ()) { - xstarts[1] = new_box.right (); + new_box = box_next_pre_chopped(&new_it); + middle = (new_box.left() + new_box.right()) / 2.0; + yshift = new_box.bottom() - row->line_m() * middle - row->line_c(); + // record shift + yshifts.add(yshift, blobindex); + if (new_it.cycled_list()) { + xstarts[1] = new_box.right(); return false; } } for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++) - box = box_next_pre_chopped (&blob_it); + box = box_next_pre_chopped(&blob_it); do { - new_box = box_next_pre_chopped (&new_it); - //get middle one + new_box = box_next_pre_chopped(&new_it); + // get middle one yshift = yshifts[textord_spline_medianwin / 2]; if (yshift > textord_spline_shift_fraction * block->line_size) state = 1; @@ -2164,37 +2068,33 @@ segment_baseline( //split baseline state = -1; else state = 0; - if (state != 0) - needs_curve = true; + if (state != 0) needs_curve = true; // tprintf("State=%d, prev=%d, shift=%g\n", // state,last_state,yshift); if (state != last_state && blobcount > textord_spline_minblobs) { - xstarts[segments++] = box.left (); + xstarts[segments++] = box.left(); blobcount = 0; } last_state = state; - yshifts.remove (blobindex - textord_spline_medianwin); - box = box_next_pre_chopped (&blob_it); - middle = (new_box.left () + new_box.right ()) / 2.0; - yshift = new_box.bottom () - row->line_m () * middle - row->line_c (); - yshifts.add (yshift, blobindex); + yshifts.remove(blobindex - textord_spline_medianwin); + box = box_next_pre_chopped(&blob_it); + middle = (new_box.left() + new_box.right()) / 2.0; + yshift = new_box.bottom() - row->line_m() * middle - row->line_c(); + yshifts.add(yshift, blobindex); blobindex++; blobcount++; - } - while (!new_it.cycled_list ()); + } while (!new_it.cycled_list()); if (blobcount > textord_spline_minblobs || segments == 1) { - xstarts[segments] = new_box.right (); - } - else { - xstarts[--segments] = new_box.right (); + xstarts[segments] = new_box.right(); + } else { + xstarts[--segments] = new_box.right(); } if (textord_oldbl_debug) - tprintf ("Made %d segments on row at (%d,%d)\n", - segments, box.right (), box.bottom ()); + tprintf("Made %d segments on row at (%d,%d)\n", segments, box.right(), + box.bottom()); return needs_curve; } - /** * @name linear_spline_baseline * @@ -2202,43 +2102,42 @@ segment_baseline( //split baseline * quadratic fitted to them. * @return TRUE if enough blobs were far enough away to need a quadratic. */ -double * -linear_spline_baseline ( //split baseline -TO_ROW * row, //row to fit -TO_BLOCK * block, //block it came from -int32_t & segments, //no fo segments -int32_t xstarts[] //coords of segments +double* linear_spline_baseline( // split baseline + TO_ROW* row, // row to fit + TO_BLOCK* block, // block it came from + int32_t& segments, // no fo segments + int32_t xstarts[] // coords of segments ) { - int blobcount; //no of blobs - int blobindex; //current blob - int index1, index2; //blob numbers - int blobs_per_segment; //blobs in each - TBOX box; //blob box - TBOX new_box; //new_it box - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT new_it = blob_it; //front end - float b, c; //fitted curve + int blobcount; // no of blobs + int blobindex; // current blob + int index1, index2; // blob numbers + int blobs_per_segment; // blobs in each + TBOX box; // blob box + TBOX new_box; // new_it box + // blobs + BLOBNBOX_IT blob_it = row->blob_list(); + BLOBNBOX_IT new_it = blob_it; // front end + float b, c; // fitted curve tesseract::DetLineFit lms; - double *coeffs; //quadratic coeffs - int32_t segment; //current segment + double* coeffs; // quadratic coeffs + int32_t segment; // current segment - box = box_next_pre_chopped (&blob_it); - xstarts[0] = box.left (); + box = box_next_pre_chopped(&blob_it); + xstarts[0] = box.left(); blobcount = 1; - while (!blob_it.at_first ()) { + while (!blob_it.at_first()) { blobcount++; - box = box_next_pre_chopped (&blob_it); + box = box_next_pre_chopped(&blob_it); } segments = blobcount / textord_spline_medianwin; - if (segments < 1) - segments = 1; + if (segments < 1) segments = 1; blobs_per_segment = blobcount / segments; - coeffs = (double *) alloc_mem (segments * 3 * sizeof (double)); + coeffs = (double*)alloc_mem(segments * 3 * sizeof(double)); if (textord_oldbl_debug) - tprintf - ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n", - blobcount, box.left (), box.bottom (), segments, blobs_per_segment); + tprintf( + "Linear splining baseline of %d blobs at (%d,%d), into %d segments of " + "%d blobs\n", + blobcount, box.left(), box.bottom(), segments, blobs_per_segment); segment = 1; for (index2 = 0; index2 < blobs_per_segment / 2; index2++) box_next_pre_chopped(&new_it); @@ -2248,13 +2147,13 @@ int32_t xstarts[] //coords of segments blobindex += blobs_per_segment; lms.Clear(); while (index1 < blobindex || (segment == segments && index1 < blobcount)) { - box = box_next_pre_chopped (&blob_it); + box = box_next_pre_chopped(&blob_it); int middle = (box.left() + box.right()) / 2; lms.Add(ICOORD(middle, box.bottom())); index1++; - if (index1 == blobindex - blobs_per_segment / 2 - || index1 == blobcount - 1) { - xstarts[segment] = box.left (); + if (index1 == blobindex - blobs_per_segment / 2 || + index1 == blobcount - 1) { + xstarts[segment] = box.left(); } } lms.Fit(&b, &c); @@ -2262,19 +2161,18 @@ int32_t xstarts[] //coords of segments coeffs[segment * 3 - 2] = b; coeffs[segment * 3 - 1] = c; segment++; - if (segment > segments) - break; + if (segment > segments) break; blobindex += blobs_per_segment; lms.Clear(); while (index2 < blobindex || (segment == segments && index2 < blobcount)) { - new_box = box_next_pre_chopped (&new_it); + new_box = box_next_pre_chopped(&new_it); int middle = (new_box.left() + new_box.right()) / 2; - lms.Add(ICOORD (middle, new_box.bottom())); + lms.Add(ICOORD(middle, new_box.bottom())); index2++; - if (index2 == blobindex - blobs_per_segment / 2 - || index2 == blobcount - 1) { - xstarts[segment] = new_box.left (); + if (index2 == blobindex - blobs_per_segment / 2 || + index2 == blobcount - 1) { + xstarts[segment] = new_box.left(); } } lms.Fit(&b, &c); @@ -2282,133 +2180,122 @@ int32_t xstarts[] //coords of segments coeffs[segment * 3 - 2] = b; coeffs[segment * 3 - 1] = c; segment++; - } - while (segment <= segments); + } while (segment <= segments); return coeffs; } - /** * @name assign_blobs_to_rows * * Make enough rows to allocate all the given blobs to one. * If a block skew is given, use that, else attempt to track it. */ -void assign_blobs_to_rows( //find lines - TO_BLOCK* block, //block to do - float* gradient, //block skew - int pass, //identification - bool reject_misses, //chuck big ones out - bool make_new_rows, //add rows for unmatched - bool drawing_skew //draw smoothed skew +void assign_blobs_to_rows( // find lines + TO_BLOCK* block, // block to do + float* gradient, // block skew + int pass, // identification + bool reject_misses, // chuck big ones out + bool make_new_rows, // add rows for unmatched + bool drawing_skew // draw smoothed skew ) { - OVERLAP_STATE overlap_result; //what to do with it - float ycoord; //current y - float top, bottom; //of blob - float g_length = 1.0f; //from gradient - int16_t row_count; //no of rows - int16_t left_x; //left edge - int16_t last_x; //previous edge - float block_skew; //y delta - float smooth_factor; //for new coords - float near_dist; //dist to nearest row - ICOORD testpt; //testing only - BLOBNBOX *blob; //current blob - TO_ROW *row; //current row - TO_ROW *dest_row = nullptr; //row to put blob in - //iterators + OVERLAP_STATE overlap_result; // what to do with it + float ycoord; // current y + float top, bottom; // of blob + float g_length = 1.0f; // from gradient + int16_t row_count; // no of rows + int16_t left_x; // left edge + int16_t last_x; // previous edge + float block_skew; // y delta + float smooth_factor; // for new coords + float near_dist; // dist to nearest row + ICOORD testpt; // testing only + BLOBNBOX* blob; // current blob + TO_ROW* row; // current row + TO_ROW* dest_row = nullptr; // row to put blob in + // iterators BLOBNBOX_IT blob_it = &block->blobs; - TO_ROW_IT row_it = block->get_rows (); + TO_ROW_IT row_it = block->get_rows(); - ycoord = - (block->block->pdblk.bounding_box ().bottom () + - block->block->pdblk.bounding_box ().top ()) / 2.0f; - if (gradient != nullptr) - g_length = sqrt (1 + *gradient * *gradient); + ycoord = (block->block->pdblk.bounding_box().bottom() + + block->block->pdblk.bounding_box().top()) / + 2.0f; + if (gradient != nullptr) g_length = sqrt(1 + *gradient * *gradient); #ifndef GRAPHICS_DISABLED if (drawing_skew) - to_win->SetCursor(block->block->pdblk.bounding_box ().left (), ycoord); + to_win->SetCursor(block->block->pdblk.bounding_box().left(), ycoord); #endif - testpt = ICOORD (textord_test_x, textord_test_y); - blob_it.sort (blob_x_order); + testpt = ICOORD(textord_test_x, textord_test_y); + blob_it.sort(blob_x_order); smooth_factor = 1.0; block_skew = 0.0f; - row_count = row_it.length (); //might have rows - if (!blob_it.empty ()) { - left_x = blob_it.data ()->bounding_box ().left (); - } - else { - left_x = block->block->pdblk.bounding_box ().left (); + row_count = row_it.length(); // might have rows + if (!blob_it.empty()) { + left_x = blob_it.data()->bounding_box().left(); + } else { + left_x = block->block->pdblk.bounding_box().left(); } last_x = left_x; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); if (gradient != nullptr) { - block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom () - + *gradient / g_length * blob->bounding_box ().left (); - } - else if (blob->bounding_box ().left () - last_x > block->line_size / 2 - && last_x - left_x > block->line_size * 2 - && textord_interpolating_skew) { + block_skew = (1 - 1 / g_length) * blob->bounding_box().bottom() + + *gradient / g_length * blob->bounding_box().left(); + } else if (blob->bounding_box().left() - last_x > block->line_size / 2 && + last_x - left_x > block->line_size * 2 && + textord_interpolating_skew) { // tprintf("Interpolating skew from %g",block_skew); - block_skew *= (float) (blob->bounding_box ().left () - left_x) - / (last_x - left_x); + block_skew *= + (float)(blob->bounding_box().left() - left_x) / (last_x - left_x); // tprintf("to %g\n",block_skew); } - last_x = blob->bounding_box ().left (); - top = blob->bounding_box ().top () - block_skew; - bottom = blob->bounding_box ().bottom () - block_skew; + last_x = blob->bounding_box().left(); + top = blob->bounding_box().top() - block_skew; + bottom = blob->bounding_box().bottom() - block_skew; #ifndef GRAPHICS_DISABLED if (drawing_skew) - to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew); + to_win->DrawTo(blob->bounding_box().left(), ycoord + block_skew); #endif - if (!row_it.empty ()) { - for (row_it.move_to_first (); - !row_it.at_last () && row_it.data ()->min_y () > top; - row_it.forward ()); - row = row_it.data (); - if (row->min_y () <= top && row->max_y () >= bottom) { - //any overlap + if (!row_it.empty()) { + for (row_it.move_to_first(); + !row_it.at_last() && row_it.data()->min_y() > top; row_it.forward()) + ; + row = row_it.data(); + if (row->min_y() <= top && row->max_y() >= bottom) { + // any overlap dest_row = row; - overlap_result = most_overlapping_row (&row_it, dest_row, - top, bottom, - block->line_size, - blob->bounding_box (). - contains (testpt)); + overlap_result = most_overlapping_row( + &row_it, dest_row, top, bottom, block->line_size, + blob->bounding_box().contains(testpt)); if (overlap_result == NEW_ROW && !reject_misses) overlap_result = ASSIGN; - } - else { + } else { overlap_result = NEW_ROW; if (!make_new_rows) { - near_dist = row_it.data_relative (-1)->min_y () - top; - //below bottom - if (bottom < row->min_y ()) { - if (row->min_y () - bottom <= - (block->line_spacing - - block->line_size) * tesseract::CCStruct::kDescenderFraction) { - //done it + near_dist = row_it.data_relative(-1)->min_y() - top; + // below bottom + if (bottom < row->min_y()) { + if (row->min_y() - bottom <= + (block->line_spacing - block->line_size) * + tesseract::CCStruct::kDescenderFraction) { + // done it overlap_result = ASSIGN; dest_row = row; } - } - else if (near_dist > 0 - && near_dist < bottom - row->max_y ()) { - row_it.backward (); - dest_row = row_it.data (); - if (dest_row->min_y () - bottom <= - (block->line_spacing - - block->line_size) * tesseract::CCStruct::kDescenderFraction) { - //done it + } else if (near_dist > 0 && near_dist < bottom - row->max_y()) { + row_it.backward(); + dest_row = row_it.data(); + if (dest_row->min_y() - bottom <= + (block->line_spacing - block->line_size) * + tesseract::CCStruct::kDescenderFraction) { + // done it overlap_result = ASSIGN; } - } - else { - if (top - row->max_y () <= - (block->line_spacing - - block->line_size) * (textord_overlap_x + - tesseract::CCStruct::kAscenderFraction)) { - //done it + } else { + if (top - row->max_y() <= + (block->line_spacing - block->line_size) * + (textord_overlap_x + + tesseract::CCStruct::kAscenderFraction)) { + // done it overlap_result = ASSIGN; dest_row = row; } @@ -2416,44 +2303,36 @@ void assign_blobs_to_rows( //find lines } } if (overlap_result == ASSIGN) - dest_row->add_blob (blob_it.extract (), top, bottom, - block->line_size); + dest_row->add_blob(blob_it.extract(), top, bottom, block->line_size); if (overlap_result == NEW_ROW) { if (make_new_rows && top - bottom < block->max_blob_size) { dest_row = - new TO_ROW (blob_it.extract (), top, bottom, - block->line_size); + new TO_ROW(blob_it.extract(), top, bottom, block->line_size); row_count++; - if (bottom > row_it.data ()->min_y ()) - row_it.add_before_then_move (dest_row); - //insert in right place + if (bottom > row_it.data()->min_y()) + row_it.add_before_then_move(dest_row); + // insert in right place else - row_it.add_after_then_move (dest_row); + row_it.add_after_then_move(dest_row); smooth_factor = - 1.0 / (row_count * textord_skew_lag + - textord_skewsmooth_offset); - } - else + 1.0 / (row_count * textord_skew_lag + textord_skewsmooth_offset); + } else overlap_result = REJECT; } - } - else if (make_new_rows && top - bottom < block->max_blob_size) { + } else if (make_new_rows && top - bottom < block->max_blob_size) { overlap_result = NEW_ROW; - dest_row = - new TO_ROW(blob_it.extract(), top, bottom, block->line_size); + dest_row = new TO_ROW(blob_it.extract(), top, bottom, block->line_size); row_count++; row_it.add_after_then_move(dest_row); - smooth_factor = 1.0 / (row_count * textord_skew_lag + - textord_skewsmooth_offset2); - } - else + smooth_factor = + 1.0 / (row_count * textord_skew_lag + textord_skewsmooth_offset2); + } else overlap_result = REJECT; - if (blob->bounding_box ().contains(testpt) && textord_debug_blob) { + if (blob->bounding_box().contains(testpt) && textord_debug_blob) { if (overlap_result != REJECT) { tprintf("Test blob assigned to row at (%g,%g) on pass %d\n", - dest_row->min_y(), dest_row->max_y(), pass); - } - else { + dest_row->min_y(), dest_row->max_y(), pass); + } else { tprintf("Test blob assigned to no row on pass %d\n", pass); } } @@ -2465,10 +2344,10 @@ void assign_blobs_to_rows( //find lines row_it.add_before_then_move(row); } while (!row_it.at_last() && - row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) { + row_it.data()->min_y() < row_it.data_relative(1)->min_y()) { row = row_it.extract(); row_it.forward(); - // Keep rows in order. + // Keep rows in order. row_it.add_after_then_move(row); } BLOBNBOX_IT added_blob_it(dest_row->blob_list()); @@ -2476,9 +2355,9 @@ void assign_blobs_to_rows( //find lines TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box(); if (dest_row->blob_list()->singleton() || !prev_box.major_x_overlap(blob->bounding_box())) { - block_skew = (1 - smooth_factor) * block_skew - + smooth_factor * (blob->bounding_box().bottom() - - dest_row->initial_min_y()); + block_skew = (1 - smooth_factor) * block_skew + + smooth_factor * (blob->bounding_box().bottom() - + dest_row->initial_min_y()); } } } @@ -2488,158 +2367,148 @@ void assign_blobs_to_rows( //find lines } } - /** * @name most_overlapping_row * * Return the row which most overlaps the blob. */ -OVERLAP_STATE most_overlapping_row( //find best row - TO_ROW_IT* row_it, //iterator - TO_ROW*& best_row, //output row - float top, //top of blob - float bottom, //bottom of blob - float rowsize, //max row size - bool testing_blob //test stuff +OVERLAP_STATE +most_overlapping_row( // find best row + TO_ROW_IT* row_it, // iterator + TO_ROW*& best_row, // output row + float top, // top of blob + float bottom, // bottom of blob + float rowsize, // max row size + bool testing_blob // test stuff ) { - OVERLAP_STATE result; //result of tests - float overlap; //of blob & row - float bestover; //nearest row - float merge_top, merge_bottom; //size of merged row - ICOORD testpt; //testing only - TO_ROW *row; //current row - TO_ROW *test_row; //for multiple overlaps - BLOBNBOX_IT blob_it; //for merging rows + OVERLAP_STATE result; // result of tests + float overlap; // of blob & row + float bestover; // nearest row + float merge_top, merge_bottom; // size of merged row + ICOORD testpt; // testing only + TO_ROW* row; // current row + TO_ROW* test_row; // for multiple overlaps + BLOBNBOX_IT blob_it; // for merging rows result = ASSIGN; - row = row_it->data (); + row = row_it->data(); bestover = top - bottom; - if (top > row->max_y ()) - bestover -= top - row->max_y (); - if (bottom < row->min_y ()) - //compute overlap - bestover -= row->min_y () - bottom; + if (top > row->max_y()) bestover -= top - row->max_y(); + if (bottom < row->min_y()) + // compute overlap + bestover -= row->min_y() - bottom; if (testing_blob && textord_debug_blob) { - tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n", - bottom, top, row->min_y(), row->max_y(), rowsize, bestover); + tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n", bottom, + top, row->min_y(), row->max_y(), rowsize, bestover); } test_row = row; do { - if (!row_it->at_last ()) { - row_it->forward (); - test_row = row_it->data (); - if (test_row->min_y () <= top && test_row->max_y () >= bottom) { + if (!row_it->at_last()) { + row_it->forward(); + test_row = row_it->data(); + if (test_row->min_y() <= top && test_row->max_y() >= bottom) { merge_top = - test_row->max_y () > - row->max_y ()? test_row->max_y () : row->max_y (); + test_row->max_y() > row->max_y() ? test_row->max_y() : row->max_y(); merge_bottom = - test_row->min_y () < - row->min_y ()? test_row->min_y () : row->min_y (); + test_row->min_y() < row->min_y() ? test_row->min_y() : row->min_y(); if (merge_top - merge_bottom <= rowsize) { if (testing_blob) { - tprintf ("Merging rows at (%g,%g), (%g,%g)\n", - row->min_y (), row->max_y (), - test_row->min_y (), test_row->max_y ()); + tprintf("Merging rows at (%g,%g), (%g,%g)\n", row->min_y(), + row->max_y(), test_row->min_y(), test_row->max_y()); } - test_row->set_limits (merge_bottom, merge_top); - blob_it.set_to_list (test_row->blob_list ()); - blob_it.add_list_after (row->blob_list ()); - blob_it.sort (blob_x_order); - row_it->backward (); - delete row_it->extract (); - row_it->forward (); - bestover = -1.0f; //force replacement + test_row->set_limits(merge_bottom, merge_top); + blob_it.set_to_list(test_row->blob_list()); + blob_it.add_list_after(row->blob_list()); + blob_it.sort(blob_x_order); + row_it->backward(); + delete row_it->extract(); + row_it->forward(); + bestover = -1.0f; // force replacement } overlap = top - bottom; - if (top > test_row->max_y ()) - overlap -= top - test_row->max_y (); - if (bottom < test_row->min_y ()) - overlap -= test_row->min_y () - bottom; + if (top > test_row->max_y()) overlap -= top - test_row->max_y(); + if (bottom < test_row->min_y()) overlap -= test_row->min_y() - bottom; if (bestover >= rowsize - 1 && overlap >= rowsize - 1) { result = REJECT; } if (overlap > bestover) { - bestover = overlap; //find biggest overlap + bestover = overlap; // find biggest overlap row = test_row; } if (testing_blob && textord_debug_blob) { tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n", - bottom, top, test_row->min_y(), test_row->max_y(), - rowsize, overlap, bestover); + bottom, top, test_row->min_y(), test_row->max_y(), rowsize, + overlap, bestover); } } } - } - while (!row_it->at_last () - && test_row->min_y () <= top && test_row->max_y () >= bottom); - while (row_it->data () != row) - row_it->backward (); //make it point to row - //doesn't overlap much + } while (!row_it->at_last() && test_row->min_y() <= top && + test_row->max_y() >= bottom); + while (row_it->data() != row) + row_it->backward(); // make it point to row + // doesn't overlap much if (top - bottom - bestover > rowsize * textord_overlap_x && - (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x) - && result == ASSIGN) - result = NEW_ROW; //doesn't overlap enough + (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x) && + result == ASSIGN) + result = NEW_ROW; // doesn't overlap enough best_row = row; return result; } - /** * @name blob_x_order * * Sort function to sort blobs in x from page left. */ -int blob_x_order( //sort function - const void *item1, //items to compare - const void *item2) { - //converted ptr - BLOBNBOX *blob1 = *(BLOBNBOX **) item1; - //converted ptr - BLOBNBOX *blob2 = *(BLOBNBOX **) item2; - - if (blob1->bounding_box ().left () < blob2->bounding_box ().left ()) +int blob_x_order( // sort function + const void* item1, // items to compare + const void* item2) { + // converted ptr + BLOBNBOX* blob1 = *(BLOBNBOX**)item1; + // converted ptr + BLOBNBOX* blob2 = *(BLOBNBOX**)item2; + + if (blob1->bounding_box().left() < blob2->bounding_box().left()) return -1; - else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ()) + else if (blob1->bounding_box().left() > blob2->bounding_box().left()) return 1; else return 0; } - /** * @name row_y_order * * Sort function to sort rows in y from page top. */ -int row_y_order( //sort function - const void *item1, //items to compare - const void *item2) { - //converted ptr - TO_ROW *row1 = *(TO_ROW **) item1; - //converted ptr - TO_ROW *row2 = *(TO_ROW **) item2; - - if (row1->parallel_c () > row2->parallel_c ()) +int row_y_order( // sort function + const void* item1, // items to compare + const void* item2) { + // converted ptr + TO_ROW* row1 = *(TO_ROW**)item1; + // converted ptr + TO_ROW* row2 = *(TO_ROW**)item2; + + if (row1->parallel_c() > row2->parallel_c()) return -1; - else if (row1->parallel_c () < row2->parallel_c ()) + else if (row1->parallel_c() < row2->parallel_c()) return 1; else return 0; } - /** * @name row_spacing_order * * Qsort style function to compare 2 TO_ROWS based on their spacing value. */ -int row_spacing_order( //sort function - const void *item1, //items to compare - const void *item2) { - //converted ptr - TO_ROW *row1 = *(TO_ROW **) item1; - //converted ptr - TO_ROW *row2 = *(TO_ROW **) item2; +int row_spacing_order( // sort function + const void* item1, // items to compare + const void* item2) { + // converted ptr + TO_ROW* row1 = *(TO_ROW**)item1; + // converted ptr + TO_ROW* row2 = *(TO_ROW**)item2; if (row1->spacing < row2->spacing) return -1; @@ -2655,20 +2524,19 @@ int row_spacing_order( //sort function * Mark blobs marked with BTFT_LEADER in repeated sets using the * repeated_set member of BLOBNBOX. */ -void mark_repeated_chars(TO_ROW *row) { - BLOBNBOX_IT box_it(row->blob_list()); // Iterator. +void mark_repeated_chars(TO_ROW* row) { + BLOBNBOX_IT box_it(row->blob_list()); // Iterator. int num_repeated_sets = 0; if (!box_it.empty()) { do { BLOBNBOX* bblob = box_it.data(); int repeat_length = 1; - if (bblob->flow() == BTFT_LEADER && - !bblob->joined_to_prev() && bblob->cblob() != nullptr) { + if (bblob->flow() == BTFT_LEADER && !bblob->joined_to_prev() && + bblob->cblob() != nullptr) { BLOBNBOX_IT test_it(box_it); for (test_it.forward(); !test_it.at_first();) { bblob = test_it.data(); - if (bblob->flow() != BTFT_LEADER) - break; + if (bblob->flow() != BTFT_LEADER) break; test_it.forward(); bblob = test_it.data(); if (bblob->joined_to_prev() || bblob->cblob() == nullptr) { @@ -2684,7 +2552,7 @@ void mark_repeated_chars(TO_ROW *row) { bblob = box_it.data(); bblob->set_repeated_set(num_repeated_sets); } - } else { + } else { bblob->set_repeated_set(0); box_it.forward(); } diff --git a/src/textord/makerow.h b/src/textord/makerow.h index 10e623c02e..60f45869ef 100644 --- a/src/textord/makerow.h +++ b/src/textord/makerow.h @@ -17,19 +17,18 @@ * **********************************************************************/ -#ifndef MAKEROW_H -#define MAKEROW_H +#ifndef MAKEROW_H +#define MAKEROW_H -#include "params.h" -#include "ocrblock.h" -#include "blobs.h" -#include "blobbox.h" -#include "statistc.h" +#include "blobbox.h" +#include "blobs.h" +#include "ocrblock.h" +#include "params.h" +#include "statistc.h" -enum OVERLAP_STATE -{ - ASSIGN, //assign it to row - REJECT, //reject it - dual overlap +enum OVERLAP_STATE { + ASSIGN, // assign it to row + REJECT, // reject it - dual overlap NEW_ROW }; @@ -41,247 +40,242 @@ enum ROW_CATEGORY { }; extern BOOL_VAR_H(textord_heavy_nr, FALSE, "Vigorously remove noise"); -extern BOOL_VAR_H (textord_show_initial_rows, FALSE, -"Display row accumulation"); -extern BOOL_VAR_H (textord_show_parallel_rows, FALSE, -"Display page correlated rows"); -extern BOOL_VAR_H (textord_show_expanded_rows, FALSE, -"Display rows after expanding"); -extern BOOL_VAR_H (textord_show_final_rows, FALSE, -"Display rows after final fitting"); -extern BOOL_VAR_H (textord_show_final_blobs, FALSE, -"Display blob bounds after pre-ass"); -extern BOOL_VAR_H (textord_test_landscape, FALSE, "Tests refer to land/port"); -extern BOOL_VAR_H (textord_parallel_baselines, TRUE, -"Force parallel baselines"); -extern BOOL_VAR_H (textord_straight_baselines, FALSE, -"Force straight baselines"); -extern BOOL_VAR_H (textord_quadratic_baselines, FALSE, -"Use quadratic splines"); -extern BOOL_VAR_H (textord_old_baselines, TRUE, "Use old baseline algorithm"); -extern BOOL_VAR_H (textord_old_xheight, TRUE, "Use old xheight algorithm"); -extern BOOL_VAR_H (textord_fix_xheight_bug, TRUE, "Use spline baseline"); -extern BOOL_VAR_H (textord_fix_makerow_bug, TRUE, -"Prevent multiple baselines"); -extern BOOL_VAR_H (textord_cblob_blockocc, TRUE, -"Use new projection for underlines"); -extern BOOL_VAR_H (textord_debug_xheights, FALSE, "Test xheight algorithms"); -extern INT_VAR_H (textord_test_x, -INT32_MAX, "coord of test pt"); -extern INT_VAR_H (textord_test_y, -INT32_MAX, "coord of test pt"); -extern INT_VAR_H (textord_min_blobs_in_row, 4, -"Min blobs before gradient counted"); -extern INT_VAR_H (textord_spline_minblobs, 8, -"Min blobs in each spline segment"); -extern INT_VAR_H (textord_spline_medianwin, 6, -"Size of window for spline segmentation"); -extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight"); -extern double_VAR_H (textord_spline_shift_fraction, 0.02, -"Fraction of line spacing for quad"); -extern double_VAR_H (textord_spline_outlier_fraction, 0.1, -"Fraction of line spacing for outlier"); -extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew"); -extern double_VAR_H (textord_skew_lag, 0.75, -"Lag for skew on row accumulation"); -extern double_VAR_H (textord_linespace_iqrlimit, 0.2, -"Max iqr/median for linespace"); -extern double_VAR_H (textord_width_limit, 8, -"Max width of blobs to make rows"); -extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping"); -extern double_VAR_H (textord_minxh, 0.25, -"fraction of linesize for min xheight"); -extern double_VAR_H (textord_min_linesize, 1.25, -"* blob height for initial linesize"); -extern double_VAR_H (textord_excess_blobsize, 1.3, -"New row made if blob makes row this big"); -extern double_VAR_H (textord_occupancy_threshold, 0.4, -"Fraction of neighbourhood"); -extern double_VAR_H (textord_underline_width, 2.0, -"Multiple of line_size for underline"); -extern double_VAR_H(textord_min_blob_height_fraction, 0.75, -"Min blob height/top to include blob top into xheight stats"); -extern double_VAR_H (textord_xheight_mode_fraction, 0.4, -"Min pile height to make xheight"); -extern double_VAR_H (textord_ascheight_mode_fraction, 0.15, -"Min pile height to make ascheight"); -extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight"); -extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight"); -extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight"); -extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight"); -extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation"); -extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do"); -extern BOOL_VAR_H (textord_new_initial_xheight, TRUE, -"Use test xheight mechanism"); +extern BOOL_VAR_H(textord_show_initial_rows, FALSE, "Display row accumulation"); +extern BOOL_VAR_H(textord_show_parallel_rows, FALSE, + "Display page correlated rows"); +extern BOOL_VAR_H(textord_show_expanded_rows, FALSE, + "Display rows after expanding"); +extern BOOL_VAR_H(textord_show_final_rows, FALSE, + "Display rows after final fitting"); +extern BOOL_VAR_H(textord_show_final_blobs, FALSE, + "Display blob bounds after pre-ass"); +extern BOOL_VAR_H(textord_test_landscape, FALSE, "Tests refer to land/port"); +extern BOOL_VAR_H(textord_parallel_baselines, TRUE, "Force parallel baselines"); +extern BOOL_VAR_H(textord_straight_baselines, FALSE, + "Force straight baselines"); +extern BOOL_VAR_H(textord_quadratic_baselines, FALSE, "Use quadratic splines"); +extern BOOL_VAR_H(textord_old_baselines, TRUE, "Use old baseline algorithm"); +extern BOOL_VAR_H(textord_old_xheight, TRUE, "Use old xheight algorithm"); +extern BOOL_VAR_H(textord_fix_xheight_bug, TRUE, "Use spline baseline"); +extern BOOL_VAR_H(textord_fix_makerow_bug, TRUE, "Prevent multiple baselines"); +extern BOOL_VAR_H(textord_cblob_blockocc, TRUE, + "Use new projection for underlines"); +extern BOOL_VAR_H(textord_debug_xheights, FALSE, "Test xheight algorithms"); +extern INT_VAR_H(textord_test_x, -INT32_MAX, "coord of test pt"); +extern INT_VAR_H(textord_test_y, -INT32_MAX, "coord of test pt"); +extern INT_VAR_H(textord_min_blobs_in_row, 4, + "Min blobs before gradient counted"); +extern INT_VAR_H(textord_spline_minblobs, 8, + "Min blobs in each spline segment"); +extern INT_VAR_H(textord_spline_medianwin, 6, + "Size of window for spline segmentation"); +extern INT_VAR_H(textord_min_xheight, 10, "Min credible pixel xheight"); +extern double_VAR_H(textord_spline_shift_fraction, 0.02, + "Fraction of line spacing for quad"); +extern double_VAR_H(textord_spline_outlier_fraction, 0.1, + "Fraction of line spacing for outlier"); +extern double_VAR_H(textord_skew_ile, 0.5, "Ile of gradients for page skew"); +extern double_VAR_H(textord_skew_lag, 0.75, "Lag for skew on row accumulation"); +extern double_VAR_H(textord_linespace_iqrlimit, 0.2, + "Max iqr/median for linespace"); +extern double_VAR_H(textord_width_limit, 8, "Max width of blobs to make rows"); +extern double_VAR_H(textord_chop_width, 1.5, "Max width before chopping"); +extern double_VAR_H(textord_minxh, 0.25, + "fraction of linesize for min xheight"); +extern double_VAR_H(textord_min_linesize, 1.25, + "* blob height for initial linesize"); +extern double_VAR_H(textord_excess_blobsize, 1.3, + "New row made if blob makes row this big"); +extern double_VAR_H(textord_occupancy_threshold, 0.4, + "Fraction of neighbourhood"); +extern double_VAR_H(textord_underline_width, 2.0, + "Multiple of line_size for underline"); +extern double_VAR_H( + textord_min_blob_height_fraction, 0.75, + "Min blob height/top to include blob top into xheight stats"); +extern double_VAR_H(textord_xheight_mode_fraction, 0.4, + "Min pile height to make xheight"); +extern double_VAR_H(textord_ascheight_mode_fraction, 0.15, + "Min pile height to make ascheight"); +extern double_VAR_H(textord_ascx_ratio_min, 1.2, "Min cap/xheight"); +extern double_VAR_H(textord_ascx_ratio_max, 1.7, "Max cap/xheight"); +extern double_VAR_H(textord_descx_ratio_min, 0.15, "Min desc/xheight"); +extern double_VAR_H(textord_descx_ratio_max, 0.6, "Max desc/xheight"); +extern double_VAR_H(textord_xheight_error_margin, 0.1, "Accepted variation"); +extern INT_VAR_H(textord_lms_line_trials, 12, "Number of linew fits to do"); +extern BOOL_VAR_H(textord_new_initial_xheight, TRUE, + "Use test xheight mechanism"); extern BOOL_VAR_H(textord_debug_blob, FALSE, "Print test blob information"); -inline void get_min_max_xheight(int block_linesize, - int *min_height, int *max_height) { +inline void get_min_max_xheight(int block_linesize, int* min_height, + int* max_height) { *min_height = static_cast(floor(block_linesize * textord_minxh)); if (*min_height < textord_min_xheight) *min_height = textord_min_xheight; *max_height = static_cast(ceil(block_linesize * 3.0)); } -inline ROW_CATEGORY get_row_category(const TO_ROW *row) { +inline ROW_CATEGORY get_row_category(const TO_ROW* row) { if (row->xheight <= 0) return ROW_INVALID; - return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND : - (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN; + return (row->ascrise > 0) + ? ROW_ASCENDERS_FOUND + : (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN; } inline bool within_error_margin(float test, float num, float margin) { return (test >= num * (1 - margin) && test <= num * (1 + margin)); } -void fill_heights(TO_ROW *row, float gradient, int min_height, - int max_height, STATS *heights, STATS *floating_heights); +void fill_heights(TO_ROW* row, float gradient, int min_height, int max_height, + STATS* heights, STATS* floating_heights); float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block, TO_BLOCK_LIST* blocks); -float make_rows(ICOORD page_tr, // top right - TO_BLOCK_LIST *port_blocks); +float make_rows(ICOORD page_tr, // top right + TO_BLOCK_LIST* port_blocks); void make_initial_textrows(ICOORD page_tr, - TO_BLOCK* block, // block to do - FCOORD rotation, // for drawing + TO_BLOCK* block, // block to do + FCOORD rotation, // for drawing bool testing_on); // correct orientation -void fit_lms_line(TO_ROW *row); -void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks - float &page_m, // average gradient - float &page_err); // average error +void fit_lms_line(TO_ROW* row); +void compute_page_skew(TO_BLOCK_LIST* blocks, // list of blocks + float& page_m, // average gradient + float& page_err); // average error void vigorous_noise_removal(TO_BLOCK* block); -void cleanup_rows_making(ICOORD page_tr, // top right - TO_BLOCK* block, // block to do - float gradient, // gradient to fit - FCOORD rotation, // for drawing - int32_t block_edge, // edge of block - bool testing_on); // correct orientation -void delete_non_dropout_rows( //find lines - TO_BLOCK* block, //block to do - float gradient, //global skew - FCOORD rotation, //deskew vector - int32_t block_edge, //left edge - bool testing_on //correct orientation +void cleanup_rows_making(ICOORD page_tr, // top right + TO_BLOCK* block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on); // correct orientation +void delete_non_dropout_rows( // find lines + TO_BLOCK* block, // block to do + float gradient, // global skew + FCOORD rotation, // deskew vector + int32_t block_edge, // left edge + bool testing_on // correct orientation +); +bool find_best_dropout_row( // find neighbours + TO_ROW* row, // row to test + int32_t distance, // dropout dist + float dist_limit, // threshold distance + int32_t line_index, // index of row + TO_ROW_IT* row_it, // current position + bool testing_on // correct orientation +); +TBOX deskew_block_coords( // block box + TO_BLOCK* block, // block to do + float gradient // global skew +); +void compute_line_occupation( // project blobs + TO_BLOCK* block, // block to do + float gradient, // global skew + int32_t min_y, // min coord in block + int32_t max_y, // in block + int32_t* occupation, // output projection + int32_t* deltas // derivative +); +void compute_occupation_threshold( // project blobs + int32_t low_window, // below result point + int32_t high_window, // above result point + int32_t line_count, // array sizes + int32_t* occupation, // input projection + int32_t* thresholds // output thresholds ); -bool find_best_dropout_row( //find neighbours - TO_ROW* row, //row to test - int32_t distance, //dropout dist - float dist_limit, //threshold distance - int32_t line_index, //index of row - TO_ROW_IT* row_it, //current position - bool testing_on //correct orientation +void compute_dropout_distances( // project blobs + int32_t* occupation, // input projection + int32_t* thresholds, // output thresholds + int32_t line_count // array sizes ); -TBOX deskew_block_coords( //block box - TO_BLOCK *block, //block to do - float gradient //global skew - ); -void compute_line_occupation( //project blobs - TO_BLOCK *block, //block to do - float gradient, //global skew - int32_t min_y, //min coord in block - int32_t max_y, //in block - int32_t *occupation, //output projection - int32_t *deltas //derivative - ); -void compute_occupation_threshold( //project blobs - int32_t low_window, //below result point - int32_t high_window, //above result point - int32_t line_count, //array sizes - int32_t *occupation, //input projection - int32_t *thresholds //output thresholds - ); -void compute_dropout_distances( //project blobs - int32_t *occupation, //input projection - int32_t *thresholds, //output thresholds - int32_t line_count //array sizes - ); -void expand_rows( //find lines - ICOORD page_tr, //top right - TO_BLOCK* block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - int32_t block_edge, //edge of block - bool testing_on //correct orientation +void expand_rows( // find lines + ICOORD page_tr, // top right + TO_BLOCK* block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on // correct orientation ); -void adjust_row_limits( //tidy limits - TO_BLOCK *block //block to do - ); -void compute_row_stats( //find lines - TO_BLOCK* block, //block to do - bool testing_on //correct orientation +void adjust_row_limits( // tidy limits + TO_BLOCK* block // block to do +); +void compute_row_stats( // find lines + TO_BLOCK* block, // block to do + bool testing_on // correct orientation +); +float median_block_xheight( // find lines + TO_BLOCK* block, // block to do + float gradient // global skew ); -float median_block_xheight( //find lines - TO_BLOCK *block, //block to do - float gradient //global skew - ); -int compute_xheight_from_modes( - STATS *heights, STATS *floating_heights, bool cap_only, int min_height, - int max_height, float *xheight, float *ascrise); +int compute_xheight_from_modes(STATS* heights, STATS* floating_heights, + bool cap_only, int min_height, int max_height, + float* xheight, float* ascrise); -int32_t compute_row_descdrop(TO_ROW *row, // row to do - float gradient, // global skew - int xheight_blob_count, - STATS *heights); -int32_t compute_height_modes(STATS *heights, // stats to search - int32_t min_height, // bottom of range - int32_t max_height, // top of range - int32_t *modes, // output array - int32_t maxmodes); // size of modes -void correct_row_xheight(TO_ROW *row, // row to fix - float xheight, // average values - float ascrise, - float descdrop); -void separate_underlines(TO_BLOCK* block, // block to do - float gradient, // skew angle - FCOORD rotation, // inverse landscape - bool testing_on); // correct orientation -void pre_associate_blobs(ICOORD page_tr, // top right - TO_BLOCK* block, // block to do - FCOORD rotation, // inverse landscape - bool testing_on); // correct orientation -void fit_parallel_rows(TO_BLOCK* block, // block to do - float gradient, // gradient to fit - FCOORD rotation, // for drawing - int32_t block_edge, // edge of block - bool testing_on); // correct orientation -void fit_parallel_lms(float gradient, // forced gradient - TO_ROW *row); // row to fit -void make_baseline_spline(TO_ROW *row, // row to fit - TO_BLOCK *block); // block it came from -bool segment_baseline( //split baseline - TO_ROW* row, //row to fit - TO_BLOCK* block, //block it came from - int32_t& segments, //no fo segments - int32_t* xstarts //coords of segments +int32_t compute_row_descdrop(TO_ROW* row, // row to do + float gradient, // global skew + int xheight_blob_count, STATS* heights); +int32_t compute_height_modes(STATS* heights, // stats to search + int32_t min_height, // bottom of range + int32_t max_height, // top of range + int32_t* modes, // output array + int32_t maxmodes); // size of modes +void correct_row_xheight(TO_ROW* row, // row to fix + float xheight, // average values + float ascrise, float descdrop); +void separate_underlines(TO_BLOCK* block, // block to do + float gradient, // skew angle + FCOORD rotation, // inverse landscape + bool testing_on); // correct orientation +void pre_associate_blobs(ICOORD page_tr, // top right + TO_BLOCK* block, // block to do + FCOORD rotation, // inverse landscape + bool testing_on); // correct orientation +void fit_parallel_rows(TO_BLOCK* block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on); // correct orientation +void fit_parallel_lms(float gradient, // forced gradient + TO_ROW* row); // row to fit +void make_baseline_spline(TO_ROW* row, // row to fit + TO_BLOCK* block); // block it came from +bool segment_baseline( // split baseline + TO_ROW* row, // row to fit + TO_BLOCK* block, // block it came from + int32_t& segments, // no fo segments + int32_t* xstarts // coords of segments +); +double* linear_spline_baseline( // split baseline + TO_ROW* row, // row to fit + TO_BLOCK* block, // block it came from + int32_t& segments, // no fo segments + int32_t xstarts[] // coords of segments ); -double *linear_spline_baseline ( //split baseline -TO_ROW * row, //row to fit -TO_BLOCK * block, //block it came from -int32_t & segments, //no fo segments -int32_t xstarts[] //coords of segments +void assign_blobs_to_rows( // find lines + TO_BLOCK* block, // block to do + float* gradient, // block skew + int pass, // identification + bool reject_misses, // chuck big ones out + bool make_new_rows, // add rows for unmatched + bool drawing_skew // draw smoothed skew ); -void assign_blobs_to_rows( //find lines - TO_BLOCK* block, //block to do - float* gradient, //block skew - int pass, //identification - bool reject_misses, //chuck big ones out - bool make_new_rows, //add rows for unmatched - bool drawing_skew //draw smoothed skew +// find best row +OVERLAP_STATE +most_overlapping_row(TO_ROW_IT* row_it, // iterator + TO_ROW*& best_row, // output row + float top, // top of blob + float bottom, // bottom of blob + float rowsize, // max row size + bool testing_blob // test stuff ); - //find best row -OVERLAP_STATE most_overlapping_row(TO_ROW_IT* row_it, //iterator - TO_ROW*& best_row, //output row - float top, //top of blob - float bottom, //bottom of blob - float rowsize, //max row size - bool testing_blob //test stuff - ); -int blob_x_order( //sort function - const void *item1, //items to compare - const void *item2); -int row_y_order( //sort function - const void *item1, //items to compare - const void *item2); -int row_spacing_order( //sort function - const void *item1, //items to compare - const void *item2); +int blob_x_order( // sort function + const void* item1, // items to compare + const void* item2); +int row_y_order( // sort function + const void* item1, // items to compare + const void* item2); +int row_spacing_order( // sort function + const void* item1, // items to compare + const void* item2); -void mark_repeated_chars(TO_ROW *row); +void mark_repeated_chars(TO_ROW* row); #endif diff --git a/src/textord/oldbasel.cpp b/src/textord/oldbasel.cpp index b55b3681ae..15dc890dc2 100644 --- a/src/textord/oldbasel.cpp +++ b/src/textord/oldbasel.cpp @@ -17,13 +17,13 @@ * **********************************************************************/ +#include "oldbasel.h" #include "ccstruct.h" -#include "statistc.h" -#include "quadlsq.h" #include "detlinefit.h" -#include "makerow.h" #include "drawtord.h" -#include "oldbasel.h" +#include "makerow.h" +#include "quadlsq.h" +#include "statistc.h" #include "textord.h" #include "tprintf.h" @@ -36,43 +36,41 @@ #define EXTERN -EXTERN BOOL_VAR (textord_really_old_xheight, FALSE, -"Use original wiseowl xheight"); -EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation"); -EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation"); -EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism"); -EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines"); -EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions"); -EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights"); -EXTERN BOOL_VAR (oldbl_xhfix, FALSE, -"Fix bug in modes threshold for xheights"); +EXTERN BOOL_VAR(textord_really_old_xheight, FALSE, + "Use original wiseowl xheight"); +EXTERN BOOL_VAR(textord_oldbl_debug, FALSE, "Debug old baseline generation"); +EXTERN BOOL_VAR(textord_debug_baselines, FALSE, "Debug baseline generation"); +EXTERN BOOL_VAR(textord_oldbl_paradef, TRUE, "Use para default mechanism"); +EXTERN BOOL_VAR(textord_oldbl_split_splines, TRUE, "Split stepped splines"); +EXTERN BOOL_VAR(textord_oldbl_merge_parts, TRUE, "Merge suspect partitions"); +EXTERN BOOL_VAR(oldbl_corrfix, TRUE, "Improve correlation of heights"); +EXTERN BOOL_VAR(oldbl_xhfix, FALSE, "Fix bug in modes threshold for xheights"); EXTERN BOOL_VAR(textord_ocropus_mode, FALSE, "Make baselines for ocropus"); -EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc"); -EXTERN INT_VAR (oldbl_holed_losscount, 10, -"Max lost before fallback line used"); -EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); -EXTERN double_VAR (textord_oldbl_jumplimit, 0.15, -"X fraction for new partition"); - -#define TURNLIMIT 1 /*min size for turning point */ -#define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */ -#define DESCENDER_FRACTION 0.5 /*descender/x-height */ -#define MIN_ASC_FRACTION 0.20 /*min size of ascenders */ -#define MIN_DESC_FRACTION 0.25 /*min size of descenders */ -#define MINASCRISE 2.0 /*min ascender/desc step */ -#define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */ -#define MAXHEIGHT 300 /*max blob height */ -#define MAXOVERLAP 0.1 /*max 10% missed overlap */ -#define MAXBADRUN 2 /*max non best for failed */ -#define HEIGHTBUCKETS 200 /* Num of buckets */ -#define DELTAHEIGHT 5.0 /* Small amount of diff */ -#define GOODHEIGHT 5 -#define MAXLOOPS 10 -#define MODENUM 10 -#define MAXPARTS 6 -#define SPLINESIZE 23 - -#define ABS(x) ((x)<0 ? (-(x)) : (x)) +EXTERN double_VAR(oldbl_xhfract, 0.4, "Fraction of est allowed in calc"); +EXTERN INT_VAR(oldbl_holed_losscount, 10, "Max lost before fallback line used"); +EXTERN double_VAR(oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); +EXTERN double_VAR(textord_oldbl_jumplimit, 0.15, + "X fraction for new partition"); + +#define TURNLIMIT 1 /*min size for turning point */ +#define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */ +#define DESCENDER_FRACTION 0.5 /*descender/x-height */ +#define MIN_ASC_FRACTION 0.20 /*min size of ascenders */ +#define MIN_DESC_FRACTION 0.25 /*min size of descenders */ +#define MINASCRISE 2.0 /*min ascender/desc step */ +#define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */ +#define MAXHEIGHT 300 /*max blob height */ +#define MAXOVERLAP 0.1 /*max 10% missed overlap */ +#define MAXBADRUN 2 /*max non best for failed */ +#define HEIGHTBUCKETS 200 /* Num of buckets */ +#define DELTAHEIGHT 5.0 /* Small amount of diff */ +#define GOODHEIGHT 5 +#define MAXLOOPS 10 +#define MODENUM 10 +#define MAXPARTS 6 +#define SPLINESIZE 23 + +#define ABS(x) ((x) < 0 ? (-(x)) : (x)) namespace tesseract { @@ -82,15 +80,15 @@ namespace tesseract { * Top level function to make baselines the old way. **********************************************************************/ -void Textord::make_old_baselines(TO_BLOCK* block, // block to do +void Textord::make_old_baselines(TO_BLOCK* block, // block to do bool testing_on, // correct orientation float gradient) { - QSPLINE *prev_baseline; // baseline of previous row - TO_ROW *row; // current row + QSPLINE* prev_baseline; // baseline of previous row + TO_ROW* row; // current row TO_ROW_IT row_it = block->get_rows(); BLOBNBOX_IT blob_it; - prev_baseline = nullptr; // nothing yet + prev_baseline = nullptr; // nothing yet for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); find_textlines(block, row, 2, nullptr); @@ -103,15 +101,14 @@ void Textord::make_old_baselines(TO_BLOCK* block, // block to do blob_it.set_to_list(row->blob_list()); if (textord_debug_baselines) tprintf("Row baseline generation failed on row at (%d,%d)\n", - blob_it.data()->bounding_box().left(), - blob_it.data()->bounding_box().bottom()); + blob_it.data()->bounding_box().left(), + blob_it.data()->bounding_box().bottom()); } } correlate_lines(block, gradient); block->block->set_xheight(block->xheight); } - /********************************************************************** * correlate_lines * @@ -120,34 +117,34 @@ void Textord::make_old_baselines(TO_BLOCK* block, // block to do * Also fix baselines of rows without a decent fit. **********************************************************************/ -void Textord::correlate_lines(TO_BLOCK *block, float gradient) { - TO_ROW **rows; //array of ptrs - int rowcount; /*no of rows to do */ - int rowindex; /*no of row */ - // iterator - TO_ROW_IT row_it = block->get_rows (); +void Textord::correlate_lines(TO_BLOCK* block, float gradient) { + TO_ROW** rows; // array of ptrs + int rowcount; /*no of rows to do */ + int rowindex; /*no of row */ + // iterator + TO_ROW_IT row_it = block->get_rows(); - rowcount = row_it.length (); + rowcount = row_it.length(); if (rowcount == 0) { - //default value + // default value block->xheight = block->line_size; - return; /*none to do */ + return; /*none to do */ } - rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *)); + rows = (TO_ROW**)alloc_mem(rowcount * sizeof(TO_ROW*)); rowindex = 0; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - //make array - rows[rowindex++] = row_it.data (); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) + // make array + rows[rowindex++] = row_it.data(); - /*try to fix bad lines */ + /*try to fix bad lines */ correlate_neighbours(block, rows, rowcount); if (textord_really_old_xheight || textord_old_xheight) { - block->xheight = (float) correlate_with_stats(rows, rowcount, block); + block->xheight = (float)correlate_with_stats(rows, rowcount, block); if (block->xheight <= 0) block->xheight = block->line_size * tesseract::CCStruct::kXHeightFraction; if (block->xheight < textord_min_xheight) - block->xheight = (float) textord_min_xheight; + block->xheight = (float)textord_min_xheight; } else { compute_block_xheight(block, gradient); } @@ -155,39 +152,40 @@ void Textord::correlate_lines(TO_BLOCK *block, float gradient) { free_mem(rows); } - /********************************************************************** * correlate_neighbours * * Try to fix rows that had a bad spline fit by using neighbours. **********************************************************************/ -void Textord::correlate_neighbours(TO_BLOCK *block, // block rows are in. - TO_ROW **rows, // rows of block. +void Textord::correlate_neighbours(TO_BLOCK* block, // block rows are in. + TO_ROW** rows, // rows of block. int rowcount) { // no of rows to do. - TO_ROW *row; /*current row */ - int rowindex; /*no of row */ - int otherrow; /*second row */ - int upperrow; /*row above to use */ - int lowerrow; /*row below to use */ + TO_ROW* row; /*current row */ + int rowindex; /*no of row */ + int otherrow; /*second row */ + int upperrow; /*row above to use */ + int lowerrow; /*row below to use */ float biggest; for (rowindex = 0; rowindex < rowcount; rowindex++) { - row = rows[rowindex]; /*current row */ + row = rows[rowindex]; /*current row */ if (row->xheight < 0) { - /*quadratic failed */ + /*quadratic failed */ for (otherrow = rowindex - 2; - otherrow >= 0 - && (rows[otherrow]->xheight < 0.0 - || !row->baseline.overlap (&rows[otherrow]->baseline, - MAXOVERLAP)); otherrow--); - upperrow = otherrow; /*decent row above */ + otherrow >= 0 && + (rows[otherrow]->xheight < 0.0 || + !row->baseline.overlap(&rows[otherrow]->baseline, MAXOVERLAP)); + otherrow--) + ; + upperrow = otherrow; /*decent row above */ for (otherrow = rowindex + 1; - otherrow < rowcount - && (rows[otherrow]->xheight < 0.0 - || !row->baseline.overlap (&rows[otherrow]->baseline, - MAXOVERLAP)); otherrow++); - lowerrow = otherrow; /*decent row below */ + otherrow < rowcount && + (rows[otherrow]->xheight < 0.0 || + !row->baseline.overlap(&rows[otherrow]->baseline, MAXOVERLAP)); + otherrow++) + ; + lowerrow = otherrow; /*decent row below */ if (upperrow >= 0) find_textlines(block, row, 2, &rows[upperrow]->baseline); if (row->xheight < 0 && lowerrow < rowcount) @@ -202,15 +200,14 @@ void Textord::correlate_neighbours(TO_BLOCK *block, // block rows are in. } for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) { - row = rows[rowindex]; /*current row */ - if (row->xheight < 0) /*linear failed */ - /*make do */ - row->xheight = -row->xheight; + row = rows[rowindex]; /*current row */ + if (row->xheight < 0) /*linear failed */ + /*make do */ + row->xheight = -row->xheight; biggest = std::max(biggest, row->xheight); } } - /********************************************************************** * correlate_with_stats * @@ -218,59 +215,57 @@ void Textord::correlate_neighbours(TO_BLOCK *block, // block rows are in. * the ascender height and descender height for rows without one. **********************************************************************/ -int Textord::correlate_with_stats(TO_ROW **rows, // rows of block. +int Textord::correlate_with_stats(TO_ROW** rows, // rows of block. int rowcount, // no of rows to do. TO_BLOCK* block) { - TO_ROW *row; /*current row */ - int rowindex; /*no of row */ - float lineheight; /*mean x-height */ - float ascheight; /*average ascenders */ - float minascheight; /*min allowed ascheight */ - int xcount; /*no of samples for xheight */ - float fullheight; /*mean top height */ - int fullcount; /*no of samples */ - float descheight; /*mean descender drop */ - float mindescheight; /*min allowed descheight */ - int desccount; /*no of samples */ - - /*no samples */ + TO_ROW* row; /*current row */ + int rowindex; /*no of row */ + float lineheight; /*mean x-height */ + float ascheight; /*average ascenders */ + float minascheight; /*min allowed ascheight */ + int xcount; /*no of samples for xheight */ + float fullheight; /*mean top height */ + int fullcount; /*no of samples */ + float descheight; /*mean descender drop */ + float mindescheight; /*min allowed descheight */ + int desccount; /*no of samples */ + + /*no samples */ xcount = fullcount = desccount = 0; lineheight = ascheight = fullheight = descheight = 0.0; for (rowindex = 0; rowindex < rowcount; rowindex++) { - row = rows[rowindex]; /*current row */ - if (row->ascrise > 0.0) { /*got ascenders? */ - lineheight += row->xheight;/*average x-heights */ - ascheight += row->ascrise; /*average ascenders */ + row = rows[rowindex]; /*current row */ + if (row->ascrise > 0.0) { /*got ascenders? */ + lineheight += row->xheight; /*average x-heights */ + ascheight += row->ascrise; /*average ascenders */ xcount++; - } - else { - fullheight += row->xheight;/*assume full height */ + } else { + fullheight += row->xheight; /*assume full height */ fullcount++; } - if (row->descdrop < 0.0) { /*got descenders? */ - /*average descenders */ + if (row->descdrop < 0.0) { /*got descenders? */ + /*average descenders */ descheight += row->descdrop; desccount++; } } if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) { - lineheight /= xcount; /*average x-height */ - /*average caps height */ + lineheight /= xcount; /*average x-height */ + /*average caps height */ fullheight = lineheight + ascheight / xcount; - /*must be decent size */ + /*must be decent size */ if (fullheight < lineheight * (1 + MIN_ASC_FRACTION)) fullheight = lineheight * (1 + MIN_ASC_FRACTION); - } - else { - fullheight /= fullcount; /*average max height */ - /*guess x-height */ + } else { + fullheight /= fullcount; /*average max height */ + /*guess x-height */ lineheight = fullheight * X_HEIGHT_FRACTION; } if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2)) - descheight /= desccount; /*average descenders */ + descheight /= desccount; /*average descenders */ else - /*guess descenders */ + /*guess descenders */ descheight = -lineheight * DESCENDER_FRACTION; if (lineheight > 0.0f) @@ -279,149 +274,134 @@ int Textord::correlate_with_stats(TO_ROW **rows, // rows of block. minascheight = lineheight * MIN_ASC_FRACTION; mindescheight = -lineheight * MIN_DESC_FRACTION; for (rowindex = 0; rowindex < rowcount; rowindex++) { - row = rows[rowindex]; /*do each row */ + row = rows[rowindex]; /*do each row */ row->all_caps = false; if (row->ascrise / row->xheight < MIN_ASC_FRACTION) { - /*no ascenders */ - if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) - && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) { + /*no ascenders */ + if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) && + row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) { row->ascrise = fullheight - lineheight; - /*set to average */ + /*set to average */ row->xheight = lineheight; - } - else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE) - && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) { + } else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE) && + row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) { row->ascrise = row->xheight - lineheight; - /*set to average */ + /*set to average */ row->xheight = lineheight; row->all_caps = true; - } - else { - row->ascrise = (fullheight - lineheight) * row->xheight - / fullheight; - /*scale it */ + } else { + row->ascrise = (fullheight - lineheight) * row->xheight / fullheight; + /*scale it */ row->xheight -= row->ascrise; row->all_caps = true; } if (row->ascrise < minascheight) row->ascrise = - row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION); + row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION); } if (row->descdrop > mindescheight) { - if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) - && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) - /*set to average */ - row->descdrop = descheight; + if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) && + row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) + /*set to average */ + row->descdrop = descheight; else row->descdrop = -row->xheight * DESCENDER_FRACTION; } } - return (int) lineheight; //block xheight + return (int)lineheight; // block xheight } - /********************************************************************** * find_textlines * * Compute the baseline for the given row. **********************************************************************/ -void Textord::find_textlines(TO_BLOCK *block, // block row is in - TO_ROW *row, // row to do - int degree, // required approximation - QSPLINE *spline) { // starting spline - int partcount; /*no of partitions of */ - bool holed_line = false; //lost too many blobs - int bestpart; /*biggest partition */ - char *partids; /*partition no of each blob */ - int partsizes[MAXPARTS]; /*no in each partition */ - int lineheight; /*guessed x-height */ - float jumplimit; /*allowed delta change */ - int *xcoords; /*useful sample points */ - int *ycoords; /*useful sample points */ - TBOX *blobcoords; /*edges of blob rectangles */ - int blobcount; /*no of blobs on line */ - float *ydiffs; /*diffs from 1st approx */ - int pointcount; /*no of coords */ - int xstarts[SPLINESIZE + 1]; //segment boundaries - int segments; //no of segments - - //no of blobs in row - blobcount = row->blob_list ()->length (); - partids = (char *) alloc_mem (blobcount * sizeof (char)); - xcoords = (int *) alloc_mem (blobcount * sizeof (int)); - ycoords = (int *) alloc_mem (blobcount * sizeof (int)); - blobcoords = (TBOX *) alloc_mem (blobcount * sizeof (TBOX)); - ydiffs = (float *) alloc_mem (blobcount * sizeof (float)); - - lineheight = get_blob_coords (row, (int) block->line_size, blobcoords, - holed_line, blobcount); - /*limit for line change */ +void Textord::find_textlines(TO_BLOCK* block, // block row is in + TO_ROW* row, // row to do + int degree, // required approximation + QSPLINE* spline) { // starting spline + int partcount; /*no of partitions of */ + bool holed_line = false; // lost too many blobs + int bestpart; /*biggest partition */ + char* partids; /*partition no of each blob */ + int partsizes[MAXPARTS]; /*no in each partition */ + int lineheight; /*guessed x-height */ + float jumplimit; /*allowed delta change */ + int* xcoords; /*useful sample points */ + int* ycoords; /*useful sample points */ + TBOX* blobcoords; /*edges of blob rectangles */ + int blobcount; /*no of blobs on line */ + float* ydiffs; /*diffs from 1st approx */ + int pointcount; /*no of coords */ + int xstarts[SPLINESIZE + 1]; // segment boundaries + int segments; // no of segments + + // no of blobs in row + blobcount = row->blob_list()->length(); + partids = (char*)alloc_mem(blobcount * sizeof(char)); + xcoords = (int*)alloc_mem(blobcount * sizeof(int)); + ycoords = (int*)alloc_mem(blobcount * sizeof(int)); + blobcoords = (TBOX*)alloc_mem(blobcount * sizeof(TBOX)); + ydiffs = (float*)alloc_mem(blobcount * sizeof(float)); + + lineheight = get_blob_coords(row, (int)block->line_size, blobcoords, + holed_line, blobcount); + /*limit for line change */ jumplimit = lineheight * textord_oldbl_jumplimit; - if (jumplimit < MINASCRISE) - jumplimit = MINASCRISE; + if (jumplimit < MINASCRISE) jumplimit = MINASCRISE; if (textord_oldbl_debug) { - tprintf - ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n", - block->line_size, lineheight, jumplimit); + tprintf("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n", + block->line_size, lineheight, jumplimit); } if (holed_line) - make_holed_baseline (blobcoords, blobcount, spline, &row->baseline, - row->line_m ()); + make_holed_baseline(blobcoords, blobcount, spline, &row->baseline, + row->line_m()); else - make_first_baseline (blobcoords, blobcount, - xcoords, ycoords, spline, &row->baseline, jumplimit); + make_first_baseline(blobcoords, blobcount, xcoords, ycoords, spline, + &row->baseline, jumplimit); #ifndef GRAPHICS_DISABLED if (textord_show_final_rows) - row->baseline.plot (to_win, ScrollView::GOLDENROD); + row->baseline.plot(to_win, ScrollView::GOLDENROD); #endif if (blobcount > 1) { - bestpart = partition_line (blobcoords, blobcount, - &partcount, partids, partsizes, - &row->baseline, jumplimit, ydiffs); - pointcount = partition_coords (blobcoords, blobcount, - partids, bestpart, xcoords, ycoords); - segments = segment_spline (blobcoords, blobcount, - xcoords, ycoords, - degree, pointcount, xstarts); + bestpart = partition_line(blobcoords, blobcount, &partcount, partids, + partsizes, &row->baseline, jumplimit, ydiffs); + pointcount = partition_coords(blobcoords, blobcount, partids, bestpart, + xcoords, ycoords); + segments = segment_spline(blobcoords, blobcount, xcoords, ycoords, degree, + pointcount, xstarts); if (!holed_line) { do { - row->baseline = QSPLINE (xstarts, segments, - xcoords, ycoords, pointcount, degree); - } - while (textord_oldbl_split_splines - && split_stepped_spline (&row->baseline, jumplimit / 2, - xcoords, xstarts, segments)); + row->baseline = + QSPLINE(xstarts, segments, xcoords, ycoords, pointcount, degree); + } while (textord_oldbl_split_splines && + split_stepped_spline(&row->baseline, jumplimit / 2, xcoords, + xstarts, segments)); } - find_lesser_parts(row, - blobcoords, - blobcount, - partids, - partsizes, - partcount, + find_lesser_parts(row, blobcoords, blobcount, partids, partsizes, partcount, bestpart); - } - else { - row->xheight = -1.0f; /*failed */ + } else { + row->xheight = -1.0f; /*failed */ row->descdrop = 0.0f; row->ascrise = 0.0f; } - row->baseline.extrapolate (row->line_m (), - block->block->pdblk.bounding_box ().left (), - block->block->pdblk.bounding_box ().right ()); + row->baseline.extrapolate(row->line_m(), + block->block->pdblk.bounding_box().left(), + block->block->pdblk.bounding_box().right()); if (textord_really_old_xheight) { - old_first_xheight (row, blobcoords, lineheight, - blobcount, &row->baseline, jumplimit); + old_first_xheight(row, blobcoords, lineheight, blobcount, &row->baseline, + jumplimit); } else if (textord_old_xheight) { - make_first_xheight (row, blobcoords, lineheight, (int) block->line_size, - blobcount, &row->baseline, jumplimit); + make_first_xheight(row, blobcoords, lineheight, (int)block->line_size, + blobcount, &row->baseline, jumplimit); } else { - compute_row_xheight(row, block->block->classify_rotation(), - row->line_m(), block->line_size); + compute_row_xheight(row, block->block->classify_rotation(), row->line_m(), + block->line_size); } free_mem(partids); free_mem(xcoords); @@ -432,7 +412,6 @@ void Textord::find_textlines(TO_BLOCK *block, // block row is in } // namespace tesseract. - /********************************************************************** * get_blob_coords * @@ -440,67 +419,60 @@ void Textord::find_textlines(TO_BLOCK *block, // block row is in * in the row. The return value is the first guess at the line height. **********************************************************************/ -int get_blob_coords( //get boxes - TO_ROW* row, //row to use - int32_t lineheight, //block level - TBOX* blobcoords, //ouput boxes - bool& holed_line, //lost a lot of blobs - int& outcount //no of real blobs +int get_blob_coords( // get boxes + TO_ROW* row, // row to use + int32_t lineheight, // block level + TBOX* blobcoords, // ouput boxes + bool& holed_line, // lost a lot of blobs + int& outcount // no of real blobs ) { - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - int blobindex; /*no along text line */ - int losscount; //lost blobs - int maxlosscount; //greatest lost blobs - /*height stat collection */ - STATS heightstat (0, MAXHEIGHT); - - if (blob_it.empty ()) - return 0; //none + // blobs + BLOBNBOX_IT blob_it = row->blob_list(); + int blobindex; /*no along text line */ + int losscount; // lost blobs + int maxlosscount; // greatest lost blobs + /*height stat collection */ + STATS heightstat(0, MAXHEIGHT); + + if (blob_it.empty()) return 0; // none maxlosscount = 0; losscount = 0; - blob_it.mark_cycle_pt (); + blob_it.mark_cycle_pt(); blobindex = 0; do { - blobcoords[blobindex] = box_next_pre_chopped (&blob_it); - if (blobcoords[blobindex].height () > lineheight * 0.25) - heightstat.add (blobcoords[blobindex].height (), 1); - if (blobindex == 0 - || blobcoords[blobindex].height () > lineheight * 0.25 - || blob_it.cycled_list ()) { - blobindex++; /*no of merged blobs */ + blobcoords[blobindex] = box_next_pre_chopped(&blob_it); + if (blobcoords[blobindex].height() > lineheight * 0.25) + heightstat.add(blobcoords[blobindex].height(), 1); + if (blobindex == 0 || blobcoords[blobindex].height() > lineheight * 0.25 || + blob_it.cycled_list()) { + blobindex++; /*no of merged blobs */ losscount = 0; - } - else { - if (blobcoords[blobindex].height () - < blobcoords[blobindex].width () * oldbl_dot_error_size - && blobcoords[blobindex].width () - < blobcoords[blobindex].height () * oldbl_dot_error_size) { - //counts as dot + } else { + if (blobcoords[blobindex].height() < + blobcoords[blobindex].width() * oldbl_dot_error_size && + blobcoords[blobindex].width() < + blobcoords[blobindex].height() * oldbl_dot_error_size) { + // counts as dot blobindex++; losscount = 0; - } - else { - losscount++; //lost it + } else { + losscount++; // lost it if (losscount > maxlosscount) - //remember max - maxlosscount = losscount; + // remember max + maxlosscount = losscount; } } - } - while (!blob_it.cycled_list ()); + } while (!blob_it.cycled_list()); holed_line = maxlosscount > oldbl_holed_losscount; - outcount = blobindex; /*total blobs */ + outcount = blobindex; /*total blobs */ - if (heightstat.get_total () > 1) - /*guess x-height */ - return (int) heightstat.ile (0.25); + if (heightstat.get_total() > 1) /*guess x-height */ + return (int)heightstat.ile(0.25); else - return blobcoords[0].height (); + return blobcoords[0].height(); } - /********************************************************************** * make_first_baseline * @@ -509,141 +481,133 @@ int get_blob_coords( //get boxes * approximation using all the blobs. **********************************************************************/ -void -make_first_baseline ( //initial approximation -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*no of blobcoords */ -int xcoords[], /*coords for spline */ -int ycoords[], /*approximator */ -QSPLINE * spline, /*initial spline */ -QSPLINE * baseline, /*output spline */ -float jumplimit /*guess half descenders */ +void make_first_baseline( // initial approximation + TBOX blobcoords[], /*blob bounding boxes */ + int blobcount, /*no of blobcoords */ + int xcoords[], /*coords for spline */ + int ycoords[], /*approximator */ + QSPLINE* spline, /*initial spline */ + QSPLINE* baseline, /*output spline */ + float jumplimit /*guess half descenders */ ) { - int leftedge; /*left edge of line */ - int rightedge; /*right edge of line */ - int blobindex; /*current blob */ - int segment; /*current segment */ - float prevy, thisy, nexty; /*3 y coords */ - float y1, y2, y3; /*3 smooth blobs */ - float maxmax, minmin; /*absolute limits */ - int x2 = 0; /*right edge of old y3 */ - int ycount; /*no of ycoords in use */ - float yturns[SPLINESIZE]; /*y coords of turn pts */ - int xturns[SPLINESIZE]; /*xcoords of turn pts */ + int leftedge; /*left edge of line */ + int rightedge; /*right edge of line */ + int blobindex; /*current blob */ + int segment; /*current segment */ + float prevy, thisy, nexty; /*3 y coords */ + float y1, y2, y3; /*3 smooth blobs */ + float maxmax, minmin; /*absolute limits */ + int x2 = 0; /*right edge of old y3 */ + int ycount; /*no of ycoords in use */ + float yturns[SPLINESIZE]; /*y coords of turn pts */ + int xturns[SPLINESIZE]; /*xcoords of turn pts */ int xstarts[SPLINESIZE + 1]; - int segments; //no of segments - ICOORD shift; //shift of spline + int segments; // no of segments + ICOORD shift; // shift of spline prevy = 0; - /*left edge of row */ - leftedge = blobcoords[0].left (); - /*right edge of line */ - rightedge = blobcoords[blobcount - 1].right (); - if (spline == nullptr /*no given spline */ - || spline->segments < 3 /*or trivial */ - /*or too non-overlap */ - || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge) - || spline->xcoords[spline->segments - 1] < rightedge - - MAXOVERLAP * (rightedge - leftedge)) { - if (textord_oldbl_paradef) - return; //use default - xstarts[0] = blobcoords[0].left () - 1; + /*left edge of row */ + leftedge = blobcoords[0].left(); + /*right edge of line */ + rightedge = blobcoords[blobcount - 1].right(); + if (spline == nullptr /*no given spline */ + || spline->segments < 3 /*or trivial */ + /*or too non-overlap */ + || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge) || + spline->xcoords[spline->segments - 1] < + rightedge - MAXOVERLAP * (rightedge - leftedge)) { + if (textord_oldbl_paradef) return; // use default + xstarts[0] = blobcoords[0].left() - 1; for (blobindex = 0; blobindex < blobcount; blobindex++) { - xcoords[blobindex] = (blobcoords[blobindex].left () - + blobcoords[blobindex].right ()) / 2; - ycoords[blobindex] = blobcoords[blobindex].bottom (); + xcoords[blobindex] = + (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2; + ycoords[blobindex] = blobcoords[blobindex].bottom(); } - xstarts[1] = blobcoords[blobcount - 1].right () + 1; - segments = 1; /*no of segments */ + xstarts[1] = blobcoords[blobcount - 1].right() + 1; + segments = 1; /*no of segments */ - /*linear */ - *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1); + /*linear */ + *baseline = QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1); if (blobcount >= 3) { y1 = y2 = y3 = 0.0f; ycount = 0; - segment = 0; /*no of segments */ + segment = 0; /*no of segments */ maxmax = minmin = 0.0f; - thisy = ycoords[0] - baseline->y (xcoords[0]); - nexty = ycoords[1] - baseline->y (xcoords[1]); + thisy = ycoords[0] - baseline->y(xcoords[0]); + nexty = ycoords[1] - baseline->y(xcoords[1]); for (blobindex = 2; blobindex < blobcount; blobindex++) { - prevy = thisy; /*shift ycoords */ + prevy = thisy; /*shift ycoords */ thisy = nexty; - nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]); - /*middle of smooth y */ - if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) { - y1 = y2; /*shift window */ + nexty = ycoords[blobindex] - baseline->y(xcoords[blobindex]); + /*middle of smooth y */ + if (ABS(thisy - prevy) < jumplimit && ABS(thisy - nexty) < jumplimit) { + y1 = y2; /*shift window */ y2 = y3; - y3 = thisy; /*middle point */ + y3 = thisy; /*middle point */ ycount++; - /*local max */ + /*local max */ if (ycount >= 3 && ((y1 < y2 && y2 >= y3) - /*local min */ - || (y1 > y2 && y2 <= y3))) { + /*local min */ + || (y1 > y2 && y2 <= y3))) { if (segment < SPLINESIZE - 2) { - /*turning pt */ + /*turning pt */ xturns[segment] = x2; yturns[segment] = y2; - segment++; /*no of spline segs */ + segment++; /*no of spline segs */ } } if (ycount == 1) { - maxmax = minmin = y3;/*initialise limits */ - } - else { - if (y3 > maxmax) - maxmax = y3; /*biggest max */ - if (y3 < minmin) - minmin = y3; /*smallest min */ + maxmax = minmin = y3; /*initialise limits */ + } else { + if (y3 > maxmax) maxmax = y3; /*biggest max */ + if (y3 < minmin) minmin = y3; /*smallest min */ } - /*possible turning pt */ - x2 = blobcoords[blobindex - 1].right (); + /*possible turning pt */ + x2 = blobcoords[blobindex - 1].right(); } } jumplimit *= 1.2; - /*must be wavy */ + /*must be wavy */ if (maxmax - minmin > jumplimit) { - ycount = segment; /*no of segments */ - for (blobindex = 0, segment = 1; blobindex < ycount; - blobindex++) { - if (yturns[blobindex] > minmin + jumplimit - || yturns[blobindex] < maxmax - jumplimit) { - /*significant peak */ - if (segment == 1 - || yturns[blobindex] > prevy + jumplimit - || yturns[blobindex] < prevy - jumplimit) { - /*different to previous */ + ycount = segment; /*no of segments */ + for (blobindex = 0, segment = 1; blobindex < ycount; blobindex++) { + if (yturns[blobindex] > minmin + jumplimit || + yturns[blobindex] < maxmax - jumplimit) { + /*significant peak */ + if (segment == 1 || yturns[blobindex] > prevy + jumplimit || + yturns[blobindex] < prevy - jumplimit) { + /*different to previous */ xstarts[segment] = xturns[blobindex]; segment++; prevy = yturns[blobindex]; } - /*bigger max */ + /*bigger max */ else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy) - /*smaller min */ - || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) { + /*smaller min */ + || (prevy < maxmax - jumplimit && + yturns[blobindex] < prevy)) { xstarts[segment - 1] = xturns[blobindex]; - /*improved previous */ + /*improved previous */ prevy = yturns[blobindex]; } } } - xstarts[segment] = blobcoords[blobcount - 1].right () + 1; - segments = segment; /*no of segments */ - /*linear */ - *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1); + xstarts[segment] = blobcoords[blobcount - 1].right() + 1; + segments = segment; /*no of segments */ + /*linear */ + *baseline = QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1); } } - } - else { - *baseline = *spline; /*copy it */ - shift = ICOORD (0, (int16_t) (blobcoords[0].bottom () - - spline->y (blobcoords[0].right ()))); - baseline->move (shift); + } else { + *baseline = *spline; /*copy it */ + shift = ICOORD(0, (int16_t)(blobcoords[0].bottom() - + spline->y(blobcoords[0].right()))); + baseline->move(shift); } } - /********************************************************************** * make_holed_baseline * @@ -652,33 +616,32 @@ float jumplimit /*guess half descenders */ * approximation using all the blobs. **********************************************************************/ -void -make_holed_baseline ( //initial approximation -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*no of blobcoords */ -QSPLINE * spline, /*initial spline */ -QSPLINE * baseline, /*output spline */ -float gradient //of line +void make_holed_baseline( // initial approximation + TBOX blobcoords[], /*blob bounding boxes */ + int blobcount, /*no of blobcoords */ + QSPLINE* spline, /*initial spline */ + QSPLINE* baseline, /*output spline */ + float gradient // of line ) { - int leftedge; /*left edge of line */ - int rightedge; /*right edge of line */ - int blobindex; /*current blob */ - float x; //centre of row - ICOORD shift; //shift of spline + int leftedge; /*left edge of line */ + int rightedge; /*right edge of line */ + int blobindex; /*current blob */ + float x; // centre of row + ICOORD shift; // shift of spline tesseract::DetLineFit lms; // straight baseline - int32_t xstarts[2]; //straight line + int32_t xstarts[2]; // straight line double coeffs[3]; - float c; //line parameter + float c; // line parameter - /*left edge of row */ - leftedge = blobcoords[0].left (); - /*right edge of line */ + /*left edge of row */ + leftedge = blobcoords[0].left(); + /*right edge of line */ rightedge = blobcoords[blobcount - 1].right(); for (blobindex = 0; blobindex < blobcount; blobindex++) { - lms.Add(ICOORD((blobcoords[blobindex].left() + - blobcoords[blobindex].right()) / 2, - blobcoords[blobindex].bottom())); + lms.Add(ICOORD( + (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2, + blobcoords[blobindex].bottom())); } lms.ConstrainedFit(gradient, &c); xstarts[0] = leftedge; @@ -686,21 +649,20 @@ float gradient //of line coeffs[0] = 0; coeffs[1] = gradient; coeffs[2] = c; - *baseline = QSPLINE (1, xstarts, coeffs); - if (spline != nullptr /*no given spline */ - && spline->segments >= 3 /*or trivial */ - /*or too non-overlap */ - && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge) - && spline->xcoords[spline->segments - 1] >= rightedge - - MAXOVERLAP * (rightedge - leftedge)) { - *baseline = *spline; /*copy it */ + *baseline = QSPLINE(1, xstarts, coeffs); + if (spline != nullptr /*no given spline */ + && spline->segments >= 3 /*or trivial */ + /*or too non-overlap */ + && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge) && + spline->xcoords[spline->segments - 1] >= + rightedge - MAXOVERLAP * (rightedge - leftedge)) { + *baseline = *spline; /*copy it */ x = (leftedge + rightedge) / 2.0; - shift = ICOORD (0, (int16_t) (gradient * x + c - spline->y (x))); - baseline->move (shift); + shift = ICOORD(0, (int16_t)(gradient * x + c - spline->y(x))); + baseline->move(shift); } } - /********************************************************************** * partition_line * @@ -710,80 +672,72 @@ float gradient //of line * The return value is the biggest partition **********************************************************************/ -int -partition_line ( //partition blobs -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs on row */ -int *numparts, /*number of partitions */ -char partids[], /*partition no of each blob */ -int partsizes[], /*no in each partition */ -QSPLINE * spline, /*curve to fit to */ -float jumplimit, /*allowed delta change */ -float ydiffs[] /*diff from spline */ +int partition_line( // partition blobs + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs on row */ + int* numparts, /*number of partitions */ + char partids[], /*partition no of each blob */ + int partsizes[], /*no in each partition */ + QSPLINE* spline, /*curve to fit to */ + float jumplimit, /*allowed delta change */ + float ydiffs[] /*diff from spline */ ) { - int blobindex; /*no along text line */ - int bestpart; /*best new partition */ - int biggestpart; /*part with most members */ - float diff; /*difference from line */ - int startx; /*index of start blob */ - float partdiffs[MAXPARTS]; /*step between parts */ + int blobindex; /*no along text line */ + int bestpart; /*best new partition */ + int biggestpart; /*part with most members */ + float diff; /*difference from line */ + int startx; /*index of start blob */ + float partdiffs[MAXPARTS]; /*step between parts */ for (bestpart = 0; bestpart < MAXPARTS; bestpart++) - partsizes[bestpart] = 0; /*zero them all */ + partsizes[bestpart] = 0; /*zero them all */ - startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs); - *numparts = 1; /*1 partition */ - bestpart = -1; /*first point */ + startx = get_ydiffs(blobcoords, blobcount, spline, ydiffs); + *numparts = 1; /*1 partition */ + bestpart = -1; /*first point */ float drift = 0.0f; float last_delta = 0.0f; for (blobindex = startx; blobindex < blobcount; blobindex++) { - /*do each blob in row */ - diff = ydiffs[blobindex]; /*diff from line */ + /*do each blob in row */ + diff = ydiffs[blobindex]; /*diff from line */ if (textord_oldbl_debug) { - tprintf ("%d(%d,%d), ", blobindex, - blobcoords[blobindex].left (), - blobcoords[blobindex].bottom ()); + tprintf("%d(%d,%d), ", blobindex, blobcoords[blobindex].left(), + blobcoords[blobindex].bottom()); } - bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, - &drift, &last_delta, numparts); - /*record partition */ + bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, + &last_delta, numparts); + /*record partition */ partids[blobindex] = bestpart; - partsizes[bestpart]++; /*another in it */ + partsizes[bestpart]++; /*another in it */ } - bestpart = -1; /*first point */ + bestpart = -1; /*first point */ drift = 0.0f; last_delta = 0.0f; - partsizes[0]--; /*doing 1st pt again */ - /*do each blob in row */ + partsizes[0]--; /*doing 1st pt again */ + /*do each blob in row */ for (blobindex = startx; blobindex >= 0; blobindex--) { - diff = ydiffs[blobindex]; /*diff from line */ + diff = ydiffs[blobindex]; /*diff from line */ if (textord_oldbl_debug) { - tprintf ("%d(%d,%d), ", blobindex, - blobcoords[blobindex].left (), - blobcoords[blobindex].bottom ()); + tprintf("%d(%d,%d), ", blobindex, blobcoords[blobindex].left(), + blobcoords[blobindex].bottom()); } - bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, - &drift, &last_delta, numparts); - /*record partition */ + bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, + &last_delta, numparts); + /*record partition */ partids[blobindex] = bestpart; - partsizes[bestpart]++; /*another in it */ + partsizes[bestpart]++; /*another in it */ } for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++) if (partsizes[bestpart] >= partsizes[biggestpart]) - biggestpart = bestpart; /*new biggest */ + biggestpart = bestpart; /*new biggest */ if (textord_oldbl_merge_parts) - merge_oldbl_parts(blobcoords, - blobcount, - partids, - partsizes, - biggestpart, + merge_oldbl_parts(blobcoords, blobcount, partids, partsizes, biggestpart, jumplimit); - return biggestpart; /*biggest partition */ + return biggestpart; /*biggest partition */ } - /********************************************************************** * merge_oldbl_parts * @@ -791,96 +745,88 @@ float ydiffs[] /*diff from spline */ * main part if they fit closely to neighbours in the main part. **********************************************************************/ -void -merge_oldbl_parts ( //partition blobs -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs on row */ -char partids[], /*partition no of each blob */ -int partsizes[], /*no in each partition */ -int biggestpart, //major partition -float jumplimit /*allowed delta change */ +void merge_oldbl_parts( // partition blobs + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs on row */ + char partids[], /*partition no of each blob */ + int partsizes[], /*no in each partition */ + int biggestpart, // major partition + float jumplimit /*allowed delta change */ ) { - bool found_one; //found a bestpart blob - bool close_one; //found was close enough - int blobindex; /*no along text line */ - int prevpart; //previous iteration - int runlength; //no in this part - float diff; /*difference from line */ - int startx; /*index of start blob */ - int test_blob; //another index - FCOORD coord; //blob coordinate - float m, c; //fitted line - QLSQ stats; //line stuff + bool found_one; // found a bestpart blob + bool close_one; // found was close enough + int blobindex; /*no along text line */ + int prevpart; // previous iteration + int runlength; // no in this part + float diff; /*difference from line */ + int startx; /*index of start blob */ + int test_blob; // another index + FCOORD coord; // blob coordinate + float m, c; // fitted line + QLSQ stats; // line stuff prevpart = biggestpart; runlength = 0; startx = 0; for (blobindex = 0; blobindex < blobcount; blobindex++) { if (partids[blobindex] != prevpart) { - // tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n", + // tprintf("Partition change at (%d,%d) from %d to %d + // after run of %d\n", // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(), // prevpart,partids[blobindex],runlength); if (prevpart != biggestpart && runlength > MAXBADRUN) { - stats.clear (); + stats.clear(); for (test_blob = startx; test_blob < blobindex; test_blob++) { - coord = FCOORD ((blobcoords[test_blob].left () - + blobcoords[test_blob].right ()) / 2.0, - blobcoords[test_blob].bottom ()); - stats.add (coord.x (), coord.y ()); + coord = FCOORD( + (blobcoords[test_blob].left() + blobcoords[test_blob].right()) / + 2.0, + blobcoords[test_blob].bottom()); + stats.add(coord.x(), coord.y()); } - stats.fit (1); - m = stats.get_b (); - c = stats.get_c (); - if (textord_oldbl_debug) - tprintf ("Fitted line y=%g x + %g\n", m, c); + stats.fit(1); + m = stats.get_b(); + c = stats.get_c(); + if (textord_oldbl_debug) tprintf("Fitted line y=%g x + %g\n", m, c); found_one = false; close_one = false; - for (test_blob = 1; !found_one - && (startx - test_blob >= 0 - || blobindex + test_blob <= blobcount); test_blob++) { - if (startx - test_blob >= 0 - && partids[startx - test_blob] == biggestpart) { + for (test_blob = 1; !found_one && (startx - test_blob >= 0 || + blobindex + test_blob <= blobcount); + test_blob++) { + if (startx - test_blob >= 0 && + partids[startx - test_blob] == biggestpart) { found_one = true; - coord = FCOORD ((blobcoords[startx - test_blob].left () - + blobcoords[startx - - test_blob].right ()) / - 2.0, - blobcoords[startx - - test_blob].bottom ()); - diff = m * coord.x () + c - coord.y (); + coord = FCOORD((blobcoords[startx - test_blob].left() + + blobcoords[startx - test_blob].right()) / + 2.0, + blobcoords[startx - test_blob].bottom()); + diff = m * coord.x() + c - coord.y(); if (textord_oldbl_debug) - tprintf - ("Diff of common blob to suspect part=%g at (%g,%g)\n", - diff, coord.x (), coord.y ()); - if (diff < jumplimit && -diff < jumplimit) - close_one = true; + tprintf("Diff of common blob to suspect part=%g at (%g,%g)\n", + diff, coord.x(), coord.y()); + if (diff < jumplimit && -diff < jumplimit) close_one = true; } - if (blobindex + test_blob <= blobcount - && partids[blobindex + test_blob - 1] == biggestpart) { + if (blobindex + test_blob <= blobcount && + partids[blobindex + test_blob - 1] == biggestpart) { found_one = true; - coord = - FCOORD ((blobcoords[blobindex + test_blob - 1]. - left () + blobcoords[blobindex + test_blob - - 1].right ()) / 2.0, - blobcoords[blobindex + test_blob - - 1].bottom ()); - diff = m * coord.x () + c - coord.y (); + coord = FCOORD((blobcoords[blobindex + test_blob - 1].left() + + blobcoords[blobindex + test_blob - 1].right()) / + 2.0, + blobcoords[blobindex + test_blob - 1].bottom()); + diff = m * coord.x() + c - coord.y(); if (textord_oldbl_debug) - tprintf - ("Diff of common blob to suspect part=%g at (%g,%g)\n", - diff, coord.x (), coord.y ()); - if (diff < jumplimit && -diff < jumplimit) - close_one = true; + tprintf("Diff of common blob to suspect part=%g at (%g,%g)\n", + diff, coord.x(), coord.y()); + if (diff < jumplimit && -diff < jumplimit) close_one = true; } } if (close_one) { if (textord_oldbl_debug) - tprintf - ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n", - runlength, biggestpart, prevpart, - blobcoords[startx].left (), - blobcoords[startx].bottom ()); - //switch sides + tprintf( + "Merged %d blobs back into part %d from %d starting at " + "(%d,%d)\n", + runlength, biggestpart, prevpart, blobcoords[startx].left(), + blobcoords[startx].bottom()); + // switch sides partsizes[prevpart] -= runlength; for (test_blob = startx; test_blob < blobindex; test_blob++) partids[test_blob] = biggestpart; @@ -889,13 +835,11 @@ float jumplimit /*allowed delta change */ prevpart = partids[blobindex]; runlength = 1; startx = blobindex; - } - else + } else runlength++; } } - /********************************************************************** * get_ydiffs * @@ -904,42 +848,41 @@ float jumplimit /*allowed delta change */ * of the blob in the middle of the "best behaved" region **********************************************************************/ -int -get_ydiffs ( //evaluate differences -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs */ -QSPLINE * spline, /*approximating spline */ -float ydiffs[] /*output */ +int get_ydiffs( // evaluate differences + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs */ + QSPLINE* spline, /*approximating spline */ + float ydiffs[] /*output */ ) { - int blobindex; /*current blob */ - int xcentre; /*xcoord */ - int lastx; /*last xcentre */ - float diffsum; /*sum of diffs */ - float diff; /*current difference */ - float drift; /*sum of spline steps */ - float bestsum; /*smallest diffsum */ - int bestindex; /*index of bestsum */ + int blobindex; /*current blob */ + int xcentre; /*xcoord */ + int lastx; /*last xcentre */ + float diffsum; /*sum of diffs */ + float diff; /*current difference */ + float drift; /*sum of spline steps */ + float bestsum; /*smallest diffsum */ + int bestindex; /*index of bestsum */ diffsum = 0.0f; bestindex = 0; - bestsum = (float) INT32_MAX; + bestsum = (float)INT32_MAX; drift = 0.0f; - lastx = blobcoords[0].left (); - /*do each blob in row */ + lastx = blobcoords[0].left(); + /*do each blob in row */ for (blobindex = 0; blobindex < blobcount; blobindex++) { - /*centre of blob */ - xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; - //step functions in spline - drift += spline->step (lastx, xcentre); + /*centre of blob */ + xcentre = + (blobcoords[blobindex].left() + blobcoords[blobindex].right()) >> 1; + // step functions in spline + drift += spline->step(lastx, xcentre); lastx = xcentre; - diff = blobcoords[blobindex].bottom (); - diff -= spline->y (xcentre); + diff = blobcoords[blobindex].bottom(); + diff -= spline->y(xcentre); diff += drift; - ydiffs[blobindex] = diff; /*store difference */ - if (blobindex > 2) - /*remove old one */ - diffsum -= ABS (ydiffs[blobindex - 3]); - diffsum += ABS (diff); /*add new one */ + ydiffs[blobindex] = diff; /*store difference */ + if (blobindex > 2) /*remove old one */ + diffsum -= ABS(ydiffs[blobindex - 3]); + diffsum += ABS(diff); /*add new one */ if (blobindex >= 2 && diffsum < bestsum) { bestsum = diffsum; /*find min sum */ bestindex = blobindex - 1; /*middle of set */ @@ -948,73 +891,67 @@ float ydiffs[] /*output */ return bestindex; } - /********************************************************************** * choose_partition * * Choose a partition for the point and return the index. **********************************************************************/ -int -choose_partition ( //select partition -float diff, /*diff from spline */ -float partdiffs[], /*diff on all parts */ -int lastpart, /*last assigned partition */ -float jumplimit, /*new part threshold */ -float* drift, -float* lastdelta, -int *partcount /*no of partitions */ +int choose_partition( // select partition + float diff, /*diff from spline */ + float partdiffs[], /*diff on all parts */ + int lastpart, /*last assigned partition */ + float jumplimit, /*new part threshold */ + float* drift, float* lastdelta, int* partcount /*no of partitions */ ) { - int partition; /*partition no */ - int bestpart; /*best new partition */ - float bestdelta; /*best gap from a part */ - float delta; /*diff from part */ + int partition; /*partition no */ + int bestpart; /*best new partition */ + float bestdelta; /*best gap from a part */ + float delta; /*diff from part */ if (lastpart < 0) { partdiffs[0] = diff; - lastpart = 0; /*first point */ + lastpart = 0; /*first point */ *drift = 0.0f; *lastdelta = 0.0f; } - /*adjusted diff from part */ + /*adjusted diff from part */ delta = diff - partdiffs[lastpart] - *drift; if (textord_oldbl_debug) { - tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift); + tprintf("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift); } - if (ABS (delta) > jumplimit / 2) { - /*delta on part 0 */ + if (ABS(delta) > jumplimit / 2) { + /*delta on part 0 */ bestdelta = diff - partdiffs[0] - *drift; - bestpart = 0; /*0 best so far */ + bestpart = 0; /*0 best so far */ for (partition = 1; partition < *partcount; partition++) { delta = diff - partdiffs[partition] - *drift; - if (ABS (delta) < ABS (bestdelta)) { + if (ABS(delta) < ABS(bestdelta)) { bestdelta = delta; - bestpart = partition; /*part with nearest jump */ + bestpart = partition; /*part with nearest jump */ } } delta = bestdelta; - /*too far away */ - if (ABS (bestdelta) > jumplimit - && *partcount < MAXPARTS) { /*and spare part left */ + /*too far away */ + if (ABS(bestdelta) > jumplimit && + *partcount < MAXPARTS) { /*and spare part left */ bestpart = (*partcount)++; /*best was new one */ /*start new one */ partdiffs[bestpart] = diff - *drift; delta = 0.0f; } - } - else { - bestpart = lastpart; /*best was last one */ + } else { + bestpart = lastpart; /*best was last one */ } - if (bestpart == lastpart - && (ABS (delta - *lastdelta) < jumplimit / 2 - || ABS (delta) < jumplimit / 2)) - /*smooth the drift */ + if (bestpart == lastpart && + (ABS(delta - *lastdelta) < jumplimit / 2 || ABS(delta) < jumplimit / 2)) + /*smooth the drift */ *drift = (3 * *drift + delta) / 3; *lastdelta = delta; if (textord_oldbl_debug) { - tprintf ("P=%d\n", bestpart); + tprintf("P=%d\n", bestpart); } return bestpart; @@ -1027,30 +964,29 @@ int *partcount /*no of partitions */ * in xcoords,ycoords. Return the number of points found. **********************************************************************/ -int -partition_coords ( //find relevant coords -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs in row */ -char partids[], /*partition no of each blob */ -int bestpart, /*best new partition */ -int xcoords[], /*points to work on */ -int ycoords[] /*points to work on */ +int partition_coords( // find relevant coords + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs in row */ + char partids[], /*partition no of each blob */ + int bestpart, /*best new partition */ + int xcoords[], /*points to work on */ + int ycoords[] /*points to work on */ ) { - int blobindex; /*no along text line */ - int pointcount; /*no of points */ + int blobindex; /*no along text line */ + int pointcount; /*no of points */ pointcount = 0; for (blobindex = 0; blobindex < blobcount; blobindex++) { if (partids[blobindex] == bestpart) { - /*centre of blob */ - xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; - ycoords[pointcount++] = blobcoords[blobindex].bottom (); + /*centre of blob */ + xcoords[pointcount] = + (blobcoords[blobindex].left() + blobcoords[blobindex].right()) >> 1; + ycoords[pointcount++] = blobcoords[blobindex].bottom(); } } - return pointcount; /*no of points found */ + return pointcount; /*no of points found */ } - /********************************************************************** * segment_spline * @@ -1058,85 +994,76 @@ int ycoords[] /*points to work on */ * The xstarts of the segments are returned and the number found. **********************************************************************/ -int -segment_spline ( //make xstarts -TBOX blobcoords[], //boundign boxes -int blobcount, /*no of blobs in row */ -int xcoords[], /*points to work on */ -int ycoords[], /*points to work on */ -int degree, int pointcount, /*no of points */ -int xstarts[] //result +int segment_spline( // make xstarts + TBOX blobcoords[], // boundign boxes + int blobcount, /*no of blobs in row */ + int xcoords[], /*points to work on */ + int ycoords[], /*points to work on */ + int degree, int pointcount, /*no of points */ + int xstarts[] // result ) { - int ptindex; /*no along text line */ - int segment; /*partition no */ - int lastmin, lastmax; /*possible turn points */ - int turnpoints[SPLINESIZE]; /*good turning points */ - int turncount; /*no of turning points */ - int max_x; //max specified coord - - xstarts[0] = xcoords[0] - 1; //leftmost defined pt + int ptindex; /*no along text line */ + int segment; /*partition no */ + int lastmin, lastmax; /*possible turn points */ + int turnpoints[SPLINESIZE]; /*good turning points */ + int turncount; /*no of turning points */ + int max_x; // max specified coord + + xstarts[0] = xcoords[0] - 1; // leftmost defined pt max_x = xcoords[pointcount - 1] + 1; - if (degree < 2) - pointcount = 0; - turncount = 0; /*no turning points yet */ + if (degree < 2) pointcount = 0; + turncount = 0; /*no turning points yet */ if (pointcount > 3) { ptindex = 1; - lastmax = lastmin = 0; /*start with first one */ + lastmax = lastmin = 0; /*start with first one */ while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) { - /*minimum */ - if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) { + /*minimum */ + if (ycoords[ptindex - 1] > ycoords[ptindex] && + ycoords[ptindex] <= ycoords[ptindex + 1]) { if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) { if (turncount == 0 || turnpoints[turncount - 1] != lastmax) - /*new max point */ + /*new max point */ turnpoints[turncount++] = lastmax; - lastmin = ptindex; /*latest minimum */ - } - else if (ycoords[ptindex] < ycoords[lastmin]) { - lastmin = ptindex; /*lower minimum */ + lastmin = ptindex; /*latest minimum */ + } else if (ycoords[ptindex] < ycoords[lastmin]) { + lastmin = ptindex; /*lower minimum */ } } - /*maximum */ - if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) { + /*maximum */ + if (ycoords[ptindex - 1] < ycoords[ptindex] && + ycoords[ptindex] >= ycoords[ptindex + 1]) { if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) { if (turncount == 0 || turnpoints[turncount - 1] != lastmin) - /*new min point */ + /*new min point */ turnpoints[turncount++] = lastmin; - lastmax = ptindex; /*latest maximum */ - } - else if (ycoords[ptindex] > ycoords[lastmax]) { - lastmax = ptindex; /*higher maximum */ + lastmax = ptindex; /*latest maximum */ + } else if (ycoords[ptindex] > ycoords[lastmax]) { + lastmax = ptindex; /*higher maximum */ } } ptindex++; } - /*possible global min */ - if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT - && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) { - if (turncount < SPLINESIZE - 1) - /*2 more turns */ + /*possible global min */ + if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT && + (turncount == 0 || turnpoints[turncount - 1] != lastmax)) { + if (turncount < SPLINESIZE - 1) /*2 more turns */ turnpoints[turncount++] = lastmax; - if (turncount < SPLINESIZE - 1) - turnpoints[turncount++] = ptindex; - } - else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT - /*possible global max */ - && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) { - if (turncount < SPLINESIZE - 1) - /*2 more turns */ + if (turncount < SPLINESIZE - 1) turnpoints[turncount++] = ptindex; + } else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT + /*possible global max */ + && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) { + if (turncount < SPLINESIZE - 1) /*2 more turns */ turnpoints[turncount++] = lastmin; - if (turncount < SPLINESIZE - 1) - turnpoints[turncount++] = ptindex; - } - else if (turncount > 0 && turnpoints[turncount - 1] == lastmin - && turncount < SPLINESIZE - 1) { + if (turncount < SPLINESIZE - 1) turnpoints[turncount++] = ptindex; + } else if (turncount > 0 && turnpoints[turncount - 1] == lastmin && + turncount < SPLINESIZE - 1) { if (ycoords[ptindex] > ycoords[lastmax]) turnpoints[turncount++] = ptindex; else turnpoints[turncount++] = lastmax; - } - else if (turncount > 0 && turnpoints[turncount - 1] == lastmax - && turncount < SPLINESIZE - 1) { + } else if (turncount > 0 && turnpoints[turncount - 1] == lastmax && + turncount < SPLINESIZE - 1) { if (ycoords[ptindex] < ycoords[lastmin]) turnpoints[turncount++] = ptindex; else @@ -1145,37 +1072,44 @@ int xstarts[] //result } if (textord_oldbl_debug && turncount > 0) - tprintf ("First turn is %d at (%d,%d)\n", - turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]); + tprintf("First turn is %d at (%d,%d)\n", turnpoints[0], + xcoords[turnpoints[0]], ycoords[turnpoints[0]]); for (segment = 1; segment < turncount; segment++) { - /*centre y coord */ - lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2; + /*centre y coord */ + lastmax = + (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2; /* fix alg so that it works with both rising and falling sections */ if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) - /*find rising y centre */ - for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++); + /*find rising y centre */ + for (ptindex = turnpoints[segment - 1] + 1; + ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; + ptindex++) + ; else - /*find falling y centre */ - for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++); - - /*centre x */ - xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] - + xcoords[turnpoints[segment - 1]] - + xcoords[turnpoints[segment]] + 2) / 4; + /*find falling y centre */ + for (ptindex = turnpoints[segment - 1] + 1; + ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; + ptindex++) + ; + + /*centre x */ + xstarts[segment] = + (xcoords[ptindex - 1] + xcoords[ptindex] + + xcoords[turnpoints[segment - 1]] + xcoords[turnpoints[segment]] + 2) / + 4; /*halfway between turns */ if (textord_oldbl_debug) - tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", - segment, turnpoints[segment], - xcoords[turnpoints[segment]], ycoords[turnpoints[segment]], - ptindex - 1, xcoords[ptindex - 1], xstarts[segment]); + tprintf("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", segment, + turnpoints[segment], xcoords[turnpoints[segment]], + ycoords[turnpoints[segment]], ptindex - 1, xcoords[ptindex - 1], + xstarts[segment]); } xstarts[segment] = max_x; - return segment; /*no of splines */ + return segment; /*no of splines */ } - /********************************************************************** * split_stepped_spline * @@ -1183,96 +1117,77 @@ int xstarts[] //result * Return TRUE if any were done. **********************************************************************/ -bool -split_stepped_spline( //make xstarts - QSPLINE* baseline, //current shot - float jumplimit, //max step fuction - int* xcoords, /*points to work on */ - int* xstarts, //result - int& segments //no of segments +bool split_stepped_spline( // make xstarts + QSPLINE* baseline, // current shot + float jumplimit, // max step fuction + int* xcoords, /*points to work on */ + int* xstarts, // result + int& segments // no of segments ) { - bool doneany; //return value - int segment; /*partition no */ + bool doneany; // return value + int segment; /*partition no */ int startindex, centreindex, endindex; float leftcoord, rightcoord; int leftindex, rightindex; - float step; //spline step + float step; // spline step doneany = false; startindex = 0; for (segment = 1; segment < segments - 1; segment++) { - step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0, - (xstarts[segment] + xstarts[segment + 1]) / 2.0); - if (step < 0) - step = -step; + step = baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0, + (xstarts[segment] + xstarts[segment + 1]) / 2.0); + if (step < 0) step = -step; if (step > jumplimit) { - while (xcoords[startindex] < xstarts[segment - 1]) - startindex++; + while (xcoords[startindex] < xstarts[segment - 1]) startindex++; centreindex = startindex; - while (xcoords[centreindex] < xstarts[segment]) - centreindex++; + while (xcoords[centreindex] < xstarts[segment]) centreindex++; endindex = centreindex; - while (xcoords[endindex] < xstarts[segment + 1]) - endindex++; + while (xcoords[endindex] < xstarts[segment + 1]) endindex++; if (segments >= SPLINESIZE) { if (textord_debug_baselines) - tprintf ("Too many segments to resegment spline!!\n"); - } - else if (endindex - startindex >= textord_spline_medianwin * 3) { - while (centreindex - startindex < - textord_spline_medianwin * 3 / 2) + tprintf("Too many segments to resegment spline!!\n"); + } else if (endindex - startindex >= textord_spline_medianwin * 3) { + while (centreindex - startindex < textord_spline_medianwin * 3 / 2) centreindex++; - while (endindex - centreindex < - textord_spline_medianwin * 3 / 2) + while (endindex - centreindex < textord_spline_medianwin * 3 / 2) centreindex--; leftindex = (startindex + startindex + centreindex) / 3; rightindex = (centreindex + endindex + endindex) / 3; - leftcoord = - (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0; - rightcoord = - (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0; - while (xcoords[leftindex] > leftcoord - && leftindex - startindex > textord_spline_medianwin) + leftcoord = (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0; + rightcoord = (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0; + while (xcoords[leftindex] > leftcoord && + leftindex - startindex > textord_spline_medianwin) leftindex--; - while (xcoords[leftindex] < leftcoord - && centreindex - leftindex > - textord_spline_medianwin / 2) + while (xcoords[leftindex] < leftcoord && + centreindex - leftindex > textord_spline_medianwin / 2) leftindex++; - if (xcoords[leftindex] - leftcoord > - leftcoord - xcoords[leftindex - 1]) + if (xcoords[leftindex] - leftcoord > leftcoord - xcoords[leftindex - 1]) leftindex--; - while (xcoords[rightindex] > rightcoord - && rightindex - centreindex > - textord_spline_medianwin / 2) + while (xcoords[rightindex] > rightcoord && + rightindex - centreindex > textord_spline_medianwin / 2) rightindex--; - while (xcoords[rightindex] < rightcoord - && endindex - rightindex > textord_spline_medianwin) + while (xcoords[rightindex] < rightcoord && + endindex - rightindex > textord_spline_medianwin) rightindex++; if (xcoords[rightindex] - rightcoord > - rightcoord - xcoords[rightindex - 1]) + rightcoord - xcoords[rightindex - 1]) rightindex--; if (textord_debug_baselines) - tprintf ("Splitting spline at %d with step %g at (%d,%d)\n", - xstarts[segment], - baseline-> - step ((xstarts[segment - 1] + - xstarts[segment]) / 2.0, - (xstarts[segment] + - xstarts[segment + 1]) / 2.0), - (xcoords[leftindex - 1] + xcoords[leftindex]) / 2, - (xcoords[rightindex - 1] + xcoords[rightindex]) / 2); - insert_spline_point (xstarts, segment, - (xcoords[leftindex - 1] + - xcoords[leftindex]) / 2, - (xcoords[rightindex - 1] + - xcoords[rightindex]) / 2, segments); + tprintf( + "Splitting spline at %d with step %g at (%d,%d)\n", + xstarts[segment], + baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0, + (xstarts[segment] + xstarts[segment + 1]) / 2.0), + (xcoords[leftindex - 1] + xcoords[leftindex]) / 2, + (xcoords[rightindex - 1] + xcoords[rightindex]) / 2); + insert_spline_point( + xstarts, segment, (xcoords[leftindex - 1] + xcoords[leftindex]) / 2, + (xcoords[rightindex - 1] + xcoords[rightindex]) / 2, segments); doneany = true; - } - else if (textord_debug_baselines) { - tprintf - ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", - startindex, centreindex, endindex, - (int32_t) textord_spline_medianwin); + } else if (textord_debug_baselines) { + tprintf("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", + startindex, centreindex, endindex, + (int32_t)textord_spline_medianwin); } } // else tprintf("Spline step at %d is %g\n", @@ -1283,21 +1198,20 @@ split_stepped_spline( //make xstarts return doneany; } - /********************************************************************** * insert_spline_point * * Insert a new spline point and shuffle up the others. **********************************************************************/ -void -insert_spline_point ( //get descenders -int xstarts[], //starts to shuffle -int segment, //insertion pt -int coord1, //coords to add -int coord2, int &segments //total segments +void insert_spline_point( // get descenders + int xstarts[], // starts to shuffle + int segment, // insertion pt + int coord1, // coords to add + int coord2, + int& segments // total segments ) { - int index; //for shuffling + int index; // for shuffling for (index = segments; index > segment; index--) xstarts[index + 1] = xstarts[index]; @@ -1306,7 +1220,6 @@ int coord2, int &segments //total segments xstarts[segment + 1] = coord2; } - /********************************************************************** * find_lesser_parts * @@ -1314,51 +1227,48 @@ int coord2, int &segments //total segments * and find the commonest partition which has a descender. **********************************************************************/ -void -find_lesser_parts ( //get descenders -TO_ROW * row, //row to process -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs */ -char partids[], /*partition of each blob */ -int partsizes[], /*size of each part */ -int partcount, /*no of partitions */ -int bestpart /*biggest partition */ +void find_lesser_parts( // get descenders + TO_ROW* row, // row to process + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs */ + char partids[], /*partition of each blob */ + int partsizes[], /*size of each part */ + int partcount, /*no of partitions */ + int bestpart /*biggest partition */ ) { - int blobindex; /*index of blob */ - int partition; /*current partition */ - int xcentre; /*centre of blob */ - int poscount; /*count of best up step */ - int negcount; /*count of best down step */ - float partsteps[MAXPARTS]; /*average step to part */ - float bestneg; /*best down step */ - int runlength; /*length of bad run */ - int biggestrun; /*biggest bad run */ + int blobindex; /*index of blob */ + int partition; /*current partition */ + int xcentre; /*centre of blob */ + int poscount; /*count of best up step */ + int negcount; /*count of best down step */ + float partsteps[MAXPARTS]; /*average step to part */ + float bestneg; /*best down step */ + int runlength; /*length of bad run */ + int biggestrun; /*biggest bad run */ biggestrun = 0; for (partition = 0; partition < partcount; partition++) - partsteps[partition] = 0.0; /*zero accumulators */ + partsteps[partition] = 0.0; /*zero accumulators */ for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) { - xcentre = (blobcoords[blobindex].left () - + blobcoords[blobindex].right ()) >> 1; - /*in other parts */ + xcentre = + (blobcoords[blobindex].left() + blobcoords[blobindex].right()) >> 1; + /*in other parts */ int part_id = static_cast(static_cast(partids[blobindex])); if (part_id != bestpart) { - runlength++; /*run of non bests */ - if (runlength > biggestrun) - biggestrun = runlength; - partsteps[part_id] += blobcoords[blobindex].bottom() - - row->baseline.y(xcentre); - } - else + runlength++; /*run of non bests */ + if (runlength > biggestrun) biggestrun = runlength; + partsteps[part_id] += + blobcoords[blobindex].bottom() - row->baseline.y(xcentre); + } else runlength = 0; } if (biggestrun > MAXBADRUN) - row->xheight = -1.0f; /*failed */ + row->xheight = -1.0f; /*failed */ else - row->xheight = 1.0f; /*success */ + row->xheight = 1.0f; /*success */ poscount = negcount = 0; - bestneg = 0.0; /*no step yet */ + bestneg = 0.0; /*no step yet */ for (partition = 0; partition < partcount; partition++) { if (partition != bestpart) { // by jetsoft divide by zero possible @@ -1368,25 +1278,24 @@ int bestpart /*biggest partition */ partsteps[partition] /= partsizes[partition]; // - if (partsteps[partition] >= MINASCRISE - && partsizes[partition] > poscount) { + if (partsteps[partition] >= MINASCRISE && + partsizes[partition] > poscount) { poscount = partsizes[partition]; } - if (partsteps[partition] <= -MINASCRISE - && partsizes[partition] > negcount) { - /*ascender rise */ + if (partsteps[partition] <= -MINASCRISE && + partsizes[partition] > negcount) { + /*ascender rise */ bestneg = partsteps[partition]; - /*2nd most popular */ + /*2nd most popular */ negcount = partsizes[partition]; } } } - /*average x-height */ + /*average x-height */ partsteps[bestpart] /= blobcount; row->descdrop = bestneg; } - /********************************************************************** * old_first_xheight * @@ -1395,83 +1304,76 @@ int bestpart /*biggest partition */ * It also finds the ascender height if it can. **********************************************************************/ -void -old_first_xheight ( //the wiseowl way -TO_ROW * row, /*current row */ -TBOX blobcoords[], /*blob bounding boxes */ -int initialheight, //initial guess -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline, /*established */ -float jumplimit /*min ascender height */ +void old_first_xheight( // the wiseowl way + TO_ROW* row, /*current row */ + TBOX blobcoords[], /*blob bounding boxes */ + int initialheight, // initial guess + int blobcount, /*blobs in blobcoords */ + QSPLINE* baseline, /*established */ + float jumplimit /*min ascender height */ ) { int blobindex; /*current blob */ /*height statistics */ - STATS heightstat (0, MAXHEIGHT); - int height; /*height of blob */ - int xcentre; /*centre of blob */ - int lineheight; /*approx xheight */ - float ascenders; /*ascender sum */ - int asccount; /*no of ascenders */ - float xsum; /*xheight sum */ - int xcount; /*xheight count */ - float diff; /*height difference */ + STATS heightstat(0, MAXHEIGHT); + int height; /*height of blob */ + int xcentre; /*centre of blob */ + int lineheight; /*approx xheight */ + float ascenders; /*ascender sum */ + int asccount; /*no of ascenders */ + float xsum; /*xheight sum */ + int xcount; /*xheight count */ + float diff; /*height difference */ if (blobcount > 1) { for (blobindex = 0; blobindex < blobcount; blobindex++) { - xcentre = (blobcoords[blobindex].left () - + blobcoords[blobindex].right ()) / 2; - /*height of blob */ - height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5); - if (height > initialheight * oldbl_xhfract - && height > textord_min_xheight) - heightstat.add (height, 1); - } - if (heightstat.get_total () > 3) { - lineheight = (int) heightstat.ile (0.25); - if (lineheight <= 0) - lineheight = (int) heightstat.ile (0.5); + xcentre = + (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2; + /*height of blob */ + height = (int)(blobcoords[blobindex].top() - baseline->y(xcentre) + 0.5); + if (height > initialheight * oldbl_xhfract && + height > textord_min_xheight) + heightstat.add(height, 1); } - else + if (heightstat.get_total() > 3) { + lineheight = (int)heightstat.ile(0.25); + if (lineheight <= 0) lineheight = (int)heightstat.ile(0.5); + } else lineheight = initialheight; - } - else { - lineheight = (int) (blobcoords[0].top () - - baseline->y ((blobcoords[0].left () - + blobcoords[0].right ()) / 2) + - 0.5); + } else { + lineheight = + (int)(blobcoords[0].top() - + baseline->y((blobcoords[0].left() + blobcoords[0].right()) / 2) + + 0.5); } xsum = 0.0f; xcount = 0; for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; - blobindex++) { - xcentre = (blobcoords[blobindex].left () - + blobcoords[blobindex].right ()) / 2; - diff = blobcoords[blobindex].top () - baseline->y (xcentre); - /*is it ascender */ + blobindex++) { + xcentre = + (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2; + diff = blobcoords[blobindex].top() - baseline->y(xcentre); + /*is it ascender */ if (diff > lineheight + jumplimit) { ascenders += diff; - asccount++; /*count ascenders */ - } - else if (diff > lineheight - jumplimit) { - xsum += diff; /*mean xheight */ + asccount++; /*count ascenders */ + } else if (diff > lineheight - jumplimit) { + xsum += diff; /*mean xheight */ xcount++; } } if (xcount > 0) - xsum /= xcount; /*average xheight */ + xsum /= xcount; /*average xheight */ else - xsum = (float) lineheight; /*guess it */ + xsum = (float)lineheight; /*guess it */ row->xheight *= xsum; if (asccount > 0) row->ascrise = ascenders / asccount - xsum; else - row->ascrise = 0.0f; /*had none */ - if (row->xheight == 0) - row->xheight = -1.0f; + row->ascrise = 0.0f; /*had none */ + if (row->xheight == 0) row->xheight = -1.0f; } - /********************************************************************** * make_first_xheight * @@ -1480,26 +1382,25 @@ float jumplimit /*min ascender height */ * It also finds the ascender height if it can. **********************************************************************/ -void -make_first_xheight ( //find xheight -TO_ROW * row, /*current row */ -TBOX blobcoords[], /*blob bounding boxes */ -int lineheight, //initial guess -int init_lineheight, //block level guess -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline, /*established */ -float jumplimit /*min ascender height */ +void make_first_xheight( // find xheight + TO_ROW* row, /*current row */ + TBOX blobcoords[], /*blob bounding boxes */ + int lineheight, // initial guess + int init_lineheight, // block level guess + int blobcount, /*blobs in blobcoords */ + QSPLINE* baseline, /*established */ + float jumplimit /*min ascender height */ ) { - STATS heightstat (0, HEIGHTBUCKETS); + STATS heightstat(0, HEIGHTBUCKETS); int lefts[HEIGHTBUCKETS]; int rights[HEIGHTBUCKETS]; int modelist[MODENUM]; int blobindex; - int mode_count; //blobs to count in thr + int mode_count; // blobs to count in thr int sign_bit; int mode_threshold; - const int kBaselineTouch = 2; // This really should change with resolution. - const int kGoodStrength = 8; // Strength of baseline-touching heights. + const int kBaselineTouch = 2; // This really should change with resolution. + const int kGoodStrength = 8; // Strength of baseline-touching heights. const float kMinHeight = 0.25; // Min fraction of lineheight to use. sign_bit = row->xheight > 0 ? 1 : -1; @@ -1508,20 +1409,19 @@ float jumplimit /*min ascender height */ memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0])); mode_count = 0; for (blobindex = 0; blobindex < blobcount; blobindex++) { - int xcenter = (blobcoords[blobindex].left () + - blobcoords[blobindex].right ()) / 2; + int xcenter = + (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2; float base = baseline->y(xcenter); float bottomdiff = fabs(base - blobcoords[blobindex].bottom()); - int strength = textord_ocropus_mode && - bottomdiff <= kBaselineTouch ? kGoodStrength : 1; - int height = static_cast(blobcoords[blobindex].top () - base + 0.5); - if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) { - if (height > lineheight * oldbl_xhfract - && height > textord_min_xheight) { - heightstat.add (height, strength); + int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch + ? kGoodStrength + : 1; + int height = static_cast(blobcoords[blobindex].top() - base + 0.5); + if (blobcoords[blobindex].height() > init_lineheight * kMinHeight) { + if (height > lineheight * oldbl_xhfract && height > textord_min_xheight) { + heightstat.add(height, strength); if (height < HEIGHTBUCKETS) { - if (xcenter > rights[height]) - rights[height] = xcenter; + if (xcenter > rights[height]) rights[height] = xcenter; if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) lefts[height] = xcenter; } @@ -1530,29 +1430,27 @@ float jumplimit /*min ascender height */ } } - mode_threshold = (int) (blobcount * 0.1); + mode_threshold = (int)(blobcount * 0.1); if (oldbl_dot_error_size > 1 || oldbl_xhfix) - mode_threshold = (int) (mode_count * 0.1); + mode_threshold = (int)(mode_count * 0.1); if (textord_oldbl_debug) { - tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n", - blobcount, mode_count, mode_threshold); + tprintf("blobcount=%d, mode_count=%d, mode_t=%d\n", blobcount, mode_count, + mode_threshold); } find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM); if (textord_oldbl_debug) { for (blobindex = 0; blobindex < MODENUM; blobindex++) - tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]); - tprintf ("\n"); + tprintf("mode[%d]=%d ", blobindex, modelist[blobindex]); + tprintf("\n"); } pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold); - if (textord_oldbl_debug) - tprintf ("Output xheight=%g\n", row->xheight); + if (textord_oldbl_debug) tprintf("Output xheight=%g\n", row->xheight); if (row->xheight < 0 && textord_oldbl_debug) - tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight); + tprintf("warning: Row Line height < 0; %4.2f\n", row->xheight); - if (sign_bit < 0) - row->xheight = -row->xheight; + if (sign_bit < 0) row->xheight = -row->xheight; } /********************************************************************** @@ -1565,11 +1463,11 @@ float jumplimit /*min ascender height */ const int kMinModeFactorOcropus = 32; const int kMinModeFactor = 12; -void -find_top_modes ( //get modes -STATS * stats, //stats to hack -int statnum, //no of piles -int modelist[], int modenum //no of modes to get +void find_top_modes( // get modes + STATS* stats, // stats to hack + int statnum, // no of piles + int modelist[], + int modenum // no of modes to get ) { int mode_count; int last_i = 0; @@ -1577,39 +1475,35 @@ int modelist[], int modenum //no of modes to get int i; int mode; int total_max = 0; - int mode_factor = textord_ocropus_mode ? - kMinModeFactorOcropus : kMinModeFactor; + int mode_factor = + textord_ocropus_mode ? kMinModeFactorOcropus : kMinModeFactor; for (mode_count = 0; mode_count < modenum; mode_count++) { mode = 0; for (i = 0; i < statnum; i++) { - if (stats->pile_count (i) > stats->pile_count (mode)) { - if ((stats->pile_count (i) < last_max) || - ((stats->pile_count (i) == last_max) && (i > last_i))) { + if (stats->pile_count(i) > stats->pile_count(mode)) { + if ((stats->pile_count(i) < last_max) || + ((stats->pile_count(i) == last_max) && (i > last_i))) { mode = i; } } } last_i = mode; - last_max = stats->pile_count (last_i); + last_max = stats->pile_count(last_i); total_max += last_max; - if (last_max <= total_max / mode_factor) - mode = 0; + if (last_max <= total_max / mode_factor) mode = 0; modelist[mode_count] = mode; } } - /********************************************************************** * pick_x_height * * Choose based on the height modes the best x height value. **********************************************************************/ -void pick_x_height(TO_ROW * row, //row to do - int modelist[], - int lefts[], int rights[], - STATS * heightstat, +void pick_x_height(TO_ROW* row, // row to do + int modelist[], int lefts[], int rights[], STATS* heightstat, int mode_threshold) { int x; int y; @@ -1624,15 +1518,15 @@ void pick_x_height(TO_ROW * row, //row to do for (y = 0; y < MODENUM; y++) { /* Check for two modes */ if (modelist[x] && modelist[y] && - heightstat->pile_count (modelist[x]) > mode_threshold && + heightstat->pile_count(modelist[x]) > mode_threshold && (!textord_ocropus_mode || - std::min(rights[modelist[x]], rights[modelist[y]]) > - std::max(lefts[modelist[x]], lefts[modelist[y]]))) { - ratio = (float) modelist[y] / (float) modelist[x]; + std::min(rights[modelist[x]], rights[modelist[y]]) > + std::max(lefts[modelist[x]], lefts[modelist[y]]))) { + ratio = (float)modelist[y] / (float)modelist[x]; if (1.2 < ratio && ratio < 1.8) { /* Two modes found */ best_x_height = modelist[x]; - num_in_best = heightstat->pile_count (modelist[x]); + num_in_best = heightstat->pile_count(modelist[x]); /* Try to get one higher */ do { @@ -1640,26 +1534,24 @@ void pick_x_height(TO_ROW * row, //row to do for (z = 0; z < MODENUM; z++) { if (modelist[z] == best_x_height + 1 && (!textord_ocropus_mode || - std::min(rights[modelist[x]], rights[modelist[y]]) > - std::max(lefts[modelist[x]], lefts[modelist[y]]))) { - ratio = (float) modelist[y] / (float) modelist[z]; + std::min(rights[modelist[x]], rights[modelist[y]]) > + std::max(lefts[modelist[x]], lefts[modelist[y]]))) { + ratio = (float)modelist[y] / (float)modelist[z]; if ((1.2 < ratio && ratio < 1.8) && - /* Should be half of best */ - heightstat->pile_count (modelist[z]) > - num_in_best * 0.5) { + /* Should be half of best */ + heightstat->pile_count(modelist[z]) > num_in_best * 0.5) { best_x_height++; found_one_bigger = TRUE; break; } } } - } - while (found_one_bigger); + } while (found_one_bigger); /* try to get a higher ascender */ best_asc = modelist[y]; - num_in_best = heightstat->pile_count (modelist[y]); + num_in_best = heightstat->pile_count(modelist[y]); /* Try to get one higher */ do { @@ -1667,49 +1559,45 @@ void pick_x_height(TO_ROW * row, //row to do for (z = 0; z < MODENUM; z++) { if (modelist[z] > best_asc && (!textord_ocropus_mode || - std::min(rights[modelist[x]], rights[modelist[y]]) > - std::max(lefts[modelist[x]], lefts[modelist[y]]))) { - ratio = (float) modelist[z] / (float) best_x_height; + std::min(rights[modelist[x]], rights[modelist[y]]) > + std::max(lefts[modelist[x]], lefts[modelist[y]]))) { + ratio = (float)modelist[z] / (float)best_x_height; if ((1.2 < ratio && ratio < 1.8) && - /* Should be half of best */ - heightstat->pile_count (modelist[z]) > - num_in_best * 0.5) { + /* Should be half of best */ + heightstat->pile_count(modelist[z]) > num_in_best * 0.5) { best_asc = modelist[z]; found_one_bigger = TRUE; break; } } } - } - while (found_one_bigger); + } while (found_one_bigger); - row->xheight = (float) best_x_height; - row->ascrise = (float) best_asc - best_x_height; + row->xheight = (float)best_x_height; + row->ascrise = (float)best_asc - best_x_height; return; } } } } - best_x_height = modelist[0]; /* Single Mode found */ - num_in_best = heightstat->pile_count (best_x_height); + best_x_height = modelist[0]; /* Single Mode found */ + num_in_best = heightstat->pile_count(best_x_height); do { - /* Try to get one higher */ + /* Try to get one higher */ found_one_bigger = FALSE; for (z = 1; z < MODENUM; z++) { /* Should be half of best */ if ((modelist[z] == best_x_height + 1) && - (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) { + (heightstat->pile_count(modelist[z]) > num_in_best * 0.5)) { best_x_height++; found_one_bigger = TRUE; break; } } - } - while (found_one_bigger); + } while (found_one_bigger); row->ascrise = 0.0f; - row->xheight = (float) best_x_height; - if (row->xheight == 0) - row->xheight = -1.0f; + row->xheight = (float)best_x_height; + if (row->xheight == 0) row->xheight = -1.0f; } diff --git a/src/textord/oldbasel.h b/src/textord/oldbasel.h index 74117777eb..08863317f6 100644 --- a/src/textord/oldbasel.h +++ b/src/textord/oldbasel.h @@ -17,159 +17,153 @@ * **********************************************************************/ -#ifndef OLDBASEL_H -#define OLDBASEL_H +#ifndef OLDBASEL_H +#define OLDBASEL_H -#include "params.h" -#include "blobbox.h" +#include "blobbox.h" +#include "params.h" -extern BOOL_VAR_H (textord_really_old_xheight, FALSE, -"Use original wiseowl xheight"); -extern BOOL_VAR_H (textord_oldbl_debug, FALSE, -"Debug old baseline generation"); -extern BOOL_VAR_H (textord_debug_baselines, FALSE, -"Debug baseline generation"); -extern BOOL_VAR_H (textord_oldbl_paradef, TRUE, "Use para default mechanism"); -extern BOOL_VAR_H (textord_oldbl_split_splines, TRUE, -"Split stepped splines"); -extern BOOL_VAR_H (textord_oldbl_merge_parts, TRUE, -"Merge suspect partitions"); -extern BOOL_VAR_H (oldbl_xhfix, FALSE, -"Fix bug in modes threshold for xheights"); -extern INT_VAR_H (oldbl_holed_losscount, 10, -"Max lost before fallback line used"); -extern double_VAR_H (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); -extern double_VAR_H (textord_oldbl_jumplimit, 0.15, -"X fraction for new partition"); -int get_blob_coords( //get boxes - TO_ROW* row, //row to use - int32_t lineheight, //block level - TBOX* blobcoords, //ouput boxes - bool& holed_line, //lost a lot of blobs - int& outcount //no of real blobs -); -void make_first_baseline ( //initial approximation -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*no of blobcoords */ -int xcoords[], /*coords for spline */ -int ycoords[], /*approximator */ -QSPLINE * spline, /*initial spline */ -QSPLINE * baseline, /*output spline */ -float jumplimit /*guess half descenders */ -); -void make_holed_baseline ( //initial approximation -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*no of blobcoords */ -QSPLINE * spline, /*initial spline */ -QSPLINE * baseline, /*output spline */ -float gradient //of line -); -int partition_line ( //partition blobs -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs on row */ -int *numparts, /*number of partitions */ -char partids[], /*partition no of each blob */ -int partsizes[], /*no in each partition */ -QSPLINE * spline, /*curve to fit to */ -float jumplimit, /*allowed delta change */ -float ydiffs[] /*diff from spline */ -); -void merge_oldbl_parts ( //partition blobs -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs on row */ -char partids[], /*partition no of each blob */ -int partsizes[], /*no in each partition */ -int biggestpart, //major partition -float jumplimit /*allowed delta change */ -); -int get_ydiffs ( //evaluate differences -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs */ -QSPLINE * spline, /*approximating spline */ -float ydiffs[] /*output */ -); -int choose_partition ( //select partition -float diff, /*diff from spline */ -float partdiffs[], /*diff on all parts */ -int lastpart, /*last assigned partition */ -float jumplimit, /*new part threshold */ -float* drift, -float* last_delta, -int *partcount /*no of partitions */ -); -int partition_coords ( //find relevant coords -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs in row */ -char partids[], /*partition no of each blob */ -int bestpart, /*best new partition */ -int xcoords[], /*points to work on */ -int ycoords[] /*points to work on */ -); -int segment_spline ( //make xstarts -TBOX blobcoords[], //boundign boxes -int blobcount, /*no of blobs in row */ -int xcoords[], /*points to work on */ -int ycoords[], /*points to work on */ -int degree, int pointcount, /*no of points */ -int xstarts[] //result -); -bool split_stepped_spline( //make xstarts - QSPLINE* baseline, //current shot - float jumplimit, //max step fuction - int* xcoords, /*points to work on */ - int* xstarts, //result - int& segments //no of segments -); -void insert_spline_point ( //get descenders -int xstarts[], //starts to shuffle -int segment, //insertion pt -int coord1, //coords to add -int coord2, int &segments //total segments -); -void find_lesser_parts ( //get descenders -TO_ROW * row, //row to process -TBOX blobcoords[], //bounding boxes -int blobcount, /*no of blobs */ -char partids[], /*partition of each blob */ -int partsizes[], /*size of each part */ -int partcount, /*no of partitions */ -int bestpart /*biggest partition */ +extern BOOL_VAR_H(textord_really_old_xheight, FALSE, + "Use original wiseowl xheight"); +extern BOOL_VAR_H(textord_oldbl_debug, FALSE, "Debug old baseline generation"); +extern BOOL_VAR_H(textord_debug_baselines, FALSE, "Debug baseline generation"); +extern BOOL_VAR_H(textord_oldbl_paradef, TRUE, "Use para default mechanism"); +extern BOOL_VAR_H(textord_oldbl_split_splines, TRUE, "Split stepped splines"); +extern BOOL_VAR_H(textord_oldbl_merge_parts, TRUE, "Merge suspect partitions"); +extern BOOL_VAR_H(oldbl_xhfix, FALSE, + "Fix bug in modes threshold for xheights"); +extern INT_VAR_H(oldbl_holed_losscount, 10, + "Max lost before fallback line used"); +extern double_VAR_H(oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); +extern double_VAR_H(textord_oldbl_jumplimit, 0.15, + "X fraction for new partition"); +int get_blob_coords( // get boxes + TO_ROW* row, // row to use + int32_t lineheight, // block level + TBOX* blobcoords, // ouput boxes + bool& holed_line, // lost a lot of blobs + int& outcount // no of real blobs +); +void make_first_baseline( // initial approximation + TBOX blobcoords[], /*blob bounding boxes */ + int blobcount, /*no of blobcoords */ + int xcoords[], /*coords for spline */ + int ycoords[], /*approximator */ + QSPLINE* spline, /*initial spline */ + QSPLINE* baseline, /*output spline */ + float jumplimit /*guess half descenders */ +); +void make_holed_baseline( // initial approximation + TBOX blobcoords[], /*blob bounding boxes */ + int blobcount, /*no of blobcoords */ + QSPLINE* spline, /*initial spline */ + QSPLINE* baseline, /*output spline */ + float gradient // of line +); +int partition_line( // partition blobs + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs on row */ + int* numparts, /*number of partitions */ + char partids[], /*partition no of each blob */ + int partsizes[], /*no in each partition */ + QSPLINE* spline, /*curve to fit to */ + float jumplimit, /*allowed delta change */ + float ydiffs[] /*diff from spline */ +); +void merge_oldbl_parts( // partition blobs + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs on row */ + char partids[], /*partition no of each blob */ + int partsizes[], /*no in each partition */ + int biggestpart, // major partition + float jumplimit /*allowed delta change */ +); +int get_ydiffs( // evaluate differences + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs */ + QSPLINE* spline, /*approximating spline */ + float ydiffs[] /*output */ +); +int choose_partition( // select partition + float diff, /*diff from spline */ + float partdiffs[], /*diff on all parts */ + int lastpart, /*last assigned partition */ + float jumplimit, /*new part threshold */ + float* drift, float* last_delta, int* partcount /*no of partitions */ +); +int partition_coords( // find relevant coords + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs in row */ + char partids[], /*partition no of each blob */ + int bestpart, /*best new partition */ + int xcoords[], /*points to work on */ + int ycoords[] /*points to work on */ +); +int segment_spline( // make xstarts + TBOX blobcoords[], // boundign boxes + int blobcount, /*no of blobs in row */ + int xcoords[], /*points to work on */ + int ycoords[], /*points to work on */ + int degree, int pointcount, /*no of points */ + int xstarts[] // result +); +bool split_stepped_spline( // make xstarts + QSPLINE* baseline, // current shot + float jumplimit, // max step fuction + int* xcoords, /*points to work on */ + int* xstarts, // result + int& segments // no of segments +); +void insert_spline_point( // get descenders + int xstarts[], // starts to shuffle + int segment, // insertion pt + int coord1, // coords to add + int coord2, + int& segments // total segments +); +void find_lesser_parts( // get descenders + TO_ROW* row, // row to process + TBOX blobcoords[], // bounding boxes + int blobcount, /*no of blobs */ + char partids[], /*partition of each blob */ + int partsizes[], /*size of each part */ + int partcount, /*no of partitions */ + int bestpart /*biggest partition */ ); -void old_first_xheight ( //the wiseowl way -TO_ROW * row, /*current row */ -TBOX blobcoords[], /*blob bounding boxes */ -int initialheight, //initial guess -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline, /*established */ -float jumplimit /*min ascender height */ +void old_first_xheight( // the wiseowl way + TO_ROW* row, /*current row */ + TBOX blobcoords[], /*blob bounding boxes */ + int initialheight, // initial guess + int blobcount, /*blobs in blobcoords */ + QSPLINE* baseline, /*established */ + float jumplimit /*min ascender height */ ); -void make_first_xheight ( //find xheight -TO_ROW * row, /*current row */ -TBOX blobcoords[], /*blob bounding boxes */ -int lineheight, //initial guess -int init_lineheight, //block level guess -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline, /*established */ -float jumplimit /*min ascender height */ +void make_first_xheight( // find xheight + TO_ROW* row, /*current row */ + TBOX blobcoords[], /*blob bounding boxes */ + int lineheight, // initial guess + int init_lineheight, // block level guess + int blobcount, /*blobs in blobcoords */ + QSPLINE* baseline, /*established */ + float jumplimit /*min ascender height */ ); -int *make_height_array ( //get array of heights -TBOX blobcoords[], /*blob bounding boxes */ -int blobcount, /*blobs in blobcoords */ -QSPLINE * baseline /*established */ +int* make_height_array( // get array of heights + TBOX blobcoords[], /*blob bounding boxes */ + int blobcount, /*blobs in blobcoords */ + QSPLINE* baseline /*established */ ); -void find_top_modes ( //get modes -STATS * stats, //stats to hack -int statnum, //no of piles -int modelist[], int modenum //no of modes to get +void find_top_modes( // get modes + STATS* stats, // stats to hack + int statnum, // no of piles + int modelist[], + int modenum // no of modes to get ); -void pick_x_height(TO_ROW * row, //row to do -int modelist[], -int lefts[], int rights[], -STATS * heightstat, -int mode_threshold); +void pick_x_height(TO_ROW* row, // row to do + int modelist[], int lefts[], int rights[], STATS* heightstat, + int mode_threshold); #endif diff --git a/src/textord/pithsync.cpp b/src/textord/pithsync.cpp index 99b8996858..a39064ddb5 100644 --- a/src/textord/pithsync.cpp +++ b/src/textord/pithsync.cpp @@ -18,17 +18,17 @@ **********************************************************************/ #ifdef __UNIX__ -#include +#include #endif -#include -#include "memry.h" -#include "makerow.h" -#include "pitsync1.h" -#include "topitch.h" -#include "pithsync.h" -#include "tprintf.h" +#include +#include "makerow.h" +#include "memry.h" +#include "pithsync.h" +#include "pitsync1.h" +#include "topitch.h" +#include "tprintf.h" -#define PROJECTION_MARGIN 10 //arbitrary +#define PROJECTION_MARGIN 10 // arbitrary #define EXTERN @@ -38,19 +38,19 @@ * Constructor to make a new FPCUTPT. **********************************************************************/ -void FPCUTPT::setup( //constructor - FPCUTPT *cutpts, //predecessors - int16_t array_origin, //start coord - STATS *projection, //vertical occupation - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t x, //position - int16_t offset //dist to gap - ) { - //half of pitch +void FPCUTPT::setup( // constructor + FPCUTPT* cutpts, // predecessors + int16_t array_origin, // start coord + STATS* projection, // vertical occupation + int16_t zero_count, // official zero + int16_t pitch, // proposed pitch + int16_t x, // position + int16_t offset // dist to gap +) { + // half of pitch int16_t half_pitch = pitch / 2 - 1; - uint32_t lead_flag; //new flag - int32_t ind; //current position + uint32_t lead_flag; // new flag + int32_t ind; // current position if (half_pitch > 31) half_pitch = 31; @@ -73,54 +73,50 @@ void FPCUTPT::setup( //constructor fwd_balance = 0; for (ind = 0; ind <= half_pitch; ind++) { fwd_balance >>= 1; - if (projection->pile_count (ind) > zero_count) - fwd_balance |= lead_flag; + if (projection->pile_count(ind) > zero_count) fwd_balance |= lead_flag; } - } - else { + } else { back_balance = cutpts[x - 1 - array_origin].back_balance << 1; back_balance &= lead_flag + lead_flag - 1; - if (projection->pile_count (x) > zero_count) - back_balance |= 1; + if (projection->pile_count(x) > zero_count) back_balance |= 1; fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; - if (projection->pile_count (x + half_pitch) > zero_count) + if (projection->pile_count(x + half_pitch) > zero_count) fwd_balance |= lead_flag; } } - /********************************************************************** * FPCUTPT::assign * * Constructor to make a new FPCUTPT. **********************************************************************/ -void FPCUTPT::assign( //constructor - FPCUTPT* cutpts, //predecessors - int16_t array_origin, //start coord - int16_t x, //position - bool faking, //faking this one - bool mid_cut, //cheap cut. - int16_t offset, //dist to gap - STATS* projection, //vertical occupation - float projection_scale, //scaling - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t pitch_error //allowed tolerance +void FPCUTPT::assign( // constructor + FPCUTPT* cutpts, // predecessors + int16_t array_origin, // start coord + int16_t x, // position + bool faking, // faking this one + bool mid_cut, // cheap cut. + int16_t offset, // dist to gap + STATS* projection, // vertical occupation + float projection_scale, // scaling + int16_t zero_count, // official zero + int16_t pitch, // proposed pitch + int16_t pitch_error // allowed tolerance ) { - int index; //test index - int balance_index; //for balance factor - int16_t balance_count; //ding factor - int16_t r_index; //test cut number - FPCUTPT *segpt; //segment point - int32_t dist; //from prev segment - double sq_dist; //squared distance - double mean; //mean pitch - double total; //total dists - double factor; //cost function - //half of pitch + int index; // test index + int balance_index; // for balance factor + int16_t balance_count; // ding factor + int16_t r_index; // test cut number + FPCUTPT* segpt; // segment point + int32_t dist; // from prev segment + double sq_dist; // squared distance + double mean; // mean pitch + double total; // total dists + double factor; // cost function + // half of pitch int16_t half_pitch = pitch / 2 - 1; - uint32_t lead_flag; //new flag + uint32_t lead_flag; // new flag if (half_pitch > 31) half_pitch = 31; @@ -130,10 +126,9 @@ void FPCUTPT::assign( //constructor back_balance = cutpts[x - 1 - array_origin].back_balance << 1; back_balance &= lead_flag + lead_flag - 1; - if (projection->pile_count (x) > zero_count) - back_balance |= 1; + if (projection->pile_count(x) > zero_count) back_balance |= 1; fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; - if (projection->pile_count (x + half_pitch) > zero_count) + if (projection->pile_count(x + half_pitch) > zero_count) fwd_balance |= lead_flag; xpos = x; @@ -144,7 +139,7 @@ void FPCUTPT::assign( //constructor region_index = 0; fake_count = INT16_MAX; for (index = x - pitch - pitch_error; index <= x - pitch + pitch_error; - index++) { + index++) { if (index >= array_origin) { segpt = &cutpts[index - array_origin]; dist = x - segpt->xpos; @@ -158,33 +153,28 @@ void FPCUTPT::assign( //constructor balance_count++; lead_flag &= lead_flag - 1; } + } else { + for (balance_index = 0; index + balance_index < x - balance_index; + balance_index++) + balance_count += + (projection->pile_count(index + balance_index) <= + zero_count) ^ + (projection->pile_count(x - balance_index) <= zero_count); } - else { - for (balance_index = 0; - index + balance_index < x - balance_index; - balance_index++) - balance_count += - (projection->pile_count (index + balance_index) <= - zero_count) ^ (projection->pile_count (x - - balance_index) - <= zero_count); - } - balance_count = - (int16_t) (balance_count * textord_balance_factor / - projection_scale); + balance_count = (int16_t)(balance_count * textord_balance_factor / + projection_scale); } r_index = segpt->region_index + 1; total = segpt->mean_sum + dist; balance_count += offset; - sq_dist = - dist * dist + segpt->sq_sum + balance_count * balance_count; + sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count; mean = total / r_index; factor = mean - pitch; factor *= factor; - factor += sq_dist / (r_index) - mean * mean; + factor += sq_dist / (r_index)-mean * mean; if (factor < cost && segpt->fake_count + faked <= fake_count) { - cost = factor; //find least cost - pred = segpt; //save path + cost = factor; // find least cost + pred = segpt; // save path mean_sum = total; sq_sum = sq_dist; fake_count = segpt->fake_count + faked; @@ -196,38 +186,37 @@ void FPCUTPT::assign( //constructor } } - /********************************************************************** * FPCUTPT::assign_cheap * * Constructor to make a new FPCUTPT on the cheap. **********************************************************************/ -void FPCUTPT::assign_cheap( //constructor - FPCUTPT *cutpts, //predecessors - int16_t array_origin, //start coord - int16_t x, //position - BOOL8 faking, //faking this one - BOOL8 mid_cut, //cheap cut. - int16_t offset, //dist to gap - STATS *projection, //vertical occupation - float projection_scale, //scaling - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t pitch_error //allowed tolerance - ) { - int index; //test index - int16_t balance_count; //ding factor - int16_t r_index; //test cut number - FPCUTPT *segpt; //segment point - int32_t dist; //from prev segment - double sq_dist; //squared distance - double mean; //mean pitch - double total; //total dists - double factor; //cost function - //half of pitch +void FPCUTPT::assign_cheap( // constructor + FPCUTPT* cutpts, // predecessors + int16_t array_origin, // start coord + int16_t x, // position + BOOL8 faking, // faking this one + BOOL8 mid_cut, // cheap cut. + int16_t offset, // dist to gap + STATS* projection, // vertical occupation + float projection_scale, // scaling + int16_t zero_count, // official zero + int16_t pitch, // proposed pitch + int16_t pitch_error // allowed tolerance +) { + int index; // test index + int16_t balance_count; // ding factor + int16_t r_index; // test cut number + FPCUTPT* segpt; // segment point + int32_t dist; // from prev segment + double sq_dist; // squared distance + double mean; // mean pitch + double total; // total dists + double factor; // cost function + // half of pitch int16_t half_pitch = pitch / 2 - 1; - uint32_t lead_flag; //new flag + uint32_t lead_flag; // new flag if (half_pitch > 31) half_pitch = 31; @@ -237,10 +226,9 @@ void FPCUTPT::assign_cheap( //constructor back_balance = cutpts[x - 1 - array_origin].back_balance << 1; back_balance &= lead_flag + lead_flag - 1; - if (projection->pile_count (x) > zero_count) - back_balance |= 1; + if (projection->pile_count(x) > zero_count) back_balance |= 1; fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; - if (projection->pile_count (x + half_pitch) > zero_count) + if (projection->pile_count(x + half_pitch) > zero_count) fwd_balance |= lead_flag; xpos = x; @@ -263,20 +251,19 @@ void FPCUTPT::assign_cheap( //constructor balance_count++; lead_flag &= lead_flag - 1; } - balance_count = (int16_t) (balance_count * textord_balance_factor - / projection_scale); + balance_count = (int16_t)(balance_count * textord_balance_factor / + projection_scale); } r_index = segpt->region_index + 1; total = segpt->mean_sum + dist; balance_count += offset; - sq_dist = - dist * dist + segpt->sq_sum + balance_count * balance_count; + sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count; mean = total / r_index; factor = mean - pitch; factor *= factor; - factor += sq_dist / (r_index) - mean * mean; - cost = factor; //find least cost - pred = segpt; //save path + factor += sq_dist / (r_index)-mean * mean; + cost = factor; // find least cost + pred = segpt; // save path mean_sum = total; sq_sum = sq_dist; fake_count = segpt->fake_count + faked; @@ -286,7 +273,6 @@ void FPCUTPT::assign_cheap( //constructor } } - /********************************************************************** * check_pitch_sync * @@ -295,122 +281,119 @@ void FPCUTPT::assign_cheap( //constructor * The return value is a measure of goodness of the sync. **********************************************************************/ -double check_pitch_sync2( //find segmentation - BLOBNBOX_IT *blob_it, //blobs to do - int16_t blob_count, //no of blobs - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - int16_t projection_left, //edges //scale factor - int16_t projection_right, - float projection_scale, - int16_t &occupation_count, //no of occupied cells - FPSEGPT_LIST *seg_list, //output list - int16_t start, //start of good range - int16_t end //end of good range - ) { - bool faking; //illegal cut pt - bool mid_cut; //cheap cut pt. - int16_t x; //current coord - int16_t blob_index; //blob number - int16_t left_edge; //of word - int16_t right_edge; //of word - int16_t array_origin; //x coord of array - int16_t offset; //dist to legal area - int16_t zero_count; //projection zero - int16_t best_left_x = 0; //for equals - int16_t best_right_x = 0; //right edge - TBOX this_box; //bounding box - TBOX next_box; //box of next blob - FPSEGPT *segpt; //segment point - FPCUTPT *cutpts; //array of points - double best_cost; //best path - double mean_sum; //computes result - FPCUTPT *best_end; //end of best path - int16_t best_fake; //best fake level - int16_t best_count; //no of cuts - BLOBNBOX_IT this_it; //copy iterator - FPSEGPT_IT seg_it = seg_list; //output iterator +double check_pitch_sync2( // find segmentation + BLOBNBOX_IT* blob_it, // blobs to do + int16_t blob_count, // no of blobs + int16_t pitch, // pitch estimate + int16_t pitch_error, // tolerance + STATS* projection, // vertical + int16_t projection_left, // edges //scale factor + int16_t projection_right, float projection_scale, + int16_t& occupation_count, // no of occupied cells + FPSEGPT_LIST* seg_list, // output list + int16_t start, // start of good range + int16_t end // end of good range +) { + bool faking; // illegal cut pt + bool mid_cut; // cheap cut pt. + int16_t x; // current coord + int16_t blob_index; // blob number + int16_t left_edge; // of word + int16_t right_edge; // of word + int16_t array_origin; // x coord of array + int16_t offset; // dist to legal area + int16_t zero_count; // projection zero + int16_t best_left_x = 0; // for equals + int16_t best_right_x = 0; // right edge + TBOX this_box; // bounding box + TBOX next_box; // box of next blob + FPSEGPT* segpt; // segment point + FPCUTPT* cutpts; // array of points + double best_cost; // best path + double mean_sum; // computes result + FPCUTPT* best_end; // end of best path + int16_t best_fake; // best fake level + int16_t best_count; // no of cuts + BLOBNBOX_IT this_it; // copy iterator + FPSEGPT_IT seg_it = seg_list; // output iterator // tprintf("Computing sync on word of %d blobs with pitch %d\n", // blob_count, pitch); // if (blob_count==8 && pitch==27) // projection->print(stdout,TRUE); zero_count = 0; - if (pitch < 3) - pitch = 3; //nothing ludicrous - if ((pitch - 3) / 2 < pitch_error) - pitch_error = (pitch - 3) / 2; + if (pitch < 3) pitch = 3; // nothing ludicrous + if ((pitch - 3) / 2 < pitch_error) pitch_error = (pitch - 3) / 2; this_it = *blob_it; - this_box = box_next (&this_it);//get box - // left_edge=this_box.left(); //left of word - // right_edge=this_box.right(); + this_box = box_next(&this_it); // get box + // left_edge=this_box.left(); //left of word right_edge=this_box.right(); // for (blob_index=1;blob_indexright_edge) // right_edge=this_box.right(); // } - for (left_edge = projection_left; projection->pile_count (left_edge) == 0 - && left_edge < projection_right; left_edge++); - for (right_edge = projection_right; projection->pile_count (right_edge) == 0 - && right_edge > left_edge; right_edge--); - ASSERT_HOST (right_edge >= left_edge); + for (left_edge = projection_left; + projection->pile_count(left_edge) == 0 && left_edge < projection_right; + left_edge++) + ; + for (right_edge = projection_right; + projection->pile_count(right_edge) == 0 && right_edge > left_edge; + right_edge--) + ; + ASSERT_HOST(right_edge >= left_edge); if (pitsync_linear_version >= 4) - return check_pitch_sync3 (projection_left, projection_right, zero_count, - pitch, pitch_error, projection, - projection_scale, occupation_count, seg_list, - start, end); + return check_pitch_sync3(projection_left, projection_right, zero_count, + pitch, pitch_error, projection, projection_scale, + occupation_count, seg_list, start, end); array_origin = left_edge - pitch; - cutpts = (FPCUTPT *) alloc_mem ((right_edge - left_edge + pitch * 2 + 1) - * sizeof (FPCUTPT)); + cutpts = (FPCUTPT*)alloc_mem((right_edge - left_edge + pitch * 2 + 1) * + sizeof(FPCUTPT)); for (x = array_origin; x < left_edge; x++) - //free cuts - cutpts[x - array_origin].setup (cutpts, array_origin, projection, zero_count, pitch, x, 0); + // free cuts + cutpts[x - array_origin].setup(cutpts, array_origin, projection, zero_count, + pitch, x, 0); for (offset = 0; offset <= pitch_error; offset++, x++) - //not quite free - cutpts[x - array_origin].setup (cutpts, array_origin, projection, zero_count, pitch, x, offset); + // not quite free + cutpts[x - array_origin].setup(cutpts, array_origin, projection, zero_count, + pitch, x, offset); this_it = *blob_it; best_cost = MAX_FLOAT32; best_end = nullptr; - this_box = box_next (&this_it);//first box - next_box = box_next (&this_it);//second box + this_box = box_next(&this_it); // first box + next_box = box_next(&this_it); // second box blob_index = 1; while (x < right_edge - pitch_error) { - if (x > this_box.right () + pitch_error && blob_index < blob_count) { + if (x > this_box.right() + pitch_error && blob_index < blob_count) { this_box = next_box; - next_box = box_next (&this_it); + next_box = box_next(&this_it); blob_index++; } faking = false; mid_cut = false; - if (x <= this_box.left ()) + if (x <= this_box.left()) offset = 0; - else if (x <= this_box.left () + pitch_error) - offset = x - this_box.left (); - else if (x >= this_box.right ()) + else if (x <= this_box.left() + pitch_error) + offset = x - this_box.left(); + else if (x >= this_box.right()) offset = 0; - else if (x >= next_box.left () && blob_index < blob_count) { - offset = x - next_box.left (); - if (this_box.right () - x < offset) - offset = this_box.right () - x; - } - else if (x >= this_box.right () - pitch_error) - offset = this_box.right () - x; - else if (x - this_box.left () > pitch * pitsync_joined_edge - && this_box.right () - x > pitch * pitsync_joined_edge) { + else if (x >= next_box.left() && blob_index < blob_count) { + offset = x - next_box.left(); + if (this_box.right() - x < offset) offset = this_box.right() - x; + } else if (x >= this_box.right() - pitch_error) + offset = this_box.right() - x; + else if (x - this_box.left() > pitch * pitsync_joined_edge && + this_box.right() - x > pitch * pitsync_joined_edge) { mid_cut = true; offset = 0; - } - else { + } else { faking = true; - offset = projection->pile_count (x); + offset = projection->pile_count(x); } - cutpts[x - array_origin].assign (cutpts, array_origin, x, - faking, mid_cut, offset, projection, - projection_scale, zero_count, pitch, - pitch_error); + cutpts[x - array_origin].assign(cutpts, array_origin, x, faking, mid_cut, + offset, projection, projection_scale, + zero_count, pitch, pitch_error); x++; } @@ -419,71 +402,68 @@ double check_pitch_sync2( //find segmentation best_count = INT16_MAX; while (x < right_edge + pitch) { offset = x < right_edge ? right_edge - x : 0; - cutpts[x - array_origin].assign (cutpts, array_origin, x, - false, false, offset, projection, - projection_scale, zero_count, pitch, - pitch_error); + cutpts[x - array_origin].assign(cutpts, array_origin, x, false, false, + offset, projection, projection_scale, + zero_count, pitch, pitch_error); cutpts[x - array_origin].terminal = true; - if (cutpts[x - array_origin].index () + - cutpts[x - array_origin].fake_count <= best_count + best_fake) { - if (cutpts[x - array_origin].fake_count < best_fake - || (cutpts[x - array_origin].fake_count == best_fake - && cutpts[x - array_origin].cost_function () < best_cost)) { + if (cutpts[x - array_origin].index() + + cutpts[x - array_origin].fake_count <= + best_count + best_fake) { + if (cutpts[x - array_origin].fake_count < best_fake || + (cutpts[x - array_origin].fake_count == best_fake && + cutpts[x - array_origin].cost_function() < best_cost)) { best_fake = cutpts[x - array_origin].fake_count; - best_cost = cutpts[x - array_origin].cost_function (); + best_cost = cutpts[x - array_origin].cost_function(); best_left_x = x; best_right_x = x; - best_count = cutpts[x - array_origin].index (); - } - else if (cutpts[x - array_origin].fake_count == best_fake - && x == best_right_x + 1 - && cutpts[x - array_origin].cost_function () == best_cost) { - //exactly equal + best_count = cutpts[x - array_origin].index(); + } else if (cutpts[x - array_origin].fake_count == best_fake && + x == best_right_x + 1 && + cutpts[x - array_origin].cost_function() == best_cost) { + // exactly equal best_right_x = x; } } x++; } - ASSERT_HOST (best_fake < INT16_MAX); + ASSERT_HOST(best_fake < INT16_MAX); best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; - if (this_box.right () == textord_test_x - && this_box.top () == textord_test_y) { + if (this_box.right() == textord_test_x && this_box.top() == textord_test_y) { for (x = left_edge - pitch; x < right_edge + pitch; x++) { - tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", - x, cutpts[x - array_origin].cost_function (), - cutpts[x - array_origin].sum (), - cutpts[x - array_origin].squares (), - cutpts[x - array_origin].previous ()->position ()); + tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", x, + cutpts[x - array_origin].cost_function(), + cutpts[x - array_origin].sum(), + cutpts[x - array_origin].squares(), + cutpts[x - array_origin].previous()->position()); } } occupation_count = -1; do { - for (x = best_end->position () - pitch + pitch_error; - x < best_end->position () - pitch_error - && projection->pile_count (x) == 0; x++); - if (x < best_end->position () - pitch_error) - occupation_count++; - //copy it - segpt = new FPSEGPT (best_end); - seg_it.add_before_then_move (segpt); - best_end = best_end->previous (); - } - while (best_end != nullptr); - seg_it.move_to_last (); - mean_sum = seg_it.data ()->sum (); + for (x = best_end->position() - pitch + pitch_error; + x < best_end->position() - pitch_error && + projection->pile_count(x) == 0; + x++) + ; + if (x < best_end->position() - pitch_error) occupation_count++; + // copy it + segpt = new FPSEGPT(best_end); + seg_it.add_before_then_move(segpt); + best_end = best_end->previous(); + } while (best_end != nullptr); + seg_it.move_to_last(); + mean_sum = seg_it.data()->sum(); mean_sum = mean_sum * mean_sum / best_count; - if (seg_it.data ()->squares () - mean_sum < 0) - tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n", - seg_it.data ()->squares (), seg_it.data ()->sum (), best_count); + if (seg_it.data()->squares() - mean_sum < 0) + tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", + seg_it.data()->squares(), seg_it.data()->sum(), best_count); free_mem(cutpts); // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n", // blob_count,pitch,seg_it.data()->squares()-mean_sum, // occupation_count); - return seg_it.data ()->squares () - mean_sum; + return seg_it.data()->squares() - mean_sum; } - /********************************************************************** * check_pitch_sync * @@ -492,140 +472,131 @@ double check_pitch_sync2( //find segmentation * The return value is a measure of goodness of the sync. **********************************************************************/ -double check_pitch_sync3( //find segmentation - int16_t projection_left, //edges //to be considered 0 - int16_t projection_right, - int16_t zero_count, - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - float projection_scale, //scale factor - int16_t &occupation_count, //no of occupied cells - FPSEGPT_LIST *seg_list, //output list - int16_t start, //start of good range - int16_t end //end of good range - ) { - bool faking; //illegal cut pt - bool mid_cut; //cheap cut pt. - int16_t left_edge; //of word - int16_t right_edge; //of word - int16_t x; //current coord - int16_t array_origin; //x coord of array - int16_t offset; //dist to legal area - int16_t projection_offset; //from scaled projection - int16_t prev_zero; //previous zero dist - int16_t next_zero; //next zero dist - int16_t zero_offset; //scan window - int16_t best_left_x = 0; //for equals - int16_t best_right_x = 0; //right edge - FPSEGPT *segpt; //segment point - FPCUTPT *cutpts; //array of points - BOOL8 *mins; //local min results - int minindex; //next input position - int test_index; //index to mins - double best_cost; //best path - double mean_sum; //computes result - FPCUTPT *best_end; //end of best path - int16_t best_fake; //best fake level - int16_t best_count; //no of cuts - FPSEGPT_IT seg_it = seg_list; //output iterator +double check_pitch_sync3( // find segmentation + int16_t projection_left, // edges //to be considered 0 + int16_t projection_right, int16_t zero_count, + int16_t pitch, // pitch estimate + int16_t pitch_error, // tolerance + STATS* projection, // vertical + float projection_scale, // scale factor + int16_t& occupation_count, // no of occupied cells + FPSEGPT_LIST* seg_list, // output list + int16_t start, // start of good range + int16_t end // end of good range +) { + bool faking; // illegal cut pt + bool mid_cut; // cheap cut pt. + int16_t left_edge; // of word + int16_t right_edge; // of word + int16_t x; // current coord + int16_t array_origin; // x coord of array + int16_t offset; // dist to legal area + int16_t projection_offset; // from scaled projection + int16_t prev_zero; // previous zero dist + int16_t next_zero; // next zero dist + int16_t zero_offset; // scan window + int16_t best_left_x = 0; // for equals + int16_t best_right_x = 0; // right edge + FPSEGPT* segpt; // segment point + FPCUTPT* cutpts; // array of points + BOOL8* mins; // local min results + int minindex; // next input position + int test_index; // index to mins + double best_cost; // best path + double mean_sum; // computes result + FPCUTPT* best_end; // end of best path + int16_t best_fake; // best fake level + int16_t best_count; // no of cuts + FPSEGPT_IT seg_it = seg_list; // output iterator end = (end - start) % pitch; - if (pitch < 3) - pitch = 3; //nothing ludicrous - if ((pitch - 3) / 2 < pitch_error) - pitch_error = (pitch - 3) / 2; - //min dist of zero - zero_offset = (int16_t) (pitch * pitsync_joined_edge); - for (left_edge = projection_left; projection->pile_count (left_edge) == 0 - && left_edge < projection_right; left_edge++); - for (right_edge = projection_right; projection->pile_count (right_edge) == 0 - && right_edge > left_edge; right_edge--); + if (pitch < 3) pitch = 3; // nothing ludicrous + if ((pitch - 3) / 2 < pitch_error) pitch_error = (pitch - 3) / 2; + // min dist of zero + zero_offset = (int16_t)(pitch * pitsync_joined_edge); + for (left_edge = projection_left; + projection->pile_count(left_edge) == 0 && left_edge < projection_right; + left_edge++) + ; + for (right_edge = projection_right; + projection->pile_count(right_edge) == 0 && right_edge > left_edge; + right_edge--) + ; array_origin = left_edge - pitch; - cutpts = (FPCUTPT *) alloc_mem ((right_edge - left_edge + pitch * 2 + 1) - * sizeof (FPCUTPT)); - mins = (BOOL8 *) alloc_mem ((pitch_error * 2 + 1) * sizeof (BOOL8)); + cutpts = (FPCUTPT*)alloc_mem((right_edge - left_edge + pitch * 2 + 1) * + sizeof(FPCUTPT)); + mins = (BOOL8*)alloc_mem((pitch_error * 2 + 1) * sizeof(BOOL8)); for (x = array_origin; x < left_edge; x++) - //free cuts - cutpts[x - array_origin].setup (cutpts, array_origin, projection, zero_count, pitch, x, 0); + // free cuts + cutpts[x - array_origin].setup(cutpts, array_origin, projection, zero_count, + pitch, x, 0); prev_zero = left_edge - 1; for (offset = 0; offset <= pitch_error; offset++, x++) - //not quite free - cutpts[x - array_origin].setup (cutpts, array_origin, projection, zero_count, pitch, x, offset); + // not quite free + cutpts[x - array_origin].setup(cutpts, array_origin, projection, zero_count, + pitch, x, offset); best_cost = MAX_FLOAT32; best_end = nullptr; for (offset = -pitch_error, minindex = 0; offset < pitch_error; - offset++, minindex++) - mins[minindex] = projection->local_min (x + offset); + offset++, minindex++) + mins[minindex] = projection->local_min(x + offset); next_zero = x + zero_offset + 1; for (offset = next_zero - 1; offset >= x; offset--) { - if (projection->pile_count (offset) <= zero_count) { + if (projection->pile_count(offset) <= zero_count) { next_zero = offset; break; } } while (x < right_edge - pitch_error) { - mins[minindex] = projection->local_min (x + pitch_error); + mins[minindex] = projection->local_min(x + pitch_error); minindex++; - if (minindex > pitch_error * 2) - minindex = 0; + if (minindex > pitch_error * 2) minindex = 0; faking = false; mid_cut = false; offset = 0; - if (projection->pile_count (x) <= zero_count) { + if (projection->pile_count(x) <= zero_count) { prev_zero = x; - } - else { + } else { for (offset = 1; offset <= pitch_error; offset++) - if (projection->pile_count (x + offset) <= zero_count - || projection->pile_count (x - offset) <= zero_count) + if (projection->pile_count(x + offset) <= zero_count || + projection->pile_count(x - offset) <= zero_count) break; } if (offset > pitch_error) { if (x - prev_zero > zero_offset && next_zero - x > zero_offset) { for (offset = 0; offset <= pitch_error; offset++) { test_index = minindex + pitch_error + offset; - if (test_index > pitch_error * 2) - test_index -= pitch_error * 2 + 1; - if (mins[test_index]) - break; + if (test_index > pitch_error * 2) test_index -= pitch_error * 2 + 1; + if (mins[test_index]) break; test_index = minindex + pitch_error - offset; - if (test_index > pitch_error * 2) - test_index -= pitch_error * 2 + 1; - if (mins[test_index]) - break; + if (test_index > pitch_error * 2) test_index -= pitch_error * 2 + 1; + if (mins[test_index]) break; } } if (offset > pitch_error) { - offset = projection->pile_count (x); + offset = projection->pile_count(x); faking = true; - } - else { + } else { projection_offset = - (int16_t) (projection->pile_count (x) / projection_scale); - if (projection_offset > offset) - offset = projection_offset; + (int16_t)(projection->pile_count(x) / projection_scale); + if (projection_offset > offset) offset = projection_offset; mid_cut = true; } } - if ((start == 0 && end == 0) - || !textord_fast_pitch_test - || (x - projection_left - start) % pitch <= end) - cutpts[x - array_origin].assign (cutpts, array_origin, x, - faking, mid_cut, offset, projection, - projection_scale, zero_count, pitch, - pitch_error); + if ((start == 0 && end == 0) || !textord_fast_pitch_test || + (x - projection_left - start) % pitch <= end) + cutpts[x - array_origin].assign(cutpts, array_origin, x, faking, mid_cut, + offset, projection, projection_scale, + zero_count, pitch, pitch_error); else - cutpts[x - array_origin].assign_cheap (cutpts, array_origin, x, - faking, mid_cut, offset, - projection, projection_scale, - zero_count, pitch, - pitch_error); + cutpts[x - array_origin].assign_cheap( + cutpts, array_origin, x, faking, mid_cut, offset, projection, + projection_scale, zero_count, pitch, pitch_error); x++; if (next_zero < x || next_zero == x + zero_offset) next_zero = x + zero_offset + 1; - if (projection->pile_count (x + zero_offset) <= zero_count) + if (projection->pile_count(x + zero_offset) <= zero_count) next_zero = x + zero_offset; } @@ -634,32 +605,31 @@ double check_pitch_sync3( //find segmentation best_count = INT16_MAX; while (x < right_edge + pitch) { offset = x < right_edge ? right_edge - x : 0; - cutpts[x - array_origin].assign (cutpts, array_origin, x, - false, false, offset, projection, - projection_scale, zero_count, pitch, - pitch_error); + cutpts[x - array_origin].assign(cutpts, array_origin, x, false, false, + offset, projection, projection_scale, + zero_count, pitch, pitch_error); cutpts[x - array_origin].terminal = true; - if (cutpts[x - array_origin].index () + - cutpts[x - array_origin].fake_count <= best_count + best_fake) { - if (cutpts[x - array_origin].fake_count < best_fake - || (cutpts[x - array_origin].fake_count == best_fake - && cutpts[x - array_origin].cost_function () < best_cost)) { + if (cutpts[x - array_origin].index() + + cutpts[x - array_origin].fake_count <= + best_count + best_fake) { + if (cutpts[x - array_origin].fake_count < best_fake || + (cutpts[x - array_origin].fake_count == best_fake && + cutpts[x - array_origin].cost_function() < best_cost)) { best_fake = cutpts[x - array_origin].fake_count; - best_cost = cutpts[x - array_origin].cost_function (); + best_cost = cutpts[x - array_origin].cost_function(); best_left_x = x; best_right_x = x; - best_count = cutpts[x - array_origin].index (); - } - else if (cutpts[x - array_origin].fake_count == best_fake - && x == best_right_x + 1 - && cutpts[x - array_origin].cost_function () == best_cost) { - //exactly equal + best_count = cutpts[x - array_origin].index(); + } else if (cutpts[x - array_origin].fake_count == best_fake && + x == best_right_x + 1 && + cutpts[x - array_origin].cost_function() == best_cost) { + // exactly equal best_right_x = x; } } x++; } - ASSERT_HOST (best_fake < INT16_MAX); + ASSERT_HOST(best_fake < INT16_MAX); best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; // for (x=left_edge-pitch;xposition () - pitch + pitch_error; - x < best_end->position () - pitch_error - && projection->pile_count (x) == 0; x++); - if (x < best_end->position () - pitch_error) - occupation_count++; - //copy it - segpt = new FPSEGPT (best_end); - seg_it.add_before_then_move (segpt); - best_end = best_end->previous (); - } - while (best_end != nullptr); - seg_it.move_to_last (); - mean_sum = seg_it.data ()->sum (); + for (x = best_end->position() - pitch + pitch_error; + x < best_end->position() - pitch_error && + projection->pile_count(x) == 0; + x++) + ; + if (x < best_end->position() - pitch_error) occupation_count++; + // copy it + segpt = new FPSEGPT(best_end); + seg_it.add_before_then_move(segpt); + best_end = best_end->previous(); + } while (best_end != nullptr); + seg_it.move_to_last(); + mean_sum = seg_it.data()->sum(); mean_sum = mean_sum * mean_sum / best_count; - if (seg_it.data ()->squares () - mean_sum < 0) - tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n", - seg_it.data ()->squares (), seg_it.data ()->sum (), best_count); + if (seg_it.data()->squares() - mean_sum < 0) + tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", + seg_it.data()->squares(), seg_it.data()->sum(), best_count); free_mem(mins); free_mem(cutpts); - return seg_it.data ()->squares () - mean_sum; + return seg_it.data()->squares() - mean_sum; } diff --git a/src/textord/pithsync.h b/src/textord/pithsync.h index c03838b63e..6219e6e727 100644 --- a/src/textord/pithsync.h +++ b/src/textord/pithsync.h @@ -17,116 +17,103 @@ * **********************************************************************/ -#ifndef PITHSYNC_H -#define PITHSYNC_H +#ifndef PITHSYNC_H +#define PITHSYNC_H -#include "blobbox.h" -#include "params.h" -#include "statistc.h" +#include "blobbox.h" +#include "params.h" +#include "statistc.h" class FPSEGPT_LIST; -class FPCUTPT -{ - public: - FPCUTPT() = default; - void setup ( //start of cut - FPCUTPT cutpts[], //predecessors - int16_t array_origin, //start coord - STATS * projection, //occupation - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t x, //position - int16_t offset); //dist to gap +class FPCUTPT { + public: + FPCUTPT() = default; + void setup( // start of cut + FPCUTPT cutpts[], // predecessors + int16_t array_origin, // start coord + STATS* projection, // occupation + int16_t zero_count, // official zero + int16_t pitch, // proposed pitch + int16_t x, // position + int16_t offset); // dist to gap - void assign( //evaluate cut - FPCUTPT cutpts[], //predecessors - int16_t array_origin, //start coord - int16_t x, //position - bool faking, //faking this one - bool mid_cut, //doing free cut - int16_t offset, //extra cost dist - STATS* projection, //occupation - float projection_scale, //scaling - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t pitch_error); //allowed tolerance + void assign( // evaluate cut + FPCUTPT cutpts[], // predecessors + int16_t array_origin, // start coord + int16_t x, // position + bool faking, // faking this one + bool mid_cut, // doing free cut + int16_t offset, // extra cost dist + STATS* projection, // occupation + float projection_scale, // scaling + int16_t zero_count, // official zero + int16_t pitch, // proposed pitch + int16_t pitch_error); // allowed tolerance - void assign_cheap ( //evaluate cut - FPCUTPT cutpts[], //predecessors - int16_t array_origin, //start coord - int16_t x, //position - BOOL8 faking, //faking this one - BOOL8 mid_cut, //doing free cut - int16_t offset, //extra cost dist - STATS * projection, //occupation - float projection_scale, //scaling - int16_t zero_count, //official zero - int16_t pitch, //proposed pitch - int16_t pitch_error); //allowed tolerance + void assign_cheap( // evaluate cut + FPCUTPT cutpts[], // predecessors + int16_t array_origin, // start coord + int16_t x, // position + BOOL8 faking, // faking this one + BOOL8 mid_cut, // doing free cut + int16_t offset, // extra cost dist + STATS* projection, // occupation + float projection_scale, // scaling + int16_t zero_count, // official zero + int16_t pitch, // proposed pitch + int16_t pitch_error); // allowed tolerance - int32_t position() { // access func - return xpos; - } - double cost_function() { - return cost; - } - double squares() { - return sq_sum; - } - double sum() { - return mean_sum; - } - FPCUTPT *previous() { - return pred; - } - int16_t cheap_cuts() const { //no of mi cuts - return mid_cuts; - } - int16_t index() const { - return region_index; - } + int32_t position() { // access func + return xpos; + } + double cost_function() { return cost; } + double squares() { return sq_sum; } + double sum() { return mean_sum; } + FPCUTPT* previous() { return pred; } + int16_t cheap_cuts() const { // no of mi cuts + return mid_cuts; + } + int16_t index() const { return region_index; } - bool faked; //faked split point - bool terminal; //successful end - int16_t fake_count; //total fakes to here + bool faked; // faked split point + bool terminal; // successful end + int16_t fake_count; // total fakes to here - private: - int16_t region_index; //cut serial number - int16_t mid_cuts; //no of cheap cuts - int32_t xpos; //location - uint32_t back_balance; //proj backwards - uint32_t fwd_balance; //proj forwards - FPCUTPT *pred; //optimal previous - double mean_sum; //mean so far - double sq_sum; //summed distsances - double cost; //cost function + private: + int16_t region_index; // cut serial number + int16_t mid_cuts; // no of cheap cuts + int32_t xpos; // location + uint32_t back_balance; // proj backwards + uint32_t fwd_balance; // proj forwards + FPCUTPT* pred; // optimal previous + double mean_sum; // mean so far + double sq_sum; // summed distsances + double cost; // cost function }; -double check_pitch_sync2( //find segmentation - BLOBNBOX_IT *blob_it, //blobs to do - int16_t blob_count, //no of blobs - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - int16_t projection_left, //edges //scale factor - int16_t projection_right, - float projection_scale, - int16_t &occupation_count, //no of occupied cells - FPSEGPT_LIST *seg_list, //output list - int16_t start, //start of good range - int16_t end //end of good range - ); -double check_pitch_sync3( //find segmentation - int16_t projection_left, //edges //to be considered 0 - int16_t projection_right, - int16_t zero_count, - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - float projection_scale, //scale factor - int16_t &occupation_count, //no of occupied cells - FPSEGPT_LIST *seg_list, //output list - int16_t start, //start of good range - int16_t end //end of good range - ); +double check_pitch_sync2( // find segmentation + BLOBNBOX_IT* blob_it, // blobs to do + int16_t blob_count, // no of blobs + int16_t pitch, // pitch estimate + int16_t pitch_error, // tolerance + STATS* projection, // vertical + int16_t projection_left, // edges //scale factor + int16_t projection_right, float projection_scale, + int16_t& occupation_count, // no of occupied cells + FPSEGPT_LIST* seg_list, // output list + int16_t start, // start of good range + int16_t end // end of good range +); +double check_pitch_sync3( // find segmentation + int16_t projection_left, // edges //to be considered 0 + int16_t projection_right, int16_t zero_count, + int16_t pitch, // pitch estimate + int16_t pitch_error, // tolerance + STATS* projection, // vertical + float projection_scale, // scale factor + int16_t& occupation_count, // no of occupied cells + FPSEGPT_LIST* seg_list, // output list + int16_t start, // start of good range + int16_t end // end of good range +); #endif diff --git a/src/textord/pitsync1.cpp b/src/textord/pitsync1.cpp index c95c670aa0..bec7ba7c63 100644 --- a/src/textord/pitsync1.cpp +++ b/src/textord/pitsync1.cpp @@ -18,24 +18,23 @@ **********************************************************************/ #ifdef __UNIX__ -#include +#include #endif -#include -#include "memry.h" -#include "pitsync1.h" +#include +#include "memry.h" +#include "pitsync1.h" -ELISTIZE (FPSEGPT) CLISTIZE (FPSEGPT_LIST) +ELISTIZE(FPSEGPT) +CLISTIZE(FPSEGPT_LIST) #define EXTERN +EXTERN INT_VAR(pitsync_linear_version, 6, "Use new fast algorithm"); EXTERN -INT_VAR (pitsync_linear_version, 6, "Use new fast algorithm"); +double_VAR(pitsync_joined_edge, 0.75, "Dist inside big blob for chopping"); EXTERN -double_VAR (pitsync_joined_edge, 0.75, -"Dist inside big blob for chopping"); +double_VAR(pitsync_offset_freecut_fraction, 0.25, + "Fraction of cut for free cuts"); EXTERN -double_VAR (pitsync_offset_freecut_fraction, 0.25, -"Fraction of cut for free cuts"); -EXTERN -INT_VAR (pitsync_fake_depth, 1, "Max advance fake generation"); +INT_VAR(pitsync_fake_depth, 1, "Max advance fake generation"); /********************************************************************** * FPSEGPT::FPSEGPT @@ -44,30 +43,30 @@ INT_VAR (pitsync_fake_depth, 1, "Max advance fake generation"); * The existing FPCUTPT is duplicated. **********************************************************************/ -FPSEGPT::FPSEGPT( //constructor - FPCUTPT *cutpt //create from new form - ) { +FPSEGPT::FPSEGPT( // constructor + FPCUTPT* cutpt // create from new form +) { pred = nullptr; - mean_sum = cutpt->sum (); - sq_sum = cutpt->squares (); - cost = cutpt->cost_function (); + mean_sum = cutpt->sum(); + sq_sum = cutpt->squares(); + cost = cutpt->cost_function(); faked = cutpt->faked; terminal = cutpt->terminal; fake_count = cutpt->fake_count; - xpos = cutpt->position (); - mid_cuts = cutpt->cheap_cuts (); + xpos = cutpt->position(); + mid_cuts = cutpt->cheap_cuts(); } - /********************************************************************** * FPSEGPT::FPSEGPT * * Constructor to make a new FPSEGPT. **********************************************************************/ -FPSEGPT::FPSEGPT ( //constructor -int16_t x //position -):xpos (x) { +FPSEGPT::FPSEGPT( // constructor + int16_t x // position + ) + : xpos(x) { pred = nullptr; mean_sum = 0; sq_sum = 0; @@ -78,30 +77,30 @@ int16_t x //position mid_cuts = 0; } - /********************************************************************** * FPSEGPT::FPSEGPT * * Constructor to make a new FPSEGPT. **********************************************************************/ -FPSEGPT::FPSEGPT ( //constructor -int16_t x, //position -BOOL8 faking, //faking this one -int16_t offset, //dist to gap -int16_t region_index, //segment number -int16_t pitch, //proposed pitch -int16_t pitch_error, //allowed tolerance -FPSEGPT_LIST * prev_list //previous segment -):xpos (x) { - int16_t best_fake; //on previous - FPSEGPT *segpt; //segment point - int32_t dist; //from prev segment - double sq_dist; //squared distance - double mean; //mean pitch - double total; //total dists - double factor; //cost function - FPSEGPT_IT pred_it = prev_list;//for previuos segment +FPSEGPT::FPSEGPT( // constructor + int16_t x, // position + BOOL8 faking, // faking this one + int16_t offset, // dist to gap + int16_t region_index, // segment number + int16_t pitch, // proposed pitch + int16_t pitch_error, // allowed tolerance + FPSEGPT_LIST* prev_list // previous segment + ) + : xpos(x) { + int16_t best_fake; // on previous + FPSEGPT* segpt; // segment point + int32_t dist; // from prev segment + double sq_dist; // squared distance + double mean; // mean pitch + double total; // total dists + double factor; // cost function + FPSEGPT_IT pred_it = prev_list; // for previuos segment cost = MAX_FLOAT32; pred = nullptr; @@ -109,34 +108,31 @@ FPSEGPT_LIST * prev_list //previous segment terminal = FALSE; best_fake = INT16_MAX; mid_cuts = 0; - for (pred_it.mark_cycle_pt (); !pred_it.cycled_list (); pred_it.forward ()) { - segpt = pred_it.data (); - if (segpt->fake_count < best_fake) - best_fake = segpt->fake_count; + for (pred_it.mark_cycle_pt(); !pred_it.cycled_list(); pred_it.forward()) { + segpt = pred_it.data(); + if (segpt->fake_count < best_fake) best_fake = segpt->fake_count; dist = x - segpt->xpos; - if (dist >= pitch - pitch_error && dist <= pitch + pitch_error - && !segpt->terminal) { + if (dist >= pitch - pitch_error && dist <= pitch + pitch_error && + !segpt->terminal) { total = segpt->mean_sum + dist; sq_dist = dist * dist + segpt->sq_sum + offset * offset; - //sum of squarees + // sum of squarees mean = total / region_index; factor = mean - pitch; factor *= factor; - factor += sq_dist / (region_index) - mean * mean; + factor += sq_dist / (region_index)-mean * mean; if (factor < cost) { - cost = factor; //find least cost - pred = segpt; //save path + cost = factor; // find least cost + pred = segpt; // save path mean_sum = total; sq_sum = sq_dist; fake_count = segpt->fake_count + faked; } } } - if (fake_count > best_fake + 1) - pred = nullptr; //fail it + if (fake_count > best_fake + 1) pred = nullptr; // fail it } - /********************************************************************** * check_pitch_sync * @@ -145,278 +141,269 @@ FPSEGPT_LIST * prev_list //previous segment * The return value is a measure of goodness of the sync. **********************************************************************/ -double check_pitch_sync( //find segmentation - BLOBNBOX_IT *blob_it, //blobs to do - int16_t blob_count, //no of blobs - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - FPSEGPT_LIST *seg_list //output list - ) { - int16_t x; //current coord - int16_t min_index; //blob number - int16_t max_index; //blob number - int16_t left_edge; //of word - int16_t right_edge; //of word - int16_t right_max; //max allowed x - int16_t min_x; //in this region +double check_pitch_sync( // find segmentation + BLOBNBOX_IT* blob_it, // blobs to do + int16_t blob_count, // no of blobs + int16_t pitch, // pitch estimate + int16_t pitch_error, // tolerance + STATS* projection, // vertical + FPSEGPT_LIST* seg_list // output list +) { + int16_t x; // current coord + int16_t min_index; // blob number + int16_t max_index; // blob number + int16_t left_edge; // of word + int16_t right_edge; // of word + int16_t right_max; // max allowed x + int16_t min_x; // in this region int16_t max_x; int16_t region_index; - int16_t best_region_index = 0; //for best result - int16_t offset; //dist to legal area - int16_t left_best_x; //edge of good region - int16_t right_best_x; //right edge - TBOX min_box; //bounding box - TBOX max_box; //bounding box - TBOX next_box; //box of next blob - FPSEGPT *segpt; //segment point - FPSEGPT_LIST *segpts; //points in a segment - double best_cost; //best path - double mean_sum; //computes result - FPSEGPT *best_end; //end of best path - BLOBNBOX_IT min_it; //copy iterator - BLOBNBOX_IT max_it; //copy iterator - FPSEGPT_IT segpt_it; //iterator - //output segments + int16_t best_region_index = 0; // for best result + int16_t offset; // dist to legal area + int16_t left_best_x; // edge of good region + int16_t right_best_x; // right edge + TBOX min_box; // bounding box + TBOX max_box; // bounding box + TBOX next_box; // box of next blob + FPSEGPT* segpt; // segment point + FPSEGPT_LIST* segpts; // points in a segment + double best_cost; // best path + double mean_sum; // computes result + FPSEGPT* best_end; // end of best path + BLOBNBOX_IT min_it; // copy iterator + BLOBNBOX_IT max_it; // copy iterator + FPSEGPT_IT segpt_it; // iterator + // output segments FPSEGPT_IT outseg_it = seg_list; - FPSEGPT_LIST_CLIST lattice; //list of lists - //region iterator + FPSEGPT_LIST_CLIST lattice; // list of lists + // region iterator FPSEGPT_LIST_C_IT lattice_it = &lattice; // tprintf("Computing sync on word of %d blobs with pitch %d\n", // blob_count, pitch); // if (blob_count==8 && pitch==27) // projection->print(stdout,TRUE); - if (pitch < 3) - pitch = 3; //nothing ludicrous - if ((pitch - 3) / 2 < pitch_error) - pitch_error = (pitch - 3) / 2; + if (pitch < 3) pitch = 3; // nothing ludicrous + if ((pitch - 3) / 2 < pitch_error) pitch_error = (pitch - 3) / 2; min_it = *blob_it; - min_box = box_next (&min_it); //get box + min_box = box_next(&min_it); // get box // if (blob_count==8 && pitch==27) // tprintf("1st box at (%d,%d)->(%d,%d)\n", // min_box.left(),min_box.bottom(), // min_box.right(),min_box.top()); - //left of word - left_edge = min_box.left () + pitch_error; + // left of word + left_edge = min_box.left() + pitch_error; for (min_index = 1; min_index < blob_count; min_index++) { - min_box = box_next (&min_it); + min_box = box_next(&min_it); // if (blob_count==8 && pitch==27) // tprintf("Box at (%d,%d)->(%d,%d)\n", // min_box.left(),min_box.bottom(), // min_box.right(),min_box.top()); } - right_edge = min_box.right (); //end of word + right_edge = min_box.right(); // end of word max_x = left_edge; - //min permissible + // min permissible min_x = max_x - pitch + pitch_error * 2 + 1; right_max = right_edge + pitch - pitch_error - 1; - segpts = new FPSEGPT_LIST; //list of points - segpt_it.set_to_list (segpts); + segpts = new FPSEGPT_LIST; // list of points + segpt_it.set_to_list(segpts); for (x = min_x; x <= max_x; x++) { - segpt = new FPSEGPT (x); //make a new one - //put in list - segpt_it.add_after_then_move (segpt); + segpt = new FPSEGPT(x); // make a new one + // put in list + segpt_it.add_after_then_move(segpt); } - //first segment - lattice_it.add_before_then_move (segpts); + // first segment + lattice_it.add_before_then_move(segpts); min_index = 0; region_index = 1; best_cost = MAX_FLOAT32; best_end = nullptr; min_it = *blob_it; - min_box = box_next (&min_it); //first box + min_box = box_next(&min_it); // first box do { left_best_x = -1; right_best_x = -1; - segpts = new FPSEGPT_LIST; //list of points - segpt_it.set_to_list (segpts); - min_x += pitch - pitch_error;//next limits + segpts = new FPSEGPT_LIST; // list of points + segpt_it.set_to_list(segpts); + min_x += pitch - pitch_error; // next limits max_x += pitch + pitch_error; - while (min_box.right () < min_x && min_index < blob_count) { + while (min_box.right() < min_x && min_index < blob_count) { min_index++; - min_box = box_next (&min_it); + min_box = box_next(&min_it); } max_it = min_it; max_index = min_index; max_box = min_box; - next_box = box_next (&max_it); + next_box = box_next(&max_it); for (x = min_x; x <= max_x && x <= right_max; x++) { - while (x < right_edge && max_index < blob_count - && x > max_box.right ()) { + while (x < right_edge && max_index < blob_count && x > max_box.right()) { max_index++; max_box = next_box; - next_box = box_next (&max_it); + next_box = box_next(&max_it); } - if (x <= max_box.left () + pitch_error - || x >= max_box.right () - pitch_error || x >= right_edge - || (max_index < blob_count - 1 && x >= next_box.left ()) - || (x - max_box.left () > pitch * pitsync_joined_edge - && max_box.right () - x > pitch * pitsync_joined_edge)) { - // || projection->local_min(x)) - if (x - max_box.left () > 0 - && x - max_box.left () <= pitch_error) - //dist to real break - offset = x - max_box.left (); - else if (max_box.right () - x > 0 - && max_box.right () - x <= pitch_error - && (max_index >= blob_count - 1 - || x < next_box.left ())) - offset = max_box.right () - x; + if (x <= max_box.left() + pitch_error || + x >= max_box.right() - pitch_error || x >= right_edge || + (max_index < blob_count - 1 && x >= next_box.left()) || + (x - max_box.left() > pitch * pitsync_joined_edge && + max_box.right() - x > pitch * pitsync_joined_edge)) { + // || projection->local_min(x)) + if (x - max_box.left() > 0 && x - max_box.left() <= pitch_error) + // dist to real break + offset = x - max_box.left(); + else if (max_box.right() - x > 0 && + max_box.right() - x <= pitch_error && + (max_index >= blob_count - 1 || x < next_box.left())) + offset = max_box.right() - x; else offset = 0; // offset=pitsync_offset_freecut_fraction*projection->pile_count(x); - segpt = new FPSEGPT (x, FALSE, offset, region_index, - pitch, pitch_error, lattice_it.data ()); - } - else { - offset = projection->pile_count (x); - segpt = new FPSEGPT (x, TRUE, offset, region_index, - pitch, pitch_error, lattice_it.data ()); + segpt = new FPSEGPT(x, FALSE, offset, region_index, pitch, pitch_error, + lattice_it.data()); + } else { + offset = projection->pile_count(x); + segpt = new FPSEGPT(x, TRUE, offset, region_index, pitch, pitch_error, + lattice_it.data()); } - if (segpt->previous () != nullptr) { - segpt_it.add_after_then_move (segpt); + if (segpt->previous() != nullptr) { + segpt_it.add_after_then_move(segpt); if (x >= right_edge - pitch_error) { - segpt->terminal = TRUE;//no more wanted - if (segpt->cost_function () < best_cost) { - best_cost = segpt->cost_function (); - //find least + segpt->terminal = TRUE; // no more wanted + if (segpt->cost_function() < best_cost) { + best_cost = segpt->cost_function(); + // find least best_end = segpt; best_region_index = region_index; left_best_x = x; right_best_x = x; - } - else if (segpt->cost_function () == best_cost - && right_best_x == x - 1) + } else if (segpt->cost_function() == best_cost && + right_best_x == x - 1) right_best_x = x; } - } - else { - delete segpt; //no good + } else { + delete segpt; // no good } } - if (segpts->empty ()) { - if (best_end != nullptr) - break; //already found one - make_illegal_segment (lattice_it.data (), min_box, min_it, - region_index, pitch, pitch_error, segpts); - } - else { + if (segpts->empty()) { + if (best_end != nullptr) break; // already found one + make_illegal_segment(lattice_it.data(), min_box, min_it, region_index, + pitch, pitch_error, segpts); + } else { if (right_best_x > left_best_x + 1) { left_best_x = (left_best_x + right_best_x + 1) / 2; - for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list () - && segpt_it.data ()->position () != left_best_x; - segpt_it.forward ()); - if (segpt_it.data ()->position () == left_best_x) - //middle of region - best_end = segpt_it.data (); + for (segpt_it.mark_cycle_pt(); + !segpt_it.cycled_list() && + segpt_it.data()->position() != left_best_x; + segpt_it.forward()) + ; + if (segpt_it.data()->position() == left_best_x) + // middle of region + best_end = segpt_it.data(); } } - //new segment - lattice_it.add_before_then_move (segpts); + // new segment + lattice_it.add_before_then_move(segpts); region_index++; - } - while (min_x < right_edge); - ASSERT_HOST (best_end != nullptr);//must always find some + } while (min_x < right_edge); + ASSERT_HOST(best_end != nullptr); // must always find some - for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list (); - lattice_it.forward ()) { - segpts = lattice_it.data (); - segpt_it.set_to_list (segpts); + for (lattice_it.mark_cycle_pt(); !lattice_it.cycled_list(); + lattice_it.forward()) { + segpts = lattice_it.data(); + segpt_it.set_to_list(segpts); // if (blob_count==8 && pitch==27) // { - // for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward()) + // for + // (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward()) // { // segpt=segpt_it.data(); - // tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n", + // tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, + // pred=%x\n", // segpt->position(),segpt,segpt->cost_function(), // segpt->sum(),segpt->squares(),segpt->previous()); // } // tprintf("\n"); // } - for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list () - && segpt_it.data () != best_end; segpt_it.forward ()); - if (segpt_it.data () == best_end) { - //save good one - segpt = segpt_it.extract (); - outseg_it.add_before_then_move (segpt); - best_end = segpt->previous (); + for (segpt_it.mark_cycle_pt(); + !segpt_it.cycled_list() && segpt_it.data() != best_end; + segpt_it.forward()) + ; + if (segpt_it.data() == best_end) { + // save good one + segpt = segpt_it.extract(); + outseg_it.add_before_then_move(segpt); + best_end = segpt->previous(); } } - ASSERT_HOST (best_end == nullptr); - ASSERT_HOST (!outseg_it.empty ()); - outseg_it.move_to_last (); - mean_sum = outseg_it.data ()->sum (); + ASSERT_HOST(best_end == nullptr); + ASSERT_HOST(!outseg_it.empty()); + outseg_it.move_to_last(); + mean_sum = outseg_it.data()->sum(); mean_sum = mean_sum * mean_sum / best_region_index; - if (outseg_it.data ()->squares () - mean_sum < 0) - tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n", - outseg_it.data ()->squares (), outseg_it.data ()->sum (), - best_region_index); - lattice.deep_clear (); //shift the lot - return outseg_it.data ()->squares () - mean_sum; + if (outseg_it.data()->squares() - mean_sum < 0) + tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", + outseg_it.data()->squares(), outseg_it.data()->sum(), + best_region_index); + lattice.deep_clear(); // shift the lot + return outseg_it.data()->squares() - mean_sum; } - /********************************************************************** * make_illegal_segment * * Make a fake set of chop points due to having no legal places. **********************************************************************/ -void make_illegal_segment( //find segmentation - FPSEGPT_LIST *prev_list, //previous segments - TBOX blob_box, //bounding box - BLOBNBOX_IT blob_it, //iterator - int16_t region_index, //number of segment - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - FPSEGPT_LIST *seg_list //output list - ) { - int16_t x; //current coord - int16_t min_x = 0; //in this region +void make_illegal_segment( // find segmentation + FPSEGPT_LIST* prev_list, // previous segments + TBOX blob_box, // bounding box + BLOBNBOX_IT blob_it, // iterator + int16_t region_index, // number of segment + int16_t pitch, // pitch estimate + int16_t pitch_error, // tolerance + FPSEGPT_LIST* seg_list // output list +) { + int16_t x; // current coord + int16_t min_x = 0; // in this region int16_t max_x = 0; - int16_t offset; //dist to edge - FPSEGPT *segpt; //segment point - FPSEGPT *prevpt; //previous point - float best_cost; //best path - FPSEGPT_IT segpt_it = seg_list;//iterator - //previous points + int16_t offset; // dist to edge + FPSEGPT* segpt; // segment point + FPSEGPT* prevpt; // previous point + float best_cost; // best path + FPSEGPT_IT segpt_it = seg_list; // iterator + // previous points FPSEGPT_IT prevpt_it = prev_list; best_cost = MAX_FLOAT32; - for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list (); - prevpt_it.forward ()) { - prevpt = prevpt_it.data (); - if (prevpt->cost_function () < best_cost) { - //find least - best_cost = prevpt->cost_function (); - min_x = prevpt->position (); - max_x = min_x; //limits on coords - } - else if (prevpt->cost_function () == best_cost) { - max_x = prevpt->position (); + for (prevpt_it.mark_cycle_pt(); !prevpt_it.cycled_list(); + prevpt_it.forward()) { + prevpt = prevpt_it.data(); + if (prevpt->cost_function() < best_cost) { + // find least + best_cost = prevpt->cost_function(); + min_x = prevpt->position(); + max_x = min_x; // limits on coords + } else if (prevpt->cost_function() == best_cost) { + max_x = prevpt->position(); } } min_x += pitch - pitch_error; max_x += pitch + pitch_error; for (x = min_x; x <= max_x; x++) { - while (x > blob_box.right ()) { - blob_box = box_next (&blob_it); + while (x > blob_box.right()) { + blob_box = box_next(&blob_it); } - offset = x - blob_box.left (); - if (blob_box.right () - x < offset) - offset = blob_box.right () - x; - segpt = new FPSEGPT (x, FALSE, offset, - region_index, pitch, pitch_error, prev_list); - if (segpt->previous () != nullptr) { - ASSERT_HOST (offset >= 0); - fprintf (stderr, "made fake at %d\n", x); - //make one up - segpt_it.add_after_then_move (segpt); + offset = x - blob_box.left(); + if (blob_box.right() - x < offset) offset = blob_box.right() - x; + segpt = new FPSEGPT(x, FALSE, offset, region_index, pitch, pitch_error, + prev_list); + if (segpt->previous() != nullptr) { + ASSERT_HOST(offset >= 0); + fprintf(stderr, "made fake at %d\n", x); + // make one up + segpt_it.add_after_then_move(segpt); segpt->faked = TRUE; segpt->fake_count++; - } - else + } else delete segpt; } } diff --git a/src/textord/pitsync1.h b/src/textord/pitsync1.h index a2b381db0d..2186186725 100644 --- a/src/textord/pitsync1.h +++ b/src/textord/pitsync1.h @@ -17,105 +17,93 @@ * **********************************************************************/ -#ifndef PITSYNC1_H -#define PITSYNC1_H +#ifndef PITSYNC1_H +#define PITSYNC1_H -#include "elst.h" -#include "clst.h" -#include "blobbox.h" -#include "params.h" -#include "statistc.h" -#include "pithsync.h" +#include "blobbox.h" +#include "clst.h" +#include "elst.h" +#include "params.h" +#include "pithsync.h" +#include "statistc.h" class FPSEGPT_LIST; -class FPSEGPT:public ELIST_LINK -{ - public: - FPSEGPT() = default; - FPSEGPT( //constructor - int16_t x); //position - FPSEGPT( //constructor - int16_t x, //position - BOOL8 faking, //faking this one - int16_t offset, //extra cost dist - int16_t region_index, //segment number - int16_t pitch, //proposed pitch - int16_t pitch_error, //allowed tolerance - FPSEGPT_LIST *prev_list); //previous segment - FPSEGPT(FPCUTPT *cutpt); //build from new type +class FPSEGPT : public ELIST_LINK { + public: + FPSEGPT() = default; + FPSEGPT( // constructor + int16_t x); // position + FPSEGPT( // constructor + int16_t x, // position + BOOL8 faking, // faking this one + int16_t offset, // extra cost dist + int16_t region_index, // segment number + int16_t pitch, // proposed pitch + int16_t pitch_error, // allowed tolerance + FPSEGPT_LIST* prev_list); // previous segment + FPSEGPT(FPCUTPT* cutpt); // build from new type - int32_t position() { // access func - return xpos; - } - double cost_function() { - return cost; - } - double squares() { - return sq_sum; - } - double sum() { - return mean_sum; - } - FPSEGPT *previous() { - return pred; - } - int16_t cheap_cuts() const { //no of cheap cuts - return mid_cuts; - } + int32_t position() { // access func + return xpos; + } + double cost_function() { return cost; } + double squares() { return sq_sum; } + double sum() { return mean_sum; } + FPSEGPT* previous() { return pred; } + int16_t cheap_cuts() const { // no of cheap cuts + return mid_cuts; + } - //faked split point - BOOL8 faked; - BOOL8 terminal; //successful end - int16_t fake_count; //total fakes to here + // faked split point + BOOL8 faked; + BOOL8 terminal; // successful end + int16_t fake_count; // total fakes to here - private: - int16_t mid_cuts; //no of cheap cuts - int32_t xpos; //location - FPSEGPT *pred; //optimal previous - double mean_sum; //mean so far - double sq_sum; //summed distsances - double cost; //cost function + private: + int16_t mid_cuts; // no of cheap cuts + int32_t xpos; // location + FPSEGPT* pred; // optimal previous + double mean_sum; // mean so far + double sq_sum; // summed distsances + double cost; // cost function }; -ELISTIZEH (FPSEGPT) CLISTIZEH (FPSEGPT_LIST) -extern -INT_VAR_H (pitsync_linear_version, 0, "Use new fast algorithm"); -extern -double_VAR_H (pitsync_joined_edge, 0.75, -"Dist inside big blob for chopping"); -extern -double_VAR_H (pitsync_offset_freecut_fraction, 0.25, -"Fraction of cut for free cuts"); -extern -INT_VAR_H (pitsync_fake_depth, 1, "Max advance fake generation"); -double check_pitch_sync( //find segmentation - BLOBNBOX_IT *blob_it, //blobs to do - int16_t blob_count, //no of blobs - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - STATS *projection, //vertical - FPSEGPT_LIST *seg_list //output list - ); -void make_illegal_segment( //find segmentation - FPSEGPT_LIST *prev_list, //previous segments - TBOX blob_box, //bounding box - BLOBNBOX_IT blob_it, //iterator - int16_t region_index, //number of segment - int16_t pitch, //pitch estimate - int16_t pitch_error, //tolerance - FPSEGPT_LIST *seg_list //output list - ); -int16_t vertical_torow_projection( //project whole row - TO_ROW *row, //row to do - STATS *projection //output - ); -void vertical_cblob_projection( //project outlines - C_BLOB *blob, //blob to project - STATS *stats //output - ); -void vertical_coutline_projection( //project outlines - C_OUTLINE *outline, //outline to project - STATS *stats //output - ); +ELISTIZEH(FPSEGPT) +CLISTIZEH(FPSEGPT_LIST) +extern INT_VAR_H(pitsync_linear_version, 0, "Use new fast algorithm"); +extern double_VAR_H(pitsync_joined_edge, 0.75, + "Dist inside big blob for chopping"); +extern double_VAR_H(pitsync_offset_freecut_fraction, 0.25, + "Fraction of cut for free cuts"); +extern INT_VAR_H(pitsync_fake_depth, 1, "Max advance fake generation"); +double check_pitch_sync( // find segmentation + BLOBNBOX_IT* blob_it, // blobs to do + int16_t blob_count, // no of blobs + int16_t pitch, // pitch estimate + int16_t pitch_error, // tolerance + STATS* projection, // vertical + FPSEGPT_LIST* seg_list // output list +); +void make_illegal_segment( // find segmentation + FPSEGPT_LIST* prev_list, // previous segments + TBOX blob_box, // bounding box + BLOBNBOX_IT blob_it, // iterator + int16_t region_index, // number of segment + int16_t pitch, // pitch estimate + int16_t pitch_error, // tolerance + FPSEGPT_LIST* seg_list // output list +); +int16_t vertical_torow_projection( // project whole row + TO_ROW* row, // row to do + STATS* projection // output +); +void vertical_cblob_projection( // project outlines + C_BLOB* blob, // blob to project + STATS* stats // output +); +void vertical_coutline_projection( // project outlines + C_OUTLINE* outline, // outline to project + STATS* stats // output +); #endif diff --git a/src/textord/scanedg.cpp b/src/textord/scanedg.cpp index 5760263b75..0a74f09572 100644 --- a/src/textord/scanedg.cpp +++ b/src/textord/scanedg.cpp @@ -24,10 +24,10 @@ #include "allheaders.h" #include "edgloop.h" -#define WHITE_PIX 1 /*thresholded colours */ -#define BLACK_PIX 0 +#define WHITE_PIX 1 /*thresholded colours */ +#define BLACK_PIX 0 // Flips between WHITE_PIX and BLACK_PIX. -#define FLIP_COLOUR(pix) (1-(pix)) +#define FLIP_COLOUR(pix) (1 - (pix)) /********************************************************************** * block_edges @@ -35,24 +35,24 @@ * Extract edges from a PDBLK. **********************************************************************/ -void block_edges(Pix *t_pix, // thresholded image - PDBLK *block, // block in image +void block_edges(Pix* t_pix, // thresholded image + PDBLK* block, // block in image C_OUTLINE_IT* outline_it) { - ICOORD bleft; // bounding box + ICOORD bleft; // bounding box ICOORD tright; - BLOCK_LINE_IT line_it = block; // line iterator + BLOCK_LINE_IT line_it = block; // line iterator int width = pixGetWidth(t_pix); int height = pixGetHeight(t_pix); int wpl = pixGetWpl(t_pix); - // lines in progress - CRACKEDGE **ptrline = new CRACKEDGE*[width + 1]; - CRACKEDGE *free_cracks = nullptr; + // lines in progress + CRACKEDGE** ptrline = new CRACKEDGE*[width + 1]; + CRACKEDGE* free_cracks = nullptr; block->bounding_box(bleft, tright); // block box int block_width = tright.x() - bleft.x(); for (int x = block_width; x >= 0; x--) - ptrline[x] = nullptr; // no lines in progress + ptrline[x] = nullptr; // no lines in progress uint8_t* bwline = new uint8_t[width]; @@ -69,8 +69,8 @@ void block_edges(Pix *t_pix, // thresholded image } else { memset(bwline, margin, block_width * sizeof(bwline[0])); } - line_edges(bleft.x(), y, block_width, - margin, bwline, ptrline, &free_cracks, outline_it); + line_edges(bleft.x(), y, block_width, margin, bwline, ptrline, &free_cracks, + outline_it); } free_crackedges(free_cracks); // really free them @@ -78,56 +78,52 @@ void block_edges(Pix *t_pix, // thresholded image delete[] bwline; } - /********************************************************************** * make_margins * * Get an image line and set to margin non-text pixels. **********************************************************************/ -void make_margins( //get a line - PDBLK *block, //block in image - BLOCK_LINE_IT *line_it, //for old style - uint8_t *pixels, //pixels to strip - uint8_t margin, //white-out pixel - int16_t left, //block edges - int16_t right, - int16_t y //line coord - ) { - PB_LINE_IT *lines; +void make_margins( // get a line + PDBLK* block, // block in image + BLOCK_LINE_IT* line_it, // for old style + uint8_t* pixels, // pixels to strip + uint8_t margin, // white-out pixel + int16_t left, // block edges + int16_t right, + int16_t y // line coord +) { + PB_LINE_IT* lines; ICOORDELT_IT seg_it; - int32_t start; //of segment - int16_t xext; //of segment - int xindex; //index to pixel + int32_t start; // of segment + int16_t xext; // of segment + int xindex; // index to pixel - if (block->poly_block () != nullptr) { - lines = new PB_LINE_IT (block->poly_block ()); + if (block->poly_block() != nullptr) { + lines = new PB_LINE_IT(block->poly_block()); const std::unique_ptr segments( lines->get_line(y)); - if (!segments->empty ()) { + if (!segments->empty()) { seg_it.set_to_list(segments.get()); - seg_it.mark_cycle_pt (); - start = seg_it.data ()->x (); - xext = seg_it.data ()->y (); + seg_it.mark_cycle_pt(); + start = seg_it.data()->x(); + xext = seg_it.data()->y(); for (xindex = left; xindex < right; xindex++) { - if (xindex >= start && !seg_it.cycled_list ()) { + if (xindex >= start && !seg_it.cycled_list()) { xindex = start + xext - 1; - seg_it.forward (); - start = seg_it.data ()->x (); - xext = seg_it.data ()->y (); - } - else + seg_it.forward(); + start = seg_it.data()->x(); + xext = seg_it.data()->y(); + } else pixels[xindex - left] = margin; } - } - else { + } else { for (xindex = left; xindex < right; xindex++) pixels[xindex - left] = margin; } delete lines; - } - else { - start = line_it->get_line (y, xext); + } else { + start = line_it->get_line(y, xext); for (xindex = left; xindex < start; xindex++) pixels[xindex - left] = margin; for (xindex = start + xext; xindex < right; xindex++) @@ -142,46 +138,45 @@ void make_margins( //get a line * When edges close into loops, send them for approximation. **********************************************************************/ -void line_edges(int16_t x, // coord of line start - int16_t y, // coord of line - int16_t xext, // width of line - uint8_t uppercolour, // start of prev line - uint8_t * bwpos, // thresholded line - CRACKEDGE ** prevline, // edges in progress - CRACKEDGE **free_cracks, - C_OUTLINE_IT* outline_it) { - CrackPos pos = {free_cracks, x, y }; - int xmax; // max x coord - int colour; // of current pixel - int prevcolour; // of previous pixel - CRACKEDGE *current; // current h edge - CRACKEDGE *newcurrent; // new h edge - - xmax = x + xext; // max allowable coord - prevcolour = uppercolour; // forced plain margin - current = nullptr; // nothing yet - - // do each pixel +void line_edges(int16_t x, // coord of line start + int16_t y, // coord of line + int16_t xext, // width of line + uint8_t uppercolour, // start of prev line + uint8_t* bwpos, // thresholded line + CRACKEDGE** prevline, // edges in progress + CRACKEDGE** free_cracks, C_OUTLINE_IT* outline_it) { + CrackPos pos = {free_cracks, x, y}; + int xmax; // max x coord + int colour; // of current pixel + int prevcolour; // of previous pixel + CRACKEDGE* current; // current h edge + CRACKEDGE* newcurrent; // new h edge + + xmax = x + xext; // max allowable coord + prevcolour = uppercolour; // forced plain margin + current = nullptr; // nothing yet + + // do each pixel for (; pos.x < xmax; pos.x++, prevline++) { - colour = *bwpos++; // current pixel + colour = *bwpos++; // current pixel if (*prevline != nullptr) { - // changed above - // change colour + // changed above + // change colour uppercolour = FLIP_COLOUR(uppercolour); if (colour == prevcolour) { if (colour == uppercolour) { - // finish a line + // finish a line join_edges(current, *prevline, free_cracks, outline_it); - current = nullptr; // no edge now + current = nullptr; // no edge now } else { - // new horiz edge + // new horiz edge current = h_edge(uppercolour - colour, *prevline, &pos); } - *prevline = nullptr; // no change this time + *prevline = nullptr; // no change this time } else { if (colour == uppercolour) *prevline = v_edge(colour - prevcolour, *prevline, &pos); - // 8 vs 4 connection + // 8 vs 4 connection else if (colour == WHITE_PIX) { join_edges(current, *prevline, free_cracks, outline_it); current = h_edge(uppercolour - colour, nullptr, &pos); @@ -191,7 +186,7 @@ void line_edges(int16_t x, // coord of line start *prevline = v_edge(colour - prevcolour, current, &pos); current = newcurrent; // right going h edge } - prevcolour = colour; // remember new colour + prevcolour = colour; // remember new colour } } else { if (colour != prevcolour) { @@ -201,35 +196,34 @@ void line_edges(int16_t x, // coord of line start if (colour != uppercolour) current = h_edge(uppercolour - colour, current, &pos); else - current = nullptr; // no edge now + current = nullptr; // no edge now } } if (current != nullptr) { - // out of block - if (*prevline != nullptr) { // got one to join to? + // out of block + if (*prevline != nullptr) { // got one to join to? join_edges(current, *prevline, free_cracks, outline_it); - *prevline = nullptr; // tidy now + *prevline = nullptr; // tidy now } else { - // fake vertical - *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, current, &pos); + // fake vertical + *prevline = v_edge(FLIP_COLOUR(prevcolour) - prevcolour, current, &pos); } } else if (*prevline != nullptr) { - //continue fake - *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, *prevline, &pos); + // continue fake + *prevline = v_edge(FLIP_COLOUR(prevcolour) - prevcolour, *prevline, &pos); } } - /********************************************************************** * h_edge * * Create a new horizontal CRACKEDGE and join it to the given edge. **********************************************************************/ -CRACKEDGE *h_edge(int sign, // sign of edge - CRACKEDGE* join, // edge to join to +CRACKEDGE* h_edge(int sign, // sign of edge + CRACKEDGE* join, // edge to join to CrackPos* pos) { - CRACKEDGE *newpt; // return value + CRACKEDGE* newpt; // return value if (*pos->free_cracks != nullptr) { newpt = *pos->free_cracks; @@ -237,50 +231,48 @@ CRACKEDGE *h_edge(int sign, // sign of edge } else { newpt = new CRACKEDGE; } - newpt->pos.set_y(pos->y + 1); // coords of pt + newpt->pos.set_y(pos->y + 1); // coords of pt newpt->stepy = 0; // edge is horizontal if (sign > 0) { - newpt->pos.set_x(pos->x + 1); // start location + newpt->pos.set_x(pos->x + 1); // start location newpt->stepx = -1; newpt->stepdir = 0; } else { - newpt->pos.set_x(pos->x); // start location + newpt->pos.set_x(pos->x); // start location newpt->stepx = 1; newpt->stepdir = 2; } if (join == nullptr) { - newpt->next = newpt; // ptrs to other ends + newpt->next = newpt; // ptrs to other ends newpt->prev = newpt; } else { - if (newpt->pos.x() + newpt->stepx == join->pos.x() - && newpt->pos.y() == join->pos.y()) { + if (newpt->pos.x() + newpt->stepx == join->pos.x() && + newpt->pos.y() == join->pos.y()) { newpt->prev = join->prev; // update other ends newpt->prev->next = newpt; - newpt->next = join; // join up + newpt->next = join; // join up join->prev = newpt; } else { newpt->next = join->next; // update other ends newpt->next->prev = newpt; - newpt->prev = join; // join up + newpt->prev = join; // join up join->next = newpt; } } return newpt; } - /********************************************************************** * v_edge * * Create a new vertical CRACKEDGE and join it to the given edge. **********************************************************************/ -CRACKEDGE *v_edge(int sign, // sign of edge - CRACKEDGE* join, - CrackPos* pos) { - CRACKEDGE *newpt; // return value +CRACKEDGE* v_edge(int sign, // sign of edge + CRACKEDGE* join, CrackPos* pos) { + CRACKEDGE* newpt; // return value if (*pos->free_cracks != nullptr) { newpt = *pos->free_cracks; @@ -288,40 +280,39 @@ CRACKEDGE *v_edge(int sign, // sign of edge } else { newpt = new CRACKEDGE; } - newpt->pos.set_x(pos->x); // coords of pt - newpt->stepx = 0; // edge is vertical + newpt->pos.set_x(pos->x); // coords of pt + newpt->stepx = 0; // edge is vertical if (sign > 0) { - newpt->pos.set_y(pos->y); // start location + newpt->pos.set_y(pos->y); // start location newpt->stepy = 1; newpt->stepdir = 3; } else { - newpt->pos.set_y(pos->y + 1); // start location + newpt->pos.set_y(pos->y + 1); // start location newpt->stepy = -1; newpt->stepdir = 1; } if (join == nullptr) { - newpt->next = newpt; //ptrs to other ends + newpt->next = newpt; // ptrs to other ends newpt->prev = newpt; } else { - if (newpt->pos.x() == join->pos.x() - && newpt->pos.y() + newpt->stepy == join->pos.y()) { + if (newpt->pos.x() == join->pos.x() && + newpt->pos.y() + newpt->stepy == join->pos.y()) { newpt->prev = join->prev; // update other ends newpt->prev->next = newpt; - newpt->next = join; // join up + newpt->next = join; // join up join->prev = newpt; } else { newpt->next = join->next; // update other ends newpt->next->prev = newpt; - newpt->prev = join; // join up + newpt->prev = join; // join up join->next = newpt; } } return newpt; } - /********************************************************************** * join_edges * @@ -329,45 +320,43 @@ CRACKEDGE *v_edge(int sign, // sign of edge * closed loop is formed. **********************************************************************/ -void join_edges(CRACKEDGE *edge1, // edges to join - CRACKEDGE *edge2, // no specific order - CRACKEDGE **free_cracks, - C_OUTLINE_IT* outline_it) { - if (edge1->pos.x() + edge1->stepx != edge2->pos.x() - || edge1->pos.y() + edge1->stepy != edge2->pos.y()) { - CRACKEDGE *tempedge = edge1; +void join_edges(CRACKEDGE* edge1, // edges to join + CRACKEDGE* edge2, // no specific order + CRACKEDGE** free_cracks, C_OUTLINE_IT* outline_it) { + if (edge1->pos.x() + edge1->stepx != edge2->pos.x() || + edge1->pos.y() + edge1->stepy != edge2->pos.y()) { + CRACKEDGE* tempedge = edge1; edge1 = edge2; // swap around edge2 = tempedge; } if (edge1->next == edge2) { - // already closed + // already closed complete_edge(edge1, outline_it); - // attach freelist to end + // attach freelist to end edge1->prev->next = *free_cracks; - *free_cracks = edge1; // and free list + *free_cracks = edge1; // and free list } else { - // update opposite ends + // update opposite ends edge2->prev->next = edge1->next; edge1->next->prev = edge2->prev; - edge1->next = edge2; // make joins + edge1->next = edge2; // make joins edge2->prev = edge1; } } - /********************************************************************** * free_crackedges * * Really free the CRACKEDGEs by giving them back to delete. **********************************************************************/ -void free_crackedges(CRACKEDGE *start) { - CRACKEDGE *current; // current edge to free - CRACKEDGE *next; // next one to free +void free_crackedges(CRACKEDGE* start) { + CRACKEDGE* current; // current edge to free + CRACKEDGE* next; // next one to free for (current = start; current != nullptr; current = next) { next = current->next; - delete current; // delete them all + delete current; // delete them all } } diff --git a/src/textord/scanedg.h b/src/textord/scanedg.h index 678305c729..12dfabb9b6 100644 --- a/src/textord/scanedg.h +++ b/src/textord/scanedg.h @@ -17,52 +17,50 @@ * **********************************************************************/ -#ifndef SCANEDG_H -#define SCANEDG_H +#ifndef SCANEDG_H +#define SCANEDG_H -#include "params.h" -#include "scrollview.h" -#include "pdblock.h" -#include "crakedge.h" +#include "crakedge.h" +#include "params.h" +#include "pdblock.h" +#include "scrollview.h" class C_OUTLINE_IT; struct CrackPos { - CRACKEDGE** free_cracks; // Freelist for fast allocation. - int x; // Position of new edge. + CRACKEDGE** free_cracks; // Freelist for fast allocation. + int x; // Position of new edge. int y; }; struct Pix; -void block_edges(Pix *t_image, // thresholded image - PDBLK *block, // block in image +void block_edges(Pix* t_image, // thresholded image + PDBLK* block, // block in image C_OUTLINE_IT* outline_it); -void make_margins(PDBLK *block, // block in image - BLOCK_LINE_IT *line_it, // for old style - uint8_t *pixels, // pixels to strip - uint8_t margin, // white-out pixel - int16_t left, // block edges +void make_margins(PDBLK* block, // block in image + BLOCK_LINE_IT* line_it, // for old style + uint8_t* pixels, // pixels to strip + uint8_t margin, // white-out pixel + int16_t left, // block edges int16_t right, - int16_t y); // line coord ); -void line_edges(int16_t x, // coord of line start - int16_t y, // coord of line - int16_t xext, // width of line - uint8_t uppercolour, // start of prev line - uint8_t * bwpos, // thresholded line - CRACKEDGE ** prevline, // edges in progress - CRACKEDGE **free_cracks, - C_OUTLINE_IT* outline_it); -CRACKEDGE *h_edge(int sign, // sign of edge - CRACKEDGE * join, // edge to join to + int16_t y); // line coord ); +void line_edges(int16_t x, // coord of line start + int16_t y, // coord of line + int16_t xext, // width of line + uint8_t uppercolour, // start of prev line + uint8_t* bwpos, // thresholded line + CRACKEDGE** prevline, // edges in progress + CRACKEDGE** free_cracks, C_OUTLINE_IT* outline_it); +CRACKEDGE* h_edge(int sign, // sign of edge + CRACKEDGE* join, // edge to join to CrackPos* pos); -CRACKEDGE *v_edge(int sign, // sign of edge - CRACKEDGE * join, // edge to join to +CRACKEDGE* v_edge(int sign, // sign of edge + CRACKEDGE* join, // edge to join to CrackPos* pos); -void join_edges(CRACKEDGE *edge1, // edges to join - CRACKEDGE *edge2, // no specific order - CRACKEDGE **free_cracks, - C_OUTLINE_IT* outline_it); -void free_crackedges(CRACKEDGE *start); +void join_edges(CRACKEDGE* edge1, // edges to join + CRACKEDGE* edge2, // no specific order + CRACKEDGE** free_cracks, C_OUTLINE_IT* outline_it); +void free_crackedges(CRACKEDGE* start); #endif diff --git a/src/textord/sortflts.cpp b/src/textord/sortflts.cpp index 7a8c5fe8cf..a9b7a3f48a 100644 --- a/src/textord/sortflts.cpp +++ b/src/textord/sortflts.cpp @@ -17,62 +17,57 @@ * **********************************************************************/ -#include "sortflts.h" +#include "sortflts.h" -ELISTIZE (SORTED_FLOAT) +ELISTIZE(SORTED_FLOAT) /** * @name SORTED_FLOATS::add * * Add a new entry to the sorted lsit of floats. */ -void SORTED_FLOATS::add( //add new entry - float value, - int32_t key) { - SORTED_FLOAT *new_float = new SORTED_FLOAT (value, key); +void SORTED_FLOATS::add( // add new entry + float value, int32_t key) { + SORTED_FLOAT* new_float = new SORTED_FLOAT(value, key); - if (list.empty ()) - it.add_after_stay_put (new_float); + if (list.empty()) + it.add_after_stay_put(new_float); else { - it.move_to_first (); - while (!it.at_last () && it.data ()->entry < value) - it.forward (); - if (it.data ()->entry < value) - it.add_after_stay_put (new_float); + it.move_to_first(); + while (!it.at_last() && it.data()->entry < value) it.forward(); + if (it.data()->entry < value) + it.add_after_stay_put(new_float); else - it.add_before_stay_put (new_float); + it.add_before_stay_put(new_float); } } - /** * @name SORTED_FLOATS::remove * * Remove an entry from the sorted lsit of floats. */ -void SORTED_FLOATS::remove( //remove the entry - int32_t key) { - if (!list.empty ()) { - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - if (it.data ()->address == key) { - delete it.extract (); +void SORTED_FLOATS::remove( // remove the entry + int32_t key) { + if (!list.empty()) { + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + if (it.data()->address == key) { + delete it.extract(); return; } } } } - /** * @name SORTED_FLOATS::operator[] * * Return the floating point value of the given index into the list. */ -float -SORTED_FLOATS::operator[] ( //get an entry -int32_t index //to list +float SORTED_FLOATS::operator[]( // get an entry + int32_t index // to list ) { - it.move_to_first (); - return it.data_relative (index)->entry; + it.move_to_first(); + return it.data_relative(index)->entry; } diff --git a/src/textord/sortflts.h b/src/textord/sortflts.h index 40824b5777..92115fa672 100644 --- a/src/textord/sortflts.h +++ b/src/textord/sortflts.h @@ -17,56 +17,52 @@ * **********************************************************************/ -#ifndef SORTFLTS_H -#define SORTFLTS_H +#ifndef SORTFLTS_H +#define SORTFLTS_H -#include "elst.h" +#include "elst.h" -class SORTED_FLOAT:public ELIST_LINK -{ +class SORTED_FLOAT : public ELIST_LINK { friend class SORTED_FLOATS; - public: - SORTED_FLOAT() = default; - SORTED_FLOAT( //create one - float value, //value of entry - int32_t key) { //reference - entry = value; - address = key; - } - private: - float entry; //value of float - int32_t address; //key + public: + SORTED_FLOAT() = default; + SORTED_FLOAT( // create one + float value, // value of entry + int32_t key) { // reference + entry = value; + address = key; + } + + private: + float entry; // value of float + int32_t address; // key }; -ELISTIZEH (SORTED_FLOAT) -class SORTED_FLOATS -{ - public: - /** empty constructor */ - SORTED_FLOATS() { - it.set_to_list (&list); - } - /** - * add sample - * @param value sample float - * @param key retrieval key - */ - void add(float value, - int32_t key); - /** - * delete sample - * @param key key to delete - */ - void remove(int32_t key); - /** - * index to list - * @param index item to get - */ - float operator[] (int32_t index); +ELISTIZEH(SORTED_FLOAT) +class SORTED_FLOATS { + public: + /** empty constructor */ + SORTED_FLOATS() { it.set_to_list(&list); } + /** + * add sample + * @param value sample float + * @param key retrieval key + */ + void add(float value, int32_t key); + /** + * delete sample + * @param key key to delete + */ + void remove(int32_t key); + /** + * index to list + * @param index item to get + */ + float operator[](int32_t index); - private: - SORTED_FLOAT_LIST list; //list of floats - SORTED_FLOAT_IT it; //iterator built-in + private: + SORTED_FLOAT_LIST list; // list of floats + SORTED_FLOAT_IT it; // iterator built-in }; #endif diff --git a/src/textord/strokewidth.cpp b/src/textord/strokewidth.cpp index 0080d053e3..d2f482ffde 100644 --- a/src/textord/strokewidth.cpp +++ b/src/textord/strokewidth.cpp @@ -107,10 +107,14 @@ const double kNoiseOverlapGrowthFactor = 4.0; // image to qualify as noisy. const double kNoiseOverlapAreaFactor = 1.0 / 512; -StrokeWidth::StrokeWidth(int gridsize, - const ICOORD& bleft, const ICOORD& tright) - : BlobGrid(gridsize, bleft, tright), nontext_map_(nullptr), projection_(nullptr), - denorm_(nullptr), grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) { +StrokeWidth::StrokeWidth(int gridsize, const ICOORD& bleft, + const ICOORD& tright) + : BlobGrid(gridsize, bleft, tright), + nontext_map_(nullptr), + projection_(nullptr), + denorm_(nullptr), + grid_box_(bleft, tright), + rerotation_(1.0f, 0.0f) { leaders_win_ = nullptr; widths_win_ = nullptr; initial_widths_win_ = nullptr; @@ -122,11 +126,10 @@ StrokeWidth::StrokeWidth(int gridsize, StrokeWidth::~StrokeWidth() { if (widths_win_ != nullptr) { - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED delete widths_win_->AwaitEvent(SVET_DESTROY); - #endif // GRAPHICS_DISABLED - if (textord_tabfind_only_strokewidths) - exit(0); +#endif // GRAPHICS_DISABLED + if (textord_tabfind_only_strokewidths) exit(0); delete widths_win_; } delete leaders_win_; @@ -160,7 +163,8 @@ void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, // Setup the grid with the remaining (non-noise) blobs. InsertBlobs(input_block); // Repair broken CJK characters if needed. - while (cjk_merge && FixBrokenCJK(input_block)); + while (cjk_merge && FixBrokenCJK(input_block)) + ; // Grade blobs by inspection of neighbours. FindTextlineFlowDirection(pageseg_mode, false); // Clear the grid ready for rotation or leader finding. @@ -199,7 +203,6 @@ static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs, } } - // Types all the blobs as vertical or horizontal text or unknown and // returns true if the majority are vertical. // If the blobs are rotated, it is necessary to call CorrectForRotation @@ -221,9 +224,8 @@ bool StrokeWidth::TestVerticalTextDirection(double find_vertical_text_ratio, &vertical_blobs, &horizontal_blobs, &nondescript_blobs); if (textord_debug_tabfind) tprintf("TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n", - horizontal_boxes, vertical_boxes, - horizontal_blobs.length(), vertical_blobs.length(), - nondescript_blobs.length()); + horizontal_boxes, vertical_boxes, horizontal_blobs.length(), + vertical_blobs.length(), nondescript_blobs.length()); if (osd_blobs != nullptr && vertical_boxes == 0 && horizontal_boxes == 0) { // Only nondescript blobs available, so return those. BLOBNBOX_C_IT osd_it(osd_blobs); @@ -286,15 +288,13 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) { gsearch.StartFullSearch(); while ((bbox = gsearch.NextFullSearch()) != nullptr) { TBOX box = bbox->bounding_box(); - if (box.height() < box.width() * kLineResidueAspectRatio) - continue; + if (box.height() < box.width() * kLineResidueAspectRatio) continue; // Set up a rectangle search around the blob to find the size of its // neighbours. int padding = box.height() * kLineResiduePadRatio; TBOX search_box = box; search_box.pad(padding, padding); - bool debug = AlignedBlob::WithinTestRegion(2, box.left(), - box.bottom()); + bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); // Find the largest object in the search box not equal to bbox. BlobGridSearch rsearch(this); int max_size = 0; @@ -312,15 +312,15 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) { box.print(); } if (max_size * kLineResidueSizeRatio < box.height()) { - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (leaders_win_ != nullptr) { // We are debugging, so display deleted in pink blobs in the same // window that we use to display leader detection. leaders_win_->Pen(ScrollView::PINK); - leaders_win_->Rectangle(box.left(), box.bottom(), - box.right(), box.top()); + leaders_win_->Rectangle(box.left(), box.bottom(), box.right(), + box.top()); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED ColPartition::MakeBigPartition(bbox, big_part_list); } } @@ -394,7 +394,7 @@ static void PrintBoxWidths(BLOBNBOX* neighbour) { tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n", nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), neighbour->horz_stroke_width(), neighbour->vert_stroke_width(), - 2.0 * neighbour->cblob()->area()/neighbour->cblob()->perimeter()); + 2.0 * neighbour->cblob()->area() / neighbour->cblob()->perimeter()); } /** Handles a click event in a display window. */ @@ -419,16 +419,15 @@ void StrokeWidth::HandleClick(int x, int y) { PrintBoxWidths(neighbour->neighbour(BND_BELOW)); int gaps[BND_COUNT]; neighbour->NeighbourGaps(gaps); - tprintf("Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n" - "Good= %d %d %d %d\n", - gaps[BND_LEFT], gaps[BND_RIGHT], - gaps[BND_ABOVE], gaps[BND_BELOW], - neighbour->horz_possible(), - neighbour->vert_possible(), - neighbour->good_stroke_neighbour(BND_LEFT), - neighbour->good_stroke_neighbour(BND_RIGHT), - neighbour->good_stroke_neighbour(BND_ABOVE), - neighbour->good_stroke_neighbour(BND_BELOW)); + tprintf( + "Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n" + "Good= %d %d %d %d\n", + gaps[BND_LEFT], gaps[BND_RIGHT], gaps[BND_ABOVE], gaps[BND_BELOW], + neighbour->horz_possible(), neighbour->vert_possible(), + neighbour->good_stroke_neighbour(BND_LEFT), + neighbour->good_stroke_neighbour(BND_RIGHT), + neighbour->good_stroke_neighbour(BND_ABOVE), + neighbour->good_stroke_neighbour(BND_BELOW)); break; } } @@ -467,8 +466,8 @@ void StrokeWidth::FindLeadersAndMarkNoise(TO_BLOCK* block, for (blob = bbox; blob != nullptr && blob->flow() == BTFT_NONE; blob = blob->neighbour(BND_RIGHT)) part->AddBox(blob); - for (blob = bbox->neighbour(BND_LEFT); blob != nullptr && - blob->flow() == BTFT_NONE; + for (blob = bbox->neighbour(BND_LEFT); + blob != nullptr && blob->flow() == BTFT_NONE; blob = blob->neighbour(BND_LEFT)) part->AddBox(blob); if (part->MarkAsLeaderIfMonospaced()) @@ -487,8 +486,7 @@ void StrokeWidth::FindLeadersAndMarkNoise(TO_BLOCK* block, for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { BLOBNBOX* blob = small_it.data(); if (blob->flow() != BTFT_LEADER) { - if (blob->flow() == BTFT_NEIGHBOURS) - blob->set_flow(BTFT_NONE); + if (blob->flow() == BTFT_NEIGHBOURS) blob->set_flow(BTFT_NONE); blob->ClearNeighbours(); blob_it.add_to_end(small_it.extract()); } @@ -527,14 +525,13 @@ void StrokeWidth::MarkLeaderNeighbours(const ColPartition* part, // Search to the side of the leader for the nearest neighbour. BLOBNBOX* best_blob = nullptr; int best_gap = 0; - blobsearch.StartSideSearch(side == LR_LEFT ? part_box.left() - : part_box.right(), - part_box.bottom(), part_box.top()); + blobsearch.StartSideSearch( + side == LR_LEFT ? part_box.left() : part_box.right(), part_box.bottom(), + part_box.top()); BLOBNBOX* blob; while ((blob = blobsearch.NextSideSearch(side == LR_LEFT)) != nullptr) { const TBOX& blob_box = blob->bounding_box(); - if (!blob_box.y_overlap(part_box)) - continue; + if (!blob_box.y_overlap(part_box)) continue; int x_gap = blob_box.x_gap(part_box); if (x_gap > 2 * gridsize()) { break; @@ -548,14 +545,14 @@ void StrokeWidth::MarkLeaderNeighbours(const ColPartition* part, best_blob->set_leader_on_right(true); else best_blob->set_leader_on_left(true); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (leaders_win_ != nullptr) { leaders_win_->Pen(side == LR_LEFT ? ScrollView::RED : ScrollView::GREEN); const TBOX& blob_box = best_blob->bounding_box(); leaders_win_->Rectangle(blob_box.left(), blob_box.bottom(), blob_box.right(), blob_box.top()); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED } } @@ -591,16 +588,15 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) { if (blob->cblob() == nullptr || blob->cblob()->out_list()->empty()) continue; TBOX bbox = blob->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(), - bbox.bottom()); + bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(), bbox.bottom()); if (debug) { tprintf("Checking for Broken CJK (max size=%d):", max_size); bbox.print(); } // Generate a list of blobs that overlap or are near enough to merge. BLOBNBOX_CLIST overlapped_blobs; - AccumulateOverlaps(blob, debug, max_size, max_dist, - &bbox, &overlapped_blobs); + AccumulateOverlaps(blob, debug, max_size, max_dist, &bbox, + &overlapped_blobs); if (!overlapped_blobs.empty()) { // There are overlapping blobs, so qualify them as being satisfactory // before removing them from the grid and replacing them with the union. @@ -678,9 +674,9 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) { // The distance apart must not exceed max_dist, the combined size must // not exceed max_size, and the aspect ratio must either improve or at // least not get worse by much. -static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox, - bool debug, int max_size, int max_dist, - int* x_gap, int* y_gap) { +static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox, bool debug, + int max_size, int max_dist, int* x_gap, + int* y_gap) { *x_gap = bbox.x_gap(nbox); *y_gap = bbox.y_gap(nbox); TBOX merged(nbox); @@ -689,15 +685,14 @@ static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox, tprintf("gaps = %d, %d, merged_box:", *x_gap, *y_gap); merged.print(); } - if (*x_gap <= max_dist && *y_gap <= max_dist && - merged.width() <= max_size && merged.height() <= max_size) { + if (*x_gap <= max_dist && *y_gap <= max_dist && merged.width() <= max_size && + merged.height() <= max_size) { // Close enough to call overlapping. Check aspect ratios. double old_ratio = static_cast(bbox.width()) / bbox.height(); if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio; double new_ratio = static_cast(merged.width()) / merged.height(); if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio; - if (new_ratio <= old_ratio * kCJKAspectRatioIncrease) - return true; + if (new_ratio <= old_ratio * kCJKAspectRatioIncrease) return true; } return false; } @@ -707,8 +702,8 @@ static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox, // of all the boxes. not_this is excluded from the search, as are blobs // that cause the merged box to exceed max_size in either dimension. void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, - int max_size, int max_dist, - TBOX* bbox, BLOBNBOX_CLIST* blobs) { + int max_size, int max_dist, TBOX* bbox, + BLOBNBOX_CLIST* blobs) { // While searching, nearests holds the nearest failed blob in each // direction. When we have a nearest in each of the 4 directions, then // the search is over, and at this point the final bbox must not overlap @@ -727,8 +722,8 @@ void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, if (neighbour == not_this) continue; TBOX nbox = neighbour->bounding_box(); int x_gap, y_gap; - if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, - &x_gap, &y_gap)) { + if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, &x_gap, + &y_gap)) { // Close enough to call overlapping. Merge boxes. *bbox += nbox; blobs->add_sorted(SortByBoxLeft, true, neighbour); @@ -740,8 +735,8 @@ void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, for (int dir = 0; dir < BND_COUNT; ++dir) { if (nearests[dir] == nullptr) continue; nbox = nearests[dir]->bounding_box(); - if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, - max_dist, &x_gap, &y_gap)) { + if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, &x_gap, + &y_gap)) { // Close enough to call overlapping. Merge boxes. *bbox += nbox; blobs->add_sorted(SortByBoxLeft, true, nearests[dir]); @@ -769,8 +764,8 @@ void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, } } // If all nearests are non-null, then we have finished. - if (nearests[BND_LEFT] && nearests[BND_RIGHT] && - nearests[BND_ABOVE] && nearests[BND_BELOW]) + if (nearests[BND_LEFT] && nearests[BND_RIGHT] && nearests[BND_ABOVE] && + nearests[BND_BELOW]) break; } // Final overlap with a nearest is not allowed. @@ -783,8 +778,7 @@ void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, } if (bbox->overlap(nbox)) { blobs->shallow_clear(); - if (debug) - tprintf("Final box overlaps nearest\n"); + if (debug) tprintf("Final box overlaps nearest\n"); return; } } @@ -824,7 +818,7 @@ void StrokeWidth::FindTextlineFlowDirection(PageSegMode pageseg_mode, SetNeighbourFlows(bbox); } } - if ((textord_tabfind_show_strokewidths && display_if_debugging) || + if ((textord_tabfind_show_strokewidths && display_if_debugging) || textord_tabfind_show_strokewidths > 1) { initial_widths_win_ = DisplayGoodBlobs("InitialStrokewidths", 400, 0); } @@ -843,7 +837,7 @@ void StrokeWidth::FindTextlineFlowDirection(PageSegMode pageseg_mode, while ((bbox = gsearch.NextFullSearch()) != nullptr) { SmoothNeighbourTypes(pageseg_mode, true, bbox); } - if ((textord_tabfind_show_strokewidths && display_if_debugging) || + if ((textord_tabfind_show_strokewidths && display_if_debugging) || textord_tabfind_show_strokewidths > 1) { widths_win_ = DisplayGoodBlobs("ImprovedStrokewidths", 800, 0); } @@ -868,7 +862,6 @@ void StrokeWidth::SetNeighbours(bool leaders, bool activate_line_trap, } } - // Sets the good_stroke_neighbours member of the blob if it has a // GoodNeighbour on the given side. // Also sets the neighbour in the blob, whether or not a good one is found. @@ -879,8 +872,8 @@ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob) { // Search for neighbours that overlap vertically. TBOX blob_box = blob->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, blob_box.left(), - blob_box.bottom()); + bool debug = + AlignedBlob::WithinTestRegion(2, blob_box.left(), blob_box.bottom()); if (debug) { tprintf("FGN in dir %d for blob:", dir); blob_box.print(); @@ -900,34 +893,32 @@ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, int line_trap_min = std::min(width, height) * kLineTrapShortest; int line_trap_count = 0; - int min_good_overlap = (dir == BND_LEFT || dir == BND_RIGHT) - ? height / 2 : width / 2; - int min_decent_overlap = (dir == BND_LEFT || dir == BND_RIGHT) - ? height / 3 : width / 3; - if (leaders) - min_good_overlap = min_decent_overlap = 1; - - int search_pad = static_cast( - sqrt(static_cast(width * height)) * kNeighbourSearchFactor); - if (gridsize() > search_pad) - search_pad = gridsize(); + int min_good_overlap = + (dir == BND_LEFT || dir == BND_RIGHT) ? height / 2 : width / 2; + int min_decent_overlap = + (dir == BND_LEFT || dir == BND_RIGHT) ? height / 3 : width / 3; + if (leaders) min_good_overlap = min_decent_overlap = 1; + + int search_pad = static_cast(sqrt(static_cast(width * height)) * + kNeighbourSearchFactor); + if (gridsize() > search_pad) search_pad = gridsize(); TBOX search_box = blob_box; // Pad the search in the appropriate direction. switch (dir) { - case BND_LEFT: - search_box.set_left(search_box.left() - search_pad); - break; - case BND_RIGHT: - search_box.set_right(search_box.right() + search_pad); - break; - case BND_BELOW: - search_box.set_bottom(search_box.bottom() - search_pad); - break; - case BND_ABOVE: - search_box.set_top(search_box.top() + search_pad); - break; - case BND_COUNT: - return 0; + case BND_LEFT: + search_box.set_left(search_box.left() - search_pad); + break; + case BND_RIGHT: + search_box.set_right(search_box.right() + search_pad); + break; + case BND_BELOW: + search_box.set_bottom(search_box.bottom() - search_pad); + break; + case BND_ABOVE: + search_box.set_top(search_box.top() + search_pad); + break; + case BND_COUNT: + return 0; } BlobGridSearch rectsearch(this); @@ -938,8 +929,7 @@ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* neighbour; while ((neighbour = rectsearch.NextRectSearch()) != nullptr) { TBOX nbox = neighbour->bounding_box(); - if (neighbour == blob) - continue; + if (neighbour == blob) continue; int mid_x = (nbox.left() + nbox.right()) / 2; if (mid_x < blob->left_rule() || mid_x > blob->right_rule()) continue; // In a different column. @@ -953,17 +943,17 @@ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, int n_width = nbox.width(); int n_height = nbox.height(); if (std::min(n_width, n_height) > line_trap_min && - std::max(n_width, n_height) < line_trap_max) + std::max(n_width, n_height) < line_trap_max) ++line_trap_count; // Heavily joined text, such as Arabic may have very different sizes when // looking at the maxes, but the heights may be almost identical, so check // for a difference in height if looking sideways or width vertically. if (TabFind::VeryDifferentSizes(std::max(n_width, n_height), std::max(width, height)) && - (((dir == BND_LEFT || dir ==BND_RIGHT) && - TabFind::DifferentSizes(n_height, height)) || - ((dir == BND_BELOW || dir ==BND_ABOVE) && - TabFind::DifferentSizes(n_width, width)))) { + (((dir == BND_LEFT || dir == BND_RIGHT) && + TabFind::DifferentSizes(n_height, height)) || + ((dir == BND_BELOW || dir == BND_ABOVE) && + TabFind::DifferentSizes(n_width, width)))) { if (debug) tprintf("Bad size\n"); continue; // Could be a different font size or non-text. } @@ -976,7 +966,8 @@ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, int perp_overlap; int gap; if (dir == BND_LEFT || dir == BND_RIGHT) { - overlap = std::min(static_cast(nbox.top()), top) - std::max(static_cast(nbox.bottom()), bottom); + overlap = std::min(static_cast(nbox.top()), top) - + std::max(static_cast(nbox.bottom()), bottom); if (overlap == nbox.height() && nbox.width() > nbox.height()) perp_overlap = nbox.width(); else @@ -988,7 +979,8 @@ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, } gap -= n_width; } else { - overlap = std::min(static_cast(nbox.right()), right) - std::max(static_cast(nbox.left()), left); + overlap = std::min(static_cast(nbox.right()), right) - + std::max(static_cast(nbox.left()), left); if (overlap == nbox.width() && nbox.height() > nbox.width()) perp_overlap = nbox.height(); else @@ -1010,10 +1002,10 @@ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, } bool bad_sizes = TabFind::DifferentSizes(height, n_height) && TabFind::DifferentSizes(width, n_width); - bool is_good = overlap >= min_good_overlap && !bad_sizes && - blob->MatchingStrokeWidth(*neighbour, - kStrokeWidthFractionTolerance, - kStrokeWidthTolerance); + bool is_good = + overlap >= min_good_overlap && !bad_sizes && + blob->MatchingStrokeWidth(*neighbour, kStrokeWidthFractionTolerance, + kStrokeWidthTolerance); // Best is a fuzzy combination of gap, overlap and is good. // Basically if you make one thing twice as good without making // anything else twice as bad, then it is better. @@ -1034,8 +1026,7 @@ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, } // Helper to get a list of 1st-order neighbours. -static void ListNeighbours(const BLOBNBOX* blob, - BLOBNBOX_CLIST* neighbours) { +static void ListNeighbours(const BLOBNBOX* blob, BLOBNBOX_CLIST* neighbours) { for (int dir = 0; dir < BND_COUNT; ++dir) { BlobNeighbourDir bnd = static_cast(dir); BLOBNBOX* neighbour = blob->neighbour(bnd); @@ -1075,8 +1066,7 @@ static void List3rdNeighbours(const BLOBNBOX* blob, // in a list of neighbours. static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours, int* pure_h_count, int* pure_v_count) { - if (neighbours->length() <= kMostlyOneDirRatio) - return; + if (neighbours->length() <= kMostlyOneDirRatio) return; BLOBNBOX_C_IT it(neighbours); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); @@ -1084,8 +1074,7 @@ static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours, blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max); if (debug) tprintf("Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max); - if (h_max < v_min || - blob->leader_on_left() || blob->leader_on_right()) { + if (h_max < v_min || blob->leader_on_left() || blob->leader_on_right()) { // Horizontal gaps are clear winners. Count a pure horizontal. ++*pure_h_count; if (debug) tprintf("Horz at:"); @@ -1096,8 +1085,7 @@ static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours, } else { if (debug) tprintf("Neither at:"); } - if (debug) - blob->bounding_box().print(); + if (debug) blob->bounding_box().print(); } } @@ -1105,13 +1093,12 @@ static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours, // is clear based on gaps of 2nd order neighbours, or definite individual // blobs. void StrokeWidth::SetNeighbourFlows(BLOBNBOX* blob) { - if (blob->DefiniteIndividualFlow()) - return; + if (blob->DefiniteIndividualFlow()) return; bool debug = AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), blob->bounding_box().bottom()); if (debug) { - tprintf("SetNeighbourFlows (current flow=%d, type=%d) on:", - blob->flow(), blob->region_type()); + tprintf("SetNeighbourFlows (current flow=%d, type=%d) on:", blob->flow(), + blob->region_type()); blob->bounding_box().print(); } BLOBNBOX_CLIST neighbours; @@ -1123,8 +1110,7 @@ void StrokeWidth::SetNeighbourFlows(BLOBNBOX* blob) { if (debug) { HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1); - tprintf("SetFlows: h_count=%d, v_count=%d\n", - pure_h_count, pure_v_count); + tprintf("SetFlows: h_count=%d, v_count=%d\n", pure_h_count, pure_v_count); } if (!neighbours.empty()) { blob->set_vert_possible(true); @@ -1143,17 +1129,14 @@ void StrokeWidth::SetNeighbourFlows(BLOBNBOX* blob) { } } - // Helper to count the number of horizontal and vertical blobs in a list. -static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours, - int* pure_h_count, int* pure_v_count) { +static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours, int* pure_h_count, + int* pure_v_count) { BLOBNBOX_C_IT it(neighbours); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); - if (blob->UniquelyHorizontal()) - ++*pure_h_count; - if (blob->UniquelyVertical()) - ++*pure_v_count; + if (blob->UniquelyHorizontal()) ++*pure_h_count; + if (blob->UniquelyVertical()) ++*pure_v_count; } } @@ -1213,8 +1196,7 @@ void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all, blob->bounding_box().bottom())) { HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1); - tprintf("pure_h=%d, pure_v=%d\n", - pure_h_count, pure_v_count); + tprintf("pure_h=%d, pure_v=%d\n", pure_h_count, pure_v_count); } if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) { // Horizontal gaps are clear winners. Clear vertical neighbours. @@ -1227,7 +1209,7 @@ void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all, blob->set_vert_possible(true); } } else if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), - blob->bounding_box().bottom())) { + blob->bounding_box().bottom())) { HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1); tprintf("Clean on pass 3!\n"); @@ -1267,9 +1249,11 @@ PartitionFindResult StrokeWidth::FindInitialPartitions( RemoveLargeUnusedBlobs(block, part_grid, big_parts); TBOX grid_box(bleft(), tright()); while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box, - rerotation)); + rerotation)) + ; while (part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_, - grid_box, rerotation)); + grid_box, rerotation)) + ; int pre_overlap = part_grid->ComputeTotalOverlap(nullptr); TestDiacritics(part_grid, block); MergeDiacritics(block, part_grid); @@ -1287,12 +1271,15 @@ PartitionFindResult StrokeWidth::FindInitialPartitions( part_grid->SplitOverlappingPartitions(big_parts); EasyMerges(part_grid); while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box, - rerotation)); + rerotation)) + ; while (part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_, - grid_box, rerotation)); + grid_box, rerotation)) + ; // Now eliminate strong stuff in a sea of the opposite. while (part_grid->GridSmoothNeighbours(BTFT_STRONG_CHAIN, nontext_map_, - grid_box, rerotation)); + grid_box, rerotation)) + ; if (textord_tabfind_show_strokewidths) { smoothed_win_ = MakeWindow(800, 400, "Smoothed blobs"); part_grid->DisplayBoxes(smoothed_win_); @@ -1360,8 +1347,7 @@ static BLOBNBOX* MutualUnusedVNeighbour(const BLOBNBOX* blob, if (next_blob == nullptr || next_blob->owner() != nullptr || next_blob->UniquelyHorizontal()) return nullptr; - if (next_blob->neighbour(DirOtherWay(dir)) == blob) - return next_blob; + if (next_blob->neighbour(DirOtherWay(dir)) == blob) return next_blob; return nullptr; } @@ -1406,8 +1392,7 @@ static BLOBNBOX* MutualUnusedHNeighbour(const BLOBNBOX* blob, if (next_blob == nullptr || next_blob->owner() != nullptr || next_blob->UniquelyVertical()) return nullptr; - if (next_blob->neighbour(DirOtherWay(dir)) == blob) - return next_blob; + if (next_blob->neighbour(DirOtherWay(dir)) == blob) return next_blob; return nullptr; } @@ -1474,16 +1459,17 @@ void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block) { RemoveBBox(blob); small_it.add_to_end(blob_it.extract()); } else if (part != nullptr && !part->block_owned() && - part->boxes_count() < 3) { + part->boxes_count() < 3) { // We allow blobs in small partitions to become diacritics if ALL the // blobs in the partition qualify as we can then cleanly delete the // partition, turn all the blobs in it to diacritics and they can be // merged into the base character partition more easily than merging // the partitions. BLOBNBOX_C_IT box_it(part->boxes()); - for (box_it.mark_cycle_pt(); !box_it.cycled_list() && - DiacriticBlob(&small_grid, box_it.data()); - box_it.forward()); + for (box_it.mark_cycle_pt(); + !box_it.cycled_list() && DiacriticBlob(&small_grid, box_it.data()); + box_it.forward()) + ; if (box_it.cycled_list()) { // They are all good. while (!box_it.empty()) { @@ -1513,8 +1499,8 @@ void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block) { } } if (textord_tabfind_show_strokewidths) { - tprintf("Found %d small diacritics, %d medium\n", - small_diacritics, medium_diacritics); + tprintf("Found %d small diacritics, %d medium\n", small_diacritics, + medium_diacritics); } } @@ -1529,8 +1515,8 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) { blob->region_type() == BRT_VERT_TEXT) return false; TBOX small_box(blob->bounding_box()); - bool debug = AlignedBlob::WithinTestRegion(2, small_box.left(), - small_box.bottom()); + bool debug = + AlignedBlob::WithinTestRegion(2, small_box.left(), small_box.bottom()); if (debug) { tprintf("Testing blob for diacriticness at:"); small_box.print(); @@ -1574,7 +1560,7 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) { TBOX nbox = neighbour->bounding_box(); if (neighbour->owner() == nullptr || neighbour->owner()->IsVerticalType() || (neighbour->flow() != BTFT_CHAIN && - neighbour->flow() != BTFT_STRONG_CHAIN)) { + neighbour->flow() != BTFT_STRONG_CHAIN)) { if (debug) { tprintf("Neighbour not strong enough:"); nbox.print(); @@ -1590,11 +1576,10 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) { } int x_gap = small_box.x_gap(nbox); int y_gap = small_box.y_gap(nbox); - int total_distance = projection_->DistanceOfBoxFromBox(small_box, nbox, - true, denorm_, - debug); - if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n", - x_gap, y_gap, total_distance); + int total_distance = projection_->DistanceOfBoxFromBox( + small_box, nbox, true, denorm_, debug); + if (debug) + tprintf("xgap=%d, y=%d, total dist=%d\n", x_gap, y_gap, total_distance); if (total_distance > neighbour->owner()->median_size() * kMaxDiacriticDistanceRatio) { if (debug) { @@ -1694,12 +1679,11 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) { // | Base | // |---------| x<-----Dot occupying gap // The grid is const really. -bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid, - const TBOX& diacritic_box, +bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box, const TBOX& base_box) { // Since most gaps are small, use an iterative algorithm to search the gap. - int max_gap = IntCastRounded(base_box.height() * - kMaxDiacriticGapToBaseCharHeight); + int max_gap = + IntCastRounded(base_box.height() * kMaxDiacriticGapToBaseCharHeight); TBOX occupied_box(base_box); int diacritic_gap; while ((diacritic_gap = diacritic_box.x_gap(occupied_box)) > max_gap) { @@ -1726,8 +1710,7 @@ bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid, break; } } - if (neighbour == nullptr) - return false; // Found a big gap. + if (neighbour == nullptr) return false; // Found a big gap. } return true; // The gap was filled. } @@ -1803,8 +1786,7 @@ void StrokeWidth::PartitionRemainingBlobs(PageSegMode pageseg_mode, } if (bbox->owner() == nullptr) { cell_it.add_to_end(bbox); - if (bbox->flow() != BTFT_NONTEXT) - cell_all_noise = false; + if (bbox->flow() != BTFT_NONTEXT) cell_all_noise = false; } else { cell_all_noise = false; } @@ -1819,8 +1801,7 @@ void StrokeWidth::MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine, ColPartitionGrid* part_grid, BLOBNBOX_CLIST* cell_list) { - if (cell_list->empty()) - return; + if (cell_list->empty()) return; BLOBNBOX_C_IT cell_it(cell_list); if (combine) { BLOBNBOX* bbox = cell_it.extract(); @@ -1849,8 +1830,7 @@ void StrokeWidth::CompletePartition(PageSegMode pageseg_mode, ColPartitionGrid* part_grid) { part->ComputeLimits(); TBOX box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, box.left(), - box.bottom()); + bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); int value = projection_->EvaluateColPartition(*part, denorm_, debug); // Override value if pageseg_mode disagrees. if (value > 0 && FindingVerticalOnly(pageseg_mode)) { @@ -1894,23 +1874,19 @@ bool StrokeWidth::ConfirmEasyMerge(const ColPartition* p1, (p1->flow() >= BTFT_CHAIN && p2->flow() == BTFT_NONTEXT)) return false; // Don't merge confirmed image with text. if ((p1->IsVerticalType() || p2->IsVerticalType()) && - p1->HCoreOverlap(*p2) <= 0 && - ((!p1->IsSingleton() && - !p2->IsSingleton()) || - !p1->bounding_box().major_overlap(p2->bounding_box()))) + p1->HCoreOverlap(*p2) <= 0 && + ((!p1->IsSingleton() && !p2->IsSingleton()) || + !p1->bounding_box().major_overlap(p2->bounding_box()))) return false; // Overlap must be in the text line. if ((p1->IsHorizontalType() || p2->IsHorizontalType()) && p1->VCoreOverlap(*p2) <= 0 && - ((!p1->IsSingleton() && - !p2->IsSingleton()) || + ((!p1->IsSingleton() && !p2->IsSingleton()) || (!p1->bounding_box().major_overlap(p2->bounding_box()) && !p1->OKDiacriticMerge(*p2, false) && !p2->OKDiacriticMerge(*p1, false)))) return false; // Overlap must be in the text line. - if (!p1->ConfirmNoTabViolation(*p2)) - return false; - if (p1->flow() <= BTFT_NONTEXT && p2->flow() <= BTFT_NONTEXT) - return true; + if (!p1->ConfirmNoTabViolation(*p2)) return false; + if (p1->flow() <= BTFT_NONTEXT && p2->flow() <= BTFT_NONTEXT) return true; return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box()); } @@ -1923,8 +1899,8 @@ bool StrokeWidth::NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const { /** Displays the blobs colored according to the number of good neighbours * and the vertical/horizontal flow. */ -ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name, - int x, int y) { +ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name, int x, + int y) { ScrollView* window = nullptr; #ifndef GRAPHICS_DISABLED window = MakeWindow(x, y, window_name); @@ -1943,10 +1919,8 @@ ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name, int bottom_y = box.bottom(); int goodness = bbox->GoodTextBlob(); BlobRegionType blob_type = bbox->region_type(); - if (bbox->UniquelyVertical()) - blob_type = BRT_VERT_TEXT; - if (bbox->UniquelyHorizontal()) - blob_type = BRT_TEXT; + if (bbox->UniquelyVertical()) blob_type = BRT_VERT_TEXT; + if (bbox->UniquelyHorizontal()) blob_type = BRT_TEXT; BlobTextFlowType flow = bbox->flow(); if (flow == BTFT_NONE) { if (goodness == 0) @@ -1968,15 +1942,16 @@ static void DrawDiacriticJoiner(const BLOBNBOX* blob, ScrollView* window) { #ifndef GRAPHICS_DISABLED const TBOX& blob_box(blob->bounding_box()); int top = std::max(static_cast(blob_box.top()), blob->base_char_top()); - int bottom = std::min(static_cast(blob_box.bottom()), blob->base_char_bottom()); + int bottom = + std::min(static_cast(blob_box.bottom()), blob->base_char_bottom()); int x = (blob_box.left() + blob_box.right()) / 2; window->Line(x, top, x, bottom); #endif // GRAPHICS_DISABLED } // Displays blobs colored according to whether or not they are diacritics. -ScrollView* StrokeWidth::DisplayDiacritics(const char* window_name, - int x, int y, TO_BLOCK* block) { +ScrollView* StrokeWidth::DisplayDiacritics(const char* window_name, int x, + int y, TO_BLOCK* block) { ScrollView* window = nullptr; #ifndef GRAPHICS_DISABLED window = MakeWindow(x, y, window_name); @@ -1993,7 +1968,7 @@ ScrollView* StrokeWidth::DisplayDiacritics(const char* window_name, window->Pen(blob->BoxColor()); } const TBOX& box = blob->bounding_box(); - window->Rectangle(box.left(), box. bottom(), box.right(), box.top()); + window->Rectangle(box.left(), box.bottom(), box.right(), box.top()); } it.set_to_list(&block->noise_blobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { @@ -2005,7 +1980,7 @@ ScrollView* StrokeWidth::DisplayDiacritics(const char* window_name, window->Pen(ScrollView::WHITE); } const TBOX& box = blob->bounding_box(); - window->Rectangle(box.left(), box. bottom(), box.right(), box.top()); + window->Rectangle(box.left(), box.bottom(), box.right(), box.top()); } window->Update(); #endif diff --git a/src/textord/strokewidth.h b/src/textord/strokewidth.h index 497ffaebab..3b83ec2ff3 100644 --- a/src/textord/strokewidth.h +++ b/src/textord/strokewidth.h @@ -20,8 +20,8 @@ #ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_ #define TESSERACT_TEXTORD_STROKEWIDTH_H_ -#include "blobbox.h" // BlobNeighourDir. -#include "blobgrid.h" // Base class. +#include "blobbox.h" // BlobNeighourDir. +#include "blobgrid.h" // Base class. #include "colpartitiongrid.h" #include "textlineprojection.h" @@ -36,10 +36,7 @@ class TabFind; class TextlineProjection; // Misc enums to clarify bool arguments for direction-controlling args. -enum LeftOrRight { - LR_LEFT, - LR_RIGHT -}; +enum LeftOrRight { LR_LEFT, LR_RIGHT }; // Return value from FindInitialPartitions indicates detection of severe // skew or noise. @@ -89,16 +86,14 @@ class StrokeWidth : public BlobGrid { // direction are returned for use in orientation and script detection. // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. bool TestVerticalTextDirection(double find_vertical_text_ratio, - TO_BLOCK* block, - BLOBNBOX_CLIST* osd_blobs); + TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs); // Corrects the data structures for the given rotation. void CorrectForRotation(const FCOORD& rerotation, ColPartitionGrid* part_grid); // Finds leader partitions and inserts them into the give grid. - void FindLeaderPartitions(TO_BLOCK* block, - ColPartitionGrid* part_grid); + void FindLeaderPartitions(TO_BLOCK* block, ColPartitionGrid* part_grid); // Finds and marks noise those blobs that look like bits of vertical lines // that would otherwise screw up layout analysis. @@ -162,9 +157,8 @@ class StrokeWidth : public BlobGrid { // Return them in the list of blobs and expand the bbox to be the union // of all the boxes. not_this is excluded from the search, as are blobs // that cause the merged box to exceed max_size in either dimension. - void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, - int max_size, int max_dist, - TBOX* bbox, BLOBNBOX_CLIST* blobs); + void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, int max_size, + int max_dist, TBOX* bbox, BLOBNBOX_CLIST* blobs); // For each blob in this grid, Finds the textline direction to be horizontal // or vertical according to distance to neighbours and 1st and 2nd order @@ -269,11 +263,10 @@ class StrokeWidth : public BlobGrid { // Any blobs on the large_blobs list of block that are still unowned by a // ColPartition, are probably drop-cap or vertically touching so the blobs // are removed to the big_parts list and treated separately. - void RemoveLargeUnusedBlobs(TO_BLOCK* block, - ColPartitionGrid* part_grid, + void RemoveLargeUnusedBlobs(TO_BLOCK* block, ColPartitionGrid* part_grid, ColPartition_LIST* big_parts); - // All remaining unused blobs are put in individual ColPartitions. + // All remaining unused blobs are put in individual ColPartitions. void PartitionRemainingBlobs(PageSegMode pageseg_mode, ColPartitionGrid* part_grid); @@ -326,8 +319,8 @@ class StrokeWidth : public BlobGrid { ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y); // Displays blobs colored according to whether or not they are diacritics. - ScrollView* DisplayDiacritics(const char* window_name, - int x, int y, TO_BLOCK* block); + ScrollView* DisplayDiacritics(const char* window_name, int x, int y, + TO_BLOCK* block); private: // Image map of photo/noise areas on the page. Borrowed pointer (not owned.) diff --git a/src/textord/tabfind.cpp b/src/textord/tabfind.cpp index fb0cac8e9c..cb2201c111 100644 --- a/src/textord/tabfind.cpp +++ b/src/textord/tabfind.cpp @@ -21,13 +21,13 @@ #include "config_auto.h" #endif -#include "tabfind.h" #include "alignedblob.h" #include "blobbox.h" #include "colpartitiongrid.h" #include "detlinefit.h" #include "linefind.h" #include "ndminx.h" +#include "tabfind.h" #include @@ -67,19 +67,17 @@ BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors"); TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright, TabVector_LIST* vlines, int vertical_x, int vertical_y, int resolution) - : AlignedBlob(gridsize, bleft, tright), - resolution_(resolution), - image_origin_(0, tright.y() - 1), - v_it_(&vectors_) { + : AlignedBlob(gridsize, bleft, tright), + resolution_(resolution), + image_origin_(0, tright.y() - 1), + v_it_(&vectors_) { width_cb_ = nullptr; v_it_.add_list_after(vlines); SetVerticalSkewAndParellelize(vertical_x, vertical_y); width_cb_ = NewPermanentTessCallback(this, &TabFind::CommonWidth); } -TabFind::~TabFind() { - delete width_cb_; -} +TabFind::~TabFind() { delete width_cb_; } ///////////////// PUBLIC functions (mostly used by TabVector). ////////////// @@ -90,16 +88,15 @@ TabFind::~TabFind() { // around allows grid to not be derived from TabFind, eg a ColPartitionGrid, // while the grid that provides the tab stops(this) has to be derived from // TabFind. -void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread, - BLOBNBOX_LIST* blobs, - BBGrid* grid) { +void TabFind::InsertBlobsToGrid( + bool h_spread, bool v_spread, BLOBNBOX_LIST* blobs, + BBGrid* grid) { BLOBNBOX_IT blob_it(blobs); int b_count = 0; int reject_count = 0; for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); -// if (InsertBlob(true, true, blob, grid)) { + // if (InsertBlob(true, true, blob, grid)) { if (InsertBlob(h_spread, v_spread, blob, grid)) { ++b_count; } else { @@ -107,8 +104,8 @@ void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread, } } if (textord_debug_tabfind) { - tprintf("Inserted %d blobs into grid, %d rejected.\n", - b_count, reject_count); + tprintf("Inserted %d blobs into grid, %d rejected.\n", b_count, + reject_count); } } @@ -117,16 +114,15 @@ void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread, // used, otherwise, just the bottom-left. Similarly for v_spread. // A side effect is that the left and right rule edges of the blob are // set according to the tab vectors in this (not grid). -bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob, - BBGrid* grid) { +bool TabFind::InsertBlob( + bool h_spread, bool v_spread, BLOBNBOX* blob, + BBGrid* grid) { TBOX box = blob->bounding_box(); blob->set_left_rule(LeftEdgeForBox(box, false, false)); blob->set_right_rule(RightEdgeForBox(box, false, false)); blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false)); blob->set_right_crossing_rule(RightEdgeForBox(box, true, false)); - if (blob->joined_to_prev()) - return false; + if (blob->joined_to_prev()) return false; grid->InsertBBox(h_spread, v_spread, blob); return true; } @@ -166,7 +162,8 @@ int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v, bool right_to_left = v.IsLeftTab(); int bottom_x = v.XAtY(bottom_y); int top_x = v.XAtY(top_y); - int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x); + int start_x = + right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x); BlobGridSearch sidesearch(this); sidesearch.StartSideSearch(start_x, bottom_y, top_y); int min_gap = max_gutter_width; @@ -199,8 +196,7 @@ int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v, if (gap < 0 && box.right() - tab_x > *required_shift) *required_shift = box.right() - tab_x; } - if (gap > 0 && gap < min_gap) - min_gap = gap; + if (gap > 0 && gap < min_gap) min_gap = gap; } // Result may be negative, in which case, this is a really bad tabstop. return min_gap - abs(*required_shift); @@ -210,7 +206,7 @@ int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v, void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX* bbox, int* gutter_width, - int* neighbour_gap ) { + int* neighbour_gap) { const TBOX& box = bbox->bounding_box(); // The gutter and internal sides of the box. int gutter_x = left ? box.left() : box.right(); @@ -220,19 +216,17 @@ void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, *gutter_width = max_gutter; // If the box is away from the tabstop, we need to increase // the allowed gutter width. - if (tab_gap > 0) - *gutter_width += tab_gap; + if (tab_gap > 0) *gutter_width += tab_gap; bool debug = WithinTestRegion(2, box.left(), box.bottom()); - if (debug) - tprintf("Looking in gutter\n"); + if (debug) tprintf("Looking in gutter\n"); // Find the nearest blob on the outside of the column. - BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left, - bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, - *gutter_width, box.top(), box.bottom()); + BLOBNBOX* gutter_bbox = + AdjacentBlob(bbox, left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, + *gutter_width, box.top(), box.bottom()); if (gutter_bbox != nullptr) { const TBOX& gutter_box = gutter_bbox->bounding_box(); - *gutter_width = left ? tab_x - gutter_box.right() - : gutter_box.left() - tab_x; + *gutter_width = + left ? tab_x - gutter_box.right() : gutter_box.left() - tab_x; } if (*gutter_width >= max_gutter) { // If there is no box because a tab was in the way, get the tab coord. @@ -241,24 +235,20 @@ void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, gutter_box.set_left(tab_x - max_gutter - 1); gutter_box.set_right(tab_x - max_gutter); int tab_gutter = RightEdgeForBox(gutter_box, true, false); - if (tab_gutter < tab_x - 1) - *gutter_width = tab_x - tab_gutter; + if (tab_gutter < tab_x - 1) *gutter_width = tab_x - tab_gutter; } else { gutter_box.set_left(tab_x + max_gutter); gutter_box.set_right(tab_x + max_gutter + 1); int tab_gutter = LeftEdgeForBox(gutter_box, true, false); - if (tab_gutter > tab_x + 1) - *gutter_width = tab_gutter - tab_x; + if (tab_gutter > tab_x + 1) *gutter_width = tab_gutter - tab_x; } } - if (*gutter_width > max_gutter) - *gutter_width = max_gutter; + if (*gutter_width > max_gutter) *gutter_width = max_gutter; // Now look for a neighbour on the inside. - if (debug) - tprintf("Looking for neighbour\n"); - BLOBNBOX* neighbour = AdjacentBlob(bbox, !left, - bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, - *gutter_width, box.top(), box.bottom()); + if (debug) tprintf("Looking for neighbour\n"); + BLOBNBOX* neighbour = + AdjacentBlob(bbox, !left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, + *gutter_width, box.top(), box.bottom()); int neighbour_edge = left ? RightEdgeForBox(box, true, false) : LeftEdgeForBox(box, true, false); if (neighbour != nullptr) { @@ -272,8 +262,8 @@ void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, else if (!left && n_box.right() > neighbour_edge) neighbour_edge = n_box.right(); } - *neighbour_gap = left ? neighbour_edge - internal_x - : internal_x - neighbour_edge; + *neighbour_gap = + left ? neighbour_edge - internal_x : internal_x - neighbour_edge; } // Return the x-coord that corresponds to the right edge for the given @@ -305,8 +295,7 @@ int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) { // of x at y. TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing, bool extended) { - if (v_it_.empty()) - return nullptr; + if (v_it_.empty()) return nullptr; int top_y = box.top(); int bottom_y = box.bottom(); int mid_y = (top_y + bottom_y) / 2; @@ -325,9 +314,8 @@ TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing, do { TabVector* v = v_it_.data(); int x = v->XAtY(mid_y); - if (x >= right && - (v->VOverlap(top_y, bottom_y) > 0 || - (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) { + if (x >= right && (v->VOverlap(top_y, bottom_y) > 0 || + (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) { if (best_v == nullptr || x < best_x) { best_v = v; best_x = x; @@ -338,8 +326,7 @@ TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing, } // Break when the search is done to avoid wrapping the iterator and // thereby potentially slowing the next search. - if (v_it_.at_last() || - (best_v != nullptr && v->sort_key() > key_limit)) + if (v_it_.at_last() || (best_v != nullptr && v->sort_key() > key_limit)) break; // Prevent restarting list for next call. v_it_.forward(); } while (!v_it_.at_first()); @@ -349,8 +336,7 @@ TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing, // As RightTabForBox, but finds the left TabVector instead. TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing, bool extended) { - if (v_it_.empty()) - return nullptr; + if (v_it_.empty()) return nullptr; int top_y = box.top(); int bottom_y = box.bottom(); int mid_y = (top_y + bottom_y) / 2; @@ -370,9 +356,8 @@ TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing, do { TabVector* v = v_it_.data(); int x = v->XAtY(mid_y); - if (x <= left && - (v->VOverlap(top_y, bottom_y) > 0 || - (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) { + if (x <= left && (v->VOverlap(top_y, bottom_y) > 0 || + (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) { if (best_v == nullptr || x > best_x) { best_v = v; best_x = x; @@ -383,8 +368,7 @@ TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing, } // Break when the search is done to avoid wrapping the iterator and // thereby potentially slowing the next search. - if (v_it_.at_first() || - (best_v != nullptr && v->sort_key() < key_limit)) + if (v_it_.at_first() || (best_v != nullptr && v->sort_key() < key_limit)) break; // Prevent restarting list for next call. v_it_.backward(); } while (!v_it_.at_last()); @@ -398,8 +382,7 @@ bool TabFind::CommonWidth(int width) { ICOORDELT_IT it(&column_widths_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ICOORDELT* w = it.data(); - if (w->x() - 1 <= width && width <= w->y() + 1) - return true; + if (w->x() - 1 <= width && width <= w->y() + 1) return true; } return false; } @@ -421,15 +404,13 @@ bool TabFind::VeryDifferentSizes(int size1, int size2) { // Top-level function to find TabVectors in an input page block. // Returns false if the detected skew angle is impossible. // Applies the detected skew angle to deskew the tabs, blobs and part_grid. -bool TabFind::FindTabVectors(TabVector_LIST* hlines, - BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, - int min_gutter_width, +bool TabFind::FindTabVectors(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, + TO_BLOCK* block, int min_gutter_width, double tabfind_aligned_gap_fraction, - ColPartitionGrid* part_grid, - FCOORD* deskew, FCOORD* reskew) { - ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width, - tabfind_aligned_gap_fraction, - block); + ColPartitionGrid* part_grid, FCOORD* deskew, + FCOORD* reskew) { + ScrollView* tab_win = FindInitialTabVectors( + image_blobs, min_gutter_width, tabfind_aligned_gap_fraction, block); ComputeColumnWidths(tab_win, part_grid); TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this); SortVectors(); @@ -438,14 +419,14 @@ bool TabFind::FindTabVectors(TabVector_LIST* hlines, return false; // Skew angle is too large. part_grid->Deskew(*deskew); ApplyTabConstraints(); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (textord_tabfind_show_finaltabs) { tab_win = MakeWindow(640, 50, "FinalTabs"); DisplayBoxes(tab_win); DisplayTabs("FinalTabs", tab_win); tab_win = DisplayTabVectors(tab_win); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED return true; } @@ -476,14 +457,13 @@ void TabFind::TidyBlobs(TO_BLOCK* block) { } } if (textord_debug_tabfind) { - tprintf("Moved %d large blobs to normal list\n", - b_count); - #ifndef GRAPHICS_DISABLED + tprintf("Moved %d large blobs to normal list\n", b_count); +#ifndef GRAPHICS_DISABLED ScrollView* rej_win = MakeWindow(500, 300, "Image blobs"); block->plot_graded_blobs(rej_win); block->plot_noise_blobs(rej_win); rej_win->Update(); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED } block->DeleteUnownedNoise(); } @@ -522,11 +502,10 @@ ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, line_win = DisplayTabVectors(line_win); } // Prepare the grid. - if (image_blobs != nullptr) - InsertBlobsToGrid(true, false, image_blobs, this); + if (image_blobs != nullptr) InsertBlobsToGrid(true, false, image_blobs, this); InsertBlobsToGrid(true, false, &block->blobs, this); - ScrollView* initial_win = FindTabBoxes(min_gutter_width, - tabfind_aligned_gap_fraction); + ScrollView* initial_win = + FindTabBoxes(min_gutter_width, tabfind_aligned_gap_fraction); FindAllTabVectors(min_gutter_width); TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this); @@ -541,7 +520,7 @@ ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, // Helper displays all the boxes in the given vector on the given window. static void DisplayBoxVector(const GenericVector& boxes, ScrollView* win) { - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED for (int i = 0; i < boxes.size(); ++i) { TBOX box = boxes[i]->bounding_box(); int left_x = box.left(); @@ -553,7 +532,7 @@ static void DisplayBoxVector(const GenericVector& boxes, win->Rectangle(left_x, bottom_y, right_x, top_y); } win->Update(); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED } // For each box in the grid, decide whether it is a candidate tab-stop, @@ -569,10 +548,8 @@ ScrollView* TabFind::FindTabBoxes(int min_gutter_width, while ((bbox = gsearch.NextFullSearch()) != nullptr) { if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) { // If it is any kind of tab, insert it into the vectors. - if (bbox->left_tab_type() != TT_NONE) - left_tab_boxes_.push_back(bbox); - if (bbox->right_tab_type() != TT_NONE) - right_tab_boxes_.push_back(bbox); + if (bbox->left_tab_type() != TT_NONE) left_tab_boxes_.push_back(bbox); + if (bbox->right_tab_type() != TT_NONE) right_tab_boxes_.push_back(bbox); } } // Sort left tabs by left and right by right to see the outermost one first @@ -580,7 +557,7 @@ ScrollView* TabFind::FindTabBoxes(int min_gutter_width, left_tab_boxes_.sort(SortByBoxLeft); right_tab_boxes_.sort(SortRightToLeft); ScrollView* tab_win = nullptr; - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (textord_tabfind_show_initialtabs) { tab_win = MakeWindow(0, 100, "InitialTabs"); tab_win->Pen(ScrollView::BLUE); @@ -590,7 +567,7 @@ ScrollView* TabFind::FindTabBoxes(int min_gutter_width, DisplayBoxVector(right_tab_boxes_, tab_win); tab_win = DisplayTabs("Tabs", tab_win); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED return tab_win; } @@ -609,21 +586,19 @@ bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width, int height = box.height(); bool debug = WithinTestRegion(3, left_x, top_y); if (debug) { - tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", - left_x, top_y, right_x, bottom_y, - left_column_edge, right_column_edge); + tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", left_x, + top_y, right_x, bottom_y, left_column_edge, right_column_edge); } // Compute a search radius based on a multiple of the height. int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_; - radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius); + radsearch.StartRadSearch((left_x + right_x) / 2, (top_y + bottom_y) / 2, + radius); // In Vertical Page mode, once we have an estimate of the vertical line // spacing, the minimum amount of gutter space before a possible tab is // increased under the assumption that column partition is always larger // than line spacing. - int min_spacing = - static_cast(height * tabfind_aligned_gap_fraction); - if (min_gutter_width > min_spacing) - min_spacing = min_gutter_width; + int min_spacing = static_cast(height * tabfind_aligned_gap_fraction); + if (min_gutter_width > min_spacing) min_spacing = min_gutter_width; int min_ragged_gutter = kRaggedGutterMultiple * gridsize(); if (min_gutter_width > min_ragged_gutter) min_ragged_gutter = min_gutter_width; @@ -667,14 +642,13 @@ bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width, int alignment_tolerance = static_cast(resolution_ * kAlignedFraction); BLOBNBOX* neighbour = nullptr; while ((neighbour = radsearch.NextRadSearch()) != nullptr) { - if (neighbour == bbox) - continue; + if (neighbour == bbox) continue; TBOX nbox = neighbour->bounding_box(); int n_left = nbox.left(); int n_right = nbox.right(); if (debug) - tprintf("Neighbour at (%d,%d)->(%d,%d)\n", - n_left, nbox.bottom(), n_right, nbox.top()); + tprintf("Neighbour at (%d,%d)->(%d,%d)\n", n_left, nbox.bottom(), n_right, + nbox.top()); // If the neighbouring blob is the wrong side of a separator line, then it // "doesn't exist" as far as we are concerned. if (n_right > right_column_edge || n_left < left_column_edge || @@ -683,24 +657,19 @@ bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width, int n_mid_x = (n_left + n_right) / 2; int n_mid_y = (nbox.top() + nbox.bottom()) / 2; if (n_mid_x <= left_x && n_right >= target_right) { - if (debug) - tprintf("Not a left tab\n"); + if (debug) tprintf("Not a left tab\n"); is_left_tab = false; - if (n_mid_y < top_y) - maybe_left_tab_down = -INT32_MAX; - if (n_mid_y > bottom_y) - maybe_left_tab_up = -INT32_MAX; + if (n_mid_y < top_y) maybe_left_tab_down = -INT32_MAX; + if (n_mid_y > bottom_y) maybe_left_tab_up = -INT32_MAX; } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) { - if (debug) - tprintf("Maybe a left tab\n"); + if (debug) tprintf("Maybe a left tab\n"); if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) ++maybe_left_tab_up; if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) ++maybe_left_tab_down; } else if (n_left < left_x && n_right >= left_x) { // Overlaps but not aligned so negative points on a maybe. - if (debug) - tprintf("Maybe Not a left tab\n"); + if (debug) tprintf("Maybe Not a left tab\n"); if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) --maybe_left_tab_up; if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) @@ -708,28 +677,22 @@ bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width, } if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) { maybe_ragged_left = false; - if (debug) - tprintf("Not a ragged left\n"); + if (debug) tprintf("Not a ragged left\n"); } if (n_mid_x >= right_x && n_left <= target_left) { - if (debug) - tprintf("Not a right tab\n"); + if (debug) tprintf("Not a right tab\n"); is_right_tab = false; - if (n_mid_y < top_y) - maybe_right_tab_down = -INT32_MAX; - if (n_mid_y > bottom_y) - maybe_right_tab_up = -INT32_MAX; + if (n_mid_y < top_y) maybe_right_tab_down = -INT32_MAX; + if (n_mid_y > bottom_y) maybe_right_tab_up = -INT32_MAX; } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) { - if (debug) - tprintf("Maybe a right tab\n"); + if (debug) tprintf("Maybe a right tab\n"); if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) ++maybe_right_tab_up; if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) ++maybe_right_tab_down; } else if (n_right > right_x && n_left <= right_x) { // Overlaps but not aligned so negative points on a maybe. - if (debug) - tprintf("Maybe Not a right tab\n"); + if (debug) tprintf("Maybe Not a right tab\n"); if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) --maybe_right_tab_up; if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) @@ -737,8 +700,7 @@ bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width, } if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) { maybe_ragged_right = false; - if (debug) - tprintf("Not a ragged right\n"); + if (debug) tprintf("Not a ragged right\n"); } if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX && maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX) @@ -760,11 +722,14 @@ bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width, bbox->set_right_tab_type(TT_NONE); } if (debug) { - tprintf("Left result = %s, Right result=%s\n", - bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" : - (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"), - bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" : - (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None")); + tprintf( + "Left result = %s, Right result=%s\n", + bbox->left_tab_type() == TT_MAYBE_ALIGNED + ? "Aligned" + : (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"), + bbox->right_tab_type() == TT_MAYBE_ALIGNED + ? "Aligned" + : (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None")); } return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE; } @@ -796,8 +761,7 @@ bool TabFind::NothingYOverlapsInBox(const TBOX& search_box, BLOBNBOX* blob; while ((blob = rsearch.NextRectSearch()) != nullptr) { const TBOX& box = blob->bounding_box(); - if (box.y_overlap(target_box) && !(box == target_box)) - return false; + if (box.y_overlap(target_box) && !(box == target_box)) return false; } return true; } @@ -812,16 +776,13 @@ void TabFind::FindAllTabVectors(int min_gutter_width) { // Slowly up the search size until we get some vectors. for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch; search_size += kMinVerticalSearch) { - int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED, - min_gutter_width, - &dummy_vectors, - &vertical_x, &vertical_y); - vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED, - min_gutter_width, - &dummy_vectors, - &vertical_x, &vertical_y); - if (vector_count > 0) - break; + int vector_count = + FindTabVectors(search_size, TA_LEFT_ALIGNED, min_gutter_width, + &dummy_vectors, &vertical_x, &vertical_y); + vector_count += + FindTabVectors(search_size, TA_RIGHT_ALIGNED, min_gutter_width, + &dummy_vectors, &vertical_x, &vertical_y); + if (vector_count > 0) break; } // Get rid of the test vectors and reset the types of the tabs. dummy_vectors.clear(); @@ -836,8 +797,8 @@ void TabFind::FindAllTabVectors(int min_gutter_width) { bbox->set_right_tab_type(TT_MAYBE_ALIGNED); } if (textord_debug_tabfind) { - tprintf("Beginning real tab search with vertical = %d,%d...\n", - vertical_x, vertical_y); + tprintf("Beginning real tab search with vertical = %d,%d...\n", vertical_x, + vertical_y); } // Now do the real thing ,but keep the vectors in the dummy_vectors list // until they are all done, so we don't get the tab vectors confused with @@ -865,15 +826,15 @@ int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment, int vector_count = 0; // Search the right or left tab boxes, looking for tab vectors. bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED; - const GenericVector& boxes = right ? right_tab_boxes_ - : left_tab_boxes_; + const GenericVector& boxes = + right ? right_tab_boxes_ : left_tab_boxes_; for (int i = 0; i < boxes.size(); ++i) { BLOBNBOX* bbox = boxes[i]; if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) || (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) { - TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width, - alignment, - bbox, vertical_x, vertical_y); + TabVector* vector = + FindTabVector(search_size_multiple, min_gutter_width, alignment, bbox, + vertical_x, vertical_y); if (vector != nullptr) { ++vector_count; vector_it.add_to_end(vector); @@ -891,13 +852,12 @@ int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment, // vertical direction. (skew finding.) // Returns nullptr if no decent tabstop can be found. TabVector* TabFind::FindTabVector(int search_size_multiple, - int min_gutter_width, - TabAlignment alignment, - BLOBNBOX* bbox, - int* vertical_x, int* vertical_y) { - int height = std::max(static_cast(bbox->bounding_box().height()), gridsize()); - AlignedBlobParams align_params(*vertical_x, *vertical_y, - height, + int min_gutter_width, TabAlignment alignment, + BLOBNBOX* bbox, int* vertical_x, + int* vertical_y) { + int height = + std::max(static_cast(bbox->bounding_box().height()), gridsize()); + AlignedBlobParams align_params(*vertical_x, *vertical_y, height, search_size_multiple, min_gutter_width, resolution_, alignment); // FindVerticalAlignment is in the parent (AlignedBlob) class. @@ -910,8 +870,8 @@ void TabFind::SetVerticalSkewAndParellelize(int vertical_x, int vertical_y) { // Fit the vertical vector into an ICOORD, which is 16 bit. vertical_skew_.set_with_shrink(vertical_x, vertical_y); if (textord_debug_tabfind) - tprintf("Vertical skew vector=(%d,%d)\n", - vertical_skew_.x(), vertical_skew_.y()); + tprintf("Vertical skew vector=(%d,%d)\n", vertical_skew_.x(), + vertical_skew_.y()); v_it_.set_to_list(&vectors_); for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) { TabVector* v = v_it_.data(); @@ -935,8 +895,7 @@ void TabFind::EvaluateTabs() { if (!tab->IsSeparator()) { tab->Evaluate(vertical_skew_, this); if (tab->BoxCount() < kMinEvaluatedTabs) { - if (textord_debug_tabfind > 2) - tab->Print("Too few boxes"); + if (textord_debug_tabfind > 2) tab->Print("Too few boxes"); delete rule_it.extract(); v_it_.set_to_list(&vectors_); } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) { @@ -951,21 +910,19 @@ void TabFind::EvaluateTabs() { // can be tested for being a common width with a simple callback function. void TabFind::ComputeColumnWidths(ScrollView* tab_win, ColPartitionGrid* part_grid) { - #ifndef GRAPHICS_DISABLED - if (tab_win != nullptr) - tab_win->Pen(ScrollView::WHITE); - #endif // GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED + if (tab_win != nullptr) tab_win->Pen(ScrollView::WHITE); +#endif // GRAPHICS_DISABLED // Accumulate column sections into a STATS int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor; STATS col_widths(0, col_widths_size + 1); ApplyPartitionsToColumnWidths(part_grid, &col_widths); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (tab_win != nullptr) { tab_win->Update(); } - #endif // GRAPHICS_DISABLED - if (textord_debug_tabfind > 1) - col_widths.print(); +#endif // GRAPHICS_DISABLED + if (textord_debug_tabfind > 1) col_widths.print(); // Now make a list of column widths. MakeColumnWidths(col_widths_size, &col_widths); // Turn the column width into a range. @@ -987,19 +944,16 @@ void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid, ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { BLOBNBOX_C_IT blob_it(part->boxes()); - if (blob_it.empty()) - continue; + if (blob_it.empty()) continue; BLOBNBOX* left_blob = blob_it.data(); blob_it.move_to_last(); BLOBNBOX* right_blob = blob_it.data(); - TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(), - true, false); - if (left_vector == nullptr || left_vector->IsRightTab()) - continue; - TabVector* right_vector = RightTabForBox(right_blob->bounding_box(), - true, false); - if (right_vector == nullptr || right_vector->IsLeftTab()) - continue; + TabVector* left_vector = + LeftTabForBox(left_blob->bounding_box(), true, false); + if (left_vector == nullptr || left_vector->IsRightTab()) continue; + TabVector* right_vector = + RightTabForBox(right_blob->bounding_box(), true, false); + if (right_vector == nullptr || right_vector->IsLeftTab()) continue; int line_left = left_vector->XAtY(left_blob->bounding_box().bottom()); int line_right = right_vector->XAtY(right_blob->bounding_box().bottom()); @@ -1016,8 +970,7 @@ void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid, ICOORDELT* w = it.data(); if (NearlyEqual(width, w->y(), 1)) { int true_width = part->bounding_box().width() / kColumnWidthFactor; - if (true_width <= w->y() && true_width > w->x()) - w->set_x(true_width); + if (true_width <= w->y() && true_width > w->x()) w->set_x(true_width); break; } } @@ -1036,15 +989,14 @@ void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) { int col_count = col_widths->pile_count(width); col_widths->add(width, -col_count); // Get the entire peak. - for (int left = width - 1; left > 0 && - col_widths->pile_count(left) > 0; + for (int left = width - 1; left > 0 && col_widths->pile_count(left) > 0; --left) { int new_count = col_widths->pile_count(left); col_count += new_count; col_widths->add(left, -new_count); } - for (int right = width + 1; right < col_widths_size && - col_widths->pile_count(right) > 0; + for (int right = width + 1; + right < col_widths_size && col_widths->pile_count(right) > 0; ++right) { int new_count = col_widths->pile_count(right); col_count += new_count; @@ -1056,8 +1008,8 @@ void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) { w_it.add_after_then_move(w); if (textord_debug_tabfind) tprintf("Column of width %d has %d = %.2f%% lines\n", - width * kColumnWidthFactor, col_count, - 100.0 * col_count / total_col_count); + width * kColumnWidthFactor, col_count, + 100.0 * col_count / total_col_count); } } } @@ -1065,21 +1017,19 @@ void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) { // Mark blobs as being in a vertical text line where that is the case. // Returns true if the majority of the image is vertical text lines. void TabFind::MarkVerticalText() { - if (textord_debug_tabfind) - tprintf("Checking for vertical lines\n"); + if (textord_debug_tabfind) tprintf("Checking for vertical lines\n"); BlobGridSearch gsearch(this); gsearch.StartFullSearch(); BLOBNBOX* blob = nullptr; while ((blob = gsearch.NextFullSearch()) != nullptr) { - if (blob->region_type() < BRT_UNKNOWN) - continue; + if (blob->region_type() < BRT_UNKNOWN) continue; if (blob->UniquelyVertical()) { blob->set_region_type(BRT_VERT_TEXT); } } } -int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) { +int TabFind::FindMedianGutterWidth(TabVector_LIST* lines) { TabVector_IT it(lines); int prev_right = -1; int max_gap = static_cast(kMaxGutterWidthAbsolute * resolution_); @@ -1107,9 +1057,8 @@ int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) { // the [top_y, bottom_y] range. // If ignore_images is true, then blobs with aligned_text() < 0 are treated // as if they do not exist. -BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox, - bool look_left, bool ignore_images, - double min_overlap_fraction, +BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox, bool look_left, + bool ignore_images, double min_overlap_fraction, int gap_limit, int top_y, int bottom_y) { GridSearch sidesearch(this); const TBOX& box = bbox->bounding_box(); @@ -1145,26 +1094,25 @@ BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox, if (h_gap > gap_limit) { // Hit a big gap before next tab so don't return anything. if (debug) - tprintf("Giving up due to big gap = %d vs %d\n", - h_gap, gap_limit); + tprintf("Giving up due to big gap = %d vs %d\n", h_gap, gap_limit); return result; } - if (h_gap > 0 && (look_left ? neighbour->right_tab_type() - : neighbour->left_tab_type()) >= TT_CONFIRMED) { + if (h_gap > 0 && + (look_left ? neighbour->right_tab_type() + : neighbour->left_tab_type()) >= TT_CONFIRMED) { // Hit a tab facing the wrong way. Stop in case we are crossing // the column boundary. if (debug) tprintf("Collision with like tab of type %d at %d,%d\n", look_left ? neighbour->right_tab_type() - : neighbour->left_tab_type(), + : neighbour->left_tab_type(), n_left, nbox.bottom()); return result; } // This is a good fit to the line. Continue with this // neighbour as the bbox if the best gap. if (result == nullptr || h_gap < best_gap) { - if (debug) - tprintf("Good result\n"); + if (debug) tprintf("Good result\n"); result = neighbour; best_gap = h_gap; } else { @@ -1208,8 +1156,8 @@ void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob, if (right->IsSeparator()) { // Try to find a nearby left edge to extend. if (WithinTestRegion(3, right_box.right(), right_box.bottom())) { - tprintf("Box edge (%d,%d-%d)", - right_box.right(), right_box.bottom(), right_box.top()); + tprintf("Box edge (%d,%d-%d)", right_box.right(), right_box.bottom(), + right_box.top()); right->Print(" looking for improvement for"); } TabVector* v = RightTabForBox(right_box, true, true); @@ -1222,8 +1170,8 @@ void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob, } } else { // Fake a vector. - right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_, - right_blob); + right = + new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_, right_blob); vectors_.add_sorted(TabVector::SortVectorsByKey, right); v_it_.move_to_first(); if (WithinTestRegion(3, right_box.right(), right_box.bottom())) { @@ -1267,8 +1215,7 @@ void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) { bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) { ComputeDeskewVectors(deskew, reskew); - if (deskew->x() < kCosMaxSkewAngle) - return false; + if (deskew->x() < kCosMaxSkewAngle) return false; RotateBlobList(*deskew, image_blobs); RotateBlobList(*deskew, &block->blobs); RotateBlobList(*deskew, &block->small_blobs); @@ -1323,8 +1270,7 @@ void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate, // Adjust the min gutter width for better tabbox selection // in 2nd call to FindInitialTabVectors(). int median_gutter = FindMedianGutterWidth(&vlines); - if (median_gutter > *min_gutter_width) - *min_gutter_width = median_gutter; + if (median_gutter > *min_gutter_width) *min_gutter_width = median_gutter; TabVector_IT h_it(horizontal_lines); for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { @@ -1347,8 +1293,7 @@ void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate, void TabFind::Reset() { v_it_.move_to_first(); for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) { - if (!v_it_.data()->IsSeparator()) - delete v_it_.extract(); + if (!v_it_.data()->IsSeparator()) delete v_it_.extract(); } Clear(); } @@ -1408,22 +1353,19 @@ void TabFind::ApplyTabConstraints() { // list of partners like there is with the front-to-front. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabVector* v = it.data(); - if (!v->IsRightTab()) - continue; + if (!v->IsRightTab()) continue; // For each back-to-back pair of vectors, try for common top and bottom. TabVector_IT partner_it(it); for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) { TabVector* partner = partner_it.data(); - if (!partner->IsLeftTab() || !v->VOverlap(*partner)) - continue; + if (!partner->IsLeftTab() || !v->VOverlap(*partner)) continue; v->SetupPartnerConstraints(partner); } } // Now actually apply the constraints to get common start/end points. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabVector* v = it.data(); - if (!v->IsSeparator()) - v->ApplyConstraints(); + if (!v->IsSeparator()) v->ApplyConstraints(); } // TODO(rays) Where constraint application fails, it would be good to try // checking the ends to see if they really should be moved. diff --git a/src/textord/tabfind.h b/src/textord/tabfind.h index 86bb04ea7a..b36e37ef0b 100644 --- a/src/textord/tabfind.h +++ b/src/textord/tabfind.h @@ -21,9 +21,9 @@ #define TESSERACT_TEXTORD_TABFIND_H_ #include "alignedblob.h" -#include "tesscallback.h" -#include "tabvector.h" #include "linefind.h" +#include "tabvector.h" +#include "tesscallback.h" class BLOBNBOX; class BLOBNBOX_LIST; @@ -65,8 +65,7 @@ class TabFind : public AlignedBlob { * while the grid that provides the tab stops(this) has to be derived from * TabFind. */ - void InsertBlobsToGrid(bool h_spread, bool v_spread, - BLOBNBOX_LIST* blobs, + void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST* blobs, BBGrid* grid); /** @@ -97,9 +96,8 @@ class TabFind : public AlignedBlob { /** * Find the gutter width and distance to inner neighbour for the given blob. */ - void GutterWidthAndNeighbourGap(int tab_x, int mean_height, - int max_gutter, bool left, - BLOBNBOX* bbox, int* gutter_width, + void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, + bool left, BLOBNBOX* bbox, int* gutter_width, int* neighbour_gap); /** @@ -155,27 +153,19 @@ class TabFind : public AlignedBlob { /** * Return a callback for testing CommonWidth. */ - WidthCallback* WidthCB() { - return width_cb_; - } + WidthCallback* WidthCB() { return width_cb_; } /** * Return the coords at which to draw the image backdrop. */ - const ICOORD& image_origin() const { - return image_origin_; - } + const ICOORD& image_origin() const { return image_origin_; } protected: /** // Accessors */ - TabVector_LIST* vectors() { - return &vectors_; - } - TabVector_LIST* dead_vectors() { - return &dead_vectors_; - } + TabVector_LIST* vectors() { return &vectors_; } + TabVector_LIST* dead_vectors() { return &dead_vectors_; } /** * Top-level function to find TabVectors in an input page block. @@ -184,16 +174,16 @@ class TabFind : public AlignedBlob { * tabfind_aligned_gap_fraction should be the value of parameter * textord_tabfind_aligned_gap_fraction */ - bool FindTabVectors(TabVector_LIST* hlines, - BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, - int min_gutter_width, double tabfind_aligned_gap_fraction, - ColPartitionGrid* part_grid, - FCOORD* deskew, FCOORD* reskew); + bool FindTabVectors(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, + TO_BLOCK* block, int min_gutter_width, + double tabfind_aligned_gap_fraction, + ColPartitionGrid* part_grid, FCOORD* deskew, + FCOORD* reskew); // Top-level function to not find TabVectors in an input page block, // but setup for single column mode. - void DontFindTabVectors(BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew); + void DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, + FCOORD* deskew, FCOORD* reskew); // Cleans up the lists of blobs in the block ready for use by TabFind. // Large blobs that look like text are moved to the main blobs list. @@ -268,10 +258,8 @@ class TabFind : public AlignedBlob { // and estimates the logical vertical direction. void FindAllTabVectors(int min_gutter_width); // Helper for FindAllTabVectors finds the vectors of a particular type. - int FindTabVectors(int search_size_multiple, - TabAlignment alignment, - int min_gutter_width, - TabVector_LIST* vectors, + int FindTabVectors(int search_size_multiple, TabAlignment alignment, + int min_gutter_width, TabVector_LIST* vectors, int* vertical_x, int* vertical_y); // Finds a vector corresponding to a tabstop running through the // given box of the given alignment type. @@ -281,8 +269,7 @@ class TabFind : public AlignedBlob { // vertical direction. (skew finding.) // Returns nullptr if no decent tabstop can be found. TabVector* FindTabVector(int search_size_multiple, int min_gutter_width, - TabAlignment alignment, - BLOBNBOX* bbox, + TabAlignment alignment, BLOBNBOX* bbox, int* vertical_x, int* vertical_y); // Set the vertical_skew_ member from the given vector and refit @@ -298,8 +285,7 @@ class TabFind : public AlignedBlob { // Trace textlines from one side to the other of each tab vector, saving // the most frequent column widths found in a list so that a given width // can be tested for being a common width with a simple callback function. - void ComputeColumnWidths(ScrollView* tab_win, - ColPartitionGrid* part_grid); + void ComputeColumnWidths(ScrollView* tab_win, ColPartitionGrid* part_grid); // Finds column width and: // if col_widths is not null (pass1): @@ -328,9 +314,8 @@ class TabFind : public AlignedBlob { // the [top_y, bottom_y] range. // If ignore_images is true, then blobs with aligned_text() < 0 are treated // as if they do not exist. - BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox, - bool look_left, bool ignore_images, - double min_overlap_fraction, + BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox, bool look_left, + bool ignore_images, double min_overlap_fraction, int gap_limit, int top_y, int bottom_y); // Add a bi-directional partner relationship between the left @@ -364,13 +349,13 @@ class TabFind : public AlignedBlob { void ApplyTabConstraints(); protected: - ICOORD vertical_skew_; //< Estimate of true vertical in this image. - int resolution_; //< Of source image in pixels per inch. + ICOORD vertical_skew_; //< Estimate of true vertical in this image. + int resolution_; //< Of source image in pixels per inch. private: - ICOORD image_origin_; //< Top-left of image in deskewed coords - TabVector_LIST vectors_; //< List of rule line and tabstops. - TabVector_IT v_it_; //< Iterator for searching vectors_. - TabVector_LIST dead_vectors_; //< Separators and unpartnered tab vectors. + ICOORD image_origin_; //< Top-left of image in deskewed coords + TabVector_LIST vectors_; //< List of rule line and tabstops. + TabVector_IT v_it_; //< Iterator for searching vectors_. + TabVector_LIST dead_vectors_; //< Separators and unpartnered tab vectors. // List of commonly occurring width ranges with x=min and y=max. ICOORDELT_LIST column_widths_; //< List of commonly occurring width ranges. /** Callback to test an int for being a common width. */ diff --git a/src/textord/tablefind.cpp b/src/textord/tablefind.cpp index 37a91c0c33..bb7df4cca7 100644 --- a/src/textord/tablefind.cpp +++ b/src/textord/tablefind.cpp @@ -21,9 +21,9 @@ #include "config_auto.h" #endif -#include "tablefind.h" #include #include +#include "tablefind.h" #include "allheaders.h" @@ -153,7 +153,8 @@ CLISTIZE(ColSegment) // Templated helper function used to create destructor callbacks for the // BBGrid::ClearGridData() method. -template void DeleteObject(T *object) { +template +void DeleteObject(T* object) { delete object; } @@ -162,8 +163,7 @@ TableFinder::TableFinder() global_median_xheight_(0), global_median_blob_width_(0), global_median_ledding_(0), - left_to_right_language_(true) { -} + left_to_right_language_(true) {} TableFinder::~TableFinder() { // ColPartitions and ColSegments created by this class for storage in grids @@ -222,11 +222,10 @@ void TableFinder::InsertCleanPartitions(ColPartitionGrid* grid, BLOBNBOX_CLIST* part_boxes = part->boxes(); BLOBNBOX_C_IT pit(part_boxes); for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { - BLOBNBOX *pblob = pit.data(); + BLOBNBOX* pblob = pit.data(); // Bad blobs... happens in UNLV set. // news.3G1, page 17 (around x=6) - if (!AllowBlob(*pblob)) - continue; + if (!AllowBlob(*pblob)) continue; if (pblob->flow() == BTFT_LEADER) { if (leader_part == nullptr) { leader_part = part->ShallowCopy(); @@ -259,8 +258,7 @@ void TableFinder::InsertCleanPartitions(ColPartitionGrid* grid, // High level function to perform table detection void TableFinder::LocateTables(ColPartitionGrid* grid, ColPartitionSet** all_columns, - WidthCallback* width_cb, - const FCOORD& reskew) { + WidthCallback* width_cb, const FCOORD& reskew) { // initialize spacing, neighbors, and columns InitializePartitions(all_columns); @@ -345,8 +343,8 @@ void TableFinder::LocateTables(ColPartitionGrid* grid, #ifndef GRAPHICS_DISABLED if (textord_show_tables) { ScrollView* table_win = MakeWindow(1400, 600, "Recognized Tables"); - DisplayColPartitions(table_win, &clean_part_grid_, - ScrollView::BLUE, ScrollView::BLUE); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, + ScrollView::BLUE); table_grid_.DisplayBoxes(table_win); } #endif // GRAPHICS_DISABLED @@ -360,8 +358,8 @@ void TableFinder::LocateTables(ColPartitionGrid* grid, #ifndef GRAPHICS_DISABLED if (textord_show_tables) { ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables"); - DisplayColPartitions(table_win, &clean_part_grid_, - ScrollView::BLUE, ScrollView::BLUE); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, + ScrollView::BLUE); table_grid_.DisplayBoxes(table_win); } #endif // GRAPHICS_DISABLED @@ -376,21 +374,11 @@ void TableFinder::LocateTables(ColPartitionGrid* grid, // the part_grid_ that is passed to InsertCleanPartitions, which was the same as // the grid that is the base of ColumnFinder. Just return the clean_part_grid_ // dimensions instead of duplicated memory. -int TableFinder::gridsize() const { - return clean_part_grid_.gridsize(); -} -int TableFinder::gridwidth() const { - return clean_part_grid_.gridwidth(); -} -int TableFinder::gridheight() const { - return clean_part_grid_.gridheight(); -} -const ICOORD& TableFinder::bleft() const { - return clean_part_grid_.bleft(); -} -const ICOORD& TableFinder::tright() const { - return clean_part_grid_.tright(); -} +int TableFinder::gridsize() const { return clean_part_grid_.gridsize(); } +int TableFinder::gridwidth() const { return clean_part_grid_.gridwidth(); } +int TableFinder::gridheight() const { return clean_part_grid_.gridheight(); } +const ICOORD& TableFinder::bleft() const { return clean_part_grid_.bleft(); } +const ICOORD& TableFinder::tright() const { return clean_part_grid_.tright(); } void TableFinder::InsertTextPartition(ColPartition* part) { ASSERT_HOST(part != nullptr); @@ -507,8 +495,7 @@ bool TableFinder::AllowBlob(const BLOBNBOX& blob) const { const int median_area = global_median_xheight_ * global_median_blob_width_; const double kAreaRequired = median_area * kAllowBlobArea; // Keep comparisons strictly greater to disallow 0! - return box.height() > kHeightRequired && - box.width() > kWidthRequired && + return box.height() > kHeightRequired && box.width() > kWidthRequired && box.area() > kAreaRequired; } @@ -528,7 +515,8 @@ void TableFinder::GetColumnBlocks(ColPartitionSet** all_columns, if (columns != nullptr) { ColSegment_LIST new_blocks; // Get boxes from the current vertical position on the grid - columns->GetColumnBoxes(i * gridsize(), (i+1) * gridsize(), &new_blocks); + columns->GetColumnBoxes(i * gridsize(), (i + 1) * gridsize(), + &new_blocks); // Merge the new_blocks boxes into column_blocks if they are well-aligned GroupColumnBlocks(&new_blocks, column_blocks); } @@ -566,13 +554,13 @@ void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks, } // are the two boxes immediate neighbors along the vertical direction -bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) { +bool TableFinder::ConsecutiveBoxes(const TBOX& b1, const TBOX& b2) { int x_margin = 20; int y_margin = 5; return (abs(b1.left() - b2.left()) < x_margin) && - (abs(b1.right() - b2.right()) < x_margin) && - (abs(b1.top()-b2.bottom()) < y_margin || - abs(b2.top()-b1.bottom()) < y_margin); + (abs(b1.right() - b2.right()) < x_margin) && + (abs(b1.top() - b2.bottom()) < y_margin || + abs(b2.top() - b1.bottom()) < y_margin); } // Set up info for clean_part_grid_ partitions to be valid during detection @@ -639,8 +627,9 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid* grid, ColPartition* upper_part = part->SingletonPartner(true); if (upper_part) { - int space = std::max(0, static_cast(upper_part->bounding_box().bottom() - - part->bounding_box().bottom())); + int space = + std::max(0, static_cast(upper_part->bounding_box().bottom() - + part->bounding_box().bottom())); part->set_space_above(space); } else { // TODO(nbeato): What constitutes a good value? @@ -651,8 +640,9 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid* grid, ColPartition* lower_part = part->SingletonPartner(false); if (lower_part) { - int space = std::max(0, static_cast(part->bounding_box().bottom() - - lower_part->bounding_box().bottom())); + int space = + std::max(0, static_cast(part->bounding_box().bottom() - + lower_part->bounding_box().bottom())); part->set_space_below(space); } else { // TODO(nbeato): What constitutes a good value? @@ -666,15 +656,17 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid* grid, // Set spacing and closest neighbors above and below a given colpartition. void TableFinder::SetVerticalSpacing(ColPartition* part) { TBOX box = part->bounding_box(); - int top_range = std::min(box.top() + kMaxVerticalSpacing, static_cast(tright().y())); - int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, static_cast(bleft().y())); + int top_range = + std::min(box.top() + kMaxVerticalSpacing, static_cast(tright().y())); + int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, + static_cast(bleft().y())); box.set_top(top_range); box.set_bottom(bottom_range); TBOX part_box = part->bounding_box(); // Start a rect search - GridSearch - rectsearch(&clean_part_grid_); + GridSearch rectsearch( + &clean_part_grid_); rectsearch.StartRectSearch(box); ColPartition* neighbor; int min_space_above = kMaxVerticalSpacing; @@ -682,14 +674,12 @@ void TableFinder::SetVerticalSpacing(ColPartition* part) { ColPartition* above_neighbor = nullptr; ColPartition* below_neighbor = nullptr; while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { - if (neighbor == part) - continue; + if (neighbor == part) continue; TBOX neighbor_box = neighbor->bounding_box(); if (neighbor_box.major_x_overlap(part_box)) { int gap = abs(part->median_bottom() - neighbor->median_bottom()); // If neighbor is below current partition - if (neighbor_box.top() < part_box.bottom() && - gap < min_space_below) { + if (neighbor_box.top() < part_box.bottom() && gap < min_space_below) { min_space_below = gap; below_neighbor = neighbor; } // If neighbor is above current partition @@ -697,7 +687,7 @@ void TableFinder::SetVerticalSpacing(ColPartition* part) { gap < min_space_above) { min_space_above = gap; above_neighbor = neighbor; - } + } } } part->set_space_above(min_space_above); @@ -743,7 +733,7 @@ void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) { set_global_median_xheight(static_cast(xheight_stats.median() + 0.5)); set_global_median_blob_width(static_cast(width_stats.median() + 0.5)); set_global_median_ledding(static_cast(ledding_stats.median() + 0.5)); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (textord_tablefind_show_stats) { const char* kWindowName = "X-height (R), X-width (G), and ledding (B)"; ScrollView* stats_win = MakeWindow(500, 10, kWindowName); @@ -751,7 +741,7 @@ void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) { width_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN); ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::BLUE); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED } void TableFinder::set_global_median_xheight(int xheight) { @@ -771,15 +761,13 @@ void TableFinder::FindNeighbors() { while ((part = gsearch.NextFullSearch()) != nullptr) { // TODO(nbeato): Rename this function, meaning is different now. // IT is finding nearest neighbors its own way - //SetVerticalSpacing(part); + // SetVerticalSpacing(part); ColPartition* upper = part->SingletonPartner(true); - if (upper) - part->set_nearest_neighbor_above(upper); + if (upper) part->set_nearest_neighbor_above(upper); ColPartition* lower = part->SingletonPartner(false); - if (lower) - part->set_nearest_neighbor_below(lower); + if (lower) part->set_nearest_neighbor_below(lower); } } @@ -827,8 +815,8 @@ void TableFinder::MarkTablePartitions() { // 4- Partitions with leaders before/after them. void TableFinder::MarkPartitionsUsingLocalInformation() { // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&clean_part_grid_); + GridSearch gsearch( + &clean_part_grid_); gsearch.StartFullSearch(); ColPartition* part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -846,8 +834,7 @@ void TableFinder::MarkPartitionsUsingLocalInformation() { // - numbered equations // - line drawing regions // TODO(faisal): detect and fix above-mentioned cases - if (HasWideOrNoInterWordGap(part) || - HasLeaderAdjacent(*part)) { + if (HasWideOrNoInterWordGap(part) || HasLeaderAdjacent(*part)) { part->set_table_type(); } } @@ -863,7 +850,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { BLOBNBOX_C_IT it(part_boxes); // Check if this is a relatively small partition (such as a single word) if (part->bounding_box().width() < - kMinBoxesInTextPartition * part->median_size() && + kMinBoxesInTextPartition * part->median_size() && part_boxes->length() < kMinBoxesInTextPartition) return true; @@ -908,17 +895,15 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { } // If a large enough gap is found, mark it as a table cell (return true) - if (gap > max_gap) - return true; - if (gap > largest_partition_gap_found) - largest_partition_gap_found = gap; + if (gap > max_gap) return true; + if (gap > largest_partition_gap_found) largest_partition_gap_found = gap; } previous_x1 = current_x1; } // Since no large gap was found, return false if the partition is too // long to be a data cell if (part->bounding_box().width() > - kMaxBoxesInDataPartition * part->median_size() || + kMaxBoxesInDataPartition * part->median_size() || part_boxes->length() > kMaxBoxesInDataPartition) return false; @@ -927,8 +912,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { // Detect these as table partitions? Shouldn't this be case by case? // The behavior before was to ignore this, making max_partition_gap < 0 // and implicitly return true. Just making it explicit. - if (largest_partition_gap_found == -1) - return true; + if (largest_partition_gap_found == -1) return true; // return true if the maximum gap found is smaller than the minimum allowed // max_gap in a text partition. This indicates that there is no significant @@ -945,8 +929,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { // As these arise, the aggressive nature of this search may need to be // trimmed down. bool TableFinder::HasLeaderAdjacent(const ColPartition& part) { - if (part.flow() == BTFT_LEADER) - return true; + if (part.flow() == BTFT_LEADER) return true; // Search range is left and right bounded by an offset of the // median xheight. This offset is to allow some tolerance to the // the leaders on the page in the event that the alignment is still @@ -964,17 +947,14 @@ bool TableFinder::HasLeaderAdjacent(const ColPartition& part) { while ((leader = hsearch.NextSideSearch(right_to_left)) != nullptr) { // The leader could be a horizontal ruling in the grid. // Make sure it is actually a leader. - if (leader->flow() != BTFT_LEADER) - continue; + if (leader->flow() != BTFT_LEADER) continue; // This should not happen, they are in different grids. ASSERT_HOST(&part != leader); // Make sure the leader shares a page column with the partition, // otherwise we are spreading across columns. - if (!part.IsInSameColumnAs(*leader)) - break; + if (!part.IsInSameColumnAs(*leader)) break; // There should be a significant vertical overlap - if (!leader->VSignificantCoreOverlap(part)) - continue; + if (!leader->VSignificantCoreOverlap(part)) continue; // Leader passed all tests, so it is adjacent. return true; } @@ -999,17 +979,13 @@ void TableFinder::FilterParagraphEndings() { gsearch.StartFullSearch(); ColPartition* part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->type() != PT_TABLE) - continue; // Consider only table partitions + if (part->type() != PT_TABLE) continue; // Consider only table partitions // Paragraph ending should have flowing text above it. ColPartition* upper_part = part->nearest_neighbor_above(); - if (!upper_part) - continue; - if (upper_part->type() != PT_FLOWING_TEXT) - continue; - if (upper_part->bounding_box().width() < - 2 * part->bounding_box().width()) + if (!upper_part) continue; + if (upper_part->type() != PT_FLOWING_TEXT) continue; + if (upper_part->bounding_box().width() < 2 * part->bounding_box().width()) continue; // Check if its the last line of a paragraph. // In most cases, a paragraph ending should be left-aligned to text line @@ -1019,21 +995,22 @@ void TableFinder::FilterParagraphEndings() { // the left of the one above it. int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2; int upper_mid = (upper_part->bounding_box().left() + - upper_part->bounding_box().right()) / 2; + upper_part->bounding_box().right()) / + 2; int current_spacing = 0; // spacing of the current line to margin int upper_spacing = 0; // spacing of the previous line to the margin if (left_to_right_language_) { // Left to right languages, use mid - left to figure out the distance // the middle is from the left margin. int left = std::min(part->bounding_box().left(), - upper_part->bounding_box().left()); + upper_part->bounding_box().left()); current_spacing = mid - left; upper_spacing = upper_mid - left; } else { // Right to left languages, use right - mid to figure out the distance // the middle is from the right margin. int right = std::max(part->bounding_box().right(), - upper_part->bounding_box().right()); + upper_part->bounding_box().right()); current_spacing = right - mid; upper_spacing = right - upper_mid; } @@ -1082,8 +1059,7 @@ void TableFinder::FilterHeaderAndFooter() { gsearch.StartFullSearch(); ColPartition* part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { - if (!part->IsTextType()) - continue; // Consider only text partitions + if (!part->IsTextType()) continue; // Consider only text partitions int top = part->bounding_box().top(); int bottom = part->bounding_box().bottom(); if (top > max_top) { @@ -1095,10 +1071,8 @@ void TableFinder::FilterHeaderAndFooter() { footer = part; } } - if (header) - header->clear_table_type(); - if (footer) - footer->clear_table_type(); + if (header) header->clear_table_type(); + if (footer) footer->clear_table_type(); } // Mark all ColPartitions as table cells that have a table cell above @@ -1116,8 +1090,7 @@ void TableFinder::SmoothTablePartitionRuns() { continue; // Consider only text partitions ColPartition* upper_part = part->nearest_neighbor_above(); ColPartition* lower_part = part->nearest_neighbor_below(); - if (!upper_part || !lower_part) - continue; + if (!upper_part || !lower_part) continue; if (upper_part->type() == PT_TABLE && lower_part->type() == PT_TABLE) part->set_table_type(); } @@ -1127,8 +1100,7 @@ void TableFinder::SmoothTablePartitionRuns() { gsearch.StartFullSearch(); part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->type() != PT_TABLE) - continue; // Consider only text partitions + if (part->type() != PT_TABLE) continue; // Consider only text partitions ColPartition* upper_part = part->nearest_neighbor_above(); ColPartition* lower_part = part->nearest_neighbor_below(); @@ -1148,8 +1120,8 @@ void TableFinder::SetColumnsType(ColSegment_LIST* column_blocks) { TBOX box = seg->bounding_box(); int num_table_cells = 0; int num_text_cells = 0; - GridSearch - rsearch(&clean_part_grid_); + GridSearch rsearch( + &clean_part_grid_); rsearch.SetUniqueMode(true); rsearch.StartRectSearch(box); ColPartition* part = nullptr; @@ -1174,8 +1146,8 @@ void TableFinder::SetColumnsType(ColSegment_LIST* column_blocks) { } // Move column blocks to grid -void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, - ColSegmentGrid *col_seg_grid) { +void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST* segments, + ColSegmentGrid* col_seg_grid) { ColSegment_IT it(segments); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColSegment* seg = it.extract(); @@ -1197,8 +1169,8 @@ void TableFinder::GridMergeColumnBlocks() { int margin = gridsize(); // Iterate the Column Blocks in the grid. - GridSearch - gsearch(&col_seg_grid_); + GridSearch gsearch( + &col_seg_grid_); gsearch.StartFullSearch(); ColSegment* seg; while ((seg = gsearch.NextFullSearch()) != nullptr) { @@ -1211,18 +1183,19 @@ void TableFinder::GridMergeColumnBlocks() { do { TBOX box = seg->bounding_box(); // slightly expand the search region vertically - int top_range = std::min(box.top() + margin, static_cast(tright().y())); - int bottom_range = std::max(box.bottom() - margin, static_cast(bleft().y())); + int top_range = + std::min(box.top() + margin, static_cast(tright().y())); + int bottom_range = + std::max(box.bottom() - margin, static_cast(bleft().y())); box.set_top(top_range); box.set_bottom(bottom_range); neighbor_found = false; - GridSearch - rectsearch(&col_seg_grid_); + GridSearch rectsearch( + &col_seg_grid_); rectsearch.StartRectSearch(box); ColSegment* neighbor = nullptr; while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { - if (neighbor == seg) - continue; + if (neighbor == seg) continue; const TBOX& neighbor_box = neighbor->bounding_box(); // If the neighbor box significantly overlaps with the current // box (due to the expansion of the current box in the @@ -1237,11 +1210,9 @@ void TableFinder::GridMergeColumnBlocks() { continue; } // Only expand if the neighbor box is of table type - if (neighbor->type() != COL_TABLE) - continue; + if (neighbor->type() != COL_TABLE) continue; // Insert the neighbor box into the current column block - if (neighbor_box.major_x_overlap(box) && - !box.contains(neighbor_box)) { + if (neighbor_box.major_x_overlap(box) && !box.contains(neighbor_box)) { seg->InsertBox(neighbor_box); neighbor_found = true; modified = true; @@ -1271,11 +1242,11 @@ void TableFinder::GridMergeColumnBlocks() { // column below/above it // 4- cells from two vertically adjacent tables merge together to make a // single column resulting in merging of the two tables -void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { +void TableFinder::GetTableColumns(ColSegment_LIST* table_columns) { ColSegment_IT it(table_columns); // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&clean_part_grid_); + GridSearch gsearch( + &clean_part_grid_); gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -1288,22 +1259,19 @@ void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { // Start a search below the current cell to find bottom neighbours // Note: a full search will always process things above it first, so // this should be starting at the highest cell and working its way down. - GridSearch - vsearch(&clean_part_grid_); + GridSearch vsearch( + &clean_part_grid_); vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom()); ColPartition* neighbor = nullptr; bool found_neighbours = false; while ((neighbor = vsearch.NextVerticalSearch(true)) != nullptr) { // only consider neighbors not assigned to any column yet - if (neighbor->inside_table_column()) - continue; + if (neighbor->inside_table_column()) continue; // Horizontal lines should not break the flow - if (neighbor->IsHorizontalLine()) - continue; + if (neighbor->IsHorizontalLine()) continue; // presence of a non-table neighbor marks the end of current // table column - if (neighbor->type() != PT_TABLE) - break; + if (neighbor->type() != PT_TABLE) break; // add the neighbor partition to the table column const TBOX& neighbor_box = neighbor->bounding_box(); col->InsertBox(neighbor_box); @@ -1326,8 +1294,8 @@ void TableFinder::GetTableRegions(ColSegment_LIST* table_columns, ColSegment_IT cit(table_columns); ColSegment_IT rit(table_regions); // Iterate through column blocks - GridSearch - gsearch(&col_seg_grid_); + GridSearch gsearch( + &col_seg_grid_); gsearch.StartFullSearch(); ColSegment* part; int page_height = tright().y() - bleft().y(); @@ -1386,8 +1354,8 @@ void TableFinder::GetTableRegions(ColSegment_LIST* table_columns, // single line and hence the tables get merged together void TableFinder::GridMergeTableRegions() { // Iterate the table regions in the grid. - GridSearch - gsearch(&table_grid_); + GridSearch gsearch( + &table_grid_); gsearch.StartFullSearch(); ColSegment* seg = nullptr; while ((seg = gsearch.NextFullSearch()) != nullptr) { @@ -1400,13 +1368,12 @@ void TableFinder::GridMergeTableRegions() { search_region.set_left(bleft().x()); search_region.set_right(tright().x()); neighbor_found = false; - GridSearch - rectsearch(&table_grid_); + GridSearch rectsearch( + &table_grid_); rectsearch.StartRectSearch(search_region); ColSegment* neighbor = nullptr; while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { - if (neighbor == seg) - continue; + if (neighbor == seg) continue; const TBOX& neighbor_box = neighbor->bounding_box(); // Check if a neighbor box has a large overlap with the table // region. This may happen as a result of merging two table @@ -1442,17 +1409,16 @@ void TableFinder::GridMergeTableRegions() { // Decide if two table regions belong to one table based on a common // horizontal ruling line or another colpartition -bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) { +bool TableFinder::BelongToOneTable(const TBOX& box1, const TBOX& box2) { // Check the obvious case. Most likely not true because overlapping boxes // should already be merged, but seems like a good thing to do in case things // change. - if (box1.overlap(box2)) - return true; + if (box1.overlap(box2)) return true; // Check for ColPartitions spanning both table regions TBOX bbox = box1.bounding_union(box2); // Start a rect search on bbox - GridSearch - rectsearch(&clean_part_grid_); + GridSearch rectsearch( + &clean_part_grid_); rectsearch.StartRectSearch(bbox); ColPartition* part = nullptr; while ((part = rectsearch.NextRectSearch()) != nullptr) { @@ -1546,15 +1512,14 @@ void TableFinder::GrowTableToIncludePartials(const TBOX& table_box, // Rulings are in a different grid, so search 2 grids for rulings, text, // and table partitions that are not entirely within the new box. for (int i = 0; i < 2; ++i) { - ColPartitionGrid* grid = (i == 0) ? &fragmented_text_grid_ : - &leader_and_ruling_grid_; + ColPartitionGrid* grid = + (i == 0) ? &fragmented_text_grid_ : &leader_and_ruling_grid_; ColPartitionGridSearch rectsearch(grid); rectsearch.StartRectSearch(search_range); ColPartition* part = nullptr; while ((part = rectsearch.NextRectSearch()) != nullptr) { - // Only include text and table types. - if (part->IsImageType()) - continue; + // Only include text and table types. + if (part->IsImageType()) continue; const TBOX& part_box = part->bounding_box(); // Include partition in the table if more than half of it // is covered by the table @@ -1579,13 +1544,11 @@ void TableFinder::GrowTableToIncludeLines(const TBOX& table_box, // TODO(nbeato) This should also do vertical, but column // boundaries are breaking things. This function needs to be // updated to allow vertical lines as well. - if (!part->IsLineType()) - continue; + if (!part->IsLineType()) continue; // Avoid the following function call if the result of the // function is irrelevant. const TBOX& part_box = part->bounding_box(); - if (result_box->contains(part_box)) - continue; + if (result_box->contains(part_box)) continue; // Include a partially overlapping horizontal line only if the // extra ColPartitions that will be included due to expansion // have large side spacing w.r.t. columns containing them. @@ -1600,11 +1563,9 @@ void TableFinder::GrowTableToIncludeLines(const TBOX& table_box, // due to expansion bool TableFinder::HLineBelongsToTable(const ColPartition& part, const TBOX& table_box) { - if (!part.IsHorizontalLine()) - return false; + if (!part.IsHorizontalLine()) return false; const TBOX& part_box = part.bounding_box(); - if (!part_box.major_x_overlap(table_box)) - return false; + if (!part_box.major_x_overlap(table_box)) return false; // Do not consider top-most horizontal line since it usually // originates from noise. // TODO(nbeato): I had to comment this out because the ruling grid doesn't @@ -1623,8 +1584,8 @@ bool TableFinder::HLineBelongsToTable(const ColPartition& part, // Rulings are in a different grid, so search 2 grids for rulings, text, // and table partitions that are introduced by the new box. for (int i = 0; i < 2; ++i) { - ColPartitionGrid* grid = (i == 0) ? &clean_part_grid_ : - &leader_and_ruling_grid_; + ColPartitionGrid* grid = + (i == 0) ? &clean_part_grid_ : &leader_and_ruling_grid_; // Start a rect search on bbox ColPartitionGridSearch rectsearch(grid); rectsearch.SetUniqueMode(true); @@ -1636,8 +1597,7 @@ bool TableFinder::HLineBelongsToTable(const ColPartition& part, if (extra_part_box.overlap_fraction(table_box) > kMinOverlapWithTable) continue; // Non-text ColPartitions do not contribute - if (extra_part->IsImageType()) - continue; + if (extra_part->IsImageType()) continue; // Consider this partition. num_extra_partitions++; // presence of a table cell is a strong hint, so just increment the scores @@ -1650,14 +1610,13 @@ bool TableFinder::HLineBelongsToTable(const ColPartition& part, int space_threshold = kSideSpaceMargin * part.median_size(); if (extra_part->space_to_right() > space_threshold) extra_space_to_right++; - if (extra_part->space_to_left() > space_threshold) - extra_space_to_left++; + if (extra_part->space_to_left() > space_threshold) extra_space_to_left++; } } // tprintf("%d %d %d\n", // num_extra_partitions,extra_space_to_right,extra_space_to_left); return (extra_space_to_right > num_extra_partitions / 2) || - (extra_space_to_left > num_extra_partitions / 2); + (extra_space_to_left > num_extra_partitions / 2); } // Look for isolated column headers above the given table box and @@ -1671,13 +1630,11 @@ void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) { ColPartition* previous_neighbor = nullptr; while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) { // Max distance to find a table heading. - const int max_distance = kMaxColumnHeaderDistance * - neighbor->median_size(); + const int max_distance = kMaxColumnHeaderDistance * neighbor->median_size(); int table_top = table_box->top(); const TBOX& box = neighbor->bounding_box(); // Do not continue if the next box is way above - if (box.bottom() - table_top > max_distance) - break; + if (box.bottom() - table_top > max_distance) break; // Unconditionally include partitions of type TABLE or LINE // TODO(faisal): add some reasonable conditions here if (neighbor->type() == PT_TABLE || neighbor->IsLineType()) { @@ -1691,8 +1648,7 @@ void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) { previous_neighbor = neighbor; } else { const TBOX& previous_box = previous_neighbor->bounding_box(); - if (!box.major_y_overlap(previous_box)) - break; + if (!box.major_y_overlap(previous_box)) break; } } } @@ -1707,8 +1663,8 @@ void TableFinder::DeleteSingleColumnTables() { // create an integer array to hold projection on x-axis int* table_xprojection = new int[page_width]; // Iterate through all tables in the table grid - GridSearch - table_search(&table_grid_); + GridSearch table_search( + &table_grid_); table_search.StartFullSearch(); ColSegment* table; while ((table = table_search.NextFullSearch()) != nullptr) { @@ -1718,20 +1674,18 @@ void TableFinder::DeleteSingleColumnTables() { table_xprojection[i] = 0; } // Start a rect search on table_box - GridSearch - rectsearch(&clean_part_grid_); + GridSearch rectsearch( + &clean_part_grid_); rectsearch.SetUniqueMode(true); rectsearch.StartRectSearch(table_box); ColPartition* part; while ((part = rectsearch.NextRectSearch()) != nullptr) { - if (!part->IsTextType()) - continue; // Do not consider non-text partitions + if (!part->IsTextType()) continue; // Do not consider non-text partitions if (part->flow() == BTFT_LEADER) continue; // Assume leaders are in tables TBOX part_box = part->bounding_box(); // Do not consider partitions partially covered by the table - if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable) - continue; + if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable) continue; BLOBNBOX_CLIST* part_boxes = part->boxes(); BLOBNBOX_C_IT pit(part_boxes); @@ -1743,7 +1697,7 @@ void TableFinder::DeleteSingleColumnTables() { int next_position_to_write = 0; for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { - BLOBNBOX *pblob = pit.data(); + BLOBNBOX* pblob = pit.data(); // ignore blob height for the purpose of projection since we // are only interested in finding valleys int xstart = pblob->bounding_box().left(); @@ -1777,8 +1731,7 @@ bool TableFinder::GapInXProjection(int* xprojection, int length) { // Peak value represents the maximum number of horizontally // overlapping colpartitions, so this can be considered as the // number of rows in the table - if (peak_value < kMinRowsInTable) - return false; + if (peak_value < kMinRowsInTable) return false; double projection_threshold = kSmallTableProjectionThreshold * peak_value; if (peak_value >= kLargeTableRowCount) projection_threshold = kLargeTableProjectionThreshold * peak_value; @@ -1797,8 +1750,7 @@ bool TableFinder::GapInXProjection(int* xprojection, int length) { // detect end of a run of zeros and update the value of largest gap if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) { int gap = i - run_start; - if (gap > largest_gap) - largest_gap = gap; + if (gap > largest_gap) largest_gap = gap; run_start = -1; } } @@ -1819,12 +1771,11 @@ void TableFinder::RecognizeTables() { ScrollView* table_win = nullptr; if (textord_show_tables) { table_win = MakeWindow(0, 0, "Table Structure"); - DisplayColPartitions(table_win, &fragmented_text_grid_, - ScrollView::BLUE, ScrollView::LIGHT_BLUE); + DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE, + ScrollView::LIGHT_BLUE); // table_grid_.DisplayBoxes(table_win); } - TableRecognizer recognizer; recognizer.Init(); recognizer.set_line_grid(&leader_and_ruling_grid_); @@ -1869,8 +1820,7 @@ void TableFinder::RecognizeTables() { } // Displays the column segments in some window. -void TableFinder::DisplayColSegments(ScrollView* win, - ColSegment_LIST *segments, +void TableFinder::DisplayColSegments(ScrollView* win, ColSegment_LIST* segments, ScrollView::Color color) { #ifndef GRAPHICS_DISABLED win->Pen(color); @@ -1890,11 +1840,10 @@ void TableFinder::DisplayColSegments(ScrollView* win, } void TableFinder::DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, - ScrollView::Color color) { + ScrollView::Color color) { #ifndef GRAPHICS_DISABLED // Iterate the ColPartitions in the grid. - GridSearch - gsearch(grid); + GridSearch gsearch(grid); gsearch.StartFullSearch(); ColSegment* seg = nullptr; while ((seg = gsearch.NextFullSearch()) != nullptr) { @@ -1914,21 +1863,18 @@ void TableFinder::DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, // Displays the colpartitions using a new coloring on an existing window. // Note: This method is only for debug purpose during development and // would not be part of checked in code -void TableFinder::DisplayColPartitions(ScrollView* win, - ColPartitionGrid* grid, +void TableFinder::DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, ScrollView::Color default_color, ScrollView::Color table_color) { #ifndef GRAPHICS_DISABLED ScrollView::Color color = default_color; // Iterate the ColPartitions in the grid. - GridSearch - gsearch(grid); + GridSearch gsearch(grid); gsearch.StartFullSearch(); ColPartition* part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { color = default_color; - if (part->type() == PT_TABLE) - color = table_color; + if (part->type() == PT_TABLE) color = table_color; const TBOX& box = part->bounding_box(); int left_x = box.left(); @@ -1942,20 +1888,17 @@ void TableFinder::DisplayColPartitions(ScrollView* win, win->UpdateWindow(); #endif } -void TableFinder::DisplayColPartitions(ScrollView* win, - ColPartitionGrid* grid, +void TableFinder::DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, ScrollView::Color default_color) { DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW); } -void TableFinder::DisplayColPartitionConnections( - ScrollView* win, - ColPartitionGrid* grid, - ScrollView::Color color) { +void TableFinder::DisplayColPartitionConnections(ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color color) { #ifndef GRAPHICS_DISABLED // Iterate the ColPartitions in the grid. - GridSearch - gsearch(grid); + GridSearch gsearch(grid); gsearch.StartFullSearch(); ColPartition* part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -2000,8 +1943,7 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid* grid, WidthCallback* width_cb) { // Since we have table blocks already, remove table tags from all // colpartitions - GridSearch - gsearch(grid); + GridSearch gsearch(grid); gsearch.StartFullSearch(); ColPartition* part = nullptr; @@ -2012,22 +1954,21 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid* grid, } // Now make a single colpartition out of each table block and remove // all colpartitions contained within a table - GridSearch - table_search(&table_grid_); + GridSearch table_search( + &table_grid_); table_search.StartFullSearch(); ColSegment* table; while ((table = table_search.NextFullSearch()) != nullptr) { const TBOX& table_box = table->bounding_box(); // Start a rect search on table_box - GridSearch - rectsearch(grid); + GridSearch rectsearch( + grid); rectsearch.StartRectSearch(table_box); ColPartition* part; ColPartition* table_partition = nullptr; while ((part = rectsearch.NextRectSearch()) != nullptr) { - // Do not consider image partitions - if (!part->IsTextType()) - continue; + // Do not consider image partitions + if (!part->IsTextType()) continue; TBOX part_box = part->bounding_box(); // Include partition in the table if more than half of it // is covered by the table @@ -2064,16 +2005,15 @@ ColSegment::ColSegment() : ELIST_LINK(), num_table_cells_(0), num_text_cells_(0), - type_(COL_UNKNOWN) { -} + type_(COL_UNKNOWN) {} // Provides a color for BBGrid to draw the rectangle. -ScrollView::Color ColSegment::BoxColor() const { +ScrollView::Color ColSegment::BoxColor() const { const ScrollView::Color kBoxColors[PT_COUNT] = { - ScrollView::YELLOW, - ScrollView::BLUE, - ScrollView::YELLOW, - ScrollView::MAGENTA, + ScrollView::YELLOW, + ScrollView::BLUE, + ScrollView::YELLOW, + ScrollView::MAGENTA, }; return kBoxColors[type_]; } diff --git a/src/textord/tablefind.h b/src/textord/tablefind.h index 20e746a240..c9118382d2 100644 --- a/src/textord/tablefind.h +++ b/src/textord/tablefind.h @@ -27,13 +27,7 @@ namespace tesseract { // Possible types for a column segment. -enum ColSegType { - COL_UNKNOWN, - COL_TEXT, - COL_TABLE, - COL_MIXED, - COL_COUNT -}; +enum ColSegType { COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED, COL_COUNT }; class ColPartitionSet; @@ -49,76 +43,51 @@ class ColSegment : public ELIST_LINK { ~ColSegment() = default; // Simple accessors and mutators - const TBOX& bounding_box() const { - return bounding_box_; - } + const TBOX& bounding_box() const { return bounding_box_; } - void set_top(int y) { - bounding_box_.set_top(y); - } + void set_top(int y) { bounding_box_.set_top(y); } - void set_bottom(int y) { - bounding_box_.set_bottom(y); - } + void set_bottom(int y) { bounding_box_.set_bottom(y); } - void set_left(int x) { - bounding_box_.set_left(x); - } + void set_left(int x) { bounding_box_.set_left(x); } - void set_right(int x) { - bounding_box_.set_right(x); - } + void set_right(int x) { bounding_box_.set_right(x); } - void set_bounding_box(const TBOX& other) { - bounding_box_ = other; - } + void set_bounding_box(const TBOX& other) { bounding_box_ = other; } - int get_num_table_cells() const { - return num_table_cells_; - } + int get_num_table_cells() const { return num_table_cells_; } // set the number of table colpartitions covered by the bounding_box_ - void set_num_table_cells(int n) { - num_table_cells_ = n; - } + void set_num_table_cells(int n) { num_table_cells_ = n; } - int get_num_text_cells() const { - return num_text_cells_; - } + int get_num_text_cells() const { return num_text_cells_; } // set the number of text colpartitions covered by the bounding_box_ - void set_num_text_cells(int n) { - num_text_cells_ = n; - } + void set_num_text_cells(int n) { num_text_cells_ = n; } - ColSegType type() const { - return type_; - } + ColSegType type() const { return type_; } // set the type of the block based on the ratio of table to text // colpartitions covered by it. void set_type(); // Provides a color for BBGrid to draw the rectangle. - ScrollView::Color BoxColor() const; + ScrollView::Color BoxColor() const; // Insert a rectangle into bounding_box_ void InsertBox(const TBOX& other); private: - TBOX bounding_box_; // bounding box + TBOX bounding_box_; // bounding box int num_table_cells_; int num_text_cells_; ColSegType type_; }; // Typedef BBGrid of ColSegments -using ColSegmentGrid = BBGrid; -using ColSegmentGridSearch = GridSearch; +using ColSegmentGrid = BBGrid; +using ColSegmentGridSearch = + GridSearch; // TableFinder is a utility class to find a set of tables given a set of // ColPartitions and Columns. The TableFinder will mark candidate ColPartitions @@ -135,9 +104,7 @@ class TableFinder { ~TableFinder(); // Set the resolution of the connected components in ppi. - void set_resolution(int resolution) { - resolution_ = resolution; - } + void set_resolution(int resolution) { resolution_ = resolution; } // Change the reading order. Initially it is left to right. void set_left_to_right_language(bool order); @@ -154,10 +121,8 @@ class TableFinder { // tables. The columns and width callbacks are used to merge tables. // The reskew argument is only used to write the tables to the out.png // if that feature is enabled. - void LocateTables(ColPartitionGrid* grid, - ColPartitionSet** columns, - WidthCallback* width_cb, - const FCOORD& reskew); + void LocateTables(ColPartitionGrid* grid, ColPartitionSet** columns, + WidthCallback* width_cb, const FCOORD& reskew); protected: // Access for the grid dimensions. @@ -279,14 +244,14 @@ class TableFinder { // Get Column segments from best_columns_ void GetColumnBlocks(ColPartitionSet** columns, - ColSegment_LIST *col_segments); + ColSegment_LIST* col_segments); // Group Column segments into consecutive single column regions. - void GroupColumnBlocks(ColSegment_LIST *current_segments, - ColSegment_LIST *col_segments); + void GroupColumnBlocks(ColSegment_LIST* current_segments, + ColSegment_LIST* col_segments); // Check if two boxes are consecutive within the same column - bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2); + bool ConsecutiveBoxes(const TBOX& b1, const TBOX& b2); // Set the ratio of candidate table partitions in each column void SetColumnsType(ColSegment_LIST* col_segments); @@ -304,7 +269,7 @@ class TableFinder { // Differs from paper by just looking at marked table partitions // instead of similarity metric. // Modified section 4.1 of paper. - void GetTableColumns(ColSegment_LIST *table_columns); + void GetTableColumns(ColSegment_LIST* table_columns); // Finds regions within a column that potentially contain a table. // Ie, the table columns from GetTableColumns are turned into boxes @@ -312,16 +277,15 @@ class TableFinder { // earlier functions) in the x direction and the min/max extent of // overlapping table columns in the y direction. // Section 4.2 of paper. - void GetTableRegions(ColSegment_LIST *table_columns, - ColSegment_LIST *table_regions); - + void GetTableRegions(ColSegment_LIST* table_columns, + ColSegment_LIST* table_regions); //////// Functions to "patch up" found tables //////// // Merge table regions corresponding to tables spanning multiple columns void GridMergeTableRegions(); - bool BelongToOneTable(const TBOX &box1, const TBOX &box2); + bool BelongToOneTable(const TBOX& box1, const TBOX& box2); // Adjust table boundaries by building a tight bounding box around all // ColPartitions contained in it. @@ -336,8 +300,7 @@ class TableFinder { // Grow a table by increasing the size of the box to include // partitions with significant overlap with the table. void GrowTableToIncludePartials(const TBOX& table_box, - const TBOX& search_range, - TBOX* result_box); + const TBOX& search_range, TBOX* result_box); // Grow a table by expanding to the extents of significantly // overlapping lines. void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range, @@ -372,7 +335,7 @@ class TableFinder { // Displays Colpartitions marked as table row. Overlays them on top of // part_grid_. - void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols, + void DisplayColSegments(ScrollView* win, ColSegment_LIST* cols, ScrollView::Color color); // Displays the colpartitions using a new coloring on an existing window. @@ -383,8 +346,7 @@ class TableFinder { ScrollView::Color table_color); void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, ScrollView::Color default_color); - void DisplayColPartitionConnections(ScrollView* win, - ColPartitionGrid* grid, + void DisplayColPartitionConnections(ScrollView* win, ColPartitionGrid* grid, ScrollView::Color default_color); void DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, ScrollView::Color color); @@ -392,8 +354,7 @@ class TableFinder { // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. - void MakeTableBlocks(ColPartitionGrid* grid, - ColPartitionSet** columns, + void MakeTableBlocks(ColPartitionGrid* grid, ColPartitionSet** columns, WidthCallback* width_cb); ///////////////////////////////////////////////// diff --git a/src/textord/tablerecog.cpp b/src/textord/tablerecog.cpp index 00ecd87a19..6f372c5dbf 100644 --- a/src/textord/tablerecog.cpp +++ b/src/textord/tablerecog.cpp @@ -53,8 +53,8 @@ const double kMarginFactor = 1.1; const double kMaxRowSize = 2.5; // Number of filled columns required to form a strong table row. // For small tables, this is an absolute number. -const double kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }; -const int kGoodRowNumberOfColumnsSmallSize = +const double kGoodRowNumberOfColumnsSmall[] = {2, 2, 2, 2, 2, 3, 3}; +const int kGoodRowNumberOfColumnsSmallSize = sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1; // For large tables, it is a relative number const double kGoodRowNumberOfColumnsLarge = 0.7; @@ -76,11 +76,9 @@ StructuredTable::StructuredTable() space_right_(0), median_cell_height_(0), median_cell_width_(0), - max_text_height_(INT32_MAX) { -} + max_text_height_(INT32_MAX) {} -void StructuredTable::Init() { -} +void StructuredTable::Init() {} void StructuredTable::set_text_grid(ColPartitionGrid* text_grid) { text_grid_ = text_grid; @@ -91,30 +89,18 @@ void StructuredTable::set_line_grid(ColPartitionGrid* line_grid) { void StructuredTable::set_max_text_height(int height) { max_text_height_ = height; } -bool StructuredTable::is_lined() const { - return is_lined_; -} +bool StructuredTable::is_lined() const { return is_lined_; } int StructuredTable::row_count() const { return cell_y_.length() == 0 ? 0 : cell_y_.length() - 1; } int StructuredTable::column_count() const { return cell_x_.length() == 0 ? 0 : cell_x_.length() - 1; } -int StructuredTable::cell_count() const { - return row_count() * column_count(); -} -void StructuredTable::set_bounding_box(const TBOX& box) { - bounding_box_ = box; -} -const TBOX& StructuredTable::bounding_box() const { - return bounding_box_; -} -int StructuredTable::median_cell_height() { - return median_cell_height_; -} -int StructuredTable::median_cell_width() { - return median_cell_width_; -} +int StructuredTable::cell_count() const { return row_count() * column_count(); } +void StructuredTable::set_bounding_box(const TBOX& box) { bounding_box_ = box; } +const TBOX& StructuredTable::bounding_box() const { return bounding_box_; } +int StructuredTable::median_cell_height() { return median_cell_height_; } +int StructuredTable::median_cell_width() { return median_cell_width_; } int StructuredTable::row_height(int row) const { ASSERT_HOST(0 <= row && row < row_count()); return cell_y_[row + 1] - cell_y_[row]; @@ -123,12 +109,8 @@ int StructuredTable::column_width(int column) const { ASSERT_HOST(0 <= column && column < column_count()); return cell_x_[column + 1] - cell_x_[column]; } -int StructuredTable::space_above() const { - return space_above_; -} -int StructuredTable::space_below() const { - return space_below_; -} +int StructuredTable::space_above() const { return space_above_; } +int StructuredTable::space_below() const { return space_below_; } // At this point, we know that the lines are contained // by the box (by FindLinesBoundingBox). @@ -148,17 +130,14 @@ bool StructuredTable::FindLinedStructure() { ColPartition* line = nullptr; while ((line = box_search.NextRectSearch()) != nullptr) { - if (line->IsHorizontalLine()) - cell_y_.push_back(line->MidY()); - if (line->IsVerticalLine()) - cell_x_.push_back(line->MidX()); + if (line->IsHorizontalLine()) cell_y_.push_back(line->MidY()); + if (line->IsVerticalLine()) cell_x_.push_back(line->MidX()); } // HasSignificantLines should guarantee cells. // Because that code is a different class, just gracefully // return false. This could be an assert. - if (cell_x_.length() < 3 || cell_y_.length() < 3) - return false; + if (cell_x_.length() < 3 || cell_y_.length() < 3) return false; cell_x_.sort(); cell_y_.sort(); @@ -211,11 +190,9 @@ bool StructuredTable::FindWhitespacedStructure() { bool StructuredTable::DoesPartitionFit(const ColPartition& part) const { const TBOX& box = part.bounding_box(); for (int i = 0; i < cell_x_.length(); ++i) - if (box.left() < cell_x_[i] && cell_x_[i] < box.right()) - return false; + if (box.left() < cell_x_[i] && cell_x_[i] < box.right()) return false; for (int i = 0; i < cell_y_.length(); ++i) - if (box.bottom() < cell_y_[i] && cell_y_[i] < box.top()) - return false; + if (box.bottom() < cell_y_[i] && cell_y_[i] < box.top()) return false; return true; } @@ -230,7 +207,7 @@ int StructuredTable::CountFilledCellsInColumn(int column) { return CountFilledCells(0, row_count() - 1, column, column); } int StructuredTable::CountFilledCells(int row_start, int row_end, - int column_start, int column_end) { + int column_start, int column_end) { ASSERT_HOST(0 <= row_start && row_start <= row_end && row_end < row_count()); ASSERT_HOST(0 <= column_start && column_start <= column_end && column_end < column_count()); @@ -242,8 +219,7 @@ int StructuredTable::CountFilledCells(int row_start, int row_end, for (int col = column_start; col <= column_end; ++col) { cell_box.set_left(cell_x_[col]); cell_box.set_right(cell_x_[col + 1]); - if (CountPartitions(cell_box) > 0) - ++cell_count; + if (CountPartitions(cell_box) > 0) ++cell_count; } } return cell_count; @@ -255,8 +231,7 @@ int StructuredTable::CountFilledCells(int row_start, int row_end, bool StructuredTable::VerifyRowFilled(int row) { for (int i = 0; i < column_count(); ++i) { double area_filled = CalculateCellFilledPercentage(row, i); - if (area_filled >= kMinFilledArea) - return true; + if (area_filled >= kMinFilledArea) return true; } return false; } @@ -266,8 +241,8 @@ bool StructuredTable::VerifyRowFilled(int row) { double StructuredTable::CalculateCellFilledPercentage(int row, int column) { ASSERT_HOST(0 <= row && row <= row_count()); ASSERT_HOST(0 <= column && column <= column_count()); - const TBOX kCellBox(cell_x_[column], cell_y_[row], - cell_x_[column + 1], cell_y_[row + 1]); + const TBOX kCellBox(cell_x_[column], cell_y_[row], cell_x_[column + 1], + cell_y_[row + 1]); ASSERT_HOST(!kCellBox.null_box()); ColPartitionGridSearch gsearch(text_grid_); @@ -293,12 +268,12 @@ void StructuredTable::Display(ScrollView* window, ScrollView::Color color) { window->Rectangle(bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(), bounding_box_.top()); for (int i = 0; i < cell_x_.length(); i++) { - window->Line(cell_x_[i], bounding_box_.bottom(), - cell_x_[i], bounding_box_.top()); + window->Line(cell_x_[i], bounding_box_.bottom(), cell_x_[i], + bounding_box_.top()); } for (int i = 0; i < cell_y_.length(); i++) { - window->Line(bounding_box_.left(), cell_y_[i], - bounding_box_.right(), cell_y_[i]); + window->Line(bounding_box_.left(), cell_y_[i], bounding_box_.right(), + cell_y_[i]); } window->UpdateWindow(); #endif @@ -323,12 +298,10 @@ bool StructuredTable::VerifyLinedTableCells() { // Function only called when lines exist. ASSERT_HOST(cell_y_.length() >= 2 && cell_x_.length() >= 2); for (int i = 0; i < cell_y_.length(); ++i) { - if (CountHorizontalIntersections(cell_y_[i]) > 0) - return false; + if (CountHorizontalIntersections(cell_y_[i]) > 0) return false; } for (int i = 0; i < cell_x_.length(); ++i) { - if (CountVerticalIntersections(cell_x_[i]) > 0) - return false; + if (CountVerticalIntersections(cell_x_[i]) > 0) return false; } return true; } @@ -364,18 +337,16 @@ void StructuredTable::FindWhitespacedColumns() { gsearch.StartRectSearch(bounding_box_); ColPartition* text = nullptr; while ((text = gsearch.NextRectSearch()) != nullptr) { - if (!text->IsTextType()) - continue; + if (!text->IsTextType()) continue; ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right()); - int spacing = static_cast(text->median_width() * - kHorizontalSpacing / 2.0 + 0.5); + int spacing = + static_cast(text->median_width() * kHorizontalSpacing / 2.0 + 0.5); left_sides.push_back(text->bounding_box().left() - spacing); right_sides.push_back(text->bounding_box().right() + spacing); } // It causes disaster below, so avoid it! - if (left_sides.length() == 0 || right_sides.length() == 0) - return; + if (left_sides.length() == 0 || right_sides.length() == 0) return; // Since data may be inserted in grid order, we sort the left/right sides. left_sides.sort(); @@ -414,34 +385,31 @@ void StructuredTable::FindWhitespacedRows() { gsearch.StartRectSearch(bounding_box_); ColPartition* text = nullptr; while ((text = gsearch.NextRectSearch()) != nullptr) { - if (!text->IsTextType()) - continue; + if (!text->IsTextType()) continue; ASSERT_HOST(text->bounding_box().bottom() < text->bounding_box().top()); - min_bottom = std::min(min_bottom, static_cast(text->bounding_box().bottom())); + min_bottom = + std::min(min_bottom, static_cast(text->bounding_box().bottom())); max_top = std::max(max_top, static_cast(text->bounding_box().top())); // Ignore "tall" text partitions, as these are usually false positive // vertical text or multiple lines pulled together. - if (text->bounding_box().height() > max_text_height_) - continue; + if (text->bounding_box().height() > max_text_height_) continue; - int spacing = static_cast(text->bounding_box().height() * - kVerticalSpacing / 2.0 + 0.5); + int spacing = static_cast( + text->bounding_box().height() * kVerticalSpacing / 2.0 + 0.5); int bottom = text->bounding_box().bottom() - spacing; int top = text->bounding_box().top() + spacing; // For horizontal text, the factor can be negative. This should // probably cause a warning or failure. I haven't actually checked if // it happens. - if (bottom >= top) - continue; + if (bottom >= top) continue; bottom_sides.push_back(bottom); top_sides.push_back(top); } // It causes disaster below, so avoid it! - if (bottom_sides.length() == 0 || top_sides.length() == 0) - return; + if (bottom_sides.length() == 0 || top_sides.length() == 0) return; // Since data may be inserted in grid order, we sort the bottom/top sides. bottom_sides.sort(); @@ -489,12 +457,10 @@ int StructuredTable::FindVerticalMargin(ColPartitionGrid* grid, int border, border); ColPartition* part = nullptr; while ((part = gsearch.NextVerticalSearch(decrease)) != nullptr) { - if (!part->IsTextType() && !part->IsHorizontalLine()) - continue; + if (!part->IsTextType() && !part->IsHorizontalLine()) continue; int distance = decrease ? border - part->bounding_box().top() : part->bounding_box().bottom() - border; - if (distance >= 0) - return distance; + if (distance >= 0) return distance; } return INT32_MAX; } @@ -505,12 +471,10 @@ int StructuredTable::FindHorizontalMargin(ColPartitionGrid* grid, int border, gsearch.StartSideSearch(border, bounding_box_.bottom(), bounding_box_.top()); ColPartition* part = nullptr; while ((part = gsearch.NextSideSearch(decrease)) != nullptr) { - if (!part->IsTextType() && !part->IsVerticalLine()) - continue; + if (!part->IsTextType() && !part->IsVerticalLine()) continue; int distance = decrease ? border - part->bounding_box().right() : part->bounding_box().left() - border; - if (distance >= 0) - return distance; + if (distance >= 0) return distance; } return INT32_MAX; } @@ -546,14 +510,11 @@ void StructuredTable::AbsorbNearbyLines() { gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), bounding_box_.top()); while ((line = gsearch.NextVerticalSearch(false)) != nullptr) { - if (!line->IsHorizontalLine()) - break; + if (!line->IsHorizontalLine()) break; TBOX text_search(bounding_box_.left(), bounding_box_.top() + 1, bounding_box_.right(), line->MidY()); - if (text_search.height() > median_cell_height_ * 2) - break; - if (CountPartitions(text_search) > 0) - break; + if (text_search.height() > median_cell_height_ * 2) break; + if (CountPartitions(text_search) > 0) break; bounding_box_.set_top(line->MidY()); } // As above, is the closest line below good? @@ -561,20 +522,16 @@ void StructuredTable::AbsorbNearbyLines() { gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), bounding_box_.bottom()); while ((line = gsearch.NextVerticalSearch(true)) != nullptr) { - if (!line->IsHorizontalLine()) - break; - TBOX text_search(bounding_box_.left(), line->MidY(), - bounding_box_.right(), bounding_box_.bottom() - 1); - if (text_search.height() > median_cell_height_ * 2) - break; - if (CountPartitions(text_search) > 0) - break; + if (!line->IsHorizontalLine()) break; + TBOX text_search(bounding_box_.left(), line->MidY(), bounding_box_.right(), + bounding_box_.bottom() - 1); + if (text_search.height() > median_cell_height_ * 2) break; + if (CountPartitions(text_search) > 0) break; bounding_box_.set_bottom(line->MidY()); } // TODO(nbeato): vertical lines } - // This function will find all "0 valleys" (of any length) given two // arrays. The arrays are the mins and maxes of partitions (either // left and right or bottom and top). Since the min/max lists are generated @@ -595,8 +552,7 @@ void StructuredTable::FindCellSplitLocations(const GenericVector& min_list, GenericVector* locations) { locations->clear(); ASSERT_HOST(min_list.length() == max_list.length()); - if (min_list.length() == 0) - return; + if (min_list.length() == 0) return; ASSERT_HOST(min_list.get(0) < max_list.get(0)); ASSERT_HOST(min_list.get(min_list.length() - 1) < max_list.get(max_list.length() - 1)); @@ -613,8 +569,7 @@ void StructuredTable::FindCellSplitLocations(const GenericVector& min_list, // Increase the hill count. if (min_list[min_index] < max_list[max_index]) { ++stacked_partitions; - if (last_cross_position != INT32_MAX && - stacked_partitions > max_merged) { + if (last_cross_position != INT32_MAX && stacked_partitions > max_merged) { int mid = (last_cross_position + min_list[min_index]) / 2; locations->push_back(mid); last_cross_position = INT32_MAX; @@ -648,11 +603,9 @@ int StructuredTable::CountVerticalIntersections(int x) { gsearch.StartRectSearch(vertical_box); ColPartition* text = nullptr; while ((text = gsearch.NextRectSearch()) != nullptr) { - if (!text->IsTextType()) - continue; + if (!text->IsTextType()) continue; const TBOX& box = text->bounding_box(); - if (box.left() < x && x < box.right()) - ++count; + if (box.left() < x && x < box.right()) ++count; } return count; } @@ -672,12 +625,10 @@ int StructuredTable::CountHorizontalIntersections(int y) { gsearch.StartRectSearch(horizontal_box); ColPartition* text = nullptr; while ((text = gsearch.NextRectSearch()) != nullptr) { - if (!text->IsTextType()) - continue; + if (!text->IsTextType()) continue; const TBOX& box = text->bounding_box(); - if (box.bottom() < y && y < box.top()) - ++count; + if (box.bottom() < y && y < box.top()) ++count; } return count; } @@ -692,8 +643,7 @@ int StructuredTable::CountPartitions(const TBOX& box) { int count = 0; ColPartition* text = nullptr; while ((text = gsearch.NextRectSearch()) != nullptr) { - if (text->IsTextType()) - ++count; + if (text->IsTextType()) ++count; } return count; } @@ -707,14 +657,11 @@ TableRecognizer::TableRecognizer() line_grid_(nullptr), min_height_(0), min_width_(0), - max_text_height_(INT32_MAX) { -} + max_text_height_(INT32_MAX) {} -TableRecognizer::~TableRecognizer() { -} +TableRecognizer::~TableRecognizer() {} -void TableRecognizer::Init() { -} +void TableRecognizer::Init() {} void TableRecognizer::set_text_grid(ColPartitionGrid* text_grid) { text_grid_ = text_grid; @@ -722,12 +669,8 @@ void TableRecognizer::set_text_grid(ColPartitionGrid* text_grid) { void TableRecognizer::set_line_grid(ColPartitionGrid* line_grid) { line_grid_ = line_grid; } -void TableRecognizer::set_min_height(int height) { - min_height_ = height; -} -void TableRecognizer::set_min_width(int width) { - min_width_ = width; -} +void TableRecognizer::set_min_height(int height) { min_height_ = height; } +void TableRecognizer::set_min_width(int width) { min_width_ = width; } void TableRecognizer::set_max_text_height(int height) { max_text_height_ = height; } @@ -741,14 +684,12 @@ StructuredTable* TableRecognizer::RecognizeTable(const TBOX& guess) { // Try to solve this simple case, a table with *both* // vertical and horizontal lines. - if (RecognizeLinedTable(guess, table)) - return table; + if (RecognizeLinedTable(guess, table)) return table; // Fallback to whitespace if that failed. // TODO(nbeato): Break this apart to take advantage of horizontal // lines or vertical lines when present. - if (RecognizeWhitespacedTable(guess, table)) - return table; + if (RecognizeWhitespacedTable(guess, table)) return table; // No table found... delete table; @@ -757,11 +698,9 @@ StructuredTable* TableRecognizer::RecognizeTable(const TBOX& guess) { bool TableRecognizer::RecognizeLinedTable(const TBOX& guess_box, StructuredTable* table) { - if (!HasSignificantLines(guess_box)) - return false; + if (!HasSignificantLines(guess_box)) return false; TBOX line_bound = guess_box; - if (!FindLinesBoundingBox(&line_bound)) - return false; + if (!FindLinesBoundingBox(&line_bound)) return false; table->set_bounding_box(line_bound); return table->FindLinedStructure(); } @@ -781,10 +720,8 @@ bool TableRecognizer::HasSignificantLines(const TBOX& guess) { int horizontal_count = 0; while ((line = box_search.NextRectSearch()) != nullptr) { - if (line->IsHorizontalLine()) - ++horizontal_count; - if (line->IsVerticalLine()) - ++vertical_count; + if (line->IsHorizontalLine()) ++horizontal_count; + if (line->IsVerticalLine()) ++vertical_count; } return vertical_count >= kLinedTableMinVerticalLines && @@ -814,8 +751,7 @@ bool TableRecognizer::HasSignificantLines(const TBOX& guess) { bool TableRecognizer::FindLinesBoundingBox(TBOX* bounding_box) { // The first iteration will tell us if there are lines // present and shrink the box to a minimal iterative size. - if (!FindLinesBoundingBoxIteration(bounding_box)) - return false; + if (!FindLinesBoundingBoxIteration(bounding_box)) return false; // Keep growing until the area of the table stabilizes. // The box can only get bigger, increasing area. @@ -938,18 +874,16 @@ bool TableRecognizer::RecognizeWhitespacedTable(const TBOX& guess_box, } previous_below = table->space_below(); } else { - --chances; + --chances; } } - if (chances <= 0) - break; + if (chances <= 0) break; last_bottom = bottom; bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(), last_bottom, true); } - if (!found_good_border) - return false; + if (!found_good_border) return false; // TODO(nbeato) comments: follow modified code above... put it in a function! found_good_border = false; @@ -985,24 +919,21 @@ bool TableRecognizer::RecognizeWhitespacedTable(const TBOX& guess_box, } previous_above = table->space_above(); } else { - --chances; + --chances; } } - if (chances <= 0) - break; + if (chances <= 0) break; last_top = top; - top = NextHorizontalSplit(guess_box.left(), guess_box.right(), - last_top, false); + top = NextHorizontalSplit(guess_box.left(), guess_box.right(), last_top, + false); } - if (!found_good_border) - return false; + if (!found_good_border) return false; // If we get here, this shouldn't happen. It can be an assert, but // I haven't tested it enough to make it crash things. - if (best_box.null_box()) - return false; + if (best_box.null_box()) return false; // Given the best locations, fit the box to those locations. table->set_bounding_box(best_box); @@ -1023,10 +954,8 @@ int TableRecognizer::NextHorizontalSplit(int left, int right, int y, ColPartition* text = nullptr; int last_y = y; while ((text = gsearch.NextVerticalSearch(top_to_bottom)) != nullptr) { - if (!text->IsTextType() || !text->IsHorizontalType()) - continue; - if (text->bounding_box().height() > max_text_height_) - continue; + if (!text->IsTextType() || !text->IsHorizontalType()) continue; + if (text->bounding_box().height() > max_text_height_) continue; const TBOX& text_box = text->bounding_box(); if (top_to_bottom && (last_y >= y || last_y <= text_box.top())) { @@ -1050,8 +979,7 @@ int TableRecognizer::NextHorizontalSplit(int left, int right, int y, // sometimes (like a phantom row is introduced). There's something going // on in the cell_y_ data member before this is called... not certain. bool TableRecognizer::IsWeakTableRow(StructuredTable* table, int row) { - if (!table->VerifyRowFilled(row)) - return false; + if (!table->VerifyRowFilled(row)) return false; double threshold = 0.0; if (table->column_count() > kGoodRowNumberOfColumnsSmallSize) diff --git a/src/textord/tablerecog.h b/src/textord/tablerecog.h index c1019df753..eaa8a0cf8f 100644 --- a/src/textord/tablerecog.h +++ b/src/textord/tablerecog.h @@ -122,8 +122,8 @@ class StructuredTable { int CountFilledCells(); int CountFilledCellsInRow(int row); int CountFilledCellsInColumn(int column); - int CountFilledCells(int row_start, int row_end, - int column_start, int column_end); + int CountFilledCells(int row_start, int row_end, int column_start, + int column_end); // Makes sure that at least one cell in a row has substantial area filled. // This can filter out large whitespace caused by growing tables too far @@ -234,8 +234,8 @@ class StructuredTable { //////// // Input data, used as read only data to make decisions. - ColPartitionGrid* text_grid_; // Text ColPartitions - ColPartitionGrid* line_grid_; // Line ColPartitions + ColPartitionGrid* text_grid_; // Text ColPartitions + ColPartitionGrid* line_grid_; // Line ColPartitions // Table structure. // bounding box is a convenient external representation. // cell_x_ and cell_y_ indicate the grid lines. @@ -364,8 +364,8 @@ class TableRecognizer { static bool IsWeakTableRow(StructuredTable* table, int row); // Input data, used as read only data to make decisions. - ColPartitionGrid* text_grid_; // Text ColPartitions - ColPartitionGrid* line_grid_; // Line ColPartitions + ColPartitionGrid* text_grid_; // Text ColPartitions + ColPartitionGrid* line_grid_; // Line ColPartitions // Table constraints, a "good" table must satisfy these. int min_height_; int min_width_; @@ -375,4 +375,4 @@ class TableRecognizer { } // namespace tesseract -#endif /* TABLERECOG_H_ */ +#endif /* TABLERECOG_H_ */ diff --git a/src/textord/tabvector.cpp b/src/textord/tabvector.cpp index 6bd5477821..13a4fbcf39 100644 --- a/src/textord/tabvector.cpp +++ b/src/textord/tabvector.cpp @@ -21,12 +21,12 @@ #include "config_auto.h" #endif -#include "tabvector.h" #include "blobbox.h" #include "colfind.h" #include "colpartitionset.h" #include "detlinefit.h" #include "statistc.h" +#include "tabvector.h" #include @@ -53,10 +53,11 @@ const double kMinAlignedGutter = 0.25; const double kMinRaggedGutter = 1.5; double_VAR(textord_tabvector_vertical_gap_fraction, 0.5, - "max fraction of mean blob width allowed for vertical gaps in vertical text"); + "max fraction of mean blob width allowed for vertical gaps in " + "vertical text"); double_VAR(textord_tabvector_vertical_box_ratio, 0.5, - "Fraction of box matches required to declare a line vertical"); + "Fraction of box matches required to declare a line vertical"); ELISTIZE(TabConstraint) @@ -75,12 +76,10 @@ void TabConstraint::CreateConstraint(TabVector* vector, bool is_top) { // Test to see if the constraints are compatible enough to merge. bool TabConstraint::CompatibleConstraints(TabConstraint_LIST* list1, TabConstraint_LIST* list2) { - if (list1 == list2) - return false; + if (list1 == list2) return false; int y_min = -INT32_MAX; int y_max = INT32_MAX; - if (textord_debug_tabfind > 3) - tprintf("Testing constraint compatibility\n"); + if (textord_debug_tabfind > 3) tprintf("Testing constraint compatibility\n"); GetConstraints(list1, &y_min, &y_max); GetConstraints(list2, &y_min, &y_max); if (textord_debug_tabfind > 3) @@ -92,16 +91,13 @@ bool TabConstraint::CompatibleConstraints(TabConstraint_LIST* list1, // The second list is deleted. void TabConstraint::MergeConstraints(TabConstraint_LIST* list1, TabConstraint_LIST* list2) { - if (list1 == list2) - return; + if (list1 == list2) return; TabConstraint_IT it(list2); - if (textord_debug_tabfind > 3) - tprintf("Merging constraints\n"); + if (textord_debug_tabfind > 3) tprintf("Merging constraints\n"); // The vectors of all constraints on list2 are now going to be on list1. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabConstraint* constraint = it.data(); - if (textord_debug_tabfind> 3) - constraint->vector_->Print("Merge"); + if (textord_debug_tabfind > 3) constraint->vector_->Print("Merge"); if (constraint->is_top_) constraint->vector_->set_top_constraints(list1); else @@ -135,7 +131,7 @@ void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) { } TabConstraint::TabConstraint(TabVector* vector, bool is_top) - : vector_(vector), is_top_(is_top) { + : vector_(vector), is_top_(is_top) { if (is_top) { y_min_ = vector->endpt().y(); y_max_ = vector->extended_ymax(); @@ -146,8 +142,8 @@ TabConstraint::TabConstraint(TabVector* vector, bool is_top) } // Get the max of the mins and the min of the maxes. -void TabConstraint::GetConstraints(TabConstraint_LIST* constraints, - int* y_min, int* y_max) { +void TabConstraint::GetConstraints(TabConstraint_LIST* constraints, int* y_min, + int* y_max) { TabConstraint_IT it(constraints); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabConstraint* constraint = it.data(); @@ -165,7 +161,6 @@ CLISTIZE(TabVector) // The constructor is private. See the bottom of the file... - // Public factory to build a TabVector from a list of boxes. // The TabVector will be of the given alignment type. // The input vertical vector is used in fitting, and the output @@ -175,11 +170,11 @@ CLISTIZE(TabVector) // extension to the line segment that can be used to align with others. // The input CLIST of BLOBNBOX good_points is consumed and taken over. TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical, - int extended_start_y, int extended_end_y, - BLOBNBOX_CLIST* good_points, - int* vertical_x, int* vertical_y) { - TabVector* vector = new TabVector(extended_start_y, extended_end_y, - alignment, good_points); + int extended_start_y, int extended_end_y, + BLOBNBOX_CLIST* good_points, int* vertical_x, + int* vertical_y) { + TabVector* vector = + new TabVector(extended_start_y, extended_end_y, alignment, good_points); if (!vector->Fit(vertical, false)) { delete vector; return nullptr; @@ -198,11 +193,17 @@ TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical, // of the blob, but its extended bounds from the bounds of the original. TabVector::TabVector(const TabVector& src, TabAlignment alignment, const ICOORD& vertical_skew, BLOBNBOX* blob) - : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_), - sort_key_(0), percent_score_(0), mean_width_(0), - needs_refit_(true), needs_evaluation_(true), intersects_other_lines_(false), - alignment_(alignment), - top_constraints_(nullptr), bottom_constraints_(nullptr) { + : extended_ymin_(src.extended_ymin_), + extended_ymax_(src.extended_ymax_), + sort_key_(0), + percent_score_(0), + mean_width_(0), + needs_refit_(true), + needs_evaluation_(true), + intersects_other_lines_(false), + alignment_(alignment), + top_constraints_(nullptr), + bottom_constraints_(nullptr) { BLOBNBOX_C_IT it(&boxes_); it.add_to_end(blob); TBOX box = blob->bounding_box(); @@ -213,11 +214,9 @@ TabVector::TabVector(const TabVector& src, TabAlignment alignment, startpt_ = box.botright(); endpt_ = box.topright(); } - sort_key_ = SortKey(vertical_skew, - (startpt_.x() + endpt_.x()) / 2, + sort_key_ = SortKey(vertical_skew, (startpt_.x() + endpt_.x()) / 2, (startpt_.y() + endpt_.y()) / 2); - if (textord_debug_tabfind > 3) - Print("Constructed a new tab vector:"); + if (textord_debug_tabfind > 3) Print("Constructed a new tab vector:"); } // Copies basic attributes of a tab vector for simple operations. @@ -245,8 +244,7 @@ void TabVector::ExtendToBox(BLOBNBOX* new_blob) { BLOBNBOX* blob = it.data(); TBOX box = blob->bounding_box(); while (!it.at_last() && box.top() <= new_box.top()) { - if (blob == new_blob) - return; // We have it already. + if (blob == new_blob) return; // We have it already. it.forward(); blob = it.data(); box = blob->bounding_box(); @@ -386,8 +384,8 @@ void TabVector::MergeSimilarTabVectors(const ICOORD& vertical, // Return true if this vector is the same side, overlaps, and close // enough to the other to be merged. -bool TabVector::SimilarTo(const ICOORD& vertical, - const TabVector& other, BlobGrid* grid) const { +bool TabVector::SimilarTo(const ICOORD& vertical, const TabVector& other, + BlobGrid* grid) const { if ((IsRightTab() && other.IsRightTab()) || (IsLeftTab() && other.IsLeftTab())) { // If they don't overlap, at least in extensions, then there is no chance. @@ -395,8 +393,7 @@ bool TabVector::SimilarTo(const ICOORD& vertical, return false; // A fast approximation to the scale factor of the sort_key_. int v_scale = abs(vertical.y()); - if (v_scale == 0) - v_scale = 1; + if (v_scale == 0) v_scale = 1; // If they are close enough, then OK. if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ && sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) @@ -413,8 +410,8 @@ bool TabVector::SimilarTo(const ICOORD& vertical, // If there is nothing in the rectangle between the vector that is going to // move, and the place it is moving to, then they can be merged. // Setup a vertical search for any blob. - const TabVector* mover = (IsRightTab() && - sort_key_ < other.sort_key_) ? this : &other; + const TabVector* mover = + (IsRightTab() && sort_key_ < other.sort_key_) ? this : &other; int top_y = mover->endpt_.y(); int bottom_y = mover->startpt_.y(); int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y)); @@ -431,17 +428,16 @@ bool TabVector::SimilarTo(const ICOORD& vertical, BLOBNBOX* blob; while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) { const TBOX& box = blob->bounding_box(); - if (box.top() > bottom_y) - return true; // Nothing found. - if (box.bottom() < top_y) - continue; // Doesn't overlap. + if (box.top() > bottom_y) return true; // Nothing found. + if (box.bottom() < top_y) continue; // Doesn't overlap. int left_at_box = XAtY(box.bottom()); int right_at_box = left_at_box; if (IsRightTab()) right_at_box += shift; else left_at_box -= shift; - if (std::min(right_at_box, static_cast(box.right())) > std::max(left_at_box, static_cast(box.left()))) + if (std::min(right_at_box, static_cast(box.right())) > + std::max(left_at_box, static_cast(box.left()))) return false; } return true; // Nothing found. @@ -485,13 +481,11 @@ void TabVector::MergeWith(const ICOORD& vertical, TabVector* other) { // that makes them partners. // Groups of identical partners are merged into one. void TabVector::AddPartner(TabVector* partner) { - if (IsSeparator() || partner->IsSeparator()) - return; + if (IsSeparator() || partner->IsSeparator()) return; TabVector_C_IT it(&partners_); if (!it.empty()) { it.move_to_last(); - if (it.data() == partner) - return; + if (it.data() == partner) return; } it.add_after_then_move(partner); } @@ -500,21 +494,14 @@ void TabVector::AddPartner(TabVector* partner) { bool TabVector::IsAPartner(const TabVector* other) { TabVector_C_IT it(&partners_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - if (it.data() == other) - return true; + if (it.data() == other) return true; } return false; } // These names must be synced with the TabAlignment enum in tabvector.h. -const char* kAlignmentNames[] = { - "Left Aligned", - "Left Ragged", - "Center", - "Right Aligned", - "Right Ragged", - "Separator" -}; +const char* kAlignmentNames[] = {"Left Aligned", "Left Ragged", "Center", + "Right Aligned", "Right Ragged", "Separator"}; // Print basic information about this tab vector. void TabVector::Print(const char* prefix) { @@ -533,8 +520,8 @@ void TabVector::Debug(const char* prefix) { for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); const TBOX& box = bbox->bounding_box(); - tprintf("Box at (%d,%d)->(%d,%d)\n", - box.left(), box.bottom(), box.right(), box.top()); + tprintf("Box at (%d,%d)->(%d,%d)\n", box.left(), box.bottom(), box.right(), + box.top()); } } @@ -567,10 +554,8 @@ void TabVector::Display(ScrollView* tab_win) { // Refit the line and/or re-evaluate the vector if the dirty flags are set. void TabVector::FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder) { - if (needs_refit_) - Fit(vertical, true); - if (needs_evaluation_) - Evaluate(vertical, finder); + if (needs_refit_) Fit(vertical, true); + if (needs_evaluation_) Evaluate(vertical, finder); } // Evaluate the vector in terms of coverage of its length by good-looking @@ -636,9 +621,9 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width, &neighbour_gap); if (debug) { - tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", - box.left(), box.bottom(), box.right(), box.top(), - gutter_width, neighbour_gap); + tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", box.left(), + box.bottom(), box.right(), box.top(), gutter_width, + neighbour_gap); } // Now we can make the test. if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) { @@ -664,14 +649,13 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { SetYStart(box.bottom()); } prev_good_box = &box; - if (bbox->flow() == BTFT_TEXT_ON_IMAGE) - text_on_image = true; + if (bbox->flow() == BTFT_TEXT_ON_IMAGE) text_on_image = true; } else { // Get rid of boxes that are not good. if (debug) { tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", - box.left(), box.bottom(), box.right(), box.top(), - gutter_width, neighbour_gap); + box.left(), box.bottom(), box.right(), box.top(), gutter_width, + neighbour_gap); } it.extract(); ++num_deleted_boxes; @@ -736,8 +720,7 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { if (num_deleted_boxes > 0) { needs_refit_ = true; FitAndEvaluateIfNeeded(vertical, finder); - if (boxes_.empty()) - return; + if (boxes_.empty()) return; } // Test the gutter over the whole vector, instead of just at the boxes. int required_shift; @@ -749,11 +732,10 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter; min_gutter_width *= mean_height; int max_gutter_width = IntCastRounded(min_gutter_width) + 1; - if (median_gutter > max_gutter_width) - max_gutter_width = median_gutter; - int gutter_width = finder->GutterWidth(search_bottom, search_top, *this, - text_on_image, max_gutter_width, - &required_shift); + if (median_gutter > max_gutter_width) max_gutter_width = median_gutter; + int gutter_width = + finder->GutterWidth(search_bottom, search_top, *this, text_on_image, + max_gutter_width, &required_shift); if (gutter_width < min_gutter_width) { if (debug) { tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n", @@ -785,8 +767,7 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) { if (boxes_.empty()) { // Don't refit something with no boxes, as that only happens // in Evaluate, and we don't want to end up with a zero vector. - if (!force_parallel) - return false; + if (!force_parallel) return false; // If we are forcing parallel, then we just need to set the sort_key_. ICOORD midpt = startpt_; midpt += endpt_; @@ -844,10 +825,8 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) { sort_key_ = key; startpt_ = ICOORD(x1, top_y); } - if (it.at_first()) - start_y = bottom_y; - if (it.at_last()) - end_y = top_y; + if (it.at_first()) start_y = bottom_y; + if (it.at_last()) end_y = top_y; } if (width_count > 0) { mean_width_ = (mean_width_ + width_count - 1) / width_count; @@ -867,8 +846,7 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) { // Returns the singleton partner if there is one, or nullptr otherwise. TabVector* TabVector::GetSinglePartner() { - if (!partners_.singleton()) - return nullptr; + if (!partners_.singleton()) return nullptr; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); return partner; @@ -877,8 +855,7 @@ TabVector* TabVector::GetSinglePartner() { // Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise nullptr. TabVector* TabVector::VerticalTextlinePartner() { - if (!partners_.singleton()) - return nullptr; + if (!partners_.singleton()) return nullptr; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); BLOBNBOX_C_IT box_it1(&boxes_); @@ -893,8 +870,7 @@ TabVector* TabVector::VerticalTextlinePartner() { int num_unmatched = 0; int total_widths = 0; int width = startpt().x() - partner->startpt().x(); - if (width < 0) - width = -width; + if (width < 0) width = -width; STATS gaps(0, width * 2); BLOBNBOX* prev_bbox = nullptr; box_it2.mark_cycle_pt(); @@ -922,14 +898,14 @@ TabVector* TabVector::VerticalTextlinePartner() { double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; int min_box_match = static_cast((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); - bool is_vertical = (gaps.get_total() > 0 && - num_matched >= min_box_match && + bool is_vertical = (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { - tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " - "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", - gaps.get_total(), num_matched, num_unmatched, min_box_match, - gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); + tprintf( + "gaps=%d, matched=%d, unmatched=%d, min_match=%d " + "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", + gaps.get_total(), num_matched, num_unmatched, min_box_match, + gaps.median(), avg_width, max_gap, is_vertical ? "Yes" : "No"); } return (is_vertical) ? partner : nullptr; } @@ -937,10 +913,16 @@ TabVector* TabVector::VerticalTextlinePartner() { // The constructor is private. TabVector::TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, BLOBNBOX_CLIST* boxes) - : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax), - sort_key_(0), percent_score_(0), mean_width_(0), - needs_refit_(true), needs_evaluation_(true), alignment_(alignment), - top_constraints_(nullptr), bottom_constraints_(nullptr) { + : extended_ymin_(extended_ymin), + extended_ymax_(extended_ymax), + sort_key_(0), + percent_score_(0), + mean_width_(0), + needs_refit_(true), + needs_evaluation_(true), + alignment_(alignment), + top_constraints_(nullptr), + bottom_constraints_(nullptr) { BLOBNBOX_C_IT it(&boxes_); it.add_list_after(boxes); } @@ -963,7 +945,8 @@ void TabVector::Delete(TabVector* replacement) { break; } } - // Remove all references to this, and replace with replacement if not nullptr. + // Remove all references to this, and replace with replacement if not + // nullptr. for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { TabVector* p_partner = p_it.data(); if (p_partner == this) { @@ -979,5 +962,4 @@ void TabVector::Delete(TabVector* replacement) { delete this; } - } // namespace tesseract. diff --git a/src/textord/tabvector.h b/src/textord/tabvector.h index 981412ab5b..b19392a8e8 100644 --- a/src/textord/tabvector.h +++ b/src/textord/tabvector.h @@ -20,12 +20,12 @@ #ifndef TESSERACT_TEXTORD_TABVECTOR_H_ #define TESSERACT_TEXTORD_TABVECTOR_H_ +#include "bbgrid.h" #include "blobgrid.h" #include "clst.h" #include "elst.h" #include "elst2.h" #include "rect.h" -#include "bbgrid.h" #include @@ -34,11 +34,12 @@ class ScrollView; namespace tesseract { - extern double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5, - "Max fraction of mean blob width allowed for vertical gaps in vertical text"); -extern double_VAR_H(textord_tabvector_vertical_box_ratio, 0.5, - "Fraction of box matches required to declare a line vertical"); + "Max fraction of mean blob width allowed for vertical gaps " + "in vertical text"); +extern double_VAR_H( + textord_tabvector_vertical_box_ratio, 0.5, + "Fraction of box matches required to declare a line vertical"); // The alignment type that a tab vector represents. // Keep this enum synced with kAlignmentNames in tabvector.cpp. @@ -95,8 +96,8 @@ class TabConstraint : public ELIST_LINK { TabConstraint(TabVector* vector, bool is_top); // Get the max of the mins and the min of the maxes. - static void GetConstraints(TabConstraint_LIST* constraints, - int* y_min, int* y_max); + static void GetConstraints(TabConstraint_LIST* constraints, int* y_min, + int* y_max); // The TabVector this constraint applies to. TabVector* vector_; @@ -125,9 +126,9 @@ class TabVector : public ELIST2_LINK { // extension to the line segment that can be used to align with others. // The input CLIST of BLOBNBOX good_points is consumed and taken over. static TabVector* FitVector(TabAlignment alignment, ICOORD vertical, - int extended_start_y, int extended_end_y, - BLOBNBOX_CLIST* good_points, - int* vertical_x, int* vertical_y); + int extended_start_y, int extended_end_y, + BLOBNBOX_CLIST* good_points, int* vertical_x, + int* vertical_y); // Build a ragged TabVector by copying another's direction, shifting it // to match the given blob, and making its initial extent the height @@ -143,42 +144,22 @@ class TabVector : public ELIST2_LINK { TabVector* ShallowCopy() const; // Simple accessors. - const ICOORD& startpt() const { - return startpt_; - } - const ICOORD& endpt() const { - return endpt_; - } - int extended_ymax() const { - return extended_ymax_; - } - int extended_ymin() const { - return extended_ymin_; - } - int sort_key() const { - return sort_key_; - } - int mean_width() const { - return mean_width_; - } + const ICOORD& startpt() const { return startpt_; } + const ICOORD& endpt() const { return endpt_; } + int extended_ymax() const { return extended_ymax_; } + int extended_ymin() const { return extended_ymin_; } + int sort_key() const { return sort_key_; } + int mean_width() const { return mean_width_; } void set_top_constraints(TabConstraint_LIST* constraints) { top_constraints_ = constraints; } void set_bottom_constraints(TabConstraint_LIST* constraints) { bottom_constraints_ = constraints; } - TabVector_CLIST* partners() { - return &partners_; - } - void set_startpt(const ICOORD& start) { - startpt_ = start; - } - void set_endpt(const ICOORD& end) { - endpt_ = end; - } - bool intersects_other_lines() const { - return intersects_other_lines_; - } + TabVector_CLIST* partners() { return &partners_; } + void set_startpt(const ICOORD& start) { startpt_ = start; } + void set_endpt(const ICOORD& end) { endpt_ = end; } + bool intersects_other_lines() const { return intersects_other_lines_; } void set_intersects_other_lines(bool value) { intersects_other_lines_ = value; } @@ -198,11 +179,12 @@ class TabVector : public ELIST2_LINK { // Compute the vertical overlap with the other TabVector. int VOverlap(const TabVector& other) const { return std::min(other.endpt_.y(), endpt_.y()) - - std::max(other.startpt_.y(), startpt_.y()); + std::max(other.startpt_.y(), startpt_.y()); } // Compute the vertical overlap with the given y bounds. int VOverlap(int top_y, int bottom_y) const { - return std::min(top_y, static_cast(endpt_.y())) - std::max(bottom_y, static_cast(startpt_.y())); + return std::min(top_y, static_cast(endpt_.y())) - + std::max(bottom_y, static_cast(startpt_.y())); } // Compute the extended vertical overlap with the given y bounds. int ExtendedOverlap(int top_y, int bottom_y) const { @@ -218,13 +200,9 @@ class TabVector : public ELIST2_LINK { return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED; } // Return true if this is a separator. - bool IsSeparator() const { - return alignment_ == TA_SEPARATOR; - } + bool IsSeparator() const { return alignment_ == TA_SEPARATOR; } // Return true if this is a center aligned tab stop. - bool IsCenterTab() const { - return alignment_ == TA_CENTER_JUSTIFIED; - } + bool IsCenterTab() const { return alignment_ == TA_CENTER_JUSTIFIED; } // Return true if this is a ragged tab top, either left or right. bool IsRagged() const { return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED; @@ -237,19 +215,13 @@ class TabVector : public ELIST2_LINK { } // Return true if the vector has no partners. - bool Partnerless() { - return partners_.empty(); - } + bool Partnerless() { return partners_.empty(); } // Return the number of tab boxes in this vector. - int BoxCount() { - return boxes_.length(); - } + int BoxCount() { return boxes_.length(); } // Lock the vector from refits by clearing the boxes_ list. - void Freeze() { - boxes_.shallow_clear(); - } + void Freeze() { boxes_.shallow_clear(); } // Flip x and y on the ends so a vector can be created from flipped input. void XYFlip() { @@ -330,8 +302,8 @@ class TabVector : public ELIST2_LINK { // Return true if this vector is the same side, overlaps, and close // enough to the other to be merged. - bool SimilarTo(const ICOORD& vertical, - const TabVector& other, BlobGrid* grid) const; + bool SimilarTo(const ICOORD& vertical, const TabVector& other, + BlobGrid* grid) const; // Eat the other TabVector into this and delete it. void MergeWith(const ICOORD& vertical, TabVector* other); @@ -384,8 +356,8 @@ class TabVector : public ELIST2_LINK { private: // Constructor is private as the static factory is the external way // to build a TabVector. - TabVector(int extended_ymin, int extended_ymax, - TabAlignment alignment, BLOBNBOX_CLIST* boxes); + TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, + BLOBNBOX_CLIST* boxes); // Delete this, but first, repoint all the partners to point to // replacement. If replacement is nullptr, then partner relationships diff --git a/src/textord/textlineprojection.cpp b/src/textord/textlineprojection.cpp index 5d48ff0210..103df66e3a 100644 --- a/src/textord/textlineprojection.cpp +++ b/src/textord/textlineprojection.cpp @@ -15,13 +15,13 @@ #include "config_auto.h" #endif -#include "textlineprojection.h" #include "allheaders.h" -#include "bbgrid.h" // Base class. -#include "blobbox.h" // BlobNeighourDir. +#include "bbgrid.h" // Base class. +#include "blobbox.h" // BlobNeighourDir. #include "blobs.h" #include "colpartition.h" #include "normalis.h" +#include "textlineprojection.h" #include @@ -45,14 +45,12 @@ const int kMaxTabStopOverrun = 6; namespace tesseract { TextlineProjection::TextlineProjection(int resolution) - : x_origin_(0), y_origin_(0), pix_(nullptr) { + : x_origin_(0), y_origin_(0), pix_(nullptr) { // The projection map should be about 100 ppi, whatever the input. scale_factor_ = IntCastRounded(resolution / 100.0); if (scale_factor_ < 1) scale_factor_ = 1; } -TextlineProjection::~TextlineProjection() { - pixDestroy(&pix_); -} +TextlineProjection::~TextlineProjection() { pixDestroy(&pix_); } // Build the projection profile given the input_block containing lists of // blobs, a rotation to convert to image coords, @@ -75,7 +73,7 @@ void TextlineProjection::ConstructProjection(TO_BLOCK* input_block, ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map); ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map); Pix* final_pix = pixBlockconv(pix_, 1, 1); -// Pix* final_pix = pixBlockconv(pix_, 2, 2); + // Pix* final_pix = pixBlockconv(pix_, 2, 2); pixDestroy(&pix_); pix_ = final_pix; } @@ -110,8 +108,7 @@ void TextlineProjection::MoveNonTextlineBlobs( for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); const TBOX& box = blob->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, box.left(), - box.bottom()); + bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); if (BoxOutOfHTextline(box, nullptr, debug) && !blob->UniquelyVertical()) { blob->ClearNeighbours(); small_it.add_to_end(it.extract()); @@ -142,8 +139,8 @@ void TextlineProjection::DisplayProjection() const { col_data[x] = result; } } - ScrollView* win = new ScrollView("Projection", 0, 0, - width, height, width, height); + ScrollView* win = + new ScrollView("Projection", 0, 0, width, height, width, height); win->Image(pixc, 0, 0); win->Update(); pixDestroy(&pixc); @@ -169,8 +166,8 @@ int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box, part_box.set_right(part.median_right()); } // Now use DistanceOfBoxFromBox to make the actual calculation. - return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(), - denorm, debug); + return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(), denorm, + debug); } // Compute the distance from the from_box to the to_box using curved @@ -244,11 +241,11 @@ int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box, denorm->DenormTransform(nullptr, end_pt, &end_pt); } if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) { - perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, - end_pt.y); + perpendicular_gap = + VerticalDistance(debug, start_pt.x, start_pt.y, end_pt.y); } else { - perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, - start_pt.y); + perpendicular_gap = + HorizontalDistance(debug, start_pt.x, end_pt.x, start_pt.y); } } // The parallel_gap weighs less than the perpendicular_gap. @@ -273,8 +270,8 @@ int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box, // 5 1/x // 7 1/x // Total: 1 + x + 3/x where x = kWrongWayPenalty. -int TextlineProjection::VerticalDistance(bool debug, int x, - int y1, int y2) const { +int TextlineProjection::VerticalDistance(bool debug, int x, int y1, + int y2) const { x = ImageXToProjectionX(x); y1 = ImageYToProjectionY(y1); y2 = ImageYToProjectionY(y2); @@ -290,8 +287,8 @@ int TextlineProjection::VerticalDistance(bool debug, int x, data += wpl; int pixel = GET_DATA_BYTE(data, x); if (debug) - tprintf("At (%d,%d), pix = %d, prev=%d\n", - x, y + step, pixel, prev_pixel); + tprintf("At (%d,%d), pix = %d, prev=%d\n", x, y + step, pixel, + prev_pixel); if (pixel < prev_pixel) distance += kWrongWayPenalty; else if (pixel > prev_pixel) @@ -301,7 +298,7 @@ int TextlineProjection::VerticalDistance(bool debug, int x, prev_pixel = pixel; } return distance * scale_factor_ + - right_way_steps * scale_factor_ / kWrongWayPenalty; + right_way_steps * scale_factor_ / kWrongWayPenalty; } // Compute the distance between (x1, y) and (x2, y) using the rule that @@ -321,8 +318,8 @@ int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2, for (int x = x1; x != x2; x += step) { int pixel = GET_DATA_BYTE(data, x + step); if (debug) - tprintf("At (%d,%d), pix = %d, prev=%d\n", - x + step, y, pixel, prev_pixel); + tprintf("At (%d,%d), pix = %d, prev=%d\n", x + step, y, pixel, + prev_pixel); if (pixel < prev_pixel) distance += kWrongWayPenalty; else if (pixel > prev_pixel) @@ -332,15 +329,15 @@ int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2, prev_pixel = pixel; } return distance * scale_factor_ + - right_way_steps * scale_factor_ / kWrongWayPenalty; + right_way_steps * scale_factor_ / kWrongWayPenalty; } // Returns true if the blob appears to be outside of a textline. // Such blobs are potentially diacritics (even if large in Thai) and should // be kept away from initial textline finding. bool TextlineProjection::BoxOutOfHTextline(const TBOX& box, - const DENORM* denorm, - bool debug) const { + const DENORM* denorm, + bool debug) const { int grad1 = 0; int grad2 = 0; EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, nullptr, nullptr); @@ -349,8 +346,7 @@ bool TextlineProjection::BoxOutOfHTextline(const TBOX& box, if (total_result >= 6) return false; // Strongly in textline. // Medium strength: if either gradient is negative, it is likely outside // the body of the textline. - if (worst_result < 0) - return true; + if (worst_result < 0) return true; return false; } @@ -412,7 +408,8 @@ int TextlineProjection::EvaluateColPartition(const ColPartition& part, // several layers of helpers below. int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const { - return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr); + return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, + nullptr); } // Internal version of EvaluateBox returns the unclipped gradients as well @@ -422,15 +419,14 @@ int TextlineProjection::EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug, int* hgrad1, int* hgrad2, int* vgrad1, int* vgrad2) const { - int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), - box.top(), true); + int top_gradient = + BestMeanGradientInRow(denorm, box.left(), box.right(), box.top(), true); int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(), box.bottom(), false); int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), box.top(), true); - int right_gradient = -BestMeanGradientInColumn(denorm, box.right(), - box.bottom(), box.top(), - false); + int right_gradient = -BestMeanGradientInColumn( + denorm, box.right(), box.bottom(), box.top(), false); int top_clipped = std::max(top_gradient, 0); int bottom_clipped = std::max(bottom_gradient, 0); int left_clipped = std::max(left_gradient, 0); @@ -441,7 +437,7 @@ int TextlineProjection::EvaluateBoxInternal(const TBOX& box, box.print(); } int result = std::max(top_clipped, bottom_clipped) - - std::max(left_clipped, right_clipped); + std::max(left_clipped, right_clipped); if (hgrad1 != nullptr && hgrad2 != nullptr) { *hgrad1 = top_gradient; *hgrad2 = bottom_gradient; @@ -459,7 +455,8 @@ int TextlineProjection::EvaluateBoxInternal(const TBOX& box, // This gives a positive value for a good top edge and negative for bottom. // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm, - int16_t min_x, int16_t max_x, int16_t y, + int16_t min_x, int16_t max_x, + int16_t y, bool best_is_max) const { TPOINT start_pt(min_x, y); TPOINT end_pt(max_x, y); @@ -469,13 +466,11 @@ int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm, upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt); lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt); int gradient = lower - upper; - if ((gradient > best_gradient) == best_is_max) - best_gradient = gradient; + if ((gradient > best_gradient) == best_is_max) best_gradient = gradient; upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt); lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt); gradient = lower - upper; - if ((gradient > best_gradient) == best_is_max) - best_gradient = gradient; + if ((gradient > best_gradient) == best_is_max) best_gradient = gradient; return best_gradient; } @@ -485,8 +480,9 @@ int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm, // 2 pixels to the right. // This gives a positive value for a good left edge and negative for right. // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. -int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x, - int16_t min_y, int16_t max_y, +int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, + int16_t x, int16_t min_y, + int16_t max_y, bool best_is_max) const { TPOINT start_pt(x, min_y); TPOINT end_pt(x, max_y); @@ -496,13 +492,11 @@ int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt); right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt); int gradient = right - left; - if ((gradient > best_gradient) == best_is_max) - best_gradient = gradient; + if ((gradient > best_gradient) == best_is_max) best_gradient = gradient; left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt); right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt); gradient = right - left; - if ((gradient > best_gradient) == best_is_max) - best_gradient = gradient; + if ((gradient > best_gradient) == best_is_max) best_gradient = gradient; return best_gradient; } @@ -517,8 +511,7 @@ int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x // coordinates, which allows the caller to get a guaranteed displacement // between pixels used to calculate gradients. int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm, - int offset, - TPOINT start_pt, + int offset, TPOINT start_pt, TPOINT end_pt) const { TransformToPixCoords(denorm, &start_pt); TransformToPixCoords(denorm, &end_pt); @@ -531,8 +524,7 @@ int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm, int x_delta = end_pt.x - start_pt.x; int y_delta = end_pt.y - start_pt.y; if (abs(x_delta) >= abs(y_delta)) { - if (x_delta == 0) - return 0; + if (x_delta == 0) return 0; // Horizontal line. Add the offset vertically. int x_step = x_delta > 0 ? 1 : -1; // Correct offset for rotation, keeping it anti-clockwise of the delta. @@ -576,8 +568,8 @@ int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm, // Returns an empty box if there are no black pixels in the source box. static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) { int im_height = pixGetHeight(pix); - Box* input_box = boxCreate(box.left(), im_height - box.top(), - box.width(), box.height()); + Box* input_box = + boxCreate(box.left(), im_height - box.top(), box.width(), box.height()); Box* output_box = nullptr; pixClipBoxToForeground(pix, input_box, nullptr, &output_box); TBOX result_box; @@ -623,7 +615,6 @@ static void TruncateBoxToMissNonText(int x_middle, int y_middle, *bbox = box1; } - // Helper function to add 1 to a rectangle in source image coords to the // internal projection pix_. void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) { @@ -636,8 +627,7 @@ void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) { for (int y = scaled_top; y <= scaled_bottom; ++y) { for (int x = scaled_left; x <= scaled_right; ++x) { int pixel = GET_DATA_BYTE(data, x); - if (pixel < 255) - SET_DATA_BYTE(data, x, pixel + 1); + if (pixel < 255) SET_DATA_BYTE(data, x, pixel + 1); } data += wpl; } @@ -663,8 +653,7 @@ void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs, // Rotate to match the nontext_map. bbox.rotate(rotation); middle.rotate(rotation); - if (rotation.x() == 0.0f) - spreading_horizontally = !spreading_horizontally; + if (rotation.x() == 0.0f) spreading_horizontally = !spreading_horizontally; // Clip to the image before applying the increments. bbox &= nontext_map_box; // This is in-place box intersection. // Check for image pixels before spreading. @@ -697,30 +686,30 @@ bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) { // single pixel in the projection profile space to help join diacritics to // the textline. if ((blob->neighbour(BND_ABOVE) == nullptr || - bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) && + bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) && (blob->neighbour(BND_BELOW) == nullptr || - bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) { + bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) { ypad = scale_factor_; } } else if (blob->UniquelyVertical()) { ypad = bbox->width() * kOrientedPadFactor; if ((blob->neighbour(BND_LEFT) == nullptr || - bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) && + bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) && (blob->neighbour(BND_RIGHT) == nullptr || - bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) { + bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) { xpad = scale_factor_; } } else { if ((blob->neighbour(BND_ABOVE) != nullptr && blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) || (blob->neighbour(BND_BELOW) != nullptr && - blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) { + blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) { ypad = bbox->width() * kDefaultPadFactor; } if ((blob->neighbour(BND_RIGHT) != nullptr && blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) || (blob->neighbour(BND_LEFT) != nullptr && - blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) { + blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) { xpad = bbox->height() * kDefaultPadFactor; padding_horizontally = true; } diff --git a/src/textord/textlineprojection.h b/src/textord/textlineprojection.h index c91569b897..6ddcb808c2 100644 --- a/src/textord/textlineprojection.h +++ b/src/textord/textlineprojection.h @@ -14,7 +14,7 @@ #ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ #define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ -#include "blobgrid.h" // For BlobGrid +#include "blobgrid.h" // For BlobGrid class DENORM; struct Pix; @@ -44,8 +44,8 @@ class TextlineProjection { // The rotation is a multiple of 90 degrees, ie no deskew yet. // The blobs have had their left and right rules set to also limit // the range of projection. - void ConstructProjection(TO_BLOCK* input_block, - const FCOORD& rotation, Pix* nontext_map); + void ConstructProjection(TO_BLOCK* input_block, const FCOORD& rotation, + Pix* nontext_map); // Display the blobs in the window colored according to textline quality. void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win); @@ -76,8 +76,8 @@ class TextlineProjection { // The projection uses original image coords, so denorm is used to get // back to the image coords from box/part space. int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box, - bool horizontal_textline, - const DENORM* denorm, bool debug) const; + bool horizontal_textline, const DENORM* denorm, + bool debug) const; // Compute the distance between (x, y1) and (x, y2) using the rule that // a decrease in textline density is weighted more heavily than an increase. @@ -95,7 +95,7 @@ class TextlineProjection { // Such blobs are potentially diacritics (even if large in Thai) and should // be kept away from initial textline finding. bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm, - bool debug) const; + bool debug) const; // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below, // but uses the median top/bottom for horizontal and median left/right for @@ -128,8 +128,8 @@ class TextlineProjection { // as the result of EvaluateBox. // hgrad1 and hgrad2 are the gradients for the horizontal textline. int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug, - int* hgrad1, int* hgrad2, - int* vgrad1, int* vgrad2) const; + int* hgrad1, int* hgrad2, int* vgrad1, + int* vgrad2) const; // Helper returns the mean gradient value for the horizontal row at the given // y, (in the external coordinates) by subtracting the mean of the transformed @@ -158,8 +158,8 @@ class TextlineProjection { // perpendicular to the line direction. The offset is thus in projection image // coordinates, which allows the caller to get a guaranteed displacement // between pixels used to calculate gradients. - int MeanPixelsInLineSegment(const DENORM* denorm, int offset, - TPOINT start_pt, TPOINT end_pt) const; + int MeanPixelsInLineSegment(const DENORM* denorm, int offset, TPOINT start_pt, + TPOINT end_pt) const; // Helper function to add 1 to a rectangle in source image coords to the // internal projection pix_. diff --git a/src/textord/textord.cpp b/src/textord/textord.cpp index c8ffe98c64..fe9baa2840 100644 --- a/src/textord/textord.cpp +++ b/src/textord/textord.cpp @@ -24,9 +24,9 @@ #include "baselinedetect.h" #include "drawtord.h" -#include "textord.h" #include "makerow.h" #include "pageres.h" +#include "textord.h" #include "tordmain.h" #include "wordseg.h" @@ -260,8 +260,8 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, TO_BLOCK* to_block = it.data(); BLOCK* block = to_block->block; // Create a fake poly_block in block from its bounding box. - block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(), - PT_VERTICAL_TEXT)); + block->pdblk.set_poly_block( + new POLY_BLOCK(block->pdblk.bounding_box(), PT_VERTICAL_TEXT)); // Rotate the to_block along with its contained block and blobnbox lists. to_block->rotate(anticlockwise90); // Set the block's rotation values to obey the convention followed in @@ -280,13 +280,12 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, gradient = make_rows(page_tr_, to_blocks); } else if (!PSM_SPARSE(pageseg_mode)) { // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. - gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, - to_block, to_blocks); + gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, + to_blocks); } else { gradient = 0.0f; } - BaselineDetect baseline_detector(textord_baseline_debug, - reskew, to_blocks); + BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks); baseline_detector.ComputeStraightBaselines(use_box_bottoms); baseline_detector.ComputeBaselineSplinesAndXheights( page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, @@ -300,8 +299,8 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, // single word, and in SINGLE_CHAR mode, all the outlines // go in a single blob. TO_BLOCK* to_block = to_block_it.data(); - make_single_word(pageseg_mode == PSM_SINGLE_CHAR, - to_block->get_rows(), to_block->block->row_list()); + make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(), + to_block->block->row_list()); } // Remove empties. cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks); @@ -345,8 +344,7 @@ void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, } // Now eliminate any word not in the best row. for (it.restart_page(); it.word() != nullptr; it.forward()) { - if (it.row() != best_row) - it.DeleteCurrentWord(); + if (it.row() != best_row) it.DeleteCurrentWord(); } } diff --git a/src/textord/textord.h b/src/textord/textord.h index d798609db8..c12440fdb4 100644 --- a/src/textord/textord.h +++ b/src/textord/textord.h @@ -21,9 +21,9 @@ #ifndef TESSERACT_TEXTORD_TEXTORD_H_ #define TESSERACT_TEXTORD_TEXTORD_H_ -#include "ccstruct.h" #include "bbgrid.h" #include "blobbox.h" +#include "ccstruct.h" #include "gap_map.h" #include "publictypes.h" // For PageSegMode. @@ -41,21 +41,21 @@ namespace tesseract { class WordWithBox { public: WordWithBox() : word_(nullptr) {} - explicit WordWithBox(WERD *word) + explicit WordWithBox(WERD* word) : word_(word), bounding_box_(word->bounding_box()) { int height = bounding_box_.height(); bounding_box_.pad(height, height); } - const TBOX &bounding_box() const { return bounding_box_; } + const TBOX& bounding_box() const { return bounding_box_; } // Returns the bounding box of only the good blobs. TBOX true_bounding_box() const { return word_->true_bounding_box(); } - C_BLOB_LIST *RejBlobs() const { return word_->rej_cblob_list(); } - const WERD *word() const { return word_; } + C_BLOB_LIST* RejBlobs() const { return word_->rej_cblob_list(); } + const WERD* word() const { return word_; } private: // Borrowed pointer to a real word somewhere that must outlive this class. - WERD *word_; + WERD* word_; // Cached expanded bounding box of the word, padded all round by its height. TBOX bounding_box_; }; @@ -79,36 +79,31 @@ class Textord { // thresholds that were used to create the binary_pix from the grey_pix. // diacritic_blobs contain small confusing components that should be added // to the appropriate word(s) in case they are really diacritics. - void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, - int height, Pix *binary_pix, Pix *thresholds_pix, - Pix *grey_pix, bool use_box_bottoms, - BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, - TO_BLOCK_LIST *to_blocks); + void TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, int width, + int height, Pix* binary_pix, Pix* thresholds_pix, + Pix* grey_pix, bool use_box_bottoms, + BLOBNBOX_LIST* diacritic_blobs, BLOCK_LIST* blocks, + TO_BLOCK_LIST* to_blocks); // If we were supposed to return only a single textline, and there is more // than one, clean up and leave only the best. void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res); - bool use_cjk_fp_model() const { - return use_cjk_fp_model_; - } - void set_use_cjk_fp_model(bool flag) { - use_cjk_fp_model_ = flag; - } + bool use_cjk_fp_model() const { return use_cjk_fp_model_; } + void set_use_cjk_fp_model(bool flag) { use_cjk_fp_model_ = flag; } // tospace.cpp /////////////////////////////////////////// - void to_spacing( - ICOORD page_tr, //topright of page - TO_BLOCK_LIST *blocks //blocks on page - ); - ROW *make_prop_words(TO_ROW *row, // row to make + void to_spacing(ICOORD page_tr, // topright of page + TO_BLOCK_LIST* blocks // blocks on page + ); + ROW* make_prop_words(TO_ROW* row, // row to make FCOORD rotation // for drawing - ); - ROW *make_blob_words(TO_ROW *row, // row to make + ); + ROW* make_blob_words(TO_ROW* row, // row to make FCOORD rotation // for drawing - ); + ); // tordmain.cpp /////////////////////////////////////////// - void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); + void find_components(Pix* pix, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST* blocks, bool testing_on); private: @@ -122,128 +117,102 @@ class Textord { // makerow.cpp /////////////////////////////////////////// // Make the textlines inside each block. - void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew, - int width, int height, TO_BLOCK_LIST* to_blocks); + void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew, int width, + int height, TO_BLOCK_LIST* to_blocks); // Make the textlines inside a single block. - void MakeBlockRows(int min_spacing, int max_spacing, - const FCOORD& skew, TO_BLOCK* block, - ScrollView* win); + void MakeBlockRows(int min_spacing, int max_spacing, const FCOORD& skew, + TO_BLOCK* block, ScrollView* win); public: - void compute_block_xheight(TO_BLOCK *block, float gradient); - void compute_row_xheight(TO_ROW *row, // row to do + void compute_block_xheight(TO_BLOCK* block, float gradient); + void compute_row_xheight(TO_ROW* row, // row to do const FCOORD& rotation, - float gradient, // global skew + float gradient, // global skew int block_line_size); - void make_spline_rows(TO_BLOCK* block, // block to do - float gradient, // gradient to fit + void make_spline_rows(TO_BLOCK* block, // block to do + float gradient, // gradient to fit bool testing_on); + private: //// oldbasel.cpp //////////////////////////////////////// - void make_old_baselines(TO_BLOCK* block, // block to do + void make_old_baselines(TO_BLOCK* block, // block to do bool testing_on, // correct orientation float gradient); - void correlate_lines(TO_BLOCK *block, float gradient); - void correlate_neighbours(TO_BLOCK *block, // block rows are in. - TO_ROW **rows, // rows of block. + void correlate_lines(TO_BLOCK* block, float gradient); + void correlate_neighbours(TO_BLOCK* block, // block rows are in. + TO_ROW** rows, // rows of block. int rowcount); // no of rows to do. - int correlate_with_stats(TO_ROW **rows, // rows of block. - int rowcount, // no of rows to do. + int correlate_with_stats(TO_ROW** rows, // rows of block. + int rowcount, // no of rows to do. TO_BLOCK* block); - void find_textlines(TO_BLOCK *block, // block row is in - TO_ROW *row, // row to do - int degree, // required approximation - QSPLINE *spline); // starting spline + void find_textlines(TO_BLOCK* block, // block row is in + TO_ROW* row, // row to do + int degree, // required approximation + QSPLINE* spline); // starting spline // tospace.cpp /////////////////////////////////////////// - //DEBUG USE ONLY - void block_spacing_stats(TO_BLOCK* block, - GAPMAP* gapmap, + // DEBUG USE ONLY + void block_spacing_stats(TO_BLOCK* block, GAPMAP* gapmap, bool& old_text_ord_proportional, - //resulting estimate + // resulting estimate int16_t& block_space_gap_width, - //resulting estimate - int16_t& block_non_space_gap_width - ); - void row_spacing_stats(TO_ROW *row, - GAPMAP *gapmap, - int16_t block_idx, + // resulting estimate + int16_t& block_non_space_gap_width); + void row_spacing_stats(TO_ROW* row, GAPMAP* gapmap, int16_t block_idx, int16_t row_idx, - //estimate for block + // estimate for block int16_t block_space_gap_width, - //estimate for block - int16_t block_non_space_gap_width - ); - void old_to_method(TO_ROW *row, - STATS *all_gap_stats, - STATS *space_gap_stats, - STATS *small_gap_stats, - int16_t block_space_gap_width, - //estimate for block - int16_t block_non_space_gap_width - ); - bool isolated_row_stats(TO_ROW* row, - GAPMAP* gapmap, - STATS* all_gap_stats, - bool suspected_table, - int16_t block_idx, + // estimate for block + int16_t block_non_space_gap_width); + void old_to_method(TO_ROW* row, STATS* all_gap_stats, STATS* space_gap_stats, + STATS* small_gap_stats, int16_t block_space_gap_width, + // estimate for block + int16_t block_non_space_gap_width); + bool isolated_row_stats(TO_ROW* row, GAPMAP* gapmap, STATS* all_gap_stats, + bool suspected_table, int16_t block_idx, int16_t row_idx); - int16_t stats_count_under(STATS *stats, int16_t threshold); - void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); - bool make_a_word_break(TO_ROW* row, // row being made - TBOX blob_box, // for next_blob // how many blanks? - int16_t prev_gap, - TBOX prev_blob_box, + int16_t stats_count_under(STATS* stats, int16_t threshold); + void improve_row_threshold(TO_ROW* row, STATS* all_gap_stats); + bool make_a_word_break(TO_ROW* row, // row being made + TBOX blob_box, // for next_blob // how many blanks? + int16_t prev_gap, TBOX prev_blob_box, int16_t real_current_gap, - int16_t within_xht_current_gap, - TBOX next_blob_box, - int16_t next_gap, - uint8_t& blanks, - bool& fuzzy_sp, - bool& fuzzy_non, - bool& prev_gap_was_a_space, + int16_t within_xht_current_gap, TBOX next_blob_box, + int16_t next_gap, uint8_t& blanks, bool& fuzzy_sp, + bool& fuzzy_non, bool& prev_gap_was_a_space, bool& break_at_next_gap); bool narrow_blob(TO_ROW* row, TBOX blob_box); bool wide_blob(TO_ROW* row, TBOX blob_box); bool suspected_punct_blob(TO_ROW* row, TBOX box); - void peek_at_next_gap(TO_ROW *row, - BLOBNBOX_IT box_it, - TBOX &next_blob_box, - int16_t &next_gap, - int16_t &next_within_xht_gap); - void mark_gap(TBOX blob, //blob following gap + void peek_at_next_gap(TO_ROW* row, BLOBNBOX_IT box_it, TBOX& next_blob_box, + int16_t& next_gap, int16_t& next_within_xht_gap); + void mark_gap(TBOX blob, // blob following gap int16_t rule, // heuristic id - int16_t prev_gap, - int16_t prev_blob_width, - int16_t current_gap, - int16_t next_blob_width, - int16_t next_gap); - float find_mean_blob_spacing(WERD *word); - bool ignore_big_gap(TO_ROW* row, - int32_t row_length, - GAPMAP* gapmap, - int16_t left, - int16_t right); - //get bounding box - TBOX reduced_box_next(TO_ROW *row, //current row - BLOBNBOX_IT *it //iterator to blobds - ); - TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht); + int16_t prev_gap, int16_t prev_blob_width, int16_t current_gap, + int16_t next_blob_width, int16_t next_gap); + float find_mean_blob_spacing(WERD* word); + bool ignore_big_gap(TO_ROW* row, int32_t row_length, GAPMAP* gapmap, + int16_t left, int16_t right); + // get bounding box + TBOX reduced_box_next(TO_ROW* row, // current row + BLOBNBOX_IT* it // iterator to blobds + ); + TBOX reduced_box_for_blob(BLOBNBOX* blob, TO_ROW* row, + int16_t* left_above_xht); // tordmain.cpp /////////////////////////////////////////// - float filter_noise_blobs(BLOBNBOX_LIST *src_list, - BLOBNBOX_LIST *noise_list, - BLOBNBOX_LIST *small_list, - BLOBNBOX_LIST *large_list); + float filter_noise_blobs(BLOBNBOX_LIST* src_list, BLOBNBOX_LIST* noise_list, + BLOBNBOX_LIST* small_list, + BLOBNBOX_LIST* large_list); // Fixes the block so it obeys all the rules: // Must have at least one ROW. // Must have at least one WERD. // WERDs contain a fake blob. void cleanup_nontext_block(BLOCK* block); - void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks); + void cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks); bool clean_noise_from_row(ROW* row); - void clean_noise_from_words(ROW *row); + void clean_noise_from_words(ROW* row); // Remove outlines that are a tiny fraction in either width or height // of the word height. - void clean_small_noise_from_words(ROW *row); + void clean_small_noise_from_words(ROW* row); // Groups blocks by rotation, then, for each group, makes a WordGrid and calls // TransferDiacriticsToWords to copy the diacritic blobs to the most // appropriate words in the group of blocks. Source blobs are not touched. @@ -252,8 +221,8 @@ class Textord { // Places a copy of blobs that are near a word (after applying rotation to the // blob) in the most appropriate word, unless there is doubt, in which case a // blob can end up in two words. Source blobs are not touched. - void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, - const FCOORD &rotation, WordGrid *word_grid); + void TransferDiacriticsToWords(BLOBNBOX_LIST* diacritic_blobs, + const FCOORD& rotation, WordGrid* word_grid); public: // makerow.cpp /////////////////////////////////////////// @@ -269,41 +238,30 @@ class Textord { BOOL_VAR_H(tosp_force_wordbreak_on_punct, false, "Force word breaks on punct to break long lines in non-space " "delimited langs"); - BOOL_VAR_H(tosp_use_pre_chopping, false, - "Space stats use prechopping?"); - BOOL_VAR_H(tosp_old_to_bug_fix, false, - "Fix suspected bug in old code"); - BOOL_VAR_H(tosp_block_use_cert_spaces, true, - "Only stat OBVIOUS spaces"); - BOOL_VAR_H(tosp_row_use_cert_spaces, true, - "Only stat OBVIOUS spaces"); - BOOL_VAR_H(tosp_narrow_blobs_not_cert, true, - "Only stat OBVIOUS spaces"); - BOOL_VAR_H(tosp_row_use_cert_spaces1, true, - "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_use_pre_chopping, false, "Space stats use prechopping?"); + BOOL_VAR_H(tosp_old_to_bug_fix, false, "Fix suspected bug in old code"); + BOOL_VAR_H(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces"); BOOL_VAR_H(tosp_recovery_isolated_row_stats, true, "Use row alone when inadequate cert spaces"); BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess"); BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?"); BOOL_VAR_H(tosp_fuzzy_limit_all, true, "Don't restrict kn->sp fuzzy limit to tables"); - BOOL_VAR_H(tosp_stats_use_xht_gaps, true, - "Use within xht gap for wd breaks"); - BOOL_VAR_H(tosp_use_xht_gaps, true, - "Use within xht gap for wd breaks"); + BOOL_VAR_H(tosp_stats_use_xht_gaps, true, "Use within xht gap for wd breaks"); + BOOL_VAR_H(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks"); BOOL_VAR_H(tosp_only_use_xht_gaps, false, "Only use within xht gap for wd breaks"); BOOL_VAR_H(tosp_rule_9_test_punct, false, "Don't chng kn to space next to punct"); BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip"); BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip"); - BOOL_VAR_H(tosp_improve_thresh, false, - "Enable improvement heuristic"); + BOOL_VAR_H(tosp_improve_thresh, false, "Enable improvement heuristic"); INT_VAR_H(tosp_debug_level, 0, "Debug data"); - INT_VAR_H(tosp_enough_space_samples_for_median, 3, - "or should we use mean"); - INT_VAR_H(tosp_redo_kern_limit, 10, - "No.samples reqd to reestimate for row"); + INT_VAR_H(tosp_enough_space_samples_for_median, 3, "or should we use mean"); + INT_VAR_H(tosp_redo_kern_limit, 10, "No.samples reqd to reestimate for row"); INT_VAR_H(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table"); INT_VAR_H(tosp_short_row, 20, @@ -312,30 +270,19 @@ class Textord { double_VAR_H(tosp_old_sp_kn_th_factor, 2.0, "Factor for defining space threshold in terms of space and " "kern sizes"); - double_VAR_H(tosp_threshold_bias1, 0, - "how far between kern and space?"); - double_VAR_H(tosp_threshold_bias2, 0, - "how far between kern and space?"); - double_VAR_H(tosp_narrow_fraction, 0.3, - "Fract of xheight for narrow"); - double_VAR_H(tosp_narrow_aspect_ratio, 0.48, - "narrow if w/h less than this"); + double_VAR_H(tosp_threshold_bias1, 0, "how far between kern and space?"); + double_VAR_H(tosp_threshold_bias2, 0, "how far between kern and space?"); + double_VAR_H(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow"); + double_VAR_H(tosp_narrow_aspect_ratio, 0.48, "narrow if w/h less than this"); double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide"); - double_VAR_H(tosp_wide_aspect_ratio, 0.0, - "wide if w/h less than this"); - double_VAR_H(tosp_fuzzy_space_factor, 0.6, - "Fract of xheight for fuzz sp"); - double_VAR_H(tosp_fuzzy_space_factor1, 0.5, - "Fract of xheight for fuzz sp"); - double_VAR_H(tosp_fuzzy_space_factor2, 0.72, - "Fract of xheight for fuzz sp"); + double_VAR_H(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this"); + double_VAR_H(tosp_fuzzy_space_factor, 0.6, "Fract of xheight for fuzz sp"); + double_VAR_H(tosp_fuzzy_space_factor1, 0.5, "Fract of xheight for fuzz sp"); + double_VAR_H(tosp_fuzzy_space_factor2, 0.72, "Fract of xheight for fuzz sp"); double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern"); - double_VAR_H(tosp_kern_gap_factor1, 2.0, - "gap ratio to flip kern->sp"); - double_VAR_H(tosp_kern_gap_factor2, 1.3, - "gap ratio to flip kern->sp"); - double_VAR_H(tosp_kern_gap_factor3, 2.5, - "gap ratio to flip kern->sp"); + double_VAR_H(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp"); + double_VAR_H(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp"); + double_VAR_H(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp"); double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier"); double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier"); double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space"); @@ -343,24 +290,20 @@ class Textord { "Fract of kerns reqd for isolated row stats"); double_VAR_H(tosp_table_kn_sp_ratio, 2.25, "Min difference of kn & sp in table"); - double_VAR_H(tosp_table_xht_sp_ratio, 0.33, - "Expect spaces bigger than this"); - double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0, - "Fuzzy if less than this"); + double_VAR_H(tosp_table_xht_sp_ratio, 0.33, "Expect spaces bigger than this"); + double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0, "Fuzzy if less than this"); double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg"); double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg"); double_VAR_H(tosp_min_sane_kn_sp, 1.5, "Don't trust spaces less than this time kn"); - double_VAR_H(tosp_init_guess_kn_mult, 2.2, - "Thresh guess - mult kn by this"); + double_VAR_H(tosp_init_guess_kn_mult, 2.2, "Thresh guess - mult kn by this"); double_VAR_H(tosp_init_guess_xht_mult, 0.28, "Thresh guess - mult xht by this"); double_VAR_H(tosp_max_sane_kn_thresh, 5.0, "Multiplier on kn to limit thresh"); double_VAR_H(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation"); - double_VAR_H(tosp_large_kerning, 0.19, - "Limit use of xht gap with large kns"); + double_VAR_H(tosp_large_kerning, 0.19, "Limit use of xht gap with large kns"); double_VAR_H(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns"); double_VAR_H(tosp_near_lh_edge, 0, @@ -390,7 +333,7 @@ class Textord { double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs"); double_VAR_H(textord_noise_sxfract, 0.4, "xh fract width error for norm blobs"); - double_VAR_H(textord_noise_hfract, 1.0/64, + double_VAR_H(textord_noise_hfract, 1.0 / 64, "Height fraction to discard outlines as speckle noise"); INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row"); double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion"); diff --git a/src/textord/topitch.cpp b/src/textord/topitch.cpp index 6a3449ab10..f18bbb6e54 100644 --- a/src/textord/topitch.cpp +++ b/src/textord/topitch.cpp @@ -18,19 +18,19 @@ **********************************************************************/ #ifdef __UNIX__ -#include +#include #endif -#include "stderr.h" -#include "blobbox.h" -#include "statistc.h" -#include "drawtord.h" -#include "makerow.h" -#include "pitsync1.h" -#include "pithsync.h" -#include "tovars.h" -#include "wordseg.h" -#include "topitch.h" -#include "helpers.h" +#include "blobbox.h" +#include "drawtord.h" +#include "helpers.h" +#include "makerow.h" +#include "pithsync.h" +#include "pitsync1.h" +#include "statistc.h" +#include "stderr.h" +#include "topitch.h" +#include "tovars.h" +#include "wordseg.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -39,28 +39,26 @@ #define EXTERN -EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text"); -EXTERN BOOL_VAR (textord_debug_pitch_test, FALSE, -"Debug on fixed pitch test"); -EXTERN BOOL_VAR (textord_disable_pitch_test, FALSE, -"Turn off dp fixed pitch algorithm"); -EXTERN BOOL_VAR (textord_fast_pitch_test, FALSE, -"Do even faster pitch algorithm"); -EXTERN BOOL_VAR (textord_debug_pitch_metric, FALSE, -"Write full metric stuff"); -EXTERN BOOL_VAR (textord_show_row_cuts, FALSE, "Draw row-level cuts"); -EXTERN BOOL_VAR (textord_show_page_cuts, FALSE, "Draw page-level cuts"); -EXTERN BOOL_VAR (textord_pitch_cheat, FALSE, -"Use correct answer for fixed/prop"); -EXTERN BOOL_VAR (textord_blockndoc_fixed, FALSE, -"Attempt whole doc/block fixed pitch"); -EXTERN double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts"); -EXTERN double_VAR (textord_balance_factor, 1.0, -"Ding rate for unbalanced char cells"); - -#define FIXED_WIDTH_MULTIPLE 5 -#define BLOCK_STATS_CLUSTERS 10 -#define MAX_ALLOWED_PITCH 100 //max pixel pitch. +EXTERN BOOL_VAR(textord_all_prop, FALSE, "All doc is proportial text"); +EXTERN BOOL_VAR(textord_debug_pitch_test, FALSE, "Debug on fixed pitch test"); +EXTERN BOOL_VAR(textord_disable_pitch_test, FALSE, + "Turn off dp fixed pitch algorithm"); +EXTERN BOOL_VAR(textord_fast_pitch_test, FALSE, + "Do even faster pitch algorithm"); +EXTERN BOOL_VAR(textord_debug_pitch_metric, FALSE, "Write full metric stuff"); +EXTERN BOOL_VAR(textord_show_row_cuts, FALSE, "Draw row-level cuts"); +EXTERN BOOL_VAR(textord_show_page_cuts, FALSE, "Draw page-level cuts"); +EXTERN BOOL_VAR(textord_pitch_cheat, FALSE, + "Use correct answer for fixed/prop"); +EXTERN BOOL_VAR(textord_blockndoc_fixed, FALSE, + "Attempt whole doc/block fixed pitch"); +EXTERN double_VAR(textord_projection_scale, 0.200, "Ding rate for mid-cuts"); +EXTERN double_VAR(textord_balance_factor, 1.0, + "Ding rate for unbalanced char cells"); + +#define FIXED_WIDTH_MULTIPLE 5 +#define BLOCK_STATS_CLUSTERS 10 +#define MAX_ALLOWED_PITCH 100 // max pixel pitch. /********************************************************************** * compute_fixed_pitch @@ -74,51 +72,48 @@ void compute_fixed_pitch(ICOORD page_tr, // top right TO_BLOCK_LIST* port_blocks, // input list float gradient, // page skew FCOORD rotation, // for drawing - bool testing_on) { // correct orientation - TO_BLOCK_IT block_it; //iterator - TO_BLOCK *block; //current block; - TO_ROW *row; //current row - int block_index; //block number - int row_index; //row number + bool testing_on) { // correct orientation + TO_BLOCK_IT block_it; // iterator + TO_BLOCK* block; // current block; + TO_ROW* row; // current row + int block_index; // block number + int row_index; // row number #ifndef GRAPHICS_DISABLED if (textord_show_initial_words && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); + if (to_win == nullptr) create_to_win(page_tr); } #endif - block_it.set_to_list (port_blocks); + block_it.set_to_list(port_blocks); block_index = 1; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block = block_it.data(); compute_block_pitch(block, rotation, block_index, testing_on); block_index++; } - if (!try_doc_fixed (page_tr, port_blocks, gradient)) { + if (!try_doc_fixed(page_tr, port_blocks, gradient)) { block_index = 1; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - if (!try_block_fixed (block, block_index)) + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + block = block_it.data(); + if (!try_block_fixed(block, block_index)) try_rows_fixed(block, block_index, testing_on); block_index++; } } block_index = 1; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - block = block_it.data (); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block = block_it.data(); POLY_BLOCK* pb = block->block->pdblk.poly_block(); if (pb != nullptr && !pb->IsText()) continue; // Non-text doesn't exist! // row iterator TO_ROW_IT row_it(block->get_rows()); row_index = 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); fix_row_pitch(row, block, port_blocks, row_index, block_index); row_index++; } @@ -131,7 +126,6 @@ void compute_fixed_pitch(ICOORD page_tr, // top right #endif } - /********************************************************************** * fix_row_pitch * @@ -139,31 +133,31 @@ void compute_fixed_pitch(ICOORD page_tr, // top right * block, then similar rows over all the page, or any other rows at all. **********************************************************************/ -void fix_row_pitch(TO_ROW *bad_row, // row to fix - TO_BLOCK *bad_block, // block of bad_row - TO_BLOCK_LIST *blocks, // blocks to scan - int32_t row_target, // number of row - int32_t block_target) { // number of block +void fix_row_pitch(TO_ROW* bad_row, // row to fix + TO_BLOCK* bad_block, // block of bad_row + TO_BLOCK_LIST* blocks, // blocks to scan + int32_t row_target, // number of row + int32_t block_target) { // number of block int16_t mid_cuts; - int block_votes; //votes in block - int like_votes; //votes over page - int other_votes; //votes of unlike blocks - int block_index; //number of block - int row_index; //number of row - int maxwidth; //max pitch - TO_BLOCK_IT block_it = blocks; //block iterator - TO_BLOCK *block; //current block - TO_ROW *row; //current row - float sp_sd; //space deviation - STATS block_stats; //pitches in block - STATS like_stats; //pitches in page + int block_votes; // votes in block + int like_votes; // votes over page + int other_votes; // votes of unlike blocks + int block_index; // number of block + int row_index; // number of row + int maxwidth; // max pitch + TO_BLOCK_IT block_it = blocks; // block iterator + TO_BLOCK* block; // current block + TO_ROW* row; // current row + float sp_sd; // space deviation + STATS block_stats; // pitches in block + STATS like_stats; // pitches in page block_votes = like_votes = other_votes = 0; - maxwidth = (int32_t) ceil (bad_row->xheight * textord_words_maxspace); - if (bad_row->pitch_decision != PITCH_DEF_FIXED - && bad_row->pitch_decision != PITCH_DEF_PROP) { - block_stats.set_range (0, maxwidth); - like_stats.set_range (0, maxwidth); + maxwidth = (int32_t)ceil(bad_row->xheight * textord_words_maxspace); + if (bad_row->pitch_decision != PITCH_DEF_FIXED && + bad_row->pitch_decision != PITCH_DEF_PROP) { + block_stats.set_range(0, maxwidth); + like_stats.set_range(0, maxwidth); block_index = 1; for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { @@ -172,67 +166,59 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix if (pb != nullptr && !pb->IsText()) continue; // Non text doesn't exist! row_index = 1; TO_ROW_IT row_it(block->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); - row_it.forward ()) { - row = row_it.data (); - if ((bad_row->all_caps - && row->xheight + row->ascrise - < - (bad_row->xheight + bad_row->ascrise) * (1 + - textord_pitch_rowsimilarity) - && row->xheight + row->ascrise > - (bad_row->xheight + bad_row->ascrise) * (1 - - textord_pitch_rowsimilarity)) - || (!bad_row->all_caps - && row->xheight < - bad_row->xheight * (1 + textord_pitch_rowsimilarity) - && row->xheight > - bad_row->xheight * (1 - textord_pitch_rowsimilarity))) { + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + if ((bad_row->all_caps && + row->xheight + row->ascrise < + (bad_row->xheight + bad_row->ascrise) * + (1 + textord_pitch_rowsimilarity) && + row->xheight + row->ascrise > + (bad_row->xheight + bad_row->ascrise) * + (1 - textord_pitch_rowsimilarity)) || + (!bad_row->all_caps && + row->xheight < + bad_row->xheight * (1 + textord_pitch_rowsimilarity) && + row->xheight > + bad_row->xheight * (1 - textord_pitch_rowsimilarity))) { if (block_index == block_target) { if (row->pitch_decision == PITCH_DEF_FIXED) { block_votes += textord_words_veto_power; - block_stats.add ((int32_t) row->fixed_pitch, - textord_words_veto_power); - } - else if (row->pitch_decision == PITCH_MAYBE_FIXED - || row->pitch_decision == PITCH_CORR_FIXED) { + block_stats.add((int32_t)row->fixed_pitch, + textord_words_veto_power); + } else if (row->pitch_decision == PITCH_MAYBE_FIXED || + row->pitch_decision == PITCH_CORR_FIXED) { block_votes++; - block_stats.add ((int32_t) row->fixed_pitch, 1); - } - else if (row->pitch_decision == PITCH_DEF_PROP) + block_stats.add((int32_t)row->fixed_pitch, 1); + } else if (row->pitch_decision == PITCH_DEF_PROP) block_votes -= textord_words_veto_power; - else if (row->pitch_decision == PITCH_MAYBE_PROP - || row->pitch_decision == PITCH_CORR_PROP) + else if (row->pitch_decision == PITCH_MAYBE_PROP || + row->pitch_decision == PITCH_CORR_PROP) block_votes--; - } - else { + } else { if (row->pitch_decision == PITCH_DEF_FIXED) { like_votes += textord_words_veto_power; - like_stats.add ((int32_t) row->fixed_pitch, - textord_words_veto_power); - } - else if (row->pitch_decision == PITCH_MAYBE_FIXED - || row->pitch_decision == PITCH_CORR_FIXED) { + like_stats.add((int32_t)row->fixed_pitch, + textord_words_veto_power); + } else if (row->pitch_decision == PITCH_MAYBE_FIXED || + row->pitch_decision == PITCH_CORR_FIXED) { like_votes++; - like_stats.add ((int32_t) row->fixed_pitch, 1); - } - else if (row->pitch_decision == PITCH_DEF_PROP) + like_stats.add((int32_t)row->fixed_pitch, 1); + } else if (row->pitch_decision == PITCH_DEF_PROP) like_votes -= textord_words_veto_power; - else if (row->pitch_decision == PITCH_MAYBE_PROP - || row->pitch_decision == PITCH_CORR_PROP) + else if (row->pitch_decision == PITCH_MAYBE_PROP || + row->pitch_decision == PITCH_CORR_PROP) like_votes--; } - } - else { + } else { if (row->pitch_decision == PITCH_DEF_FIXED) other_votes += textord_words_veto_power; - else if (row->pitch_decision == PITCH_MAYBE_FIXED - || row->pitch_decision == PITCH_CORR_FIXED) + else if (row->pitch_decision == PITCH_MAYBE_FIXED || + row->pitch_decision == PITCH_CORR_FIXED) other_votes++; else if (row->pitch_decision == PITCH_DEF_PROP) other_votes -= textord_words_veto_power; - else if (row->pitch_decision == PITCH_MAYBE_PROP - || row->pitch_decision == PITCH_CORR_PROP) + else if (row->pitch_decision == PITCH_MAYBE_PROP || + row->pitch_decision == PITCH_CORR_PROP) other_votes--; } row_index++; @@ -240,143 +226,132 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix block_index++; } if (block_votes > textord_words_veto_power) { - bad_row->fixed_pitch = block_stats.ile (0.5); + bad_row->fixed_pitch = block_stats.ile(0.5); bad_row->pitch_decision = PITCH_CORR_FIXED; - } - else if (block_votes <= textord_words_veto_power && like_votes > 0) { - bad_row->fixed_pitch = like_stats.ile (0.5); + } else if (block_votes <= textord_words_veto_power && like_votes > 0) { + bad_row->fixed_pitch = like_stats.ile(0.5); bad_row->pitch_decision = PITCH_CORR_FIXED; - } - else { + } else { bad_row->pitch_decision = PITCH_CORR_PROP; - if (block_votes == 0 && like_votes == 0 && other_votes > 0 - && (textord_debug_pitch_test || textord_debug_pitch_metric)) - tprintf - ("Warning:row %d of block %d set prop with no like rows against trend\n", - row_target, block_target); + if (block_votes == 0 && like_votes == 0 && other_votes > 0 && + (textord_debug_pitch_test || textord_debug_pitch_metric)) + tprintf( + "Warning:row %d of block %d set prop with no like rows against " + "trend\n", + row_target, block_target); } } if (textord_debug_pitch_metric) { - tprintf(":b_votes=%d:l_votes=%d:o_votes=%d", - block_votes, like_votes, other_votes); + tprintf(":b_votes=%d:l_votes=%d:o_votes=%d", block_votes, like_votes, + other_votes); tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise); } if (bad_row->pitch_decision == PITCH_CORR_FIXED) { if (bad_row->fixed_pitch < textord_min_xheight) { if (block_votes > 0) - bad_row->fixed_pitch = block_stats.ile (0.5); + bad_row->fixed_pitch = block_stats.ile(0.5); else if (block_votes == 0 && like_votes > 0) - bad_row->fixed_pitch = like_stats.ile (0.5); + bad_row->fixed_pitch = like_stats.ile(0.5); else { - tprintf - ("Warning:guessing pitch as xheight on row %d, block %d\n", - row_target, block_target); + tprintf("Warning:guessing pitch as xheight on row %d, block %d\n", + row_target, block_target); bad_row->fixed_pitch = bad_row->xheight; } } if (bad_row->fixed_pitch < textord_min_xheight) - bad_row->fixed_pitch = (float) textord_min_xheight; + bad_row->fixed_pitch = (float)textord_min_xheight; bad_row->kern_size = bad_row->fixed_pitch / 4; - bad_row->min_space = (int32_t) (bad_row->fixed_pitch * 0.6); - bad_row->max_nonspace = (int32_t) (bad_row->fixed_pitch * 0.4); - bad_row->space_threshold = - (bad_row->min_space + bad_row->max_nonspace) / 2; + bad_row->min_space = (int32_t)(bad_row->fixed_pitch * 0.6); + bad_row->max_nonspace = (int32_t)(bad_row->fixed_pitch * 0.4); + bad_row->space_threshold = (bad_row->min_space + bad_row->max_nonspace) / 2; bad_row->space_size = bad_row->fixed_pitch; if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) { - tune_row_pitch (bad_row, &bad_row->projection, - bad_row->projection_left, bad_row->projection_right, - (bad_row->fixed_pitch + - bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch, - sp_sd, mid_cuts, &bad_row->char_cells, FALSE); + tune_row_pitch(bad_row, &bad_row->projection, bad_row->projection_left, + bad_row->projection_right, + (bad_row->fixed_pitch + bad_row->max_nonspace * 3) / 4, + bad_row->fixed_pitch, sp_sd, mid_cuts, + &bad_row->char_cells, FALSE); } - } - else if (bad_row->pitch_decision == PITCH_CORR_PROP - || bad_row->pitch_decision == PITCH_DEF_PROP) { + } else if (bad_row->pitch_decision == PITCH_CORR_PROP || + bad_row->pitch_decision == PITCH_DEF_PROP) { bad_row->fixed_pitch = 0.0f; - bad_row->char_cells.clear (); + bad_row->char_cells.clear(); } } - /********************************************************************** * compute_block_pitch * * Decide whether each block is fixed pitch individually. **********************************************************************/ -void compute_block_pitch(TO_BLOCK* block, // input list - FCOORD rotation, // for drawing - int32_t block_index, // block number - bool testing_on) { // correct orientation - TBOX block_box; //bounding box +void compute_block_pitch(TO_BLOCK* block, // input list + FCOORD rotation, // for drawing + int32_t block_index, // block number + bool testing_on) { // correct orientation + TBOX block_box; // bounding box - block_box = block->block->pdblk.bounding_box (); + block_box = block->block->pdblk.bounding_box(); if (testing_on && textord_debug_pitch_test) { - tprintf ("Block %d at (%d,%d)->(%d,%d)\n", - block_index, - block_box.left (), block_box.bottom (), - block_box.right (), block_box.top ()); + tprintf("Block %d at (%d,%d)->(%d,%d)\n", block_index, block_box.left(), + block_box.bottom(), block_box.right(), block_box.top()); } - block->min_space = (int32_t) floor (block->xheight - * textord_words_default_minspace); - block->max_nonspace = (int32_t) ceil (block->xheight - * textord_words_default_nonspace); + block->min_space = + (int32_t)floor(block->xheight * textord_words_default_minspace); + block->max_nonspace = + (int32_t)ceil(block->xheight * textord_words_default_nonspace); block->fixed_pitch = 0.0f; - block->space_size = (float) block->min_space; - block->kern_size = (float) block->max_nonspace; + block->space_size = (float)block->min_space; + block->kern_size = (float)block->max_nonspace; block->pr_nonsp = block->xheight * words_default_prop_nonspace; block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop; - if (!block->get_rows ()->empty ()) { - ASSERT_HOST (block->xheight > 0); + if (!block->get_rows()->empty()) { + ASSERT_HOST(block->xheight > 0); find_repeated_chars(block, textord_show_initial_words && testing_on); #ifndef GRAPHICS_DISABLED if (textord_show_initial_words && testing_on) - //overlap_picture_ops(TRUE); + // overlap_picture_ops(TRUE); ScrollView::Update(); #endif - compute_rows_pitch(block, - block_index, + compute_rows_pitch(block, block_index, textord_debug_pitch_test && testing_on); } } - /********************************************************************** * compute_rows_pitch * * Decide whether each row is fixed pitch individually. **********************************************************************/ -bool compute_rows_pitch( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index, //block number - bool testing_on //correct orientation +bool compute_rows_pitch( // find line stats + TO_BLOCK* block, // block to do + int32_t block_index, // block number + bool testing_on // correct orientation ) { - int32_t maxwidth; //of spaces - TO_ROW *row; //current row - int32_t row_index; //row number. - float lower, upper; //cluster thresholds - TO_ROW_IT row_it = block->get_rows (); + int32_t maxwidth; // of spaces + TO_ROW* row; // current row + int32_t row_index; // row number. + float lower, upper; // cluster thresholds + TO_ROW_IT row_it = block->get_rows(); row_index = 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - ASSERT_HOST (row->xheight > 0); - row->compute_vertical_projection (); - maxwidth = (int32_t) ceil (row->xheight * textord_words_maxspace); - if (row_pitch_stats (row, maxwidth, testing_on) - && find_row_pitch (row, maxwidth, - textord_dotmatrix_gap + 1, block, block_index, - row_index, testing_on)) { + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + ASSERT_HOST(row->xheight > 0); + row->compute_vertical_projection(); + maxwidth = (int32_t)ceil(row->xheight * textord_words_maxspace); + if (row_pitch_stats(row, maxwidth, testing_on) && + find_row_pitch(row, maxwidth, textord_dotmatrix_gap + 1, block, + block_index, row_index, testing_on)) { if (row->fixed_pitch == 0) { lower = row->pr_nonsp; upper = row->pr_space; row->space_size = upper; row->kern_size = lower; } - } - else { - row->fixed_pitch = 0.0f; //insufficient data + } else { + row->fixed_pitch = 0.0f; // insufficient data row->pitch_decision = PITCH_DUNNO; } row_index++; @@ -384,188 +359,176 @@ bool compute_rows_pitch( //find line stats return false; } - /********************************************************************** * try_doc_fixed * * Attempt to call the entire document fixed pitch. **********************************************************************/ -bool try_doc_fixed( //determine pitch - ICOORD page_tr, //top right - TO_BLOCK_LIST* port_blocks, //input list - float gradient //page skew +bool try_doc_fixed( // determine pitch + ICOORD page_tr, // top right + TO_BLOCK_LIST* port_blocks, // input list + float gradient // page skew ) { - int16_t master_x; //uniform shifts - int16_t pitch; //median pitch. - int x; //profile coord - int prop_blocks; //correct counts + int16_t master_x; // uniform shifts + int16_t pitch; // median pitch. + int x; // profile coord + int prop_blocks; // correct counts int fixed_blocks; - int total_row_count; //total in page - //iterator + int total_row_count; // total in page + // iterator TO_BLOCK_IT block_it = port_blocks; - TO_BLOCK *block; //current block; - TO_ROW *row; //current row - int16_t projection_left; //edges + TO_BLOCK* block; // current block; + TO_ROW* row; // current row + int16_t projection_left; // edges int16_t projection_right; - int16_t row_left; //edges of row + int16_t row_left; // edges of row int16_t row_right; - ICOORDELT_LIST *master_cells; //cells for page - float master_y; //uniform shifts - float shift_factor; //page skew correction - float row_shift; //shift for row - float final_pitch; //output pitch - float row_y; //baseline - STATS projection; //entire page - STATS pitches (0, MAX_ALLOWED_PITCH); - //for median - float sp_sd; //space sd - int16_t mid_cuts; //no of cheap cuts - float pitch_sd; //sync rating - - if (block_it.empty () - // || block_it.data()==block_it.data_relative(1) - || !textord_blockndoc_fixed) + ICOORDELT_LIST* master_cells; // cells for page + float master_y; // uniform shifts + float shift_factor; // page skew correction + float row_shift; // shift for row + float final_pitch; // output pitch + float row_y; // baseline + STATS projection; // entire page + STATS pitches(0, MAX_ALLOWED_PITCH); + // for median + float sp_sd; // space sd + int16_t mid_cuts; // no of cheap cuts + float pitch_sd; // sync rating + + if (block_it.empty() + // || block_it.data()==block_it.data_relative(1) + || !textord_blockndoc_fixed) return false; shift_factor = gradient / (gradient * gradient + 1); // row iterator - TO_ROW_IT row_it(block_it.data ()->get_rows()); - master_x = row_it.data ()->projection_left; - master_y = row_it.data ()->baseline.y (master_x); + TO_ROW_IT row_it(block_it.data()->get_rows()); + master_x = row_it.data()->projection_left; + master_y = row_it.data()->baseline.y(master_x); projection_left = INT16_MAX; projection_right = -INT16_MAX; prop_blocks = 0; fixed_blocks = 0; total_row_count = 0; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block = block_it.data(); + row_it.set_to_list(block->get_rows()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); total_row_count++; - if (row->fixed_pitch > 0) - pitches.add ((int32_t) (row->fixed_pitch), 1); - //find median - row_y = row->baseline.y (master_x); + if (row->fixed_pitch > 0) pitches.add((int32_t)(row->fixed_pitch), 1); + // find median + row_y = row->baseline.y(master_x); row_left = - (int16_t) (row->projection_left - - shift_factor * (master_y - row_y)); + (int16_t)(row->projection_left - shift_factor * (master_y - row_y)); row_right = - (int16_t) (row->projection_right - - shift_factor * (master_y - row_y)); - if (row_left < projection_left) - projection_left = row_left; - if (row_right > projection_right) - projection_right = row_right; + (int16_t)(row->projection_right - shift_factor * (master_y - row_y)); + if (row_left < projection_left) projection_left = row_left; + if (row_right > projection_right) projection_right = row_right; } } - if (pitches.get_total () == 0) - return false; - projection.set_range (projection_left, projection_right); - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - row_y = row->baseline.y (master_x); + if (pitches.get_total() == 0) return false; + projection.set_range(projection_left, projection_right); + + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block = block_it.data(); + row_it.set_to_list(block->get_rows()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + row_y = row->baseline.y(master_x); row_left = - (int16_t) (row->projection_left - - shift_factor * (master_y - row_y)); + (int16_t)(row->projection_left - shift_factor * (master_y - row_y)); for (x = row->projection_left; x < row->projection_right; - x++, row_left++) { - projection.add (row_left, row->projection.pile_count (x)); + x++, row_left++) { + projection.add(row_left, row->projection.pile_count(x)); } } } - row_it.set_to_list (block_it.data ()->get_rows ()); - row = row_it.data (); + row_it.set_to_list(block_it.data()->get_rows()); + row = row_it.data(); #ifndef GRAPHICS_DISABLED if (textord_show_page_cuts && to_win != nullptr) - projection.plot (to_win, projection_left, - row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); + projection.plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, + ScrollView::CORAL); #endif - final_pitch = pitches.ile (0.5); - pitch = (int16_t) final_pitch; - pitch_sd = - tune_row_pitch (row, &projection, projection_left, projection_right, - pitch * 0.75, final_pitch, sp_sd, mid_cuts, - &row->char_cells, FALSE); + final_pitch = pitches.ile(0.5); + pitch = (int16_t)final_pitch; + pitch_sd = tune_row_pitch(row, &projection, projection_left, projection_right, + pitch * 0.75, final_pitch, sp_sd, mid_cuts, + &row->char_cells, FALSE); if (textord_debug_pitch_metric) - tprintf - ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n", - prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, - pitch_sd / total_row_count, pitch_sd / pitch, - pitch_sd / total_row_count / pitch); + tprintf( + "try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%" + "g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n", + prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, + pitch_sd / total_row_count, pitch_sd / pitch, + pitch_sd / total_row_count / pitch); #ifndef GRAPHICS_DISABLED if (textord_show_page_cuts && to_win != nullptr) { master_cells = &row->char_cells; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); - row_it.forward ()) { - row = row_it.data (); - row_y = row->baseline.y (master_x); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + block = block_it.data(); + row_it.set_to_list(block->get_rows()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + row_y = row->baseline.y(master_x); row_shift = shift_factor * (master_y - row_y); - plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells); + plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, + master_cells); } } } #endif - row->char_cells.clear (); + row->char_cells.clear(); return false; } - /********************************************************************** * try_block_fixed * * Try to call the entire block fixed. **********************************************************************/ -bool try_block_fixed( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index //block number +bool try_block_fixed( // find line stats + TO_BLOCK* block, // block to do + int32_t block_index // block number ) { return false; } - /********************************************************************** * try_rows_fixed * * Decide whether each row is fixed pitch individually. **********************************************************************/ -bool try_rows_fixed( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index, //block number - bool testing_on //correct orientation +bool try_rows_fixed( // find line stats + TO_BLOCK* block, // block to do + int32_t block_index, // block number + bool testing_on // correct orientation ) { - TO_ROW *row; //current row - int32_t row_index; //row number. - int32_t def_fixed = 0; //counters + TO_ROW* row; // current row + int32_t row_index; // row number. + int32_t def_fixed = 0; // counters int32_t def_prop = 0; int32_t maybe_fixed = 0; int32_t maybe_prop = 0; int32_t dunno = 0; int32_t corr_fixed = 0; int32_t corr_prop = 0; - float lower, upper; //cluster thresholds - TO_ROW_IT row_it = block->get_rows (); + float lower, upper; // cluster thresholds + TO_ROW_IT row_it = block->get_rows(); row_index = 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - ASSERT_HOST (row->xheight > 0); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + ASSERT_HOST(row->xheight > 0); if (row->fixed_pitch > 0 && fixed_pitch_row(row, block->block, block_index)) { if (row->fixed_pitch == 0) { @@ -577,18 +540,11 @@ bool try_rows_fixed( //find line stats } row_index++; } - count_block_votes(block, - def_fixed, - def_prop, - maybe_fixed, - maybe_prop, - corr_fixed, - corr_prop, - dunno); - if (testing_on - && (textord_debug_pitch_test - || textord_blocksall_prop || textord_blocksall_fixed)) { - tprintf ("Initially:"); + count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, + corr_fixed, corr_prop, dunno); + if (testing_on && (textord_debug_pitch_test || textord_blocksall_prop || + textord_blocksall_fixed)) { + tprintf("Initially:"); print_block_counts(block, block_index); } if (def_fixed > def_prop * textord_words_veto_power) @@ -606,18 +562,17 @@ bool try_rows_fixed( //find line stats return false; } - /********************************************************************** * print_block_counts * * Count up how many rows have what decision and print the results. **********************************************************************/ -void print_block_counts( //find line stats - TO_BLOCK *block, //block to do - int32_t block_index //block number - ) { - int32_t def_fixed = 0; //counters +void print_block_counts( // find line stats + TO_BLOCK* block, // block to do + int32_t block_index // block number +) { + int32_t def_fixed = 0; // counters int32_t def_prop = 0; int32_t maybe_fixed = 0; int32_t maybe_prop = 0; @@ -625,45 +580,34 @@ void print_block_counts( //find line stats int32_t corr_fixed = 0; int32_t corr_prop = 0; - count_block_votes(block, - def_fixed, - def_prop, - maybe_fixed, - maybe_prop, - corr_fixed, - corr_prop, - dunno); - tprintf ("Block %d has (%d,%d,%d)", - block_index, def_fixed, maybe_fixed, corr_fixed); + count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, + corr_fixed, corr_prop, dunno); + tprintf("Block %d has (%d,%d,%d)", block_index, def_fixed, maybe_fixed, + corr_fixed); if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed)) - tprintf (" (Wrongly)"); - tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop); + tprintf(" (Wrongly)"); + tprintf(" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop); if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop)) - tprintf (" (Wrongly)"); - tprintf (" prop, %d dunno\n", dunno); + tprintf(" (Wrongly)"); + tprintf(" prop, %d dunno\n", dunno); } - /********************************************************************** * count_block_votes * * Count the number of rows in the block with each kind of pitch_decision. **********************************************************************/ -void count_block_votes( //find line stats - TO_BLOCK *block, //block to do - int32_t &def_fixed, //add to counts - int32_t &def_prop, - int32_t &maybe_fixed, - int32_t &maybe_prop, - int32_t &corr_fixed, - int32_t &corr_prop, - int32_t &dunno) { - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); +void count_block_votes( // find line stats + TO_BLOCK* block, // block to do + int32_t& def_fixed, // add to counts + int32_t& def_prop, int32_t& maybe_fixed, int32_t& maybe_prop, + int32_t& corr_fixed, int32_t& corr_prop, int32_t& dunno) { + TO_ROW* row; // current row + TO_ROW_IT row_it = block->get_rows(); + + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); switch (row->pitch_decision) { case PITCH_DUNNO: dunno++; @@ -690,138 +634,127 @@ void count_block_votes( //find line stats } } - /********************************************************************** * row_pitch_stats * * Decide whether each row is fixed pitch individually. **********************************************************************/ -bool row_pitch_stats( //find line stats - TO_ROW* row, //current row - int32_t maxwidth, //of spaces - bool testing_on //correct orientation +bool row_pitch_stats( // find line stats + TO_ROW* row, // current row + int32_t maxwidth, // of spaces + bool testing_on // correct orientation ) { - BLOBNBOX *blob; //current blob - int gap_index; //current gap - int32_t prev_x; //end of prev blob - int32_t cluster_count; //no of clusters - int32_t prev_count; //of clusters - int32_t smooth_factor; //for smoothing stats - TBOX blob_box; //bounding box - float lower, upper; //cluster thresholds - //gap sizes + BLOBNBOX* blob; // current blob + int gap_index; // current gap + int32_t prev_x; // end of prev blob + int32_t cluster_count; // no of clusters + int32_t prev_count; // of clusters + int32_t smooth_factor; // for smoothing stats + TBOX blob_box; // bounding box + float lower, upper; // cluster thresholds + // gap sizes float gaps[BLOCK_STATS_CLUSTERS]; - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - STATS gap_stats (0, maxwidth); + // blobs + BLOBNBOX_IT blob_it = row->blob_list(); + STATS gap_stats(0, maxwidth); STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; - //clusters + // clusters smooth_factor = - (int32_t) (row->xheight * textord_wordstats_smooth_factor + 1.5); - if (!blob_it.empty ()) { - prev_x = blob_it.data ()->bounding_box ().right (); - blob_it.forward (); - while (!blob_it.at_first ()) { - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - if (blob_box.left () - prev_x < maxwidth) - gap_stats.add (blob_box.left () - prev_x, 1); - prev_x = blob_box.right (); + (int32_t)(row->xheight * textord_wordstats_smooth_factor + 1.5); + if (!blob_it.empty()) { + prev_x = blob_it.data()->bounding_box().right(); + blob_it.forward(); + while (!blob_it.at_first()) { + blob = blob_it.data(); + if (!blob->joined_to_prev()) { + blob_box = blob->bounding_box(); + if (blob_box.left() - prev_x < maxwidth) + gap_stats.add(blob_box.left() - prev_x, 1); + prev_x = blob_box.right(); } - blob_it.forward (); + blob_it.forward(); } } - if (gap_stats.get_total () == 0) { + if (gap_stats.get_total() == 0) { return false; } cluster_count = 0; lower = row->xheight * words_initial_lower; upper = row->xheight * words_initial_upper; - gap_stats.smooth (smooth_factor); + gap_stats.smooth(smooth_factor); do { prev_count = cluster_count; - cluster_count = gap_stats.cluster (lower, upper, - textord_spacesize_ratioprop, - BLOCK_STATS_CLUSTERS, cluster_stats); - } - while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); + cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, + BLOCK_STATS_CLUSTERS, cluster_stats); + } while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); if (cluster_count < 1) { return false; } for (gap_index = 0; gap_index < cluster_count; gap_index++) - gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); - //get medians + gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5); + // get medians if (testing_on) { - tprintf ("cluster_count=%d:", cluster_count); + tprintf("cluster_count=%d:", cluster_count); for (gap_index = 0; gap_index < cluster_count; gap_index++) - tprintf (" %g(%d)", gaps[gap_index], - cluster_stats[gap_index + 1].get_total ()); - tprintf ("\n"); + tprintf(" %g(%d)", gaps[gap_index], + cluster_stats[gap_index + 1].get_total()); + tprintf("\n"); } - qsort (gaps, cluster_count, sizeof (float), sort_floats); + qsort(gaps, cluster_count, sizeof(float), sort_floats); - //Try to find proportional non-space and space for row. + // Try to find proportional non-space and space for row. lower = row->xheight * words_default_prop_nonspace; upper = row->xheight * textord_words_min_minspace; - for (gap_index = 0; gap_index < cluster_count - && gaps[gap_index] < lower; gap_index++); + for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < lower; + gap_index++) + ; if (gap_index == 0) { - if (testing_on) - tprintf ("No clusters below nonspace threshold!!\n"); + if (testing_on) tprintf("No clusters below nonspace threshold!!\n"); if (cluster_count > 1) { row->pr_nonsp = gaps[0]; row->pr_space = gaps[1]; - } - else { + } else { row->pr_nonsp = lower; row->pr_space = gaps[0]; } - } - else { + } else { row->pr_nonsp = gaps[gap_index - 1]; - while (gap_index < cluster_count && gaps[gap_index] < upper) - gap_index++; + while (gap_index < cluster_count && gaps[gap_index] < upper) gap_index++; if (gap_index == cluster_count) { - if (testing_on) - tprintf ("No clusters above nonspace threshold!!\n"); + if (testing_on) tprintf("No clusters above nonspace threshold!!\n"); row->pr_space = lower * textord_spacesize_ratioprop; - } - else + } else row->pr_space = gaps[gap_index]; } - //Now try to find the fixed pitch space and non-space. + // Now try to find the fixed pitch space and non-space. upper = row->xheight * words_default_fixed_space; - for (gap_index = 0; gap_index < cluster_count - && gaps[gap_index] < upper; gap_index++); + for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < upper; + gap_index++) + ; if (gap_index == 0) { - if (testing_on) - tprintf ("No clusters below space threshold!!\n"); + if (testing_on) tprintf("No clusters below space threshold!!\n"); row->fp_nonsp = upper; row->fp_space = gaps[0]; - } - else { + } else { row->fp_nonsp = gaps[gap_index - 1]; if (gap_index == cluster_count) { - if (testing_on) - tprintf ("No clusters above space threshold!!\n"); + if (testing_on) tprintf("No clusters above space threshold!!\n"); row->fp_space = row->xheight; - } - else + } else row->fp_space = gaps[gap_index]; } if (testing_on) { - tprintf - ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n", - row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space); + tprintf( + "Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, " + "fp_space=%g\n", + row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space); } - return true; //computed some stats + return true; // computed some stats } - /********************************************************************** * find_row_pitch * @@ -830,130 +763,131 @@ bool row_pitch_stats( //find line stats * The larger threshold is the word gap threshold. **********************************************************************/ -bool find_row_pitch( //find lines - TO_ROW* row, //row to do - int32_t maxwidth, //max permitted space - int32_t dm_gap, //ignorable gaps - TO_BLOCK* block, //block of row - int32_t block_index, //block_number - int32_t row_index, //number of row - bool testing_on //correct orientation +bool find_row_pitch( // find lines + TO_ROW* row, // row to do + int32_t maxwidth, // max permitted space + int32_t dm_gap, // ignorable gaps + TO_BLOCK* block, // block of row + int32_t block_index, // block_number + int32_t row_index, // number of row + bool testing_on // correct orientation ) { - bool used_dm_model; //looks lik dot matrix - float min_space; //estimate threshold - float non_space; //gap size - float gap_iqr; //interquartile range + bool used_dm_model; // looks lik dot matrix + float min_space; // estimate threshold + float non_space; // gap size + float gap_iqr; // interquartile range float pitch_iqr; - float dm_gap_iqr; //interquartile range + float dm_gap_iqr; // interquartile range float dm_pitch_iqr; - float dm_pitch; //pitch with dm on - float pitch; //revised estimate - float initial_pitch; //guess at pitch - STATS gap_stats (0, maxwidth); - //centre-centre - STATS pitch_stats (0, maxwidth); + float dm_pitch; // pitch with dm on + float pitch; // revised estimate + float initial_pitch; // guess at pitch + STATS gap_stats(0, maxwidth); + // centre-centre + STATS pitch_stats(0, maxwidth); row->fixed_pitch = 0.0f; initial_pitch = row->fp_space; if (initial_pitch > row->xheight * (1 + words_default_fixed_limit)) - initial_pitch = row->xheight;//keep pitch decent + initial_pitch = row->xheight; // keep pitch decent non_space = row->fp_nonsp; - if (non_space > initial_pitch) - non_space = initial_pitch; + if (non_space > initial_pitch) non_space = initial_pitch; min_space = (initial_pitch + non_space) / 2; - if (!count_pitch_stats (row, &gap_stats, &pitch_stats, - initial_pitch, min_space, TRUE, FALSE, dm_gap)) { + if (!count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, + min_space, TRUE, FALSE, dm_gap)) { dm_gap_iqr = 0.0001; dm_pitch_iqr = maxwidth * 2.0f; dm_pitch = initial_pitch; + } else { + dm_gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25); + dm_pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25); + dm_pitch = pitch_stats.ile(0.5); } - else { - dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); - dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); - dm_pitch = pitch_stats.ile (0.5); - } - gap_stats.clear (); - pitch_stats.clear (); - if (!count_pitch_stats (row, &gap_stats, &pitch_stats, - initial_pitch, min_space, TRUE, FALSE, 0)) { + gap_stats.clear(); + pitch_stats.clear(); + if (!count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, + min_space, TRUE, FALSE, 0)) { gap_iqr = 0.0001; pitch_iqr = maxwidth * 3.0f; - } - else { - gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); - pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); + } else { + gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25); + pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25); if (testing_on) - tprintf - ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n", - initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5)); - initial_pitch = pitch_stats.ile (0.5); - if (min_space > initial_pitch - && count_pitch_stats (row, &gap_stats, &pitch_stats, - initial_pitch, initial_pitch, TRUE, FALSE, 0)) { + tprintf( + "First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, " + "pitch=%g\n", + initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile(0.5)); + initial_pitch = pitch_stats.ile(0.5); + if (min_space > initial_pitch && + count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, + initial_pitch, TRUE, FALSE, 0)) { min_space = initial_pitch; - gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); - pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); + gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25); + pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25); if (testing_on) - tprintf - ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n", - initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5)); - initial_pitch = pitch_stats.ile (0.5); + tprintf( + "Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, " + "pitch=%g\n", + initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile(0.5)); + initial_pitch = pitch_stats.ile(0.5); } } if (textord_debug_pitch_metric) - tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", - block_index, row_index, 'X', - pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr, - pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' : - (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M')); + tprintf( + "Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", + block_index, row_index, 'X', pitch_iqr, gap_iqr, dm_pitch_iqr, + dm_gap_iqr, + pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth + ? 'D' + : (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M')); if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) { row->pitch_decision = PITCH_DUNNO; - if (textord_debug_pitch_metric) - tprintf ("\n"); - return false; //insufficient data + if (textord_debug_pitch_metric) tprintf("\n"); + return false; // insufficient data } if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) { if (testing_on) - tprintf - ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n", - pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); - gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); - pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); - pitch = pitch_stats.ile (0.5); + tprintf( + "Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, " + "dm_gap_iqr=%g\n", + pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); + gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25); + pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25); + pitch = pitch_stats.ile(0.5); used_dm_model = false; - } - else { + } else { if (testing_on) - tprintf - ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n", - pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); + tprintf( + "Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, " + "dm_gap_iqr=%g\n", + pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); gap_iqr = dm_gap_iqr; pitch_iqr = dm_pitch_iqr; pitch = dm_pitch; used_dm_model = true; } if (textord_debug_pitch_metric) { - tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", - pitch_iqr, gap_iqr, pitch); - tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", - pitch_iqr / gap_iqr, pitch_iqr / block->xheight, - pitch_iqr < gap_iqr * textord_fpiqr_ratio - && pitch_iqr < block->xheight * textord_max_pitch_iqr - && pitch < block->xheight * textord_words_default_maxspace - ? 'F' : 'P'); + tprintf("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", pitch_iqr, gap_iqr, pitch); + tprintf("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", pitch_iqr / gap_iqr, + pitch_iqr / block->xheight, + pitch_iqr < gap_iqr * textord_fpiqr_ratio && + pitch_iqr < block->xheight * textord_max_pitch_iqr && + pitch < block->xheight * textord_words_default_maxspace + ? 'F' + : 'P'); } - if (pitch_iqr < gap_iqr * textord_fpiqr_ratio - && pitch_iqr < block->xheight * textord_max_pitch_iqr - && pitch < block->xheight * textord_words_default_maxspace) + if (pitch_iqr < gap_iqr * textord_fpiqr_ratio && + pitch_iqr < block->xheight * textord_max_pitch_iqr && + pitch < block->xheight * textord_words_default_maxspace) row->pitch_decision = PITCH_MAYBE_FIXED; else row->pitch_decision = PITCH_MAYBE_PROP; row->fixed_pitch = pitch; - row->kern_size = gap_stats.ile (0.5); - row->min_space = (int32_t) (row->fixed_pitch + non_space) / 2; + row->kern_size = gap_stats.ile(0.5); + row->min_space = (int32_t)(row->fixed_pitch + non_space) / 2; if (row->min_space > row->fixed_pitch) - row->min_space = (int32_t) row->fixed_pitch; + row->min_space = (int32_t)row->fixed_pitch; row->max_nonspace = row->min_space; row->space_size = row->fixed_pitch; row->space_threshold = (row->max_nonspace + row->min_space) / 2; @@ -961,7 +895,6 @@ bool find_row_pitch( //find lines return true; } - /********************************************************************** * fixed_pitch_row * @@ -970,53 +903,45 @@ bool find_row_pitch( //find lines * The larger threshold is the word gap threshold. **********************************************************************/ -bool fixed_pitch_row(TO_ROW* row, // row to do +bool fixed_pitch_row(TO_ROW* row, // row to do BLOCK* block, int32_t block_index // block_number ) { - const char *res_string; // pitch result - int16_t mid_cuts; // no of cheap cuts - float non_space; // gap size - float pitch_sd; // error on pitch - float sp_sd = 0.0f; // space sd + const char* res_string; // pitch result + int16_t mid_cuts; // no of cheap cuts + float non_space; // gap size + float pitch_sd; // error on pitch + float sp_sd = 0.0f; // space sd non_space = row->fp_nonsp; - if (non_space > row->fixed_pitch) - non_space = row->fixed_pitch; + if (non_space > row->fixed_pitch) non_space = row->fixed_pitch; POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr; if (textord_all_prop || (pb != nullptr && !pb->IsText())) { // Set the decision to definitely proportional. pitch_sd = textord_words_def_prop * row->fixed_pitch; row->pitch_decision = PITCH_DEF_PROP; } else { - pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left, - row->projection_right, - (row->fixed_pitch + non_space * 3) / 4, - row->fixed_pitch, sp_sd, mid_cuts, - &row->char_cells, - block_index == textord_debug_block); - if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch - && ((pitsync_linear_version & 3) < 3 - || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model - || sp_sd > 20 - || (pitch_sd == 0 && sp_sd > 10))))) { - if (pitch_sd < textord_words_def_fixed * row->fixed_pitch - && !row->all_caps - && ((pitsync_linear_version & 3) < 3 || sp_sd > 20)) + pitch_sd = tune_row_pitch( + row, &row->projection, row->projection_left, row->projection_right, + (row->fixed_pitch + non_space * 3) / 4, row->fixed_pitch, sp_sd, + mid_cuts, &row->char_cells, block_index == textord_debug_block); + if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch && + ((pitsync_linear_version & 3) < 3 || + ((pitsync_linear_version & 3) >= 3 && + (row->used_dm_model || sp_sd > 20 || + (pitch_sd == 0 && sp_sd > 10))))) { + if (pitch_sd < textord_words_def_fixed * row->fixed_pitch && + !row->all_caps && ((pitsync_linear_version & 3) < 3 || sp_sd > 20)) row->pitch_decision = PITCH_DEF_FIXED; else row->pitch_decision = PITCH_MAYBE_FIXED; - } - else if ((pitsync_linear_version & 3) < 3 - || sp_sd > 20 - || mid_cuts > 0 - || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) { + } else if ((pitsync_linear_version & 3) < 3 || sp_sd > 20 || mid_cuts > 0 || + pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) { if (pitch_sd < textord_words_def_prop * row->fixed_pitch) row->pitch_decision = PITCH_MAYBE_PROP; else row->pitch_decision = PITCH_DEF_PROP; - } - else + } else row->pitch_decision = PITCH_DUNNO; } @@ -1038,13 +963,12 @@ bool fixed_pitch_row(TO_ROW* row, // row to do default: res_string = "??"; } - tprintf (":sd/p=%g:occ=%g:init_res=%s\n", - pitch_sd / row->fixed_pitch, sp_sd, res_string); + tprintf(":sd/p=%g:occ=%g:init_res=%s\n", pitch_sd / row->fixed_pitch, sp_sd, + res_string); } return true; } - /********************************************************************** * count_pitch_stats * @@ -1054,87 +978,84 @@ bool fixed_pitch_row(TO_ROW* row, // row to do * The return value indicates whether there were any decent values to use. **********************************************************************/ -bool count_pitch_stats( //find lines - TO_ROW* row, //row to do - STATS* gap_stats, //blob gaps - STATS* pitch_stats, //centre-centre stats - float initial_pitch, //guess at pitch - float min_space, //estimate space size - bool ignore_outsize, //discard big objects - bool split_outsize, //split big objects - int32_t dm_gap //ignorable gaps +bool count_pitch_stats( // find lines + TO_ROW* row, // row to do + STATS* gap_stats, // blob gaps + STATS* pitch_stats, // centre-centre stats + float initial_pitch, // guess at pitch + float min_space, // estimate space size + bool ignore_outsize, // discard big objects + bool split_outsize, // split big objects + int32_t dm_gap // ignorable gaps ) { - bool prev_valid; //not word broken - BLOBNBOX *blob; //current blob - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - int32_t prev_right; //end of prev blob - int32_t prev_centre; //centre of previous blob - int32_t x_centre; //centre of this blob - int32_t blob_width; //width of blob - int32_t width_units; //no of widths in blob - float width; //blob width - TBOX blob_box; //bounding box - TBOX joined_box; //of super blob - - gap_stats->clear (); - pitch_stats->clear (); - if (blob_it.empty ()) - return false; + bool prev_valid; // not word broken + BLOBNBOX* blob; // current blob + // blobs + BLOBNBOX_IT blob_it = row->blob_list(); + int32_t prev_right; // end of prev blob + int32_t prev_centre; // centre of previous blob + int32_t x_centre; // centre of this blob + int32_t blob_width; // width of blob + int32_t width_units; // no of widths in blob + float width; // blob width + TBOX blob_box; // bounding box + TBOX joined_box; // of super blob + + gap_stats->clear(); + pitch_stats->clear(); + if (blob_it.empty()) return false; prev_valid = false; prev_centre = 0; prev_right = 0; // stop compiler warning - joined_box = blob_it.data ()->bounding_box (); + joined_box = blob_it.data()->bounding_box(); do { - blob_it.forward (); - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - if ((blob_box.left () - joined_box.right () < dm_gap - && !blob_it.at_first ()) - || blob->cblob() == nullptr) - joined_box += blob_box; //merge blobs + blob_it.forward(); + blob = blob_it.data(); + if (!blob->joined_to_prev()) { + blob_box = blob->bounding_box(); + if ((blob_box.left() - joined_box.right() < dm_gap && + !blob_it.at_first()) || + blob->cblob() == nullptr) + joined_box += blob_box; // merge blobs else { - blob_width = joined_box.width (); + blob_width = joined_box.width(); if (split_outsize) { - width_units = - (int32_t) floor ((float) blob_width / initial_pitch + 0.5); - if (width_units < 1) - width_units = 1; + width_units = (int32_t)floor((float)blob_width / initial_pitch + 0.5); + if (width_units < 1) width_units = 1; width_units--; - } - else if (ignore_outsize) { - width = (float) blob_width / initial_pitch; - width_units = width < 1 + words_default_fixed_limit - && width > 1 - words_default_fixed_limit ? 0 : -1; - } - else - width_units = 0; //everything in - x_centre = (int32_t) (joined_box.left () - + (blob_width - - width_units * initial_pitch) / 2); + } else if (ignore_outsize) { + width = (float)blob_width / initial_pitch; + width_units = width < 1 + words_default_fixed_limit && + width > 1 - words_default_fixed_limit + ? 0 + : -1; + } else + width_units = 0; // everything in + x_centre = (int32_t)(joined_box.left() + + (blob_width - width_units * initial_pitch) / 2); if (prev_valid && width_units >= 0) { // if (width_units>0) // { - // tprintf("wu=%d, width=%d, xc=%d, adding %d\n", + // tprintf("wu=%d, + // width=%d, + // xc=%d, adding + // %d\n", // width_units,blob_width,x_centre,x_centre-prev_centre); // } - gap_stats->add (joined_box.left () - prev_right, 1); - pitch_stats->add (x_centre - prev_centre, 1); + gap_stats->add(joined_box.left() - prev_right, 1); + pitch_stats->add(x_centre - prev_centre, 1); } - prev_centre = (int32_t) (x_centre + width_units * initial_pitch); - prev_right = joined_box.right (); - prev_valid = blob_box.left () - joined_box.right () < min_space; + prev_centre = (int32_t)(x_centre + width_units * initial_pitch); + prev_right = joined_box.right(); + prev_valid = blob_box.left() - joined_box.right() < min_space; prev_valid = prev_valid && width_units >= 0; joined_box = blob_box; } } - } - while (!blob_it.at_first ()); - return gap_stats->get_total () >= 3; + } while (!blob_it.at_first()); + return gap_stats->get_total() >= 3; } - /********************************************************************** * tune_row_pitch * @@ -1142,111 +1063,91 @@ bool count_pitch_stats( //find lines * the cell size over the row. **********************************************************************/ -float tune_row_pitch( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge of projection - int16_t projection_right, //edge of projection - float space_size, //size of blank - float& initial_pitch, //guess at pitch - float& best_sp_sd, //space sd - int16_t& best_mid_cuts, //no of cheap cuts - ICOORDELT_LIST* best_cells, //row cells - bool testing_on //inidividual words +float tune_row_pitch( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edge of projection + int16_t projection_right, // edge of projection + float space_size, // size of blank + float& initial_pitch, // guess at pitch + float& best_sp_sd, // space sd + int16_t& best_mid_cuts, // no of cheap cuts + ICOORDELT_LIST* best_cells, // row cells + bool testing_on // inidividual words ) { - int pitch_delta; //offset pitch - int16_t mid_cuts; //cheap cuts - float pitch_sd; //current sd - float best_sd; //best result - float best_pitch; //pitch for best result - float initial_sd; //starting error - float sp_sd; //space sd - ICOORDELT_LIST test_cells; //row cells - ICOORDELT_IT best_it; //start of best list + int pitch_delta; // offset pitch + int16_t mid_cuts; // cheap cuts + float pitch_sd; // current sd + float best_sd; // best result + float best_pitch; // pitch for best result + float initial_sd; // starting error + float sp_sd; // space sd + ICOORDELT_LIST test_cells; // row cells + ICOORDELT_IT best_it; // start of best list if (textord_fast_pitch_test) - return tune_row_pitch2 (row, projection, projection_left, - projection_right, space_size, initial_pitch, - best_sp_sd, - //space sd - best_mid_cuts, best_cells, testing_on); + return tune_row_pitch2(row, projection, projection_left, projection_right, + space_size, initial_pitch, best_sp_sd, + // space sd + best_mid_cuts, best_cells, testing_on); if (textord_disable_pitch_test) { best_sp_sd = initial_pitch; return initial_pitch; } - initial_sd = - compute_pitch_sd(row, - projection, - projection_left, - projection_right, - space_size, - initial_pitch, - best_sp_sd, - best_mid_cuts, - best_cells, - testing_on); + initial_sd = compute_pitch_sd( + row, projection, projection_left, projection_right, space_size, + initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on); best_sd = initial_sd; best_pitch = initial_pitch; if (testing_on) - tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd); + tprintf("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd); for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { - pitch_sd = - compute_pitch_sd (row, projection, projection_left, projection_right, - space_size, initial_pitch + pitch_delta, sp_sd, - mid_cuts, &test_cells, testing_on); + pitch_sd = compute_pitch_sd( + row, projection, projection_left, projection_right, space_size, + initial_pitch + pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on); if (testing_on) - tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, - pitch_sd); + tprintf("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, + pitch_sd); if (pitch_sd < best_sd) { best_sd = pitch_sd; best_mid_cuts = mid_cuts; best_sp_sd = sp_sd; best_pitch = initial_pitch + pitch_delta; - best_cells->clear (); - best_it.set_to_list (best_cells); - best_it.add_list_after (&test_cells); - } - else - test_cells.clear (); - if (pitch_sd > initial_sd) - break; //getting worse + best_cells->clear(); + best_it.set_to_list(best_cells); + best_it.add_list_after(&test_cells); + } else + test_cells.clear(); + if (pitch_sd > initial_sd) break; // getting worse } for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { - pitch_sd = - compute_pitch_sd (row, projection, projection_left, projection_right, - space_size, initial_pitch - pitch_delta, sp_sd, - mid_cuts, &test_cells, testing_on); + pitch_sd = compute_pitch_sd( + row, projection, projection_left, projection_right, space_size, + initial_pitch - pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on); if (testing_on) - tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, - pitch_sd); + tprintf("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, + pitch_sd); if (pitch_sd < best_sd) { best_sd = pitch_sd; best_mid_cuts = mid_cuts; best_sp_sd = sp_sd; best_pitch = initial_pitch - pitch_delta; - best_cells->clear (); - best_it.set_to_list (best_cells); - best_it.add_list_after (&test_cells); - } - else - test_cells.clear (); - if (pitch_sd > initial_sd) - break; + best_cells->clear(); + best_it.set_to_list(best_cells); + best_it.add_list_after(&test_cells); + } else + test_cells.clear(); + if (pitch_sd > initial_sd) break; } initial_pitch = best_pitch; if (textord_debug_pitch_metric) - print_pitch_sd(row, - projection, - projection_left, - projection_right, - space_size, - best_pitch); + print_pitch_sd(row, projection, projection_left, projection_right, + space_size, best_pitch); return best_sd; } - /********************************************************************** * tune_row_pitch * @@ -1254,28 +1155,28 @@ float tune_row_pitch( //find fp cells * the cell size over the row. **********************************************************************/ -float tune_row_pitch2( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge of projection - int16_t projection_right, //edge of projection - float space_size, //size of blank - float& initial_pitch, //guess at pitch - float& best_sp_sd, //space sd - int16_t& best_mid_cuts, //no of cheap cuts - ICOORDELT_LIST* best_cells, //row cells - bool testing_on //inidividual words +float tune_row_pitch2( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edge of projection + int16_t projection_right, // edge of projection + float space_size, // size of blank + float& initial_pitch, // guess at pitch + float& best_sp_sd, // space sd + int16_t& best_mid_cuts, // no of cheap cuts + ICOORDELT_LIST* best_cells, // row cells + bool testing_on // inidividual words ) { - int pitch_delta; //offset pitch - int16_t pixel; //pixel coord - int16_t best_pixel; //pixel coord - int16_t best_delta; //best pitch - int16_t best_pitch; //best pitch - int16_t start; //of good range - int16_t end; //of good range - int32_t best_count; //lowest sum - float best_sd; //best result - STATS *sum_proj; //summed projection + int pitch_delta; // offset pitch + int16_t pixel; // pixel coord + int16_t best_pixel; // pixel coord + int16_t best_delta; // best pitch + int16_t best_pitch; // best pitch + int16_t start; // of good range + int16_t end; // of good range + int32_t best_count; // lowest sum + float best_sd; // best result + STATS* sum_proj; // summed projection best_sp_sd = initial_pitch; @@ -1286,10 +1187,9 @@ float tune_row_pitch2( //find fp cells sum_proj = new STATS[textord_pitch_range * 2 + 1]; for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; - pitch_delta++) - sum_proj[textord_pitch_range + pitch_delta].set_range (0, - best_pitch + - pitch_delta + 1); + pitch_delta++) + sum_proj[textord_pitch_range + pitch_delta].set_range( + 0, best_pitch + pitch_delta + 1); for (pixel = projection_left; pixel <= projection_right; pixel++) { for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) { @@ -1298,70 +1198,56 @@ float tune_row_pitch2( //find fp cells projection->pile_count(pixel)); } } - best_count = sum_proj[textord_pitch_range].pile_count (0); + best_count = sum_proj[textord_pitch_range].pile_count(0); best_delta = 0; best_pixel = 0; for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; - pitch_delta++) { + pitch_delta++) { for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) { - if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel) - < best_count) { + if (sum_proj[textord_pitch_range + pitch_delta].pile_count(pixel) < + best_count) { best_count = - sum_proj[textord_pitch_range + - pitch_delta].pile_count (pixel); + sum_proj[textord_pitch_range + pitch_delta].pile_count(pixel); best_delta = pitch_delta; best_pixel = pixel; } } } if (testing_on) - tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", - initial_pitch, best_delta, best_count); + tprintf("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", + initial_pitch, best_delta, best_count); best_pitch += best_delta; initial_pitch = best_pitch; best_count++; best_count += best_count; - for (start = best_pixel - 2; start > best_pixel - best_pitch - && sum_proj[textord_pitch_range + - best_delta].pile_count (start % best_pitch) <= best_count; - start--); + for (start = best_pixel - 2; + start > best_pixel - best_pitch && + sum_proj[textord_pitch_range + best_delta].pile_count( + start % best_pitch) <= best_count; + start--) + ; for (end = best_pixel + 2; - end < best_pixel + best_pitch - && sum_proj[textord_pitch_range + - best_delta].pile_count (end % best_pitch) <= best_count; - end++); - - best_sd = - compute_pitch_sd(row, - projection, - projection_left, - projection_right, - space_size, - initial_pitch, - best_sp_sd, - best_mid_cuts, - best_cells, - testing_on, - start, - end); + end < best_pixel + best_pitch && + sum_proj[textord_pitch_range + best_delta].pile_count( + end % best_pitch) <= best_count; + end++) + ; + + best_sd = compute_pitch_sd(row, projection, projection_left, projection_right, + space_size, initial_pitch, best_sp_sd, + best_mid_cuts, best_cells, testing_on, start, end); if (testing_on) - tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, - best_sd); + tprintf("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, best_sd); if (textord_debug_pitch_metric) - print_pitch_sd(row, - projection, - projection_left, - projection_right, - space_size, - initial_pitch); + print_pitch_sd(row, projection, projection_left, projection_right, + space_size, initial_pitch); - delete[]sum_proj; + delete[] sum_proj; return best_sd; } - /********************************************************************** * compute_pitch_sd * @@ -1369,49 +1255,48 @@ float tune_row_pitch2( //find fp cells * the cell size over the row. **********************************************************************/ -float compute_pitch_sd( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge - int16_t projection_right, //edge - float space_size, //size of blank - float initial_pitch, //guess at pitch - float& sp_sd, //space sd - int16_t& mid_cuts, //no of free cuts - ICOORDELT_LIST* row_cells, //list of chop pts - bool testing_on, //inidividual words - int16_t start, //start of good range - int16_t end //end of good range +float compute_pitch_sd( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edge + int16_t projection_right, // edge + float space_size, // size of blank + float initial_pitch, // guess at pitch + float& sp_sd, // space sd + int16_t& mid_cuts, // no of free cuts + ICOORDELT_LIST* row_cells, // list of chop pts + bool testing_on, // inidividual words + int16_t start, // start of good range + int16_t end // end of good range ) { - int16_t occupation; //no of cells in word. - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT start_it; //start of word - BLOBNBOX_IT plot_it; //for plotting - int16_t blob_count; //no of blobs - TBOX blob_box; //bounding box - TBOX prev_box; //of super blob - int32_t prev_right; //of word sync - int scale_factor; //on scores for big words - int32_t sp_count; //spaces - FPSEGPT_LIST seg_list; //char cells - FPSEGPT_IT seg_it; //iterator - int16_t segpos; //position of segment - int16_t cellpos; //previous cell boundary - //iterator + int16_t occupation; // no of cells in word. + // blobs + BLOBNBOX_IT blob_it = row->blob_list(); + BLOBNBOX_IT start_it; // start of word + BLOBNBOX_IT plot_it; // for plotting + int16_t blob_count; // no of blobs + TBOX blob_box; // bounding box + TBOX prev_box; // of super blob + int32_t prev_right; // of word sync + int scale_factor; // on scores for big words + int32_t sp_count; // spaces + FPSEGPT_LIST seg_list; // char cells + FPSEGPT_IT seg_it; // iterator + int16_t segpos; // position of segment + int16_t cellpos; // previous cell boundary + // iterator ICOORDELT_IT cell_it = row_cells; - ICOORDELT *cell; //new cell - double sqsum; //sum of squares - double spsum; //of spaces - double sp_var; //space error - double word_sync; //result for word - int32_t total_count; //total blobs + ICOORDELT* cell; // new cell + double sqsum; // sum of squares + double spsum; // of spaces + double sp_var; // space error + double word_sync; // result for word + int32_t total_count; // total blobs if ((pitsync_linear_version & 3) > 1) { - word_sync = compute_pitch_sd2 (row, projection, projection_left, - projection_right, initial_pitch, - occupation, mid_cuts, row_cells, - testing_on, start, end); + word_sync = compute_pitch_sd2(row, projection, projection_left, + projection_right, initial_pitch, occupation, + mid_cuts, row_cells, testing_on, start, end); sp_sd = occupation; return word_sync; } @@ -1422,109 +1307,99 @@ float compute_pitch_sd( //find fp cells sp_count = 0; spsum = 0; prev_right = -1; - if (blob_it.empty ()) - return space_size * 10; + if (blob_it.empty()) return space_size * 10; #ifndef GRAPHICS_DISABLED if (testing_on && to_win != nullptr) { - blob_box = blob_it.data ()->bounding_box (); - projection->plot (to_win, projection_left, - row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); + blob_box = blob_it.data()->bounding_box(); + projection->plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, + ScrollView::CORAL); } #endif start_it = blob_it; blob_count = 0; - blob_box = box_next (&blob_it);//first blob - blob_it.mark_cycle_pt (); + blob_box = box_next(&blob_it); // first blob + blob_it.mark_cycle_pt(); do { - for (; blob_count > 0; blob_count--) - box_next(&start_it); + for (; blob_count > 0; blob_count--) box_next(&start_it); do { prev_box = blob_box; blob_count++; - blob_box = box_next (&blob_it); - } - while (!blob_it.cycled_list () - && blob_box.left () - prev_box.right () < space_size); + blob_box = box_next(&blob_it); + } while (!blob_it.cycled_list() && + blob_box.left() - prev_box.right() < space_size); plot_it = start_it; if (pitsync_linear_version & 3) word_sync = - check_pitch_sync2 (&start_it, blob_count, (int16_t) initial_pitch, 2, - projection, projection_left, projection_right, - row->xheight * textord_projection_scale, - occupation, &seg_list, start, end); + check_pitch_sync2(&start_it, blob_count, (int16_t)initial_pitch, 2, + projection, projection_left, projection_right, + row->xheight * textord_projection_scale, occupation, + &seg_list, start, end); else word_sync = - check_pitch_sync (&start_it, blob_count, (int16_t) initial_pitch, 2, - projection, &seg_list); + check_pitch_sync(&start_it, blob_count, (int16_t)initial_pitch, 2, + projection, &seg_list); if (testing_on) { - tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ", - prev_box.right (), prev_box.top (), - seg_list.length () - 1, word_sync); - seg_it.set_to_list (&seg_list); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); - seg_it.forward ()) { - if (seg_it.data ()->faked) - tprintf ("(F)"); - tprintf ("%d, ", seg_it.data ()->position ()); + tprintf("Word ending at (%d,%d), len=%d, sync rating=%g, ", + prev_box.right(), prev_box.top(), seg_list.length() - 1, + word_sync); + seg_it.set_to_list(&seg_list); + for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { + if (seg_it.data()->faked) tprintf("(F)"); + tprintf("%d, ", seg_it.data()->position()); // tprintf("C=%g, s=%g, sq=%g\n", // seg_it.data()->cost_function(), // seg_it.data()->sum(), // seg_it.data()->squares()); } - tprintf ("\n"); + tprintf("\n"); } #ifndef GRAPHICS_DISABLED if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list); #endif - seg_it.set_to_list (&seg_list); + seg_it.set_to_list(&seg_list); if (prev_right >= 0) { - sp_var = seg_it.data ()->position () - prev_right; - sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; + sp_var = seg_it.data()->position() - prev_right; + sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch; sp_var *= sp_var; spsum += sp_var; sp_count++; } - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - segpos = seg_it.data ()->position (); - if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) { - //big gap - while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) { - cell = new ICOORDELT (cellpos + (int16_t) initial_pitch, 0); - cell_it.add_after_then_move (cell); - cellpos += (int16_t) initial_pitch; + for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { + segpos = seg_it.data()->position(); + if (cell_it.empty() || segpos > cellpos + initial_pitch / 2) { + // big gap + while (!cell_it.empty() && segpos > cellpos + initial_pitch * 3 / 2) { + cell = new ICOORDELT(cellpos + (int16_t)initial_pitch, 0); + cell_it.add_after_then_move(cell); + cellpos += (int16_t)initial_pitch; } - //make new one - cell = new ICOORDELT (segpos, 0); - cell_it.add_after_then_move (cell); + // make new one + cell = new ICOORDELT(segpos, 0); + cell_it.add_after_then_move(cell); cellpos = segpos; - } - else if (segpos > cellpos - initial_pitch / 2) { - cell = cell_it.data (); - //average positions - cell->set_x ((cellpos + segpos) / 2); - cellpos = cell->x (); + } else if (segpos > cellpos - initial_pitch / 2) { + cell = cell_it.data(); + // average positions + cell->set_x((cellpos + segpos) / 2); + cellpos = cell->x(); } } - seg_it.move_to_last (); - prev_right = seg_it.data ()->position (); + seg_it.move_to_last(); + prev_right = seg_it.data()->position(); if (textord_pitch_scalebigwords) { - scale_factor = (seg_list.length () - 2) / 2; - if (scale_factor < 1) - scale_factor = 1; - } - else + scale_factor = (seg_list.length() - 2) / 2; + if (scale_factor < 1) scale_factor = 1; + } else scale_factor = 1; sqsum += word_sync * scale_factor; - total_count += (seg_list.length () - 1) * scale_factor; - seg_list.clear (); - } - while (!blob_it.cycled_list ()); - sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0; - return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10; + total_count += (seg_list.length() - 1) * scale_factor; + seg_list.clear(); + } while (!blob_it.cycled_list()); + sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0; + return total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10; } - /********************************************************************** * compute_pitch_sd2 * @@ -1532,91 +1407,85 @@ float compute_pitch_sd( //find fp cells * the cell size over the row. **********************************************************************/ -float compute_pitch_sd2( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge - int16_t projection_right, //edge - float initial_pitch, //guess at pitch - int16_t& occupation, //no of occupied cells - int16_t& mid_cuts, //no of free cuts - ICOORDELT_LIST* row_cells, //list of chop pts - bool testing_on, //inidividual words - int16_t start, //start of good range - int16_t end //end of good range +float compute_pitch_sd2( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edge + int16_t projection_right, // edge + float initial_pitch, // guess at pitch + int16_t& occupation, // no of occupied cells + int16_t& mid_cuts, // no of free cuts + ICOORDELT_LIST* row_cells, // list of chop pts + bool testing_on, // inidividual words + int16_t start, // start of good range + int16_t end // end of good range ) { - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); + // blobs + BLOBNBOX_IT blob_it = row->blob_list(); BLOBNBOX_IT plot_it; - int16_t blob_count; //no of blobs - TBOX blob_box; //bounding box - FPSEGPT_LIST seg_list; //char cells - FPSEGPT_IT seg_it; //iterator - int16_t segpos; //position of segment - //iterator + int16_t blob_count; // no of blobs + TBOX blob_box; // bounding box + FPSEGPT_LIST seg_list; // char cells + FPSEGPT_IT seg_it; // iterator + int16_t segpos; // position of segment + // iterator ICOORDELT_IT cell_it = row_cells; - ICOORDELT *cell; //new cell - double word_sync; //result for word + ICOORDELT* cell; // new cell + double word_sync; // result for word mid_cuts = 0; - if (blob_it.empty ()) { + if (blob_it.empty()) { occupation = 0; return initial_pitch * 10; } #ifndef GRAPHICS_DISABLED if (testing_on && to_win != nullptr) { - projection->plot (to_win, projection_left, - row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); + projection->plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, + ScrollView::CORAL); } #endif blob_count = 0; - blob_it.mark_cycle_pt (); + blob_it.mark_cycle_pt(); do { - //first blob - blob_box = box_next (&blob_it); + // first blob + blob_box = box_next(&blob_it); blob_count++; - } - while (!blob_it.cycled_list ()); + } while (!blob_it.cycled_list()); plot_it = blob_it; - word_sync = check_pitch_sync2 (&blob_it, blob_count, (int16_t) initial_pitch, - 2, projection, projection_left, - projection_right, - row->xheight * textord_projection_scale, - occupation, &seg_list, start, end); + word_sync = check_pitch_sync2(&blob_it, blob_count, (int16_t)initial_pitch, 2, + projection, projection_left, projection_right, + row->xheight * textord_projection_scale, + occupation, &seg_list, start, end); if (testing_on) { - tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ", - blob_box.right (), blob_box.top (), - seg_list.length () - 1, word_sync); - seg_it.set_to_list (&seg_list); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - if (seg_it.data ()->faked) - tprintf ("(F)"); - tprintf ("%d, ", seg_it.data ()->position ()); + tprintf("Row ending at (%d,%d), len=%d, sync rating=%g, ", blob_box.right(), + blob_box.top(), seg_list.length() - 1, word_sync); + seg_it.set_to_list(&seg_list); + for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { + if (seg_it.data()->faked) tprintf("(F)"); + tprintf("%d, ", seg_it.data()->position()); // tprintf("C=%g, s=%g, sq=%g\n", // seg_it.data()->cost_function(), // seg_it.data()->sum(), // seg_it.data()->squares()); } - tprintf ("\n"); + tprintf("\n"); } #ifndef GRAPHICS_DISABLED if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list); #endif - seg_it.set_to_list (&seg_list); - for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { - segpos = seg_it.data ()->position (); - //make new one - cell = new ICOORDELT (segpos, 0); - cell_it.add_after_then_move (cell); - if (seg_it.at_last ()) - mid_cuts = seg_it.data ()->cheap_cuts (); + seg_it.set_to_list(&seg_list); + for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { + segpos = seg_it.data()->position(); + // make new one + cell = new ICOORDELT(segpos, 0); + cell_it.add_after_then_move(cell); + if (seg_it.at_last()) mid_cuts = seg_it.data()->cheap_cuts(); } - seg_list.clear (); - return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10; + seg_list.clear(); + return occupation > 0 ? sqrt(word_sync / occupation) : initial_pitch * 10; } - /********************************************************************** * print_pitch_sd * @@ -1624,38 +1493,36 @@ float compute_pitch_sd2( //find fp cells * the cell size over the row. **********************************************************************/ -void print_pitch_sd( //find fp cells - TO_ROW *row, //row to do - STATS *projection, //vertical projection - int16_t projection_left, //edges //size of blank - int16_t projection_right, - float space_size, - float initial_pitch //guess at pitch - ) { - const char *res2; //pitch result - int16_t occupation; //used cells - float sp_sd; //space sd - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); - BLOBNBOX_IT start_it; //start of word - BLOBNBOX_IT row_start; //start of row - int16_t blob_count; //no of blobs - int16_t total_blob_count; //total blobs in line - TBOX blob_box; //bounding box - TBOX prev_box; //of super blob - int32_t prev_right; //of word sync - int scale_factor; //on scores for big words - int32_t sp_count; //spaces - FPSEGPT_LIST seg_list; //char cells - FPSEGPT_IT seg_it; //iterator - double sqsum; //sum of squares - double spsum; //of spaces - double sp_var; //space error - double word_sync; //result for word - double total_count; //total cuts - - if (blob_it.empty ()) - return; +void print_pitch_sd( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edges //size of blank + int16_t projection_right, float space_size, + float initial_pitch // guess at pitch +) { + const char* res2; // pitch result + int16_t occupation; // used cells + float sp_sd; // space sd + // blobs + BLOBNBOX_IT blob_it = row->blob_list(); + BLOBNBOX_IT start_it; // start of word + BLOBNBOX_IT row_start; // start of row + int16_t blob_count; // no of blobs + int16_t total_blob_count; // total blobs in line + TBOX blob_box; // bounding box + TBOX prev_box; // of super blob + int32_t prev_right; // of word sync + int scale_factor; // on scores for big words + int32_t sp_count; // spaces + FPSEGPT_LIST seg_list; // char cells + FPSEGPT_IT seg_it; // iterator + double sqsum; // sum of squares + double spsum; // of spaces + double sp_var; // space error + double word_sync; // result for word + double total_count; // total cuts + + if (blob_it.empty()) return; row_start = blob_it; total_blob_count = 0; @@ -1667,83 +1534,74 @@ void print_pitch_sd( //find fp cells blob_it = row_start; start_it = blob_it; blob_count = 0; - blob_box = box_next (&blob_it);//first blob - blob_it.mark_cycle_pt (); + blob_box = box_next(&blob_it); // first blob + blob_it.mark_cycle_pt(); do { - for (; blob_count > 0; blob_count--) - box_next(&start_it); + for (; blob_count > 0; blob_count--) box_next(&start_it); do { prev_box = blob_box; blob_count++; - blob_box = box_next (&blob_it); - } - while (!blob_it.cycled_list () - && blob_box.left () - prev_box.right () < space_size); - word_sync = - check_pitch_sync2 (&start_it, blob_count, (int16_t) initial_pitch, 2, - projection, projection_left, projection_right, - row->xheight * textord_projection_scale, - occupation, &seg_list, 0, 0); + blob_box = box_next(&blob_it); + } while (!blob_it.cycled_list() && + blob_box.left() - prev_box.right() < space_size); + word_sync = check_pitch_sync2( + &start_it, blob_count, (int16_t)initial_pitch, 2, projection, + projection_left, projection_right, + row->xheight * textord_projection_scale, occupation, &seg_list, 0, 0); total_blob_count += blob_count; - seg_it.set_to_list (&seg_list); + seg_it.set_to_list(&seg_list); if (prev_right >= 0) { - sp_var = seg_it.data ()->position () - prev_right; - sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; + sp_var = seg_it.data()->position() - prev_right; + sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch; sp_var *= sp_var; spsum += sp_var; sp_count++; } - seg_it.move_to_last (); - prev_right = seg_it.data ()->position (); + seg_it.move_to_last(); + prev_right = seg_it.data()->position(); if (textord_pitch_scalebigwords) { - scale_factor = (seg_list.length () - 2) / 2; - if (scale_factor < 1) - scale_factor = 1; - } - else + scale_factor = (seg_list.length() - 2) / 2; + if (scale_factor < 1) scale_factor = 1; + } else scale_factor = 1; sqsum += word_sync * scale_factor; - total_count += (seg_list.length () - 1) * scale_factor; - seg_list.clear (); - } - while (!blob_it.cycled_list ()); - sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0; - word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10; - tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", - word_sync, word_sync / initial_pitch, sp_sd, - word_sync < textord_words_pitchsd_threshold * initial_pitch - ? 'F' : 'P'); + total_count += (seg_list.length() - 1) * scale_factor; + seg_list.clear(); + } while (!blob_it.cycled_list()); + sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0; + word_sync = total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10; + tprintf( + "new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", word_sync, + word_sync / initial_pitch, sp_sd, + word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P'); start_it = row_start; blob_it = row_start; - word_sync = - check_pitch_sync2 (&blob_it, total_blob_count, (int16_t) initial_pitch, 2, - projection, projection_left, projection_right, - row->xheight * textord_projection_scale, occupation, - &seg_list, 0, 0); - if (occupation > 1) - word_sync /= occupation; - word_sync = sqrt (word_sync); + word_sync = check_pitch_sync2( + &blob_it, total_blob_count, (int16_t)initial_pitch, 2, projection, + projection_left, projection_right, + row->xheight * textord_projection_scale, occupation, &seg_list, 0, 0); + if (occupation > 1) word_sync /= occupation; + word_sync = sqrt(word_sync); #ifndef GRAPHICS_DISABLED if (textord_show_row_cuts && to_win != nullptr) plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list); #endif - seg_list.clear (); + seg_list.clear(); if (word_sync < textord_words_pitchsd_threshold * initial_pitch) { - if (word_sync < textord_words_def_fixed * initial_pitch - && !row->all_caps) + if (word_sync < textord_words_def_fixed * initial_pitch && !row->all_caps) res2 = "DF"; else res2 = "MF"; - } - else + } else res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP"; - tprintf - ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n", - word_sync, word_sync / initial_pitch, - word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P', - occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps); + tprintf( + "row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, " + "all_caps=%d\n", + word_sync, word_sync / initial_pitch, + word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P', + occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps); } /********************************************************************** @@ -1752,17 +1610,17 @@ void print_pitch_sd( //find fp cells * Extract marked leader blobs and put them * into words in advance of fixed pitch checking and word generation. **********************************************************************/ -void find_repeated_chars(TO_BLOCK* block, // Block to search. - bool testing_on) { // Debug mode. +void find_repeated_chars(TO_BLOCK* block, // Block to search. + bool testing_on) { // Debug mode. POLY_BLOCK* pb = block->block->pdblk.poly_block(); if (pb != nullptr && !pb->IsText()) return; // Don't find repeated chars in non-text blocks. - TO_ROW *row; + TO_ROW* row; BLOBNBOX_IT box_it; - BLOBNBOX_IT search_it; // forward search - WERD *word; // new word - TBOX word_box; // for plotting + BLOBNBOX_IT search_it; // forward search + WERD* word; // new word + TBOX word_box; // for plotting int blobcount, repeated_set; TO_ROW_IT row_it = block->get_rows(); @@ -1770,7 +1628,7 @@ void find_repeated_chars(TO_BLOCK* block, // Block to search. for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); box_it.set_to_list(row->blob_list()); - if (box_it.empty()) continue; // no blobs in this row + if (box_it.empty()) continue; // no blobs in this row if (!row->rep_chars_marked()) { mark_repeated_chars(row); } @@ -1810,7 +1668,6 @@ void find_repeated_chars(TO_BLOCK* block, // Block to search. } } - /********************************************************************** * plot_fp_word * @@ -1818,20 +1675,20 @@ void find_repeated_chars(TO_BLOCK* block, // Block to search. **********************************************************************/ #ifndef GRAPHICS_DISABLED -void plot_fp_word( //draw block of words - TO_BLOCK *block, //block to draw - float pitch, //pitch to draw with - float nonspace //for space threshold - ) { - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - row->min_space = (int32_t) ((pitch + nonspace) / 2); +void plot_fp_word( // draw block of words + TO_BLOCK* block, // block to draw + float pitch, // pitch to draw with + float nonspace // for space threshold +) { + TO_ROW* row; // current row + TO_ROW_IT row_it = block->get_rows(); + + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + row->min_space = (int32_t)((pitch + nonspace) / 2); row->max_nonspace = row->min_space; row->space_threshold = row->min_space; - plot_word_decisions (to_win, (int16_t) pitch, row); + plot_word_decisions(to_win, (int16_t)pitch, row); } } #endif diff --git a/src/textord/topitch.h b/src/textord/topitch.h index 82d70a8876..b7e2d272c9 100644 --- a/src/textord/topitch.h +++ b/src/textord/topitch.h @@ -17,172 +17,164 @@ * **********************************************************************/ -#ifndef TOPITCH_H -#define TOPITCH_H +#ifndef TOPITCH_H +#define TOPITCH_H -#include "blobbox.h" +#include "blobbox.h" namespace tesseract { class Tesseract; } -extern BOOL_VAR_H (textord_debug_pitch_test, FALSE, -"Debug on fixed pitch test"); -extern BOOL_VAR_H (textord_debug_pitch_metric, FALSE, -"Write full metric stuff"); -extern BOOL_VAR_H (textord_show_row_cuts, FALSE, "Draw row-level cuts"); -extern BOOL_VAR_H (textord_show_page_cuts, FALSE, "Draw page-level cuts"); -extern BOOL_VAR_H (textord_pitch_cheat, FALSE, -"Use correct answer for fixed/prop"); -extern BOOL_VAR_H (textord_blockndoc_fixed, TRUE, -"Attempt whole doc/block fixed pitch"); -extern BOOL_VAR_H (textord_fast_pitch_test, FALSE, -"Do even faster pitch algorithm"); -extern double_VAR_H (textord_projection_scale, 0.125, -"Ding rate for mid-cuts"); -extern double_VAR_H (textord_balance_factor, 2.0, -"Ding rate for unbalanced char cells"); +extern BOOL_VAR_H(textord_debug_pitch_test, FALSE, "Debug on fixed pitch test"); +extern BOOL_VAR_H(textord_debug_pitch_metric, FALSE, "Write full metric stuff"); +extern BOOL_VAR_H(textord_show_row_cuts, FALSE, "Draw row-level cuts"); +extern BOOL_VAR_H(textord_show_page_cuts, FALSE, "Draw page-level cuts"); +extern BOOL_VAR_H(textord_pitch_cheat, FALSE, + "Use correct answer for fixed/prop"); +extern BOOL_VAR_H(textord_blockndoc_fixed, TRUE, + "Attempt whole doc/block fixed pitch"); +extern BOOL_VAR_H(textord_fast_pitch_test, FALSE, + "Do even faster pitch algorithm"); +extern double_VAR_H(textord_projection_scale, 0.125, "Ding rate for mid-cuts"); +extern double_VAR_H(textord_balance_factor, 2.0, + "Ding rate for unbalanced char cells"); void compute_fixed_pitch(ICOORD page_tr, // top right TO_BLOCK_LIST* port_blocks, // input list float gradient, // page skew FCOORD rotation, // for drawing - bool testing_on); // correct orientation -void fix_row_pitch( //get some value - TO_ROW *bad_row, //row to fix - TO_BLOCK *bad_block, //block of bad_row - TO_BLOCK_LIST *blocks, //blocks to scan - int32_t row_target, //number of row - int32_t block_target //number of block - ); -void compute_block_pitch(TO_BLOCK* block, // input list + bool testing_on); // correct orientation +void fix_row_pitch( // get some value + TO_ROW* bad_row, // row to fix + TO_BLOCK* bad_block, // block of bad_row + TO_BLOCK_LIST* blocks, // blocks to scan + int32_t row_target, // number of row + int32_t block_target // number of block +); +void compute_block_pitch(TO_BLOCK* block, // input list FCOORD rotation, // for drawing - int32_t block_index, // block number - bool testing_on); // correct orientation -bool compute_rows_pitch( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index, //block number - bool testing_on //correct orientation -); -bool try_doc_fixed( //determine pitch - ICOORD page_tr, //top right - TO_BLOCK_LIST* port_blocks, //input list - float gradient //page skew -); -bool try_block_fixed( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index //block number -); -bool try_rows_fixed( //find line stats - TO_BLOCK* block, //block to do - int32_t block_index, //block number - bool testing_on //correct orientation -); -void print_block_counts( //find line stats - TO_BLOCK *block, //block to do - int32_t block_index //block number - ); -void count_block_votes( //find line stats - TO_BLOCK *block, //block to do - int32_t &def_fixed, //add to counts - int32_t &def_prop, - int32_t &maybe_fixed, - int32_t &maybe_prop, - int32_t &corr_fixed, - int32_t &corr_prop, - int32_t &dunno); -bool row_pitch_stats( //find line stats - TO_ROW* row, //current row - int32_t maxwidth, //of spaces - bool testing_on //correct orientation -); -bool find_row_pitch( //find lines - TO_ROW* row, //row to do - int32_t maxwidth, //max permitted space - int32_t dm_gap, //ignorable gaps - TO_BLOCK* block, //block of row - int32_t block_index, //block_number - int32_t row_index, //number of row - bool testing_on //correct orientation -); -bool fixed_pitch_row( //find lines - TO_ROW* row, //row to do - BLOCK* block, - int32_t block_index //block_number -); -bool count_pitch_stats( //find lines - TO_ROW* row, //row to do - STATS* gap_stats, //blob gaps - STATS* pitch_stats, //centre-centre stats - float initial_pitch, //guess at pitch - float min_space, //estimate space size - bool ignore_outsize, //discard big objects - bool split_outsize, //split big objects - int32_t dm_gap //ignorable gaps -); -float tune_row_pitch( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge of projection - int16_t projection_right, //edge of projection - float space_size, //size of blank - float& initial_pitch, //guess at pitch - float& best_sp_sd, //space sd - int16_t& best_mid_cuts, //no of cheap cuts - ICOORDELT_LIST* best_cells, //row cells - bool testing_on //inidividual words -); -float tune_row_pitch2( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge of projection - int16_t projection_right, //edge of projection - float space_size, //size of blank - float& initial_pitch, //guess at pitch - float& best_sp_sd, //space sd - int16_t& best_mid_cuts, //no of cheap cuts - ICOORDELT_LIST* best_cells, //row cells - bool testing_on //inidividual words -); -float compute_pitch_sd( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge - int16_t projection_right, //edge - float space_size, //size of blank - float initial_pitch, //guess at pitch - float& sp_sd, //space sd - int16_t& mid_cuts, //no of free cuts - ICOORDELT_LIST* row_cells, //list of chop pts - bool testing_on, //inidividual words - int16_t start = 0, //start of good range - int16_t end = 0 //end of good range -); -float compute_pitch_sd2( //find fp cells - TO_ROW* row, //row to do - STATS* projection, //vertical projection - int16_t projection_left, //edge - int16_t projection_right, //edge - float initial_pitch, //guess at pitch - int16_t& occupation, //no of occupied cells - int16_t& mid_cuts, //no of free cuts - ICOORDELT_LIST* row_cells, //list of chop pts - bool testing_on, //inidividual words - int16_t start = 0, //start of good range - int16_t end = 0 //end of good range -); -void print_pitch_sd( //find fp cells - TO_ROW *row, //row to do - STATS *projection, //vertical projection - int16_t projection_left, //edges //size of blank - int16_t projection_right, - float space_size, - float initial_pitch //guess at pitch - ); -void find_repeated_chars(TO_BLOCK* block, // Block to search. + int32_t block_index, // block number + bool testing_on); // correct orientation +bool compute_rows_pitch( // find line stats + TO_BLOCK* block, // block to do + int32_t block_index, // block number + bool testing_on // correct orientation +); +bool try_doc_fixed( // determine pitch + ICOORD page_tr, // top right + TO_BLOCK_LIST* port_blocks, // input list + float gradient // page skew +); +bool try_block_fixed( // find line stats + TO_BLOCK* block, // block to do + int32_t block_index // block number +); +bool try_rows_fixed( // find line stats + TO_BLOCK* block, // block to do + int32_t block_index, // block number + bool testing_on // correct orientation +); +void print_block_counts( // find line stats + TO_BLOCK* block, // block to do + int32_t block_index // block number +); +void count_block_votes( // find line stats + TO_BLOCK* block, // block to do + int32_t& def_fixed, // add to counts + int32_t& def_prop, int32_t& maybe_fixed, int32_t& maybe_prop, + int32_t& corr_fixed, int32_t& corr_prop, int32_t& dunno); +bool row_pitch_stats( // find line stats + TO_ROW* row, // current row + int32_t maxwidth, // of spaces + bool testing_on // correct orientation +); +bool find_row_pitch( // find lines + TO_ROW* row, // row to do + int32_t maxwidth, // max permitted space + int32_t dm_gap, // ignorable gaps + TO_BLOCK* block, // block of row + int32_t block_index, // block_number + int32_t row_index, // number of row + bool testing_on // correct orientation +); +bool fixed_pitch_row( // find lines + TO_ROW* row, // row to do + BLOCK* block, + int32_t block_index // block_number +); +bool count_pitch_stats( // find lines + TO_ROW* row, // row to do + STATS* gap_stats, // blob gaps + STATS* pitch_stats, // centre-centre stats + float initial_pitch, // guess at pitch + float min_space, // estimate space size + bool ignore_outsize, // discard big objects + bool split_outsize, // split big objects + int32_t dm_gap // ignorable gaps +); +float tune_row_pitch( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edge of projection + int16_t projection_right, // edge of projection + float space_size, // size of blank + float& initial_pitch, // guess at pitch + float& best_sp_sd, // space sd + int16_t& best_mid_cuts, // no of cheap cuts + ICOORDELT_LIST* best_cells, // row cells + bool testing_on // inidividual words +); +float tune_row_pitch2( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edge of projection + int16_t projection_right, // edge of projection + float space_size, // size of blank + float& initial_pitch, // guess at pitch + float& best_sp_sd, // space sd + int16_t& best_mid_cuts, // no of cheap cuts + ICOORDELT_LIST* best_cells, // row cells + bool testing_on // inidividual words +); +float compute_pitch_sd( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edge + int16_t projection_right, // edge + float space_size, // size of blank + float initial_pitch, // guess at pitch + float& sp_sd, // space sd + int16_t& mid_cuts, // no of free cuts + ICOORDELT_LIST* row_cells, // list of chop pts + bool testing_on, // inidividual words + int16_t start = 0, // start of good range + int16_t end = 0 // end of good range +); +float compute_pitch_sd2( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edge + int16_t projection_right, // edge + float initial_pitch, // guess at pitch + int16_t& occupation, // no of occupied cells + int16_t& mid_cuts, // no of free cuts + ICOORDELT_LIST* row_cells, // list of chop pts + bool testing_on, // inidividual words + int16_t start = 0, // start of good range + int16_t end = 0 // end of good range +); +void print_pitch_sd( // find fp cells + TO_ROW* row, // row to do + STATS* projection, // vertical projection + int16_t projection_left, // edges //size of blank + int16_t projection_right, float space_size, + float initial_pitch // guess at pitch +); +void find_repeated_chars(TO_BLOCK* block, // Block to search. bool testing_on); // Debug mode. -void plot_fp_word( //draw block of words - TO_BLOCK *block, //block to draw - float pitch, //pitch to draw with - float nonspace //for space threshold - ); +void plot_fp_word( // draw block of words + TO_BLOCK* block, // block to draw + float pitch, // pitch to draw with + float nonspace // for space threshold +); #endif diff --git a/src/textord/tordmain.cpp b/src/textord/tordmain.cpp index 7fd428a38c..4bb2881b48 100644 --- a/src/textord/tordmain.cpp +++ b/src/textord/tordmain.cpp @@ -24,24 +24,24 @@ #ifdef __UNIX__ #include #endif -#include "stderr.h" -#include "globaloc.h" -#include "blread.h" #include "blobbox.h" +#include "blread.h" #include "ccstruct.h" -#include "edgblob.h" #include "drawtord.h" +#include "edgblob.h" +#include "globaloc.h" #include "makerow.h" -#include "wordseg.h" +#include "stderr.h" #include "textord.h" #include "tordmain.h" +#include "wordseg.h" #include "allheaders.h" #undef EXTERN #define EXTERN -#define MAX_NEAREST_DIST 600 //for block skew stats +#define MAX_NEAREST_DIST 600 // for block skew stats namespace tesseract { @@ -58,8 +58,8 @@ void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { const TBOX& box = blob->bounding_box(); int width = box.width(); int height = box.height(); - Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(), - width, height); + Box* blob_pix_box = + boxCreate(box.left(), pix_height - box.top(), width, height); Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr); boxDestroy(&blob_pix_box); Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG); @@ -70,7 +70,7 @@ void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { // Horizontal width of stroke. STATS h_stats(0, width + 1); for (int y = 0; y < height; ++y) { - uint32_t* pixels = data + y*wpl; + uint32_t* pixels = data + y * wpl; int prev_pixel = 0; int pixel = GET_DATA_BYTE(pixels, 0); for (int x = 1; x < width; ++x) { @@ -84,7 +84,7 @@ void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { // Single local max, so an odd width. h_stats.add(pixel * 2 - 1, 1); } else if (pixel == next_pixel && x + 1 < width && - pixel > GET_DATA_BYTE(pixels, x + 1)) { + pixel > GET_DATA_BYTE(pixels, x + 1)) { // Double local max, so an even width. h_stats.add(pixel * 2, 1); } @@ -99,7 +99,7 @@ void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { int prev_pixel = 0; int pixel = GET_DATA_BYTE(data, x); for (int y = 1; y < height; ++y) { - uint32_t* pixels = data + y*wpl; + uint32_t* pixels = data + y * wpl; int next_pixel = GET_DATA_BYTE(pixels, x); // We are looking for a pixel that is equal to its horizontal neighbours, // yet greater than its upper neighbour. @@ -110,7 +110,7 @@ void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { // Single local max, so an odd width. v_stats.add(pixel * 2 - 1, 1); } else if (pixel == next_pixel && y + 1 < height && - pixel > GET_DATA_BYTE(pixels + wpl, x)) { + pixel > GET_DATA_BYTE(pixels + wpl, x)) { // Double local max, so an even width. v_stats.add(pixel * 2, 1); } @@ -152,17 +152,17 @@ void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { **********************************************************************/ void assign_blobs_to_blocks2(Pix* pix, - BLOCK_LIST *blocks, // blocks to process - TO_BLOCK_LIST *port_blocks) { // output list - BLOCK *block; // current block - BLOBNBOX *newblob; // created blob - C_BLOB *blob; // current blob + BLOCK_LIST* blocks, // blocks to process + TO_BLOCK_LIST* port_blocks) { // output list + BLOCK* block; // current block + BLOBNBOX* newblob; // created blob + C_BLOB* blob; // current blob BLOCK_IT block_it = blocks; - C_BLOB_IT blob_it; // iterator - BLOBNBOX_IT port_box_it; // iterator - // destination iterator + C_BLOB_IT blob_it; // iterator + BLOBNBOX_IT port_box_it; // iterator + // destination iterator TO_BLOCK_IT port_block_it = port_blocks; - TO_BLOCK *port_block; // created block + TO_BLOCK* port_block; // created block for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { block = block_it.data(); @@ -202,8 +202,8 @@ void assign_blobs_to_blocks2(Pix* pix, * grades on different lists in the matching TO_BLOCK in to_blocks. **********************************************************************/ -void Textord::find_components(Pix* pix, BLOCK_LIST *blocks, - TO_BLOCK_LIST *to_blocks) { +void Textord::find_components(Pix* pix, BLOCK_LIST* blocks, + TO_BLOCK_LIST* to_blocks) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); if (width > INT16_MAX || height > INT16_MAX) { @@ -213,11 +213,11 @@ void Textord::find_components(Pix* pix, BLOCK_LIST *blocks, set_global_loc_code(LOC_EDGE_PROG); - BLOCK_IT block_it(blocks); // iterator - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { + BLOCK_IT block_it(blocks); // iterator + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { BLOCK* block = block_it.data(); - if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) { + if (block->pdblk.poly_block() == nullptr || + block->pdblk.poly_block()->IsText()) { extract_edges(pix, block); } } @@ -235,46 +235,41 @@ void Textord::find_components(Pix* pix, BLOCK_LIST *blocks, void Textord::filter_blobs(ICOORD page_tr, // top right TO_BLOCK_LIST* blocks, // output list - bool testing_on) { // for plotting - TO_BLOCK_IT block_it = blocks; // destination iterator - TO_BLOCK *block; // created block + bool testing_on) { // for plotting + TO_BLOCK_IT block_it = blocks; // destination iterator + TO_BLOCK* block; // created block - #ifndef GRAPHICS_DISABLED - if (to_win != nullptr) - to_win->Clear(); - #endif // GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED + if (to_win != nullptr) to_win->Clear(); +#endif // GRAPHICS_DISABLED - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { block = block_it.data(); - block->line_size = filter_noise_blobs(&block->blobs, - &block->noise_blobs, - &block->small_blobs, - &block->large_blobs); + block->line_size = + filter_noise_blobs(&block->blobs, &block->noise_blobs, + &block->small_blobs, &block->large_blobs); if (block->line_size == 0) block->line_size = 1; block->line_spacing = block->line_size * - (tesseract::CCStruct::kDescenderFraction + - tesseract::CCStruct::kXHeightFraction + - 2 * tesseract::CCStruct::kAscenderFraction) / - tesseract::CCStruct::kXHeightFraction; + (tesseract::CCStruct::kDescenderFraction + + tesseract::CCStruct::kXHeightFraction + + 2 * tesseract::CCStruct::kAscenderFraction) / + tesseract::CCStruct::kXHeightFraction; block->line_size *= textord_min_linesize; block->max_blob_size = block->line_size * textord_excess_blobsize; - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (textord_show_blobs && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); + if (to_win == nullptr) create_to_win(page_tr); block->plot_graded_blobs(to_win); } if (textord_show_boxes && testing_on) { - if (to_win == nullptr) - create_to_win(page_tr); + if (to_win == nullptr) create_to_win(page_tr); plot_box_list(to_win, &block->noise_blobs, ScrollView::WHITE); plot_box_list(to_win, &block->small_blobs, ScrollView::WHITE); plot_box_list(to_win, &block->large_blobs, ScrollView::WHITE); plot_box_list(to_win, &block->blobs, ScrollView::WHITE); } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED } } @@ -284,32 +279,32 @@ void Textord::filter_blobs(ICOORD page_tr, // top right * Move small blobs to a separate list. **********************************************************************/ -float Textord::filter_noise_blobs( - BLOBNBOX_LIST *src_list, // original list - BLOBNBOX_LIST *noise_list, // noise list - BLOBNBOX_LIST *small_list, // small blobs - BLOBNBOX_LIST *large_list) { // large blobs - int16_t height; //height of blob - int16_t width; //of blob - BLOBNBOX *blob; //current blob - float initial_x; //first guess - BLOBNBOX_IT src_it = src_list; //iterators +float Textord::filter_noise_blobs(BLOBNBOX_LIST* src_list, // original list + BLOBNBOX_LIST* noise_list, // noise list + BLOBNBOX_LIST* small_list, // small blobs + BLOBNBOX_LIST* large_list) { // large blobs + int16_t height; // height of blob + int16_t width; // of blob + BLOBNBOX* blob; // current blob + float initial_x; // first guess + BLOBNBOX_IT src_it = src_list; // iterators BLOBNBOX_IT noise_it = noise_list; BLOBNBOX_IT small_it = small_list; BLOBNBOX_IT large_it = large_list; - STATS size_stats (0, MAX_NEAREST_DIST); - //blob heights - float min_y; //size limits + STATS size_stats(0, MAX_NEAREST_DIST); + // blob heights + float min_y; // size limits float max_y; float max_x; - float max_height; //of good blobs + float max_height; // of good blobs for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { blob = src_it.data(); if (blob->bounding_box().height() < textord_max_noise_size) noise_it.add_after_then_move(src_it.extract()); - else if (blob->enclosed_area() >= blob->bounding_box().height() - * blob->bounding_box().width() * textord_noise_area_ratio) + else if (blob->enclosed_area() >= blob->bounding_box().height() * + blob->bounding_box().width() * + textord_noise_area_ratio) small_it.add_after_then_move(src_it.extract()); } for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { @@ -321,34 +316,32 @@ float Textord::filter_noise_blobs( tesseract::CCStruct::kXHeightFraction + 2 * tesseract::CCStruct::kAscenderFraction) / tesseract::CCStruct::kXHeightFraction); - min_y = floor (initial_x / 2); - max_x = ceil (initial_x * textord_width_limit); - small_it.move_to_first (); - for (small_it.mark_cycle_pt (); !small_it.cycled_list (); - small_it.forward ()) { + min_y = floor(initial_x / 2); + max_x = ceil(initial_x * textord_width_limit); + small_it.move_to_first(); + for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { height = small_it.data()->bounding_box().height(); if (height > max_y) - large_it.add_after_then_move(small_it.extract ()); + large_it.add_after_then_move(small_it.extract()); else if (height >= min_y) - src_it.add_after_then_move(small_it.extract ()); + src_it.add_after_then_move(small_it.extract()); } - size_stats.clear (); - for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { - height = src_it.data ()->bounding_box ().height (); - width = src_it.data ()->bounding_box ().width (); + size_stats.clear(); + for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { + height = src_it.data()->bounding_box().height(); + width = src_it.data()->bounding_box().width(); if (height < min_y) - small_it.add_after_then_move (src_it.extract ()); + small_it.add_after_then_move(src_it.extract()); else if (height > max_y || width > max_x) - large_it.add_after_then_move (src_it.extract ()); + large_it.add_after_then_move(src_it.extract()); else - size_stats.add (height, 1); + size_stats.add(height, 1); } - max_height = size_stats.ile (textord_initialasc_ile); + max_height = size_stats.ile(textord_initialasc_ile); // tprintf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,", // max_y,min_y,initial_x,max_height); max_height *= tesseract::CCStruct::kXHeightCapRatio; - if (max_height > initial_x) - initial_x = max_height; + if (max_height > initial_x) initial_x = max_height; // tprintf(" ret=%g\n",initial_x); return initial_x; } @@ -402,17 +395,17 @@ void Textord::cleanup_nontext_block(BLOCK* block) { **********************************************************************/ void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) { - BLOCK_IT block_it = blocks; //iterator - ROW_IT row_it; //row iterator + BLOCK_IT block_it = blocks; // iterator + ROW_IT row_it; // row iterator int num_rows = 0; int num_rows_all = 0; int num_blocks = 0; int num_blocks_all = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { BLOCK* block = block_it.data(); - if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { + if (block->pdblk.poly_block() != nullptr && + !block->pdblk.poly_block()->IsText()) { cleanup_nontext_block(block); continue; } @@ -429,8 +422,7 @@ void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) { row->word_list()->empty()) { delete row_it.extract(); // lose empty row. } else { - if (textord_noise_rejwords) - clean_noise_from_words(row_it.data()); + if (textord_noise_rejwords) clean_noise_from_words(row_it.data()); if (textord_blshift_maxshift >= 0) tweak_row_baseline(row, textord_blshift_maxshift, textord_blshift_xfraction); @@ -451,106 +443,96 @@ void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) { tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all); } - /********************************************************************** * clean_noise_from_row * * Move blobs of words from rows of garbage into the reject blobs list. **********************************************************************/ -bool Textord::clean_noise_from_row( //remove empties - ROW* row //row to clean +bool Textord::clean_noise_from_row( // remove empties + ROW* row // row to clean ) { bool testing_on; - TBOX blob_box; //bounding box - C_BLOB *blob; //current blob - C_OUTLINE *outline; //current outline - WERD *word; //current word - int32_t blob_size; //biggest size - int32_t trans_count = 0; //no of transitions - int32_t trans_threshold; //noise tolerance - int32_t dot_count; //small objects - int32_t norm_count; //normal objects - int32_t super_norm_count; //real char-like - //words of row - WERD_IT word_it = row->word_list (); - C_BLOB_IT blob_it; //blob iterator - C_OUTLINE_IT out_it; //outline iterator - - testing_on = textord_test_y > row->base_line (textord_test_x) - && textord_show_blobs - && textord_test_y < row->base_line (textord_test_x) + row->x_height (); + TBOX blob_box; // bounding box + C_BLOB* blob; // current blob + C_OUTLINE* outline; // current outline + WERD* word; // current word + int32_t blob_size; // biggest size + int32_t trans_count = 0; // no of transitions + int32_t trans_threshold; // noise tolerance + int32_t dot_count; // small objects + int32_t norm_count; // normal objects + int32_t super_norm_count; // real char-like + // words of row + WERD_IT word_it = row->word_list(); + C_BLOB_IT blob_it; // blob iterator + C_OUTLINE_IT out_it; // outline iterator + + testing_on = + textord_test_y > row->base_line(textord_test_x) && textord_show_blobs && + textord_test_y < row->base_line(textord_test_x) + row->x_height(); dot_count = 0; norm_count = 0; super_norm_count = 0; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); //current word - //blobs in word - blob_it.set_to_list (word->cblob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - if (!word->flag (W_DONT_CHOP)) { - //get outlines - out_it.set_to_list (blob->out_list ()); - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); - out_it.forward ()) { - outline = out_it.data (); - blob_box = outline->bounding_box (); - blob_size = - blob_box.width () > - blob_box.height ()? blob_box.width () : blob_box. - height(); - if (blob_size < textord_noise_sizelimit * row->x_height ()) - dot_count++; //count smal outlines - if (!outline->child ()->empty () - && blob_box.height () < - (1 + textord_noise_syfract) * row->x_height () - && blob_box.height () > - (1 - textord_noise_syfract) * row->x_height () - && blob_box.width () < - (1 + textord_noise_sxfract) * row->x_height () - && blob_box.width () > - (1 - textord_noise_sxfract) * row->x_height ()) - super_norm_count++; //count smal outlines + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + word = word_it.data(); // current word + // blobs in word + blob_it.set_to_list(word->cblob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + if (!word->flag(W_DONT_CHOP)) { + // get outlines + out_it.set_to_list(blob->out_list()); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.data(); + blob_box = outline->bounding_box(); + blob_size = blob_box.width() > blob_box.height() ? blob_box.width() + : blob_box.height(); + if (blob_size < textord_noise_sizelimit * row->x_height()) + dot_count++; // count smal outlines + if (!outline->child()->empty() && + blob_box.height() < + (1 + textord_noise_syfract) * row->x_height() && + blob_box.height() > + (1 - textord_noise_syfract) * row->x_height() && + blob_box.width() < + (1 + textord_noise_sxfract) * row->x_height() && + blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) + super_norm_count++; // count smal outlines } - } - else + } else super_norm_count++; - blob_box = blob->bounding_box (); - blob_size = - blob_box.width () > - blob_box.height ()? blob_box.width () : blob_box.height (); - if (blob_size >= textord_noise_sizelimit * row->x_height () - && blob_size < row->x_height () * 2) { + blob_box = blob->bounding_box(); + blob_size = blob_box.width() > blob_box.height() ? blob_box.width() + : blob_box.height(); + if (blob_size >= textord_noise_sizelimit * row->x_height() && + blob_size < row->x_height() * 2) { trans_threshold = blob_size / textord_noise_sizefraction; - trans_count = blob->count_transitions (trans_threshold); - if (trans_count < textord_noise_translimit) - norm_count++; - } - else if (blob_box.height () > row->x_height () * 2 - && (!word_it.at_first () || !blob_it.at_first ())) + trans_count = blob->count_transitions(trans_threshold); + if (trans_count < textord_noise_translimit) norm_count++; + } else if (blob_box.height() > row->x_height() * 2 && + (!word_it.at_first() || !blob_it.at_first())) dot_count += 2; if (testing_on) { - tprintf - ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n", - blob_box.left (), blob_box.bottom (), blob_box.right (), - blob_box.top (), blob->out_list ()->length (), trans_count, - blob_box.bottom () - row->base_line (blob_box.left ())); + tprintf("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n", + blob_box.left(), blob_box.bottom(), blob_box.right(), + blob_box.top(), blob->out_list()->length(), trans_count, + blob_box.bottom() - row->base_line(blob_box.left())); } } } if (textord_noise_debug) { - tprintf ("Row ending at (%d,%g):", - blob_box.right (), row->base_line (blob_box.right ())); - tprintf (" R=%g, dc=%d, nc=%d, %s\n", - norm_count > 0 ? (float) dot_count / norm_count : 9999, - dot_count, norm_count, - dot_count > norm_count * textord_noise_normratio - && dot_count > 2 ? "REJECTED" : "ACCEPTED"); + tprintf("Row ending at (%d,%g):", blob_box.right(), + row->base_line(blob_box.right())); + tprintf(" R=%g, dc=%d, nc=%d, %s\n", + norm_count > 0 ? (float)dot_count / norm_count : 9999, dot_count, + norm_count, + dot_count > norm_count * textord_noise_normratio && dot_count > 2 + ? "REJECTED" + : "ACCEPTED"); } - return super_norm_count < textord_noise_sncount - && dot_count > norm_count * textord_noise_rowratio && dot_count > 2; + return super_norm_count < textord_noise_sncount && + dot_count > norm_count * textord_noise_rowratio && dot_count > 2; } /********************************************************************** @@ -559,83 +541,73 @@ bool Textord::clean_noise_from_row( //remove empties * Move blobs of words from rows of garbage into the reject blobs list. **********************************************************************/ -void Textord::clean_noise_from_words( //remove empties - ROW *row //row to clean - ) { - TBOX blob_box; //bounding box - int8_t *word_dud; //was it chucked - C_BLOB *blob; //current blob - C_OUTLINE *outline; //current outline - WERD *word; //current word - int32_t blob_size; //biggest size - int32_t trans_count; //no of transitions - int32_t trans_threshold; //noise tolerance - int32_t dot_count; //small objects - int32_t norm_count; //normal objects - int32_t dud_words; //number discarded - int32_t ok_words; //number remaining - int32_t word_index; //current word - //words of row - WERD_IT word_it = row->word_list (); - C_BLOB_IT blob_it; //blob iterator - C_OUTLINE_IT out_it; //outline iterator - - ok_words = word_it.length (); - if (ok_words == 0 || textord_no_rejects) - return; - word_dud = (int8_t *) alloc_mem (ok_words * sizeof (int8_t)); +void Textord::clean_noise_from_words( // remove empties + ROW* row // row to clean +) { + TBOX blob_box; // bounding box + int8_t* word_dud; // was it chucked + C_BLOB* blob; // current blob + C_OUTLINE* outline; // current outline + WERD* word; // current word + int32_t blob_size; // biggest size + int32_t trans_count; // no of transitions + int32_t trans_threshold; // noise tolerance + int32_t dot_count; // small objects + int32_t norm_count; // normal objects + int32_t dud_words; // number discarded + int32_t ok_words; // number remaining + int32_t word_index; // current word + // words of row + WERD_IT word_it = row->word_list(); + C_BLOB_IT blob_it; // blob iterator + C_OUTLINE_IT out_it; // outline iterator + + ok_words = word_it.length(); + if (ok_words == 0 || textord_no_rejects) return; + word_dud = (int8_t*)alloc_mem(ok_words * sizeof(int8_t)); dud_words = 0; ok_words = 0; word_index = 0; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); //current word + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + word = word_it.data(); // current word dot_count = 0; norm_count = 0; - //blobs in word - blob_it.set_to_list (word->cblob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - if (!word->flag (W_DONT_CHOP)) { - //get outlines - out_it.set_to_list (blob->out_list ()); - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); - out_it.forward ()) { - outline = out_it.data (); - blob_box = outline->bounding_box (); - blob_size = - blob_box.width () > - blob_box.height ()? blob_box.width () : blob_box. - height(); - if (blob_size < textord_noise_sizelimit * row->x_height ()) - dot_count++; //count smal outlines - if (!outline->child ()->empty () - && blob_box.height () < - (1 + textord_noise_syfract) * row->x_height () - && blob_box.height () > - (1 - textord_noise_syfract) * row->x_height () - && blob_box.width () < - (1 + textord_noise_sxfract) * row->x_height () - && blob_box.width () > - (1 - textord_noise_sxfract) * row->x_height ()) - norm_count++; //count smal outlines + // blobs in word + blob_it.set_to_list(word->cblob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + if (!word->flag(W_DONT_CHOP)) { + // get outlines + out_it.set_to_list(blob->out_list()); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.data(); + blob_box = outline->bounding_box(); + blob_size = blob_box.width() > blob_box.height() ? blob_box.width() + : blob_box.height(); + if (blob_size < textord_noise_sizelimit * row->x_height()) + dot_count++; // count smal outlines + if (!outline->child()->empty() && + blob_box.height() < + (1 + textord_noise_syfract) * row->x_height() && + blob_box.height() > + (1 - textord_noise_syfract) * row->x_height() && + blob_box.width() < + (1 + textord_noise_sxfract) * row->x_height() && + blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) + norm_count++; // count smal outlines } - } - else + } else norm_count++; - blob_box = blob->bounding_box (); - blob_size = - blob_box.width () > - blob_box.height ()? blob_box.width () : blob_box.height (); - if (blob_size >= textord_noise_sizelimit * row->x_height () - && blob_size < row->x_height () * 2) { + blob_box = blob->bounding_box(); + blob_size = blob_box.width() > blob_box.height() ? blob_box.width() + : blob_box.height(); + if (blob_size >= textord_noise_sizelimit * row->x_height() && + blob_size < row->x_height() * 2) { trans_threshold = blob_size / textord_noise_sizefraction; - trans_count = blob->count_transitions (trans_threshold); - if (trans_count < textord_noise_translimit) - norm_count++; - } - else if (blob_box.height () > row->x_height () * 2 - && (!word_it.at_first () || !blob_it.at_first ())) + trans_count = blob->count_transitions(trans_threshold); + if (trans_count < textord_noise_translimit) norm_count++; + } else if (blob_box.height() > row->x_height() * 2 && + (!word_it.at_first() || !blob_it.at_first())) dot_count += 2; } if (dot_count > 2 && !word->flag(W_REP_CHAR)) { @@ -656,9 +628,9 @@ void Textord::clean_noise_from_words( //remove empties } word_index = 0; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - if (word_dud[word_index] == 2 - || (word_dud[word_index] == 1 && dud_words > ok_words)) { + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + if (word_dud[word_index] == 2 || + (word_dud[word_index] == 1 && dud_words > ok_words)) { word = word_it.data(); // Current word. // Previously we threw away the entire word. // Now just aggressively throw all small blobs into the reject list, where @@ -672,12 +644,12 @@ void Textord::clean_noise_from_words( //remove empties // Remove outlines that are a tiny fraction in either width or height // of the word height. -void Textord::clean_small_noise_from_words(ROW *row) { +void Textord::clean_small_noise_from_words(ROW* row) { WERD_IT word_it(row->word_list()); for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { WERD* word = word_it.data(); int min_size = static_cast( - textord_noise_hfract * word->bounding_box().height() + 0.5); + textord_noise_hfract * word->bounding_box().height() + 0.5); C_BLOB_IT blob_it(word->cblob_list()); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* blob = blob_it.data(); @@ -738,7 +710,8 @@ void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs, BLOCK_IT bk_it(blocks); for (bk_it.mark_cycle_pt(); !bk_it.cycled_list(); bk_it.forward()) { BLOCK* block = bk_it.data(); - if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { + if (block->pdblk.poly_block() != nullptr && + !block->pdblk.poly_block()->IsText()) { continue; } // Linear search of the groups to find a matching rotation. @@ -870,7 +843,7 @@ void Textord::TransferDiacriticsToWords(BLOBNBOX_LIST* diacritic_blobs, } } -} // tesseract +} // namespace tesseract /********************************************************************** * tweak_row_baseline @@ -879,98 +852,86 @@ void Textord::TransferDiacriticsToWords(BLOBNBOX_LIST* diacritic_blobs, * close enough. **********************************************************************/ -void tweak_row_baseline(ROW *row, - double blshift_maxshift, +void tweak_row_baseline(ROW* row, double blshift_maxshift, double blshift_xfraction) { - TBOX blob_box; //bounding box - C_BLOB *blob; //current blob - WERD *word; //current word - int32_t blob_count; //no of blobs - int32_t src_index; //source segment - int32_t dest_index; //destination segment - int32_t *xstarts; //spline segments - double *coeffs; //spline coeffs - float ydiff; //baseline error - float x_centre; //centre of blob - //words of row - WERD_IT word_it = row->word_list (); - C_BLOB_IT blob_it; //blob iterator + TBOX blob_box; // bounding box + C_BLOB* blob; // current blob + WERD* word; // current word + int32_t blob_count; // no of blobs + int32_t src_index; // source segment + int32_t dest_index; // destination segment + int32_t* xstarts; // spline segments + double* coeffs; // spline coeffs + float ydiff; // baseline error + float x_centre; // centre of blob + // words of row + WERD_IT word_it = row->word_list(); + C_BLOB_IT blob_it; // blob iterator blob_count = 0; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); //current word - //get total blobs - blob_count += word->cblob_list ()->length (); + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + word = word_it.data(); // current word + // get total blobs + blob_count += word->cblob_list()->length(); } - if (blob_count == 0) - return; - xstarts = - (int32_t *) alloc_mem ((blob_count + row->baseline.segments + 1) * - sizeof (int32_t)); - coeffs = - (double *) alloc_mem ((blob_count + row->baseline.segments) * 3 * - sizeof (double)); + if (blob_count == 0) return; + xstarts = (int32_t*)alloc_mem((blob_count + row->baseline.segments + 1) * + sizeof(int32_t)); + coeffs = (double*)alloc_mem((blob_count + row->baseline.segments) * 3 * + sizeof(double)); src_index = 0; dest_index = 0; xstarts[0] = row->baseline.xcoords[0]; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); //current word - //blobs in word - blob_it.set_to_list (word->cblob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - x_centre = (blob_box.left () + blob_box.right ()) / 2.0; - ydiff = blob_box.bottom () - row->base_line (x_centre); + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + word = word_it.data(); // current word + // blobs in word + blob_it.set_to_list(word->cblob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + x_centre = (blob_box.left() + blob_box.right()) / 2.0; + ydiff = blob_box.bottom() - row->base_line(x_centre); if (ydiff < 0) - ydiff = -ydiff / row->x_height (); + ydiff = -ydiff / row->x_height(); else - ydiff = ydiff / row->x_height (); - if (ydiff < blshift_maxshift - && blob_box.height () / row->x_height () > blshift_xfraction) { + ydiff = ydiff / row->x_height(); + if (ydiff < blshift_maxshift && + blob_box.height() / row->x_height() > blshift_xfraction) { if (xstarts[dest_index] >= x_centre) - xstarts[dest_index] = blob_box.left (); + xstarts[dest_index] = blob_box.left(); coeffs[dest_index * 3] = 0; coeffs[dest_index * 3 + 1] = 0; - coeffs[dest_index * 3 + 2] = blob_box.bottom (); - //shift it + coeffs[dest_index * 3 + 2] = blob_box.bottom(); + // shift it dest_index++; - xstarts[dest_index] = blob_box.right () + 1; - } - else { + xstarts[dest_index] = blob_box.right() + 1; + } else { if (xstarts[dest_index] <= x_centre) { - while (row->baseline.xcoords[src_index + 1] <= x_centre - && src_index < row->baseline.segments - 1) { - if (row->baseline.xcoords[src_index + 1] > - xstarts[dest_index]) { - coeffs[dest_index * 3] = - row->baseline.quadratics[src_index].a; + while (row->baseline.xcoords[src_index + 1] <= x_centre && + src_index < row->baseline.segments - 1) { + if (row->baseline.xcoords[src_index + 1] > xstarts[dest_index]) { + coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a; coeffs[dest_index * 3 + 1] = - row->baseline.quadratics[src_index].b; + row->baseline.quadratics[src_index].b; coeffs[dest_index * 3 + 2] = - row->baseline.quadratics[src_index].c; + row->baseline.quadratics[src_index].c; dest_index++; - xstarts[dest_index] = - row->baseline.xcoords[src_index + 1]; + xstarts[dest_index] = row->baseline.xcoords[src_index + 1]; } src_index++; } - coeffs[dest_index * 3] = - row->baseline.quadratics[src_index].a; - coeffs[dest_index * 3 + 1] = - row->baseline.quadratics[src_index].b; - coeffs[dest_index * 3 + 2] = - row->baseline.quadratics[src_index].c; + coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a; + coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b; + coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c; dest_index++; xstarts[dest_index] = row->baseline.xcoords[src_index + 1]; } } } } - while (src_index < row->baseline.segments - && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index]) + while (src_index < row->baseline.segments && + row->baseline.xcoords[src_index + 1] <= xstarts[dest_index]) src_index++; while (src_index < row->baseline.segments) { coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a; @@ -980,8 +941,8 @@ void tweak_row_baseline(ROW *row, src_index++; xstarts[dest_index] = row->baseline.xcoords[src_index]; } - //turn to spline - row->baseline = QSPLINE (dest_index, xstarts, coeffs); + // turn to spline + row->baseline = QSPLINE(dest_index, xstarts, coeffs); free_mem(xstarts); free_mem(coeffs); } diff --git a/src/textord/tordmain.h b/src/textord/tordmain.h index cb5a6a1ef2..40ce28f404 100644 --- a/src/textord/tordmain.h +++ b/src/textord/tordmain.h @@ -17,26 +17,25 @@ * **********************************************************************/ -#ifndef TORDMAIN_H -#define TORDMAIN_H +#ifndef TORDMAIN_H +#define TORDMAIN_H -#include -#include "params.h" -#include "ocrblock.h" -#include "blobs.h" -#include "blobbox.h" +#include +#include "blobbox.h" +#include "blobs.h" +#include "ocrblock.h" +#include "params.h" struct Pix; namespace tesseract { class Tesseract; void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob); -void assign_blobs_to_blocks2(Pix* pix, BLOCK_LIST *blocks, - TO_BLOCK_LIST *port_blocks); +void assign_blobs_to_blocks2(Pix* pix, BLOCK_LIST* blocks, + TO_BLOCK_LIST* port_blocks); } // namespace tesseract -void tweak_row_baseline(ROW *row, - double blshift_maxshift, +void tweak_row_baseline(ROW* row, double blshift_maxshift, double blshift_xfraction); #endif diff --git a/src/textord/tospace.cpp b/src/textord/tospace.cpp index cb266f8820..ec15ecc93a 100644 --- a/src/textord/tospace.cpp +++ b/src/textord/tospace.cpp @@ -38,36 +38,31 @@ #include -#define MAXSPACING 128 /*max expected spacing in pix */ +#define MAXSPACING 128 /*max expected spacing in pix */ namespace tesseract { -void Textord::to_spacing( - ICOORD page_tr, //topright of page - TO_BLOCK_LIST *blocks //blocks on page - ) { - TO_BLOCK_IT block_it; //iterator - TO_BLOCK *block; //current block; - TO_ROW *row; //current row - int block_index; //block number - int row_index; //row number - //estimated width of real spaces for whole block +void Textord::to_spacing(ICOORD page_tr, // topright of page + TO_BLOCK_LIST* blocks // blocks on page +) { + TO_BLOCK_IT block_it; // iterator + TO_BLOCK* block; // current block; + TO_ROW* row; // current row + int block_index; // block number + int row_index; // row number + // estimated width of real spaces for whole block int16_t block_space_gap_width; - //estimated width of non space gaps for whole block + // estimated width of non space gaps for whole block int16_t block_non_space_gap_width; - bool old_text_ord_proportional;//old fixed/prop result - GAPMAP *gapmap = nullptr; //map of big vert gaps in blk + bool old_text_ord_proportional; // old fixed/prop result + GAPMAP* gapmap = nullptr; // map of big vert gaps in blk - block_it.set_to_list (blocks); + block_it.set_to_list(blocks); block_index = 1; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - gapmap = new GAPMAP (block); - block_spacing_stats(block, - gapmap, - old_text_ord_proportional, - block_space_gap_width, - block_non_space_gap_width); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block = block_it.data(); + gapmap = new GAPMAP(block); + block_spacing_stats(block, gapmap, old_text_ord_proportional, + block_space_gap_width, block_non_space_gap_width); // Make sure relative values of block-level space and non-space gap // widths are reasonable. The ratio of 1:3 is also used in // block_spacing_stats, to corrrect the block_space_gap_width @@ -75,36 +70,30 @@ void Textord::to_spacing( // often over-estimated and should not be trusted. A similar ratio // is found in block_spacing_stats. if (tosp_old_to_method && tosp_old_to_constrain_sp_kn && - (float) block_space_gap_width / block_non_space_gap_width < 3.0) { - block_non_space_gap_width = (int16_t) floor (block_space_gap_width / 3.0); + (float)block_space_gap_width / block_non_space_gap_width < 3.0) { + block_non_space_gap_width = (int16_t)floor(block_space_gap_width / 3.0); } // row iterator TO_ROW_IT row_it(block->get_rows()); row_index = 1; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); if ((row->pitch_decision == PITCH_DEF_PROP) || - (row->pitch_decision == PITCH_CORR_PROP)) { + (row->pitch_decision == PITCH_CORR_PROP)) { if ((tosp_debug_level > 0) && !old_text_ord_proportional) - tprintf ("Block %d Row %d: Now Proportional\n", - block_index, row_index); - row_spacing_stats(row, - gapmap, - block_index, - row_index, - block_space_gap_width, - block_non_space_gap_width); - } - else { + tprintf("Block %d Row %d: Now Proportional\n", block_index, + row_index); + row_spacing_stats(row, gapmap, block_index, row_index, + block_space_gap_width, block_non_space_gap_width); + } else { if ((tosp_debug_level > 0) && old_text_ord_proportional) - tprintf - ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n", - block_index, row_index, row->pitch_decision, - row->fixed_pitch); + tprintf("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n", + block_index, row_index, row->pitch_decision, + row->fixed_pitch); } #ifndef GRAPHICS_DISABLED if (textord_show_initial_words) - plot_word_decisions (to_win, (int16_t) row->fixed_pitch, row); + plot_word_decisions(to_win, (int16_t)row->fixed_pitch, row); #endif row_index++; } @@ -113,76 +102,70 @@ void Textord::to_spacing( } } - /************************************************************************* * block_spacing_stats() *************************************************************************/ void Textord::block_spacing_stats( - TO_BLOCK* block, - GAPMAP* gapmap, - bool& old_text_ord_proportional, - int16_t& block_space_gap_width, // resulting estimate - int16_t& block_non_space_gap_width // resulting estimate + TO_BLOCK* block, GAPMAP* gapmap, bool& old_text_ord_proportional, + int16_t& block_space_gap_width, // resulting estimate + int16_t& block_non_space_gap_width // resulting estimate ) { - TO_ROW *row; // current row - BLOBNBOX_IT blob_it; // iterator + TO_ROW* row; // current row + BLOBNBOX_IT blob_it; // iterator - STATS centre_to_centre_stats (0, MAXSPACING); + STATS centre_to_centre_stats(0, MAXSPACING); // DEBUG USE ONLY - STATS all_gap_stats (0, MAXSPACING); - STATS space_gap_stats (0, MAXSPACING); - int16_t minwidth = MAXSPACING; // narrowest blob + STATS all_gap_stats(0, MAXSPACING); + STATS space_gap_stats(0, MAXSPACING); + int16_t minwidth = MAXSPACING; // narrowest blob TBOX blob_box; TBOX prev_blob_box; int16_t centre_to_centre; int16_t gap_width; float real_space_threshold; - float iqr_centre_to_centre; // DEBUG USE ONLY - float iqr_all_gap_stats; // DEBUG USE ONLY + float iqr_centre_to_centre; // DEBUG USE ONLY + float iqr_all_gap_stats; // DEBUG USE ONLY int32_t end_of_row; int32_t row_length; // row iterator TO_ROW_IT row_it(block->get_rows()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row->blob_list ()->empty () && - (!tosp_only_use_prop_rows || - (row->pitch_decision == PITCH_DEF_PROP) || - (row->pitch_decision == PITCH_CORR_PROP))) { - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + if (!row->blob_list()->empty() && + (!tosp_only_use_prop_rows || (row->pitch_decision == PITCH_DEF_PROP) || + (row->pitch_decision == PITCH_CORR_PROP))) { + blob_it.set_to_list(row->blob_list()); + blob_it.mark_cycle_pt(); + end_of_row = blob_it.data_relative(-1)->bounding_box().right(); if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); + blob_box = box_next_pre_chopped(&blob_it); else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); + blob_box = reduced_box_next(row, &blob_it); else - blob_box = box_next (&blob_it); - row_length = end_of_row - blob_box.left (); - if (blob_box.width () < minwidth) - minwidth = blob_box.width (); + blob_box = box_next(&blob_it); + row_length = end_of_row - blob_box.left(); + if (blob_box.width() < minwidth) minwidth = blob_box.width(); prev_blob_box = blob_box; - while (!blob_it.cycled_list ()) { + while (!blob_it.cycled_list()) { if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); + blob_box = box_next_pre_chopped(&blob_it); else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); + blob_box = reduced_box_next(row, &blob_it); else - blob_box = box_next (&blob_it); - if (blob_box.width () < minwidth) - minwidth = blob_box.width (); + blob_box = box_next(&blob_it); + if (blob_box.width() < minwidth) minwidth = blob_box.width(); int16_t left = prev_blob_box.right(); int16_t right = blob_box.left(); gap_width = right - left; if (!ignore_big_gap(row, row_length, gapmap, left, right)) { - all_gap_stats.add (gap_width, 1); + all_gap_stats.add(gap_width, 1); - centre_to_centre = (right + blob_box.right () - - (prev_blob_box.left () + left)) / 2; - //DEBUG - centre_to_centre_stats.add (centre_to_centre, 1); + centre_to_centre = + (right + blob_box.right() - (prev_blob_box.left() + left)) / 2; + // DEBUG + centre_to_centre_stats.add(centre_to_centre, 1); // DEBUG } prev_blob_box = blob_box; @@ -190,20 +173,18 @@ void Textord::block_spacing_stats( } } - //Inadequate samples - if (all_gap_stats.get_total () <= 1) { + // Inadequate samples + if (all_gap_stats.get_total() <= 1) { block_non_space_gap_width = minwidth; - block_space_gap_width = -1; //No est. space width - //DEBUG + block_space_gap_width = -1; // No est. space width + // DEBUG old_text_ord_proportional = true; - } - else { + } else { /* For debug only ..... */ - iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) - - centre_to_centre_stats.ile (0.25); - iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25); - old_text_ord_proportional = - iqr_centre_to_centre * 2 > iqr_all_gap_stats; + iqr_centre_to_centre = + centre_to_centre_stats.ile(0.75) - centre_to_centre_stats.ile(0.25); + iqr_all_gap_stats = all_gap_stats.ile(0.75) - all_gap_stats.ile(0.25); + old_text_ord_proportional = iqr_centre_to_centre * 2 > iqr_all_gap_stats; /* .......For debug only */ /* @@ -215,38 +196,37 @@ void Textord::block_spacing_stats( block. Do this by using a crude threshold to ignore "narrow" gaps, then find the median of the "wide" gaps and use this. */ - block_non_space_gap_width = (int16_t) floor (all_gap_stats.median ()); + block_non_space_gap_width = (int16_t)floor(all_gap_stats.median()); // median gap - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row->blob_list ()->empty () && - (!tosp_only_use_prop_rows || - (row->pitch_decision == PITCH_DEF_PROP) || - (row->pitch_decision == PITCH_CORR_PROP))) { + row_it.set_to_list(block->get_rows()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + if (!row->blob_list()->empty() && + (!tosp_only_use_prop_rows || + (row->pitch_decision == PITCH_DEF_PROP) || + (row->pitch_decision == PITCH_CORR_PROP))) { real_space_threshold = - std::max (tosp_init_guess_kn_mult * block_non_space_gap_width, - tosp_init_guess_xht_mult * row->xheight); - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - end_of_row = - blob_it.data_relative (-1)->bounding_box ().right (); + std::max(tosp_init_guess_kn_mult * block_non_space_gap_width, + tosp_init_guess_xht_mult * row->xheight); + blob_it.set_to_list(row->blob_list()); + blob_it.mark_cycle_pt(); + end_of_row = blob_it.data_relative(-1)->bounding_box().right(); if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); + blob_box = box_next_pre_chopped(&blob_it); else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); + blob_box = reduced_box_next(row, &blob_it); else - blob_box = box_next (&blob_it); - row_length = blob_box.left () - end_of_row; + blob_box = box_next(&blob_it); + row_length = blob_box.left() - end_of_row; prev_blob_box = blob_box; - while (!blob_it.cycled_list ()) { + while (!blob_it.cycled_list()) { if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); + blob_box = box_next_pre_chopped(&blob_it); else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); + blob_box = reduced_box_next(row, &blob_it); else - blob_box = box_next (&blob_it); + blob_box = box_next(&blob_it); int16_t left = prev_blob_box.right(); int16_t right = blob_box.left(); gap_width = right - left; @@ -254,57 +234,49 @@ void Textord::block_spacing_stats( !ignore_big_gap(row, row_length, gapmap, left, right)) { /* If tosp_use_cert_spaces is enabled, the estimate of the space gap is - restricted to obvious spaces - those wider than half the xht or those - with wide blobs on both sides - i.e not things that are suspect 1's or - punctuation that is sometimes widely spaced. + restricted to obvious spaces - those wider than half the xht or + those with wide blobs on both sides - i.e not things that are + suspect 1's or punctuation that is sometimes widely spaced. */ if (!tosp_block_use_cert_spaces || - (gap_width > - tosp_fuzzy_space_factor2 * row->xheight) - || - ((gap_width > - tosp_fuzzy_space_factor1 * row->xheight) - && (!tosp_narrow_blobs_not_cert - || (!narrow_blob (row, prev_blob_box) - && !narrow_blob (row, blob_box)))) - || (wide_blob (row, prev_blob_box) - && wide_blob (row, blob_box))) - space_gap_stats.add (gap_width, 1); + (gap_width > tosp_fuzzy_space_factor2 * row->xheight) || + ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) && + (!tosp_narrow_blobs_not_cert || + (!narrow_blob(row, prev_blob_box) && + !narrow_blob(row, blob_box)))) || + (wide_blob(row, prev_blob_box) && wide_blob(row, blob_box))) + space_gap_stats.add(gap_width, 1); } prev_blob_box = blob_box; } } } - //Inadequate samples - if (space_gap_stats.get_total () <= 2) - block_space_gap_width = -1;//No est. space width + // Inadequate samples + if (space_gap_stats.get_total() <= 2) + block_space_gap_width = -1; // No est. space width else block_space_gap_width = - std::max(static_cast(floor(space_gap_stats.median())), - static_cast(3 * block_non_space_gap_width)); + std::max(static_cast(floor(space_gap_stats.median())), + static_cast(3 * block_non_space_gap_width)); } } - /************************************************************************* * row_spacing_stats() * Set values for min_space, max_non_space based on row stats only * If failure - return 0 values. *************************************************************************/ void Textord::row_spacing_stats( - TO_ROW *row, - GAPMAP *gapmap, - int16_t block_idx, - int16_t row_idx, - int16_t block_space_gap_width, //estimate for block - int16_t block_non_space_gap_width //estimate for block - ) { - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - STATS all_gap_stats (0, MAXSPACING); - STATS cert_space_gap_stats (0, MAXSPACING); - STATS all_space_gap_stats (0, MAXSPACING); - STATS small_gap_stats (0, MAXSPACING); + TO_ROW* row, GAPMAP* gapmap, int16_t block_idx, int16_t row_idx, + int16_t block_space_gap_width, // estimate for block + int16_t block_non_space_gap_width // estimate for block +) { + // iterator + BLOBNBOX_IT blob_it = row->blob_list(); + STATS all_gap_stats(0, MAXSPACING); + STATS cert_space_gap_stats(0, MAXSPACING); + STATS all_space_gap_stats(0, MAXSPACING); + STATS small_gap_stats(0, MAXSPACING); TBOX blob_box; TBOX prev_blob_box; int16_t gap_width; @@ -313,7 +285,7 @@ void Textord::row_spacing_stats( int16_t index; int16_t large_gap_count = 0; bool suspected_table; - int32_t max_max_nonspace; //upper bound + int32_t max_max_nonspace; // upper bound bool good_block_space_estimate = block_space_gap_width > 0; int32_t end_of_row; int32_t row_length = 0; @@ -323,35 +295,35 @@ void Textord::row_spacing_stats( /* Collect first pass stats for row */ if (!good_block_space_estimate) - block_space_gap_width = int16_t (floor (row->xheight / 2)); - if (!row->blob_list ()->empty ()) { + block_space_gap_width = int16_t(floor(row->xheight / 2)); + if (!row->blob_list()->empty()) { if (tosp_threshold_bias1 > 0) real_space_threshold = - block_non_space_gap_width + - int16_t (floor (0.5 + - tosp_threshold_bias1 * (block_space_gap_width - - block_non_space_gap_width))); + block_non_space_gap_width + + int16_t( + floor(0.5 + tosp_threshold_bias1 * (block_space_gap_width - + block_non_space_gap_width))); else - real_space_threshold = //Old TO method - (block_space_gap_width + block_non_space_gap_width) / 2; - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); + real_space_threshold = // Old TO method + (block_space_gap_width + block_non_space_gap_width) / 2; + blob_it.set_to_list(row->blob_list()); + blob_it.mark_cycle_pt(); + end_of_row = blob_it.data_relative(-1)->bounding_box().right(); if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); + blob_box = box_next_pre_chopped(&blob_it); else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); + blob_box = reduced_box_next(row, &blob_it); else - blob_box = box_next (&blob_it); - row_length = end_of_row - blob_box.left (); + blob_box = box_next(&blob_it); + row_length = end_of_row - blob_box.left(); prev_blob_box = blob_box; - while (!blob_it.cycled_list ()) { + while (!blob_it.cycled_list()) { if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); + blob_box = box_next_pre_chopped(&blob_it); else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); + blob_box = reduced_box_next(row, &blob_it); else - blob_box = box_next (&blob_it); + blob_box = box_next(&blob_it); int16_t left = prev_blob_box.right(); int16_t right = blob_box.left(); gap_width = right - left; @@ -360,64 +332,52 @@ void Textord::row_spacing_stats( } else { if (gap_width >= real_space_threshold) { if (!tosp_row_use_cert_spaces || - (gap_width > tosp_fuzzy_space_factor2 * row->xheight) || - ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) - && (!tosp_narrow_blobs_not_cert - || (!narrow_blob (row, prev_blob_box) - && !narrow_blob (row, blob_box)))) - || (wide_blob (row, prev_blob_box) - && wide_blob (row, blob_box))) - cert_space_gap_stats.add (gap_width, 1); - all_space_gap_stats.add (gap_width, 1); - } - else - small_gap_stats.add (gap_width, 1); - all_gap_stats.add (gap_width, 1); + (gap_width > tosp_fuzzy_space_factor2 * row->xheight) || + ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) && + (!tosp_narrow_blobs_not_cert || + (!narrow_blob(row, prev_blob_box) && + !narrow_blob(row, blob_box)))) || + (wide_blob(row, prev_blob_box) && wide_blob(row, blob_box))) + cert_space_gap_stats.add(gap_width, 1); + all_space_gap_stats.add(gap_width, 1); + } else + small_gap_stats.add(gap_width, 1); + all_gap_stats.add(gap_width, 1); } prev_blob_box = blob_box; } } suspected_table = (large_gap_count > 1) || - ((large_gap_count > 0) && - (all_gap_stats.get_total () <= tosp_few_samples)); + ((large_gap_count > 0) && + (all_gap_stats.get_total() <= tosp_few_samples)); /* Now determine row kern size, space size and threshold */ - if ((cert_space_gap_stats.get_total () >= - tosp_enough_space_samples_for_median) || - ((suspected_table || - all_gap_stats.get_total () <= tosp_short_row) && - cert_space_gap_stats.get_total () > 0)) { - old_to_method(row, - &all_gap_stats, - &cert_space_gap_stats, - &small_gap_stats, - block_space_gap_width, - block_non_space_gap_width); + if ((cert_space_gap_stats.get_total() >= + tosp_enough_space_samples_for_median) || + ((suspected_table || all_gap_stats.get_total() <= tosp_short_row) && + cert_space_gap_stats.get_total() > 0)) { + old_to_method(row, &all_gap_stats, &cert_space_gap_stats, &small_gap_stats, + block_space_gap_width, block_non_space_gap_width); } else { if (!tosp_recovery_isolated_row_stats || - !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table, - block_idx, row_idx)) { + !isolated_row_stats(row, gapmap, &all_gap_stats, suspected_table, + block_idx, row_idx)) { if (tosp_row_use_cert_spaces && (tosp_debug_level > 5)) - tprintf ("B:%d R:%d -- Inadequate certain spaces.\n", - block_idx, row_idx); + tprintf("B:%d R:%d -- Inadequate certain spaces.\n", block_idx, + row_idx); if (tosp_row_use_cert_spaces1 && good_block_space_estimate) { - //Use block default + // Use block default row->space_size = block_space_gap_width; - if (all_gap_stats.get_total () > tosp_redo_kern_limit) - row->kern_size = all_gap_stats.median (); + if (all_gap_stats.get_total() > tosp_redo_kern_limit) + row->kern_size = all_gap_stats.median(); else row->kern_size = block_non_space_gap_width; - row->space_threshold = - int32_t (floor ((row->space_size + row->kern_size) / - tosp_old_sp_kn_th_factor)); - } - else - old_to_method(row, - &all_gap_stats, - &all_space_gap_stats, - &small_gap_stats, - block_space_gap_width, + row->space_threshold = int32_t(floor( + (row->space_size + row->kern_size) / tosp_old_sp_kn_th_factor)); + } else + old_to_method(row, &all_gap_stats, &all_space_gap_stats, + &small_gap_stats, block_space_gap_width, block_non_space_gap_width); } } @@ -429,40 +389,38 @@ void Textord::row_spacing_stats( are ignoring big gaps*/ if (tosp_sanity_method == 0) { if (suspected_table && - (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) { + (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) { if (tosp_debug_level > 5) tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx, row_idx, row->kern_size, row->space_threshold, row->space_size); - row->space_threshold = - (int32_t) (tosp_table_kn_sp_ratio * row->kern_size); + row->space_threshold = (int32_t)(tosp_table_kn_sp_ratio * row->kern_size); row->space_size = std::max(row->space_threshold + 1.0f, row->xheight); } - } - else if (tosp_sanity_method == 1) { + } else if (tosp_sanity_method == 1) { sane_space = row->space_size; /* NEVER let space size get too close to kern size */ - if ((row->space_size < tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f)) - || ((row->space_size - row->kern_size) < - (tosp_silly_kn_sp_gap * row->xheight))) { + if ((row->space_size < + tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f)) || + ((row->space_size - row->kern_size) < + (tosp_silly_kn_sp_gap * row->xheight))) { if (good_block_space_estimate && - (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size)) + (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size)) sane_space = block_space_gap_width; else - sane_space = - std::max(static_cast(tosp_min_sane_kn_sp) * std::max(row->kern_size, 2.5f), - row->xheight / 2.0f); + sane_space = std::max(static_cast(tosp_min_sane_kn_sp) * + std::max(row->kern_size, 2.5f), + row->xheight / 2.0f); if (tosp_debug_level > 5) tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n", block_idx, row_idx, row->kern_size, row->space_threshold, row->space_size, sane_space); row->space_size = sane_space; - row->space_threshold = - int32_t (floor ((row->space_size + row->kern_size) / - tosp_old_sp_kn_th_factor)); + row->space_threshold = int32_t( + floor((row->space_size + row->kern_size) / tosp_old_sp_kn_th_factor)); } /* NEVER let threshold get VERY far away from kern */ - sane_threshold = int32_t (floor (tosp_max_sane_kn_thresh * - std::max(row->kern_size, 2.5f))); + sane_threshold = int32_t( + floor(tosp_max_sane_kn_thresh * std::max(row->kern_size, 2.5f))); if (row->space_threshold > sane_threshold) { if (tosp_debug_level > 5) tprintf("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n", @@ -475,18 +433,17 @@ void Textord::row_spacing_stats( /* Beware of tables - there may be NO spaces */ if (suspected_table) { sane_space = std::max(tosp_table_kn_sp_ratio * row->kern_size, - tosp_table_xht_sp_ratio * row->xheight); - sane_threshold = int32_t (floor ((sane_space + row->kern_size) / 2)); + tosp_table_xht_sp_ratio * row->xheight); + sane_threshold = int32_t(floor((sane_space + row->kern_size) / 2)); if ((row->space_size < sane_space) || - (row->space_threshold < sane_threshold)) { + (row->space_threshold < sane_threshold)) { if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n", - block_idx, row_idx, - row->kern_size, - row->space_threshold, row->space_size); - //the minimum sane value - row->space_threshold = (int32_t) sane_space; + tprintf("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n", block_idx, + row_idx, row->kern_size, row->space_threshold, + row->space_size); + // the minimum sane value + row->space_threshold = (int32_t)sane_space; row->space_size = std::max(row->space_threshold + 1.0f, row->xheight); } } @@ -496,16 +453,15 @@ void Textord::row_spacing_stats( if (tosp_old_to_method) { /* Old textord made a space if gap >= threshold */ - //NO FUZZY SPACES YET + // NO FUZZY SPACES YET row->max_nonspace = row->space_threshold; - //NO FUZZY SPACES YET + // NO FUZZY SPACES YET row->min_space = row->space_threshold + 1; - } - else { + } else { /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it:-) */ row->min_space = - std::min(int32_t (ceil (tosp_fuzzy_space_factor * row->xheight)), - int32_t (row->space_size)); + std::min(int32_t(ceil(tosp_fuzzy_space_factor * row->xheight)), + int32_t(row->space_size)); if (row->min_space <= row->space_threshold) // Don't be silly row->min_space = row->space_threshold + 1; @@ -522,15 +478,15 @@ void Textord::row_spacing_stats( below the threshold. */ - max_max_nonspace = int32_t ((row->space_threshold + row->kern_size) / 2); + max_max_nonspace = int32_t((row->space_threshold + row->kern_size) / 2); - //default + // default row->max_nonspace = max_max_nonspace; for (index = 0; index <= max_max_nonspace; index++) { - if (all_gap_stats.pile_count (index) > max) - max = all_gap_stats.pile_count (index); + if (all_gap_stats.pile_count(index) > max) + max = all_gap_stats.pile_count(index); if ((index > row->kern_size) && - (all_gap_stats.pile_count (index) < 0.1 * max)) { + (all_gap_stats.pile_count(index) < 0.1 * max)) { row->max_nonspace = index; break; } @@ -540,13 +496,12 @@ void Textord::row_spacing_stats( /* Yet another algorithm - simpler this time - just choose a fraction of the threshold to space range */ - if ((tosp_fuzzy_sp_fraction > 0) && - (row->space_size > row->space_threshold)) - row->min_space = std::max(row->min_space, - (int32_t) ceil (row->space_threshold + - tosp_fuzzy_sp_fraction * - (row->space_size - - row->space_threshold))); + if ((tosp_fuzzy_sp_fraction > 0) && (row->space_size > row->space_threshold)) + row->min_space = + std::max(row->min_space, + (int32_t)ceil(row->space_threshold + + tosp_fuzzy_sp_fraction * + (row->space_size - row->space_threshold))); /* Ensure that ANY space less than some multiplier times the kern size is fuzzy. In tables there is a risk of erroneously setting a small space size @@ -556,16 +511,15 @@ void Textord::row_spacing_stats( dubious breaks. */ if ((tosp_table_fuzzy_kn_sp_ratio > 0) && - (suspected_table || tosp_fuzzy_limit_all)) - row->min_space = std::max(row->min_space, - (int32_t) ceil (tosp_table_fuzzy_kn_sp_ratio * - row->kern_size)); + (suspected_table || tosp_fuzzy_limit_all)) + row->min_space = + std::max(row->min_space, + (int32_t)ceil(tosp_table_fuzzy_kn_sp_ratio * row->kern_size)); if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold)) { - row->max_nonspace = (int32_t) floor (0.5 + row->kern_size + - tosp_fuzzy_kn_fraction * - (row->space_threshold - - row->kern_size)); + row->max_nonspace = (int32_t)floor( + 0.5 + row->kern_size + + tosp_fuzzy_kn_fraction * (row->space_threshold - row->kern_size)); } if (row->max_nonspace > row->space_threshold) { // Don't be silly @@ -573,76 +527,75 @@ void Textord::row_spacing_stats( } if (tosp_debug_level > 5) - tprintf - ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n", - block_idx, row_idx, row_length, block_non_space_gap_width, - block_space_gap_width, real_space_threshold, row->kern_size, - row->max_nonspace, row->space_threshold, row->min_space, - row->space_size); + tprintf( + "B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) " + "Sp:%3.2f\n", + block_idx, row_idx, row_length, block_non_space_gap_width, + block_space_gap_width, real_space_threshold, row->kern_size, + row->max_nonspace, row->space_threshold, row->min_space, + row->space_size); if (tosp_debug_level > 10) - tprintf("row->kern_size = %3.2f, row->space_size = %3.2f, " - "row->space_threshold = %d\n", - row->kern_size, row->space_size, row->space_threshold); + tprintf( + "row->kern_size = %3.2f, row->space_size = %3.2f, " + "row->space_threshold = %d\n", + row->kern_size, row->space_size, row->space_threshold); } void Textord::old_to_method( - TO_ROW *row, - STATS *all_gap_stats, - STATS *space_gap_stats, - STATS *small_gap_stats, - int16_t block_space_gap_width, //estimate for block - int16_t block_non_space_gap_width //estimate for block - ) { + TO_ROW* row, STATS* all_gap_stats, STATS* space_gap_stats, + STATS* small_gap_stats, + int16_t block_space_gap_width, // estimate for block + int16_t block_non_space_gap_width // estimate for block +) { /* First, estimate row space size */ /* Old to condition was > 2 */ - if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) { - //Adequate samples - /* Set space size to median of spaces BUT limits it if it seems wildly out */ - row->space_size = space_gap_stats->median (); + if (space_gap_stats->get_total() >= tosp_enough_space_samples_for_median) { + // Adequate samples + /* Set space size to median of spaces BUT limits it if it seems wildly out + */ + row->space_size = space_gap_stats->median(); if (row->space_size > block_space_gap_width * 1.5) { if (tosp_old_to_bug_fix) row->space_size = block_space_gap_width * 1.5; else - //BUG??? should be *1.5 + // BUG??? should be *1.5 row->space_size = block_space_gap_width; } if (row->space_size < (block_non_space_gap_width * 2) + 1) row->space_size = (block_non_space_gap_width * 2) + 1; } - //Only 1 or 2 samples - else if (space_gap_stats->get_total () >= 1) { - //hence mean not median - row->space_size = space_gap_stats->mean (); + // Only 1 or 2 samples + else if (space_gap_stats->get_total() >= 1) { + // hence mean not median + row->space_size = space_gap_stats->mean(); if (row->space_size > block_space_gap_width * 1.5) { if (tosp_old_to_bug_fix) row->space_size = block_space_gap_width * 1.5; else - //BUG??? should be *1.5 + // BUG??? should be *1.5 row->space_size = block_space_gap_width; } if (row->space_size < (block_non_space_gap_width * 3) + 1) row->space_size = (block_non_space_gap_width * 3) + 1; - } - else { - //Use block default + } else { + // Use block default row->space_size = block_space_gap_width; } /* Next, estimate row kern size */ if ((tosp_only_small_gaps_for_kern) && - (small_gap_stats->get_total () > tosp_redo_kern_limit)) - row->kern_size = small_gap_stats->median (); - else if (all_gap_stats->get_total () > tosp_redo_kern_limit) - row->kern_size = all_gap_stats->median (); - else //old TO -SAME FOR ALL ROWS + (small_gap_stats->get_total() > tosp_redo_kern_limit)) + row->kern_size = small_gap_stats->median(); + else if (all_gap_stats->get_total() > tosp_redo_kern_limit) + row->kern_size = all_gap_stats->median(); + else // old TO -SAME FOR ALL ROWS row->kern_size = block_non_space_gap_width; /* Finally, estimate row space threshold */ if (tosp_threshold_bias2 > 0) { - row->space_threshold = - int32_t (floor (0.5 + row->kern_size + - tosp_threshold_bias2 * (row->space_size - - row->kern_size))); + row->space_threshold = int32_t( + floor(0.5 + row->kern_size + + tosp_threshold_bias2 * (row->space_size - row->kern_size))); } else { /* NOTE old text ord uses (space_size + kern_size + 1)/2 as the threshold @@ -653,7 +606,7 @@ void Textord::old_to_method( and kern_size.) */ row->space_threshold = - int32_t (floor ((row->space_size + row->kern_size) / 2)); + int32_t(floor((row->space_size + row->kern_size) / 2)); } // Apply the same logic and ratios as in row_spacing_stats to @@ -666,119 +619,110 @@ void Textord::old_to_method( tosp_silly_kn_sp_gap * row->xheight))) { if (row->kern_size > 2.5) row->kern_size = row->space_size / tosp_min_sane_kn_sp; - row->space_threshold = int32_t (floor ((row->space_size + row->kern_size) / - tosp_old_sp_kn_th_factor)); + row->space_threshold = int32_t( + floor((row->space_size + row->kern_size) / tosp_old_sp_kn_th_factor)); } } - /************************************************************************* * isolated_row_stats() * Set values for min_space, max_non_space based on row stats only *************************************************************************/ -bool Textord::isolated_row_stats(TO_ROW* row, - GAPMAP* gapmap, - STATS* all_gap_stats, - bool suspected_table, - int16_t block_idx, - int16_t row_idx) { +bool Textord::isolated_row_stats(TO_ROW* row, GAPMAP* gapmap, + STATS* all_gap_stats, bool suspected_table, + int16_t block_idx, int16_t row_idx) { float kern_estimate; float crude_threshold_estimate; int16_t small_gaps_count; int16_t total; - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - STATS cert_space_gap_stats (0, MAXSPACING); - STATS all_space_gap_stats (0, MAXSPACING); - STATS small_gap_stats (0, MAXSPACING); + // iterator + BLOBNBOX_IT blob_it = row->blob_list(); + STATS cert_space_gap_stats(0, MAXSPACING); + STATS all_space_gap_stats(0, MAXSPACING); + STATS small_gap_stats(0, MAXSPACING); TBOX blob_box; TBOX prev_blob_box; int16_t gap_width; int32_t end_of_row; int32_t row_length; - kern_estimate = all_gap_stats->median (); + kern_estimate = all_gap_stats->median(); crude_threshold_estimate = std::max(tosp_init_guess_kn_mult * kern_estimate, - tosp_init_guess_xht_mult * row->xheight); - small_gaps_count = stats_count_under (all_gap_stats, - (int16_t) - ceil (crude_threshold_estimate)); - total = all_gap_stats->get_total (); + tosp_init_guess_xht_mult * row->xheight); + small_gaps_count = + stats_count_under(all_gap_stats, (int16_t)ceil(crude_threshold_estimate)); + total = all_gap_stats->get_total(); if ((total <= tosp_redo_kern_limit) || - ((small_gaps_count / (float) total) < tosp_enough_small_gaps) || - (total - small_gaps_count < 1)) { + ((small_gaps_count / (float)total) < tosp_enough_small_gaps) || + (total - small_gaps_count < 1)) { if (tosp_debug_level > 5) tprintf("B:%d R:%d -- Can't do isolated row stats.\n", block_idx, row_idx); return false; } - blob_it.set_to_list (row->blob_list ()); - blob_it.mark_cycle_pt (); - end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); + blob_it.set_to_list(row->blob_list()); + blob_it.mark_cycle_pt(); + end_of_row = blob_it.data_relative(-1)->bounding_box().right(); if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); + blob_box = box_next_pre_chopped(&blob_it); else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); + blob_box = reduced_box_next(row, &blob_it); else - blob_box = box_next (&blob_it); - row_length = end_of_row - blob_box.left (); + blob_box = box_next(&blob_it); + row_length = end_of_row - blob_box.left(); prev_blob_box = blob_box; - while (!blob_it.cycled_list ()) { + while (!blob_it.cycled_list()) { if (tosp_use_pre_chopping) - blob_box = box_next_pre_chopped (&blob_it); + blob_box = box_next_pre_chopped(&blob_it); else if (tosp_stats_use_xht_gaps) - blob_box = reduced_box_next (row, &blob_it); + blob_box = reduced_box_next(row, &blob_it); else - blob_box = box_next (&blob_it); + blob_box = box_next(&blob_it); int16_t left = prev_blob_box.right(); int16_t right = blob_box.left(); gap_width = right - left; if (!ignore_big_gap(row, row_length, gapmap, left, right) && - (gap_width > crude_threshold_estimate)) { + (gap_width > crude_threshold_estimate)) { if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) || - ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) && - (!tosp_narrow_blobs_not_cert || - (!narrow_blob (row, prev_blob_box) && - !narrow_blob (row, blob_box)))) || - (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box))) - cert_space_gap_stats.add (gap_width, 1); - all_space_gap_stats.add (gap_width, 1); + ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) && + (!tosp_narrow_blobs_not_cert || (!narrow_blob(row, prev_blob_box) && + !narrow_blob(row, blob_box)))) || + (wide_blob(row, prev_blob_box) && wide_blob(row, blob_box))) + cert_space_gap_stats.add(gap_width, 1); + all_space_gap_stats.add(gap_width, 1); } - if (gap_width < crude_threshold_estimate) - small_gap_stats.add (gap_width, 1); + if (gap_width < crude_threshold_estimate) small_gap_stats.add(gap_width, 1); prev_blob_box = blob_box; } - if (cert_space_gap_stats.get_total () >= - tosp_enough_space_samples_for_median) - //median - row->space_size = cert_space_gap_stats.median (); - else if (suspected_table && (cert_space_gap_stats.get_total () > 0)) - //to avoid spaced - row->space_size = cert_space_gap_stats.mean (); + if (cert_space_gap_stats.get_total() >= tosp_enough_space_samples_for_median) + // median + row->space_size = cert_space_gap_stats.median(); + else if (suspected_table && (cert_space_gap_stats.get_total() > 0)) + // to avoid spaced + row->space_size = cert_space_gap_stats.mean(); // 1's in tables - else if (all_space_gap_stats.get_total () >= - tosp_enough_space_samples_for_median) - //median - row->space_size = all_space_gap_stats.median (); + else if (all_space_gap_stats.get_total() >= + tosp_enough_space_samples_for_median) + // median + row->space_size = all_space_gap_stats.median(); else - row->space_size = all_space_gap_stats.mean (); + row->space_size = all_space_gap_stats.mean(); if (tosp_only_small_gaps_for_kern) - row->kern_size = small_gap_stats.median (); + row->kern_size = small_gap_stats.median(); else - row->kern_size = all_gap_stats->median (); - row->space_threshold = - int32_t (floor ((row->space_size + row->kern_size) / 2)); + row->kern_size = all_gap_stats->median(); + row->space_threshold = int32_t(floor((row->space_size + row->kern_size) / 2)); /* Sanity check */ if ((row->kern_size >= row->space_threshold) || - (row->space_threshold >= row->space_size) || - (row->space_threshold <= 0)) { + (row->space_threshold >= row->space_size) || + (row->space_threshold <= 0)) { if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n", - block_idx, row_idx, - row->kern_size, row->space_threshold, row->space_size); + tprintf("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n", + block_idx, row_idx, row->kern_size, row->space_threshold, + row->space_size); row->kern_size = 0.0f; row->space_threshold = 0; row->space_size = 0.0f; @@ -786,22 +730,19 @@ bool Textord::isolated_row_stats(TO_ROW* row, } if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- Isolated row stats: %f %d %f\n", - block_idx, row_idx, - row->kern_size, row->space_threshold, row->space_size); + tprintf("B:%d R:%d -- Isolated row stats: %f %d %f\n", block_idx, row_idx, + row->kern_size, row->space_threshold, row->space_size); return true; } -int16_t Textord::stats_count_under(STATS *stats, int16_t threshold) { +int16_t Textord::stats_count_under(STATS* stats, int16_t threshold) { int16_t index; int16_t total = 0; - for (index = 0; index < threshold; index++) - total += stats->pile_count (index); + for (index = 0; index < threshold; index++) total += stats->pile_count(index); return total; } - /************************************************************************* * improve_row_threshold() * Try to recognise a "normal line" - @@ -813,11 +754,11 @@ int16_t Textord::stats_count_under(STATS *stats, int16_t threshold) { * && a gap of max( 3, (sp - kn)/3 ) empty histogram positions is found * somewhere in the histogram between kn and sp * THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies - * NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!! - * try moving the default threshold to within this band but leave the + * NO!!!!! the bristol line has "11" with a gap of 12 between the + *1's!!! try moving the default threshold to within this band but leave the * fuzzy limit calculation as at present. *************************************************************************/ -void Textord::improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) { +void Textord::improve_row_threshold(TO_ROW* row, STATS* all_gap_stats) { float sp = row->space_size; float kn = row->kern_size; int16_t reqd_zero_width = 0; @@ -825,33 +766,26 @@ void Textord::improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) { int16_t zero_start = 0; int16_t index = 0; - if (tosp_debug_level > 10) - tprintf ("Improve row threshold 0"); - if ((all_gap_stats->get_total () <= 25) || - (sp <= 10) || - (sp <= 3 * kn) || - (stats_count_under (all_gap_stats, - (int16_t) ceil (kn + (sp - kn) / 3 + 0.5)) < - (0.75 * all_gap_stats->get_total ()))) + if (tosp_debug_level > 10) tprintf("Improve row threshold 0"); + if ((all_gap_stats->get_total() <= 25) || (sp <= 10) || (sp <= 3 * kn) || + (stats_count_under(all_gap_stats, + (int16_t)ceil(kn + (sp - kn) / 3 + 0.5)) < + (0.75 * all_gap_stats->get_total()))) return; - if (tosp_debug_level > 10) - tprintf (" 1"); + if (tosp_debug_level > 10) tprintf(" 1"); /* Look for the first region of all 0's in the histogram which is wider than max( 3, (sp - kn)/3 ) and starts between kn and sp. If found, and current threshold is not within it, move the threshold so that is is just inside it. */ - reqd_zero_width = (int16_t) floor ((sp - kn) / 3 + 0.5); - if (reqd_zero_width < 3) - reqd_zero_width = 3; - - for (index = int16_t (ceil (kn)); index < int16_t (floor (sp)); index++) { - if (all_gap_stats->pile_count (index) == 0) { - if (zero_width == 0) - zero_start = index; + reqd_zero_width = (int16_t)floor((sp - kn) / 3 + 0.5); + if (reqd_zero_width < 3) reqd_zero_width = 3; + + for (index = int16_t(ceil(kn)); index < int16_t(floor(sp)); index++) { + if (all_gap_stats->pile_count(index) == 0) { + if (zero_width == 0) zero_start = index; zero_width++; - } - else { + } else { if (zero_width >= reqd_zero_width) break; else { @@ -861,64 +795,58 @@ void Textord::improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) { } index--; if (tosp_debug_level > 10) - tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n", - reqd_zero_width, zero_width, zero_start, row->space_threshold); + tprintf(" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n", + reqd_zero_width, zero_width, zero_start, row->space_threshold); if ((zero_width < reqd_zero_width) || - ((row->space_threshold >= zero_start) && - (row->space_threshold <= index))) + ((row->space_threshold >= zero_start) && (row->space_threshold <= index))) return; - if (tosp_debug_level > 10) - tprintf (" 2"); + if (tosp_debug_level > 10) tprintf(" 2"); if (row->space_threshold < zero_start) { if (tosp_debug_level > 5) - tprintf - ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", - kn, sp, zero_start, index, row->space_threshold, zero_start); + tprintf("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", + kn, sp, zero_start, index, row->space_threshold, zero_start); row->space_threshold = zero_start; } if (row->space_threshold > index) { if (tosp_debug_level > 5) - tprintf - ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", - kn, sp, zero_start, index, row->space_threshold, index); + tprintf("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", + kn, sp, zero_start, index, row->space_threshold, index); row->space_threshold = index; } } - /********************************************************************** * make_prop_words * * Convert a TO_BLOCK to a BLOCK. **********************************************************************/ -ROW *Textord::make_prop_words( - TO_ROW *row, // row to make - FCOORD rotation // for drawing - ) { - bool bol; // start of line +ROW* Textord::make_prop_words(TO_ROW* row, // row to make + FCOORD rotation // for drawing +) { + bool bol; // start of line /* prev_ values are for start of word being built. non prev_ values are for the gap between the word being built and the next one. */ - bool prev_fuzzy_sp; // probably space - bool prev_fuzzy_non; // probably not - uint8_t prev_blanks; // in front of word - bool fuzzy_sp = false; // probably space - bool fuzzy_non = false; // probably not - uint8_t blanks = 0; // in front of word + bool prev_fuzzy_sp; // probably space + bool prev_fuzzy_non; // probably not + uint8_t prev_blanks; // in front of word + bool fuzzy_sp = false; // probably space + bool fuzzy_non = false; // probably not + uint8_t blanks = 0; // in front of word bool prev_gap_was_a_space = false; bool break_at_next_gap = false; - ROW *real_row; // output row + ROW* real_row; // output row C_OUTLINE_IT cout_it; C_BLOB_LIST cblobs; C_BLOB_IT cblob_it = &cblobs; WERD_LIST words; - WERD *word; // new word + WERD* word; // new word int32_t next_rep_char_word_right = INT32_MAX; - float repetition_spacing; // gap between repetitions - int32_t xstarts[2]; // row ends - int32_t prev_x; // end of prev blob - BLOBNBOX *bblob; // current blob - TBOX blob_box; // bounding box - BLOBNBOX_IT box_it; // iterator + float repetition_spacing; // gap between repetitions + int32_t xstarts[2]; // row ends + int32_t prev_x; // end of prev blob + BLOBNBOX* bblob; // current blob + TBOX blob_box; // bounding box + BLOBNBOX_IT box_it; // iterator TBOX prev_blob_box; TBOX next_blob_box; int16_t prev_gap = INT16_MAX; @@ -931,86 +859,77 @@ ROW *Textord::make_prop_words( // repeated char words WERD_IT rep_char_it(&(row->rep_words)); - if (!rep_char_it.empty ()) { - next_rep_char_word_right = - rep_char_it.data ()->bounding_box ().right (); + if (!rep_char_it.empty()) { + next_rep_char_word_right = rep_char_it.data()->bounding_box().right(); } prev_x = -INT16_MAX; - cblob_it.set_to_list (&cblobs); - box_it.set_to_list (row->blob_list ()); + cblob_it.set_to_list(&cblobs); + box_it.set_to_list(row->blob_list()); // new words WERD_IT word_it(&words); bol = true; prev_blanks = 0; prev_fuzzy_sp = false; prev_fuzzy_non = false; - if (!box_it.empty ()) { - xstarts[0] = box_it.data ()->bounding_box ().left (); + if (!box_it.empty()) { + xstarts[0] = box_it.data()->bounding_box().left(); if (xstarts[0] > next_rep_char_word_right) { /* We need to insert a repeated char word at the start of the row */ - word = rep_char_it.extract (); - word_it.add_after_then_move (word); + word = rep_char_it.extract(); + word_it.add_after_then_move(word); /* Set spaces before repeated char word */ - word->set_flag (W_BOL, true); + word->set_flag(W_BOL, true); bol = false; - word->set_blanks (0); - //NO uncertainty - word->set_flag (W_FUZZY_SP, false); - word->set_flag (W_FUZZY_NON, false); - xstarts[0] = word->bounding_box ().left (); + word->set_blanks(0); + // NO uncertainty + word->set_flag(W_FUZZY_SP, false); + word->set_flag(W_FUZZY_NON, false); + xstarts[0] = word->bounding_box().left(); /* Set spaces after repeated char word (and leave current word set) */ - repetition_spacing = find_mean_blob_spacing (word); - current_gap = box_it.data ()->bounding_box ().left () - - next_rep_char_word_right; + repetition_spacing = find_mean_blob_spacing(word); + current_gap = + box_it.data()->bounding_box().left() - next_rep_char_word_right; current_within_xht_gap = current_gap; if (current_gap > tosp_rep_space * repetition_spacing) { - prev_blanks = (uint8_t) floor (current_gap / row->space_size); - if (prev_blanks < 1) - prev_blanks = 1; - } - else + prev_blanks = (uint8_t)floor(current_gap / row->space_size); + if (prev_blanks < 1) prev_blanks = 1; + } else prev_blanks = 0; if (tosp_debug_level > 5) - tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ", - box_it.data ()->bounding_box ().left (), - box_it.data ()->bounding_box ().bottom (), - repetition_spacing, current_gap); + tprintf("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ", + box_it.data()->bounding_box().left(), + box_it.data()->bounding_box().bottom(), repetition_spacing, + current_gap); prev_fuzzy_sp = false; prev_fuzzy_non = false; - if (rep_char_it.empty ()) { + if (rep_char_it.empty()) { next_rep_char_word_right = INT32_MAX; - } - else { - rep_char_it.forward (); - next_rep_char_word_right = - rep_char_it.data ()->bounding_box ().right (); + } else { + rep_char_it.forward(); + next_rep_char_word_right = rep_char_it.data()->bounding_box().right(); } } - peek_at_next_gap(row, - box_it, - next_blob_box, - next_gap, - next_within_xht_gap); + peek_at_next_gap(row, box_it, next_blob_box, next_gap, next_within_xht_gap); do { - bblob = box_it.data (); - blob_box = bblob->bounding_box (); - if (bblob->joined_to_prev ()) { - if (bblob->cblob () != nullptr) { - cout_it.set_to_list (cblob_it.data ()->out_list ()); - cout_it.move_to_last (); - cout_it.add_list_after (bblob->cblob ()->out_list ()); - delete bblob->cblob (); + bblob = box_it.data(); + blob_box = bblob->bounding_box(); + if (bblob->joined_to_prev()) { + if (bblob->cblob() != nullptr) { + cout_it.set_to_list(cblob_it.data()->out_list()); + cout_it.move_to_last(); + cout_it.add_list_after(bblob->cblob()->out_list()); + delete bblob->cblob(); } } else { if (bblob->cblob() != nullptr) - cblob_it.add_after_then_move (bblob->cblob ()); - prev_x = blob_box.right (); + cblob_it.add_after_then_move(bblob->cblob()); + prev_x = blob_box.right(); } - box_it.forward (); //next one - bblob = box_it.data (); - blob_box = bblob->bounding_box (); + box_it.forward(); // next one + bblob = box_it.data(); + blob_box = bblob->bounding_box(); if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) { /* Real Blob - not multiple outlines or pre-chopped */ @@ -1019,10 +938,7 @@ ROW *Textord::make_prop_words( prev_blob_box = next_blob_box; current_gap = next_gap; current_within_xht_gap = next_within_xht_gap; - peek_at_next_gap(row, - box_it, - next_blob_box, - next_gap, + peek_at_next_gap(row, box_it, next_blob_box, next_gap, next_within_xht_gap); int16_t prev_gap_arg = prev_gap; @@ -1032,146 +948,128 @@ ROW *Textord::make_prop_words( next_gap_arg = next_within_xht_gap; } // Decide if a word-break should be inserted - if (blob_box.left () > next_rep_char_word_right || - make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box, - current_gap, current_within_xht_gap, - next_blob_box, next_gap_arg, - blanks, fuzzy_sp, fuzzy_non, - prev_gap_was_a_space, - break_at_next_gap) || + if (blob_box.left() > next_rep_char_word_right || + make_a_word_break( + row, blob_box, prev_gap_arg, prev_blob_box, current_gap, + current_within_xht_gap, next_blob_box, next_gap_arg, blanks, + fuzzy_sp, fuzzy_non, prev_gap_was_a_space, break_at_next_gap) || box_it.at_first()) { /* Form a new word out of the blobs collected */ - word = new WERD (&cblobs, prev_blanks, nullptr); + word = new WERD(&cblobs, prev_blanks, nullptr); word_count++; - word_it.add_after_then_move (word); + word_it.add_after_then_move(word); if (bol) { - word->set_flag (W_BOL, true); + word->set_flag(W_BOL, true); bol = false; } if (prev_fuzzy_sp) - //probably space - word->set_flag (W_FUZZY_SP, true); + // probably space + word->set_flag(W_FUZZY_SP, true); else if (prev_fuzzy_non) - word->set_flag (W_FUZZY_NON, true); - //probably not + word->set_flag(W_FUZZY_NON, true); + // probably not - if (blob_box.left () > next_rep_char_word_right) { + if (blob_box.left() > next_rep_char_word_right) { /* We need to insert a repeated char word */ - word = rep_char_it.extract (); - word_it.add_after_then_move (word); + word = rep_char_it.extract(); + word_it.add_after_then_move(word); /* Set spaces before repeated char word */ - repetition_spacing = find_mean_blob_spacing (word); - current_gap = word->bounding_box ().left () - prev_x; + repetition_spacing = find_mean_blob_spacing(word); + current_gap = word->bounding_box().left() - prev_x; current_within_xht_gap = current_gap; if (current_gap > tosp_rep_space * repetition_spacing) { - blanks = - (uint8_t) floor (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; - } - else + blanks = (uint8_t)floor(current_gap / row->space_size); + if (blanks < 1) blanks = 1; + } else blanks = 0; if (tosp_debug_level > 5) - tprintf - ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);", - word->bounding_box ().left (), - word->bounding_box ().bottom (), - repetition_spacing, current_gap, blanks); - word->set_blanks (blanks); - //NO uncertainty - word->set_flag (W_FUZZY_SP, false); - word->set_flag (W_FUZZY_NON, false); - - /* Set spaces after repeated char word (and leave current word set) */ - current_gap = - blob_box.left () - next_rep_char_word_right; + tprintf("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);", + word->bounding_box().left(), + word->bounding_box().bottom(), repetition_spacing, + current_gap, blanks); + word->set_blanks(blanks); + // NO uncertainty + word->set_flag(W_FUZZY_SP, false); + word->set_flag(W_FUZZY_NON, false); + + /* Set spaces after repeated char word (and leave current word set) + */ + current_gap = blob_box.left() - next_rep_char_word_right; if (current_gap > tosp_rep_space * repetition_spacing) { - blanks = (uint8_t) (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; - } - else + blanks = (uint8_t)(current_gap / row->space_size); + if (blanks < 1) blanks = 1; + } else blanks = 0; if (tosp_debug_level > 5) - tprintf (" Rgap:%d (%d blanks)\n", - current_gap, blanks); + tprintf(" Rgap:%d (%d blanks)\n", current_gap, blanks); fuzzy_sp = FALSE; fuzzy_non = FALSE; - if (rep_char_it.empty ()) { + if (rep_char_it.empty()) { next_rep_char_word_right = INT32_MAX; - } - else { - rep_char_it.forward (); + } else { + rep_char_it.forward(); next_rep_char_word_right = - rep_char_it.data ()->bounding_box ().right (); + rep_char_it.data()->bounding_box().right(); } } - if (box_it.at_first () && rep_char_it.empty ()) { - //at end of line - word->set_flag (W_EOL, true); + if (box_it.at_first() && rep_char_it.empty()) { + // at end of line + word->set_flag(W_EOL, true); xstarts[1] = prev_x; - } - else { + } else { prev_blanks = blanks; prev_fuzzy_sp = fuzzy_sp; prev_fuzzy_non = fuzzy_non; } } } - } - while (!box_it.at_first ()); //until back at start + } while (!box_it.at_first()); // until back at start /* Insert any further repeated char words */ - while (!rep_char_it.empty ()) { - word = rep_char_it.extract (); - word_it.add_after_then_move (word); + while (!rep_char_it.empty()) { + word = rep_char_it.extract(); + word_it.add_after_then_move(word); /* Set spaces before repeated char word */ - repetition_spacing = find_mean_blob_spacing (word); - current_gap = word->bounding_box ().left () - prev_x; + repetition_spacing = find_mean_blob_spacing(word); + current_gap = word->bounding_box().left() - prev_x; if (current_gap > tosp_rep_space * repetition_spacing) { - blanks = (uint8_t) floor (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; - } - else + blanks = (uint8_t)floor(current_gap / row->space_size); + if (blanks < 1) blanks = 1; + } else blanks = 0; if (tosp_debug_level > 5) tprintf( "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n", word->bounding_box().left(), word->bounding_box().bottom(), repetition_spacing, current_gap, blanks); - word->set_blanks (blanks); - //NO uncertainty - word->set_flag (W_FUZZY_SP, false); - word->set_flag (W_FUZZY_NON, false); - prev_x = word->bounding_box ().right (); - if (rep_char_it.empty ()) { - //at end of line - word->set_flag (W_EOL, true); + word->set_blanks(blanks); + // NO uncertainty + word->set_flag(W_FUZZY_SP, false); + word->set_flag(W_FUZZY_NON, false); + prev_x = word->bounding_box().right(); + if (rep_char_it.empty()) { + // at end of line + word->set_flag(W_EOL, true); xstarts[1] = prev_x; - } - else { - rep_char_it.forward (); + } else { + rep_char_it.forward(); } } - real_row = new ROW (row, - (int16_t) row->kern_size, (int16_t) row->space_size); - word_it.set_to_list (real_row->word_list ()); - //put words in row - word_it.add_list_after (&words); - real_row->recalc_bounding_box (); + real_row = new ROW(row, (int16_t)row->kern_size, (int16_t)row->space_size); + word_it.set_to_list(real_row->word_list()); + // put words in row + word_it.add_list_after(&words); + real_row->recalc_bounding_box(); if (tosp_debug_level > 4) { - tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n", - word_count, - real_row->bounding_box ().left (), - real_row->bounding_box ().bottom (), - real_row->bounding_box ().right (), - real_row->bounding_box ().top ()); + tprintf("Row: Made %d words in row ((%d,%d)(%d,%d))\n", word_count, + real_row->bounding_box().left(), + real_row->bounding_box().bottom(), + real_row->bounding_box().right(), real_row->bounding_box().top()); } return real_row; } @@ -1184,20 +1082,19 @@ ROW *Textord::make_prop_words( * Converts words into blobs so that each blob is a single character. * Used for chopper test. **********************************************************************/ -ROW *Textord::make_blob_words( - TO_ROW *row, // row to make - FCOORD rotation // for drawing - ) { - bool bol; // start of line - ROW *real_row; // output row +ROW* Textord::make_blob_words(TO_ROW* row, // row to make + FCOORD rotation // for drawing +) { + bool bol; // start of line + ROW* real_row; // output row C_OUTLINE_IT cout_it; C_BLOB_LIST cblobs; C_BLOB_IT cblob_it = &cblobs; WERD_LIST words; - WERD *word; // new word - BLOBNBOX *bblob; // current blob - TBOX blob_box; // bounding box - BLOBNBOX_IT box_it; // iterator + WERD* word; // new word + BLOBNBOX* bblob; // current blob + TBOX blob_box; // bounding box + BLOBNBOX_IT box_it; // iterator int16_t word_count = 0; cblob_it.set_to_list(&cblobs); @@ -1206,7 +1103,6 @@ ROW *Textord::make_blob_words( WERD_IT word_it(&words); bol = TRUE; if (!box_it.empty()) { - do { bblob = box_it.data(); blob_box = bblob->bounding_box(); @@ -1221,7 +1117,7 @@ ROW *Textord::make_blob_words( if (bblob->cblob() != nullptr) cblob_it.add_after_then_move(bblob->cblob()); } - box_it.forward(); // next one + box_it.forward(); // next one bblob = box_it.data(); blob_box = bblob->bounding_box(); @@ -1233,25 +1129,22 @@ ROW *Textord::make_blob_words( word->set_flag(W_BOL, TRUE); bol = FALSE; } - if (box_it.at_first()) { // at end of line + if (box_it.at_first()) { // at end of line word->set_flag(W_EOL, TRUE); } } - } - while (!box_it.at_first()); // until back at start + } while (!box_it.at_first()); // until back at start /* Setup the row with created words. */ - real_row = new ROW(row, (int16_t) row->kern_size, (int16_t) row->space_size); + real_row = new ROW(row, (int16_t)row->kern_size, (int16_t)row->space_size); word_it.set_to_list(real_row->word_list()); - //put words in row + // put words in row word_it.add_list_after(&words); real_row->recalc_bounding_box(); if (tosp_debug_level > 4) { - tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n", - word_count, - real_row->bounding_box().left(), - real_row->bounding_box().bottom(), - real_row->bounding_box().right(), - real_row->bounding_box().top()); + tprintf("Row:Made %d words in row ((%d,%d)(%d,%d))\n", word_count, + real_row->bounding_box().left(), + real_row->bounding_box().bottom(), + real_row->bounding_box().right(), real_row->bounding_box().top()); } return real_row; } @@ -1259,19 +1152,12 @@ ROW *Textord::make_blob_words( } bool Textord::make_a_word_break( - TO_ROW* row, // row being made - TBOX blob_box, // for next_blob // how many blanks? - int16_t prev_gap, - TBOX prev_blob_box, - int16_t real_current_gap, - int16_t within_xht_current_gap, - TBOX next_blob_box, - int16_t next_gap, - uint8_t& blanks, - bool& fuzzy_sp, - bool& fuzzy_non, - bool& prev_gap_was_a_space, - bool& break_at_next_gap) { + TO_ROW* row, // row being made + TBOX blob_box, // for next_blob // how many blanks? + int16_t prev_gap, TBOX prev_blob_box, int16_t real_current_gap, + int16_t within_xht_current_gap, TBOX next_blob_box, int16_t next_gap, + uint8_t& blanks, bool& fuzzy_sp, bool& fuzzy_non, + bool& prev_gap_was_a_space, bool& break_at_next_gap) { bool space; int16_t current_gap; float fuzzy_sp_to_kn_limit; @@ -1289,7 +1175,7 @@ bool Textord::make_a_word_break( if ((row->kern_size > tosp_large_kerning * row->xheight) || ((tosp_dont_fool_with_small_kerns >= 0) && (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size))) - //Ignore the difference + // Ignore the difference within_xht_current_gap = real_current_gap; if (tosp_use_xht_gaps && tosp_only_use_xht_gaps) @@ -1298,7 +1184,7 @@ bool Textord::make_a_word_break( current_gap = real_current_gap; if (tosp_old_to_method) { - //Boring old method + // Boring old method space = current_gap > row->max_nonspace; if (space && (current_gap < INT16_MAX)) { if (current_gap < row->min_space) { @@ -1306,29 +1192,25 @@ bool Textord::make_a_word_break( blanks = 1; fuzzy_sp = true; fuzzy_non = false; - } - else { + } else { blanks = 0; fuzzy_sp = false; fuzzy_non = true; } - } - else { - blanks = (uint8_t) (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; + } else { + blanks = (uint8_t)(current_gap / row->space_size); + if (blanks < 1) blanks = 1; fuzzy_sp = false; fuzzy_non = false; } } return space; - } - else { - /* New exciting heuristic method */ - if (prev_blob_box.null_box ()) // Beginning of row + } else { + /* New exciting heuristic method */ + if (prev_blob_box.null_box()) // Beginning of row prev_gap_was_a_space = true; - //Default as old TO + // Default as old TO space = current_gap > row->space_threshold; /* Set defaults for the word break incase we find one. Currently there are @@ -1347,152 +1229,123 @@ bool Textord::make_a_word_break( despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to context. */ - if (tosp_use_xht_gaps && - (real_current_gap <= row->max_nonspace) && - (within_xht_current_gap > row->max_nonspace)) { + if (tosp_use_xht_gaps && (real_current_gap <= row->max_nonspace) && + (within_xht_current_gap > row->max_nonspace)) { space = true; fuzzy_non = true; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 20, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 20, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif - } - else if (tosp_use_xht_gaps && - (real_current_gap <= row->space_threshold) && - (within_xht_current_gap > row->space_threshold)) { + } else if (tosp_use_xht_gaps && + (real_current_gap <= row->space_threshold) && + (within_xht_current_gap > row->space_threshold)) { space = true; if (tosp_flip_fuzz_kn_to_sp) fuzzy_sp = true; else fuzzy_non = true; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 21, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 21, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif - } - else if (tosp_use_xht_gaps && - (real_current_gap < row->min_space) && - (within_xht_current_gap >= row->min_space)) { + } else if (tosp_use_xht_gaps && (real_current_gap < row->min_space) && + (within_xht_current_gap >= row->min_space)) { space = true; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 22, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 22, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif - } - else if (tosp_force_wordbreak_on_punct && - !suspected_punct_blob(row, prev_blob_box) && - suspected_punct_blob(row, blob_box)) { + } else if (tosp_force_wordbreak_on_punct && + !suspected_punct_blob(row, prev_blob_box) && + suspected_punct_blob(row, blob_box)) { break_at_next_gap = true; } /* Now continue with normal heuristics */ else if ((current_gap < row->min_space) && - (current_gap > row->space_threshold)) { + (current_gap > row->space_threshold)) { /* Heuristics to turn dubious spaces to kerns */ if (tosp_pass_wide_fuzz_sp_to_context > 0) - fuzzy_sp_to_kn_limit = row->kern_size + - tosp_pass_wide_fuzz_sp_to_context * - (row->space_size - row->kern_size); + fuzzy_sp_to_kn_limit = + row->kern_size + tosp_pass_wide_fuzz_sp_to_context * + (row->space_size - row->kern_size); else fuzzy_sp_to_kn_limit = 99999.0f; - /* If current gap is significantly smaller than the previous space the other - side of a narrow blob then this gap is a kern. */ - if ((prev_blob_box.width () > 0) && - narrow_blob (row, prev_blob_box) && - prev_gap_was_a_space && - (current_gap <= tosp_gap_factor * prev_gap)) { - if ((tosp_all_flips_fuzzy) || - (current_gap > fuzzy_sp_to_kn_limit)) { + /* If current gap is significantly smaller than the previous space the + other side of a narrow blob then this gap is a kern. */ + if ((prev_blob_box.width() > 0) && narrow_blob(row, prev_blob_box) && + prev_gap_was_a_space && (current_gap <= tosp_gap_factor * prev_gap)) { + if ((tosp_all_flips_fuzzy) || (current_gap > fuzzy_sp_to_kn_limit)) { if (tosp_flip_fuzz_sp_to_kn) fuzzy_non = true; else fuzzy_sp = true; - } - else + } else space = false; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 1, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 1, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif } - /* If current gap not much bigger than the previous kern the other side of a - narrow blob then this gap is a kern as well */ - else if ((prev_blob_box.width () > 0) && - narrow_blob (row, prev_blob_box) && - !prev_gap_was_a_space && - (current_gap * tosp_gap_factor <= prev_gap)) { - if ((tosp_all_flips_fuzzy) || - (current_gap > fuzzy_sp_to_kn_limit)) { + /* If current gap not much bigger than the previous kern the other side of + a narrow blob then this gap is a kern as well */ + else if ((prev_blob_box.width() > 0) && narrow_blob(row, prev_blob_box) && + !prev_gap_was_a_space && + (current_gap * tosp_gap_factor <= prev_gap)) { + if ((tosp_all_flips_fuzzy) || (current_gap > fuzzy_sp_to_kn_limit)) { if (tosp_flip_fuzz_sp_to_kn) fuzzy_non = true; else fuzzy_sp = true; - } - else + } else space = false; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 2, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 2, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif - } - else if ((next_blob_box.width () > 0) && - narrow_blob (row, next_blob_box) && - (next_gap > row->space_threshold) && - (current_gap <= tosp_gap_factor * next_gap)) { - if ((tosp_all_flips_fuzzy) || - (current_gap > fuzzy_sp_to_kn_limit)) { + } else if ((next_blob_box.width() > 0) && + narrow_blob(row, next_blob_box) && + (next_gap > row->space_threshold) && + (current_gap <= tosp_gap_factor * next_gap)) { + if ((tosp_all_flips_fuzzy) || (current_gap > fuzzy_sp_to_kn_limit)) { if (tosp_flip_fuzz_sp_to_kn) fuzzy_non = true; else fuzzy_sp = true; - } - else + } else space = false; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 3, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 3, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif - } - else if ((next_blob_box.width () > 0) && - narrow_blob (row, next_blob_box) && - (next_gap <= row->space_threshold) && - (current_gap * tosp_gap_factor <= next_gap)) { - if ((tosp_all_flips_fuzzy) || - (current_gap > fuzzy_sp_to_kn_limit)) { + } else if ((next_blob_box.width() > 0) && + narrow_blob(row, next_blob_box) && + (next_gap <= row->space_threshold) && + (current_gap * tosp_gap_factor <= next_gap)) { + if ((tosp_all_flips_fuzzy) || (current_gap > fuzzy_sp_to_kn_limit)) { if (tosp_flip_fuzz_sp_to_kn) fuzzy_non = true; else fuzzy_sp = true; - } - else + } else space = false; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 4, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 4, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif - } - else if ((((next_blob_box.width () > 0) && - narrow_blob (row, next_blob_box)) || - ((prev_blob_box.width () > 0) && - narrow_blob (row, prev_blob_box)))) { + } else if ((((next_blob_box.width() > 0) && + narrow_blob(row, next_blob_box)) || + ((prev_blob_box.width() > 0) && + narrow_blob(row, prev_blob_box)))) { fuzzy_sp = true; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 6, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 6, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif } - } - else if ((current_gap > row->max_nonspace) && - (current_gap <= row->space_threshold)) { - + } else if ((current_gap > row->max_nonspace) && + (current_gap <= row->space_threshold)) { /* Heuristics to turn dubious kerns to spaces */ /* TRIED THIS BUT IT MADE THINGS WORSE if ( prev_gap == INT16_MAX ) @@ -1500,13 +1353,10 @@ bool Textord::make_a_word_break( if ( next_gap == INT16_MAX ) next_gap = 0; // end of row */ - if ((prev_blob_box.width () > 0) && - (next_blob_box.width () > 0) && - (current_gap >= - tosp_kern_gap_factor1 * std::max(prev_gap, next_gap)) && - wide_blob (row, prev_blob_box) && - wide_blob (row, next_blob_box)) { - + if ((prev_blob_box.width() > 0) && (next_blob_box.width() > 0) && + (current_gap >= + tosp_kern_gap_factor1 * std::max(prev_gap, next_gap)) && + wide_blob(row, prev_blob_box) && wide_blob(row, next_blob_box)) { space = true; /* tosp_flip_caution is an attempt to stop the default changing in cases @@ -1514,21 +1364,19 @@ bool Textord::make_a_word_break( See problem in 'chiefs' where "have" gets split in the quotation. */ if ((tosp_flip_fuzz_kn_to_sp) && - ((tosp_flip_caution <= 0) || - (tosp_flip_caution * row->kern_size > row->space_size))) + ((tosp_flip_caution <= 0) || + (tosp_flip_caution * row->kern_size > row->space_size))) fuzzy_sp = true; else fuzzy_non = true; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 7, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 7, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif - } else if (prev_blob_box.width() > 0 && - next_blob_box.width() > 0 && + } else if (prev_blob_box.width() > 0 && next_blob_box.width() > 0 && current_gap > 5 && // Rule 9 handles small gap, big ratio. current_gap >= - tosp_kern_gap_factor2 * std::max(prev_gap, next_gap) && + tosp_kern_gap_factor2 * std::max(prev_gap, next_gap) && !(narrow_blob(row, prev_blob_box) || suspected_punct_blob(row, prev_blob_box)) && !(narrow_blob(row, next_blob_box) || @@ -1536,31 +1384,29 @@ bool Textord::make_a_word_break( space = true; fuzzy_non = true; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 8, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 8, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif - } - else if ((tosp_kern_gap_factor3 > 0) && - (prev_blob_box.width () > 0) && - (next_blob_box.width () > 0) && - (current_gap >= tosp_kern_gap_factor3 * std::max(prev_gap, next_gap)) && - (!tosp_rule_9_test_punct || - (!suspected_punct_blob (row, prev_blob_box) && - !suspected_punct_blob (row, next_blob_box)))) { + } else if ((tosp_kern_gap_factor3 > 0) && (prev_blob_box.width() > 0) && + (next_blob_box.width() > 0) && + (current_gap >= + tosp_kern_gap_factor3 * std::max(prev_gap, next_gap)) && + (!tosp_rule_9_test_punct || + (!suspected_punct_blob(row, prev_blob_box) && + !suspected_punct_blob(row, next_blob_box)))) { space = true; fuzzy_non = true; #ifndef GRAPHICS_DISABLED - mark_gap (blob_box, 9, - prev_gap, prev_blob_box.width (), - current_gap, next_blob_box.width (), next_gap); + mark_gap(blob_box, 9, prev_gap, prev_blob_box.width(), current_gap, + next_blob_box.width(), next_gap); #endif } } if (tosp_debug_level > 10) - tprintf("word break = %d current_gap = %d, prev_gap = %d, " - "next_gap = %d\n", space ? 1 : 0, current_gap, - prev_gap, next_gap); + tprintf( + "word break = %d current_gap = %d, prev_gap = %d, " + "next_gap = %d\n", + space ? 1 : 0, current_gap, prev_gap, next_gap); prev_gap_was_a_space = space && !(fuzzy_non); return space; } @@ -1568,9 +1414,9 @@ bool Textord::make_a_word_break( bool Textord::narrow_blob(TO_ROW* row, TBOX blob_box) { bool result; - result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) || - (((float) blob_box.width () / blob_box.height ()) <= - tosp_narrow_aspect_ratio)); + result = ((blob_box.width() <= tosp_narrow_fraction * row->xheight) || + (((float)blob_box.width() / blob_box.height()) <= + tosp_narrow_aspect_ratio)); return result; } @@ -1578,14 +1424,13 @@ bool Textord::wide_blob(TO_ROW* row, TBOX blob_box) { bool result; if (tosp_wide_fraction > 0) { if (tosp_wide_aspect_ratio > 0) - result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) && - (((float) blob_box.width () / blob_box.height ()) > - tosp_wide_aspect_ratio)); + result = ((blob_box.width() >= tosp_wide_fraction * row->xheight) && + (((float)blob_box.width() / blob_box.height()) > + tosp_wide_aspect_ratio)); else - result = (blob_box.width () >= tosp_wide_fraction * row->xheight); - } - else - result = !narrow_blob (row, blob_box); + result = (blob_box.width() >= tosp_wide_fraction * row->xheight); + } else + result = !narrow_blob(row, blob_box); return result; } @@ -1594,51 +1439,42 @@ bool Textord::suspected_punct_blob(TO_ROW* row, TBOX box) { float baseline; float blob_x_centre; /* Find baseline of centre of blob */ - blob_x_centre = (box.right () + box.left ()) / 2.0; - baseline = row->baseline.y (blob_x_centre); + blob_x_centre = (box.right() + box.left()) / 2.0; + baseline = row->baseline.y(blob_x_centre); - result = (box.height () <= 0.66 * row->xheight) || - (box.top () < baseline + row->xheight / 2.0) || - (box.bottom () > baseline + row->xheight / 2.0); + result = (box.height() <= 0.66 * row->xheight) || + (box.top() < baseline + row->xheight / 2.0) || + (box.bottom() > baseline + row->xheight / 2.0); return result; } - -void Textord::peek_at_next_gap(TO_ROW *row, - BLOBNBOX_IT box_it, - TBOX &next_blob_box, - int16_t &next_gap, - int16_t &next_within_xht_gap) { +void Textord::peek_at_next_gap(TO_ROW* row, BLOBNBOX_IT box_it, + TBOX& next_blob_box, int16_t& next_gap, + int16_t& next_within_xht_gap) { TBOX next_reduced_blob_box; TBOX bit_beyond; BLOBNBOX_IT reduced_box_it = box_it; - next_blob_box = box_next (&box_it); - next_reduced_blob_box = reduced_box_next (row, &reduced_box_it); - if (box_it.at_first ()) { + next_blob_box = box_next(&box_it); + next_reduced_blob_box = reduced_box_next(row, &reduced_box_it); + if (box_it.at_first()) { next_gap = INT16_MAX; next_within_xht_gap = INT16_MAX; - } - else { - bit_beyond = box_it.data ()->bounding_box (); - next_gap = bit_beyond.left () - next_blob_box.right (); - bit_beyond = reduced_box_next (row, &reduced_box_it); - next_within_xht_gap = - bit_beyond.left () - next_reduced_blob_box.right (); + } else { + bit_beyond = box_it.data()->bounding_box(); + next_gap = bit_beyond.left() - next_blob_box.right(); + bit_beyond = reduced_box_next(row, &reduced_box_it); + next_within_xht_gap = bit_beyond.left() - next_reduced_blob_box.right(); } } - #ifndef GRAPHICS_DISABLED -void Textord::mark_gap( - TBOX blob, // blob following gap - int16_t rule, // heuristic id - int16_t prev_gap, - int16_t prev_blob_width, - int16_t current_gap, - int16_t next_blob_width, - int16_t next_gap) { - ScrollView::Color col; //of ellipse marking flipped gap +void Textord::mark_gap(TBOX blob, // blob following gap + int16_t rule, // heuristic id + int16_t prev_gap, int16_t prev_blob_width, + int16_t current_gap, int16_t next_blob_width, + int16_t next_gap) { + ScrollView::Color col; // of ellipse marking flipped gap switch (rule) { case 1: @@ -1684,18 +1520,18 @@ void Textord::mark_gap( } if (textord_show_initial_words) { to_win->Pen(col); - /* if (rule < 20) - //interior_style(to_win, INT_SOLID, FALSE); - else - //interior_style(to_win, INT_HOLLOW, TRUE);*/ - //x radius - to_win->Ellipse (current_gap / 2.0f, - blob.height () / 2.0f, //y radius - //x centre - blob.left () - current_gap / 2.0f, - //y centre - blob.bottom () + blob.height () / 2.0f); - } + /* if (rule < 20) + //interior_style(to_win, INT_SOLID, FALSE); + else + //interior_style(to_win, INT_HOLLOW, TRUE);*/ + // x radius + to_win->Ellipse(current_gap / 2.0f, + blob.height() / 2.0f, // y radius + // x centre + blob.left() - current_gap / 2.0f, + // y centre + blob.bottom() + blob.height() / 2.0f); + } if (tosp_debug_level > 5) tprintf(" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n", blob.left() - current_gap / 2, blob.bottom(), rule, prev_gap, @@ -1703,57 +1539,51 @@ void Textord::mark_gap( } #endif -float Textord::find_mean_blob_spacing(WERD *word) { +float Textord::find_mean_blob_spacing(WERD* word) { C_BLOB_IT cblob_it; TBOX blob_box; int32_t gap_sum = 0; int16_t gap_count = 0; int16_t prev_right; - cblob_it.set_to_list (word->cblob_list ()); - if (!cblob_it.empty ()) { - cblob_it.mark_cycle_pt (); - prev_right = cblob_it.data ()->bounding_box ().right (); - //first blob - cblob_it.forward (); - for (; !cblob_it.cycled_list (); cblob_it.forward ()) { - blob_box = cblob_it.data ()->bounding_box (); - gap_sum += blob_box.left () - prev_right; + cblob_it.set_to_list(word->cblob_list()); + if (!cblob_it.empty()) { + cblob_it.mark_cycle_pt(); + prev_right = cblob_it.data()->bounding_box().right(); + // first blob + cblob_it.forward(); + for (; !cblob_it.cycled_list(); cblob_it.forward()) { + blob_box = cblob_it.data()->bounding_box(); + gap_sum += blob_box.left() - prev_right; gap_count++; - prev_right = blob_box.right (); + prev_right = blob_box.right(); } } if (gap_count > 0) - return (gap_sum / (float) gap_count); + return (gap_sum / (float)gap_count); else return 0.0f; } - -bool Textord::ignore_big_gap(TO_ROW* row, - int32_t row_length, - GAPMAP* gapmap, - int16_t left, - int16_t right) { +bool Textord::ignore_big_gap(TO_ROW* row, int32_t row_length, GAPMAP* gapmap, + int16_t left, int16_t right) { int16_t gap = right - left + 1; if (tosp_ignore_big_gaps > 999) return FALSE; // Don't ignore if (tosp_ignore_big_gaps > 0) return (gap > tosp_ignore_big_gaps * row->xheight); - if (gap > tosp_ignore_very_big_gaps * row->xheight) - return true; + if (gap > tosp_ignore_very_big_gaps * row->xheight) return true; if (tosp_ignore_big_gaps == 0) { if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight)) return true; if ((gap > 1.75 * row->xheight) && - ((row_length > 35 * row->xheight) || - gapmap->table_gap (left, right))) + ((row_length > 35 * row->xheight) || gapmap->table_gap(left, right))) return true; - } - else { - /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */ + } else { + /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table + */ if ((gap > gapmap_big_gaps * row->xheight) && - gapmap->table_gap (left, right)) + gapmap->table_gap(left, right)) return true; } return false; @@ -1767,60 +1597,56 @@ bool Textord::ignore_big_gap(TO_ROW* row, * Then move the iterator on to the start of the next blob. * DON'T reduce the box for small things - eg punctuation. **********************************************************************/ -TBOX Textord::reduced_box_next( - TO_ROW *row, // current row - BLOBNBOX_IT *it // iterator to blobds - ) { - BLOBNBOX *blob; //current blob - BLOBNBOX *head_blob; //place to store box - TBOX full_box; //full blob boundg box - TBOX reduced_box; //box of significant part - int16_t left_above_xht; //ABOVE xht left limit - int16_t new_left_above_xht; //ABOVE xht left limit - - blob = it->data (); - if (blob->red_box_set ()) { - reduced_box = blob->reduced_box (); +TBOX Textord::reduced_box_next(TO_ROW* row, // current row + BLOBNBOX_IT* it // iterator to blobds +) { + BLOBNBOX* blob; // current blob + BLOBNBOX* head_blob; // place to store box + TBOX full_box; // full blob boundg box + TBOX reduced_box; // box of significant part + int16_t left_above_xht; // ABOVE xht left limit + int16_t new_left_above_xht; // ABOVE xht left limit + + blob = it->data(); + if (blob->red_box_set()) { + reduced_box = blob->reduced_box(); do { it->forward(); blob = it->data(); - } - while (blob->cblob() == nullptr || blob->joined_to_prev()); + } while (blob->cblob() == nullptr || blob->joined_to_prev()); return reduced_box; } head_blob = blob; - full_box = blob->bounding_box (); - reduced_box = reduced_box_for_blob (blob, row, &left_above_xht); + full_box = blob->bounding_box(); + reduced_box = reduced_box_for_blob(blob, row, &left_above_xht); do { - it->forward (); - blob = it->data (); + it->forward(); + blob = it->data(); if (blob->cblob() == nullptr) - //was pre-chopped - full_box += blob->bounding_box (); - else if (blob->joined_to_prev ()) { - reduced_box += - reduced_box_for_blob(blob, row, &new_left_above_xht); + // was pre-chopped + full_box += blob->bounding_box(); + else if (blob->joined_to_prev()) { + reduced_box += reduced_box_for_blob(blob, row, &new_left_above_xht); left_above_xht = std::min(left_above_xht, new_left_above_xht); } } - //until next real blob + // until next real blob while (blob->cblob() == nullptr || blob->joined_to_prev()); - if ((reduced_box.width () > 0) && - ((reduced_box.left () + tosp_near_lh_edge * reduced_box.width ()) - < left_above_xht) && (reduced_box.height () > 0.7 * row->xheight)) { + if ((reduced_box.width() > 0) && + ((reduced_box.left() + tosp_near_lh_edge * reduced_box.width()) < + left_above_xht) && + (reduced_box.height() > 0.7 * row->xheight)) { #ifndef GRAPHICS_DISABLED if (textord_show_initial_words) - reduced_box.plot (to_win, ScrollView::YELLOW, ScrollView::YELLOW); + reduced_box.plot(to_win, ScrollView::YELLOW, ScrollView::YELLOW); #endif - } - else + } else reduced_box = full_box; - head_blob->set_reduced_box (reduced_box); + head_blob->set_reduced_box(reduced_box); return reduced_box; } - /************************************************************************* * reduced_box_for_blob() * Find box for blob which is the same height and y position as the whole blob, @@ -1841,10 +1667,8 @@ TBOX Textord::reduced_box_next( * NOTE that we need to rotate all the coordinates as * find_blob_limits finds the y min and max within a specified x band *************************************************************************/ -TBOX Textord::reduced_box_for_blob( - BLOBNBOX *blob, - TO_ROW *row, - int16_t *left_above_xht) { +TBOX Textord::reduced_box_for_blob(BLOBNBOX* blob, TO_ROW* row, + int16_t* left_above_xht) { float baseline; float blob_x_centre; float left_limit; @@ -1854,44 +1678,44 @@ TBOX Textord::reduced_box_for_blob( /* Find baseline of centre of blob */ - blob_box = blob->bounding_box (); - blob_x_centre = (blob_box.left () + blob_box.right ()) / 2.0; - baseline = row->baseline.y (blob_x_centre); + blob_box = blob->bounding_box(); + blob_x_centre = (blob_box.left() + blob_box.right()) / 2.0; + baseline = row->baseline.y(blob_x_centre); /* Find LH limit of blob ABOVE the xht. This is so that we can detect certain caps ht chars which should NOT have their box reduced: T, Y, V, W etc */ - left_limit = (float) INT32_MAX; - junk = (float) -INT32_MAX; + left_limit = (float)INT32_MAX; + junk = (float)-INT32_MAX; find_cblob_hlimits(blob->cblob(), (baseline + 1.1 * row->xheight), static_cast(INT16_MAX), left_limit, junk); if (left_limit > junk) - *left_above_xht = INT16_MAX; //No area above xht + *left_above_xht = INT16_MAX; // No area above xht else - *left_above_xht = (int16_t) floor (left_limit); + *left_above_xht = (int16_t)floor(left_limit); /* Find reduced LH limit of blob - the left extent of the region ABOVE the baseline. */ - left_limit = (float) INT32_MAX; - junk = (float) -INT32_MAX; + left_limit = (float)INT32_MAX; + junk = (float)-INT32_MAX; find_cblob_hlimits(blob->cblob(), baseline, static_cast(INT16_MAX), left_limit, junk); if (left_limit > junk) - return TBOX (); //no area within xht so return empty box + return TBOX(); // no area within xht so return empty box /* Find reduced RH limit of blob - the right extent of the region BELOW the xht. */ - junk = (float) INT32_MAX; - right_limit = (float) -INT32_MAX; + junk = (float)INT32_MAX; + right_limit = (float)-INT32_MAX; find_cblob_hlimits(blob->cblob(), static_cast(-INT16_MAX), (baseline + row->xheight), junk, right_limit); if (junk > right_limit) - return TBOX (); //no area within xht so return empty box + return TBOX(); // no area within xht so return empty box - return TBOX (ICOORD ((int16_t) floor (left_limit), blob_box.bottom ()), - ICOORD ((int16_t) ceil (right_limit), blob_box.top ())); + return TBOX(ICOORD((int16_t)floor(left_limit), blob_box.bottom()), + ICOORD((int16_t)ceil(right_limit), blob_box.top())); } } // namespace tesseract diff --git a/src/textord/tovars.cpp b/src/textord/tovars.cpp index 6b1b833248..ad55fe9cd1 100644 --- a/src/textord/tovars.cpp +++ b/src/textord/tovars.cpp @@ -22,62 +22,53 @@ #define EXTERN -EXTERN BOOL_VAR (textord_show_initial_words, FALSE, "Display separate words"); -EXTERN BOOL_VAR (textord_show_new_words, FALSE, "Display separate words"); -EXTERN BOOL_VAR (textord_show_fixed_words, FALSE, -"Display forced fixed pitch words"); -EXTERN BOOL_VAR (textord_blocksall_fixed, FALSE, "Moan about prop blocks"); -EXTERN BOOL_VAR (textord_blocksall_prop, FALSE, -"Moan about fixed pitch blocks"); -EXTERN BOOL_VAR (textord_blocksall_testing, FALSE, "Dump stats when moaning"); -EXTERN BOOL_VAR (textord_test_mode, FALSE, "Do current test"); -EXTERN INT_VAR (textord_dotmatrix_gap, 3, -"Max pixel gap for broken pixed pitch"); -EXTERN INT_VAR (textord_debug_block, 0, "Block to do debug on"); -EXTERN INT_VAR (textord_pitch_range, 2, "Max range test on pitch"); -EXTERN double_VAR (textord_wordstats_smooth_factor, 0.05, -"Smoothing gap stats"); -EXTERN double_VAR (textord_width_smooth_factor, 0.10, -"Smoothing width stats"); -EXTERN double_VAR (textord_words_width_ile, 0.4, -"Ile of blob widths for space est"); -EXTERN double_VAR (textord_words_maxspace, 4.0, "Multiple of xheight"); -EXTERN double_VAR (textord_words_default_maxspace, 3.5, -"Max believable third space"); -EXTERN double_VAR (textord_words_default_minspace, 0.6, -"Fraction of xheight"); -EXTERN double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight"); -EXTERN double_VAR (textord_words_default_nonspace, 0.2, -"Fraction of xheight"); +EXTERN BOOL_VAR(textord_show_initial_words, FALSE, "Display separate words"); +EXTERN BOOL_VAR(textord_show_new_words, FALSE, "Display separate words"); +EXTERN BOOL_VAR(textord_show_fixed_words, FALSE, + "Display forced fixed pitch words"); +EXTERN BOOL_VAR(textord_blocksall_fixed, FALSE, "Moan about prop blocks"); +EXTERN BOOL_VAR(textord_blocksall_prop, FALSE, "Moan about fixed pitch blocks"); +EXTERN BOOL_VAR(textord_blocksall_testing, FALSE, "Dump stats when moaning"); +EXTERN BOOL_VAR(textord_test_mode, FALSE, "Do current test"); +EXTERN INT_VAR(textord_dotmatrix_gap, 3, + "Max pixel gap for broken pixed pitch"); +EXTERN INT_VAR(textord_debug_block, 0, "Block to do debug on"); +EXTERN INT_VAR(textord_pitch_range, 2, "Max range test on pitch"); +EXTERN double_VAR(textord_wordstats_smooth_factor, 0.05, "Smoothing gap stats"); +EXTERN double_VAR(textord_width_smooth_factor, 0.10, "Smoothing width stats"); +EXTERN double_VAR(textord_words_width_ile, 0.4, + "Ile of blob widths for space est"); +EXTERN double_VAR(textord_words_maxspace, 4.0, "Multiple of xheight"); +EXTERN double_VAR(textord_words_default_maxspace, 3.5, + "Max believable third space"); +EXTERN double_VAR(textord_words_default_minspace, 0.6, "Fraction of xheight"); +EXTERN double_VAR(textord_words_min_minspace, 0.3, "Fraction of xheight"); +EXTERN double_VAR(textord_words_default_nonspace, 0.2, "Fraction of xheight"); EXTERN double_VAR(textord_words_initial_lower, 0.25, "Max initial cluster size"); -EXTERN double_VAR (textord_words_initial_upper, 0.15, -"Min initial cluster spacing"); -EXTERN double_VAR (textord_words_minlarge, 0.75, -"Fraction of valid gaps needed"); -EXTERN double_VAR (textord_words_pitchsd_threshold, 0.040, -"Pitch sync threshold"); -EXTERN double_VAR (textord_words_def_fixed, 0.016, -"Threshold for definite fixed"); -EXTERN double_VAR (textord_words_def_prop, 0.090, -"Threshold for definite prop"); -EXTERN INT_VAR (textord_words_veto_power, 5, -"Rows required to outvote a veto"); -EXTERN double_VAR (textord_pitch_rowsimilarity, 0.08, -"Fraction of xheight for sameness"); -EXTERN BOOL_VAR (textord_pitch_scalebigwords, FALSE, -"Scale scores on big words"); +EXTERN double_VAR(textord_words_initial_upper, 0.15, + "Min initial cluster spacing"); +EXTERN double_VAR(textord_words_minlarge, 0.75, + "Fraction of valid gaps needed"); +EXTERN double_VAR(textord_words_pitchsd_threshold, 0.040, + "Pitch sync threshold"); +EXTERN double_VAR(textord_words_def_fixed, 0.016, + "Threshold for definite fixed"); +EXTERN double_VAR(textord_words_def_prop, 0.090, "Threshold for definite prop"); +EXTERN INT_VAR(textord_words_veto_power, 5, "Rows required to outvote a veto"); +EXTERN double_VAR(textord_pitch_rowsimilarity, 0.08, + "Fraction of xheight for sameness"); +EXTERN BOOL_VAR(textord_pitch_scalebigwords, FALSE, + "Scale scores on big words"); EXTERN double_VAR(words_initial_lower, 0.5, "Max initial cluster size"); -EXTERN double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing"); -EXTERN double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight"); -EXTERN double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight"); -EXTERN double_VAR (words_default_fixed_limit, 0.6, "Allowed size variance"); -EXTERN double_VAR (textord_words_definite_spread, 0.30, -"Non-fuzzy spacing region"); -EXTERN double_VAR (textord_spacesize_ratiofp, 2.8, -"Min ratio space/nonspace"); -EXTERN double_VAR (textord_spacesize_ratioprop, 2.0, -"Min ratio space/nonspace"); -EXTERN double_VAR (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold"); -EXTERN double_VAR (textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch"); -EXTERN double_VAR (textord_fp_min_width, 0.5, "Min width of decent blobs"); +EXTERN double_VAR(words_initial_upper, 0.15, "Min initial cluster spacing"); +EXTERN double_VAR(words_default_prop_nonspace, 0.25, "Fraction of xheight"); +EXTERN double_VAR(words_default_fixed_space, 0.75, "Fraction of xheight"); +EXTERN double_VAR(words_default_fixed_limit, 0.6, "Allowed size variance"); +EXTERN double_VAR(textord_words_definite_spread, 0.30, + "Non-fuzzy spacing region"); +EXTERN double_VAR(textord_spacesize_ratiofp, 2.8, "Min ratio space/nonspace"); +EXTERN double_VAR(textord_spacesize_ratioprop, 2.0, "Min ratio space/nonspace"); +EXTERN double_VAR(textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold"); +EXTERN double_VAR(textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch"); +EXTERN double_VAR(textord_fp_min_width, 0.5, "Min width of decent blobs"); diff --git a/src/textord/tovars.h b/src/textord/tovars.h index 46315bb96b..48026981b0 100644 --- a/src/textord/tovars.h +++ b/src/textord/tovars.h @@ -17,73 +17,64 @@ * **********************************************************************/ -#ifndef TOVARS_H -#define TOVARS_H +#ifndef TOVARS_H +#define TOVARS_H -#include "params.h" +#include "params.h" -extern BOOL_VAR_H (textord_show_initial_words, FALSE, -"Display separate words"); -extern BOOL_VAR_H (textord_show_new_words, FALSE, "Display separate words"); -extern BOOL_VAR_H (textord_show_fixed_words, FALSE, -"Display forced fixed pitch words"); -extern BOOL_VAR_H (textord_blocksall_fixed, FALSE, "Moan about prop blocks"); -extern BOOL_VAR_H (textord_blocksall_prop, FALSE, -"Moan about fixed pitch blocks"); -extern BOOL_VAR_H (textord_blocksall_testing, FALSE, -"Dump stats when moaning"); -extern BOOL_VAR_H (textord_test_mode, FALSE, "Do current test"); -extern INT_VAR_H (textord_dotmatrix_gap, 3, -"Max pixel gap for broken pixed pitch"); -extern INT_VAR_H (textord_debug_block, 0, "Block to do debug on"); -extern INT_VAR_H (textord_pitch_range, 2, "Max range test on pitch"); -extern double_VAR_H (textord_wordstats_smooth_factor, 0.05, -"Smoothing gap stats"); -extern double_VAR_H (textord_width_smooth_factor, 0.10, -"Smoothing width stats"); -extern double_VAR_H (textord_words_width_ile, 0.4, -"Ile of blob widths for space est"); -extern double_VAR_H (textord_words_maxspace, 4.0, "Multiple of xheight"); -extern double_VAR_H (textord_words_default_maxspace, 3.5, -"Max believable third space"); -extern double_VAR_H (textord_words_default_minspace, 0.6, -"Fraction of xheight"); -extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight"); -extern double_VAR_H (textord_words_default_nonspace, 0.2, -"Fraction of xheight"); +extern BOOL_VAR_H(textord_show_initial_words, FALSE, "Display separate words"); +extern BOOL_VAR_H(textord_show_new_words, FALSE, "Display separate words"); +extern BOOL_VAR_H(textord_show_fixed_words, FALSE, + "Display forced fixed pitch words"); +extern BOOL_VAR_H(textord_blocksall_fixed, FALSE, "Moan about prop blocks"); +extern BOOL_VAR_H(textord_blocksall_prop, FALSE, + "Moan about fixed pitch blocks"); +extern BOOL_VAR_H(textord_blocksall_testing, FALSE, "Dump stats when moaning"); +extern BOOL_VAR_H(textord_test_mode, FALSE, "Do current test"); +extern INT_VAR_H(textord_dotmatrix_gap, 3, + "Max pixel gap for broken pixed pitch"); +extern INT_VAR_H(textord_debug_block, 0, "Block to do debug on"); +extern INT_VAR_H(textord_pitch_range, 2, "Max range test on pitch"); +extern double_VAR_H(textord_wordstats_smooth_factor, 0.05, + "Smoothing gap stats"); +extern double_VAR_H(textord_width_smooth_factor, 0.10, "Smoothing width stats"); +extern double_VAR_H(textord_words_width_ile, 0.4, + "Ile of blob widths for space est"); +extern double_VAR_H(textord_words_maxspace, 4.0, "Multiple of xheight"); +extern double_VAR_H(textord_words_default_maxspace, 3.5, + "Max believable third space"); +extern double_VAR_H(textord_words_default_minspace, 0.6, "Fraction of xheight"); +extern double_VAR_H(textord_words_min_minspace, 0.3, "Fraction of xheight"); +extern double_VAR_H(textord_words_default_nonspace, 0.2, "Fraction of xheight"); extern double_VAR_H(textord_words_initial_lower, 0.25, "Max initial cluster size"); -extern double_VAR_H (textord_words_initial_upper, 0.15, -"Min initial cluster spacing"); -extern double_VAR_H (textord_words_minlarge, 0.75, -"Fraction of valid gaps needed"); -extern double_VAR_H (textord_words_pitchsd_threshold, 0.025, -"Pitch sync threshold"); -extern double_VAR_H (textord_words_def_fixed, 0.01, -"Threshold for definite fixed"); -extern double_VAR_H (textord_words_def_prop, 0.06, -"Threshold for definite prop"); -extern INT_VAR_H (textord_words_veto_power, 5, -"Rows required to outvote a veto"); -extern double_VAR_H (textord_pitch_rowsimilarity, 0.08, -"Fraction of xheight for sameness"); -extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE, -"Scale scores on big words"); +extern double_VAR_H(textord_words_initial_upper, 0.15, + "Min initial cluster spacing"); +extern double_VAR_H(textord_words_minlarge, 0.75, + "Fraction of valid gaps needed"); +extern double_VAR_H(textord_words_pitchsd_threshold, 0.025, + "Pitch sync threshold"); +extern double_VAR_H(textord_words_def_fixed, 0.01, + "Threshold for definite fixed"); +extern double_VAR_H(textord_words_def_prop, 0.06, + "Threshold for definite prop"); +extern INT_VAR_H(textord_words_veto_power, 5, + "Rows required to outvote a veto"); +extern double_VAR_H(textord_pitch_rowsimilarity, 0.08, + "Fraction of xheight for sameness"); +extern BOOL_VAR_H(textord_pitch_scalebigwords, FALSE, + "Scale scores on big words"); extern double_VAR_H(words_initial_lower, 0.5, "Max initial cluster size"); -extern double_VAR_H (words_initial_upper, 0.15, -"Min initial cluster spacing"); -extern double_VAR_H (words_default_prop_nonspace, 0.25, -"Fraction of xheight"); -extern double_VAR_H (words_default_fixed_space, 0.75, "Fraction of xheight"); -extern double_VAR_H (words_default_fixed_limit, 0.6, "Allowed size variance"); -extern double_VAR_H (textord_words_definite_spread, 0.30, -"Non-fuzzy spacing region"); -extern double_VAR_H (textord_spacesize_ratiofp, 2.8, -"Min ratio space/nonspace"); -extern double_VAR_H (textord_spacesize_ratioprop, 2.0, -"Min ratio space/nonspace"); -extern double_VAR_H (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold"); -extern double_VAR_H (textord_max_pitch_iqr, 0.20, -"Xh fraction noise in pitch"); -extern double_VAR_H (textord_fp_min_width, 0.5, "Min width of decent blobs"); +extern double_VAR_H(words_initial_upper, 0.15, "Min initial cluster spacing"); +extern double_VAR_H(words_default_prop_nonspace, 0.25, "Fraction of xheight"); +extern double_VAR_H(words_default_fixed_space, 0.75, "Fraction of xheight"); +extern double_VAR_H(words_default_fixed_limit, 0.6, "Allowed size variance"); +extern double_VAR_H(textord_words_definite_spread, 0.30, + "Non-fuzzy spacing region"); +extern double_VAR_H(textord_spacesize_ratiofp, 2.8, "Min ratio space/nonspace"); +extern double_VAR_H(textord_spacesize_ratioprop, 2.0, + "Min ratio space/nonspace"); +extern double_VAR_H(textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold"); +extern double_VAR_H(textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch"); +extern double_VAR_H(textord_fp_min_width, 0.5, "Min width of decent blobs"); #endif diff --git a/src/textord/underlin.cpp b/src/textord/underlin.cpp index a442f3e029..90dd0e285a 100644 --- a/src/textord/underlin.cpp +++ b/src/textord/underlin.cpp @@ -18,16 +18,15 @@ **********************************************************************/ #ifdef __UNIX__ -#include +#include #endif -#include "underlin.h" +#include "underlin.h" -#define PROJECTION_MARGIN 10 //arbitrary +#define PROJECTION_MARGIN 10 // arbitrary #define EXTERN -EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore"); -EXTERN BOOL_VAR (textord_restore_underlines, TRUE, -"Chop underlines & put back"); +EXTERN double_VAR(textord_underline_offset, 0.1, "Fraction of x to ignore"); +EXTERN BOOL_VAR(textord_restore_underlines, TRUE, "Chop underlines & put back"); /********************************************************************** * restore_underlined_blobs @@ -35,58 +34,51 @@ EXTERN BOOL_VAR (textord_restore_underlines, TRUE, * Find underlined blobs and put them back in the row. **********************************************************************/ -void restore_underlined_blobs( //get chop points - TO_BLOCK *block //block to do - ) { - int16_t chop_coord; //chop boundary - TBOX blob_box; //of underline - BLOBNBOX *u_line; //underline bit - TO_ROW *row; //best row for blob - ICOORDELT_LIST chop_cells; //blobs to cut out - //real underlines +void restore_underlined_blobs( // get chop points + TO_BLOCK* block // block to do +) { + int16_t chop_coord; // chop boundary + TBOX blob_box; // of underline + BLOBNBOX* u_line; // underline bit + TO_ROW* row; // best row for blob + ICOORDELT_LIST chop_cells; // blobs to cut out + // real underlines BLOBNBOX_LIST residual_underlines; C_OUTLINE_LIST left_coutlines; C_OUTLINE_LIST right_coutlines; ICOORDELT_IT cell_it = &chop_cells; - //under lines + // under lines BLOBNBOX_IT under_it = &block->underlines; BLOBNBOX_IT ru_it = &residual_underlines; - if (block->get_rows()->empty()) - return; // Don't crash if there are no rows. - for (under_it.mark_cycle_pt (); !under_it.cycled_list (); - under_it.forward ()) { - u_line = under_it.extract (); - blob_box = u_line->bounding_box (); - row = most_overlapping_row (block->get_rows (), u_line); - if (row == nullptr) - return; // Don't crash if there is no row. - find_underlined_blobs (u_line, &row->baseline, row->xheight, - row->xheight * textord_underline_offset, - &chop_cells); - cell_it.set_to_list (&chop_cells); - for (cell_it.mark_cycle_pt (); !cell_it.cycled_list (); - cell_it.forward ()) { - chop_coord = cell_it.data ()->x (); - if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) { - split_to_blob (u_line, chop_coord, - textord_fp_chop_error + 0.5, - &left_coutlines, - &right_coutlines); + if (block->get_rows()->empty()) return; // Don't crash if there are no rows. + for (under_it.mark_cycle_pt(); !under_it.cycled_list(); under_it.forward()) { + u_line = under_it.extract(); + blob_box = u_line->bounding_box(); + row = most_overlapping_row(block->get_rows(), u_line); + if (row == nullptr) return; // Don't crash if there is no row. + find_underlined_blobs(u_line, &row->baseline, row->xheight, + row->xheight * textord_underline_offset, &chop_cells); + cell_it.set_to_list(&chop_cells); + for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) { + chop_coord = cell_it.data()->x(); + if (cell_it.data()->y() - chop_coord > textord_fp_chop_error + 1) { + split_to_blob(u_line, chop_coord, textord_fp_chop_error + 0.5, + &left_coutlines, &right_coutlines); if (!left_coutlines.empty()) { ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); } - chop_coord = cell_it.data ()->y (); + chop_coord = cell_it.data()->y(); split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines, &right_coutlines); if (!left_coutlines.empty()) { row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines))); } - u_line = nullptr; //no more blobs to add + u_line = nullptr; // no more blobs to add } delete cell_it.extract(); } - if (!right_coutlines.empty ()) { + if (!right_coutlines.empty()) { split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5, &left_coutlines, &right_coutlines); if (!left_coutlines.empty()) @@ -105,108 +97,103 @@ void restore_underlined_blobs( //get chop points } } - /********************************************************************** * most_overlapping_row * * Return the row which most overlaps the blob. **********************************************************************/ -TO_ROW *most_overlapping_row( //find best row - TO_ROW_LIST *rows, //list of rows - BLOBNBOX *blob //blob to place - ) { - int16_t x = (blob->bounding_box ().left () - + blob->bounding_box ().right ()) / 2; - TO_ROW_IT row_it = rows; //row iterator - TO_ROW *row; //current row - TO_ROW *best_row; //output row - float overlap; //of blob & row - float bestover; //best overlap +TO_ROW* most_overlapping_row( // find best row + TO_ROW_LIST* rows, // list of rows + BLOBNBOX* blob // blob to place +) { + int16_t x = (blob->bounding_box().left() + blob->bounding_box().right()) / 2; + TO_ROW_IT row_it = rows; // row iterator + TO_ROW* row; // current row + TO_ROW* best_row; // output row + float overlap; // of blob & row + float bestover; // best overlap best_row = nullptr; - bestover = (float) -INT32_MAX; - if (row_it.empty ()) - return nullptr; - row = row_it.data (); - row_it.mark_cycle_pt (); - while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top () - && !row_it.cycled_list ()) { + bestover = (float)-INT32_MAX; + if (row_it.empty()) return nullptr; + row = row_it.data(); + row_it.mark_cycle_pt(); + while (row->baseline.y(x) + row->descdrop > blob->bounding_box().top() && + !row_it.cycled_list()) { best_row = row; - bestover = - blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop; - row_it.forward (); - row = row_it.data (); + bestover = blob->bounding_box().top() - row->baseline.y(x) + row->descdrop; + row_it.forward(); + row = row_it.data(); } - while (row->baseline.y (x) + row->xheight + row->ascrise - >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) { - overlap = row->baseline.y (x) + row->xheight + row->ascrise; - if (blob->bounding_box ().top () < overlap) - overlap = blob->bounding_box ().top (); - if (blob->bounding_box ().bottom () > - row->baseline.y (x) + row->descdrop) - overlap -= blob->bounding_box ().bottom (); + while (row->baseline.y(x) + row->xheight + row->ascrise >= + blob->bounding_box().bottom() && + !row_it.cycled_list()) { + overlap = row->baseline.y(x) + row->xheight + row->ascrise; + if (blob->bounding_box().top() < overlap) + overlap = blob->bounding_box().top(); + if (blob->bounding_box().bottom() > row->baseline.y(x) + row->descdrop) + overlap -= blob->bounding_box().bottom(); else - overlap -= row->baseline.y (x) + row->descdrop; + overlap -= row->baseline.y(x) + row->descdrop; if (overlap > bestover) { bestover = overlap; best_row = row; } - row_it.forward (); - row = row_it.data (); + row_it.forward(); + row = row_it.data(); } - if (bestover < 0 - && row->baseline.y (x) + row->xheight + row->ascrise - - blob->bounding_box ().bottom () > bestover) + if (bestover < 0 && row->baseline.y(x) + row->xheight + row->ascrise - + blob->bounding_box().bottom() > + bestover) best_row = row; return best_row; } - /********************************************************************** * find_underlined_blobs * * Find the start and end coords of blobs in the underline. **********************************************************************/ -void find_underlined_blobs( //get chop points - BLOBNBOX *u_line, //underlined unit - QSPLINE *baseline, //actual baseline - float xheight, //height of line - float baseline_offset, //amount to shrinke it - ICOORDELT_LIST *chop_cells //places to chop - ) { - int16_t x, y; //sides of blob - ICOORD blob_chop; //sides of blob - TBOX blob_box = u_line->bounding_box (); - //cell iterator +void find_underlined_blobs( // get chop points + BLOBNBOX* u_line, // underlined unit + QSPLINE* baseline, // actual baseline + float xheight, // height of line + float baseline_offset, // amount to shrinke it + ICOORDELT_LIST* chop_cells // places to chop +) { + int16_t x, y; // sides of blob + ICOORD blob_chop; // sides of blob + TBOX blob_box = u_line->bounding_box(); + // cell iterator ICOORDELT_IT cell_it = chop_cells; - STATS upper_proj (blob_box.left (), blob_box.right () + 1); - STATS middle_proj (blob_box.left (), blob_box.right () + 1); - STATS lower_proj (blob_box.left (), blob_box.right () + 1); - C_OUTLINE_IT out_it; //outlines of blob + STATS upper_proj(blob_box.left(), blob_box.right() + 1); + STATS middle_proj(blob_box.left(), blob_box.right() + 1); + STATS lower_proj(blob_box.left(), blob_box.right() + 1); + C_OUTLINE_IT out_it; // outlines of blob - ASSERT_HOST (u_line->cblob () != nullptr); + ASSERT_HOST(u_line->cblob() != nullptr); - out_it.set_to_list (u_line->cblob ()->out_list ()); - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - vertical_cunderline_projection (out_it.data (), - baseline, xheight, baseline_offset, - &lower_proj, &middle_proj, &upper_proj); + out_it.set_to_list(u_line->cblob()->out_list()); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + vertical_cunderline_projection(out_it.data(), baseline, xheight, + baseline_offset, &lower_proj, &middle_proj, + &upper_proj); } - for (x = blob_box.left (); x < blob_box.right (); x++) { - if (middle_proj.pile_count (x) > 0) { - for (y = x + 1; - y < blob_box.right () && middle_proj.pile_count (y) > 0; y++); - blob_chop = ICOORD (x, y); - cell_it.add_after_then_move (new ICOORDELT (blob_chop)); + for (x = blob_box.left(); x < blob_box.right(); x++) { + if (middle_proj.pile_count(x) > 0) { + for (y = x + 1; y < blob_box.right() && middle_proj.pile_count(y) > 0; + y++) + ; + blob_chop = ICOORD(x, y); + cell_it.add_after_then_move(new ICOORDELT(blob_chop)); x = y; } } } - /********************************************************************** * vertical_cunderline_projection * @@ -214,69 +201,60 @@ void find_underlined_blobs( //get chop points * and add to the given STATS. **********************************************************************/ -void vertical_cunderline_projection( //project outlines - C_OUTLINE *outline, //outline to project - QSPLINE *baseline, //actual baseline - float xheight, //height of line - float baseline_offset, //amount to shrinke it - STATS *lower_proj, //below baseline - STATS *middle_proj, //centre region - STATS *upper_proj //top region - ) { - ICOORD pos; //current point - ICOORD step; //edge step - int16_t lower_y, upper_y; //region limits - int32_t length; //of outline - int16_t stepindex; //current step - C_OUTLINE_IT out_it = outline->child (); +void vertical_cunderline_projection( // project outlines + C_OUTLINE* outline, // outline to project + QSPLINE* baseline, // actual baseline + float xheight, // height of line + float baseline_offset, // amount to shrinke it + STATS* lower_proj, // below baseline + STATS* middle_proj, // centre region + STATS* upper_proj // top region +) { + ICOORD pos; // current point + ICOORD step; // edge step + int16_t lower_y, upper_y; // region limits + int32_t length; // of outline + int16_t stepindex; // current step + C_OUTLINE_IT out_it = outline->child(); - pos = outline->start_pos (); - length = outline->pathlength (); + pos = outline->start_pos(); + length = outline->pathlength(); for (stepindex = 0; stepindex < length; stepindex++) { - step = outline->step (stepindex); - if (step.x () > 0) { - lower_y = - (int16_t) floor (baseline->y (pos.x ()) + baseline_offset + 0.5); - upper_y = - (int16_t) floor (baseline->y (pos.x ()) + baseline_offset + - xheight + 0.5); - if (pos.y () >= lower_y) { - lower_proj->add (pos.x (), -lower_y); - if (pos.y () >= upper_y) { - middle_proj->add (pos.x (), lower_y - upper_y); - upper_proj->add (pos.x (), upper_y - pos.y ()); - } - else - middle_proj->add (pos.x (), lower_y - pos.y ()); - } - else - lower_proj->add (pos.x (), -pos.y ()); - } - else if (step.x () < 0) { + step = outline->step(stepindex); + if (step.x() > 0) { + lower_y = (int16_t)floor(baseline->y(pos.x()) + baseline_offset + 0.5); + upper_y = (int16_t)floor(baseline->y(pos.x()) + baseline_offset + + xheight + 0.5); + if (pos.y() >= lower_y) { + lower_proj->add(pos.x(), -lower_y); + if (pos.y() >= upper_y) { + middle_proj->add(pos.x(), lower_y - upper_y); + upper_proj->add(pos.x(), upper_y - pos.y()); + } else + middle_proj->add(pos.x(), lower_y - pos.y()); + } else + lower_proj->add(pos.x(), -pos.y()); + } else if (step.x() < 0) { lower_y = - (int16_t) floor (baseline->y (pos.x () - 1) + baseline_offset + - 0.5); - upper_y = - (int16_t) floor (baseline->y (pos.x () - 1) + baseline_offset + - xheight + 0.5); - if (pos.y () >= lower_y) { - lower_proj->add (pos.x () - 1, lower_y); - if (pos.y () >= upper_y) { - middle_proj->add (pos.x () - 1, upper_y - lower_y); - upper_proj->add (pos.x () - 1, pos.y () - upper_y); - } - else - middle_proj->add (pos.x () - 1, pos.y () - lower_y); - } - else - lower_proj->add (pos.x () - 1, pos.y ()); + (int16_t)floor(baseline->y(pos.x() - 1) + baseline_offset + 0.5); + upper_y = (int16_t)floor(baseline->y(pos.x() - 1) + baseline_offset + + xheight + 0.5); + if (pos.y() >= lower_y) { + lower_proj->add(pos.x() - 1, lower_y); + if (pos.y() >= upper_y) { + middle_proj->add(pos.x() - 1, upper_y - lower_y); + upper_proj->add(pos.x() - 1, pos.y() - upper_y); + } else + middle_proj->add(pos.x() - 1, pos.y() - lower_y); + } else + lower_proj->add(pos.x() - 1, pos.y()); } pos += step; } - for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { - vertical_cunderline_projection (out_it.data (), - baseline, xheight, baseline_offset, - lower_proj, middle_proj, upper_proj); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + vertical_cunderline_projection(out_it.data(), baseline, xheight, + baseline_offset, lower_proj, middle_proj, + upper_proj); } } diff --git a/src/textord/underlin.h b/src/textord/underlin.h index c3b4b47a43..1254d88bc2 100644 --- a/src/textord/underlin.h +++ b/src/textord/underlin.h @@ -17,36 +17,35 @@ * **********************************************************************/ -#ifndef UNDERLIN_H -#define UNDERLIN_H +#ifndef UNDERLIN_H +#define UNDERLIN_H -#include "fpchop.h" +#include "fpchop.h" -extern double_VAR_H (textord_underline_offset, 0.1, -"Fraction of x to ignore"); -extern BOOL_VAR_H (textord_restore_underlines, FALSE, -"Chop underlines & put back"); -void restore_underlined_blobs( //get chop points - TO_BLOCK *block //block to do - ); -TO_ROW *most_overlapping_row( //find best row - TO_ROW_LIST *rows, //list of rows - BLOBNBOX *blob //blob to place - ); -void find_underlined_blobs( //get chop points - BLOBNBOX *u_line, //underlined unit - QSPLINE *baseline, //actual baseline - float xheight, //height of line - float baseline_offset, //amount to shrinke it - ICOORDELT_LIST *chop_cells //places to chop - ); -void vertical_cunderline_projection( //project outlines - C_OUTLINE *outline, //outline to project - QSPLINE *baseline, //actual baseline - float xheight, //height of line - float baseline_offset, //amount to shrinke it - STATS *lower_proj, //below baseline - STATS *middle_proj, //centre region - STATS *upper_proj //top region - ); +extern double_VAR_H(textord_underline_offset, 0.1, "Fraction of x to ignore"); +extern BOOL_VAR_H(textord_restore_underlines, FALSE, + "Chop underlines & put back"); +void restore_underlined_blobs( // get chop points + TO_BLOCK* block // block to do +); +TO_ROW* most_overlapping_row( // find best row + TO_ROW_LIST* rows, // list of rows + BLOBNBOX* blob // blob to place +); +void find_underlined_blobs( // get chop points + BLOBNBOX* u_line, // underlined unit + QSPLINE* baseline, // actual baseline + float xheight, // height of line + float baseline_offset, // amount to shrinke it + ICOORDELT_LIST* chop_cells // places to chop +); +void vertical_cunderline_projection( // project outlines + C_OUTLINE* outline, // outline to project + QSPLINE* baseline, // actual baseline + float xheight, // height of line + float baseline_offset, // amount to shrinke it + STATS* lower_proj, // below baseline + STATS* middle_proj, // centre region + STATS* upper_proj // top region +); #endif diff --git a/src/textord/wordseg.cpp b/src/textord/wordseg.cpp index 0daaa81b5b..5217951eaf 100644 --- a/src/textord/wordseg.cpp +++ b/src/textord/wordseg.cpp @@ -18,20 +18,20 @@ **********************************************************************/ #ifdef __UNIX__ -#include +#include #endif -#include "stderr.h" -#include "blobbox.h" -#include "statistc.h" -#include "drawtord.h" -#include "makerow.h" -#include "pitsync1.h" -#include "tovars.h" -#include "topitch.h" -#include "cjkpitch.h" -#include "textord.h" -#include "fpchop.h" -#include "wordseg.h" +#include "blobbox.h" +#include "cjkpitch.h" +#include "drawtord.h" +#include "fpchop.h" +#include "makerow.h" +#include "pitsync1.h" +#include "statistc.h" +#include "stderr.h" +#include "textord.h" +#include "topitch.h" +#include "tovars.h" +#include "wordseg.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -43,12 +43,10 @@ EXTERN BOOL_VAR(textord_fp_chopping, TRUE, "Do fixed pitch chopping"); EXTERN BOOL_VAR(textord_force_make_prop_words, FALSE, "Force proportional word segmentation on all rows"); -EXTERN BOOL_VAR(textord_chopper_test, FALSE, - "Chopper is being tested."); - -#define FIXED_WIDTH_MULTIPLE 5 -#define BLOCK_STATS_CLUSTERS 10 +EXTERN BOOL_VAR(textord_chopper_test, FALSE, "Chopper is being tested."); +#define FIXED_WIDTH_MULTIPLE 5 +#define BLOCK_STATS_CLUSTERS 10 /** * @name make_single_word @@ -57,7 +55,7 @@ EXTERN BOOL_VAR(textord_chopper_test, FALSE, * pitch detection. */ -void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) { +void make_single_word(bool one_blob, TO_ROW_LIST* rows, ROW_LIST* real_rows) { TO_ROW_IT to_row_it(rows); ROW_IT row_it(real_rows); for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list(); @@ -68,8 +66,8 @@ void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) { C_BLOB_LIST cblobs; C_BLOB_IT cblob_it(&cblobs); BLOBNBOX_IT box_it(row->blob_list()); - for (;!box_it.empty(); box_it.forward()) { - BLOBNBOX* bblob= box_it.extract(); + for (; !box_it.empty(); box_it.forward()) { + BLOBNBOX* bblob = box_it.extract(); if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) { if (bblob->cblob() != nullptr) { C_OUTLINE_IT cout_it(cblob_it.data()->out_list()); @@ -101,19 +99,19 @@ void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) { * * Arrange the blobs into words. */ -void make_words(tesseract::Textord *textord, +void make_words(tesseract::Textord* textord, ICOORD page_tr, // top right float gradient, // page skew - BLOCK_LIST *blocks, // block list - TO_BLOCK_LIST *port_blocks) { // output list - TO_BLOCK_IT block_it; // iterator - TO_BLOCK *block; // current block + BLOCK_LIST* blocks, // block list + TO_BLOCK_LIST* port_blocks) { // output list + TO_BLOCK_IT block_it; // iterator + TO_BLOCK* block; // current block if (textord->use_cjk_fp_model()) { compute_fixed_pitch_cjk(page_tr, port_blocks); } else { compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f), - !(BOOL8) textord_test_landscape); + !(BOOL8)textord_test_landscape); } textord->to_spacing(page_tr, port_blocks); block_it.set_to_list(port_blocks); @@ -123,7 +121,6 @@ void make_words(tesseract::Textord *textord, } } - /** * @name set_row_spaces * @@ -131,30 +128,27 @@ void make_words(tesseract::Textord *textord, * the blobs can be arranged into words. */ -void set_row_spaces( //find space sizes - TO_BLOCK* block, //block to do - FCOORD rotation, //for drawing - bool testing_on //correct orientation +void set_row_spaces( // find space sizes + TO_BLOCK* block, // block to do + FCOORD rotation, // for drawing + bool testing_on // correct orientation ) { - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); + TO_ROW* row; // current row + TO_ROW_IT row_it = block->get_rows(); - if (row_it.empty ()) - return; //empty block - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); + if (row_it.empty()) return; // empty block + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); if (row->fixed_pitch == 0) { row->min_space = - (int32_t) ceil (row->pr_space - - (row->pr_space - - row->pr_nonsp) * textord_words_definite_spread); + (int32_t)ceil(row->pr_space - (row->pr_space - row->pr_nonsp) * + textord_words_definite_spread); row->max_nonspace = - (int32_t) floor (row->pr_nonsp + - (row->pr_space - - row->pr_nonsp) * textord_words_definite_spread); + (int32_t)floor(row->pr_nonsp + (row->pr_space - row->pr_nonsp) * + textord_words_definite_spread); if (testing_on && textord_show_initial_words) { - tprintf ("Assigning defaults %d non, %d space to row at %g\n", - row->max_nonspace, row->min_space, row->intercept ()); + tprintf("Assigning defaults %d non, %d space to row at %g\n", + row->max_nonspace, row->min_space, row->intercept()); } row->space_threshold = (row->max_nonspace + row->min_space) / 2; row->space_size = row->pr_space; @@ -162,259 +156,245 @@ void set_row_spaces( //find space sizes } #ifndef GRAPHICS_DISABLED if (textord_show_initial_words && testing_on) { - plot_word_decisions (to_win, (int16_t) row->fixed_pitch, row); + plot_word_decisions(to_win, (int16_t)row->fixed_pitch, row); } #endif } } - /** * @name row_words * * Compute the max nonspace and min space for the row. */ -int32_t row_words( //compute space size - TO_BLOCK* block, //block it came from - TO_ROW* row, //row to operate on - int32_t maxwidth, //max expected space size - FCOORD rotation, //for drawing - bool testing_on //for debug +int32_t row_words( // compute space size + TO_BLOCK* block, // block it came from + TO_ROW* row, // row to operate on + int32_t maxwidth, // max expected space size + FCOORD rotation, // for drawing + bool testing_on // for debug ) { - bool testing_row; //contains testpt - bool prev_valid; //if decent size - int32_t prev_x; //end of prev blob - int32_t cluster_count; //no of clusters - int32_t gap_index; //which cluster - int32_t smooth_factor; //for smoothing stats - BLOBNBOX *blob; //current blob - float lower, upper; //clustering parameters - float gaps[3]; //gap clusers + bool testing_row; // contains testpt + bool prev_valid; // if decent size + int32_t prev_x; // end of prev blob + int32_t cluster_count; // no of clusters + int32_t gap_index; // which cluster + int32_t smooth_factor; // for smoothing stats + BLOBNBOX* blob; // current blob + float lower, upper; // clustering parameters + float gaps[3]; // gap clusers ICOORD testpt; - TBOX blob_box; //bounding box - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - STATS gap_stats (0, maxwidth); - STATS cluster_stats[4]; //clusters + TBOX blob_box; // bounding box + // iterator + BLOBNBOX_IT blob_it = row->blob_list(); + STATS gap_stats(0, maxwidth); + STATS cluster_stats[4]; // clusters - testpt = ICOORD (textord_test_x, textord_test_y); + testpt = ICOORD(textord_test_x, textord_test_y); smooth_factor = - (int32_t) (block->xheight * textord_wordstats_smooth_factor + 1.5); + (int32_t)(block->xheight * textord_wordstats_smooth_factor + 1.5); // if (testing_on) // tprintf("Row smooth factor=%d\n",smooth_factor); prev_valid = false; prev_x = -INT32_MAX; testing_row = false; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - if (blob_box.contains (testpt)) - testing_row = true; - gap_stats.add (blob_box.width (), 1); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + if (blob_box.contains(testpt)) testing_row = true; + gap_stats.add(blob_box.width(), 1); } - gap_stats.clear (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - if (prev_valid && blob_box.left () - prev_x < maxwidth) { - gap_stats.add (blob_box.left () - prev_x, 1); + gap_stats.clear(); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + if (!blob->joined_to_prev()) { + blob_box = blob->bounding_box(); + if (prev_valid && blob_box.left() - prev_x < maxwidth) { + gap_stats.add(blob_box.left() - prev_x, 1); } prev_valid = TRUE; - prev_x = blob_box.right (); + prev_x = blob_box.right(); } } - if (gap_stats.get_total () == 0) { - row->min_space = 0; //no evidence + if (gap_stats.get_total() == 0) { + row->min_space = 0; // no evidence row->max_nonspace = 0; return 0; } - gap_stats.smooth (smooth_factor); + gap_stats.smooth(smooth_factor); lower = row->xheight * textord_words_initial_lower; upper = row->xheight * textord_words_initial_upper; - cluster_count = gap_stats.cluster (lower, upper, - textord_spacesize_ratioprop, 3, - cluster_stats); - while (cluster_count < 2 && ceil (lower) < floor (upper)) { - //shrink gap + cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, + 3, cluster_stats); + while (cluster_count < 2 && ceil(lower) < floor(upper)) { + // shrink gap upper = (upper * 3 + lower) / 4; lower = (lower * 3 + upper) / 4; - cluster_count = gap_stats.cluster (lower, upper, - textord_spacesize_ratioprop, 3, - cluster_stats); + cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, + 3, cluster_stats); } if (cluster_count < 2) { - row->min_space = 0; //no evidence + row->min_space = 0; // no evidence row->max_nonspace = 0; return 0; } for (gap_index = 0; gap_index < cluster_count; gap_index++) - gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); - //get medians + gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5); + // get medians if (cluster_count > 2) { if (testing_on && textord_show_initial_words) { - tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n", - row->intercept (), - cluster_stats[1].ile (0.5), - cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5)); + tprintf("Row at %g has 3 sizes of gap:%g,%g,%g\n", row->intercept(), + cluster_stats[1].ile(0.5), cluster_stats[2].ile(0.5), + cluster_stats[3].ile(0.5)); } lower = gaps[0]; if (gaps[1] > lower) { - upper = gaps[1]; //prefer most frequent - if (upper < block->xheight * textord_words_min_minspace - && gaps[2] > gaps[1]) { + upper = gaps[1]; // prefer most frequent + if (upper < block->xheight * textord_words_min_minspace && + gaps[2] > gaps[1]) { upper = gaps[2]; } - } - else if (gaps[2] > lower - && gaps[2] >= block->xheight * textord_words_min_minspace) + } else if (gaps[2] > lower && + gaps[2] >= block->xheight * textord_words_min_minspace) upper = gaps[2]; else if (lower >= block->xheight * textord_words_min_minspace) { - upper = lower; //not nice + upper = lower; // not nice lower = gaps[1]; if (testing_on && textord_show_initial_words) { - tprintf ("Had to switch most common from lower to upper!!\n"); + tprintf("Had to switch most common from lower to upper!!\n"); gap_stats.print(); } - } - else { - row->min_space = 0; //no evidence + } else { + row->min_space = 0; // no evidence row->max_nonspace = 0; return 0; } - } - else { + } else { if (gaps[1] < gaps[0]) { if (testing_on && textord_show_initial_words) { - tprintf ("Had to switch most common from lower to upper!!\n"); + tprintf("Had to switch most common from lower to upper!!\n"); gap_stats.print(); } lower = gaps[1]; upper = gaps[0]; - } - else { + } else { upper = gaps[1]; lower = gaps[0]; } } if (upper < block->xheight * textord_words_min_minspace) { - row->min_space = 0; //no evidence + row->min_space = 0; // no evidence row->max_nonspace = 0; return 0; } - if (upper * 3 < block->min_space * 2 + block->max_nonspace - || lower * 3 > block->min_space * 2 + block->max_nonspace) { + if (upper * 3 < block->min_space * 2 + block->max_nonspace || + lower * 3 > block->min_space * 2 + block->max_nonspace) { if (testing_on && textord_show_initial_words) { - tprintf ("Disagreement between block and row at %g!!\n", - row->intercept ()); - tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper); + tprintf("Disagreement between block and row at %g!!\n", row->intercept()); + tprintf("Lower=%g, upper=%g, Stats:\n", lower, upper); gap_stats.print(); } } row->min_space = - (int32_t) ceil (upper - (upper - lower) * textord_words_definite_spread); + (int32_t)ceil(upper - (upper - lower) * textord_words_definite_spread); row->max_nonspace = - (int32_t) floor (lower + (upper - lower) * textord_words_definite_spread); + (int32_t)floor(lower + (upper - lower) * textord_words_definite_spread); row->space_threshold = (row->max_nonspace + row->min_space) / 2; row->space_size = upper; row->kern_size = lower; if (testing_on && textord_show_initial_words) { if (testing_row) { - tprintf ("GAP STATS\n"); + tprintf("GAP STATS\n"); gap_stats.print(); - tprintf ("SPACE stats\n"); + tprintf("SPACE stats\n"); cluster_stats[2].print_summary(); - tprintf ("NONSPACE stats\n"); + tprintf("NONSPACE stats\n"); cluster_stats[1].print_summary(); } - tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", - row->intercept (), row->min_space, upper, - row->max_nonspace, lower); + tprintf("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", row->intercept(), + row->min_space, upper, row->max_nonspace, lower); } - return cluster_stats[2].get_total (); + return cluster_stats[2].get_total(); } - /** * @name row_words2 * * Compute the max nonspace and min space for the row. */ -int32_t row_words2( //compute space size - TO_BLOCK* block, //block it came from - TO_ROW* row, //row to operate on - int32_t maxwidth, //max expected space size - FCOORD rotation, //for drawing - bool testing_on //for debug +int32_t row_words2( // compute space size + TO_BLOCK* block, // block it came from + TO_ROW* row, // row to operate on + int32_t maxwidth, // max expected space size + FCOORD rotation, // for drawing + bool testing_on // for debug ) { - bool testing_row; //contains testpt - bool prev_valid; //if decent size - bool this_valid; //current blob big enough - int32_t prev_x; //end of prev blob - int32_t min_width; //min interesting width - int32_t valid_count; //good gaps - int32_t total_count; //total gaps - int32_t cluster_count; //no of clusters - int32_t prev_count; //previous cluster_count - int32_t gap_index; //which cluster - int32_t smooth_factor; //for smoothing stats - BLOBNBOX *blob; //current blob - float lower, upper; //clustering parameters + bool testing_row; // contains testpt + bool prev_valid; // if decent size + bool this_valid; // current blob big enough + int32_t prev_x; // end of prev blob + int32_t min_width; // min interesting width + int32_t valid_count; // good gaps + int32_t total_count; // total gaps + int32_t cluster_count; // no of clusters + int32_t prev_count; // previous cluster_count + int32_t gap_index; // which cluster + int32_t smooth_factor; // for smoothing stats + BLOBNBOX* blob; // current blob + float lower, upper; // clustering parameters ICOORD testpt; - TBOX blob_box; //bounding box - //iterator - BLOBNBOX_IT blob_it = row->blob_list (); - STATS gap_stats (0, maxwidth); - //gap sizes + TBOX blob_box; // bounding box + // iterator + BLOBNBOX_IT blob_it = row->blob_list(); + STATS gap_stats(0, maxwidth); + // gap sizes float gaps[BLOCK_STATS_CLUSTERS]; STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; - //clusters + // clusters - testpt = ICOORD (textord_test_x, textord_test_y); + testpt = ICOORD(textord_test_x, textord_test_y); smooth_factor = - (int32_t) (block->xheight * textord_wordstats_smooth_factor + 1.5); + (int32_t)(block->xheight * textord_wordstats_smooth_factor + 1.5); // if (testing_on) // tprintf("Row smooth factor=%d\n",smooth_factor); prev_valid = false; prev_x = -INT16_MAX; testing_row = false; - //min blob size - min_width = (int32_t) block->pr_space; + // min blob size + min_width = (int32_t)block->pr_space; total_count = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - this_valid = blob_box.width () >= min_width; - if (this_valid && prev_valid - && blob_box.left () - prev_x < maxwidth) { - gap_stats.add (blob_box.left () - prev_x, 1); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + if (!blob->joined_to_prev()) { + blob_box = blob->bounding_box(); + this_valid = blob_box.width() >= min_width; + if (this_valid && prev_valid && blob_box.left() - prev_x < maxwidth) { + gap_stats.add(blob_box.left() - prev_x, 1); } - total_count++; //count possibles - prev_x = blob_box.right (); + total_count++; // count possibles + prev_x = blob_box.right(); prev_valid = this_valid; } } - valid_count = gap_stats.get_total (); + valid_count = gap_stats.get_total(); if (valid_count < total_count * textord_words_minlarge) { - gap_stats.clear (); + gap_stats.clear(); prev_x = -INT16_MAX; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - if (!blob->joined_to_prev ()) { - blob_box = blob->bounding_box (); - if (blob_box.left () - prev_x < maxwidth) { - gap_stats.add (blob_box.left () - prev_x, 1); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + if (!blob->joined_to_prev()) { + blob_box = blob->bounding_box(); + if (blob_box.left() - prev_x < maxwidth) { + gap_stats.add(blob_box.left() - prev_x, 1); } - prev_x = blob_box.right (); + prev_x = blob_box.right(); } } } - if (gap_stats.get_total () == 0) { - row->min_space = 0; //no evidence + if (gap_stats.get_total() == 0) { + row->min_space = 0; // no evidence row->max_nonspace = 0; return 0; } @@ -422,97 +402,95 @@ int32_t row_words2( //compute space size cluster_count = 0; lower = block->xheight * words_initial_lower; upper = block->xheight * words_initial_upper; - gap_stats.smooth (smooth_factor); + gap_stats.smooth(smooth_factor); do { prev_count = cluster_count; - cluster_count = gap_stats.cluster (lower, upper, - textord_spacesize_ratioprop, - BLOCK_STATS_CLUSTERS, cluster_stats); - } - while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); + cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, + BLOCK_STATS_CLUSTERS, cluster_stats); + } while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); if (cluster_count < 1) { row->min_space = 0; row->max_nonspace = 0; return 0; } for (gap_index = 0; gap_index < cluster_count; gap_index++) - gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); - //get medians + gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5); + // get medians if (testing_on) { - tprintf ("cluster_count=%d:", cluster_count); + tprintf("cluster_count=%d:", cluster_count); for (gap_index = 0; gap_index < cluster_count; gap_index++) - tprintf (" %g(%d)", gaps[gap_index], - cluster_stats[gap_index + 1].get_total ()); - tprintf ("\n"); + tprintf(" %g(%d)", gaps[gap_index], + cluster_stats[gap_index + 1].get_total()); + tprintf("\n"); } - //Try to find proportional non-space and space for row. - for (gap_index = 0; gap_index < cluster_count - && gaps[gap_index] > block->max_nonspace; gap_index++); + // Try to find proportional non-space and space for row. + for (gap_index = 0; + gap_index < cluster_count && gaps[gap_index] > block->max_nonspace; + gap_index++) + ; if (gap_index < cluster_count) - lower = gaps[gap_index]; //most frequent below + lower = gaps[gap_index]; // most frequent below else { if (testing_on) - tprintf ("No cluster below block threshold!, using default=%g\n", - block->pr_nonsp); + tprintf("No cluster below block threshold!, using default=%g\n", + block->pr_nonsp); lower = block->pr_nonsp; } - for (gap_index = 0; gap_index < cluster_count - && gaps[gap_index] <= block->max_nonspace; gap_index++); + for (gap_index = 0; + gap_index < cluster_count && gaps[gap_index] <= block->max_nonspace; + gap_index++) + ; if (gap_index < cluster_count) - upper = gaps[gap_index]; //most frequent above + upper = gaps[gap_index]; // most frequent above else { if (testing_on) - tprintf ("No cluster above block threshold!, using default=%g\n", - block->pr_space); + tprintf("No cluster above block threshold!, using default=%g\n", + block->pr_space); upper = block->pr_space; } row->min_space = - (int32_t) ceil (upper - (upper - lower) * textord_words_definite_spread); + (int32_t)ceil(upper - (upper - lower) * textord_words_definite_spread); row->max_nonspace = - (int32_t) floor (lower + (upper - lower) * textord_words_definite_spread); + (int32_t)floor(lower + (upper - lower) * textord_words_definite_spread); row->space_threshold = (row->max_nonspace + row->min_space) / 2; row->space_size = upper; row->kern_size = lower; if (testing_on) { if (testing_row) { - tprintf ("GAP STATS\n"); + tprintf("GAP STATS\n"); gap_stats.print(); - tprintf ("SPACE stats\n"); + tprintf("SPACE stats\n"); cluster_stats[2].print_summary(); - tprintf ("NONSPACE stats\n"); + tprintf("NONSPACE stats\n"); cluster_stats[1].print_summary(); } - tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", - row->intercept (), row->min_space, upper, - row->max_nonspace, lower); + tprintf("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", row->intercept(), + row->min_space, upper, row->max_nonspace, lower); } return 1; } - /** * @name make_real_words * * Convert a TO_BLOCK to a BLOCK. */ -void make_real_words( - tesseract::Textord *textord, - TO_BLOCK *block, //block to do - FCOORD rotation //for drawing - ) { - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); - ROW *real_row = nullptr; //output row - ROW_IT real_row_it = block->block->row_list (); - - if (row_it.empty ()) - return; //empty block - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->blob_list ()->empty () && !row->rep_words.empty ()) { - real_row = make_rep_words (row, block); +void make_real_words(tesseract::Textord* textord, + TO_BLOCK* block, // block to do + FCOORD rotation // for drawing +) { + TO_ROW* row; // current row + TO_ROW_IT row_it = block->get_rows(); + ROW* real_row = nullptr; // output row + ROW_IT real_row_it = block->block->row_list(); + + if (row_it.empty()) return; // empty block + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + if (row->blob_list()->empty() && !row->rep_words.empty()) { + real_row = make_rep_words(row, block); } else if (!row->blob_list()->empty()) { // In a fixed pitch document, some lines may be detected as fixed pitch // while others don't, and will go through different path. @@ -521,31 +499,30 @@ void make_real_words( // with force_make_prop_words flag. POLY_BLOCK* pb = block->block->pdblk.poly_block(); if (textord_chopper_test) { - real_row = textord->make_blob_words (row, rotation); + real_row = textord->make_blob_words(row, rotation); } else if (textord_force_make_prop_words || (pb != nullptr && !pb->IsText()) || row->pitch_decision == PITCH_DEF_PROP || row->pitch_decision == PITCH_CORR_PROP) { - real_row = textord->make_prop_words (row, rotation); + real_row = textord->make_prop_words(row, rotation); } else if (row->pitch_decision == PITCH_DEF_FIXED || row->pitch_decision == PITCH_CORR_FIXED) { - real_row = fixed_pitch_words (row, rotation); + real_row = fixed_pitch_words(row, rotation); } else { ASSERT_HOST(FALSE); } } if (real_row != nullptr) { - //put row in block - real_row_it.add_after_then_move (real_row); + // put row in block + real_row_it.add_after_then_move(real_row); } } - block->block->set_stats (block->fixed_pitch == 0, (int16_t) block->kern_size, - (int16_t) block->space_size, - (int16_t) block->fixed_pitch); - block->block->check_pitch (); + block->block->set_stats(block->fixed_pitch == 0, (int16_t)block->kern_size, + (int16_t)block->space_size, + (int16_t)block->fixed_pitch); + block->block->check_pitch(); } - /** * @name make_rep_words * @@ -553,31 +530,29 @@ void make_real_words( * Get the xheight from the block as it may be more meaningful. */ -ROW *make_rep_words( //make a row - TO_ROW *row, //row to convert - TO_BLOCK *block //block it lives in - ) { - ROW *real_row; //output row - TBOX word_box; //bounding box - //iterator +ROW* make_rep_words( // make a row + TO_ROW* row, // row to convert + TO_BLOCK* block // block it lives in +) { + ROW* real_row; // output row + TBOX word_box; // bounding box + // iterator WERD_IT word_it = &row->rep_words; - if (word_it.empty ()) - return nullptr; - word_box = word_it.data ()->bounding_box (); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) - word_box += word_it.data ()->bounding_box (); + if (word_it.empty()) return nullptr; + word_box = word_it.data()->bounding_box(); + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) + word_box += word_it.data()->bounding_box(); row->xheight = block->xheight; - real_row = new ROW(row, - (int16_t) block->kern_size, (int16_t) block->space_size); - word_it.set_to_list (real_row->word_list ()); - //put words in row - word_it.add_list_after (&row->rep_words); - real_row->recalc_bounding_box (); + real_row = + new ROW(row, (int16_t)block->kern_size, (int16_t)block->space_size); + word_it.set_to_list(real_row->word_list()); + // put words in row + word_it.add_list_after(&row->rep_words); + real_row->recalc_bounding_box(); return real_row; } - /** * @name make_real_word * @@ -585,17 +560,17 @@ ROW *make_rep_words( //make a row * list of BLOBNBOXs. */ -WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator - int32_t blobcount, //no of blobs to use - bool bol, //start of line - uint8_t blanks //no of blanks - ) { +WERD* make_real_word(BLOBNBOX_IT* box_it, // iterator + int32_t blobcount, // no of blobs to use + bool bol, // start of line + uint8_t blanks // no of blanks +) { C_OUTLINE_IT cout_it; C_BLOB_LIST cblobs; C_BLOB_IT cblob_it = &cblobs; - WERD *word; // new word - BLOBNBOX *bblob; // current blob - int32_t blobindex; // in row + WERD* word; // new word + BLOBNBOX* bblob; // current blob + int32_t blobindex; // in row for (blobindex = 0; blobindex < blobcount; blobindex++) { bblob = box_it->extract(); @@ -606,24 +581,20 @@ WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator cout_it.add_list_after(bblob->cblob()->out_list()); delete bblob->cblob(); } - } - else { + } else { if (bblob->cblob() != nullptr) cblob_it.add_after_then_move(bblob->cblob()); } delete bblob; - box_it->forward(); // next one + box_it->forward(); // next one } - if (blanks < 1) - blanks = 1; + if (blanks < 1) blanks = 1; word = new WERD(&cblobs, blanks, nullptr); - if (bol) - word->set_flag(W_BOL, true); - if (box_it->at_first()) - word->set_flag(W_EOL, true); // at end of line + if (bol) word->set_flag(W_BOL, true); + if (box_it->at_first()) word->set_flag(W_EOL, true); // at end of line return word; } diff --git a/src/textord/wordseg.h b/src/textord/wordseg.h index 3acc8c9447..17fefd4d2b 100644 --- a/src/textord/wordseg.h +++ b/src/textord/wordseg.h @@ -17,61 +17,59 @@ * **********************************************************************/ -#ifndef WORDSEG_H -#define WORDSEG_H +#ifndef WORDSEG_H +#define WORDSEG_H -#include "params.h" -#include "blobbox.h" -#include "textord.h" +#include "blobbox.h" +#include "params.h" +#include "textord.h" namespace tesseract { class Tesseract; } -extern BOOL_VAR_H (textord_fp_chopping, TRUE, "Do fixed pitch chopping"); +extern BOOL_VAR_H(textord_fp_chopping, TRUE, "Do fixed pitch chopping"); extern BOOL_VAR_H(textord_force_make_prop_words, FALSE, "Force proportional word segmentation on all rows"); -extern BOOL_VAR_H (textord_chopper_test, FALSE, - "Chopper is being tested."); +extern BOOL_VAR_H(textord_chopper_test, FALSE, "Chopper is being tested."); -void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows); -void make_words(tesseract::Textord *textord, - ICOORD page_tr, // top right +void make_single_word(bool one_blob, TO_ROW_LIST* rows, ROW_LIST* real_rows); +void make_words(tesseract::Textord* textord, + ICOORD page_tr, // top right float gradient, // page skew - BLOCK_LIST *blocks, // block list - TO_BLOCK_LIST *port_blocks); // output list -void set_row_spaces( //find space sizes - TO_BLOCK* block, //block to do - FCOORD rotation, //for drawing - bool testing_on //correct orientation + BLOCK_LIST* blocks, // block list + TO_BLOCK_LIST* port_blocks); // output list +void set_row_spaces( // find space sizes + TO_BLOCK* block, // block to do + FCOORD rotation, // for drawing + bool testing_on // correct orientation ); -int32_t row_words( //compute space size - TO_BLOCK* block, //block it came from - TO_ROW* row, //row to operate on - int32_t maxwidth, //max expected space size - FCOORD rotation, //for drawing - bool testing_on //for debug +int32_t row_words( // compute space size + TO_BLOCK* block, // block it came from + TO_ROW* row, // row to operate on + int32_t maxwidth, // max expected space size + FCOORD rotation, // for drawing + bool testing_on // for debug ); -int32_t row_words2( //compute space size - TO_BLOCK* block, //block it came from - TO_ROW* row, //row to operate on - int32_t maxwidth, //max expected space size - FCOORD rotation, //for drawing - bool testing_on //for debug +int32_t row_words2( // compute space size + TO_BLOCK* block, // block it came from + TO_ROW* row, // row to operate on + int32_t maxwidth, // max expected space size + FCOORD rotation, // for drawing + bool testing_on // for debug ); -void make_real_words( - tesseract::Textord *textord, - TO_BLOCK *block, //block to do - FCOORD rotation //for drawing - ); -ROW *make_rep_words( //make a row - TO_ROW *row, //row to convert - TO_BLOCK *block //block it lives in - ); -WERD *make_real_word( //make a WERD - BLOBNBOX_IT* box_it, //iterator - int32_t blobcount, //no of blobs to use - bool bol, //start of line - uint8_t blanks //no of blanks +void make_real_words(tesseract::Textord* textord, + TO_BLOCK* block, // block to do + FCOORD rotation // for drawing +); +ROW* make_rep_words( // make a row + TO_ROW* row, // row to convert + TO_BLOCK* block // block it lives in +); +WERD* make_real_word( // make a WERD + BLOBNBOX_IT* box_it, // iterator + int32_t blobcount, // no of blobs to use + bool bol, // start of line + uint8_t blanks // no of blanks ); #endif diff --git a/src/textord/workingpartset.cpp b/src/textord/workingpartset.cpp index 97ce70aed4..d952c2efb7 100644 --- a/src/textord/workingpartset.cpp +++ b/src/textord/workingpartset.cpp @@ -38,9 +38,9 @@ void WorkingPartSet::AddPartition(ColPartition* part) { part_it_.move_to_last(); } else if (latest_part_->SingletonPartner(false) != part) { // Reposition the iterator to the correct partner, or at the end. - for (part_it_.move_to_first(); !part_it_.at_last() && - part_it_.data() != partner; - part_it_.forward()); + for (part_it_.move_to_first(); + !part_it_.at_last() && part_it_.data() != partner; part_it_.forward()) + ; } part_it_.add_after_then_move(part); latest_part_ = part; @@ -121,8 +121,8 @@ void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright, } } while (!part_it_.empty() && next_part != nullptr); if (!text_block) { - TO_BLOCK* to_block = ColPartition::MakeBlock(bleft, tright, - &block_parts, used_parts); + TO_BLOCK* to_block = + ColPartition::MakeBlock(bleft, tright, &block_parts, used_parts); if (to_block != nullptr) { TO_BLOCK_IT to_block_it(&to_blocks_); to_block_it.add_to_end(to_block); @@ -132,8 +132,8 @@ void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright, } else { // Further sub-divide text blocks where linespacing changes. ColPartition::LineSpacingBlocks(bleft, tright, resolution, &block_parts, - used_parts, - &completed_blocks_, &to_blocks_); + used_parts, &completed_blocks_, + &to_blocks_); } } part_it_.set_to_list(&part_set_); diff --git a/src/textord/workingpartset.h b/src/textord/workingpartset.h index 6fb342aac4..c5668a2ccf 100644 --- a/src/textord/workingpartset.h +++ b/src/textord/workingpartset.h @@ -32,16 +32,11 @@ namespace tesseract { class WorkingPartSet : public ELIST_LINK { public: explicit WorkingPartSet(ColPartition* column) - : column_(column), latest_part_(nullptr), part_it_(&part_set_) { - } + : column_(column), latest_part_(nullptr), part_it_(&part_set_) {} // Simple accessors. - ColPartition* column() const { - return column_; - } - void set_column(ColPartition* col) { - column_ = col; - } + ColPartition* column() const { return column_; } + void set_column(ColPartition* col) { column_ = col; } // Add the partition to this WorkingPartSet. Partitions are generally // stored in the order in which they are received, but if the partition diff --git a/src/training/ambiguous_words.cpp b/src/training/ambiguous_words.cpp index 9cf792466e..3b6151b938 100644 --- a/src/training/ambiguous_words.cpp +++ b/src/training/ambiguous_words.cpp @@ -22,10 +22,10 @@ // #include "baseapi.h" -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion +#include "dict.h" #include "helpers.h" #include "strngs.h" -#include "dict.h" #include "tesseractclass.h" int main(int argc, char** argv) { @@ -36,8 +36,10 @@ int main(int argc, char** argv) { printf("%s\n", tesseract::TessBaseAPI::Version()); return 0; } else if (argc != 4 && (argc != 6 || strcmp(argv[1], "-l") != 0)) { - printf("Usage: %s -v | --version | %s [-l lang] tessdata_dir wordlist_file" - " output_ambiguous_wordlist_file\n", argv[0], argv[0]); + printf( + "Usage: %s -v | --version | %s [-l lang] tessdata_dir wordlist_file" + " output_ambiguous_wordlist_file\n", + argv[0], argv[0]); return 1; } int argv_offset = 0; @@ -48,9 +50,9 @@ int main(int argc, char** argv) { } else { lang = "eng"; } - const char *tessdata_dir = argv[++argv_offset]; - const char *input_file_str = argv[++argv_offset]; - const char *output_file_str = argv[++argv_offset]; + const char* tessdata_dir = argv[++argv_offset]; + const char* input_file_str = argv[++argv_offset]; + const char* output_file_str = argv[++argv_offset]; // Initialize Tesseract. tesseract::TessBaseAPI api; @@ -60,8 +62,8 @@ int main(int argc, char** argv) { vars_values.push_back(output_file_str); api.Init(tessdata_dir, lang.string(), tesseract::OEM_TESSERACT_ONLY, nullptr, 0, &vars_vec, &vars_values, false); - tesseract::Dict &dict = api.tesseract()->getDict(); - FILE *input_file = fopen(input_file_str, "rb"); + tesseract::Dict& dict = api.tesseract()->getDict(); + FILE* input_file = fopen(input_file_str, "rb"); if (input_file == nullptr) { tprintf("Failed to open input wordlist file %s\n", input_file_str); exit(1); diff --git a/src/training/boxchar.cpp b/src/training/boxchar.cpp index 62f23ef38b..7f9c103b18 100644 --- a/src/training/boxchar.cpp +++ b/src/training/boxchar.cpp @@ -22,8 +22,8 @@ #include "boxchar.h" -#include #include +#include #include #include "fileio.h" diff --git a/src/training/boxchar.h b/src/training/boxchar.h index 6d9b59a894..9f559fab51 100644 --- a/src/training/boxchar.h +++ b/src/training/boxchar.h @@ -41,8 +41,8 @@ class BoxChar { // Accessors. const std::string& ch() const { return ch_; } - const Box* box() const { return box_; } - const int& page() const { return page_; } + const Box* box() const { return box_; } + const int& page() const { return page_; } void set_rtl_index(int index) { rtl_index_ = index; } const int& rtl_index() const { return rtl_index_; } @@ -52,7 +52,7 @@ class BoxChar { void set_page(int page) { page_ = page; } std::string* mutable_ch() { return &ch_; } - Box* mutable_box() { return box_; } + Box* mutable_box() { return box_; } // Sort function for sorting by left edge of box. Note that this will not // work properly until after InsertNewlines and InsertSpaces. @@ -93,11 +93,8 @@ class BoxChar { // Rotate the vector of boxes between start and end by the given rotation. // The rotation is in radians clockwise about the given center. - static void RotateBoxes(float rotation, - int xcenter, - int ycenter, - int start_box, - int end_box, + static void RotateBoxes(float rotation, int xcenter, int ycenter, + int start_box, int end_box, std::vector* boxes); // Create a tesseract box file from the vector of boxes. The image height diff --git a/src/training/classifier_tester.cpp b/src/training/classifier_tester.cpp index c495f1239f..ed117aa0f0 100644 --- a/src/training/classifier_tester.cpp +++ b/src/training/classifier_tester.cpp @@ -34,18 +34,13 @@ STRING_PARAM_FLAG(lang, "eng", "Language to test"); STRING_PARAM_FLAG(tessdata_dir, "", "Directory of traineddata files"); DECLARE_INT_PARAM_FLAG(debug_level); -enum ClassifierName { - CN_PRUNER, - CN_FULL, - CN_COUNT -}; +enum ClassifierName { CN_PRUNER, CN_FULL, CN_COUNT }; static const char* names[] = {"pruner", "full"}; static tesseract::ShapeClassifier* InitializeClassifier( - const char* classifer_name, const UNICHARSET& unicharset, - int argc, char **argv, - tesseract::TessBaseAPI** api) { + const char* classifer_name, const UNICHARSET& unicharset, int argc, + char** argv, tesseract::TessBaseAPI** api) { // Decode the classifier string. ClassifierName classifier = CN_COUNT; for (int c = 0; c < CN_COUNT; ++c) { @@ -64,10 +59,9 @@ static tesseract::ShapeClassifier* InitializeClassifier( tesseract::OcrEngineMode engine_mode = tesseract::OEM_TESSERACT_ONLY; tesseract::Tesseract* tesseract = nullptr; tesseract::Classify* classify = nullptr; - if ( - classifier == CN_PRUNER || classifier == CN_FULL) { + if (classifier == CN_PRUNER || classifier == CN_FULL) { if ((*api)->Init(FLAGS_tessdata_dir.c_str(), FLAGS_lang.c_str(), - engine_mode) < 0) { + engine_mode) < 0) { fprintf(stderr, "Tesseract initialization failed!\n"); return nullptr; } @@ -108,7 +102,7 @@ static tesseract::ShapeClassifier* InitializeClassifier( // pruner : Tesseract class pruner only. // full : Tesseract full classifier. // with an input trainer.) -int main(int argc, char **argv) { +int main(int argc, char** argv) { tesseract::CheckSharedLibraryVersion(); ParseArguments(&argc, &argv); STRING file_prefix; @@ -128,9 +122,10 @@ int main(int argc, char **argv) { // We want to test with replicated samples too. trainer->ReplicateAndRandomizeSamplesIfRequired(); - trainer->TestClassifierOnSamples(tesseract::CT_UNICHAR_TOP1_ERR, - std::max(3, static_cast(FLAGS_debug_level)), false, - shape_classifier, nullptr); + trainer->TestClassifierOnSamples( + tesseract::CT_UNICHAR_TOP1_ERR, + std::max(3, static_cast(FLAGS_debug_level)), false, shape_classifier, + nullptr); delete shape_classifier; delete api; delete trainer; diff --git a/src/training/cntraining.cpp b/src/training/cntraining.cpp index e3949edfbe..3a239de2ce 100644 --- a/src/training/cntraining.cpp +++ b/src/training/cntraining.cpp @@ -8,34 +8,34 @@ ** Tuesday, May 17, 1998 Changes made to make feature specific and ** simplify structures. First step in simplifying training process. ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. +** (c) Copyright Hewlett-Packard Company, 1988. +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** http://www.apache.org/licenses/LICENSE-2.0 +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. ******************************************************************************/ /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ -#include "oldlist.h" +#include +#include +#include +#include "cluster.h" +#include "clusttool.h" +#include "commontraining.h" #include "efio.h" #include "emalloc.h" #include "featdefs.h" -#include "tessopt.h" #include "ocrfeatures.h" -#include "clusttool.h" -#include "cluster.h" -#include -#include -#include +#include "oldlist.h" +#include "tessopt.h" #include "unichar.h" -#include "commontraining.h" #define PROGRAM_FEATURE_TYPE "cn" @@ -45,8 +45,8 @@ DECLARE_STRING_PARAM_FLAG(D); Private Function Prototypes ----------------------------------------------------------------------------*/ -void WriteNormProtos(const char *Directory, LIST LabeledProtoList, - const FEATURE_DESC_STRUCT *feature_desc); +void WriteNormProtos(const char* Directory, LIST LabeledProtoList, + const FEATURE_DESC_STRUCT* feature_desc); /* PARAMDESC *ConvertToPARAMDESC( @@ -54,22 +54,15 @@ PARAMDESC *ConvertToPARAMDESC( int N); */ -void WriteProtos( - FILE *File, - uint16_t N, - LIST ProtoList, - BOOL8 WriteSigProtos, - BOOL8 WriteInsigProtos); +void WriteProtos(FILE* File, uint16_t N, LIST ProtoList, BOOL8 WriteSigProtos, + BOOL8 WriteInsigProtos); /*---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------*/ /* global variable to hold configuration parameters to control clustering */ //-M 0.025 -B 0.05 -I 0.8 -C 1e-3 -CLUSTERCONFIG CNConfig = -{ - elliptical, 0.025, 0.05, 0.8, 1e-3, 0 -}; +CLUSTERCONFIG CNConfig = {elliptical, 0.025, 0.05, 0.8, 1e-3, 0}; /*---------------------------------------------------------------------------- Public Code @@ -123,18 +116,18 @@ CLUSTERCONFIG CNConfig = * @note Exceptions: none * @note History: Fri Aug 18 08:56:17 1989, DSJ, Created. */ -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { tesseract::CheckSharedLibraryVersion(); // Set the global Config parameters before parsing the command line. Config = CNConfig; - const char *PageName; - FILE *TrainingPage; - LIST CharList = NIL_LIST; - CLUSTERER *Clusterer = nullptr; - LIST ProtoList = NIL_LIST; - LIST NormProtoList = NIL_LIST; + const char* PageName; + FILE* TrainingPage; + LIST CharList = NIL_LIST; + CLUSTERER* Clusterer = nullptr; + LIST ProtoList = NIL_LIST; + LIST NormProtoList = NIL_LIST; LIST pCharList; LABELEDLIST CharSample; FEATURE_DEFS_STRUCT FeatureDefs; @@ -159,10 +152,10 @@ int main(int argc, char *argv[]) { // freeable_protos, so they can be freed later. GenericVector freeable_protos; iterate(pCharList) { - //Cluster + // Cluster CharSample = (LABELEDLIST)first_node(pCharList); Clusterer = - SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); + SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); if (Clusterer == nullptr) { // To avoid a SIGSEGV fprintf(stderr, "Error: nullptr clusterer!\n"); return 1; @@ -178,9 +171,10 @@ int main(int argc, char *argv[]) { break; } else { Config.MinSamples *= 0.95; - printf("0 significant protos for %s." - " Retrying clustering with MinSamples = %f%%\n", - CharSample->Label, Config.MinSamples); + printf( + "0 significant protos for %s." + " Retrying clustering with MinSamples = %f%%\n", + CharSample->Label, Config.MinSamples); } } Config.MinSamples = SavedMinSamples; @@ -196,7 +190,7 @@ int main(int argc, char *argv[]) { for (int i = 0; i < freeable_protos.size(); ++i) { FreeProtoList(&freeable_protos[i]); } - printf ("\n"); + printf("\n"); return 0; } // main @@ -206,19 +200,19 @@ int main(int argc, char *argv[]) { /*----------------------------------------------------------------------------*/ /** -* This routine writes the specified samples into files which -* are organized according to the font name and character name -* of the samples. -* @param Directory directory to place sample files into -* @param LabeledProtoList List of labeled protos -* @param feature_desc Description of the features -* @return none -* @note Exceptions: none -* @note History: Fri Aug 18 16:17:06 1989, DSJ, Created. -*/ -void WriteNormProtos(const char *Directory, LIST LabeledProtoList, - const FEATURE_DESC_STRUCT *feature_desc) { - FILE *File; + * This routine writes the specified samples into files which + * are organized according to the font name and character name + * of the samples. + * @param Directory directory to place sample files into + * @param LabeledProtoList List of labeled protos + * @param feature_desc Description of the features + * @return none + * @note Exceptions: none + * @note History: Fri Aug 18 16:17:06 1989, DSJ, Created. + */ +void WriteNormProtos(const char* Directory, LIST LabeledProtoList, + const FEATURE_DESC_STRUCT* feature_desc) { + FILE* File; STRING Filename; LABELEDLIST LabeledProto; int N; @@ -229,46 +223,39 @@ void WriteNormProtos(const char *Directory, LIST LabeledProtoList, Filename += "/"; } Filename += "normproto"; - printf ("\nWriting %s ...", Filename.string()); - File = Efopen (Filename.string(), "wb"); + printf("\nWriting %s ...", Filename.string()); + File = Efopen(Filename.string(), "wb"); fprintf(File, "%0d\n", feature_desc->NumParams); WriteParamDesc(File, feature_desc->NumParams, feature_desc->ParamDesc); - iterate(LabeledProtoList) - { - LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); + iterate(LabeledProtoList) { + LabeledProto = (LABELEDLIST)first_node(LabeledProtoList); N = NumberOfProtos(LabeledProto->List, true, false); if (N < 1) { - printf ("\nError! Not enough protos for %s: %d protos" - " (%d significant protos" - ", %d insignificant protos)\n", - LabeledProto->Label, N, - NumberOfProtos(LabeledProto->List, 1, 0), - NumberOfProtos(LabeledProto->List, 0, 1)); + printf( + "\nError! Not enough protos for %s: %d protos" + " (%d significant protos" + ", %d insignificant protos)\n", + LabeledProto->Label, N, NumberOfProtos(LabeledProto->List, 1, 0), + NumberOfProtos(LabeledProto->List, 0, 1)); exit(1); } fprintf(File, "\n%s %d\n", LabeledProto->Label, N); WriteProtos(File, feature_desc->NumParams, LabeledProto->List, true, false); } - fclose (File); + fclose(File); } // WriteNormProtos /*-------------------------------------------------------------------------*/ -void WriteProtos( - FILE *File, - uint16_t N, - LIST ProtoList, - BOOL8 WriteSigProtos, - BOOL8 WriteInsigProtos) -{ - PROTOTYPE *Proto; +void WriteProtos(FILE* File, uint16_t N, LIST ProtoList, BOOL8 WriteSigProtos, + BOOL8 WriteInsigProtos) { + PROTOTYPE* Proto; // write prototypes - iterate(ProtoList) - { - Proto = (PROTOTYPE *) first_node ( ProtoList ); - if (( Proto->Significant && WriteSigProtos ) || - ( ! Proto->Significant && WriteInsigProtos ) ) - WritePrototype( File, N, Proto ); + iterate(ProtoList) { + Proto = (PROTOTYPE*)first_node(ProtoList); + if ((Proto->Significant && WriteSigProtos) || + (!Proto->Significant && WriteInsigProtos)) + WritePrototype(File, N, Proto); } } // WriteProtos diff --git a/src/training/combine_lang_model.cpp b/src/training/combine_lang_model.cpp index c1ce1cf5dc..cb12d77eca 100644 --- a/src/training/combine_lang_model.cpp +++ b/src/training/combine_lang_model.cpp @@ -17,7 +17,7 @@ // limitations under the License. #include "commandlineflags.h" -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion #include "lang_model_helpers.h" #include "tprintf.h" #include "unicharset_training_utils.h" @@ -85,6 +85,7 @@ int main(int argc, char** argv) { return tesseract::CombineLangModel( unicharset, FLAGS_script_dir.c_str(), FLAGS_version_str.c_str(), FLAGS_output_dir.c_str(), FLAGS_lang.c_str(), FLAGS_pass_through_recoder, - words, puncs, numbers, FLAGS_lang_is_rtl, /*reader*/ nullptr, + words, puncs, numbers, FLAGS_lang_is_rtl, + /*reader*/ nullptr, /*writer*/ nullptr); } diff --git a/src/training/combine_tessdata.cpp b/src/training/combine_tessdata.cpp index 3eb8d8541b..13bf42ac86 100644 --- a/src/training/combine_tessdata.cpp +++ b/src/training/combine_tessdata.cpp @@ -18,7 +18,7 @@ // /////////////////////////////////////////////////////////////////////// -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion #include "lstmrecognizer.h" #include "tessdatamanager.h" @@ -65,7 +65,7 @@ // This will create /home/$USER/temp/eng.* files with individual tessdata // components from tessdata/eng.traineddata. // -int main(int argc, char **argv) { +int main(int argc, char** argv) { tesseract::CheckSharedLibraryVersion(); int i; @@ -76,19 +76,17 @@ int main(int argc, char **argv) { } else if (argc == 2) { printf("Combining tessdata files\n"); STRING lang = argv[1]; - char* last = &argv[1][strlen(argv[1])-1]; - if (*last != '.') - lang += '.'; + char* last = &argv[1][strlen(argv[1]) - 1]; + if (*last != '.') lang += '.'; STRING output_file = lang; output_file += kTrainedDataSuffix; if (!tm.CombineDataFiles(lang.string(), output_file.string())) { - printf("Error combining tessdata files into %s\n", - output_file.string()); + printf("Error combining tessdata files into %s\n", output_file.string()); } else { printf("Output %s created successfully.\n", output_file.string()); } - } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 || - strcmp(argv[1], "-u") == 0)) { + } else if (argc >= 4 && + (strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) { // Initialize TessdataManager with the data in the given traineddata file. if (!tm.Init(argv[2])) { tprintf("Failed to read %s\n", argv[2]); @@ -100,16 +98,17 @@ int main(int argc, char **argv) { if (tm.ExtractToFile(argv[i])) { printf("Wrote %s\n", argv[i]); } else { - printf("Not extracting %s, since this component" - " is not present\n", argv[i]); + printf( + "Not extracting %s, since this component" + " is not present\n", + argv[i]); } } } else { // extract all the components for (i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) { STRING filename = argv[3]; - char* last = &argv[3][strlen(argv[3])-1]; - if (*last != '.') - filename += '.'; + char* last = &argv[3][strlen(argv[3]) - 1]; + if (*last != '.') filename += '.'; filename += tesseract::kTessdataFileSuffixes[i]; if (tm.ExtractToFile(filename.string())) { printf("Wrote %s\n", filename.string()); @@ -118,7 +117,7 @@ int main(int argc, char **argv) { } } else if (argc >= 4 && strcmp(argv[1], "-o") == 0) { // Rename the current traineddata file to a temporary name. - const char *new_traineddata_filename = argv[2]; + const char* new_traineddata_filename = argv[2]; STRING traineddata_filename = new_traineddata_filename; traineddata_filename += ".__tmp__"; if (rename(new_traineddata_filename, traineddata_filename.string()) != 0) { @@ -131,7 +130,7 @@ int main(int argc, char **argv) { tm.Init(traineddata_filename.string()); // Write the updated traineddata file. - tm.OverwriteComponents(new_traineddata_filename, argv+3, argc-3); + tm.OverwriteComponents(new_traineddata_filename, argv + 3, argc - 3); } else if (argc == 3 && strcmp(argv[1], "-c") == 0) { if (!tm.Init(argv[2])) { tprintf("Failed to read %s\n", argv[2]); @@ -161,20 +160,26 @@ int main(int argc, char **argv) { // Initialize TessdataManager with the data in the given traineddata file. tm.Init(argv[2]); } else { - printf("Usage for combining tessdata components:\n" - " %s language_data_path_prefix\n" - " (e.g. %s tessdata/eng.)\n\n", argv[0], argv[0]); - printf("Usage for extracting tessdata components:\n" - " %s -e traineddata_file [output_component_file...]\n" - " (e.g. %s -e eng.traineddata eng.unicharset)\n\n", - argv[0], argv[0]); - printf("Usage for overwriting tessdata components:\n" - " %s -o traineddata_file [input_component_file...]\n" - " (e.g. %s -o eng.traineddata eng.unicharset)\n\n", - argv[0], argv[0]); - printf("Usage for unpacking all tessdata components:\n" - " %s -u traineddata_file output_path_prefix\n" - " (e.g. %s -u eng.traineddata tmp/eng.)\n", argv[0], argv[0]); + printf( + "Usage for combining tessdata components:\n" + " %s language_data_path_prefix\n" + " (e.g. %s tessdata/eng.)\n\n", + argv[0], argv[0]); + printf( + "Usage for extracting tessdata components:\n" + " %s -e traineddata_file [output_component_file...]\n" + " (e.g. %s -e eng.traineddata eng.unicharset)\n\n", + argv[0], argv[0]); + printf( + "Usage for overwriting tessdata components:\n" + " %s -o traineddata_file [input_component_file...]\n" + " (e.g. %s -o eng.traineddata eng.unicharset)\n\n", + argv[0], argv[0]); + printf( + "Usage for unpacking all tessdata components:\n" + " %s -u traineddata_file output_path_prefix\n" + " (e.g. %s -u eng.traineddata tmp/eng.)\n", + argv[0], argv[0]); printf( "Usage for listing directory of components:\n" " %s -d traineddata_file\n", diff --git a/src/training/commandlineflags.cpp b/src/training/commandlineflags.cpp index b857232169..a1268a7241 100644 --- a/src/training/commandlineflags.cpp +++ b/src/training/commandlineflags.cpp @@ -8,8 +8,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "baseapi.h" // TessBaseAPI::Version #include "commandlineflags.h" +#include "baseapi.h" // TessBaseAPI::Version #ifndef GOOGLE_TESSERACT @@ -18,7 +18,7 @@ bool IntFlagExists(const char* flag_name, int32_t* value) { STRING full_flag_name("FLAGS_"); full_flag_name += flag_name; GenericVector empty; - IntParam *p = ParamUtils::FindParam( + IntParam* p = ParamUtils::FindParam( full_flag_name.string(), GlobalParams()->int_params, empty); if (p == nullptr) return false; *value = (int32_t)(*p); @@ -29,7 +29,7 @@ bool DoubleFlagExists(const char* flag_name, double* value) { STRING full_flag_name("FLAGS_"); full_flag_name += flag_name; GenericVector empty; - DoubleParam *p = ParamUtils::FindParam( + DoubleParam* p = ParamUtils::FindParam( full_flag_name.string(), GlobalParams()->double_params, empty); if (p == nullptr) return false; *value = static_cast(*p); @@ -40,7 +40,7 @@ bool BoolFlagExists(const char* flag_name, bool* value) { STRING full_flag_name("FLAGS_"); full_flag_name += flag_name; GenericVector empty; - BoolParam *p = ParamUtils::FindParam( + BoolParam* p = ParamUtils::FindParam( full_flag_name.string(), GlobalParams()->bool_params, empty); if (p == nullptr) return false; *value = (BOOL8)(*p); @@ -51,7 +51,7 @@ bool StringFlagExists(const char* flag_name, const char** value) { STRING full_flag_name("FLAGS_"); full_flag_name += flag_name; GenericVector empty; - StringParam *p = ParamUtils::FindParam( + StringParam* p = ParamUtils::FindParam( full_flag_name.string(), GlobalParams()->string_params, empty); *value = (p != nullptr) ? p->string() : nullptr; return p != nullptr; @@ -61,7 +61,7 @@ void SetIntFlagValue(const char* flag_name, const int32_t new_val) { STRING full_flag_name("FLAGS_"); full_flag_name += flag_name; GenericVector empty; - IntParam *p = ParamUtils::FindParam( + IntParam* p = ParamUtils::FindParam( full_flag_name.string(), GlobalParams()->int_params, empty); ASSERT_HOST(p != nullptr); p->set_value(new_val); @@ -71,7 +71,7 @@ void SetDoubleFlagValue(const char* flag_name, const double new_val) { STRING full_flag_name("FLAGS_"); full_flag_name += flag_name; GenericVector empty; - DoubleParam *p = ParamUtils::FindParam( + DoubleParam* p = ParamUtils::FindParam( full_flag_name.string(), GlobalParams()->double_params, empty); ASSERT_HOST(p != nullptr); p->set_value(new_val); @@ -81,7 +81,7 @@ void SetBoolFlagValue(const char* flag_name, const bool new_val) { STRING full_flag_name("FLAGS_"); full_flag_name += flag_name; GenericVector empty; - BoolParam *p = ParamUtils::FindParam( + BoolParam* p = ParamUtils::FindParam( full_flag_name.string(), GlobalParams()->bool_params, empty); ASSERT_HOST(p != nullptr); p->set_value(new_val); @@ -91,7 +91,7 @@ void SetStringFlagValue(const char* flag_name, const char* new_val) { STRING full_flag_name("FLAGS_"); full_flag_name += flag_name; GenericVector empty; - StringParam *p = ParamUtils::FindParam( + StringParam* p = ParamUtils::FindParam( full_flag_name.string(), GlobalParams()->string_params, empty); ASSERT_HOST(p != nullptr); p->set_value(STRING(new_val)); @@ -113,8 +113,8 @@ void PrintCommandLineFlags() { const char* kFlagNamePrefix = "FLAGS_"; const int kFlagNamePrefixLen = strlen(kFlagNamePrefix); for (int i = 0; i < GlobalParams()->int_params.size(); ++i) { - if (!strncmp(GlobalParams()->int_params[i]->name_str(), - kFlagNamePrefix, kFlagNamePrefixLen)) { + if (!strncmp(GlobalParams()->int_params[i]->name_str(), kFlagNamePrefix, + kFlagNamePrefixLen)) { printf(" --%s %s (type:int default:%d)\n", GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen, GlobalParams()->int_params[i]->info_str(), @@ -122,8 +122,8 @@ void PrintCommandLineFlags() { } } for (int i = 0; i < GlobalParams()->double_params.size(); ++i) { - if (!strncmp(GlobalParams()->double_params[i]->name_str(), - kFlagNamePrefix, kFlagNamePrefixLen)) { + if (!strncmp(GlobalParams()->double_params[i]->name_str(), kFlagNamePrefix, + kFlagNamePrefixLen)) { printf(" --%s %s (type:double default:%g)\n", GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen, GlobalParams()->double_params[i]->info_str(), @@ -131,8 +131,8 @@ void PrintCommandLineFlags() { } } for (int i = 0; i < GlobalParams()->bool_params.size(); ++i) { - if (!strncmp(GlobalParams()->bool_params[i]->name_str(), - kFlagNamePrefix, kFlagNamePrefixLen)) { + if (!strncmp(GlobalParams()->bool_params[i]->name_str(), kFlagNamePrefix, + kFlagNamePrefixLen)) { printf(" --%s %s (type:bool default:%s)\n", GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen, GlobalParams()->bool_params[i]->info_str(), @@ -140,8 +140,8 @@ void PrintCommandLineFlags() { } } for (int i = 0; i < GlobalParams()->string_params.size(); ++i) { - if (!strncmp(GlobalParams()->string_params[i]->name_str(), - kFlagNamePrefix, kFlagNamePrefixLen)) { + if (!strncmp(GlobalParams()->string_params[i]->name_str(), kFlagNamePrefix, + kFlagNamePrefixLen)) { printf(" --%s %s (type:string default:%s)\n", GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen, GlobalParams()->string_params[i]->info_str(), @@ -150,8 +150,7 @@ void PrintCommandLineFlags() { } } -void ParseCommandLineFlags(const char* usage, - int* argc, char*** argv, +void ParseCommandLineFlags(const char* usage, int* argc, char*** argv, const bool remove_flags) { if (*argc == 1) { printf("USAGE: %s\n", usage); @@ -159,7 +158,8 @@ void ParseCommandLineFlags(const char* usage, exit(0); } - if (*argc > 1 && (!strcmp((*argv)[1], "-v") || !strcmp((*argv)[1], "--version"))) { + if (*argc > 1 && + (!strcmp((*argv)[1], "-v") || !strcmp((*argv)[1], "--version"))) { printf("%s\n", TessBaseAPI::Version()); exit(0); } @@ -213,8 +213,8 @@ void ParseCommandLineFlags(const char* usage, exit(1); } if (!SafeAtoi(rhs, &int_val)) { - tprintf("ERROR: Could not parse int from %s in flag %s\n", - rhs, (*argv)[i]); + tprintf("ERROR: Could not parse int from %s in flag %s\n", rhs, + (*argv)[i]); exit(1); } } else { @@ -245,8 +245,8 @@ void ParseCommandLineFlags(const char* usage, exit(1); } if (!SafeAtod(rhs, &double_val)) { - tprintf("ERROR: Could not parse double from %s in flag %s\n", - rhs, (*argv)[i]); + tprintf("ERROR: Could not parse double from %s in flag %s\n", rhs, + (*argv)[i]); exit(1); } } else { @@ -329,8 +329,7 @@ void ParseCommandLineFlags(const char* usage, #include "base/init_google.h" namespace tesseract { -void ParseCommandLineFlags(const char* usage, - int* argc, char*** argv, +void ParseCommandLineFlags(const char* usage, int* argc, char*** argv, const bool remove_flags) { InitGoogle(usage, argc, argv, remove_flags); } diff --git a/src/training/commandlineflags.h b/src/training/commandlineflags.h index 7e563b2dce..220d4bd354 100644 --- a/src/training/commandlineflags.h +++ b/src/training/commandlineflags.h @@ -22,45 +22,33 @@ #ifndef GOOGLE_TESSERACT #include -#include "tprintf.h" #include "params.h" +#include "tprintf.h" -#define INT_PARAM_FLAG(name, val, comment) \ - INT_VAR(FLAGS_##name, val, comment) -#define DECLARE_INT_PARAM_FLAG(name) \ - extern INT_VAR_H(FLAGS_##name, 0, "") -#define DOUBLE_PARAM_FLAG(name, val, comment) \ +#define INT_PARAM_FLAG(name, val, comment) INT_VAR(FLAGS_##name, val, comment) +#define DECLARE_INT_PARAM_FLAG(name) extern INT_VAR_H(FLAGS_##name, 0, "") +#define DOUBLE_PARAM_FLAG(name, val, comment) \ double_VAR(FLAGS_##name, val, comment) -#define DECLARE_DOUBLE_PARAM_FLAG(name) \ +#define DECLARE_DOUBLE_PARAM_FLAG(name) \ extern double_VAR_H(FLAGS_##name, "", "") -#define BOOL_PARAM_FLAG(name, val, comment) \ - BOOL_VAR(FLAGS_##name, val, comment) -#define DECLARE_BOOL_PARAM_FLAG(name) \ - extern BOOL_VAR_H(FLAGS_##name, 0, "") -#define STRING_PARAM_FLAG(name, val, comment) \ +#define BOOL_PARAM_FLAG(name, val, comment) BOOL_VAR(FLAGS_##name, val, comment) +#define DECLARE_BOOL_PARAM_FLAG(name) extern BOOL_VAR_H(FLAGS_##name, 0, "") +#define STRING_PARAM_FLAG(name, val, comment) \ STRING_VAR(FLAGS_##name, val, comment) -#define DECLARE_STRING_PARAM_FLAG(name) \ +#define DECLARE_STRING_PARAM_FLAG(name) \ extern STRING_VAR_H(FLAGS_##name, "", "") #else #include "base/commandlineflags.h" -#define INT_PARAM_FLAG(name, val, comment) \ - DEFINE_int32(name, val, comment) -#define DECLARE_INT_PARAM_FLAG(name) \ - DECLARE_int32(name) -#define DOUBLE_PARAM_FLAG(name, val, comment) \ - DEFINE_double(name, val, comment) -#define DECLARE_DOUBLE_PARAM_FLAG(name) \ - DECLARE_double(name) -#define BOOL_PARAM_FLAG(name, val, comment) \ - DEFINE_bool(name, val, comment) -#define DECLARE_BOOL_PARAM_FLAG(name) \ - DECLARE_bool(name) -#define STRING_PARAM_FLAG(name, val, comment) \ - DEFINE_string(name, val, comment) -#define DECLARE_STRING_PARAM_FLAG(name) \ - DECLARE_string(name) +#define INT_PARAM_FLAG(name, val, comment) DEFINE_int32(name, val, comment) +#define DECLARE_INT_PARAM_FLAG(name) DECLARE_int32(name) +#define DOUBLE_PARAM_FLAG(name, val, comment) DEFINE_double(name, val, comment) +#define DECLARE_DOUBLE_PARAM_FLAG(name) DECLARE_double(name) +#define BOOL_PARAM_FLAG(name, val, comment) DEFINE_bool(name, val, comment) +#define DECLARE_BOOL_PARAM_FLAG(name) DECLARE_bool(name) +#define STRING_PARAM_FLAG(name, val, comment) DEFINE_string(name, val, comment) +#define DECLARE_STRING_PARAM_FLAG(name) DECLARE_string(name) #endif @@ -75,9 +63,9 @@ namespace tesseract { // eg. If the input *argv is // { "program", "--foo=4", "--bar=true", "file1", "file2" } with *argc = 5, the // output *argv is { "program", "file1", "file2" } with *argc = 3 -void ParseCommandLineFlags(const char* usage, int* argc, - char*** argv, const bool remove_flags); +void ParseCommandLineFlags(const char* usage, int* argc, char*** argv, + const bool remove_flags); -} +} // namespace tesseract #endif // TESSERACT_TRAINING_COMMANDLINEFLAGS_H_ diff --git a/src/training/commontraining.cpp b/src/training/commontraining.cpp index d888850c02..5d2ec04891 100644 --- a/src/training/commontraining.cpp +++ b/src/training/commontraining.cpp @@ -13,8 +13,8 @@ #include "commontraining.h" -#include #include +#include #include #include "allheaders.h" @@ -48,7 +48,7 @@ using tesseract::ShapeTable; // global variable to hold configuration parameters to control clustering // -M 0.625 -B 0.05 -I 1.0 -C 1e-6. -CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 }; +CLUSTERCONFIG Config = {elliptical, 0.625, 0.05, 1.0, 1e-6, 0}; FEATURE_DEFS_STRUCT feature_defs; CCUtil ccutil; @@ -84,7 +84,7 @@ DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence, * @return none * @note Exceptions: Illegal options terminate the program. */ -void ParseArguments(int* argc, char ***argv) { +void ParseArguments(int* argc, char*** argv) { STRING usage; if (*argc) { usage += (*argv)[0]; @@ -97,19 +97,18 @@ void ParseArguments(int* argc, char ***argv) { // remove_flags to true when parsing the flags. tessoptind = 1; // Set some global values based on the flags. - Config.MinSamples = - std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_min_samples_fraction))); + Config.MinSamples = std::max( + 0.0, std::min(1.0, double(FLAGS_clusterconfig_min_samples_fraction))); Config.MaxIllegal = - std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_max_illegal))); + std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_max_illegal))); Config.Independence = - std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_independence))); + std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_independence))); Config.Confidence = - std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_confidence))); + std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_confidence))); // Set additional parameters from config file if specified. if (!FLAGS_configfile.empty()) { tesseract::ParamUtils::ReadParamsFile( - FLAGS_configfile.c_str(), - tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY, + FLAGS_configfile.c_str(), tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY, ccutil.params()); } } @@ -130,8 +129,8 @@ ShapeTable* LoadShapeTable(const STRING& file_prefix) { shape_table_file.string()); } else { int num_shapes = shape_table->NumShapes(); - tprintf("Read shape table %s of %d shapes\n", - shape_table_file.string(), num_shapes); + tprintf("Read shape table %s of %d shapes\n", shape_table_file.string(), + num_shapes); } } else { tprintf("Warning: No shape table file present: %s\n", @@ -172,9 +171,8 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) { * If shape_table is not nullptr, but failed to load, make a fake flat one, * as shape clustering was not run. */ -MasterTrainer* LoadTrainingData(int argc, const char* const * argv, - bool replication, - ShapeTable** shape_table, +MasterTrainer* LoadTrainingData(int argc, const char* const* argv, + bool replication, ShapeTable** shape_table, STRING* file_prefix) { InitFeatureDefs(&feature_defs); InitIntegerFX(); @@ -194,10 +192,8 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv, } else { shape_analysis = true; } - MasterTrainer* trainer = new MasterTrainer(NM_CHAR_ANISOTROPIC, - shape_analysis, - replication, - FLAGS_debug_level); + MasterTrainer* trainer = new MasterTrainer( + NM_CHAR_ANISOTROPIC, shape_analysis, replication, FLAGS_debug_level); IntFeatureSpace fs; fs.Init(kBoostXYBuckets, kBoostXYBuckets, kBoostDirBuckets); trainer->LoadUnicharset(FLAGS_U.c_str()); @@ -286,7 +282,7 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv, * @note Exceptions: none * @note History: Fri Aug 18 09:34:12 1989, DSJ, Created. */ -const char *GetNextFilename(int argc, const char* const * argv) { +const char* GetNextFilename(int argc, const char* const* argv) { if (tessoptind < argc) return argv[tessoptind++]; else @@ -305,14 +301,13 @@ const char *GetNextFilename(int argc, const char* const * argv) { * @note Exceptions: none * @note History: Fri Aug 18 15:57:41 1989, DSJ, Created. */ -LABELEDLIST FindList(LIST List, char* Label) { +LABELEDLIST +FindList(LIST List, char* Label) { LABELEDLIST LabeledList; - iterate (List) - { - LabeledList = (LABELEDLIST) first_node (List); - if (strcmp (LabeledList->Label, Label) == 0) - return (LabeledList); + iterate(List) { + LabeledList = (LABELEDLIST)first_node(List); + if (strcmp(LabeledList->Label, Label) == 0) return (LabeledList); } return (nullptr); @@ -328,12 +323,13 @@ LABELEDLIST FindList(LIST List, char* Label) { * @note Exceptions: none * @note History: Fri Aug 18 16:08:46 1989, DSJ, Created. */ -LABELEDLIST NewLabeledList(const char* Label) { +LABELEDLIST +NewLabeledList(const char* Label) { LABELEDLIST LabeledList; - LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE)); - LabeledList->Label = (char*)Emalloc (strlen (Label)+1); - strcpy (LabeledList->Label, Label); + LabeledList = (LABELEDLIST)Emalloc(sizeof(LABELEDLISTNODE)); + LabeledList->Label = (char*)Emalloc(strlen(Label) + 1); + strcpy(LabeledList->Label, Label); LabeledList->List = NIL_LIST; LabeledList->SampleCount = 0; LabeledList->font_sample_count = 0; @@ -364,11 +360,11 @@ LABELEDLIST NewLabeledList(const char* Label) { * font, and feature specification levels of structure. */ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, - const char *feature_name, int max_samples, - UNICHARSET* unicharset, - FILE* file, LIST* training_samples) { - char buffer[2048]; - char unichar[UNICHAR_LEN + 1]; + const char* feature_name, int max_samples, + UNICHARSET* unicharset, FILE* file, + LIST* training_samples) { + char buffer[2048]; + char unichar[UNICHAR_LEN + 1]; LABELEDLIST char_sample; FEATURE_SET feature_samples; CHAR_DESC char_desc; @@ -382,15 +378,15 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, } while (fgets(buffer, 2048, file) != nullptr) { - if (buffer[0] == '\n') - continue; + if (buffer[0] == '\n') continue; sscanf(buffer, "%*s %s", unichar); if (unicharset != nullptr && !unicharset->contains_unichar(unichar)) { unicharset->unichar_insert(unichar); if (unicharset->size() > MAX_NUM_CLASSES) { - tprintf("Error: Size of unicharset in training is " - "greater than MAX_NUM_CLASSES\n"); + tprintf( + "Error: Size of unicharset in training is " + "greater than MAX_NUM_CLASSES\n"); exit(1); } } @@ -409,14 +405,12 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, FreeFeatureSet(feature_samples); } for (size_t i = 0; i < char_desc->NumFeatureSets; i++) { - if (feature_type != i) - FreeFeatureSet(char_desc->FeatureSets[i]); + if (feature_type != i) FreeFeatureSet(char_desc->FeatureSets[i]); } free(char_desc); } } // ReadTrainingSamples - /*---------------------------------------------------------------------------*/ /** * This routine deallocates all of the space allocated to @@ -434,16 +428,16 @@ void FreeTrainingSamples(LIST CharList) { LIST nodes = CharList; iterate(CharList) { /* iterate through all of the fonts */ - char_sample = (LABELEDLIST) first_node(CharList); + char_sample = (LABELEDLIST)first_node(CharList); FeatureList = char_sample->List; iterate(FeatureList) { /* iterate through all of the classes */ - FeatureSet = (FEATURE_SET) first_node(FeatureList); + FeatureSet = (FEATURE_SET)first_node(FeatureList); FreeFeatureSet(FeatureSet); } FreeLabeledList(char_sample); } destroy(nodes); -} /* FreeTrainingSamples */ +} /* FreeTrainingSamples */ /*---------------------------------------------------------------------------*/ /** @@ -460,7 +454,7 @@ void FreeLabeledList(LABELEDLIST LabeledList) { destroy(LabeledList->List); free(LabeledList->Label); free(LabeledList); -} /* FreeLabeledList */ +} /* FreeLabeledList */ /*---------------------------------------------------------------------------*/ /** @@ -476,13 +470,13 @@ void FreeLabeledList(LABELEDLIST LabeledList) { * @note Exceptions: None * @note History: 8/16/89, DSJ, Created. */ -CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, +CLUSTERER* SetUpForClustering(const FEATURE_DEFS_STRUCT& FeatureDefs, LABELEDLIST char_sample, const char* program_feature_type) { uint16_t N; int i, j; FLOAT32* Sample = nullptr; - CLUSTERER *Clusterer; + CLUSTERER* Clusterer; int32_t CharID; LIST FeatureList = nullptr; FEATURE_SET FeatureSet = nullptr; @@ -495,12 +489,11 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, FeatureList = char_sample->List; CharID = 0; iterate(FeatureList) { - FeatureSet = (FEATURE_SET) first_node(FeatureList); + FeatureSet = (FEATURE_SET)first_node(FeatureList); for (i = 0; i < FeatureSet->MaxNumFeatures; i++) { if (Sample == nullptr) Sample = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); - for (j = 0; j < N; j++) - Sample[j] = FeatureSet->Features[i]->Params[j]; - MakeSample (Clusterer, Sample, CharID); + for (j = 0; j < N; j++) Sample[j] = FeatureSet->Features[i]->Params[j]; + MakeSample(Clusterer, Sample, CharID); } CharID++; } @@ -517,19 +510,18 @@ void MergeInsignificantProtos(LIST ProtoList, const char* label, LIST pProtoList = ProtoList; iterate(pProtoList) { - Prototype = (PROTOTYPE *) first_node (pProtoList); - if (Prototype->Significant || Prototype->Merged) - continue; + Prototype = (PROTOTYPE*)first_node(pProtoList); + if (Prototype->Significant || Prototype->Merged) continue; FLOAT32 best_dist = 0.125; PROTOTYPE* best_match = nullptr; // Find the nearest alive prototype. LIST list_it = ProtoList; iterate(list_it) { - PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it); + PROTOTYPE* test_p = (PROTOTYPE*)first_node(list_it); if (test_p != Prototype && !test_p->Merged) { - FLOAT32 dist = ComputeDistance(Clusterer->SampleSize, - Clusterer->ParamDesc, - Prototype->Mean, test_p->Mean); + FLOAT32 dist = + ComputeDistance(Clusterer->SampleSize, Clusterer->ParamDesc, + Prototype->Mean, test_p->Mean); if (dist < best_dist) { best_match = test_p; best_dist = dist; @@ -540,49 +532,44 @@ void MergeInsignificantProtos(LIST ProtoList, const char* label, if (debug) tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n", best_match->NumSamples, Prototype->NumSamples, - best_match->Mean[0], best_match->Mean[1], - Prototype->Mean[0], Prototype->Mean[1]); - best_match->NumSamples = MergeClusters(Clusterer->SampleSize, - Clusterer->ParamDesc, - best_match->NumSamples, - Prototype->NumSamples, - best_match->Mean, - best_match->Mean, Prototype->Mean); + best_match->Mean[0], best_match->Mean[1], Prototype->Mean[0], + Prototype->Mean[1]); + best_match->NumSamples = + MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc, + best_match->NumSamples, Prototype->NumSamples, + best_match->Mean, best_match->Mean, Prototype->Mean); Prototype->NumSamples = 0; Prototype->Merged = 1; } else if (best_match != nullptr) { if (debug) tprintf("Red proto at %g,%g matched a green one at %g,%g\n", - Prototype->Mean[0], Prototype->Mean[1], - best_match->Mean[0], best_match->Mean[1]); + Prototype->Mean[0], Prototype->Mean[1], best_match->Mean[0], + best_match->Mean[1]); Prototype->Merged = 1; } } // Mark significant those that now have enough samples. - int min_samples = (int32_t) (Config->MinSamples * Clusterer->NumChar); + int min_samples = (int32_t)(Config->MinSamples * Clusterer->NumChar); pProtoList = ProtoList; iterate(pProtoList) { - Prototype = (PROTOTYPE *) first_node (pProtoList); + Prototype = (PROTOTYPE*)first_node(pProtoList); // Process insignificant protos that do not match a green one if (!Prototype->Significant && Prototype->NumSamples >= min_samples && !Prototype->Merged) { if (debug) - tprintf("Red proto at %g,%g becoming green\n", - Prototype->Mean[0], Prototype->Mean[1]); + tprintf("Red proto at %g,%g becoming green\n", Prototype->Mean[0], + Prototype->Mean[1]); Prototype->Significant = true; } } } /* MergeInsignificantProtos */ /*-----------------------------------------------------------------------------*/ -void CleanUpUnusedData( - LIST ProtoList) -{ +void CleanUpUnusedData(LIST ProtoList) { PROTOTYPE* Prototype; - iterate(ProtoList) - { - Prototype = (PROTOTYPE *) first_node (ProtoList); + iterate(ProtoList) { + Prototype = (PROTOTYPE*)first_node(ProtoList); free(Prototype->Variance.Elliptical); Prototype->Variance.Elliptical = nullptr; free(Prototype->Magnitude.Elliptical); @@ -593,11 +580,8 @@ void CleanUpUnusedData( } /*------------------------------------------------------------------------*/ -LIST RemoveInsignificantProtos( - LIST ProtoList, - BOOL8 KeepSigProtos, - BOOL8 KeepInsigProtos, - int N) +LIST RemoveInsignificantProtos(LIST ProtoList, BOOL8 KeepSigProtos, + BOOL8 KeepInsigProtos, int N) { LIST NewProtoList = NIL_LIST; @@ -607,45 +591,39 @@ LIST RemoveInsignificantProtos( int i; pProtoList = ProtoList; - iterate(pProtoList) - { - Proto = (PROTOTYPE *) first_node (pProtoList); + iterate(pProtoList) { + Proto = (PROTOTYPE*)first_node(pProtoList); if ((Proto->Significant && KeepSigProtos) || - (!Proto->Significant && KeepInsigProtos)) - { - NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE)); + (!Proto->Significant && KeepInsigProtos)) { + NewProto = (PROTOTYPE*)Emalloc(sizeof(PROTOTYPE)); - NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); + NewProto->Mean = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); NewProto->Significant = Proto->Significant; NewProto->Style = Proto->Style; NewProto->NumSamples = Proto->NumSamples; NewProto->Cluster = nullptr; NewProto->Distrib = nullptr; - for (i=0; i < N; i++) - NewProto->Mean[i] = Proto->Mean[i]; + for (i = 0; i < N; i++) NewProto->Mean[i] = Proto->Mean[i]; if (Proto->Variance.Elliptical != nullptr) { - NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); - for (i=0; i < N; i++) + NewProto->Variance.Elliptical = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); + for (i = 0; i < N; i++) NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i]; - } - else + } else NewProto->Variance.Elliptical = nullptr; //--------------------------------------------- if (Proto->Magnitude.Elliptical != nullptr) { - NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); - for (i=0; i < N; i++) + NewProto->Magnitude.Elliptical = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); + for (i = 0; i < N; i++) NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i]; - } - else + } else NewProto->Magnitude.Elliptical = nullptr; //------------------------------------------------ if (Proto->Weight.Elliptical != nullptr) { - NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); - for (i=0; i < N; i++) + NewProto->Weight.Elliptical = (FLOAT32*)Emalloc(N * sizeof(FLOAT32)); + for (i = 0; i < N; i++) NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i]; - } - else + } else NewProto->Weight.Elliptical = nullptr; NewProto->TotalMagnitude = Proto->TotalMagnitude; @@ -658,27 +636,27 @@ LIST RemoveInsignificantProtos( } /* RemoveInsignificantProtos */ /*----------------------------------------------------------------------------*/ -MERGE_CLASS FindClass(LIST List, const char* Label) { +MERGE_CLASS +FindClass(LIST List, const char* Label) { MERGE_CLASS MergeClass; - iterate (List) - { - MergeClass = (MERGE_CLASS) first_node (List); - if (strcmp (MergeClass->Label, Label) == 0) - return (MergeClass); + iterate(List) { + MergeClass = (MERGE_CLASS)first_node(List); + if (strcmp(MergeClass->Label, Label) == 0) return (MergeClass); } return (nullptr); } /* FindClass */ /*---------------------------------------------------------------------------*/ -MERGE_CLASS NewLabeledClass(const char* Label) { +MERGE_CLASS +NewLabeledClass(const char* Label) { MERGE_CLASS MergeClass; MergeClass = new MERGE_CLASS_NODE; - MergeClass->Label = (char*)Emalloc (strlen (Label)+1); - strcpy (MergeClass->Label, Label); - MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS); + MergeClass->Label = (char*)Emalloc(strlen(Label) + 1); + strcpy(MergeClass->Label, Label); + MergeClass->Class = NewClass(MAX_NUM_PROTOS, MAX_NUM_CONFIGS); return (MergeClass); } /* NewLabeledClass */ @@ -699,8 +677,8 @@ void FreeLabeledClassList(LIST ClassList) { LIST nodes = ClassList; iterate(ClassList) /* iterate through all of the fonts */ { - MergeClass = (MERGE_CLASS) first_node (ClassList); - free (MergeClass->Label); + MergeClass = (MERGE_CLASS)first_node(ClassList); + free(MergeClass->Label); FreeClass(MergeClass->Class); delete MergeClass; } @@ -726,19 +704,17 @@ CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, // printf("Float2Int ...\n"); CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()]; - iterate(LabeledClassList) - { - UnicityTableEqEq font_set; - MergeClass = (MERGE_CLASS) first_node (LabeledClassList); + iterate(LabeledClassList) { + UnicityTableEqEq font_set; + MergeClass = (MERGE_CLASS)first_node(LabeledClassList); Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)]; NumProtos = MergeClass->Class->NumProtos; NumConfigs = MergeClass->Class->NumConfigs; font_set.move(&MergeClass->Class->font_set); Class->NumProtos = NumProtos; Class->MaxNumProtos = NumProtos; - Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos); - for(i=0; i < NumProtos; i++) - { + Class->Prototypes = (PROTO)Emalloc(sizeof(PROTO_STRUCT) * NumProtos); + for (i = 0; i < NumProtos; i++) { NewProto = ProtoIn(Class, i); OldProto = ProtoIn(MergeClass->Class, i); Values[0] = OldProto->X; @@ -757,36 +733,33 @@ CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, Class->NumConfigs = NumConfigs; Class->MaxNumConfigs = NumConfigs; Class->font_set.move(&font_set); - Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs); + Class->Configurations = + (BIT_VECTOR*)Emalloc(sizeof(BIT_VECTOR) * NumConfigs); NumWords = WordsInVectorOfSize(NumProtos); - for(i=0; i < NumConfigs; i++) - { + for (i = 0; i < NumConfigs; i++) { NewConfig = NewBitVector(NumProtos); OldConfig = MergeClass->Class->Configurations[i]; - for(j=0; j < NumWords; j++) - NewConfig[j] = OldConfig[j]; + for (j = 0; j < NumWords; j++) NewConfig[j] = OldConfig[j]; Class->Configurations[i] = NewConfig; } } return float_classes; -} // SetUpForFloat2Int +} // SetUpForFloat2Int /*--------------------------------------------------------------------------*/ -void Normalize ( - float *Values) -{ +void Normalize(float* Values) { float Slope; float Intercept; float Normalizer; - Slope = tan (Values [2] * 2 * PI); - Intercept = Values [1] - Slope * Values [0]; - Normalizer = 1 / sqrt (Slope * Slope + 1.0); + Slope = tan(Values[2] * 2 * PI); + Intercept = Values[1] - Slope * Values[0]; + Normalizer = 1 / sqrt(Slope * Slope + 1.0); - Values [0] = Slope * Normalizer; - Values [1] = - Normalizer; - Values [2] = Intercept * Normalizer; -} // Normalize + Values[0] = Slope * Normalizer; + Values[1] = -Normalizer; + Values[2] = Intercept * Normalizer; +} // Normalize /*-------------------------------------------------------------------------*/ void FreeNormProtoList(LIST CharList) @@ -797,26 +770,21 @@ void FreeNormProtoList(LIST CharList) LIST nodes = CharList; iterate(CharList) /* iterate through all of the fonts */ { - char_sample = (LABELEDLIST) first_node (CharList); - FreeLabeledList (char_sample); + char_sample = (LABELEDLIST)first_node(CharList); + FreeLabeledList(char_sample); } destroy(nodes); } // FreeNormProtoList /*---------------------------------------------------------------------------*/ -void AddToNormProtosList( - LIST* NormProtoList, - LIST ProtoList, - char* CharName) -{ +void AddToNormProtosList(LIST* NormProtoList, LIST ProtoList, char* CharName) { PROTOTYPE* Proto; LABELEDLIST LabeledProtoList; LabeledProtoList = NewLabeledList(CharName); - iterate(ProtoList) - { - Proto = (PROTOTYPE *) first_node (ProtoList); + iterate(ProtoList) { + Proto = (PROTOTYPE*)first_node(ProtoList); LabeledProtoList->List = push(LabeledProtoList->List, Proto); } *NormProtoList = push(*NormProtoList, LabeledProtoList); @@ -828,12 +796,11 @@ int NumberOfProtos(LIST ProtoList, BOOL8 CountSigProtos, int N = 0; PROTOTYPE* Proto; - iterate(ProtoList) - { - Proto = (PROTOTYPE *) first_node ( ProtoList ); + iterate(ProtoList) { + Proto = (PROTOTYPE*)first_node(ProtoList); if ((Proto->Significant && CountSigProtos) || (!Proto->Significant && CountInsigProtos)) N++; } - return(N); + return (N); } diff --git a/src/training/commontraining.h b/src/training/commontraining.h index 492ba93bbd..5a6164f5f6 100644 --- a/src/training/commontraining.h +++ b/src/training/commontraining.h @@ -15,8 +15,8 @@ #define TESSERACT_TRAINING_COMMONTRAINING_H_ #ifdef HAVE_CONFIG_H -#include "config_auto.h" #include "baseapi.h" +#include "config_auto.h" #endif #include "cluster.h" #include "commandlineflags.h" @@ -28,7 +28,7 @@ namespace tesseract { class Classify; class MasterTrainer; class ShapeTable; -} +} // namespace tesseract ////////////////////////////////////////////////////////////////////////////// // Globals /////////////////////////////////////////////////////////////////// @@ -42,24 +42,20 @@ extern CLUSTERCONFIG Config; ////////////////////////////////////////////////////////////////////////////// // Structs /////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -typedef struct -{ - char *Label; - int SampleCount; - int font_sample_count; - LIST List; -} -LABELEDLISTNODE, *LABELEDLIST; +typedef struct { + char* Label; + int SampleCount; + int font_sample_count; + LIST List; +} LABELEDLISTNODE, *LABELEDLIST; -typedef struct -{ +typedef struct { char* Label; - int NumMerged[MAX_NUM_PROTOS]; + int NumMerged[MAX_NUM_PROTOS]; CLASS_TYPE Class; -}MERGE_CLASS_NODE; +} MERGE_CLASS_NODE; using MERGE_CLASS = MERGE_CLASS_NODE*; - ////////////////////////////////////////////////////////////////////////////// // Functions ///////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// @@ -70,13 +66,13 @@ namespace tesseract { // Check whether the shared tesseract library is the right one. // This function must be inline because otherwise it would be part of // the shared library, so it could not compare the versions. -static inline void CheckSharedLibraryVersion() -{ +static inline void CheckSharedLibraryVersion() { #ifdef HAVE_CONFIG_H if (!!strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version())) { - tprintf("ERROR: shared library version mismatch (was %s, expected %s\n" - "Did you use a wrong shared tesseract library?\n", - TessBaseAPI::Version(), TESSERACT_VERSION_STR); + tprintf( + "ERROR: shared library version mismatch (was %s, expected %s\n" + "Did you use a wrong shared tesseract library?\n", + TessBaseAPI::Version(), TESSERACT_VERSION_STR); exit(1); } #endif @@ -100,90 +96,65 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table); // Computes canonical and cloud features. // If shape_table is not nullptr, but failed to load, make a fake flat one, // as shape clustering was not run. -MasterTrainer* LoadTrainingData(int argc, const char* const * argv, - bool replication, - ShapeTable** shape_table, +MasterTrainer* LoadTrainingData(int argc, const char* const* argv, + bool replication, ShapeTable** shape_table, STRING* file_prefix); } // namespace tesseract. -const char *GetNextFilename(int argc, const char* const * argv); +const char* GetNextFilename(int argc, const char* const* argv); -LABELEDLIST FindList( - LIST List, - char *Label); +LABELEDLIST +FindList(LIST List, char* Label); -LABELEDLIST NewLabeledList( - const char *Label); +LABELEDLIST +NewLabeledList(const char* Label); void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, - const char *feature_name, int max_samples, - UNICHARSET* unicharset, - FILE* file, LIST* training_samples); + const char* feature_name, int max_samples, + UNICHARSET* unicharset, FILE* file, + LIST* training_samples); -void WriteTrainingSamples( - const FEATURE_DEFS_STRUCT &FeatureDefs, - char *Directory, - LIST CharList, - const char *program_feature_type); +void WriteTrainingSamples(const FEATURE_DEFS_STRUCT& FeatureDefs, + char* Directory, LIST CharList, + const char* program_feature_type); -void FreeTrainingSamples( - LIST CharList); +void FreeTrainingSamples(LIST CharList); -void FreeLabeledList( - LABELEDLIST LabeledList); +void FreeLabeledList(LABELEDLIST LabeledList); -void FreeLabeledClassList( - LIST ClassListList); +void FreeLabeledClassList(LIST ClassListList); -CLUSTERER *SetUpForClustering( - const FEATURE_DEFS_STRUCT &FeatureDefs, - LABELEDLIST CharSample, - const char *program_feature_type); +CLUSTERER* SetUpForClustering(const FEATURE_DEFS_STRUCT& FeatureDefs, + LABELEDLIST CharSample, + const char* program_feature_type); -LIST RemoveInsignificantProtos( - LIST ProtoList, - BOOL8 KeepSigProtos, - BOOL8 KeepInsigProtos, - int N); +LIST RemoveInsignificantProtos(LIST ProtoList, BOOL8 KeepSigProtos, + BOOL8 KeepInsigProtos, int N); -void CleanUpUnusedData( - LIST ProtoList); +void CleanUpUnusedData(LIST ProtoList); -void MergeInsignificantProtos( - LIST ProtoList, - const char *label, - CLUSTERER *Clusterer, - CLUSTERCONFIG *Config); +void MergeInsignificantProtos(LIST ProtoList, const char* label, + CLUSTERER* Clusterer, CLUSTERCONFIG* Config); -MERGE_CLASS FindClass( - LIST List, - const char *Label); +MERGE_CLASS +FindClass(LIST List, const char* Label); -MERGE_CLASS NewLabeledClass( - const char *Label); +MERGE_CLASS +NewLabeledClass(const char* Label); -void FreeTrainingSamples( - LIST CharList); +void FreeTrainingSamples(LIST CharList); CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, LIST LabeledClassList); -void Normalize( - float *Values); - -void FreeNormProtoList( - LIST CharList); +void Normalize(float* Values); -void AddToNormProtosList( - LIST* NormProtoList, - LIST ProtoList, - char *CharName); +void FreeNormProtoList(LIST CharList); -int NumberOfProtos( - LIST ProtoList, - BOOL8 CountSigProtos, - BOOL8 CountInsigProtos); +void AddToNormProtosList(LIST* NormProtoList, LIST ProtoList, char* CharName); +int NumberOfProtos(LIST ProtoList, BOOL8 CountSigProtos, + BOOL8 CountInsigProtos); void allocNormProtos(); #endif // TESSERACT_TRAINING_COMMONTRAINING_H_ diff --git a/src/training/dawg2wordlist.cpp b/src/training/dawg2wordlist.cpp index 355c6fba81..2905983a01 100644 --- a/src/training/dawg2wordlist.cpp +++ b/src/training/dawg2wordlist.cpp @@ -17,7 +17,7 @@ // /////////////////////////////////////////////////////////////////////// -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion #include "dawg.h" #include "host.h" #include "serialis.h" @@ -27,8 +27,8 @@ const int kDictDebugLevel = 1; -tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset, - const char *filename) { +tesseract::Dawg* LoadSquishedDawg(const UNICHARSET& unicharset, + const char* filename) { const int kDictDebugLevel = 1; tesseract::TFile dawg_file; if (!dawg_file.Open(filename, nullptr)) { @@ -36,7 +36,7 @@ tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset, return nullptr; } tprintf("Loading word list from %s\n", filename); - tesseract::SquishedDawg *retval = new tesseract::SquishedDawg( + tesseract::SquishedDawg* retval = new tesseract::SquishedDawg( tesseract::DAWG_TYPE_WORD, "eng", SYSTEM_DAWG_PERM, kDictDebugLevel); if (!retval->Load(&dawg_file)) { tprintf("Could not read %s\n", filename); @@ -49,30 +49,30 @@ tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset, class WordOutputter { public: - WordOutputter(FILE *file) : file_(file) {} - void output_word(const char *word) { fprintf(file_, "%s\n", word); } + WordOutputter(FILE* file) : file_(file) {} + void output_word(const char* word) { fprintf(file_, "%s\n", word); } + private: - FILE *file_; + FILE* file_; }; // returns 0 if successful. -int WriteDawgAsWordlist(const UNICHARSET &unicharset, - const tesseract::Dawg *dawg, - const char *outfile_name) { - FILE *out = fopen(outfile_name, "wb"); +int WriteDawgAsWordlist(const UNICHARSET& unicharset, + const tesseract::Dawg* dawg, const char* outfile_name) { + FILE* out = fopen(outfile_name, "wb"); if (out == nullptr) { tprintf("Could not open %s for writing.\n", outfile_name); return 1; } WordOutputter outputter(out); - TessCallback1 *print_word_cb = + TessCallback1* print_word_cb = NewPermanentTessCallback(&outputter, &WordOutputter::output_word); dawg->iterate_words(unicharset, print_word_cb); delete print_word_cb; return fclose(out); } -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { tesseract::CheckSharedLibraryVersion(); if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) { @@ -80,19 +80,21 @@ int main(int argc, char *argv[]) { return 0; } else if (argc != 4) { tprintf("Print all the words in a given dawg.\n"); - tprintf("Usage: %s -v | --version | %s \n", - argv[0], argv[0]); + tprintf( + "Usage: %s -v | --version | %s " + "\n", + argv[0], argv[0]); return 1; } - const char *unicharset_file = argv[1]; - const char *dawg_file = argv[2]; - const char *wordlist_file = argv[3]; + const char* unicharset_file = argv[1]; + const char* dawg_file = argv[2]; + const char* wordlist_file = argv[3]; UNICHARSET unicharset; if (!unicharset.load_from_file(unicharset_file)) { tprintf("Error loading unicharset from %s.\n", unicharset_file); return 1; } - tesseract::Dawg *dict = LoadSquishedDawg(unicharset, dawg_file); + tesseract::Dawg* dict = LoadSquishedDawg(unicharset, dawg_file); if (dict == nullptr) { tprintf("Error loading dictionary from %s.\n", dawg_file); return 1; diff --git a/src/training/degradeimage.cpp b/src/training/degradeimage.cpp index 3f449cc7ca..0934cd5a97 100644 --- a/src/training/degradeimage.cpp +++ b/src/training/degradeimage.cpp @@ -21,7 +21,7 @@ #include "degradeimage.h" #include -#include "allheaders.h" // from leptonica +#include "allheaders.h" // from leptonica #include "genericvector.h" #include "helpers.h" // For TRand. #include "rect.h" @@ -116,9 +116,8 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer, radians_clockwise = randomizer->SignedRand(kRotationRange); } - input = pixRotate(pix, radians_clockwise, - L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, - 0, 0); + input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, + L_BRING_IN_WHITE, 0, 0); // Rotate the boxes to match. *rotation = radians_clockwise; pixDestroy(&pix); @@ -142,8 +141,7 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer, // For light and 0 exposure, there is no dilation, so compensate for the // convolution with a big darkening bias which is undone for lighter // exposures. - if (exposure <= 0) - erosion_offset = -3 * kExposureFactor; + if (exposure <= 0) erosion_offset = -3 * kExposureFactor; // Add in a general offset of the greyscales for the exposure level so // a threshold of 128 gives a reasonable binary result. erosion_offset -= exposure * kExposureFactor; @@ -155,14 +153,12 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer, for (int x = 0; x < width; ++x) { int pixel = GET_DATA_BYTE(data, x); if (randomizer != nullptr) - pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper; + pixel += randomizer->IntRand() % (kSaltnPepper * 2 + 1) - kSaltnPepper; if (height + width > kMinRampSize) - pixel -= (2*x + y) * 32 / (height + width); + pixel -= (2 * x + y) * 32 / (height + width); pixel += erosion_offset; - if (pixel < 0) - pixel = 0; - if (pixel > 255) - pixel = 255; + if (pixel < 0) pixel = 0; + if (pixel > 255) pixel = 255; SET_DATA_BYTE(data, x, pixel); } data += input->wpl; diff --git a/src/training/fileio.cpp b/src/training/fileio.cpp index dffe31f7cc..69d0c5854e 100644 --- a/src/training/fileio.cpp +++ b/src/training/fileio.cpp @@ -23,8 +23,8 @@ #include #endif -#include #include +#include #include #include "fileio.h" @@ -39,8 +39,7 @@ FILE* File::Open(const std::string& filename, const std::string& mode) { return fopen(filename.c_str(), mode.c_str()); } -FILE* File::OpenOrDie(const std::string& filename, - const std::string& mode) { +FILE* File::OpenOrDie(const std::string& filename, const std::string& mode) { FILE* stream = fopen(filename.c_str(), mode.c_str()); if (stream == nullptr) { tprintf("Unable to open '%s' in mode '%s'\n", filename.c_str(), @@ -78,7 +77,8 @@ bool File::ReadFileToString(const std::string& filename, std::string* out) { return in.CloseFile(); } -std::string File::JoinPath(const std::string& prefix, const std::string& suffix) { +std::string File::JoinPath(const std::string& prefix, + const std::string& suffix) { return (prefix.empty() || prefix[prefix.size() - 1] == '/') ? prefix + suffix : prefix + "/" + suffix; @@ -95,22 +95,22 @@ bool File::Delete(const char* pathname) { #ifdef _WIN32 bool File::DeleteMatchingFiles(const char* pattern) { - WIN32_FIND_DATA data; - BOOL result = TRUE; - HANDLE handle = FindFirstFile(pattern, &data); - bool all_deleted = true; - if (handle != INVALID_HANDLE_VALUE) { - for (; result; result = FindNextFile(handle, &data)) { + WIN32_FIND_DATA data; + BOOL result = TRUE; + HANDLE handle = FindFirstFile(pattern, &data); + bool all_deleted = true; + if (handle != INVALID_HANDLE_VALUE) { + for (; result; result = FindNextFile(handle, &data)) { all_deleted &= File::Delete(data.cFileName); - } - FindClose(handle); - } - return all_deleted; + } + FindClose(handle); + } + return all_deleted; } #else bool File::DeleteMatchingFiles(const char* pattern) { glob_t pglob; - char **paths; + char** paths; bool all_deleted = true; if (glob(pattern, 0, nullptr, &pglob) == 0) { for (paths = pglob.gl_pathv; *paths != nullptr; paths++) { @@ -125,18 +125,16 @@ bool File::DeleteMatchingFiles(const char* pattern) { /////////////////////////////////////////////////////////////////////////////// // InputBuffer:: /////////////////////////////////////////////////////////////////////////////// -InputBuffer::InputBuffer(FILE* stream) - : stream_(stream) { - fseek(stream_, 0, SEEK_END); - filesize_ = ftell(stream_); - fseek(stream_, 0, SEEK_SET); +InputBuffer::InputBuffer(FILE* stream) : stream_(stream) { + fseek(stream_, 0, SEEK_END); + filesize_ = ftell(stream_); + fseek(stream_, 0, SEEK_SET); } -InputBuffer::InputBuffer(FILE* stream, size_t) - : stream_(stream) { - fseek(stream_, 0, SEEK_END); - filesize_ = ftell(stream_); - fseek(stream_, 0, SEEK_SET); +InputBuffer::InputBuffer(FILE* stream, size_t) : stream_(stream) { + fseek(stream_, 0, SEEK_END); + filesize_ = ftell(stream_); + fseek(stream_, 0, SEEK_SET); } InputBuffer::~InputBuffer() { @@ -169,13 +167,9 @@ bool InputBuffer::CloseFile() { // OutputBuffer:: /////////////////////////////////////////////////////////////////////////////// -OutputBuffer::OutputBuffer(FILE* stream) - : stream_(stream) { -} +OutputBuffer::OutputBuffer(FILE* stream) : stream_(stream) {} -OutputBuffer::OutputBuffer(FILE* stream, size_t) - : stream_(stream) { -} +OutputBuffer::OutputBuffer(FILE* stream, size_t) : stream_(stream) {} OutputBuffer::~OutputBuffer() { if (stream_ != nullptr) { diff --git a/src/training/fileio.h b/src/training/fileio.h index e72439a954..cc50690182 100644 --- a/src/training/fileio.h +++ b/src/training/fileio.h @@ -35,7 +35,8 @@ class File { // Try to open the file 'filename' and to write 'str' in it. // Stop the program if it fails. - static void WriteStringToFileOrDie(const std::string& str, const std::string& filename); + static void WriteStringToFileOrDie(const std::string& str, + const std::string& filename); // Return true if the file 'filename' is readable. static bool Readable(const std::string& filename); @@ -45,7 +46,8 @@ class File { // Helper methods // Concatenate file paths removing any extra intervening '/' symbols. - static std::string JoinPath(const std::string& prefix, const std::string& suffix); + static std::string JoinPath(const std::string& prefix, + const std::string& suffix); // Delete a filename or all filenames matching a glob pattern. static bool Delete(const char* pathname); static bool DeleteMatchingFiles(const char* pattern); @@ -71,7 +73,7 @@ class InputBuffer { private: FILE* stream_; - int filesize_; + int filesize_; }; // A class to manipulate Files for writing. diff --git a/src/training/lang_model_helpers.cpp b/src/training/lang_model_helpers.cpp index e42613abdb..e067a6eb22 100644 --- a/src/training/lang_model_helpers.cpp +++ b/src/training/lang_model_helpers.cpp @@ -54,7 +54,8 @@ bool WriteFile(const std::string& output_dir, const std::string& lang, // Helper reads a file with optional reader and returns a STRING. // On failure emits a warning message and returns and empty STRING. -STRING ReadFile(const std::string& filename, FileReader reader) { +STRING +ReadFile(const std::string& filename, FileReader reader) { if (filename.empty()) return STRING(); GenericVector data; bool read_result; @@ -68,9 +69,9 @@ STRING ReadFile(const std::string& filename, FileReader reader) { } // Helper writes the unicharset to file and to the traineddata. -bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir, - const std::string& lang, FileWriter writer, - TessdataManager* traineddata) { +bool WriteUnicharset(const UNICHARSET& unicharset, + const std::string& output_dir, const std::string& lang, + FileWriter writer, TessdataManager* traineddata) { GenericVector unicharset_data; TFile fp; fp.OpenWrite(&unicharset_data); @@ -182,9 +183,11 @@ static bool WriteDawgs(const GenericVector& words, // The main function for combine_lang_model.cpp. // Returns EXIT_SUCCESS or EXIT_FAILURE for error. -int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir, - const std::string& version_str, const std::string& output_dir, - const std::string& lang, bool pass_through_recoder, +int CombineLangModel(const UNICHARSET& unicharset, + const std::string& script_dir, + const std::string& version_str, + const std::string& output_dir, const std::string& lang, + bool pass_through_recoder, const GenericVector& words, const GenericVector& puncs, const GenericVector& numbers, bool lang_is_rtl, @@ -203,7 +206,8 @@ int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir tprintf("Config file is optional, continuing...\n"); } // If there is a config file, read it and add to traineddata. - std::string config_filename = script_dir + "/" + lang + "/" + lang + ".config"; + std::string config_filename = + script_dir + "/" + lang + "/" + lang + ".config"; STRING config_file = ReadFile(config_filename, reader); if (config_file.length() > 0) { traineddata.OverwriteEntry(TESSDATA_LANG_CONFIG, &config_file[0], diff --git a/src/training/lang_model_helpers.h b/src/training/lang_model_helpers.h index 58e73c45c2..6afe24381b 100644 --- a/src/training/lang_model_helpers.h +++ b/src/training/lang_model_helpers.h @@ -34,12 +34,13 @@ bool WriteFile(const std::string& output_dir, const std::string& lang, FileWriter writer); // Helper reads a file with optional reader and returns a STRING. // On failure emits a warning message and returns and empty STRING. -STRING ReadFile(const std::string& filename, FileReader reader); +STRING +ReadFile(const std::string& filename, FileReader reader); // Helper writes the unicharset to file and to the traineddata. -bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir, - const std::string& lang, FileWriter writer, - TessdataManager* traineddata); +bool WriteUnicharset(const UNICHARSET& unicharset, + const std::string& output_dir, const std::string& lang, + FileWriter writer, TessdataManager* traineddata); // Helper creates the recoder from the unicharset and writes it to the // traineddata, with a human-readable form to file at: // //.charset_size= for some num being the size @@ -71,9 +72,11 @@ bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through, // puncs must be non-empty. // lang_is_rtl indicates that the language is generally written from right // to left (eg Arabic/Hebrew). -int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir, - const std::string& version_str, const std::string& output_dir, - const std::string& lang, bool pass_through_recoder, +int CombineLangModel(const UNICHARSET& unicharset, + const std::string& script_dir, + const std::string& version_str, + const std::string& output_dir, const std::string& lang, + bool pass_through_recoder, const GenericVector& words, const GenericVector& puncs, const GenericVector& numbers, bool lang_is_rtl, diff --git a/src/training/ligature_table.cpp b/src/training/ligature_table.cpp index 38b18dceb9..3ffb264d96 100644 --- a/src/training/ligature_table.cpp +++ b/src/training/ligature_table.cpp @@ -57,8 +57,11 @@ LigatureTable* LigatureTable::Get() { return instance_.get(); } -LigatureTable::LigatureTable() : min_lig_length_(0), max_lig_length_(0), - min_norm_length_(0), max_norm_length_(0) {} +LigatureTable::LigatureTable() + : min_lig_length_(0), + max_lig_length_(0), + min_norm_length_(0), + max_norm_length_(0) {} void LigatureTable::Init() { if (norm_to_lig_table_.empty()) { @@ -75,8 +78,7 @@ void LigatureTable::Init() { normed8_result.toUTF8String(normed8); // The icu::Normalizer maps the "LONG S T" ligature to "st". Correct that // here manually so that AddLigatures() will work as desired. - if (lig8 == "\uFB05") - normed8 = "ſt"; + if (lig8 == "\uFB05") normed8 = "ſt"; int lig_length = lig8.length(); int norm_length = normed8.size(); if (normed8 != lig8 && lig_length > 1 && norm_length > 1) { @@ -84,12 +86,10 @@ void LigatureTable::Init() { lig_to_norm_table_[lig8] = normed8; if (min_lig_length_ == 0 || lig_length < min_lig_length_) min_lig_length_ = lig_length; - if (lig_length > max_lig_length_) - max_lig_length_ = lig_length; + if (lig_length > max_lig_length_) max_lig_length_ = lig_length; if (min_norm_length_ == 0 || norm_length < min_norm_length_) min_norm_length_ = norm_length; - if (norm_length > max_norm_length_) - max_norm_length_ = norm_length; + if (norm_length > max_norm_length_) max_norm_length_ = norm_length; } } // Add custom extra ligatures. @@ -99,8 +99,7 @@ void LigatureTable::Init() { int norm_length = strlen(UNICHARSET::kCustomLigatures[i][0]); if (min_norm_length_ == 0 || norm_length < min_norm_length_) min_norm_length_ = norm_length; - if (norm_length > max_norm_length_) - max_norm_length_ = norm_length; + if (norm_length > max_norm_length_) max_norm_length_ = norm_length; lig_to_norm_table_[UNICHARSET::kCustomLigatures[i][1]] = UNICHARSET::kCustomLigatures[i][0]; diff --git a/src/training/ligature_table.h b/src/training/ligature_table.h index 725432c004..0f05f28933 100644 --- a/src/training/ligature_table.h +++ b/src/training/ligature_table.h @@ -43,18 +43,15 @@ class LigatureTable { // Convert the utf8 string so that ligaturizable sequences, such as "fi" get // replaced by the (utf8 code for) appropriate ligature characters. Only do so // if the corresponding ligature character is renderable in the current font. - std::string AddLigatures(const std::string& str, const PangoFontInfo* font) const; + std::string AddLigatures(const std::string& str, + const PangoFontInfo* font) const; // Remove all ligatures. std::string RemoveLigatures(const std::string& str) const; // Remove only custom ligatures (eg. "ct") encoded in the private-use-area. std::string RemoveCustomLigatures(const std::string& str) const; - const LigHash& norm_to_lig_table() const { - return norm_to_lig_table_; - } - const LigHash& lig_to_norm_table() const { - return lig_to_norm_table_; - } + const LigHash& norm_to_lig_table() const { return norm_to_lig_table_; } + const LigHash& lig_to_norm_table() const { return lig_to_norm_table_; } protected: LigatureTable(); diff --git a/src/training/lstmeval.cpp b/src/training/lstmeval.cpp index 3492a14c92..0452ec3ed9 100644 --- a/src/training/lstmeval.cpp +++ b/src/training/lstmeval.cpp @@ -35,7 +35,7 @@ INT_PARAM_FLAG(max_image_MB, 2000, "Max memory to use for images."); INT_PARAM_FLAG(verbosity, 1, "Amount of diagnosting information to output (0-2)."); -int main(int argc, char **argv) { +int main(int argc, char** argv) { tesseract::CheckSharedLibraryVersion(); ParseArguments(&argc, &argv); if (FLAGS_model.empty()) { diff --git a/src/training/lstmtester.cpp b/src/training/lstmtester.cpp index c20a2d9efb..f071d9eed4 100644 --- a/src/training/lstmtester.cpp +++ b/src/training/lstmtester.cpp @@ -49,9 +49,9 @@ bool LSTMTester::LoadAllEvalData(const GenericVector& filenames) { // Runs an evaluation asynchronously on the stored data and returns a string // describing the results of the previous test. -STRING LSTMTester::RunEvalAsync(int iteration, const double* training_errors, - const TessdataManager& model_mgr, - int training_stage) { +STRING +LSTMTester::RunEvalAsync(int iteration, const double* training_errors, + const TessdataManager& model_mgr, int training_stage) { STRING result; if (total_pages_ == 0) { result.add_str_int("No test data at iteration", iteration); @@ -79,9 +79,10 @@ STRING LSTMTester::RunEvalAsync(int iteration, const double* training_errors, // Runs an evaluation synchronously on the stored data and returns a string // describing the results. -STRING LSTMTester::RunEvalSync(int iteration, const double* training_errors, - const TessdataManager& model_mgr, - int training_stage, int verbosity) { +STRING +LSTMTester::RunEvalSync(int iteration, const double* training_errors, + const TessdataManager& model_mgr, int training_stage, + int verbosity) { LSTMTrainer trainer; trainer.InitCharSet(model_mgr); TFile fp; diff --git a/src/training/lstmtraining.cpp b/src/training/lstmtraining.cpp index ac7017c478..07506b973a 100644 --- a/src/training/lstmtraining.cpp +++ b/src/training/lstmtraining.cpp @@ -45,12 +45,13 @@ STRING_PARAM_FLAG(train_listfile, "", STRING_PARAM_FLAG(eval_listfile, "", "File listing eval files in lstmf training format."); BOOL_PARAM_FLAG(stop_training, false, - "Just convert the training model to a runtime model."); + "Just convert the training model to a runtime model."); BOOL_PARAM_FLAG(convert_to_int, false, "Convert the recognition model to an integer model."); BOOL_PARAM_FLAG(sequential_training, false, "Use the training files sequentially instead of round-robin."); -INT_PARAM_FLAG(append_index, -1, "Index in continue_from Network at which to" +INT_PARAM_FLAG(append_index, -1, + "Index in continue_from Network at which to" " attach the new network defined by net_spec"); BOOL_PARAM_FLAG(debug_network, false, "Get info on distribution of weight values"); @@ -70,7 +71,7 @@ const int kNumPagesPerBatch = 100; // were previously created using tesseract with the lstm.train config file. // The program iterates over the inputs, feeding the data to the network, // until the error rate reaches a specified target or max_iterations is reached. -int main(int argc, char **argv) { +int main(int argc, char** argv) { tesseract::CheckSharedLibraryVersion(); ParseArguments(&argc, &argv); // Purify the model name in case it is based on the network string. @@ -84,10 +85,8 @@ int main(int argc, char **argv) { } STRING model_output = FLAGS_model_output.c_str(); for (int i = 0; i < model_output.length(); ++i) { - if (model_output[i] == '[' || model_output[i] == ']') - model_output[i] = '-'; - if (model_output[i] == '(' || model_output[i] == ')') - model_output[i] = '_'; + if (model_output[i] == '[' || model_output[i] == ']') model_output[i] = '-'; + if (model_output[i] == '(' || model_output[i] == ')') model_output[i] = '_'; } // Setup the trainer. STRING checkpoint_file = FLAGS_model_output.c_str(); diff --git a/src/training/merge_unicharsets.cpp b/src/training/merge_unicharsets.cpp index f5facbda48..2b3ff95ff2 100644 --- a/src/training/merge_unicharsets.cpp +++ b/src/training/merge_unicharsets.cpp @@ -17,7 +17,7 @@ // /////////////////////////////////////////////////////////////////////// -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion #include "unicharset.h" int main(int argc, char** argv) { @@ -28,9 +28,10 @@ int main(int argc, char** argv) { return 0; } else if (argc < 4) { // Print usage - printf("Usage: %s -v | --version |\n" - " %s unicharset-in-1 ... unicharset-in-n unicharset-out\n", - argv[0], argv[0]); + printf( + "Usage: %s -v | --version |\n" + " %s unicharset-in-1 ... unicharset-in-n unicharset-out\n", + argv[0], argv[0]); return 1; } diff --git a/src/training/mergenf.cpp b/src/training/mergenf.cpp index b1f7f35933..ea0af56cf8 100644 --- a/src/training/mergenf.cpp +++ b/src/training/mergenf.cpp @@ -4,35 +4,35 @@ ** Author: Dan Johnson ** History: Wed Nov 21 09:55:23 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. +** (c) Copyright Hewlett-Packard Company, 1988. +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** http://www.apache.org/licenses/LICENSE-2.0 +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. ******************************************************************************/ #include "mergenf.h" -#include "host.h" -#include "efio.h" -#include "clusttool.h" #include "cluster.h" -#include "oldlist.h" -#include "protos.h" -#include "ndminx.h" -#include "ocrfeatures.h" +#include "clusttool.h" #include "const.h" +#include "efio.h" #include "featdefs.h" +#include "host.h" #include "intproto.h" +#include "ndminx.h" +#include "ocrfeatures.h" +#include "oldlist.h" #include "params.h" +#include "protos.h" #include +#include #include #include -#include /*-------------------once in subfeat---------------------------------*/ double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ..."); @@ -41,10 +41,12 @@ double_VAR(training_similarity_midpoint, 0.0075, "Similarity Midpoint ..."); double_VAR(training_similarity_curl, 2.0, "Similarity Curl ..."); -/*-----------------------------once in fasttrain----------------------------------*/ +/*-----------------------------once in + * fasttrain----------------------------------*/ double_VAR(training_tangent_bbox_pad, 0.5, "Tangent bounding box pad ..."); -double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ..."); +double_VAR(training_orthogonal_bbox_pad, 2.5, + "Orthogonal bounding box pad ..."); double_VAR(training_angle_pad, 45.0, "Angle pad ..."); @@ -64,53 +66,51 @@ double_VAR(training_angle_pad, 45.0, "Angle pad ..."); * @note Exceptions: none * @note History: Mon Nov 26 08:27:53 1990, DSJ, Created. */ -FLOAT32 CompareProtos(PROTO p1, PROTO p2) { +FLOAT32 +CompareProtos(PROTO p1, PROTO p2) { FEATURE Feature; FLOAT32 WorstEvidence = WORST_EVIDENCE; FLOAT32 Evidence; FLOAT32 Angle, Length; /* if p1 and p2 are not close in length, don't let them match */ - Length = fabs (p1->Length - p2->Length); - if (Length > MAX_LENGTH_MISMATCH) - return (0.0); + Length = fabs(p1->Length - p2->Length); + if (Length > MAX_LENGTH_MISMATCH) return (0.0); /* create a dummy pico-feature to be used for comparisons */ - Feature = NewFeature (&PicoFeatDesc); + Feature = NewFeature(&PicoFeatDesc); Feature->Params[PicoFeatDir] = p1->Angle; /* convert angle to radians */ Angle = p1->Angle * 2.0 * PI; /* find distance from center of p1 to 1/2 picofeat from end */ - Length = p1->Length / 2.0 - GetPicoFeatureLength () / 2.0; + Length = p1->Length / 2.0 - GetPicoFeatureLength() / 2.0; if (Length < 0) Length = 0; /* set the dummy pico-feature at one end of p1 and match it to p2 */ - Feature->Params[PicoFeatX] = p1->X + cos (Angle) * Length; - Feature->Params[PicoFeatY] = p1->Y + sin (Angle) * Length; - if (DummyFastMatch (Feature, p2)) { - Evidence = SubfeatureEvidence (Feature, p2); - if (Evidence < WorstEvidence) - WorstEvidence = Evidence; + Feature->Params[PicoFeatX] = p1->X + cos(Angle) * Length; + Feature->Params[PicoFeatY] = p1->Y + sin(Angle) * Length; + if (DummyFastMatch(Feature, p2)) { + Evidence = SubfeatureEvidence(Feature, p2); + if (Evidence < WorstEvidence) WorstEvidence = Evidence; } else { FreeFeature(Feature); return 0.0; } /* set the dummy pico-feature at the other end of p1 and match it to p2 */ - Feature->Params[PicoFeatX] = p1->X - cos (Angle) * Length; - Feature->Params[PicoFeatY] = p1->Y - sin (Angle) * Length; - if (DummyFastMatch (Feature, p2)) { - Evidence = SubfeatureEvidence (Feature, p2); - if (Evidence < WorstEvidence) - WorstEvidence = Evidence; + Feature->Params[PicoFeatX] = p1->X - cos(Angle) * Length; + Feature->Params[PicoFeatY] = p1->Y - sin(Angle) * Length; + if (DummyFastMatch(Feature, p2)) { + Evidence = SubfeatureEvidence(Feature, p2); + if (Evidence < WorstEvidence) WorstEvidence = Evidence; } else { FreeFeature(Feature); return 0.0; } - FreeFeature (Feature); + FreeFeature(Feature); return (WorstEvidence); } /* CompareProtos */ @@ -130,11 +130,8 @@ FLOAT32 CompareProtos(PROTO p1, PROTO p2) { * @note Exceptions: none * @note History: Mon Nov 26 08:15:08 1990, DSJ, Created. */ -void ComputeMergedProto (PROTO p1, - PROTO p2, - FLOAT32 w1, - FLOAT32 w2, - PROTO MergedProto) { +void ComputeMergedProto(PROTO p1, PROTO p2, FLOAT32 w1, FLOAT32 w2, + PROTO MergedProto) { FLOAT32 TotalWeight; TotalWeight = w1 + w2; @@ -165,23 +162,23 @@ void ComputeMergedProto (PROTO p1, * @note History: Sat Nov 24 11:42:58 1990, DSJ, Created. */ int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], - PROTOTYPE *Prototype) { - PROTO_STRUCT NewProto; - PROTO_STRUCT MergedProto; - int Pid; - PROTO Proto; - int BestProto; + PROTOTYPE* Prototype) { + PROTO_STRUCT NewProto; + PROTO_STRUCT MergedProto; + int Pid; + PROTO Proto; + int BestProto; FLOAT32 BestMatch; FLOAT32 Match, OldMatch, NewMatch; - MakeNewFromOld (&NewProto, Prototype); + MakeNewFromOld(&NewProto, Prototype); BestProto = NO_PROTO; BestMatch = WORST_MATCH_ALLOWED; for (Pid = 0; Pid < Class->NumProtos; Pid++) { - Proto = ProtoIn(Class, Pid); - ComputeMergedProto(Proto, &NewProto, - (FLOAT32) NumMerged[Pid], 1.0, &MergedProto); + Proto = ProtoIn(Class, Pid); + ComputeMergedProto(Proto, &NewProto, (FLOAT32)NumMerged[Pid], 1.0, + &MergedProto); OldMatch = CompareProtos(Proto, &MergedProto); NewMatch = CompareProtos(&NewProto, &MergedProto); Match = std::min(OldMatch, NewMatch); @@ -205,7 +202,7 @@ int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], * Exceptions: none * History: Mon Nov 26 09:45:39 1990, DSJ, Created. */ -void MakeNewFromOld(PROTO New, PROTOTYPE *Old) { +void MakeNewFromOld(PROTO New, PROTOTYPE* Old) { New->X = CenterX(Old->Mean); New->Y = CenterY(Old->Mean); New->Length = LengthOf(Old->Mean); @@ -220,20 +217,20 @@ void MakeNewFromOld(PROTO New, PROTOTYPE *Old) { * * Compare a feature to a prototype. Print the result. */ -FLOAT32 SubfeatureEvidence(FEATURE Feature, PROTO Proto) { - float Distance; - float Dangle; +FLOAT32 +SubfeatureEvidence(FEATURE Feature, PROTO Proto) { + float Distance; + float Dangle; - Dangle = Proto->Angle - Feature->Params[PicoFeatDir]; + Dangle = Proto->Angle - Feature->Params[PicoFeatDir]; if (Dangle < -0.5) Dangle += 1.0; - if (Dangle > 0.5) Dangle -= 1.0; + if (Dangle > 0.5) Dangle -= 1.0; Dangle *= training_angle_match_scale; Distance = Proto->A * Feature->Params[PicoFeatX] + - Proto->B * Feature->Params[PicoFeatY] + - Proto->C; + Proto->B * Feature->Params[PicoFeatY] + Proto->C; - return (EvidenceOf (Distance * Distance + Dangle * Dangle)); + return (EvidenceOf(Distance * Distance + Dangle * Dangle)); } /** @@ -244,8 +241,7 @@ FLOAT32 SubfeatureEvidence(FEATURE Feature, PROTO Proto) { * approximation. The equation that represents the transform is: * 1 / (1 + (sim / midpoint) ^ curl) */ -double EvidenceOf (double Similarity) { - +double EvidenceOf(double Similarity) { Similarity /= training_similarity_midpoint; if (training_similarity_curl == 3) @@ -253,7 +249,7 @@ double EvidenceOf (double Similarity) { else if (training_similarity_curl == 2) Similarity = Similarity * Similarity; else - Similarity = pow (Similarity, training_similarity_curl); + Similarity = pow(Similarity, training_similarity_curl); return (1.0 / (1.0 + Similarity)); } @@ -273,26 +269,21 @@ double EvidenceOf (double Similarity) { * @note Exceptions: none * @note History: Wed Nov 14 17:19:58 1990, DSJ, Created. */ -BOOL8 DummyFastMatch ( - FEATURE Feature, - PROTO Proto) -{ - FRECT BoundingBox; +BOOL8 +DummyFastMatch(FEATURE Feature, PROTO Proto) { + FRECT BoundingBox; FLOAT32 MaxAngleError; FLOAT32 AngleError; MaxAngleError = training_angle_pad / 360.0; - AngleError = fabs (Proto->Angle - Feature->Params[PicoFeatDir]); - if (AngleError > 0.5) - AngleError = 1.0 - AngleError; + AngleError = fabs(Proto->Angle - Feature->Params[PicoFeatDir]); + if (AngleError > 0.5) AngleError = 1.0 - AngleError; - if (AngleError > MaxAngleError) - return (FALSE); + if (AngleError > MaxAngleError) return (FALSE); - ComputePaddedBoundingBox (Proto, - training_tangent_bbox_pad * GetPicoFeatureLength (), - training_orthogonal_bbox_pad * GetPicoFeatureLength (), - &BoundingBox); + ComputePaddedBoundingBox( + Proto, training_tangent_bbox_pad * GetPicoFeatureLength(), + training_orthogonal_bbox_pad * GetPicoFeatureLength(), &BoundingBox); return PointInside(&BoundingBox, Feature->Params[PicoFeatX], Feature->Params[PicoFeatY]); @@ -315,13 +306,13 @@ BOOL8 DummyFastMatch ( * @note Exceptions: none * @note History: Wed Nov 14 14:55:30 1990, DSJ, Created. */ -void ComputePaddedBoundingBox (PROTO Proto, FLOAT32 TangentPad, - FLOAT32 OrthogonalPad, FRECT *BoundingBox) { +void ComputePaddedBoundingBox(PROTO Proto, FLOAT32 TangentPad, + FLOAT32 OrthogonalPad, FRECT* BoundingBox) { FLOAT32 Pad, Length, Angle; FLOAT32 CosOfAngle, SinOfAngle; - Length = Proto->Length / 2.0 + TangentPad; - Angle = Proto->Angle * 2.0 * PI; + Length = Proto->Length / 2.0 + TangentPad; + Angle = Proto->Angle * 2.0 * PI; CosOfAngle = fabs(cos(Angle)); SinOfAngle = fabs(sin(Angle)); @@ -344,7 +335,8 @@ void ComputePaddedBoundingBox (PROTO Proto, FLOAT32 TangentPad, * @note Exceptions: none * @note History: Wed Nov 14 17:26:35 1990, DSJ, Created. */ -BOOL8 PointInside(FRECT *Rectangle, FLOAT32 X, FLOAT32 Y) { +BOOL8 +PointInside(FRECT* Rectangle, FLOAT32 X, FLOAT32 Y) { if (X < Rectangle->MinX) return (FALSE); if (X > Rectangle->MaxX) return (FALSE); if (Y < Rectangle->MinY) return (FALSE); diff --git a/src/training/mergenf.h b/src/training/mergenf.h index 44325b935a..401ccd3339 100644 --- a/src/training/mergenf.h +++ b/src/training/mergenf.h @@ -4,100 +4,80 @@ ** Author: Dan Johnson ** History: Wed Nov 21 09:55:23 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. +** (c) Copyright Hewlett-Packard Company, 1988. +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** http://www.apache.org/licenses/LICENSE-2.0 +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. ******************************************************************************/ #ifndef TESSERACT_TRAINING_MERGENF_H_ #define TESSERACT_TRAINING_MERGENF_H_ /**---------------------------------------------------------------------------- - Include Files and Type Defines + Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "protos.h" +#include "callcpp.h" #include "cluster.h" #include "ocrfeatures.h" -#include "callcpp.h" #include "picofeat.h" +#include "protos.h" - -#define WORST_MATCH_ALLOWED (0.9) +#define WORST_MATCH_ALLOWED (0.9) #define WORST_EVIDENCE (1.0) -#define MAX_LENGTH_MISMATCH (2.0 * GetPicoFeatureLength ()) - - -#define PROTO_SUFFIX ".mf.p" -#define CONFIG_SUFFIX ".cl" -#define NO_PROTO (-1) -#define XPOSITION 0 -#define YPOSITION 1 -#define MFLENGTH 2 -#define ORIENTATION 3 - -typedef struct -{ - FLOAT32 MinX, MaxX, MinY, MaxY; +#define MAX_LENGTH_MISMATCH (2.0 * GetPicoFeatureLength()) + +#define PROTO_SUFFIX ".mf.p" +#define CONFIG_SUFFIX ".cl" +#define NO_PROTO (-1) +#define XPOSITION 0 +#define YPOSITION 1 +#define MFLENGTH 2 +#define ORIENTATION 3 + +typedef struct { + FLOAT32 MinX, MaxX, MinY, MaxY; } FRECT; /**---------------------------------------------------------------------------- - Public Macros + Public Macros ----------------------------------------------------------------------------**/ -#define CenterX(M) ( (M)[XPOSITION] ) -#define CenterY(M) ( (M)[YPOSITION] ) -#define LengthOf(M) ( (M)[MFLENGTH] ) -#define OrientationOf(M) ( (M)[ORIENTATION] ) +#define CenterX(M) ((M)[XPOSITION]) +#define CenterY(M) ((M)[YPOSITION]) +#define LengthOf(M) ((M)[MFLENGTH]) +#define OrientationOf(M) ((M)[ORIENTATION]) /**---------------------------------------------------------------------------- - Public Function Prototypes + Public Function Prototypes ----------------------------------------------------------------------------**/ -FLOAT32 CompareProtos ( - PROTO p1, - PROTO p2); - -void ComputeMergedProto ( - PROTO p1, - PROTO p2, - FLOAT32 w1, - FLOAT32 w2, - PROTO MergedProto); - -int FindClosestExistingProto ( - CLASS_TYPE Class, - int NumMerged[], - PROTOTYPE *Prototype); - -void MakeNewFromOld ( - PROTO New, - PROTOTYPE *Old); - -FLOAT32 SubfeatureEvidence ( - FEATURE Feature, - PROTO Proto); - -double EvidenceOf ( - double Similarity); - -BOOL8 DummyFastMatch ( - FEATURE Feature, - PROTO Proto); - -void ComputePaddedBoundingBox ( - PROTO Proto, - FLOAT32 TangentPad, - FLOAT32 OrthogonalPad, - FRECT *BoundingBox); - -BOOL8 PointInside ( - FRECT *Rectangle, - FLOAT32 X, - FLOAT32 Y); +FLOAT32 +CompareProtos(PROTO p1, PROTO p2); + +void ComputeMergedProto(PROTO p1, PROTO p2, FLOAT32 w1, FLOAT32 w2, + PROTO MergedProto); + +int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], + PROTOTYPE* Prototype); + +void MakeNewFromOld(PROTO New, PROTOTYPE* Old); + +FLOAT32 +SubfeatureEvidence(FEATURE Feature, PROTO Proto); + +double EvidenceOf(double Similarity); + +BOOL8 +DummyFastMatch(FEATURE Feature, PROTO Proto); + +void ComputePaddedBoundingBox(PROTO Proto, FLOAT32 TangentPad, + FLOAT32 OrthogonalPad, FRECT* BoundingBox); + +BOOL8 +PointInside(FRECT* Rectangle, FLOAT32 X, FLOAT32 Y); #endif // TESSERACT_TRAINING_MERGENF_H_ diff --git a/src/training/mftraining.cpp b/src/training/mftraining.cpp index 8230222611..7828afc420 100644 --- a/src/training/mftraining.cpp +++ b/src/training/mftraining.cpp @@ -30,8 +30,8 @@ #include "config_auto.h" #endif -#include #include +#include #define _USE_MATH_DEFINES #include #ifdef _WIN32 @@ -81,11 +81,11 @@ DECLARE_STRING_PARAM_FLAG(test_ch); -----------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED static void DisplayProtoList(const char* ch, LIST protolist) { - void* window = c_create_window("Char samples", 50, 200, - 520, 520, -130.0, 130.0, -130.0, 130.0); + void* window = c_create_window("Char samples", 50, 200, 520, 520, -130.0, + 130.0, -130.0, 130.0); LIST proto = protolist; iterate(proto) { - PROTOTYPE* prototype = reinterpret_cast(first_node(proto)); + PROTOTYPE* prototype = reinterpret_cast(first_node(proto)); if (prototype->Significant) c_line_color_index(window, Green); else if (prototype->NumSamples == 0) @@ -102,11 +102,11 @@ static void DisplayProtoList(const char* ch, LIST protolist) { c_move(window, (x - dx) * 256, (y - dy) * 256); c_draw(window, (x + dx) * 256, (y + dy) * 256); if (prototype->Significant) - tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n", - x, y, dx, dy, prototype->NumSamples); + tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n", x, y, dx, dy, + prototype->NumSamples); else if (prototype->NumSamples > 0 && !prototype->Merged) - tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n", - x, y, dx, dy, prototype->NumSamples); + tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n", x, y, dx, dy, + prototype->NumSamples); } c_make_current(window); } @@ -115,14 +115,11 @@ static void DisplayProtoList(const char* ch, LIST protolist) { // Helper to run clustering on a single config. // Mostly copied from the old mftraining, but with renamed variables. static LIST ClusterOneConfig(int shape_id, const char* class_label, - LIST mf_classes, - const ShapeTable& shape_table, + LIST mf_classes, const ShapeTable& shape_table, MasterTrainer* trainer) { int num_samples; - CLUSTERER *clusterer = trainer->SetupForClustering(shape_table, - feature_defs, - shape_id, - &num_samples); + CLUSTERER* clusterer = trainer->SetupForClustering(shape_table, feature_defs, + shape_id, &num_samples); Config.MagicSamples = num_samples; LIST proto_list = ClusterSamples(clusterer, &Config); CleanUpUnusedData(proto_list); @@ -130,14 +127,13 @@ static LIST ClusterOneConfig(int shape_id, const char* class_label, // Merge protos where reasonable to make more of them significant by // representing almost all samples of the class/font. MergeInsignificantProtos(proto_list, class_label, clusterer, &Config); - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (strcmp(FLAGS_test_ch.c_str(), class_label) == 0) DisplayProtoList(FLAGS_test_ch.c_str(), proto_list); - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED // Delete the protos that will not be used in the inttemp output file. - proto_list = RemoveInsignificantProtos(proto_list, true, - false, - clusterer->SampleSize); + proto_list = + RemoveInsignificantProtos(proto_list, true, false, clusterer->SampleSize); FreeClusterer(clusterer); MERGE_CLASS merge_class = FindClass(mf_classes, class_label); if (merge_class == nullptr) { @@ -163,8 +159,7 @@ static LIST ClusterOneConfig(int shape_id, const char* class_label, // Merge with the similar proto. ComputeMergedProto(ProtoIn(merge_class->Class, p_id), &dummy_proto, static_cast(merge_class->NumMerged[p_id]), - 1.0, - ProtoIn(merge_class->Class, p_id)); + 1.0, ProtoIn(merge_class->Class, p_id)); merge_class->NumMerged[p_id]++; } AddProtoToConfig(p_id, merge_class->Class->Configurations[config_id]); @@ -227,7 +222,7 @@ static void SetupConfigMap(ShapeTable* shape_table, IndexMapBiDi* config_map) { * @note History: Fri Aug 18 08:56:17 1989, DSJ, Created. * @note History: Mon May 18 1998, Christy Russson, Revistion started. */ -int main (int argc, char **argv) { +int main(int argc, char** argv) { tesseract::CheckSharedLibraryVersion(); ParseArguments(&argc, &argv); @@ -235,10 +230,8 @@ int main (int argc, char **argv) { ShapeTable* shape_table = nullptr; STRING file_prefix; // Load the training data. - MasterTrainer* trainer = tesseract::LoadTrainingData(argc, argv, - false, - &shape_table, - &file_prefix); + MasterTrainer* trainer = tesseract::LoadTrainingData( + argc, argv, false, &shape_table, &file_prefix); if (trainer == nullptr) return 1; // Failed. // Setup an index mapping from the shapes in the shape table to the classes @@ -282,8 +275,8 @@ int main (int argc, char **argv) { shape_table->GetFirstUnicharAndFont(s, &unichar_id, &font_id); } const char* class_label = unicharset->id_to_unichar(unichar_id); - mf_classes = ClusterOneConfig(s, class_label, mf_classes, *shape_table, - trainer); + mf_classes = + ClusterOneConfig(s, class_label, mf_classes, *shape_table, trainer); } STRING inttemp_file = file_prefix; inttemp_file += "inttemp"; @@ -291,14 +284,13 @@ int main (int argc, char **argv) { pffmtable_file += "pffmtable"; CLASS_STRUCT* float_classes = SetUpForFloat2Int(*unicharset, mf_classes); // Now write the inttemp and pffmtable. - trainer->WriteInttempAndPFFMTable(trainer->unicharset(), *unicharset, - *shape_table, float_classes, - inttemp_file.string(), - pffmtable_file.string()); + trainer->WriteInttempAndPFFMTable( + trainer->unicharset(), *unicharset, *shape_table, float_classes, + inttemp_file.string(), pffmtable_file.string()); for (int c = 0; c < unicharset->size(); ++c) { FreeClassFields(&float_classes[c]); } - delete [] float_classes; + delete[] float_classes; FreeLabeledClassList(mf_classes); delete trainer; delete shape_table; @@ -306,7 +298,8 @@ int main (int argc, char **argv) { if (!FLAGS_test_ch.empty()) { // If we are displaying debug window(s), wait for the user to look at them. printf("Hit return to exit...\n"); - while (getchar() != '\n'); + while (getchar() != '\n') + ; } return 0; -} /* main */ +} /* main */ diff --git a/src/training/normstrngs.cpp b/src/training/normstrngs.cpp index 13c6e58490..81e4e43171 100644 --- a/src/training/normstrngs.cpp +++ b/src/training/normstrngs.cpp @@ -217,7 +217,8 @@ bool IsOCREquivalent(char32 ch1, char32 ch2) { bool IsValidCodepoint(const char32 ch) { // In the range [0, 0xD800) or [0xE000, 0x10FFFF] - return (static_cast(ch) < 0xD800) || (ch >= 0xE000 && ch <= 0x10FFFF); + return (static_cast(ch) < 0xD800) || + (ch >= 0xE000 && ch <= 0x10FFFF); } bool IsWhitespace(const char32 ch) { diff --git a/src/training/pango_font_info.cpp b/src/training/pango_font_info.cpp index 4f92b72023..ffa3f09a1e 100644 --- a/src/training/pango_font_info.cpp +++ b/src/training/pango_font_info.cpp @@ -27,24 +27,24 @@ #undef __STRICT_ANSI__ #endif -#include #include +#include #include #ifndef _MSC_VER #include #endif #include -#include "pango_font_info.h" #include "commandlineflags.h" #include "fileio.h" #include "normstrngs.h" -#include "tlog.h" -#include "unichar.h" -#include "util.h" #include "pango/pango.h" #include "pango/pangocairo.h" #include "pango/pangofc-font.h" +#include "pango_font_info.h" +#include "tlog.h" +#include "unichar.h" +#include "util.h" STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp", "Overrides fontconfig default temporary dir"); @@ -139,7 +139,8 @@ void PangoFontInfo::HardInitFontConfig(const std::string& fonts_dir, "\n" "", fonts_dir.c_str(), cache_dir_.c_str()); - std::string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf"); + std::string fonts_conf_file = + File::JoinPath(cache_dir_.c_str(), "fonts.conf"); File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file); #ifdef _WIN32 std::string env("FONTCONFIG_PATH="); @@ -160,15 +161,14 @@ void PangoFontInfo::HardInitFontConfig(const std::string& fonts_dir, pango_cairo_font_map_set_default(nullptr); } -static void ListFontFamilies(PangoFontFamily*** families, - int* n_families) { +static void ListFontFamilies(PangoFontFamily*** families, int* n_families) { PangoFontInfo::SoftInitFontConfig(); PangoFontMap* font_map = pango_cairo_font_map_get_default(); DISABLE_HEAP_LEAK_CHECK; pango_font_map_list_families(font_map, families, n_families); } -bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) { +bool PangoFontInfo::ParseFontDescription(const PangoFontDescription* desc) { Clear(); const char* family = pango_font_description_get_family(desc); if (!family) { @@ -191,7 +191,7 @@ bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) { } bool PangoFontInfo::ParseFontDescriptionName(const std::string& name) { - PangoFontDescription *desc = pango_font_description_from_string(name.c_str()); + PangoFontDescription* desc = pango_font_description_from_string(name.c_str()); bool success = ParseFontDescription(desc); pango_font_description_free(desc); return success; @@ -215,14 +215,13 @@ PangoFont* PangoFontInfo::ToPangoFont() const { return font; } -bool PangoFontInfo::CoversUTF8Text(const char* utf8_text, int byte_length) const { +bool PangoFontInfo::CoversUTF8Text(const char* utf8_text, + int byte_length) const { PangoFont* font = ToPangoFont(); PangoCoverage* coverage = pango_font_get_coverage(font, nullptr); for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length); - it != UNICHAR::end(utf8_text, byte_length); - ++it) { - if (IsWhitespace(*it) || pango_is_zero_width(*it)) - continue; + it != UNICHAR::end(utf8_text, byte_length); ++it) { + if (IsWhitespace(*it) || pango_is_zero_width(*it)) continue; if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) { char tmp[5]; int len = it.get_utf8(tmp); @@ -311,13 +310,13 @@ bool PangoFontInfo::GetSpacingProperties(const std::string& utf8_char, // Handle multi-unicode strings by reporting the left-most position of the // x-bearing, and right-most position of the x-advance if the string were to // be rendered. - const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(), - utf8_char.length()); - const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(), - utf8_char.length()); + const UNICHAR::const_iterator it_begin = + UNICHAR::begin(utf8_char.c_str(), utf8_char.length()); + const UNICHAR::const_iterator it_end = + UNICHAR::end(utf8_char.c_str(), utf8_char.length()); for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) { - PangoGlyph glyph_index = pango_fc_font_get_glyph( - reinterpret_cast(font), *it); + PangoGlyph glyph_index = + pango_fc_font_get_glyph(reinterpret_cast(font), *it); if (!glyph_index) { // Glyph for given unicode character doesn't exist in font. g_object_unref(font); @@ -371,14 +370,14 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len, if (desc_) { pango_layout_set_font_description(layout, desc_); } else { - PangoFontDescription *desc = pango_font_description_from_string( - DescriptionName().c_str()); + PangoFontDescription* desc = + pango_font_description_from_string(DescriptionName().c_str()); pango_layout_set_font_description(layout, desc); pango_font_description_free(desc); } pango_layout_set_text(layout, utf8_word, len); PangoLayoutIter* run_iter = nullptr; - { // Fontconfig caches some information here that is not freed before exit. + { // Fontconfig caches some information here that is not freed before exit. DISABLE_HEAP_LEAK_CHECK; run_iter = pango_layout_get_iter(layout); } @@ -411,8 +410,8 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len, PangoGlyphItemIter cluster_iter; gboolean have_cluster; - for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, - run, utf8_word); + for (have_cluster = + pango_glyph_item_iter_init_start(&cluster_iter, run, utf8_word); have_cluster && !bad_glyph; have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) { const int start_byte_index = cluster_iter.start_index; @@ -428,12 +427,12 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len, } if (TLOG_IS_ON(2)) { printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ", - start_byte_index, end_byte_index, - start_glyph_index, end_glyph_index); + start_byte_index, end_byte_index, start_glyph_index, + end_glyph_index); } for (int i = start_glyph_index, step = (end_glyph_index > start_glyph_index) ? 1 : -1; - !bad_glyph && i != end_glyph_index; i+= step) { + !bad_glyph && i != end_glyph_index; i += step) { const bool unknown_glyph = (cluster_iter.glyph_item->glyphs->glyphs[i].glyph & PANGO_GLYPH_UNKNOWN_FLAG); @@ -449,8 +448,7 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len, if (TLOG_IS_ON(2)) { printf(" '%s'\n", cluster_text.c_str()); } - if (bad_glyph) - tlog(1, "Found illegal glyph!\n"); + if (bad_glyph) tlog(1, "Found illegal glyph!\n"); } } while (!bad_glyph && pango_layout_iter_next_run(run_iter)); @@ -461,7 +459,6 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len, return !bad_glyph; } - // ------------------------ FontUtils ------------------------------------ std::vector FontUtils::available_fonts_; // cache list @@ -478,8 +475,8 @@ std::vector FontUtils::available_fonts_; // cache list bool FontUtils::IsAvailableFont(const char* input_query_desc, std::string* best_match) { std::string query_desc(input_query_desc); - PangoFontDescription *desc = pango_font_description_from_string( - query_desc.c_str()); + PangoFontDescription* desc = + pango_font_description_from_string(query_desc.c_str()); PangoFont* selected_font = nullptr; { PangoFontInfo::SoftInitFontConfig(); @@ -528,8 +525,7 @@ static bool ShouldIgnoreFontFamilyName(const char* query) { nullptr}; const char** list = kIgnoredFamilyNames; for (; *list != nullptr; ++list) { - if (!strcmp(*list, query)) - return true; + if (!strcmp(*list, query)) return true; } return false; } @@ -583,7 +579,6 @@ const std::vector& FontUtils::ListAvailableFonts() { return available_fonts_; } - static void CharCoverageMapToBitmap(PangoCoverage* coverage, std::vector* unichar_bitmap) { const int kMinUnicodeValue = 33; @@ -592,8 +587,8 @@ static void CharCoverageMapToBitmap(PangoCoverage* coverage, // Mark off characters that the font can render. for (int i = kMinUnicodeValue; i <= kMaxUnicodeValue; ++i) { if (IsInterchangeValid(i)) { - (*unichar_bitmap)[i] - = (pango_coverage_get(coverage, i) == PANGO_COVERAGE_EXACT); + (*unichar_bitmap)[i] = + (pango_coverage_get(coverage, i) == PANGO_COVERAGE_EXACT); } } } @@ -616,8 +611,8 @@ void FontUtils::GetAllRenderableCharacters(const std::string& font_name, } /* static */ -void FontUtils::GetAllRenderableCharacters(const std::vector& fonts, - std::vector* unichar_bitmap) { +void FontUtils::GetAllRenderableCharacters( + const std::vector& fonts, std::vector* unichar_bitmap) { // Form the union of coverage maps from the fonts PangoCoverage* all_coverage = pango_coverage_new(); tlog(1, "Processing %u fonts\n", static_cast(fonts.size())); @@ -634,7 +629,6 @@ void FontUtils::GetAllRenderableCharacters(const std::vector& fonts pango_coverage_unref(all_coverage); } - // Utilities written to be backward compatible with StringRender /* static */ @@ -656,9 +650,9 @@ int FontUtils::FontScore(const std::unordered_map& ch_map, int ok_chars = 0; for (std::unordered_map::const_iterator it = ch_map.begin(); it != ch_map.end(); ++it) { - bool covered = (IsWhitespace(it->first) || - (pango_coverage_get(coverage, it->first) - == PANGO_COVERAGE_EXACT)); + bool covered = + (IsWhitespace(it->first) || + (pango_coverage_get(coverage, it->first) == PANGO_COVERAGE_EXACT)); if (covered) { ++(*raw_score); ok_chars += it->second; @@ -672,18 +666,17 @@ int FontUtils::FontScore(const std::unordered_map& ch_map, return ok_chars; } - /* static */ std::string FontUtils::BestFonts( const std::unordered_map& ch_map, - std::vector > >* fonts) { + std::vector>>* fonts) { const double kMinOKFraction = 0.99; // Weighted fraction of characters that must be renderable in a font to make // it OK even if the raw count is not good. const double kMinWeightedFraction = 0.99995; fonts->clear(); - std::vector > font_flags; + std::vector> font_flags; std::vector font_scores; std::vector raw_scores; int most_ok_chars = 0; @@ -720,17 +713,15 @@ std::string FontUtils::BestFonts( if ((score >= least_good_enough && raw_score >= least_raw_enough) || score >= override_enough) { fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i])); - tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n", - font_names[i].c_str(), - 100.0 * score / most_ok_chars, - raw_score, 100.0 * raw_score / best_raw_score); + tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(), + 100.0 * score / most_ok_chars, raw_score, + 100.0 * raw_score / best_raw_score); font_list += font_names[i]; font_list += "\n"; } else if (score >= least_good_enough || raw_score >= least_raw_enough) { tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n", - font_names[i].c_str(), - 100.0 * score / most_ok_chars, - raw_score, 100.0 * raw_score / best_raw_score); + font_names[i].c_str(), 100.0 * score / most_ok_chars, raw_score, + 100.0 * raw_score / best_raw_score); } } return font_list; @@ -738,7 +729,8 @@ std::string FontUtils::BestFonts( /* static */ bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len, - std::string* font_name, std::vector* graphemes) { + std::string* font_name, + std::vector* graphemes) { return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name, graphemes); } @@ -746,7 +738,8 @@ bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len, /* static */ bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len, const std::vector& all_fonts, - std::string* font_name, std::vector* graphemes) { + std::string* font_name, + std::vector* graphemes) { if (font_name) font_name->clear(); if (graphemes) graphemes->clear(); for (unsigned i = 0; i < all_fonts.size(); ++i) { diff --git a/src/training/pango_font_info.h b/src/training/pango_font_info.h index 5ee585cd55..aeadfd2743 100644 --- a/src/training/pango_font_info.h +++ b/src/training/pango_font_info.h @@ -87,8 +87,8 @@ class PangoFontInfo { // font. Returns false if the glyph for the character could not be found in // the font. // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html - bool GetSpacingProperties(const std::string& utf8_char, - int* x_bearing, int* x_advance) const; + bool GetSpacingProperties(const std::string& utf8_char, int* x_bearing, + int* x_advance) const; // If not already initialized, initializes FontConfig by setting its // environment variable and creating a fonts.conf file that points to the @@ -102,15 +102,13 @@ class PangoFontInfo { // Accessors std::string DescriptionName() const; // Font Family name eg. "Arial" - const std::string& family_name() const { return family_name_; } + const std::string& family_name() const { return family_name_; } // Size in points (1/72"), rounded to the nearest integer. int font_size() const { return font_size_; } FontTypeEnum font_type() const { return font_type_; } int resolution() const { return resolution_; } - void set_resolution(const int resolution) { - resolution_ = resolution; - } + void set_resolution(const int resolution) { resolution_ = resolution; } private: friend class FontUtils; @@ -165,21 +163,24 @@ class FontUtils { // and returns the font description name and the decomposition of the word to // graphemes. Returns false if no suitable font was found. static bool SelectFont(const char* utf8_word, const int utf8_len, - std::string* font_name, std::vector* graphemes); + std::string* font_name, + std::vector* graphemes); // Picks font among all_fonts that covers and can render the given word, // and returns the font description name and the decomposition of the word to // graphemes. Returns false if no suitable font was found. static bool SelectFont(const char* utf8_word, const int utf8_len, const std::vector& all_fonts, - std::string* font_name, std::vector* graphemes); + std::string* font_name, + std::vector* graphemes); // Returns a bitmask where the value of true at index 'n' implies that unicode // value 'n' is renderable by at least one available font. static void GetAllRenderableCharacters(std::vector* unichar_bitmap); // Variant of the above function that inspects only the provided font names. - static void GetAllRenderableCharacters(const std::vector& font_names, - std::vector* unichar_bitmap); + static void GetAllRenderableCharacters( + const std::vector& font_names, + std::vector* unichar_bitmap); static void GetAllRenderableCharacters(const std::string& font_name, std::vector* unichar_bitmap); @@ -194,7 +195,7 @@ class FontUtils { // The return string is a list of the acceptable fonts that were used. static std::string BestFonts( const std::unordered_map& ch_map, - std::vector > >* font_flag); + std::vector>>* font_flag); // FontScore returns the weighted renderability score of the given // hash map character table in the given font. The unweighted score diff --git a/src/training/set_unicharset_properties.cpp b/src/training/set_unicharset_properties.cpp index bdd65989de..4675fef92e 100644 --- a/src/training/set_unicharset_properties.cpp +++ b/src/training/set_unicharset_properties.cpp @@ -13,7 +13,7 @@ // the result back to a file. #include "commandlineflags.h" -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion #include "tprintf.h" #include "unicharset_training_utils.h" diff --git a/src/training/shapeclustering.cpp b/src/training/shapeclustering.cpp index 8567a3daa2..9914458839 100644 --- a/src/training/shapeclustering.cpp +++ b/src/training/shapeclustering.cpp @@ -44,7 +44,7 @@ STRING_PARAM_FLAG(canonical_class2, "", "Class to show ambigs for"); // NOT in the cloud. // Otherwise, if FLAGS_canonical_class1 is set, prints a table of font-wise // cluster distances between FLAGS_canonical_class1 and FLAGS_canonical_class2. -int main(int argc, char **argv) { +int main(int argc, char** argv) { tesseract::CheckSharedLibraryVersion(); ParseArguments(&argc, &argv); @@ -53,15 +53,13 @@ int main(int argc, char **argv) { tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(argc, argv, false, nullptr, &file_prefix); - if (!trainer) - return 1; + if (!trainer) return 1; if (FLAGS_display_cloud_font >= 0) { #ifndef GRAPHICS_DISABLED - trainer->DisplaySamples(FLAGS_canonical_class1.c_str(), - FLAGS_display_cloud_font, - FLAGS_canonical_class2.c_str(), - FLAGS_display_canonical_font); + trainer->DisplaySamples( + FLAGS_canonical_class1.c_str(), FLAGS_display_cloud_font, + FLAGS_canonical_class2.c_str(), FLAGS_display_canonical_font); #endif // GRAPHICS_DISABLED return 0; } else if (!FLAGS_canonical_class1.empty()) { diff --git a/src/training/stringrenderer.cpp b/src/training/stringrenderer.cpp index 8cc9c9077b..af86bd4ee6 100644 --- a/src/training/stringrenderer.cpp +++ b/src/training/stringrenderer.cpp @@ -20,15 +20,15 @@ #include "stringrenderer.h" +#include #include #include #include -#include #include #include #include -#include "allheaders.h" // from leptonica +#include "allheaders.h" // from leptonica #include "boxchar.h" #include "ligature_table.h" #include "normstrngs.h" @@ -69,7 +69,7 @@ static bool RandBool(const double prob, TRand* rand) { } /* static */ -Pix* CairoARGB32ToPixFormat(cairo_surface_t *surface) { +Pix* CairoARGB32ToPixFormat(cairo_surface_t* surface) { if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) { printf("Unexpected surface format %d\n", cairo_image_surface_get_format(surface)); @@ -173,7 +173,7 @@ void StringRenderer::InitPangoCairo() { void StringRenderer::SetLayoutProperties() { std::string font_desc = font_.DescriptionName(); // Specify the font via a description name - PangoFontDescription *desc = + PangoFontDescription* desc = pango_font_description_from_string(font_desc.c_str()); // Assign the font description to the layout pango_layout_set_font_description(layout_, desc); @@ -287,13 +287,14 @@ int StringRenderer::FindFirstPageBreakOffset(const char* text, UNICHAR::const_iterator it = UNICHAR::begin(text, text_length); const UNICHAR::const_iterator it_end = UNICHAR::end(text, text_length); const int kMaxUnicodeBufLength = 15000; - for (int i = 0; i < kMaxUnicodeBufLength && it != it_end; ++it, ++i); + for (int i = 0; i < kMaxUnicodeBufLength && it != it_end; ++it, ++i) + ; int buf_length = it.utf8_data() - text; tlog(1, "len = %d buf_len = %d\n", text_length, buf_length); pango_layout_set_text(layout_, text, buf_length); PangoLayoutIter* line_iter = nullptr; - { // Fontconfig caches some info here that is not freed before exit. + { // Fontconfig caches some info here that is not freed before exit. DISABLE_HEAP_LEAK_CHECK; line_iter = pango_layout_get_iter(layout_); } @@ -322,19 +323,16 @@ int StringRenderer::FindFirstPageBreakOffset(const char* text, } const std::vector& StringRenderer::GetBoxes() const { - return boxchars_; + return boxchars_; } -Boxa* StringRenderer::GetPageBoxes() const { - return page_boxes_; -} +Boxa* StringRenderer::GetPageBoxes() const { return page_boxes_; } void StringRenderer::RotatePageBoxes(float rotation) { - BoxChar::RotateBoxes(rotation, page_width_ / 2, page_height_ / 2, - start_box_, boxchars_.size(), &boxchars_); + BoxChar::RotateBoxes(rotation, page_width_ / 2, page_height_ / 2, start_box_, + boxchars_.size(), &boxchars_); } - void StringRenderer::ClearBoxes() { for (size_t i = 0; i < boxchars_.size(); ++i) delete boxchars_[i]; boxchars_.clear(); @@ -365,8 +363,8 @@ bool StringRenderer::GetClusterStrings(std::vector* cluster_text) { } PangoGlyphItemIter cluster_iter; gboolean have_cluster; - for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, - run, full_text); + for (have_cluster = + pango_glyph_item_iter_init_start(&cluster_iter, run, full_text); have_cluster; have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) { const int start_byte_index = cluster_iter.start_index; @@ -390,7 +388,8 @@ bool StringRenderer::GetClusterStrings(std::vector* cluster_text) { pango_layout_iter_free(run_iter); cluster_text->clear(); - for (std::map::const_iterator it = start_byte_to_text.begin(); + for (std::map::const_iterator it = + start_byte_to_text.begin(); it != start_byte_to_text.end(); ++it) { cluster_text->push_back(it->second); } @@ -457,7 +456,6 @@ static void MergeBoxCharsToWords(std::vector* boxchars) { boxchars->swap(result); } - void StringRenderer::ComputeClusterBoxes() { const char* text = pango_layout_get_text(layout_); PangoLayoutIter* cluster_iter = pango_layout_get_iter(layout_); @@ -475,8 +473,8 @@ void StringRenderer::ComputeClusterBoxes() { std::sort(cluster_start_indices.begin(), cluster_start_indices.end()); std::map cluster_start_to_end_index; for (size_t i = 0; i + 1 < cluster_start_indices.size(); ++i) { - cluster_start_to_end_index[cluster_start_indices[i]] - = cluster_start_indices[i + 1]; + cluster_start_to_end_index[cluster_start_indices[i]] = + cluster_start_indices[i + 1]; } // Iterate again to compute cluster boxes and their text with the obtained @@ -490,8 +488,8 @@ void StringRenderer::ComputeClusterBoxes() { pango_extents_to_pixels(&cluster_rect, nullptr); const int start_byte_index = pango_layout_iter_get_index(cluster_iter); const int end_byte_index = cluster_start_to_end_index[start_byte_index]; - std::string cluster_text = std::string(text + start_byte_index, - end_byte_index - start_byte_index); + std::string cluster_text = + std::string(text + start_byte_index, end_byte_index - start_byte_index); if (!cluster_text.empty() && cluster_text[0] == '\n') { tlog(2, "Skipping newlines at start of text.\n"); continue; @@ -507,9 +505,8 @@ void StringRenderer::ComputeClusterBoxes() { } // Prepare a boxchar for addition at this byte position. tlog(2, "[%d %d], %d, %d : start_byte=%d end_byte=%d : '%s'\n", - cluster_rect.x, cluster_rect.y, - cluster_rect.width, cluster_rect.height, - start_byte_index, end_byte_index, + cluster_rect.x, cluster_rect.y, cluster_rect.width, + cluster_rect.height, start_byte_index, end_byte_index, cluster_text.c_str()); ASSERT_HOST_MSG(cluster_rect.width, "cluster_text:%s start_byte_index:%d\n", @@ -530,8 +527,8 @@ void StringRenderer::ComputeClusterBoxes() { } BoxChar* boxchar = new BoxChar(cluster_text.c_str(), cluster_text.size()); boxchar->set_page(page_); - boxchar->AddBox(cluster_rect.x, cluster_rect.y, - cluster_rect.width, cluster_rect.height); + boxchar->AddBox(cluster_rect.x, cluster_rect.y, cluster_rect.width, + cluster_rect.height); start_byte_to_box[start_byte_index] = boxchar; } while (pango_layout_iter_next_cluster(cluster_iter)); pango_layout_iter_free(cluster_iter); @@ -599,15 +596,14 @@ void StringRenderer::ComputeClusterBoxes() { } } - void StringRenderer::CorrectBoxPositionsToLayout( std::vector* boxchars) { if (vertical_text_) { - const double rotation = - pango_gravity_to_rotation( + const double rotation = -pango_gravity_to_rotation( pango_context_get_base_gravity(pango_layout_get_context(layout_))); BoxChar::TranslateBoxes(page_width_ - h_margin_, v_margin_, boxchars); - BoxChar::RotateBoxes(rotation, page_width_ - h_margin_, v_margin_, - 0, boxchars->size(), boxchars); + BoxChar::RotateBoxes(rotation, page_width_ - h_margin_, v_margin_, 0, + boxchars->size(), boxchars); } else { BoxChar::TranslateBoxes(h_margin_, v_margin_, boxchars); } @@ -671,8 +667,8 @@ int StringRenderer::RenderToBinaryImage(const char* text, int text_length, /* static */ std::string StringRenderer::InsertWordJoiners(const std::string& text) { std::string out_str; - const UNICHAR::const_iterator it_end = UNICHAR::end(text.c_str(), - text.length()); + const UNICHAR::const_iterator it_end = + UNICHAR::end(text.c_str(), text.length()); for (UNICHAR::const_iterator it = UNICHAR::begin(text.c_str(), text.length()); it != it_end; ++it) { // Add the symbol to the output string. @@ -681,8 +677,8 @@ std::string StringRenderer::InsertWordJoiners(const std::string& text) { UNICHAR::const_iterator next_it = it; ++next_it; bool next_char_is_boundary = (next_it == it_end || *next_it == ' '); - bool next_char_is_combiner = (next_it == it_end) ? - false : IsCombiner(*next_it); + bool next_char_is_combiner = + (next_it == it_end) ? false : IsCombiner(*next_it); if (*it != ' ' && *it != '\n' && !next_char_is_boundary && !next_char_is_combiner) { out_str += kWordJoinerUTF8; @@ -692,10 +688,11 @@ std::string StringRenderer::InsertWordJoiners(const std::string& text) { } // Convert halfwidth Basic Latin characters to their fullwidth forms. -std::string StringRenderer::ConvertBasicLatinToFullwidthLatin(const std::string& str) { +std::string StringRenderer::ConvertBasicLatinToFullwidthLatin( + const std::string& str) { std::string full_str; - const UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), - str.length()); + const UNICHAR::const_iterator it_end = + UNICHAR::end(str.c_str(), str.length()); for (UNICHAR::const_iterator it = UNICHAR::begin(str.c_str(), str.length()); it != it_end; ++it) { // Convert printable and non-space 7-bit ASCII characters to @@ -712,7 +709,8 @@ std::string StringRenderer::ConvertBasicLatinToFullwidthLatin(const std::string& } // Convert fullwidth Latin characters to their halfwidth forms. -std::string StringRenderer::ConvertFullwidthLatinToBasicLatin(const std::string& str) { +std::string StringRenderer::ConvertFullwidthLatinToBasicLatin( + const std::string& str) { std::string half_str; UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length()); for (UNICHAR::const_iterator it = UNICHAR::begin(str.c_str(), str.length()); @@ -720,8 +718,8 @@ std::string StringRenderer::ConvertFullwidthLatinToBasicLatin(const std::string& char32 half_char = FullwidthToHalfwidth(*it); // Convert fullwidth Latin characters to their halfwidth forms // only if halfwidth forms are printable and non-space 7-bit ASCII. - if (IsInterchangeValid7BitAscii(half_char) && - isprint(half_char) && !isspace(half_char)) { + if (IsInterchangeValid7BitAscii(half_char) && isprint(half_char) && + !isspace(half_char)) { half_str.append(EncodeAsUTF8(half_char)); } else { half_str.append(it.utf8_data(), it.utf8_len()); @@ -755,7 +753,7 @@ int StringRenderer::RenderToImage(const char* text, int text_length, // Translate to the top-right margin of page cairo_translate(cr_, page_width_ - h_margin_, v_margin_); // Rotate the layout - double rotation = - pango_gravity_to_rotation( + double rotation = -pango_gravity_to_rotation( pango_context_get_base_gravity(pango_layout_get_context(layout_))); tlog(2, "Rotating by %f radians\n", rotation); cairo_rotate(cr_, rotation); @@ -832,8 +830,8 @@ int StringRenderer::RenderToImage(const char* text, int text_length, // ... // } while (offset < strlen(text)); // -int StringRenderer::RenderAllFontsToImage(double min_coverage, - const char* text, int text_length, +int StringRenderer::RenderAllFontsToImage(double min_coverage, const char* text, + int text_length, std::string* font_used, Pix** image) { *image = nullptr; // Select a suitable font to render the title with. @@ -872,9 +870,8 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage, ClearBoxes(); // Get rid of them as they are garbage. const int kMaxTitleLength = 1024; char title[kMaxTitleLength]; - snprintf(title, kMaxTitleLength, kTitleTemplate, - all_fonts[i].c_str(), ok_chars, - 100.0 * ok_chars / total_chars_, raw_score, + snprintf(title, kMaxTitleLength, kTitleTemplate, all_fonts[i].c_str(), + ok_chars, 100.0 * ok_chars / total_chars_, raw_score, 100.0 * raw_score / char_map_.size()); tprintf("%s\n", title); // This is a good font! Store the offset to return once we've tried all @@ -896,8 +893,8 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage, // We return the real offset only after cycling through the list of fonts. return 0; } else { - tprintf("Font %s failed with %d hits = %.2f%%\n", - all_fonts[i].c_str(), ok_chars, 100.0 * ok_chars / total_chars_); + tprintf("Font %s failed with %d hits = %.2f%%\n", all_fonts[i].c_str(), + ok_chars, 100.0 * ok_chars / total_chars_); } } font_index_ = 0; diff --git a/src/training/stringrenderer.h b/src/training/stringrenderer.h index 34da0a2051..1fcd73ac5b 100644 --- a/src/training/stringrenderer.h +++ b/src/training/stringrenderer.h @@ -34,9 +34,9 @@ #include #include "host.h" -#include "pango_font_info.h" #include "pango/pango-layout.h" #include "pango/pangocairo.h" +#include "pango_font_info.h" struct Boxa; struct Pix; @@ -66,13 +66,9 @@ class StringRenderer { bool set_font(const std::string& desc); // Char spacing is in PIXELS!!!!. void set_char_spacing(int char_spacing) { char_spacing_ = char_spacing; } - void set_leading(int leading) { - leading_ = leading; - } + void set_leading(int leading) { leading_ = leading; } void set_resolution(const int resolution); - void set_vertical_text(bool vertical_text) { - vertical_text_ = vertical_text; - } + void set_vertical_text(bool vertical_text) { vertical_text_ = vertical_text; } void set_gravity_hint_strong(bool gravity_hint_strong) { gravity_hint_strong_ = gravity_hint_strong; } @@ -93,42 +89,26 @@ class StringRenderer { free(features_); features_ = strdup(features); } - void set_page(int page) { - page_ = page; - } - void set_box_padding(int val) { - box_padding_ = val; - } - void set_drop_uncovered_chars(bool val) { - drop_uncovered_chars_ = val; - } + void set_page(int page) { page_ = page; } + void set_box_padding(int val) { box_padding_ = val; } + void set_drop_uncovered_chars(bool val) { drop_uncovered_chars_ = val; } void set_strip_unrenderable_words(bool val) { strip_unrenderable_words_ = val; } - void set_output_word_boxes(bool val) { - output_word_boxes_ = val; - } + void set_output_word_boxes(bool val) { output_word_boxes_ = val; } // Before rendering the string, replace latin characters with their optional // ligatured forms (such as "fi", "ffi" etc.) if the font_ covers those // unicodes. - void set_add_ligatures(bool add_ligatures) { - add_ligatures_ = add_ligatures; - } + void set_add_ligatures(bool add_ligatures) { add_ligatures_ = add_ligatures; } // Set the rgb value of the text ink. Values range in [0, 1.0] void set_pen_color(double r, double g, double b) { pen_color_[0] = r; pen_color_[1] = g; pen_color_[2] = b; } - void set_h_margin(const int h_margin) { - h_margin_ = h_margin; - } - void set_v_margin(const int v_margin) { - v_margin_ = v_margin; - } - const PangoFontInfo& font() const { - return font_; - } + void set_h_margin(const int h_margin) { h_margin_ = h_margin; } + void set_v_margin(const int v_margin) { v_margin_ = v_margin; } + const PangoFontInfo& font() const { return font_; } int h_margin() const { return h_margin_; } int v_margin() const { return v_margin_; } @@ -210,9 +190,9 @@ class StringRenderer { // Objects cached for subsequent calls to RenderAllFontsToImage() std::unordered_map char_map_; // Time-saving char histogram. - int total_chars_; // Number in the string to be rendered. + int total_chars_; // Number in the string to be rendered. unsigned int font_index_; // Index of next font to use in font list. - int last_offset_; // Offset returned from last successful rendering + int last_offset_; // Offset returned from last successful rendering private: StringRenderer(const StringRenderer&); diff --git a/src/training/tessopt.cpp b/src/training/tessopt.cpp index 2be2c5f359..4387710010 100644 --- a/src/training/tessopt.cpp +++ b/src/training/tessopt.cpp @@ -17,12 +17,12 @@ * **********************************************************************/ -#include -#include -#include "tessopt.h" +#include "tessopt.h" +#include +#include int tessoptind; -char *tessoptarg; +char* tessoptarg; /********************************************************************** * tessopt @@ -30,30 +30,27 @@ char *tessoptarg; * parse command line args. **********************************************************************/ -int tessopt ( //parse args -int32_t argc, //arg count -char *argv[], //args -const char *arglist //string of arg chars +int tessopt( // parse args + int32_t argc, // arg count + char* argv[], // args + const char* arglist // string of arg chars ) { - const char *arg; //arg char + const char* arg; // arg char - if (tessoptind == 0) - tessoptind = 1; + if (tessoptind == 0) tessoptind = 1; if (tessoptind < argc && argv[tessoptind][0] == '-') { - arg = strchr (arglist, argv[tessoptind][1]); - if (arg == nullptr || *arg == ':') - return '?'; //dud option + arg = strchr(arglist, argv[tessoptind][1]); + if (arg == nullptr || *arg == ':') return '?'; // dud option tessoptind++; tessoptarg = argv[tessoptind]; if (arg[1] == ':') { if (argv[tessoptind - 1][2] != '\0') - //immediately after + // immediately after tessoptarg = argv[tessoptind - 1] + 2; else tessoptind++; } return *arg; - } - else + } else return EOF; } diff --git a/src/training/tessopt.h b/src/training/tessopt.h index 865e39807f..2ba7d7d99c 100644 --- a/src/training/tessopt.h +++ b/src/training/tessopt.h @@ -20,15 +20,15 @@ #ifndef TESSERACT_TRAINING_TESSOPT_H_ #define TESSERACT_TRAINING_TESSOPT_H_ -#include "host.h" +#include "host.h" extern int tessoptind; -extern char *tessoptarg; +extern char* tessoptarg; -int tessopt ( //parse args -int32_t argc, //arg count -char *argv[], //args -const char *arglist //string of arg chars +int tessopt( // parse args + int32_t argc, // arg count + char* argv[], // args + const char* arglist // string of arg chars ); #endif // TESSERACT_TRAINING_TESSOPT_H_ diff --git a/src/training/text2image.cpp b/src/training/text2image.cpp index 273d37bb0c..c8c59e2aa6 100644 --- a/src/training/text2image.cpp +++ b/src/training/text2image.cpp @@ -28,9 +28,9 @@ * **********************************************************************/ +#include #include #include -#include #include #include #include @@ -40,7 +40,7 @@ #include "allheaders.h" // from leptonica #include "boxchar.h" #include "commandlineflags.h" -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion #include "degradeimage.h" #include "errcode.h" #include "fileio.h" @@ -122,8 +122,7 @@ BOOL_PARAM_FLAG(strip_unrenderable_words, true, // Font name. STRING_PARAM_FLAG(font, "Arial", "Font description name to use"); -BOOL_PARAM_FLAG(ligatures, false, - "Rebuild and render ligatures"); +BOOL_PARAM_FLAG(ligatures, false, "Rebuild and render ligatures"); BOOL_PARAM_FLAG(find_fonts, false, "Search for all fonts that can render the text"); @@ -137,7 +136,8 @@ DOUBLE_PARAM_FLAG(min_coverage, 1.0, BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit."); -BOOL_PARAM_FLAG(render_ngrams, false, "Put each space-separated entity from the" +BOOL_PARAM_FLAG(render_ngrams, false, + "Put each space-separated entity from the" " input file into one bounding box. The ngrams in the input" " file will be randomly permuted before rendering (so that" " there is sufficient variety of characters on each line)."); @@ -165,8 +165,9 @@ BOOL_PARAM_FLAG(output_individual_glyph_images, false, "If true also outputs individual character images"); INT_PARAM_FLAG(glyph_resized_size, 0, "Each glyph is square with this side length in pixels"); -INT_PARAM_FLAG(glyph_num_border_pixels_to_pad, 0, - "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad"); +INT_PARAM_FLAG( + glyph_num_border_pixels_to_pad, 0, + "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad"); namespace tesseract { @@ -186,7 +187,8 @@ static bool IsWhitespaceBox(const BoxChar* boxchar) { } static std::string StringReplace(const std::string& in, - const std::string& oldsub, const std::string& newsub) { + const std::string& oldsub, + const std::string& newsub) { std::string out; size_t start_pos = 0, pos; while ((pos = in.find(oldsub, start_pos)) != std::string::npos) { @@ -208,9 +210,8 @@ static std::string StringReplace(const std::string& in, // with "T", such that "AT" has spacing of -5, the entry/line for unichar "A" // in .fontinfo file will be: // A 0 -1 T -5 V -7 -void ExtractFontProperties(const std::string &utf8_text, - StringRenderer *render, - const std::string &output_base) { +void ExtractFontProperties(const std::string& utf8_text, StringRenderer* render, + const std::string& output_base) { std::map spacing_map; std::map::iterator spacing_map_it0; std::map::iterator spacing_map_it1; @@ -221,7 +222,7 @@ void ExtractFontProperties(const std::string &utf8_text, while (offset < len) { offset += render->RenderToImage(text + offset, strlen(text + offset), nullptr); - const std::vector &boxes = render->GetBoxes(); + const std::vector& boxes = render->GetBoxes(); // If the page break split a bigram, correct the offset so we try the bigram // on the next iteration. @@ -238,7 +239,7 @@ void ExtractFontProperties(const std::string &utf8_text, for (size_t b = 0; b < boxes.size(); b += 2) { while (b < boxes.size() && IsWhitespaceBox(boxes[b])) ++b; if (b + 1 >= boxes.size()) break; - const std::string &ch0 = boxes[b]->ch(); + const std::string& ch0 = boxes[b]->ch(); // We encountered a ligature. This happens in at least two scenarios: // One is when the rendered bigram forms a grapheme cluster (eg. the // second character in the bigram is a combining vowel), in which case we @@ -251,11 +252,11 @@ void ExtractFontProperties(const std::string &utf8_text, // The most frequent of all is a single character "word" made by the CJK // segmenter. // Safeguard against these cases here by just skipping the bigram. - if (IsWhitespaceBox(boxes[b+1])) { + if (IsWhitespaceBox(boxes[b + 1])) { continue; } - int xgap = (boxes[b+1]->box()->x - - (boxes[b]->box()->x + boxes[b]->box()->w)); + int xgap = + (boxes[b + 1]->box()->x - (boxes[b]->box()->x + boxes[b]->box()->w)); spacing_map_it0 = spacing_map.find(ch0); int ok_count = 0; if (spacing_map_it0 == spacing_map.end() && @@ -265,13 +266,13 @@ void ExtractFontProperties(const std::string &utf8_text, spacing_map_it0 = spacing_map.find(ch0); ++ok_count; } - const std::string &ch1 = boxes[b+1]->ch(); + const std::string& ch1 = boxes[b + 1]->ch(); tlog(3, "%s%s\n", ch0.c_str(), ch1.c_str()); spacing_map_it1 = spacing_map.find(ch1); if (spacing_map_it1 == spacing_map.end() && render->font().GetSpacingProperties(ch1, &x_bearing, &x_advance)) { spacing_map[ch1] = SpacingProperties( - x_bearing, x_advance - x_bearing - boxes[b+1]->box()->w); + x_bearing, x_advance - x_bearing - boxes[b + 1]->box()->w); spacing_map_it1 = spacing_map.find(ch1); ++ok_count; } @@ -290,8 +291,7 @@ void ExtractFontProperties(const std::string &utf8_text, std::map::const_iterator spacing_map_it; for (spacing_map_it = spacing_map.begin(); spacing_map_it != spacing_map.end(); ++spacing_map_it) { - snprintf(buf, kBufSize, - "%s %d %d %d", spacing_map_it->first.c_str(), + snprintf(buf, kBufSize, "%s %d %d %d", spacing_map_it->first.c_str(), spacing_map_it->second.x_gap_before, spacing_map_it->second.x_gap_after, static_cast(spacing_map_it->second.kerned_x_gaps.size())); @@ -299,8 +299,8 @@ void ExtractFontProperties(const std::string &utf8_text, std::map::const_iterator kern_it; for (kern_it = spacing_map_it->second.kerned_x_gaps.begin(); kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) { - snprintf(buf, kBufSize, - " %s %d", kern_it->first.c_str(), kern_it->second); + snprintf(buf, kBufSize, " %s %d", kern_it->first.c_str(), + kern_it->second); output_string.append(buf); } output_string.append("\n"); @@ -308,8 +308,7 @@ void ExtractFontProperties(const std::string &utf8_text, File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo"); } -bool MakeIndividualGlyphs(Pix* pix, - const std::vector& vbox, +bool MakeIndividualGlyphs(Pix* pix, const std::vector& vbox, const int input_tiff_page) { // If checks fail, return false without exiting text2image if (!pix) { @@ -337,18 +336,21 @@ bool MakeIndividualGlyphs(Pix* pix, const int w = b->w; const int h = b->h; // Check present tiff page (for multipage tiff) - if (y < y_previous-pixGetHeight(pix)/10) { + if (y < y_previous - pixGetHeight(pix) / 10) { tprintf("ERROR: Wrap-around encountered, at i=%d\n", i); current_tiff_page++; } - if (current_tiff_page < input_tiff_page) continue; - else if (current_tiff_page > input_tiff_page) break; + if (current_tiff_page < input_tiff_page) + continue; + else if (current_tiff_page > input_tiff_page) + break; // Check box validity - if (x < 0 || y < 0 || - (x+w-1) >= pixGetWidth(pix) || - (y+h-1) >= pixGetHeight(pix)) { - tprintf("ERROR: MakeIndividualGlyphs(): Index out of range, at i=%d" - " (x=%d, y=%d, w=%d, h=%d\n)", i, x, y, w, h); + if (x < 0 || y < 0 || (x + w - 1) >= pixGetWidth(pix) || + (y + h - 1) >= pixGetHeight(pix)) { + tprintf( + "ERROR: MakeIndividualGlyphs(): Index out of range, at i=%d" + " (x=%d, y=%d, w=%d, h=%d\n)", + i, x, y, w, h); continue; } else if (w < FLAGS_glyph_num_border_pixels_to_pad && h < FLAGS_glyph_num_border_pixels_to_pad) { @@ -362,17 +364,15 @@ bool MakeIndividualGlyphs(Pix* pix, continue; } // Resize to square - Pix* pix_glyph_sq = pixScaleToSize(pix_glyph, - FLAGS_glyph_resized_size, + Pix* pix_glyph_sq = pixScaleToSize(pix_glyph, FLAGS_glyph_resized_size, FLAGS_glyph_resized_size); if (!pix_glyph_sq) { tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i); continue; } // Zero-pad - Pix* pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq, - FLAGS_glyph_num_border_pixels_to_pad, - 0); + Pix* pix_glyph_sq_pad = + pixAddBorder(pix_glyph_sq, FLAGS_glyph_num_border_pixels_to_pad, 0); if (!pix_glyph_sq_pad) { tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n", i); @@ -384,8 +384,10 @@ bool MakeIndividualGlyphs(Pix* pix, snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(), glyph_count++); if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) { - tprintf("ERROR: MakeIndividualGlyphs(): Failed to write JPEG to %s," - " at i=%d\n", filename, i); + tprintf( + "ERROR: MakeIndividualGlyphs(): Failed to write JPEG to %s," + " at i=%d\n", + filename, i); continue; } @@ -450,8 +452,7 @@ static int Main() { } } - if (FLAGS_render_ngrams) - FLAGS_output_word_boxes = true; + if (FLAGS_render_ngrams) FLAGS_output_word_boxes = true; char font_desc_name[1024]; snprintf(font_desc_name, 1024, "%s %d", FLAGS_font.c_str(), @@ -527,10 +528,10 @@ static int Main() { // If we are rendering ngrams that will be OCRed later, shuffle them so that // tesseract does not have difficulties finding correct baseline, word // spaces, etc. - const char *str8 = src_utf8.c_str(); + const char* str8 = src_utf8.c_str(); int len = src_utf8.length(); int step; - std::vector > offsets; + std::vector> offsets; int offset = SpanUTF8Whitespace(str8); while (offset < len) { step = SpanUTF8NotWhitespace(str8 + offset); @@ -542,7 +543,7 @@ static int Main() { std::random_shuffle(offsets.begin(), offsets.end()); for (size_t i = 0, line = 1; i < offsets.size(); ++i) { - const char *curr_pos = str8 + offsets[i].first; + const char* curr_pos = str8 + offsets[i].first; int ngram_len = offsets[i].second; // Skip words that contain characters not in found in unicharset. std::string cleaned = UNICHARSET::CleanupString(curr_pos, ngram_len); @@ -590,10 +591,9 @@ static int Main() { tlog(1, "Starting page %d\n", im); Pix* pix = nullptr; if (FLAGS_find_fonts) { - offset += render.RenderAllFontsToImage(FLAGS_min_coverage, - to_render_utf8 + offset, - strlen(to_render_utf8 + offset), - &font_used, &pix); + offset += render.RenderAllFontsToImage( + FLAGS_min_coverage, to_render_utf8 + offset, + strlen(to_render_utf8 + offset), &font_used, &pix); } else { offset += render.RenderToImage(to_render_utf8 + offset, strlen(to_render_utf8 + offset), &pix); @@ -622,8 +622,8 @@ static int Main() { char tiff_name[1024]; if (FLAGS_find_fonts) { if (FLAGS_render_per_font) { - std::string fontname_for_file = tesseract::StringReplace( - font_used, " ", "_"); + std::string fontname_for_file = + tesseract::StringReplace(font_used, " ", "_"); snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(), fontname_for_file.c_str()); pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w"); diff --git a/src/training/tlog.h b/src/training/tlog.h index 6dcf6e6c3b..2d962e41f5 100644 --- a/src/training/tlog.h +++ b/src/training/tlog.h @@ -30,11 +30,12 @@ DECLARE_INT_PARAM_FLAG(tlog_level); // (default 0). Code using ParseCommandLineFlags() can control its value using // the --tlog_level commandline argument. Otherwise it must be specified in a // config file like other params. -#define tlog(level, ...) { \ - if (FLAGS_tlog_level >= level) { \ - tprintf_internal(__VA_ARGS__); \ - } \ -} +#define tlog(level, ...) \ + { \ + if (FLAGS_tlog_level >= level) { \ + tprintf_internal(__VA_ARGS__); \ + } \ + } #define TLOG_IS_ON(level) (FLAGS_tlog_level >= level) diff --git a/src/training/unicharset_extractor.cpp b/src/training/unicharset_extractor.cpp index 64b93717a0..58af84c466 100644 --- a/src/training/unicharset_extractor.cpp +++ b/src/training/unicharset_extractor.cpp @@ -24,7 +24,7 @@ #include #include "boxread.h" #include "commandlineflags.h" -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion #include "genericvector.h" #include "lang_model_helpers.h" #include "normstrngs.h" @@ -50,8 +50,7 @@ static void AddStringsToUnicharset(const GenericVector& strings, /*report_errors*/ true, strings[i].string(), &normalized)) { for (const std::string& normed : normalized) { - - // normed is a UTF-8 encoded string + // normed is a UTF-8 encoded string if (normed.empty() || IsUTF8Whitespace(normed.c_str())) continue; unicharset->unichar_insert(normed.c_str()); } @@ -68,9 +67,12 @@ static int Main(int argc, char** argv) { STRING file_data = tesseract::ReadFile(argv[arg], /*reader*/ nullptr); if (file_data.length() == 0) continue; GenericVector texts; - if (ReadMemBoxes(-1, /*skip_blanks*/ true, &file_data[0], - /*continue_on_failure*/ false, /*boxes*/ nullptr, - &texts, /*box_texts*/ nullptr, /*pages*/ nullptr)) { + if (ReadMemBoxes(-1, + /*skip_blanks*/ true, &file_data[0], + /*continue_on_failure*/ false, + /*boxes*/ nullptr, &texts, + /*box_texts*/ nullptr, + /*pages*/ nullptr)) { tprintf("Extracting unicharset from box file %s\n", argv[arg]); } else { tprintf("Extracting unicharset from plain text file %s\n", argv[arg]); @@ -79,8 +81,8 @@ static int Main(int argc, char** argv) { } AddStringsToUnicharset(texts, FLAGS_norm_mode, &unicharset); } - SetupBasicProperties(/*report_errors*/ true, /*decompose*/ false, - &unicharset); + SetupBasicProperties( + /*report_errors*/ true, /*decompose*/ false, &unicharset); // Write unicharset file. if (unicharset.save_to_file(FLAGS_output_unicharset.c_str())) { tprintf("Wrote unicharset file %s\n", FLAGS_output_unicharset.c_str()); diff --git a/src/training/unicharset_training_utils.cpp b/src/training/unicharset_training_utils.cpp index 418c8052f8..a5adb5fd48 100644 --- a/src/training/unicharset_training_utils.cpp +++ b/src/training/unicharset_training_utils.cpp @@ -75,8 +75,8 @@ void SetupBasicProperties(bool report_errors, bool decompose, unicharset->set_ispunctuation(unichar_id, unichar_ispunct); tesseract::IcuErrorCode err; - unicharset->set_script(unichar_id, uscript_getName( - uscript_getScript(uni_vector[0], err))); + unicharset->set_script( + unichar_id, uscript_getName(uscript_getScript(uni_vector[0], err))); const int num_code_points = uni_vector.size(); // Obtain the lower/upper case if needed and record it in the properties. @@ -88,8 +88,8 @@ void SetupBasicProperties(bool report_errors, bool decompose, // However since they deal with UChars (so need a conversion function // from char32 or UTF8string) and require a meaningful locale string, // for now u_tolower()/u_toupper() are used. - other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) : - u_tolower(uni_vector[i]); + other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) + : u_tolower(uni_vector[i]); } std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case); UNICHAR_ID other_case_id = @@ -107,9 +107,9 @@ void SetupBasicProperties(bool report_errors, bool decompose, for (int i = 0; i < num_code_points; ++i) { mirrors[i] = u_charMirror(uni_vector[i]); if (i == 0) { // set directionality to that of the 1st code point - unicharset->set_direction(unichar_id, - static_cast( - u_charDirection(uni_vector[i]))); + unicharset->set_direction( + unichar_id, + static_cast(u_charDirection(uni_vector[i]))); } } std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors); @@ -117,8 +117,8 @@ void SetupBasicProperties(bool report_errors, bool decompose, if (mirror_uch_id != INVALID_UNICHAR_ID) { unicharset->set_mirror(unichar_id, mirror_uch_id); } else if (report_errors) { - tprintf("Mirror %s of %s is not in unicharset\n", - mirror_uch.c_str(), unichar_str); + tprintf("Mirror %s of %s is not in unicharset\n", mirror_uch.c_str(), + unichar_str); } // Record normalized version of this unichar. @@ -140,11 +140,13 @@ void SetupBasicProperties(bool report_errors, bool decompose, } // Helper sets the properties from universal script unicharsets, if found. -void SetScriptProperties(const std::string& script_dir, UNICHARSET* unicharset) { +void SetScriptProperties(const std::string& script_dir, + UNICHARSET* unicharset) { for (int s = 0; s < unicharset->get_script_table_size(); ++s) { // Load the unicharset for the script if available. std::string filename = script_dir + "/" + - unicharset->get_script_from_script_id(s) + ".unicharset"; + unicharset->get_script_from_script_id(s) + + ".unicharset"; UNICHARSET script_set; if (script_set.load_from_file(filename.c_str())) { unicharset->SetPropertiesFromOther(script_set); @@ -162,12 +164,13 @@ void SetScriptProperties(const std::string& script_dir, UNICHARSET* unicharset) // Helper gets the combined x-heights string. std::string GetXheightString(const std::string& script_dir, - const UNICHARSET& unicharset) { + const UNICHARSET& unicharset) { std::string xheights_str; for (int s = 0; s < unicharset.get_script_table_size(); ++s) { // Load the xheights for the script if available. std::string filename = script_dir + "/" + - unicharset.get_script_from_script_id(s) + ".xheights"; + unicharset.get_script_from_script_id(s) + + ".xheights"; std::string script_heights; if (File::ReadFileToString(filename, &script_heights)) xheights_str += script_heights; diff --git a/src/training/unicharset_training_utils.h b/src/training/unicharset_training_utils.h index 410eeb39b8..e709ac6e03 100644 --- a/src/training/unicharset_training_utils.h +++ b/src/training/unicharset_training_utils.h @@ -41,7 +41,8 @@ inline void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) { // Helper sets the properties from universal script unicharsets, if found. void SetScriptProperties(const std::string& script_dir, UNICHARSET* unicharset); // Helper gets the combined x-heights string. -std::string GetXheightString(const std::string& script_dir, const UNICHARSET& unicharset); +std::string GetXheightString(const std::string& script_dir, + const UNICHARSET& unicharset); // Helper to set the properties for an input unicharset file, writes to the // output file. If an appropriate script unicharset can be found in the diff --git a/src/training/util.h b/src/training/util.h index 3e9957b1ec..77b67c3230 100644 --- a/src/training/util.h +++ b/src/training/util.h @@ -39,8 +39,8 @@ struct StringHash { return hash_code; } }; -#else // COMPILER_MSVC -struct StringHash : public stdext::hash_compare { +#else // COMPILER_MSVC +struct StringHash : public stdext::hash_compare { size_t operator()(const std::string& s) const { size_t hash_code = 0; const char* str = s.c_str(); @@ -59,7 +59,8 @@ struct StringHash : public stdext::hash_compare { #include "base/heap-checker.h" #define DISABLE_HEAP_LEAK_CHECK HeapLeakChecker::Disabler disabler #else -#define DISABLE_HEAP_LEAK_CHECK {} +#define DISABLE_HEAP_LEAK_CHECK \ + {} #endif #endif // TESSERACT_TRAINING_UTIL_H_ diff --git a/src/training/validator.cpp b/src/training/validator.cpp index bcdd844e82..33afd432fe 100644 --- a/src/training/validator.cpp +++ b/src/training/validator.cpp @@ -1,9 +1,9 @@ #include "validator.h" #include +#include #include #include -#include #include "icuerrorcode.h" #include "unicode/uchar.h" // From libicu @@ -178,8 +178,8 @@ bool Validator::IsVirama(char32 unicode) { /* static */ bool Validator::IsVedicAccent(char32 unicode) { return (0x1cd0 <= unicode && unicode < 0x1d00) || - (0xa8e0 <= unicode && unicode <= 0xa8f7) || - (0x951 <= unicode && unicode <= 0x954); + (0xa8e0 <= unicode && unicode <= 0xa8f7) || + (0x951 <= unicode && unicode <= 0x954); } // Returns true if the script is one that uses subscripts for conjuncts. diff --git a/src/training/validator.h b/src/training/validator.h index 890cfac5dc..4bdb509c64 100644 --- a/src/training/validator.h +++ b/src/training/validator.h @@ -126,7 +126,7 @@ class Validator { kNukta = 'N', // Occurs only immediately after consonants. kRobat = 'R', // Khmer only. kOther = 'O', // (digits, measures, non-Indic, etc) - // Additional classes used only by ValidateGrapheme. + // Additional classes used only by ValidateGrapheme. kWhitespace = ' ', kCombiner = 'c', // Combiners other than virama. }; diff --git a/src/training/wordlist2dawg.cpp b/src/training/wordlist2dawg.cpp index dd6b730669..373a146182 100644 --- a/src/training/wordlist2dawg.cpp +++ b/src/training/wordlist2dawg.cpp @@ -21,7 +21,7 @@ // generates the corresponding squished DAWG file. #include "classify.h" -#include "commontraining.h" // CheckSharedLibraryVersion +#include "commontraining.h" // CheckSharedLibraryVersion #include "dawg.h" #include "dict.h" #include "emalloc.h" @@ -37,13 +37,15 @@ int main(int argc, char** argv) { printf("%s\n", tesseract::TessBaseAPI::Version()); return 0; } else if (!(argc == 4 || (argc == 5 && strcmp(argv[1], "-t") == 0) || - (argc == 6 && strcmp(argv[1], "-r") == 0))) { - printf("Usage: %s -v | --version |\n" - " %s [-t | -r [reverse policy] ] word_list_file" - " dawg_file unicharset_file\n", argv[0], argv[0]); + (argc == 6 && strcmp(argv[1], "-r") == 0))) { + printf( + "Usage: %s -v | --version |\n" + " %s [-t | -r [reverse policy] ] word_list_file" + " dawg_file unicharset_file\n", + argv[0], argv[0]); return 1; } - tesseract::Classify *classify = new tesseract::Classify(); + tesseract::Classify* classify = new tesseract::Classify(); int argv_index = 0; if (argc == 5) ++argv_index; tesseract::Trie::RTLReversePolicy reverse_policy = @@ -66,12 +68,12 @@ int main(int argc, char** argv) { delete classify; return 1; } - const UNICHARSET &unicharset = classify->getDict().getUnicharset(); + const UNICHARSET& unicharset = classify->getDict().getUnicharset(); if (argc == 4 || argc == 6) { tesseract::Trie trie( // the first 3 arguments are not used in this case - tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, - unicharset.size(), classify->getDict().dawg_debug_level); + tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, unicharset.size(), + classify->getDict().dawg_debug_level); tprintf("Reading word list from '%s'\n", wordlist_filename); if (!trie.read_and_add_word_list(wordlist_filename, unicharset, reverse_policy)) { @@ -79,7 +81,7 @@ int main(int argc, char** argv) { exit(1); } tprintf("Reducing Trie to SquishedDawg\n"); - tesseract::SquishedDawg *dawg = trie.trie_to_dawg(); + tesseract::SquishedDawg* dawg = trie.trie_to_dawg(); if (dawg != nullptr && dawg->NumEdges() > 0) { tprintf("Writing squished DAWG to '%s'\n", dawg_filename); dawg->write_squished_dawg(dawg_filename); @@ -89,11 +91,11 @@ int main(int argc, char** argv) { delete dawg; } else if (argc == 5) { tprintf("Loading dawg DAWG from '%s'\n", dawg_filename); - tesseract::SquishedDawg words( - dawg_filename, - // these 3 arguments are not used in this case - tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, - classify->getDict().dawg_debug_level); + tesseract::SquishedDawg words(dawg_filename, + // these 3 arguments are not used in this case + tesseract::DAWG_TYPE_WORD, "", + SYSTEM_DAWG_PERM, + classify->getDict().dawg_debug_level); tprintf("Checking word list from '%s'\n", wordlist_filename); words.check_for_words(wordlist_filename, unicharset, true); } else { // should never get here diff --git a/src/viewer/scrollview.cpp b/src/viewer/scrollview.cpp index 18a9349b9c..810f50df29 100644 --- a/src/viewer/scrollview.cpp +++ b/src/viewer/scrollview.cpp @@ -18,16 +18,16 @@ /////////////////////////////////////////////////////////////////////// // -#include #include +#include #include +#include +#include +#include #include +#include #include -#include #include -#include -#include -#include // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -55,7 +55,8 @@ static std::map svmap; static SVMutex* svmap_mu; // A map of all semaphores waiting for a specific event on a specific window. static std::map, - std::pair > waiting_for_events; + std::pair> + waiting_for_events; static SVMutex* waiting_for_events_mu; SVEvent* SVEvent::copy() { @@ -86,9 +87,9 @@ void* ScrollView::MessageReceiver(void* a) { message = ScrollView::GetStream()->Receive(); } while (message == nullptr); -// This is the main loop which iterates until the server is dead (strlen = -1). -// It basically parses for 3 different messagetypes and then distributes the -// events accordingly. + // This is the main loop which iterates until the server is dead (strlen = + // -1). It basically parses for 3 different messagetypes and then distributes + // the events accordingly. while (1) { // The new event we create. SVEvent* cur = new SVEvent; @@ -132,18 +133,19 @@ void* ScrollView::MessageReceiver(void* a) { counter_event_id += 2; // In case of an SVET_EXIT event, quit the whole application. - if (ev_type == SVET_EXIT) { ScrollView::Exit(); } + if (ev_type == SVET_EXIT) { + ScrollView::Exit(); + } // Place two copies of it in the table for the window. cur->window->SetEvent(cur); // Check if any of the threads currently waiting want it. - std::pair awaiting_list(cur->window, - cur->type); + std::pair awaiting_list(cur->window, cur->type); std::pair awaiting_list_any(cur->window, SVET_ANY); - std::pair awaiting_list_any_window((ScrollView*)0, - SVET_ANY); + std::pair awaiting_list_any_window( + (ScrollView*)0, SVET_ANY); waiting_for_events_mu->Lock(); if (waiting_for_events.count(awaiting_list) > 0) { waiting_for_events[awaiting_list].second = cur; @@ -179,62 +181,61 @@ void* ScrollView::MessageReceiver(void* a) { } // Table to implement the color index values in the old system. -int table_colors[ScrollView::GREEN_YELLOW+1][4]= { - {0, 0, 0, 0}, // NONE (transparent) - {0, 0, 0, 255}, // BLACK. - {255, 255, 255, 255}, // WHITE. - {255, 0, 0, 255}, // RED. - {255, 255, 0, 255}, // YELLOW. - {0, 255, 0, 255}, // GREEN. - {0, 255, 255, 255}, // CYAN. - {0, 0, 255, 255}, // BLUE. - {255, 0, 255, 255}, // MAGENTA. - {0, 128, 255, 255}, // AQUAMARINE. - {0, 0, 64, 255}, // DARK_SLATE_BLUE. - {128, 128, 255, 255}, // LIGHT_BLUE. - {64, 64, 255, 255}, // MEDIUM_BLUE. - {0, 0, 32, 255}, // MIDNIGHT_BLUE. - {0, 0, 128, 255}, // NAVY_BLUE. - {192, 192, 255, 255}, // SKY_BLUE. - {64, 64, 128, 255}, // SLATE_BLUE. - {32, 32, 64, 255}, // STEEL_BLUE. - {255, 128, 128, 255}, // CORAL. - {128, 64, 0, 255}, // BROWN. - {128, 128, 0, 255}, // SANDY_BROWN. - {192, 192, 0, 255}, // GOLD. - {192, 192, 128, 255}, // GOLDENROD. - {0, 64, 0, 255}, // DARK_GREEN. - {32, 64, 0, 255}, // DARK_OLIVE_GREEN. - {64, 128, 0, 255}, // FOREST_GREEN. - {128, 255, 0, 255}, // LIME_GREEN. - {192, 255, 192, 255}, // PALE_GREEN. - {192, 255, 0, 255}, // YELLOW_GREEN. - {192, 192, 192, 255}, // LIGHT_GREY. - {64, 64, 128, 255}, // DARK_SLATE_GREY. - {64, 64, 64, 255}, // DIM_GREY. - {128, 128, 128, 255}, // GREY. - {64, 192, 0, 255}, // KHAKI. - {255, 0, 192, 255}, // MAROON. - {255, 128, 0, 255}, // ORANGE. - {255, 128, 64, 255}, // ORCHID. - {255, 192, 192, 255}, // PINK. - {128, 0, 128, 255}, // PLUM. - {255, 0, 64, 255}, // INDIAN_RED. - {255, 64, 0, 255}, // ORANGE_RED. - {255, 0, 192, 255}, // VIOLET_RED. - {255, 192, 128, 255}, // SALMON. - {128, 128, 0, 255}, // TAN. - {0, 255, 255, 255}, // TURQUOISE. - {0, 128, 128, 255}, // DARK_TURQUOISE. - {192, 0, 255, 255}, // VIOLET. - {128, 128, 0, 255}, // WHEAT. - {128, 255, 0, 255} // GREEN_YELLOW +int table_colors[ScrollView::GREEN_YELLOW + 1][4] = { + {0, 0, 0, 0}, // NONE (transparent) + {0, 0, 0, 255}, // BLACK. + {255, 255, 255, 255}, // WHITE. + {255, 0, 0, 255}, // RED. + {255, 255, 0, 255}, // YELLOW. + {0, 255, 0, 255}, // GREEN. + {0, 255, 255, 255}, // CYAN. + {0, 0, 255, 255}, // BLUE. + {255, 0, 255, 255}, // MAGENTA. + {0, 128, 255, 255}, // AQUAMARINE. + {0, 0, 64, 255}, // DARK_SLATE_BLUE. + {128, 128, 255, 255}, // LIGHT_BLUE. + {64, 64, 255, 255}, // MEDIUM_BLUE. + {0, 0, 32, 255}, // MIDNIGHT_BLUE. + {0, 0, 128, 255}, // NAVY_BLUE. + {192, 192, 255, 255}, // SKY_BLUE. + {64, 64, 128, 255}, // SLATE_BLUE. + {32, 32, 64, 255}, // STEEL_BLUE. + {255, 128, 128, 255}, // CORAL. + {128, 64, 0, 255}, // BROWN. + {128, 128, 0, 255}, // SANDY_BROWN. + {192, 192, 0, 255}, // GOLD. + {192, 192, 128, 255}, // GOLDENROD. + {0, 64, 0, 255}, // DARK_GREEN. + {32, 64, 0, 255}, // DARK_OLIVE_GREEN. + {64, 128, 0, 255}, // FOREST_GREEN. + {128, 255, 0, 255}, // LIME_GREEN. + {192, 255, 192, 255}, // PALE_GREEN. + {192, 255, 0, 255}, // YELLOW_GREEN. + {192, 192, 192, 255}, // LIGHT_GREY. + {64, 64, 128, 255}, // DARK_SLATE_GREY. + {64, 64, 64, 255}, // DIM_GREY. + {128, 128, 128, 255}, // GREY. + {64, 192, 0, 255}, // KHAKI. + {255, 0, 192, 255}, // MAROON. + {255, 128, 0, 255}, // ORANGE. + {255, 128, 64, 255}, // ORCHID. + {255, 192, 192, 255}, // PINK. + {128, 0, 128, 255}, // PLUM. + {255, 0, 64, 255}, // INDIAN_RED. + {255, 64, 0, 255}, // ORANGE_RED. + {255, 0, 192, 255}, // VIOLET_RED. + {255, 192, 128, 255}, // SALMON. + {128, 128, 0, 255}, // TAN. + {0, 255, 255, 255}, // TURQUOISE. + {0, 128, 128, 255}, // DARK_TURQUOISE. + {192, 0, 255, 255}, // VIOLET. + {128, 128, 0, 255}, // WHEAT. + {128, 255, 0, 255} // GREEN_YELLOW }; - /******************************************************************************* -* Scrollview implementation. -*******************************************************************************/ + * Scrollview implementation. + *******************************************************************************/ SVNetwork* ScrollView::stream_ = nullptr; int ScrollView::nr_created_windows_ = 0; @@ -245,7 +246,8 @@ ScrollView::ScrollView(const char* name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size, int y_canvas_size, bool y_axis_reversed, const char* server_name) { Initialize(name, x_pos, y_pos, x_size, y_size, x_canvas_size, y_canvas_size, - y_axis_reversed, server_name);} + y_axis_reversed, server_name); +} /// Calls Initialize with default argument for server_name_. ScrollView::ScrollView(const char* name, int x_pos, int y_pos, int x_size, @@ -306,8 +308,8 @@ void ScrollView::Initialize(const char* name, int x_pos, int y_pos, int x_size, snprintf(message, sizeof(message), "w%u = luajava.newInstance('com.google.scrollview.ui" ".SVWindow','%s',%u,%u,%u,%u,%u,%u,%u)\n", - window_id_, window_name_, window_id_, - x_pos, y_pos, x_size, y_size, x_canvas_size, y_canvas_size); + window_id_, window_name_, window_id_, x_pos, y_pos, x_size, y_size, + x_canvas_size, y_canvas_size); SendRawMessage(message); SVSync::StartThread(StartEventHandler, this); @@ -339,22 +341,26 @@ void* ScrollView::StartEventHandler(void* a) { if (new_event != nullptr) { sv->event_table_[k] = nullptr; sv->mutex_->Unlock(); - if (sv->event_handler_ != nullptr) { sv->event_handler_->Notify(new_event); } + if (sv->event_handler_ != nullptr) { + sv->event_handler_->Notify(new_event); + } if (new_event->type == SVET_DESTROY) { // Signal the destructor that it is safe to terminate. sv->event_handler_ended_ = true; sv = nullptr; } delete new_event; // Delete the pointer after it has been processed. - } else { sv->mutex_->Unlock(); } - // The thread should run as long as its associated window is alive. + } else { + sv->mutex_->Unlock(); + } + // The thread should run as long as its associated window is alive. } while (sv != nullptr); return nullptr; } #endif // GRAPHICS_DISABLED ScrollView::~ScrollView() { - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED svmap_mu->Lock(); if (svmap[window_id_] != nullptr) { svmap_mu->Unlock(); @@ -369,8 +375,7 @@ ScrollView::~ScrollView() { // The event handler thread for this window *must* receive the // destroy event and set its pointer to this to nullptr before we allow // the destructor to exit. - while (!event_handler_ended_) - Update(); + while (!event_handler_ended_) Update(); } else { svmap_mu->Unlock(); } @@ -380,14 +385,13 @@ ScrollView::~ScrollView() { for (int i = 0; i < SVET_COUNT; i++) { delete event_table_[i]; } - #endif // GRAPHICS_DISABLED +#endif // GRAPHICS_DISABLED } #ifndef GRAPHICS_DISABLED /// Send a message to the server, attaching the window id. void ScrollView::SendMsg(const char* format, ...) { - if (!points_->empty) - SendPolygon(); + if (!points_->empty) SendPolygon(); va_list args; char message[kMaxMsgSize]; @@ -403,26 +407,22 @@ void ScrollView::SendMsg(const char* format, ...) { /// Send a message to the server without a /// window id. Used for global events like exit(). -void ScrollView::SendRawMessage(const char* msg) { - stream_->Send(msg); -} +void ScrollView::SendRawMessage(const char* msg) { stream_->Send(msg); } /// Add an Event Listener to this ScrollView Window void ScrollView::AddEventHandler(SVEventHandler* listener) { event_handler_ = listener; } -void ScrollView::Signal() { - semaphore_->Signal(); -} +void ScrollView::Signal() { semaphore_->Signal(); } void ScrollView::SetEvent(SVEvent* svevent) { -// Copy event + // Copy event SVEvent* any = svevent->copy(); SVEvent* specific = svevent->copy(); any->counter = specific->counter + 1; -// Place both events into the queue. + // Place both events into the queue. mutex_->Lock(); // Delete the old objects.. delete event_table_[specific->type]; @@ -433,7 +433,6 @@ void ScrollView::SetEvent(SVEvent* svevent) { mutex_->Unlock(); } - /// Block until an event of the given type is received. /// Note: The calling function is responsible for deleting the returned /// SVEvent afterwards! @@ -442,7 +441,7 @@ SVEvent* ScrollView::AwaitEvent(SVEventType type) { SVSemaphore* sem = new SVSemaphore(); std::pair ea(this, type); waiting_for_events_mu->Lock(); - waiting_for_events[ea] = std::pair (sem, (SVEvent*)0); + waiting_for_events[ea] = std::pair(sem, (SVEvent*)0); waiting_for_events_mu->Unlock(); // Wait on it, but first flush. stream_->Flush(); @@ -463,7 +462,7 @@ SVEvent* ScrollView::AwaitEventAnyWindow() { SVSemaphore* sem = new SVSemaphore(); std::pair ea((ScrollView*)0, SVET_ANY); waiting_for_events_mu->Lock(); - waiting_for_events[ea] = std::pair (sem, (SVEvent*)0); + waiting_for_events[ea] = std::pair(sem, (SVEvent*)0); waiting_for_events_mu->Unlock(); // Wait on it. stream_->Flush(); @@ -485,8 +484,7 @@ void ScrollView::SendPolygon() { // last setCursor has any effect. if (length == 2) { // An isolated line! - SendMsg("drawLine(%d,%d,%d,%d)", - points_->xcoords[0], points_->ycoords[0], + SendMsg("drawLine(%d,%d,%d,%d)", points_->xcoords[0], points_->ycoords[0], points_->xcoords[1], points_->ycoords[1]); } else if (length > 2) { // A polyline. @@ -494,8 +492,8 @@ void ScrollView::SendPolygon() { char coordpair[kMaxIntPairSize]; std::string decimal_coords; for (int i = 0; i < length; ++i) { - snprintf(coordpair, kMaxIntPairSize, "%d,%d,", - points_->xcoords[i], points_->ycoords[i]); + snprintf(coordpair, kMaxIntPairSize, "%d,%d,", points_->xcoords[i], + points_->ycoords[i]); decimal_coords += coordpair; } decimal_coords += '\n'; @@ -507,10 +505,9 @@ void ScrollView::SendPolygon() { } } - /******************************************************************************* -* LUA "API" functions. -*******************************************************************************/ + * LUA "API" functions. + *******************************************************************************/ // Sets the position from which to draw to (x,y). void ScrollView::SetCursor(int x, int y) { @@ -532,7 +529,7 @@ void ScrollView::Line(int x1, int y1, int x2, int y2) { // We are already at x1, y1, so just draw to x2, y2. DrawTo(x2, y2); } else if (!points_->xcoords.empty() && x2 == points_->xcoords.back() && - TranslateYCoordinate(y2) == points_->ycoords.back()) { + TranslateYCoordinate(y2) == points_->ycoords.back()) { // We are already at x2, y2, so just draw to x1, y1. DrawTo(x1, y1); } else { @@ -544,14 +541,20 @@ void ScrollView::Line(int x1, int y1, int x2, int y2) { // Set the visibility of the window. void ScrollView::SetVisible(bool visible) { - if (visible) { SendMsg("setVisible(true)"); - } else { SendMsg("setVisible(false)"); } + if (visible) { + SendMsg("setVisible(true)"); + } else { + SendMsg("setVisible(false)"); + } } // Set the alwaysOnTop flag. void ScrollView::AlwaysOnTop(bool b) { - if (b) { SendMsg("setAlwaysOnTop(true)"); - } else { SendMsg("setAlwaysOnTop(false)"); } + if (b) { + SendMsg("setAlwaysOnTop(true)"); + } else { + SendMsg("setAlwaysOnTop(false)"); + } } // Adds a message entry to the message box. @@ -572,9 +575,7 @@ void ScrollView::AddMessage(const char* format, ...) { } // Set a messagebox. -void ScrollView::AddMessageBox() { - SendMsg("addMessageBox()"); -} +void ScrollView::AddMessageBox() { SendMsg("addMessageBox()"); } // Exit the client completely (and notify the server of it). void ScrollView::Exit() { @@ -583,29 +584,24 @@ void ScrollView::Exit() { } // Clear the canvas. -void ScrollView::Clear() { - SendMsg("clear()"); -} +void ScrollView::Clear() { SendMsg("clear()"); } // Set the stroke width. -void ScrollView::Stroke(float width) { - SendMsg("setStrokeWidth(%f)", width); -} +void ScrollView::Stroke(float width) { SendMsg("setStrokeWidth(%f)", width); } // Draw a rectangle using the current pen color. // The rectangle is filled with the current brush color. void ScrollView::Rectangle(int x1, int y1, int x2, int y2) { - if (x1 == x2 && y1 == y2) - return; // Scrollviewer locks up. - SendMsg("drawRectangle(%d,%d,%d,%d)", - x1, TranslateYCoordinate(y1), x2, TranslateYCoordinate(y2)); + if (x1 == x2 && y1 == y2) return; // Scrollviewer locks up. + SendMsg("drawRectangle(%d,%d,%d,%d)", x1, TranslateYCoordinate(y1), x2, + TranslateYCoordinate(y2)); } // Draw an ellipse using the current pen color. // The ellipse is filled with the current brush color. void ScrollView::Ellipse(int x1, int y1, int width, int height) { - SendMsg("drawEllipse(%d,%d,%u,%u)", - x1, TranslateYCoordinate(y1), width, height); + SendMsg("drawEllipse(%d,%d,%u,%u)", x1, TranslateYCoordinate(y1), width, + height); } // Set the pen color to the given RGB values. @@ -629,20 +625,28 @@ void ScrollView::Brush(int red, int green, int blue, int alpha) { } // Set the attributes for future Text(..) calls. -void ScrollView::TextAttributes(const char* font, int pixel_size, - bool bold, bool italic, bool underlined) { +void ScrollView::TextAttributes(const char* font, int pixel_size, bool bold, + bool italic, bool underlined) { const char* b; const char* i; const char* u; - if (bold) { b = "true"; - } else { b = "false"; } - if (italic) { i = "true"; - } else { i = "false"; } - if (underlined) { u = "true"; - } else { u = "false"; } - SendMsg("textAttributes('%s',%u,%s,%s,%s)", font, pixel_size, - b, i, u); + if (bold) { + b = "true"; + } else { + b = "false"; + } + if (italic) { + i = "true"; + } else { + i = "false"; + } + if (underlined) { + u = "true"; + } else { + u = "false"; + } + SendMsg("textAttributes('%s',%u,%s,%s,%s)", font, pixel_size, b, i, u); } // Draw text at the given coordinates. @@ -653,77 +657,82 @@ void ScrollView::Text(int x, int y, const char* mystring) { // Open and draw an image given a name at (x,y). void ScrollView::Image(const char* image, int x_pos, int y_pos) { SendMsg("openImage('%s')", image); - SendMsg("drawImage('%s',%d,%d)", - image, x_pos, TranslateYCoordinate(y_pos)); + SendMsg("drawImage('%s',%d,%d)", image, x_pos, TranslateYCoordinate(y_pos)); } // Add new checkboxmenuentry to menubar. -void ScrollView::MenuItem(const char* parent, const char* name, - int cmdEvent, bool flag) { - if (parent == nullptr) { parent = ""; } - if (flag) { SendMsg("addMenuBarItem('%s','%s',%d,true)", - parent, name, cmdEvent); - } else { SendMsg("addMenuBarItem('%s','%s',%d,false)", - parent, name, cmdEvent); } +void ScrollView::MenuItem(const char* parent, const char* name, int cmdEvent, + bool flag) { + if (parent == nullptr) { + parent = ""; + } + if (flag) { + SendMsg("addMenuBarItem('%s','%s',%d,true)", parent, name, cmdEvent); + } else { + SendMsg("addMenuBarItem('%s','%s',%d,false)", parent, name, cmdEvent); + } } // Add new menuentry to menubar. void ScrollView::MenuItem(const char* parent, const char* name, int cmdEvent) { - if (parent == nullptr) { parent = ""; } + if (parent == nullptr) { + parent = ""; + } SendMsg("addMenuBarItem('%s','%s',%d)", parent, name, cmdEvent); } // Add new submenu to menubar. void ScrollView::MenuItem(const char* parent, const char* name) { - if (parent == nullptr) { parent = ""; } + if (parent == nullptr) { + parent = ""; + } SendMsg("addMenuBarItem('%s','%s')", parent, name); } // Add new submenu to popupmenu. void ScrollView::PopupItem(const char* parent, const char* name) { - if (parent == nullptr) { parent = ""; } + if (parent == nullptr) { + parent = ""; + } SendMsg("addPopupMenuItem('%s','%s')", parent, name); } // Add new submenuentry to popupmenu. -void ScrollView::PopupItem(const char* parent, const char* name, - int cmdEvent, const char* value, const char* desc) { - if (parent == nullptr) { parent = ""; } +void ScrollView::PopupItem(const char* parent, const char* name, int cmdEvent, + const char* value, const char* desc) { + if (parent == nullptr) { + parent = ""; + } char* esc = AddEscapeChars(value); char* esc2 = AddEscapeChars(desc); - SendMsg("addPopupMenuItem('%s','%s',%d,'%s','%s')", parent, name, - cmdEvent, esc, esc2); + SendMsg("addPopupMenuItem('%s','%s',%d,'%s','%s')", parent, name, cmdEvent, + esc, esc2); delete[] esc; delete[] esc2; } // Send an update message for a single window. -void ScrollView::UpdateWindow() { - SendMsg("update()"); -} +void ScrollView::UpdateWindow() { SendMsg("update()"); } // Note: this is an update to all windows void ScrollView::Update() { svmap_mu->Lock(); for (std::map::iterator iter = svmap.begin(); - iter != svmap.end(); ++iter) { - if (iter->second != nullptr) - iter->second->UpdateWindow(); + iter != svmap.end(); ++iter) { + if (iter->second != nullptr) iter->second->UpdateWindow(); } svmap_mu->Unlock(); } // Set the pen color, using an enum value (e.g. ScrollView::ORANGE) void ScrollView::Pen(Color color) { - Pen(table_colors[color][0], table_colors[color][1], - table_colors[color][2], table_colors[color][3]); + Pen(table_colors[color][0], table_colors[color][1], table_colors[color][2], + table_colors[color][3]); } // Set the brush color, using an enum value (e.g. ScrollView::ORANGE) void ScrollView::Brush(Color color) { - Brush(table_colors[color][0], - table_colors[color][1], - table_colors[color][2], + Brush(table_colors[color][0], table_colors[color][1], table_colors[color][2], table_colors[color][3]); } @@ -755,8 +764,8 @@ int ScrollView::ShowYesNoDialog(const char* msg) { void ScrollView::ZoomToRectangle(int x1, int y1, int x2, int y2) { y1 = TranslateYCoordinate(y1); y2 = TranslateYCoordinate(y2); - SendMsg("zoomRectangle(%d,%d,%d,%d)", - std::min(x1, x2), std::min(y1, y2), std::max(x1, x2), std::max(y1, y2)); + SendMsg("zoomRectangle(%d,%d,%d,%d)", std::min(x1, x2), std::min(y1, y2), + std::max(x1, x2), std::max(y1, y2)); } // Send an image of type Pix. @@ -769,14 +778,11 @@ void ScrollView::Image(struct Pix* image, int x_pos, int y_pos) { SendMsg("readImage(%d,%d,%d)", x_pos, y_pos, base64_len); // Base64 encode the data. const char kBase64Table[64] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', - 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', - 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', - 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '+', '/', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', }; char* base64 = new char[base64_len + 1]; memset(base64, '=', base64_len); @@ -795,10 +801,9 @@ void ScrollView::Image(struct Pix* image, int x_pos, int y_pos) { remainder = 0; } } - if (bits_left > 0) - base64[code_len++] = kBase64Table[remainder & 63]; + if (bits_left > 0) base64[code_len++] = kBase64Table[remainder & 63]; SendRawMessage(base64); - delete [] base64; + delete[] base64; lept_free(data); } @@ -810,21 +815,24 @@ char* ScrollView::AddEscapeChars(const char* input) { char* message = new char[kMaxMsgSize]; int pos = 0; while (nextptr != nullptr) { - strncpy(message+pos, lastptr, nextptr-lastptr); + strncpy(message + pos, lastptr, nextptr - lastptr); pos += nextptr - lastptr; message[pos] = '\\'; pos += 1; lastptr = nextptr; - nextptr = strchr(nextptr+1, '\''); + nextptr = strchr(nextptr + 1, '\''); } - strcpy(message+pos, lastptr); + strcpy(message + pos, lastptr); return message; } // Inverse the Y axis if the coordinates are actually inversed. int ScrollView::TranslateYCoordinate(int y) { - if (!y_axis_is_reversed_) { return y; - } else { return y_size_ - y; } + if (!y_axis_is_reversed_) { + return y; + } else { + return y_size_ - y; + } } #endif // GRAPHICS_DISABLED diff --git a/src/viewer/scrollview.h b/src/viewer/scrollview.h index f30f699f9f..0bc371a75f 100644 --- a/src/viewer/scrollview.h +++ b/src/viewer/scrollview.h @@ -55,21 +55,21 @@ enum SVEventType { SVET_MENU, // A command selected through the menubar. SVET_ANY, // Any of the above. - SVET_COUNT // Array sizing. + SVET_COUNT // Array sizing. }; struct SVEvent { - ~SVEvent() { delete [] parameter; } + ~SVEvent() { delete[] parameter; } SVEvent* copy(); SVEventType type; // What kind of event. ScrollView* window; // Window event relates to. int x; // Coords of click or selection. int y; - int x_size; // Size of selection. + int x_size; // Size of selection. int y_size; - int command_id; // The ID of the possibly associated event (e.g. MENU) - char* parameter; // Any string that might have been passed as argument. - int counter; // Used to detect which kind of event to process next. + int command_id; // The ID of the possibly associated event (e.g. MENU) + char* parameter; // Any string that might have been passed as argument. + int counter; // Used to detect which kind of event to process next. SVEvent() { window = nullptr; @@ -84,12 +84,12 @@ struct SVEvent { // class as SVEventHandler to a ScrollView Window, the SVEventHandler will be // called whenever an appropriate event occurs. class SVEventHandler { - public: - virtual ~SVEventHandler() {} + public: + virtual ~SVEventHandler() {} -// Gets called by the SV Window. Does nothing on default, overwrite this -// to implement the desired behaviour - virtual void Notify(const SVEvent* sve) { (void)sve; } + // Gets called by the SV Window. Does nothing on default, overwrite this + // to implement the desired behaviour + virtual void Notify(const SVEvent* sve) { (void)sve; } }; // The ScrollView class provides the expernal API to the scrollviewer process. @@ -101,7 +101,7 @@ class SVEventHandler { class ScrollView { public: -// Color enum for pens and brushes. + // Color enum for pens and brushes. enum Color { NONE, BLACK, @@ -152,232 +152,232 @@ class ScrollView { VIOLET, WHEAT, GREEN_YELLOW // Make sure this one is last. -}; + }; ~ScrollView(); #ifndef GRAPHICS_DISABLED -// Create a window. The pixel size of the window may be 0,0, in which case -// a default size is selected based on the size of your canvas. -// The canvas may not be 0,0 in size! + // Create a window. The pixel size of the window may be 0,0, in which case + // a default size is selected based on the size of your canvas. + // The canvas may not be 0,0 in size! ScrollView(const char* name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size, int y_canvas_size); -// With a flag whether the x axis is reversed. + // With a flag whether the x axis is reversed. ScrollView(const char* name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size, int y_canvas_size, bool y_axis_reversed); -// Connect to a server other than localhost. + // Connect to a server other than localhost. ScrollView(const char* name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size, int y_canvas_size, bool y_axis_reversed, const char* server_name); -/******************************************************************************* -* Event handling -* To register as listener, the class has to derive from the SVEventHandler -* class, which consists of a notifyMe(SVEvent*) function that should be -* overwritten to process the event the way you want. -*******************************************************************************/ - -// Add an Event Listener to this ScrollView Window. + /******************************************************************************* + * Event handling + * To register as listener, the class has to derive from the SVEventHandler + * class, which consists of a notifyMe(SVEvent*) function that should be + * overwritten to process the event the way you want. + *******************************************************************************/ + + // Add an Event Listener to this ScrollView Window. void AddEventHandler(SVEventHandler* listener); -// Block until an event of the given type is received. + // Block until an event of the given type is received. SVEvent* AwaitEvent(SVEventType type); -// Block until any event on any window is received. + // Block until any event on any window is received. SVEvent* AwaitEventAnyWindow(); -/******************************************************************************* -* Getters and Setters -*******************************************************************************/ + /******************************************************************************* + * Getters and Setters + *******************************************************************************/ -// Returns the title of the window. + // Returns the title of the window. const char* GetName() { return window_name_; } -// Returns the unique ID of the window. + // Returns the unique ID of the window. int GetId() { return window_id_; } -/******************************************************************************* -* API functions for LUA calls -* the implementations for these can be found in svapi.cc -* (keep in mind that the window is actually created through the ScrollView -* constructor, so this is not listed here) -*******************************************************************************/ + /******************************************************************************* + * API functions for LUA calls + * the implementations for these can be found in svapi.cc + * (keep in mind that the window is actually created through the ScrollView + * constructor, so this is not listed here) + *******************************************************************************/ -// Draw a Pix on (x,y). + // Draw a Pix on (x,y). void Image(struct Pix* image, int x_pos, int y_pos); -// Flush buffers and update display. + // Flush buffers and update display. static void Update(); -// Exit the program. + // Exit the program. static void Exit(); -// Update the contents of a specific window. + // Update the contents of a specific window. void UpdateWindow(); -// Erase all content from the window, but do not destroy it. + // Erase all content from the window, but do not destroy it. void Clear(); -// Set pen color with an enum. + // Set pen color with an enum. void Pen(Color color); -// Set pen color to RGB (0-255). + // Set pen color to RGB (0-255). void Pen(int red, int green, int blue); -// Set pen color to RGBA (0-255). + // Set pen color to RGBA (0-255). void Pen(int red, int green, int blue, int alpha); -// Set brush color with an enum. + // Set brush color with an enum. void Brush(Color color); -// Set brush color to RGB (0-255). + // Set brush color to RGB (0-255). void Brush(int red, int green, int blue); -// Set brush color to RGBA (0-255). + // Set brush color to RGBA (0-255). void Brush(int red, int green, int blue, int alpha); -// Set attributes for future text, like font name (e.g. -// "Times New Roman"), font size etc.. -// Note: The underlined flag is currently not supported - void TextAttributes(const char* font, int pixel_size, - bool bold, bool italic, bool underlined); + // Set attributes for future text, like font name (e.g. + // "Times New Roman"), font size etc.. + // Note: The underlined flag is currently not supported + void TextAttributes(const char* font, int pixel_size, bool bold, bool italic, + bool underlined); -// Draw line from (x1,y1) to (x2,y2) with the current pencolor. + // Draw line from (x1,y1) to (x2,y2) with the current pencolor. void Line(int x1, int y1, int x2, int y2); -// Set the stroke width of the pen. + // Set the stroke width of the pen. void Stroke(float width); -// Draw a rectangle given upper left corner and lower right corner. -// The current pencolor is used as outline, the brushcolor to fill the shape. + // Draw a rectangle given upper left corner and lower right corner. + // The current pencolor is used as outline, the brushcolor to fill the shape. void Rectangle(int x1, int y1, int x2, int y2); -// Draw an ellipse centered on (x,y). -// The current pencolor is used as outline, the brushcolor to fill the shape. + // Draw an ellipse centered on (x,y). + // The current pencolor is used as outline, the brushcolor to fill the shape. void Ellipse(int x, int y, int width, int height); -// Draw text with the current pencolor + // Draw text with the current pencolor void Text(int x, int y, const char* mystring); -// Draw an image from a local filename. This should be faster than createImage. -// WARNING: This only works on a local machine. This also only works image -// types supported by java (like bmp,jpeg,gif,png) since the image is opened by -// the server. + // Draw an image from a local filename. This should be faster than + // createImage. WARNING: This only works on a local machine. This also only + // works image types supported by java (like bmp,jpeg,gif,png) since the image + // is opened by the server. void Image(const char* image, int x_pos, int y_pos); -// Set the current position to draw from (x,y). In conjunction with... + // Set the current position to draw from (x,y). In conjunction with... void SetCursor(int x, int y); -// ...this function, which draws a line from the current to (x,y) and then -// sets the new position to the new (x,y), this can be used to easily draw -// polygons using vertices + // ...this function, which draws a line from the current to (x,y) and then + // sets the new position to the new (x,y), this can be used to easily draw + // polygons using vertices void DrawTo(int x, int y); -// Set the SVWindow visible/invisible. + // Set the SVWindow visible/invisible. void SetVisible(bool visible); -// Set the SVWindow always on top or not always on top. + // Set the SVWindow always on top or not always on top. void AlwaysOnTop(bool b); -// Shows a modal dialog with "msg" as question and returns 'y' or 'n'. + // Shows a modal dialog with "msg" as question and returns 'y' or 'n'. int ShowYesNoDialog(const char* msg); -// Shows a modal dialog with "msg" as question and returns a char* string. -// Constraint: As return, only words (e.g. no whitespaces etc.) are allowed. + // Shows a modal dialog with "msg" as question and returns a char* string. + // Constraint: As return, only words (e.g. no whitespaces etc.) are allowed. char* ShowInputDialog(const char* msg); -// Adds a messagebox to the SVWindow. This way, it can show the messages... + // Adds a messagebox to the SVWindow. This way, it can show the messages... void AddMessageBox(); -// ...which can be added by this command. -// This is intended as an "debug" output window. + // ...which can be added by this command. + // This is intended as an "debug" output window. void AddMessage(const char* format, ...); -// Zoom the window to the rectangle given upper left corner and -// lower right corner. + // Zoom the window to the rectangle given upper left corner and + // lower right corner. void ZoomToRectangle(int x1, int y1, int x2, int y2); -// Custom messages (manipulating java code directly) can be send through this. -// Send a message to the server and attach the Id of the corresponding window. -// Note: This should only be called if you are know what you are doing, since -// you are fiddling with the Java objects on the server directly. Calling -// this just for fun will likely break your application! -// It is public so you can actually take use of the LUA functionalities, but -// be careful! + // Custom messages (manipulating java code directly) can be send through this. + // Send a message to the server and attach the Id of the corresponding window. + // Note: This should only be called if you are know what you are doing, since + // you are fiddling with the Java objects on the server directly. Calling + // this just for fun will likely break your application! + // It is public so you can actually take use of the LUA functionalities, but + // be careful! void SendMsg(const char* msg, ...); -// Custom messages (manipulating java code directly) can be send through this. -// Send a message to the server without adding the -// window id. Used for global events like Exit(). -// Note: This should only be called if you are know what you are doing, since -// you are fiddling with the Java objects on the server directly. Calling -// this just for fun will likely break your application! -// It is public so you can actually take use of the LUA functionalities, but -// be careful! + // Custom messages (manipulating java code directly) can be send through this. + // Send a message to the server without adding the + // window id. Used for global events like Exit(). + // Note: This should only be called if you are know what you are doing, since + // you are fiddling with the Java objects on the server directly. Calling + // this just for fun will likely break your application! + // It is public so you can actually take use of the LUA functionalities, but + // be careful! static void SendRawMessage(const char* msg); -/******************************************************************************* -* Add new menu entries to parent. If parent is "", the entry gets added to the -* main menubar (toplevel). -*******************************************************************************/ -// This adds a new submenu to the menubar. + /******************************************************************************* + * Add new menu entries to parent. If parent is "", the entry gets added to + *the main menubar (toplevel). + *******************************************************************************/ + // This adds a new submenu to the menubar. void MenuItem(const char* parent, const char* name); -// This adds a new (normal) menu entry with an associated eventID, which should -// be unique among menubar eventIDs. + // This adds a new (normal) menu entry with an associated eventID, which + // should be unique among menubar eventIDs. void MenuItem(const char* parent, const char* name, int cmdEvent); // This adds a new checkbox entry, which might initially be flagged. - void MenuItem(const char* parent, const char* name, - int cmdEvent, bool flagged); + void MenuItem(const char* parent, const char* name, int cmdEvent, + bool flagged); -// This adds a new popup submenu to the popup menu. If parent is "", the entry -// gets added at "toplevel" popupmenu. + // This adds a new popup submenu to the popup menu. If parent is "", the entry + // gets added at "toplevel" popupmenu. void PopupItem(const char* parent, const char* name); -// This adds a new popup entry with the associated eventID, which should be -// unique among popup eventIDs. -// If value and desc are given, on a click the server will ask you to modify -// the value and return the new value. - void PopupItem(const char* parent, const char* name, - int cmdEvent, const char* value, const char* desc); + // This adds a new popup entry with the associated eventID, which should be + // unique among popup eventIDs. + // If value and desc are given, on a click the server will ask you to modify + // the value and return the new value. + void PopupItem(const char* parent, const char* name, int cmdEvent, + const char* value, const char* desc); -// Returns the correct Y coordinate for a window, depending on whether it might -// have to be flipped (by ySize). + // Returns the correct Y coordinate for a window, depending on whether it + // might have to be flipped (by ySize). int TranslateYCoordinate(int y); private: -// Transfers a binary Image. + // Transfers a binary Image. void TransferBinaryImage(struct Pix* image); -// Transfers a gray scale Image. + // Transfers a gray scale Image. void TransferGrayImage(struct Pix* image); -// Transfers a 32-Bit Image. + // Transfers a 32-Bit Image. void Transfer32bppImage(struct Pix* image); -// Sets up ScrollView, depending on the variables from the constructor. + // Sets up ScrollView, depending on the variables from the constructor. void Initialize(const char* name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size, int y_canvas_size, bool y_axis_reversed, const char* server_name); -// Send the current buffered polygon (if any) and clear it. + // Send the current buffered polygon (if any) and clear it. void SendPolygon(); -// Start the message receiving thread. + // Start the message receiving thread. static void* MessageReceiver(void* a); -// Place an event into the event_table (synchronized). + // Place an event into the event_table (synchronized). void SetEvent(SVEvent* svevent); -// Wake up the semaphore. + // Wake up the semaphore. void Signal(); -// Returns the unique, shared network stream. + // Returns the unique, shared network stream. static SVNetwork* GetStream() { return stream_; } -// Starts a new event handler. Called whenever a new window is created. + // Starts a new event handler. Called whenever a new window is created. static void* StartEventHandler(void* sv); -// Escapes the ' character with a \, so it can be processed by LUA. + // Escapes the ' character with a \, so it can be processed by LUA. char* AddEscapeChars(const char* input); // The event handler for this window. diff --git a/src/viewer/svmnode.cpp b/src/viewer/svmnode.cpp index f128ffb40a..b8315d8713 100644 --- a/src/viewer/svmnode.cpp +++ b/src/viewer/svmnode.cpp @@ -26,8 +26,8 @@ // menu bars. #include -#include #include +#include #include "svmnode.h" @@ -51,8 +51,7 @@ SVMenuNode::SVMenuNode() { is_check_box_entry_ = false; } -SVMenuNode::~SVMenuNode() { -} +SVMenuNode::~SVMenuNode() {} // Create a new sub menu node with just a caption. This is used to create // nodes which act as parent nodes to other nodes (e.g. submenus). @@ -64,14 +63,15 @@ SVMenuNode* SVMenuNode::AddChild(const char* txt) { // Create a "normal" menu node which is associated with a command event. void SVMenuNode::AddChild(const char* txt, int command_event) { - this->AddChild(new SVMenuNode(command_event, txt, false, false, nullptr, nullptr)); + this->AddChild( + new SVMenuNode(command_event, txt, false, false, nullptr, nullptr)); } // Create a menu node with an associated value (which might be changed // through the gui). -void SVMenuNode::AddChild(const char* txt, int command_event, - const char* val) { - this->AddChild(new SVMenuNode(command_event, txt, false, false, val, nullptr)); +void SVMenuNode::AddChild(const char* txt, int command_event, const char* val) { + this->AddChild( + new SVMenuNode(command_event, txt, false, false, val, nullptr)); } // Create a menu node with an associated value and description_. @@ -82,15 +82,15 @@ void SVMenuNode::AddChild(const char* txt, int command_event, const char* val, // Create a flag menu node. void SVMenuNode::AddChild(const char* txt, int command_event, int tv) { - this->AddChild(new SVMenuNode(command_event, txt, tv, true, nullptr, nullptr)); + this->AddChild( + new SVMenuNode(command_event, txt, tv, true, nullptr, nullptr)); } // Convenience function called from the different constructors to initialize // the different values of the menu node. -SVMenuNode::SVMenuNode(int command_event, const char* txt, - int tv, bool check_box_entry, const char* val, - const char* desc) - : text_(txt), value_(val), description_(desc) { +SVMenuNode::SVMenuNode(int command_event, const char* txt, int tv, + bool check_box_entry, const char* val, const char* desc) + : text_(txt), value_(val), description_(desc) { cmd_event_ = command_event; child_ = nullptr; @@ -108,7 +108,9 @@ void SVMenuNode::AddChild(SVMenuNode* svmn) { child_ = svmn; } else { SVMenuNode* cur = child_; - while (cur->next_ != nullptr) { cur = cur->next_; } + while (cur->next_ != nullptr) { + cur = cur->next_; + } cur->next_ = svmn; } } @@ -124,20 +126,23 @@ void SVMenuNode::BuildMenu(ScrollView* sv, bool menu_bar) { sv->MenuItem(parent_->text_.string(), text_.string(), cmd_event_, toggle_value_); } else { - sv->MenuItem(parent_->text_.string(), text_.string(), cmd_event_); } + sv->MenuItem(parent_->text_.string(), text_.string(), cmd_event_); + } } else if ((parent_ != nullptr) && (!menu_bar)) { if (description_.length() > 0) { sv->PopupItem(parent_->text_.string(), text_.string(), cmd_event_, value_.string(), description_.string()); - } else { + } else { sv->PopupItem(parent_->text_.string(), text_.string()); } } if (child_ != nullptr) { - child_->BuildMenu(sv, menu_bar); delete child_; + child_->BuildMenu(sv, menu_bar); + delete child_; } if (next_ != nullptr) { - next_->BuildMenu(sv, menu_bar); delete next_; + next_->BuildMenu(sv, menu_bar); + delete next_; } } diff --git a/src/viewer/svmnode.h b/src/viewer/svmnode.h index 326a88a15e..a49f420d27 100644 --- a/src/viewer/svmnode.h +++ b/src/viewer/svmnode.h @@ -55,19 +55,19 @@ class SVMenuNode { void AddChild(const char* txt, int command_event, const char* val); // Create a menu node with an associated value and description_. - void AddChild(const char* txt, int command_event, - const char* val, const char* desc); + void AddChild(const char* txt, int command_event, const char* val, + const char* desc); // Build a menu structure for the server and send the necessary messages. // Should be called on the root node. If menu_bar is true, a menu_bar menu // is built (e.g. on top of the window), if it is false a popup menu is // built which gets shown by right clicking on the window. - void BuildMenu(ScrollView *sv, bool menu_bar = true); + void BuildMenu(ScrollView* sv, bool menu_bar = true); private: // Constructor holding the actual node data. - SVMenuNode(int command_event, const char* txt, int tv, - bool check_box_entry, const char* val, const char* desc); + SVMenuNode(int command_event, const char* txt, int tv, bool check_box_entry, + const char* val, const char* desc); // Adds a new menu node to the current node. void AddChild(SVMenuNode* svmn); diff --git a/src/viewer/svpaint.cpp b/src/viewer/svpaint.cpp index 53e7ab3be0..e1a74eae12 100644 --- a/src/viewer/svpaint.cpp +++ b/src/viewer/svpaint.cpp @@ -26,51 +26,52 @@ #endif #ifndef GRAPHICS_DISABLED -#include "scrollview.h" -#include "svmnode.h" #include #include +#include "scrollview.h" +#include "svmnode.h" // The current color values we use, initially white (== ScrollView::WHITE). -int rgb[3] = { 255, 255, 255 }; +int rgb[3] = {255, 255, 255}; class SVPaint : public SVEventHandler { public: - explicit SVPaint(const char* server_name); -// This is the main event handling function that we need to overwrite, defined -// in SVEventHandler. - void Notify(const SVEvent* sv_event); + explicit SVPaint(const char* server_name); + // This is the main event handling function that we need to overwrite, defined + // in SVEventHandler. + void Notify(const SVEvent* sv_event); + private: -// The Handler take care of the SVET_POPUP, SVET_MENU, SVET_CLICK and -// SVET_SELECTION events. - void PopupHandler(const SVEvent* sv_event); - void MenuBarHandler(const SVEvent* sv_event); - void ClickHandler(const SVEvent* sv_event); - void SelectionHandler(const SVEvent* sv_event); - -// Convenience functions to build little menus. - SVMenuNode* BuildPopupMenu(); - SVMenuNode* BuildMenuBar(); - -// Our window. - ScrollView* window_; - -// The mode we are in when an SVET_CLICK or an SVET_SELECTION event occurs. - int click_mode_; - int drag_mode_; - -// In the point-to-point drawing mode, we need to set a start-point the first -// time we call it (e.g. call SetCursor). - bool has_start_point_; + // The Handler take care of the SVET_POPUP, SVET_MENU, SVET_CLICK and + // SVET_SELECTION events. + void PopupHandler(const SVEvent* sv_event); + void MenuBarHandler(const SVEvent* sv_event); + void ClickHandler(const SVEvent* sv_event); + void SelectionHandler(const SVEvent* sv_event); + + // Convenience functions to build little menus. + SVMenuNode* BuildPopupMenu(); + SVMenuNode* BuildMenuBar(); + + // Our window. + ScrollView* window_; + + // The mode we are in when an SVET_CLICK or an SVET_SELECTION event occurs. + int click_mode_; + int drag_mode_; + + // In the point-to-point drawing mode, we need to set a start-point the first + // time we call it (e.g. call SetCursor). + bool has_start_point_; }; // Build a sample popup menu. SVMenuNode* SVPaint::BuildPopupMenu() { SVMenuNode* root = new SVMenuNode(); // Empty root node // Initial color is white, so we all values to 255. - root->AddChild("R", // Shown caption. - 1, // assoc. command_id. - "255", // initial value. + root->AddChild("R", // Shown caption. + 1, // assoc. command_id. + "255", // initial value. "Red Color Value?"); // Shown description. root->AddChild("G", 2, "255", "Green Color Value?"); root->AddChild("B", 3, "255", "Blue Color Value?"); @@ -87,7 +88,7 @@ SVMenuNode* SVPaint::BuildMenuBar() { // Put some nodes into the submenus. click->AddChild("Point to Point Drawing", // Caption. - 1); // command_id. + 1); // command_id. click->AddChild("Point Drawing", 2); click->AddChild("Text Drawing", 3); drag->AddChild("Line Drawing", 4); @@ -110,9 +111,11 @@ void SVPaint::PopupHandler(const SVEvent* sv_event) { // or the drag_mode_ (commands 4-6). void SVPaint::MenuBarHandler(const SVEvent* sv_event) { if ((sv_event->command_id > 0) && (sv_event->command_id < 4)) { - click_mode_ = sv_event->command_id; - has_start_point_ = false; - } else { drag_mode_ = sv_event->command_id; } + click_mode_ = sv_event->command_id; + has_start_point_ = false; + } else { + drag_mode_ = sv_event->command_id; + } } // Takes care of the SVET_CLICK events. @@ -120,23 +123,24 @@ void SVPaint::MenuBarHandler(const SVEvent* sv_event) { // point drawing, or draw text. void SVPaint::ClickHandler(const SVEvent* sv_event) { switch (click_mode_) { - case 1: //Point to Point - if (has_start_point_) { window_->DrawTo(sv_event->x, sv_event->y); - } else { + case 1: // Point to Point + if (has_start_point_) { + window_->DrawTo(sv_event->x, sv_event->y); + } else { has_start_point_ = true; window_->SetCursor(sv_event->x, sv_event->y); - } - break; - case 2: //Point Drawing..simulated by drawing a 1 pixel line. - window_->Line(sv_event->x, sv_event->y, sv_event->x, sv_event->y); - break; - case 3: //Text - // We show a modal input dialog on our window, then draw the input and - // finally delete the input pointer. - char* p = window_->ShowInputDialog("Text:"); - window_->Text(sv_event->x, sv_event->y, p); - delete [] p; - break; + } + break; + case 2: // Point Drawing..simulated by drawing a 1 pixel line. + window_->Line(sv_event->x, sv_event->y, sv_event->x, sv_event->y); + break; + case 3: // Text + // We show a modal input dialog on our window, then draw the input and + // finally delete the input pointer. + char* p = window_->ShowInputDialog("Text:"); + window_->Text(sv_event->x, sv_event->y, p); + delete[] p; + break; } } @@ -145,48 +149,55 @@ void SVPaint::ClickHandler(const SVEvent* sv_event) { // an ellipse. void SVPaint::SelectionHandler(const SVEvent* sv_event) { switch (drag_mode_) { - //FIXME inversed x_size, y_size - case 4: //Line - window_->Line(sv_event->x, sv_event->y, - sv_event->x - sv_event->x_size, + // FIXME inversed x_size, y_size + case 4: // Line + window_->Line(sv_event->x, sv_event->y, sv_event->x - sv_event->x_size, sv_event->y - sv_event->y_size); break; - case 5: //Rectangle + case 5: // Rectangle window_->Rectangle(sv_event->x, sv_event->y, sv_event->x - sv_event->x_size, sv_event->y - sv_event->y_size); break; - case 6: //Ellipse + case 6: // Ellipse window_->Ellipse(sv_event->x - sv_event->x_size, - sv_event->y - sv_event->y_size, - sv_event->x_size, sv_event->y_size); + sv_event->y - sv_event->y_size, sv_event->x_size, + sv_event->y_size); break; - } + } } // The event handling function from ScrollView which we have to overwrite. // We handle CLICK, SELECTION, MENU and POPUP and throw away all other events. void SVPaint::Notify(const SVEvent* sv_event) { - if (sv_event->type == SVET_CLICK) { ClickHandler(sv_event); } - else if (sv_event->type == SVET_SELECTION) { SelectionHandler(sv_event); } - else if (sv_event->type == SVET_MENU) { MenuBarHandler(sv_event); } - else if (sv_event->type == SVET_POPUP) { PopupHandler(sv_event); } - else {} //throw other events away + if (sv_event->type == SVET_CLICK) { + ClickHandler(sv_event); + } else if (sv_event->type == SVET_SELECTION) { + SelectionHandler(sv_event); + } else if (sv_event->type == SVET_MENU) { + MenuBarHandler(sv_event); + } else if (sv_event->type == SVET_POPUP) { + PopupHandler(sv_event); + } else { + } // throw other events away } // Builds a new window, initializes the variables and event handler and builds // the menu. -SVPaint::SVPaint(const char *server_name) { +SVPaint::SVPaint(const char* server_name) { window_ = new ScrollView("ScrollView Paint Example", // window caption - 0, 0, // x,y window position - 500, 500, // window size - 500, 500, // canvas size - false, // whether the Y axis is inversed. - // this is included due to legacy - // reasons for tesseract and enables - // us to have (0,0) as the LOWER left - // of the coordinate system. - server_name); // the server address. + 0, + 0, // x,y window position + 500, + 500, // window size + 500, + 500, // canvas size + false, // whether the Y axis is inversed. + // this is included due to legacy + // reasons for tesseract and enables + // us to have (0,0) as the LOWER left + // of the coordinate system. + server_name); // the server address. // Set the start modes to point-to-point and line drawing. click_mode_ = 1; @@ -196,10 +207,10 @@ SVPaint::SVPaint(const char *server_name) { // Bild our menus and add them to the window. The flag illustrates whether // this is a menu bar. SVMenuNode* popup_menu = BuildPopupMenu(); - popup_menu->BuildMenu(window_,false); + popup_menu->BuildMenu(window_, false); SVMenuNode* bar_menu = BuildMenuBar(); - bar_menu->BuildMenu(window_,true); + bar_menu->BuildMenu(window_, true); // Set the initial color values to White (could also be done by // passing (rgb[0], rgb[1], rgb[2]). @@ -228,8 +239,12 @@ SVPaint::SVPaint(const char *server_name) { // If a parameter is given, we try to connect to the given server. // This enables us to test the remote capabilites of ScrollView. int main(int argc, char** argv) { - const char* server_name; - if (argc > 1) { server_name = argv[1]; } else { server_name = "localhost"; } - SVPaint svp(server_name); + const char* server_name; + if (argc > 1) { + server_name = argv[1]; + } else { + server_name = "localhost"; + } + SVPaint svp(server_name); } #endif // GRAPHICS_DISABLED diff --git a/src/viewer/svutil.cpp b/src/viewer/svutil.cpp index 6dfc3cfb22..22c3f038f5 100644 --- a/src/viewer/svutil.cpp +++ b/src/viewer/svutil.cpp @@ -37,10 +37,10 @@ struct addrinfo { #include #include #include -#include -#include #include #include +#include +#include #ifdef __linux__ #include #endif @@ -88,7 +88,7 @@ void SVSync::StartThread(void* (*func)(void*), void* arg) { #ifdef _WIN32 LPTHREAD_START_ROUTINE f = (LPTHREAD_START_ROUTINE)func; DWORD threadid; - HANDLE newthread = CreateThread(nullptr, // default security attributes + HANDLE newthread = CreateThread(nullptr, // default security attributes 0, // use default stack size f, // thread function arg, // argument to thread function @@ -127,13 +127,13 @@ void SVSync::StartProcess(const char* executable, const char* args) { STARTUPINFO start_info; PROCESS_INFORMATION proc_info; GetStartupInfo(&start_info); - if (!CreateProcess(nullptr, const_cast(proc.c_str()), nullptr, nullptr, FALSE, - CREATE_NO_WINDOW | DETACHED_PROCESS, nullptr, nullptr, - &start_info, &proc_info)) + if (!CreateProcess(nullptr, const_cast(proc.c_str()), nullptr, nullptr, + FALSE, CREATE_NO_WINDOW | DETACHED_PROCESS, nullptr, + nullptr, &start_info, &proc_info)) return; #else int pid = fork(); - if (pid != 0) { // The father process returns + if (pid != 0) { // The father process returns } else { #ifdef __linux__ // Make sure the java process terminates on exit, since its @@ -177,7 +177,7 @@ SVSemaphore::SVSemaphore() { char name[50]; snprintf(name, sizeof(name), "%ld", random()); sem_unlink(name); - semaphore_ = sem_open(name, O_CREAT , S_IWUSR, 0); + semaphore_ = sem_open(name, O_CREAT, S_IWUSR, 0); if (semaphore_ == SEM_FAILED) { perror("sem_open"); } @@ -228,14 +228,19 @@ void SVNetwork::Flush() { char* SVNetwork::Receive() { char* result = nullptr; #if defined(_WIN32) || defined(__CYGWIN__) - if (has_content) { result = strtok (nullptr, "\n"); } + if (has_content) { + result = strtok(nullptr, "\n"); + } #else - if (buffer_ptr_ != nullptr) { result = strtok_r(nullptr, "\n", &buffer_ptr_); } + if (buffer_ptr_ != nullptr) { + result = strtok_r(nullptr, "\n", &buffer_ptr_); + } #endif // This means there is something left in the buffer and we return it. - if (result != nullptr) { return result; - // Otherwise, we read from the stream_. + if (result != nullptr) { + return result; + // Otherwise, we read from the stream_. } else { buffer_ptr_ = nullptr; has_content = false; @@ -251,16 +256,20 @@ char* SVNetwork::Receive() { FD_ZERO(&readfds); FD_SET(stream_, &readfds); - int i = select(stream_+1, &readfds, nullptr, nullptr, &tv); + int i = select(stream_ + 1, &readfds, nullptr, nullptr, &tv); // The stream_ died. - if (i == 0) { return nullptr; } + if (i == 0) { + return nullptr; + } // Read the message buffer. i = recv(stream_, msg_buffer_in_, kMaxMsgSize, 0); // Server quit (0) or error (-1). - if (i <= 0) { return nullptr; } + if (i <= 0) { + return nullptr; + } msg_buffer_in_[i] = '\0'; has_content = true; #ifdef _WIN32 @@ -283,7 +292,6 @@ void SVNetwork::Close() { stream_ = -1; } - // The program to invoke to start ScrollView static const char* ScrollViewProg() { #ifdef _WIN32 @@ -294,7 +302,6 @@ static const char* ScrollViewProg() { return prog; } - // The arguments to the program to invoke to start ScrollView static std::string ScrollViewCommand(std::string scrollview_path) { // The following ugly ifdef is to enable the output of the java runtime @@ -311,44 +318,42 @@ static std::string ScrollViewCommand(std::string scrollview_path) { "-Xms1024m -Xmx2048m -jar %s/ScrollView.jar" " & wait\""; #endif - int cmdlen = strlen(cmd_template) + 4*strlen(scrollview_path.c_str()) + 1; + int cmdlen = strlen(cmd_template) + 4 * strlen(scrollview_path.c_str()) + 1; char* cmd = new char[cmdlen]; const char* sv_path = scrollview_path.c_str(); snprintf(cmd, cmdlen, cmd_template, sv_path, sv_path, sv_path, sv_path); std::string command(cmd); - delete [] cmd; + delete[] cmd; return command; } - // Platform-independent freeaddrinfo() static void FreeAddrInfo(struct addrinfo* addr_info) { - #if defined(__linux__) +#if defined(__linux__) freeaddrinfo(addr_info); - #else +#else delete addr_info->ai_addr; delete addr_info; - #endif +#endif } - // Non-linux version of getaddrinfo() #if !defined(__linux__) static int GetAddrInfoNonLinux(const char* hostname, int port, struct addrinfo** addr_info) { -// Get the host data depending on the OS. + // Get the host data depending on the OS. struct sockaddr_in* address; *addr_info = new struct addrinfo; memset(*addr_info, 0, sizeof(struct addrinfo)); address = new struct sockaddr_in; memset(address, 0, sizeof(struct sockaddr_in)); - (*addr_info)->ai_addr = (struct sockaddr*) address; + (*addr_info)->ai_addr = (struct sockaddr*)address; (*addr_info)->ai_addrlen = sizeof(struct sockaddr); (*addr_info)->ai_family = AF_INET; (*addr_info)->ai_socktype = SOCK_STREAM; - struct hostent *name; + struct hostent* name; #ifdef _WIN32 WSADATA wsaData; WSAStartup(MAKEWORD(1, 1), &wsaData); @@ -365,14 +370,13 @@ static int GetAddrInfoNonLinux(const char* hostname, int port, // Fill in the appropriate variables to be able to connect to the server. address->sin_family = name->h_addrtype; - memcpy((char *) &address->sin_addr.s_addr, - name->h_addr_list[0], name->h_length); + memcpy((char*)&address->sin_addr.s_addr, name->h_addr_list[0], + name->h_length); address->sin_port = htons(port); return 0; } #endif - // Platform independent version of getaddrinfo() // Given a hostname:port, produce an addrinfo struct static int GetAddrInfo(const char* hostname, int port, @@ -386,7 +390,6 @@ static int GetAddrInfo(const char* hostname, int port, #endif } - // Set up a connection to a ScrollView on hostname:port. SVNetwork::SVNetwork(const char* hostname, int port) { msg_buffer_in_ = new char[kMaxMsgSize + 1]; @@ -395,7 +398,7 @@ SVNetwork::SVNetwork(const char* hostname, int port) { has_content = false; buffer_ptr_ = nullptr; - struct addrinfo *addr_info = nullptr; + struct addrinfo* addr_info = nullptr; if (GetAddrInfo(hostname, port, &addr_info) != 0) { std::cerr << "Error resolving name for ScrollView host " @@ -419,7 +422,7 @@ SVNetwork::SVNetwork(const char* hostname, int port) { scrollview_path = "."; #endif } - const char *prog = ScrollViewProg(); + const char* prog = ScrollViewProg(); std::string command = ScrollViewCommand(scrollview_path); SVSync::StartProcess(prog, command.c_str()); diff --git a/src/viewer/svutil.h b/src/viewer/svutil.h index 570dc005ef..1d9558a30a 100644 --- a/src/viewer/svutil.h +++ b/src/viewer/svutil.h @@ -32,8 +32,8 @@ #define snprintf _snprintf #endif #else -#include "platform.h" #include +#include "platform.h" #endif #else #include @@ -43,18 +43,18 @@ #include #ifndef MAX -#define MAX(a, b) ((a > b) ? a : b) +#define MAX(a, b) ((a > b) ? a : b) #endif #ifndef MIN -#define MIN(a, b) ((a < b) ? a : b) +#define MIN(a, b) ((a < b) ? a : b) #endif /// The SVSync class provides functionality for Thread & Process Creation class SVSync { public: /// Create new thread. - static void StartThread(void *(*func)(void*), void* arg); + static void StartThread(void* (*func)(void*), void* arg); /// Signals a thread to exit. static void ExitThread(); /// Starts a new process. @@ -71,11 +71,12 @@ class SVSemaphore { void Signal(); /// Wait on a semaphore. void Wait(); + private: #ifdef _WIN32 HANDLE semaphore_; #elif defined(__APPLE__) - sem_t *semaphore_; + sem_t* semaphore_; #else sem_t semaphore_; #endif @@ -91,6 +92,7 @@ class SVMutex { void Lock(); /// Unlocks on a mutex. void Unlock(); + private: #ifdef _WIN32 HANDLE mutex_; diff --git a/src/vs2010/port/gettimeofday.cpp b/src/vs2010/port/gettimeofday.cpp index 664ea7304d..6f7791925e 100644 --- a/src/vs2010/port/gettimeofday.cpp +++ b/src/vs2010/port/gettimeofday.cpp @@ -17,13 +17,12 @@ // /////////////////////////////////////////////////////////////////////// -#include #include "gettimeofday.h" +#include -int gettimeofday(struct timeval *tp, struct timezone *tzp) { +int gettimeofday(struct timeval* tp, struct timezone* tzp) { l_int32 sec, usec; - if (tp == nullptr) - return -1; + if (tp == nullptr) return -1; l_getCurrentTime(&sec, &usec); tp->tv_sec = sec; diff --git a/src/vs2010/port/gettimeofday.h b/src/vs2010/port/gettimeofday.h index 621ecd3b99..20e83ab926 100644 --- a/src/vs2010/port/gettimeofday.h +++ b/src/vs2010/port/gettimeofday.h @@ -21,11 +21,11 @@ #define VS2008_PORT_GETTIMEOFDAY_H_ #ifdef _WIN32 -#include // timeval is defined in here. +#include // timeval is defined in here. #endif -typedef struct timezone tz; +typedef struct timezone tz; -int gettimeofday(struct timeval * tp, struct timezone * tzp); +int gettimeofday(struct timeval* tp, struct timezone* tzp); #endif // VS2008_PORT_GETTIMEOFDAY_H_ diff --git a/src/vs2010/tesseract/resource.h b/src/vs2010/tesseract/resource.h index 393a9e0071..2700913c8a 100644 --- a/src/vs2010/tesseract/resource.h +++ b/src/vs2010/tesseract/resource.h @@ -15,9 +15,9 @@ // #ifdef APSTUDIO_INVOKED #ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 101 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1001 -#define _APS_NEXT_SYMED_VALUE 101 +#define _APS_NEXT_RESOURCE_VALUE 101 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1001 +#define _APS_NEXT_SYMED_VALUE 101 #endif #endif diff --git a/src/wordrec/associate.cpp b/src/wordrec/associate.cpp index 255488bfa4..cadd3afa60 100644 --- a/src/wordrec/associate.cpp +++ b/src/wordrec/associate.cpp @@ -18,7 +18,6 @@ // /////////////////////////////////////////////////////////////////////// - #include #ifdef __UNIX__ #include @@ -35,13 +34,10 @@ const float AssociateUtils::kMaxFixedPitchCharAspectRatio = 2.0f; const float AssociateUtils::kMinGap = 0.03f; void AssociateUtils::ComputeStats(int col, int row, - const AssociateStats *parent_stats, - int parent_path_length, - bool fixed_pitch, - float max_char_wh_ratio, - WERD_RES *word_res, - bool debug, - AssociateStats *stats) { + const AssociateStats* parent_stats, + int parent_path_length, bool fixed_pitch, + float max_char_wh_ratio, WERD_RES* word_res, + bool debug, AssociateStats* stats) { stats->Clear(); ASSERT_HOST(word_res != nullptr); @@ -49,8 +45,8 @@ void AssociateUtils::ComputeStats(int col, int row, return; } if (debug) { - tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n", - col, row, fixed_pitch ? " (fixed pitch)" : ""); + tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n", col, row, + fixed_pitch ? " (fixed pitch)" : ""); } float normalizing_height = kBlnXHeight; ROW* blob_row = word_res->blob_row; @@ -63,7 +59,7 @@ void AssociateUtils::ComputeStats(int col, int row, normalizing_height = word_res->denorm.y_scale() * blob_row->body_size(); } else { normalizing_height = word_res->denorm.y_scale() * - (blob_row->x_height() + blob_row->ascenders()); + (blob_row->x_height() + blob_row->ascenders()); } if (debug) { tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n", @@ -84,8 +80,8 @@ void AssociateUtils::ComputeStats(int col, int row, } if (stats->gap_sum == 0) stats->gap_sum = negative_gap_sum; if (debug) { - tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n", - wh_ratio, max_char_wh_ratio, stats->gap_sum, + tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n", wh_ratio, + max_char_wh_ratio, stats->gap_sum, stats->bad_shape ? "bad_shape" : ""); } // Compute shape_cost (for fixed pitch mode). @@ -97,7 +93,7 @@ void AssociateUtils::ComputeStats(int col, int row, // no cutting through ink at the blob boundaries. if (col > 0) { float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height; - SEAM *left_seam = word_res->seam_array[col - 1]; + SEAM* left_seam = word_res->seam_array[col - 1]; if ((!end_row && left_gap < kMinGap) || left_seam->priority() > 0.0f) { stats->bad_shape = true; } @@ -109,7 +105,7 @@ void AssociateUtils::ComputeStats(int col, int row, float right_gap = 0.0f; if (!end_row) { right_gap = word_res->GetBlobsGap(row) / normalizing_height; - SEAM *right_seam = word_res->seam_array[row]; + SEAM* right_seam = word_res->seam_array[row]; if (right_gap < kMinGap || right_seam->priority() > 0.0f) { stats->bad_shape = true; if (right_gap < kMinGap) stats->bad_fixed_pitch_right_gap = true; @@ -128,11 +124,11 @@ void AssociateUtils::ComputeStats(int col, int row, stats->full_wh_ratio = wh_ratio + right_gap; if (parent_stats != nullptr) { stats->full_wh_ratio_total = - (parent_stats->full_wh_ratio_total + stats->full_wh_ratio); - float mean = - stats->full_wh_ratio_total / static_cast(parent_path_length+1); + (parent_stats->full_wh_ratio_total + stats->full_wh_ratio); + float mean = stats->full_wh_ratio_total / + static_cast(parent_path_length + 1); stats->full_wh_ratio_var = - parent_stats->full_wh_ratio_var + pow(mean-stats->full_wh_ratio, 2); + parent_stats->full_wh_ratio_var + pow(mean - stats->full_wh_ratio, 2); } else { stats->full_wh_ratio_total = stats->full_wh_ratio; } @@ -143,7 +139,7 @@ void AssociateUtils::ComputeStats(int col, int row, } stats->shape_cost = - FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio); + FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio); // For some reason Tesseract prefers to treat the whole CJ words // as one blob when the initial segmentation is particularly bad. @@ -156,8 +152,7 @@ void AssociateUtils::ComputeStats(int col, int row, } } -float AssociateUtils::FixedPitchWidthCost(float norm_width, - float right_gap, +float AssociateUtils::FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, float max_char_wh_ratio) { float cost = 0.0f; @@ -165,7 +160,7 @@ float AssociateUtils::FixedPitchWidthCost(float norm_width, if (norm_width > kMaxFixedPitchCharAspectRatio) cost += norm_width * norm_width; // extra penalty for merging CJK chars // Penalize skinny blobs, except for punctuation in the last position. - if (norm_width+right_gap < 0.5f && !end_pos) { + if (norm_width + right_gap < 0.5f && !end_pos) { cost += 1.0f - (norm_width + right_gap); } return cost; diff --git a/src/wordrec/associate.h b/src/wordrec/associate.h index 12f19c745c..594a20a452 100644 --- a/src/wordrec/associate.h +++ b/src/wordrec/associate.h @@ -59,7 +59,7 @@ struct AssociateStats { // the blob on the right bool bad_fixed_pitch_wh_ratio; // true if the blobs has width-to-hight // ratio > kMaxFixedPitchCharAspectRatio - int gap_sum; // sum of gaps within the blob + int gap_sum; // sum of gaps within the blob }; // Utility functions for scoring segmentation paths according to their @@ -78,11 +78,11 @@ class AssociateUtils { // Rating *= rating_scale * Results->BlobLength // Certainty *= -(getDict().certainty_scale) static inline float ComputeOutlineLength(float rating_cert_scale, - const BLOB_CHOICE &b) { + const BLOB_CHOICE& b) { return rating_cert_scale * b.rating() / b.certainty(); } - static inline float ComputeRating(float rating_cert_scale, - float cert, int width) { + static inline float ComputeRating(float rating_cert_scale, float cert, + int width) { return static_cast(width) * cert / rating_cert_scale; } @@ -98,14 +98,10 @@ class AssociateUtils { // // Note: the function assumes that word_res, stats and // associate_cost pointers are not nullptr. - static void ComputeStats(int col, int row, - const AssociateStats *parent_stats, - int parent_path_length, - bool fixed_pitch, - float max_char_wh_ratio, - WERD_RES *word_res, - bool debug, - AssociateStats *stats); + static void ComputeStats(int col, int row, const AssociateStats* parent_stats, + int parent_path_length, bool fixed_pitch, + float max_char_wh_ratio, WERD_RES* word_res, + bool debug, AssociateStats* stats); // Returns the width cost for fixed-pitch text. static float FixedPitchWidthCost(float norm_width, float right_gap, diff --git a/src/wordrec/chop.cpp b/src/wordrec/chop.cpp index f7f6a8fbe4..5c74002293 100644 --- a/src/wordrec/chop.cpp +++ b/src/wordrec/chop.cpp @@ -28,10 +28,10 @@ ----------------------------------------------------------------------*/ #include "chop.h" -#include "outlines.h" #include "callcpp.h" -#include "plotedges.h" #include "const.h" +#include "outlines.h" +#include "plotedges.h" #include "wordrec.h" #include @@ -51,31 +51,30 @@ namespace tesseract { * Assign a priority to and edge point that might be used as part of a * split. The argument should be of type EDGEPT. */ -PRIORITY Wordrec::point_priority(EDGEPT *point) { +PRIORITY +Wordrec::point_priority(EDGEPT* point) { return (PRIORITY)angle_change(point->prev, point, point->next); } - /** * @name add_point_to_list * * Add an edge point to a POINT_GROUP containg a list of other points. */ -void Wordrec::add_point_to_list(PointHeap* point_heap, EDGEPT *point) { +void Wordrec::add_point_to_list(PointHeap* point_heap, EDGEPT* point) { if (point_heap->size() < MAX_NUM_POINTS - 2) { PointPair pair(point_priority(point), point); point_heap->Push(&pair); } #ifndef GRAPHICS_DISABLED - if (chop_debug > 2) - mark_outline(point); + if (chop_debug > 2) mark_outline(point); #endif } // Returns true if the edgept supplied as input is an inside angle. This // is determined by the angular change of the vectors from point to point. -bool Wordrec::is_inside_angle(EDGEPT *pt) { +bool Wordrec::is_inside_angle(EDGEPT* pt) { return angle_change(pt->prev, pt, pt->next) < chop_inside_angle; } @@ -85,7 +84,7 @@ bool Wordrec::is_inside_angle(EDGEPT *pt) { * Return the change in angle (degrees) of the line segments between * points one and two, and two and three. */ -int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { +int Wordrec::angle_change(EDGEPT* point1, EDGEPT* point2, EDGEPT* point3) { VECTOR vector1; VECTOR vector2; @@ -99,19 +98,15 @@ int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { vector2.y = point3->pos.y - point2->pos.y; /* Use cross product */ length = (float)sqrt((float)LENGTH(vector1) * LENGTH(vector2)); - if ((int) length == 0) - return (0); - angle = static_cast(floor(asin(CROSS (vector1, vector2) / - length) / PI * 180.0 + 0.5)); + if ((int)length == 0) return (0); + angle = static_cast( + floor(asin(CROSS(vector1, vector2) / length) / PI * 180.0 + 0.5)); /* Use dot product */ - if (SCALAR (vector1, vector2) < 0) - angle = 180 - angle; + if (SCALAR(vector1, vector2) < 0) angle = 180 - angle; /* Adjust angle */ - if (angle > 180) - angle -= 360; - if (angle <= -180) - angle += 360; + if (angle > 180) angle -= 360; + if (angle <= -180) angle += 360; return (angle); } @@ -121,37 +116,32 @@ int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { * Choose the edge point that is closest to the critical point. This * point may not be exactly vertical from the critical point. */ -EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point, - EDGEPT *vertical_point, - int *best_dist) { - EDGEPT *best_point = nullptr; +EDGEPT* Wordrec::pick_close_point(EDGEPT* critical_point, + EDGEPT* vertical_point, int* best_dist) { + EDGEPT* best_point = nullptr; int this_distance; int found_better; do { found_better = FALSE; - this_distance = edgept_dist (critical_point, vertical_point); + this_distance = edgept_dist(critical_point, vertical_point); if (this_distance <= *best_dist) { - - if (!(same_point (critical_point->pos, vertical_point->pos) || - same_point (critical_point->pos, vertical_point->next->pos) || - (best_point && same_point (best_point->pos, vertical_point->pos)) || - is_exterior_point (critical_point, vertical_point))) { + if (!(same_point(critical_point->pos, vertical_point->pos) || + same_point(critical_point->pos, vertical_point->next->pos) || + (best_point && same_point(best_point->pos, vertical_point->pos)) || + is_exterior_point(critical_point, vertical_point))) { *best_dist = this_distance; best_point = vertical_point; - if (chop_vertical_creep) - found_better = TRUE; + if (chop_vertical_creep) found_better = TRUE; } } vertical_point = vertical_point->next; - } - while (found_better == TRUE); + } while (found_better == TRUE); return (best_point); } - /** * @name prioritize_points * @@ -159,34 +149,32 @@ EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point, * each of these points assign a priority. Sort these points using a * heap structure so that they can be visited in order. */ -void Wordrec::prioritize_points(TESSLINE *outline, PointHeap* points) { - EDGEPT *this_point; - EDGEPT *local_min = nullptr; - EDGEPT *local_max = nullptr; +void Wordrec::prioritize_points(TESSLINE* outline, PointHeap* points) { + EDGEPT* this_point; + EDGEPT* local_min = nullptr; + EDGEPT* local_max = nullptr; this_point = outline->loop; local_min = this_point; local_max = this_point; do { if (this_point->vec.y < 0) { - /* Look for minima */ + /* Look for minima */ if (local_max != nullptr) new_max_point(local_max, points); - else if (is_inside_angle (this_point)) + else if (is_inside_angle(this_point)) add_point_to_list(points, this_point); local_max = nullptr; local_min = this_point->next; - } - else if (this_point->vec.y > 0) { - /* Look for maxima */ + } else if (this_point->vec.y > 0) { + /* Look for maxima */ if (local_min != nullptr) new_min_point(local_min, points); - else if (is_inside_angle (this_point)) + else if (is_inside_angle(this_point)) add_point_to_list(points, this_point); local_min = nullptr; local_max = this_point->next; - } - else { + } else { /* Flat area */ if (local_max != nullptr) { if (local_max->prev->vec.y != 0) { @@ -194,8 +182,7 @@ void Wordrec::prioritize_points(TESSLINE *outline, PointHeap* points) { } local_max = this_point->next; local_min = nullptr; - } - else { + } else { if (local_min->prev->vec.y != 0) { new_min_point(local_min, points); } @@ -204,13 +191,11 @@ void Wordrec::prioritize_points(TESSLINE *outline, PointHeap* points) { } } - /* Next point */ + /* Next point */ this_point = this_point->next; - } - while (this_point != outline->loop); + } while (this_point != outline->loop); } - /** * @name new_min_point * @@ -218,23 +203,22 @@ void Wordrec::prioritize_points(TESSLINE *outline, PointHeap* points) { * Return the new value for the local minimum. If a point is saved then * the local minimum is reset to nullptr. */ -void Wordrec::new_min_point(EDGEPT *local_min, PointHeap* points) { +void Wordrec::new_min_point(EDGEPT* local_min, PointHeap* points) { int16_t dir; - dir = direction (local_min); + dir = direction(local_min); if (dir < 0) { add_point_to_list(points, local_min); return; } - if (dir == 0 && point_priority (local_min) < 0) { + if (dir == 0 && point_priority(local_min) < 0) { add_point_to_list(points, local_min); return; } } - /** * @name new_max_point * @@ -242,23 +226,22 @@ void Wordrec::new_min_point(EDGEPT *local_min, PointHeap* points) { * Return the new value for the local minimum. If a point is saved then * the local minimum is reset to nullptr. */ -void Wordrec::new_max_point(EDGEPT *local_max, PointHeap* points) { +void Wordrec::new_max_point(EDGEPT* local_max, PointHeap* points) { int16_t dir; - dir = direction (local_max); + dir = direction(local_max); if (dir > 0) { add_point_to_list(points, local_max); return; } - if (dir == 0 && point_priority (local_max) < 0) { + if (dir == 0 && point_priority(local_max) < 0) { add_point_to_list(points, local_max); return; } } - /** * @name vertical_projection_point * @@ -271,17 +254,17 @@ void Wordrec::new_max_point(EDGEPT *local_max, PointHeap* points) { * a result, and any points that were newly created are also saved on * the new_points list. */ -void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, +void Wordrec::vertical_projection_point(EDGEPT* split_point, + EDGEPT* target_point, EDGEPT** best_point, - EDGEPT_CLIST *new_points) { - EDGEPT *p; /* Iterator */ - EDGEPT *this_edgept; /* Iterator */ + EDGEPT_CLIST* new_points) { + EDGEPT* p; /* Iterator */ + EDGEPT* this_edgept; /* Iterator */ EDGEPT_C_IT new_point_it(new_points); - int x = split_point->pos.x; /* X value of vertical */ - int best_dist = LARGE_DISTANCE;/* Best point found */ + int x = split_point->pos.x; /* X value of vertical */ + int best_dist = LARGE_DISTANCE; /* Best point found */ - if (*best_point != nullptr) - best_dist = edgept_dist(split_point, *best_point); + if (*best_point != nullptr) best_dist = edgept_dist(split_point, *best_point); p = target_point; /* Look at each edge point */ @@ -289,26 +272,21 @@ void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_poin if (((p->pos.x <= x && x <= p->next->pos.x) || (p->next->pos.x <= x && x <= p->pos.x)) && !same_point(split_point->pos, p->pos) && - !same_point(split_point->pos, p->next->pos) && - !p->IsChopPt() && + !same_point(split_point->pos, p->next->pos) && !p->IsChopPt() && (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) { - if (near_point(split_point, p, p->next, &this_edgept)) { new_point_it.add_before_then_move(this_edgept); } if (*best_point == nullptr) - best_dist = edgept_dist (split_point, this_edgept); + best_dist = edgept_dist(split_point, this_edgept); - this_edgept = - pick_close_point(split_point, this_edgept, &best_dist); - if (this_edgept) - *best_point = this_edgept; + this_edgept = pick_close_point(split_point, this_edgept, &best_dist); + if (this_edgept) *best_point = this_edgept; } p = p->next; - } - while (p != target_point); + } while (p != target_point); } } // namespace tesseract diff --git a/src/wordrec/chopper.cpp b/src/wordrec/chopper.cpp index 3d090f1788..0264f56ff7 100644 --- a/src/wordrec/chopper.cpp +++ b/src/wordrec/chopper.cpp @@ -38,8 +38,8 @@ #include "const.h" #include "findseam.h" #include "globals.h" -#include "render.h" #include "pageres.h" +#include "render.h" #include "seam.h" #include "stopper.h" #include "structures.h" @@ -63,67 +63,58 @@ static const int kMaxNumChunks = 64; * * Copy the list of outlines. */ -void preserve_outline(EDGEPT *start) { - EDGEPT *srcpt; +void preserve_outline(EDGEPT* start) { + EDGEPT* srcpt; - if (start == nullptr) - return; + if (start == nullptr) return; srcpt = start; do { srcpt->flags[1] = 1; srcpt = srcpt->next; - } - while (srcpt != start); + } while (srcpt != start); srcpt->flags[1] = 2; } - /**************************************************************************/ -void preserve_outline_tree(TESSLINE *srcline) { - TESSLINE *outline; +void preserve_outline_tree(TESSLINE* srcline) { + TESSLINE* outline; for (outline = srcline; outline != nullptr; outline = outline->next) { - preserve_outline (outline->loop); + preserve_outline(outline->loop); } } - /** * @name restore_outline_tree * * Copy the list of outlines. */ -EDGEPT *restore_outline(EDGEPT *start) { - EDGEPT *srcpt; - EDGEPT *real_start; +EDGEPT* restore_outline(EDGEPT* start) { + EDGEPT* srcpt; + EDGEPT* real_start; - if (start == nullptr) - return nullptr; + if (start == nullptr) return nullptr; srcpt = start; do { - if (srcpt->flags[1] == 2) - break; + if (srcpt->flags[1] == 2) break; srcpt = srcpt->next; - } - while (srcpt != start); + } while (srcpt != start); real_start = srcpt; do { srcpt = srcpt->next; if (srcpt->prev->flags[1] == 0) { remove_edgept(srcpt->prev); } - } - while (srcpt != real_start); + } while (srcpt != real_start); return real_start; } - /******************************************************************************/ -void restore_outline_tree(TESSLINE *srcline) { - TESSLINE *outline; +void restore_outline_tree(TESSLINE* srcline) { + TESSLINE* outline; for (outline = srcline; outline != nullptr; outline = outline->next) { - outline->loop = restore_outline (outline->loop); + outline->loop = restore_outline(outline->loop); outline->start = outline->loop->pos; } } @@ -133,9 +124,10 @@ void restore_outline_tree(TESSLINE *srcline) { static SEAM* CheckSeam(int debug_level, int32_t blob_number, TWERD* word, TBLOB* blob, TBLOB* other_blob, const GenericVector& seams, SEAM* seam) { - if (seam == nullptr || blob->outlines == nullptr || other_blob->outlines == nullptr || - total_containment(blob, other_blob) || check_blob(other_blob) || - !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) || + if (seam == nullptr || blob->outlines == nullptr || + other_blob->outlines == nullptr || total_containment(blob, other_blob) || + check_blob(other_blob) || !seam->ContainedByBlob(*blob) || + !seam->ContainedByBlob(*other_blob) || any_shared_split_points(seams, seam) || !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) { word->blobs.remove(blob_number + 1); @@ -145,8 +137,7 @@ static SEAM* CheckSeam(int debug_level, int32_t blob_number, TWERD* word, seam = nullptr; #ifndef GRAPHICS_DISABLED if (debug_level) { - if (debug_level >2) - display_blob(blob, Red); + if (debug_level > 2) display_blob(blob, Red); tprintf("\n** seam being removed ** \n"); } #endif @@ -158,7 +149,6 @@ static SEAM* CheckSeam(int debug_level, int32_t blob_number, TWERD* word, return seam; } - /** * @name attempt_blob_chop * @@ -166,24 +156,22 @@ static SEAM* CheckSeam(int debug_level, int32_t blob_number, TWERD* word, * it was successful. */ namespace tesseract { -SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, +SEAM* Wordrec::attempt_blob_chop(TWERD* word, TBLOB* blob, int32_t blob_number, bool italic_blob, const GenericVector& seams) { - if (repair_unchopped_blobs) - preserve_outline_tree (blob->outlines); - TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */ + if (repair_unchopped_blobs) preserve_outline_tree(blob->outlines); + TBLOB* other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */ // Insert it into the word. word->blobs.insert(other_blob, blob_number + 1); - SEAM *seam = nullptr; + SEAM* seam = nullptr; if (prioritize_division) { TPOINT location; if (divisible_blob(blob, italic_blob, &location)) { seam = new SEAM(0.0f, location); } } - if (seam == nullptr) - seam = pick_good_seam(blob); + if (seam == nullptr) seam = pick_good_seam(blob); if (chop_debug) { if (seam != nullptr) seam->Print("Good seam picked="); @@ -194,21 +182,20 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, seam->ApplySeam(italic_blob, blob, other_blob); } - seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, - seams, seam); + seam = + CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam); if (seam == nullptr) { - if (repair_unchopped_blobs) - restore_outline_tree(blob->outlines); + if (repair_unchopped_blobs) restore_outline_tree(blob->outlines); if (allow_blob_division && !prioritize_division) { // If the blob can simply be divided into outlines, then do that. TPOINT location; if (divisible_blob(blob, italic_blob, &location)) { - other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */ + other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */ word->blobs.insert(other_blob, blob_number + 1); seam = new SEAM(0.0f, location); seam->ApplySeam(italic_blob, blob, other_blob); - seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, - seams, seam); + seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, + seam); } } } @@ -219,21 +206,19 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, return seam; } - -SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number, +SEAM* Wordrec::chop_numbered_blob(TWERD* word, int32_t blob_number, bool italic_blob, const GenericVector& seams) { return attempt_blob_chop(word, word->blobs[blob_number], blob_number, italic_blob, seams); } - -SEAM *Wordrec::chop_overlapping_blob(const GenericVector& boxes, - bool italic_blob, WERD_RES *word_res, - int *blob_number) { - TWERD *word = word_res->chopped_word; +SEAM* Wordrec::chop_overlapping_blob(const GenericVector& boxes, + bool italic_blob, WERD_RES* word_res, + int* blob_number) { + TWERD* word = word_res->chopped_word; for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) { - TBLOB *blob = word->blobs[*blob_number]; + TBLOB* blob = word->blobs[*blob_number]; TPOINT topleft, botright; topleft.x = blob->bounding_box().left(); topleft.y = blob->bounding_box().top(); @@ -250,19 +235,16 @@ SEAM *Wordrec::chop_overlapping_blob(const GenericVector& boxes, bool almost_equal_box = false; int num_overlap = 0; for (int i = 0; i < boxes.size(); i++) { - if (original_box.overlap_fraction(boxes[i]) > 0.125) - num_overlap++; - if (original_box.almost_equal(boxes[i], 3)) - almost_equal_box = true; + if (original_box.overlap_fraction(boxes[i]) > 0.125) num_overlap++; + if (original_box.almost_equal(boxes[i], 3)) almost_equal_box = true; } TPOINT location; if (divisible_blob(blob, italic_blob, &location) || (!almost_equal_box && num_overlap > 1)) { - SEAM *seam = attempt_blob_chop(word, blob, *blob_number, - italic_blob, word_res->seam_array); - if (seam != nullptr) - return seam; + SEAM* seam = attempt_blob_chop(word, blob, *blob_number, italic_blob, + word_res->seam_array); + if (seam != nullptr) return seam; } } @@ -272,13 +254,12 @@ SEAM *Wordrec::chop_overlapping_blob(const GenericVector& boxes, } // namespace tesseract - /** * @name any_shared_split_points * * Return true if any of the splits share a point with this one. */ -int any_shared_split_points(const GenericVector& seams, SEAM *seam) { +int any_shared_split_points(const GenericVector& seams, SEAM* seam) { int length; int index; @@ -288,31 +269,26 @@ int any_shared_split_points(const GenericVector& seams, SEAM *seam) { return FALSE; } - /** * @name check_blob * * @return true if blob has a non whole outline. */ -int check_blob(TBLOB *blob) { - TESSLINE *outline; - EDGEPT *edgept; +int check_blob(TBLOB* blob) { + TESSLINE* outline; + EDGEPT* edgept; for (outline = blob->outlines; outline != nullptr; outline = outline->next) { edgept = outline->loop; do { - if (edgept == nullptr) - break; + if (edgept == nullptr) break; edgept = edgept->next; - } - while (edgept != outline->loop); - if (edgept == nullptr) - return 1; + } while (edgept != outline->loop); + if (edgept == nullptr) return 1; } return 0; } - namespace tesseract { /** * @name improve_one_blob @@ -327,13 +303,11 @@ namespace tesseract { * can be used by ApplyBox as well as during recognition. */ SEAM* Wordrec::improve_one_blob(const GenericVector& blob_choices, - DANGERR *fixpt, - bool split_next_to_fragment, - bool italic_blob, - WERD_RES* word, + DANGERR* fixpt, bool split_next_to_fragment, + bool italic_blob, WERD_RES* word, int* blob_number) { float rating_ceiling = MAX_FLOAT32; - SEAM *seam = nullptr; + SEAM* seam = nullptr; do { *blob_number = select_blob_to_split_from_fixpt(fixpt); if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number); @@ -345,16 +319,13 @@ SEAM* Wordrec::improve_one_blob(const GenericVector& blob_choices, split_next_to_fragment); } if (chop_debug) tprintf("blob_number = %d\n", *blob_number); - if (*blob_number == -1) - return nullptr; + if (*blob_number == -1) return nullptr; // TODO(rays) it may eventually help to allow italic_blob to be true, seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob, word->seam_array); - if (seam != nullptr) - return seam; // Success! - if (blob_choices[*blob_number] == nullptr) - return nullptr; + if (seam != nullptr) return seam; // Success! + if (blob_choices[*blob_number] == nullptr) return nullptr; if (!split_point_from_dict) { // We chopped the worst rated blob, try something else next time. rating_ceiling = blob_choices[*blob_number]->rating(); @@ -372,8 +343,7 @@ SEAM* Wordrec::improve_one_blob(const GenericVector& blob_choices, */ SEAM* Wordrec::chop_one_blob(const GenericVector& boxes, const GenericVector& blob_choices, - WERD_RES* word_res, - int* blob_number) { + WERD_RES* word_res, int* blob_number) { if (prioritize_division) { return chop_overlapping_blob(boxes, true, word_res, blob_number); } else { @@ -390,7 +360,7 @@ SEAM* Wordrec::chop_one_blob(const GenericVector& boxes, * a good answer has been found or all the blobs have been chopped up * enough. The results are returned in the WERD_RES. */ -void Wordrec::chop_word_main(WERD_RES *word) { +void Wordrec::chop_word_main(WERD_RES* word) { int num_blobs = word->chopped_word->NumBlobs(); if (word->ratings == nullptr) { word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks); @@ -398,16 +368,17 @@ void Wordrec::chop_word_main(WERD_RES *word) { if (word->ratings->get(0, 0) == nullptr) { // Run initial classification. for (int b = 0; b < num_blobs; ++b) { - BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b, - "Initial:", word->chopped_word, - word->blamer_bundle); + BLOB_CHOICE_LIST* choices = + classify_piece(word->seam_array, b, b, "Initial:", word->chopped_word, + word->blamer_bundle); word->ratings->put(b, b, choices); } } else { // Blobs have been pre-classified. Set matrix cell for all blob choices for (int col = 0; col < word->ratings->dimension(); ++col) { for (int row = col; row < word->ratings->dimension() && - row < col + word->ratings->bandwidth(); ++row) { + row < col + word->ratings->bandwidth(); + ++row) { BLOB_CHOICE_LIST* choices = word->ratings->get(col, row); if (choices != nullptr) { BLOB_CHOICE_IT bc_it(choices); @@ -436,8 +407,8 @@ void Wordrec::chop_word_main(WERD_RES *word) { } if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) { - CallFillLattice(*word->ratings, word->best_choices, - *word->uch_set, word->blamer_bundle); + CallFillLattice(*word->ratings, word->best_choices, *word->uch_set, + word->blamer_bundle); } if (wordrec_debug_level > 0) { tprintf("Final Ratings Matrix:\n"); @@ -453,8 +424,7 @@ void Wordrec::chop_word_main(WERD_RES *word) { * the data, and incrementally runs the segmentation search until a good word * is found, or no more chops can be found. */ -void Wordrec::improve_by_chopping(float rating_cert_scale, - WERD_RES* word, +void Wordrec::improve_by_chopping(float rating_cert_scale, WERD_RES* word, BestChoiceBundle* best_choice_bundle, BlamerBundle* blamer_bundle, LMPainPoints* pain_points, @@ -508,8 +478,8 @@ void Wordrec::improve_by_chopping(float rating_cert_scale, blob_number = 0; } // Run language model incrementally. (Except with the n-gram model on.) - UpdateSegSearchNodes(rating_cert_scale, blob_number, pending, - word, pain_points, best_choice_bundle, blamer_bundle); + UpdateSegSearchNodes(rating_cert_scale, blob_number, pending, word, + pain_points, best_choice_bundle, blamer_bundle); } while (!language_model_->AcceptableChoiceFound() && word->ratings->dimension() < kMaxNumChunks); @@ -521,16 +491,14 @@ void Wordrec::improve_by_chopping(float rating_cert_scale, if (word->blamer_bundle != nullptr && word->blamer_bundle->incorrect_result_reason() == IRR_CORRECT && !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) { - bool valid_permuter = word->best_choice != nullptr && + bool valid_permuter = + word->best_choice != nullptr && Dict::valid_word_permuter(word->best_choice->permuter(), false); - word->blamer_bundle->BlameClassifierOrLangModel(word, - getDict().getUnicharset(), - valid_permuter, - wordrec_debug_blamer); + word->blamer_bundle->BlameClassifierOrLangModel( + word, getDict().getUnicharset(), valid_permuter, wordrec_debug_blamer); } } - /********************************************************************** * select_blob_to_split * @@ -538,15 +506,15 @@ void Wordrec::improve_by_chopping(float rating_cert_scale, * place to apply splits. If none, return -1. **********************************************************************/ int Wordrec::select_blob_to_split( - const GenericVector& blob_choices, - float rating_ceiling, bool split_next_to_fragment) { - BLOB_CHOICE *blob_choice; + const GenericVector& blob_choices, float rating_ceiling, + bool split_next_to_fragment) { + BLOB_CHOICE* blob_choice; int x; float worst = -MAX_FLOAT32; int worst_index = -1; float worst_near_fragment = -MAX_FLOAT32; int worst_index_near_fragment = -1; - const CHAR_FRAGMENT **fragments = nullptr; + const CHAR_FRAGMENT** fragments = nullptr; if (chop_debug) { if (rating_ceiling < MAX_FLOAT32) @@ -556,10 +524,10 @@ int Wordrec::select_blob_to_split( } if (split_next_to_fragment && blob_choices.size() > 0) { - fragments = new const CHAR_FRAGMENT *[blob_choices.length()]; + fragments = new const CHAR_FRAGMENT*[blob_choices.length()]; if (blob_choices[0] != nullptr) { - fragments[0] = getDict().getUnicharset().get_fragment( - blob_choices[0]->unichar_id()); + fragments[0] = + getDict().getUnicharset().get_fragment(blob_choices[0]->unichar_id()); } else { fragments[0] = nullptr; } @@ -572,7 +540,7 @@ int Wordrec::select_blob_to_split( } else { blob_choice = blob_choices[x]; // Populate fragments for the following position. - if (split_next_to_fragment && x+1 < blob_choices.size()) { + if (split_next_to_fragment && x + 1 < blob_choices.size()) { if (blob_choices[x + 1] != nullptr) { fragments[x + 1] = getDict().getUnicharset().get_fragment( blob_choices[x + 1]->unichar_id()); @@ -590,21 +558,22 @@ int Wordrec::select_blob_to_split( if (split_next_to_fragment) { // Update worst_near_fragment and worst_index_near_fragment. bool expand_following_fragment = - (x + 1 < blob_choices.size() && - fragments[x+1] != nullptr && !fragments[x+1]->is_beginning()); + (x + 1 < blob_choices.size() && fragments[x + 1] != nullptr && + !fragments[x + 1]->is_beginning()); bool expand_preceding_fragment = - (x > 0 && fragments[x-1] != nullptr && !fragments[x-1]->is_ending()); + (x > 0 && fragments[x - 1] != nullptr && + !fragments[x - 1]->is_ending()); if ((expand_following_fragment || expand_preceding_fragment) && blob_choice->rating() > worst_near_fragment) { worst_index_near_fragment = x; worst_near_fragment = blob_choice->rating(); if (chop_debug) { - tprintf("worst_index_near_fragment=%d" - " expand_following_fragment=%d" - " expand_preceding_fragment=%d\n", - worst_index_near_fragment, - expand_following_fragment, - expand_preceding_fragment); + tprintf( + "worst_index_near_fragment=%d" + " expand_following_fragment=%d" + " expand_preceding_fragment=%d\n", + worst_index_near_fragment, expand_following_fragment, + expand_preceding_fragment); } } } @@ -614,8 +583,8 @@ int Wordrec::select_blob_to_split( delete[] fragments; // TODO(daria): maybe a threshold of badness for // worst_near_fragment would be useful. - return worst_index_near_fragment != -1 ? - worst_index_near_fragment : worst_index; + return worst_index_near_fragment != -1 ? worst_index_near_fragment + : worst_index; } /********************************************************************** @@ -625,12 +594,10 @@ int Wordrec::select_blob_to_split( * dangerous blob that maps to multiple characters, return that blob * index as a place we need to split. If none, return -1. **********************************************************************/ -int Wordrec::select_blob_to_split_from_fixpt(DANGERR *fixpt) { - if (!fixpt) - return -1; +int Wordrec::select_blob_to_split_from_fixpt(DANGERR* fixpt) { + if (!fixpt) return -1; for (int i = 0; i < fixpt->size(); i++) { - if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end && - (*fixpt)[i].dangerous && + if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end && (*fixpt)[i].dangerous && (*fixpt)[i].correct_is_ngram) { return (*fixpt)[i].begin; } @@ -638,17 +605,15 @@ int Wordrec::select_blob_to_split_from_fixpt(DANGERR *fixpt) { return -1; } - } // namespace tesseract - /********************************************************************** * total_containment * * Check to see if one of these outlines is totally contained within * the bounding box of the other. **********************************************************************/ -int16_t total_containment(TBLOB *blob1, TBLOB *blob2) { +int16_t total_containment(TBLOB* blob1, TBLOB* blob2) { TBOX box1 = blob1->bounding_box(); TBOX box2 = blob2->bounding_box(); return box1.contains(box2) || box2.contains(box1); diff --git a/src/wordrec/chopper.h b/src/wordrec/chopper.h index ed657ce53c..323b6cf9cd 100644 --- a/src/wordrec/chopper.h +++ b/src/wordrec/chopper.h @@ -31,18 +31,17 @@ #include "seam.h" #include "stopper.h" +void preserve_outline(EDGEPT* start); -void preserve_outline(EDGEPT *start); +void preserve_outline_tree(TESSLINE* srcline); -void preserve_outline_tree(TESSLINE *srcline); +EDGEPT* restore_outline(EDGEPT* start); -EDGEPT *restore_outline(EDGEPT *start); +void restore_outline_tree(TESSLINE* srcline); -void restore_outline_tree(TESSLINE *srcline); +int any_shared_split_points(const GenericVector& seams, SEAM* seam); -int any_shared_split_points(const GenericVector& seams, SEAM *seam); +int check_blob(TBLOB* blob); -int check_blob(TBLOB *blob); - -int16_t total_containment(TBLOB *blob1, TBLOB *blob2); +int16_t total_containment(TBLOB* blob1, TBLOB* blob2); #endif diff --git a/src/wordrec/drawfx.cpp b/src/wordrec/drawfx.cpp index 32604668eb..7c2ac00793 100644 --- a/src/wordrec/drawfx.cpp +++ b/src/wordrec/drawfx.cpp @@ -21,35 +21,35 @@ #include "config_auto.h" #endif -#include "drawfx.h" -#include "normalis.h" -#include "werd.h" +#include "drawfx.h" +#include "normalis.h" +#include "werd.h" #ifndef GRAPHICS_DISABLED -#define FXDEMOWIN "FXDemo" -#define FXDEMOXPOS 250 -#define FXDEMOYPOS 0 -#define FXDEMOXSIZE 600 -#define FXDEMOYSIZE 256 -#define BLN_MAX 512 //max coord for bln -#define WERDWIDTH (BLN_MAX*20) -#define DECENT_WERD_WIDTH (5*kBlnXHeight) - //title of window -#define DEBUG_WIN_NAME "FXDebug" -#define DEBUG_XPOS 0 -#define DEBUG_YPOS 120 -#define DEBUG_XSIZE 80 -#define DEBUG_YSIZE 32 -#define YMAX 3508 -#define XMAX 2550 -#define MAXEDGELENGTH 1024 //max steps inoutline +#define FXDEMOWIN "FXDemo" +#define FXDEMOXPOS 250 +#define FXDEMOYPOS 0 +#define FXDEMOXSIZE 600 +#define FXDEMOYSIZE 256 +#define BLN_MAX 512 // max coord for bln +#define WERDWIDTH (BLN_MAX * 20) +#define DECENT_WERD_WIDTH (5 * kBlnXHeight) +// title of window +#define DEBUG_WIN_NAME "FXDebug" +#define DEBUG_XPOS 0 +#define DEBUG_YPOS 120 +#define DEBUG_XSIZE 80 +#define DEBUG_YSIZE 32 +#define YMAX 3508 +#define XMAX 2550 +#define MAXEDGELENGTH 1024 // max steps inoutline #define EXTERN -EXTERN STRING_VAR (fx_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); +EXTERN STRING_VAR(fx_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); EXTERN ScrollView* fx_win = nullptr; -EXTERN FILE *fx_debug = nullptr; +EXTERN FILE* fx_debug = nullptr; /********************************************************************** * create_fx_win @@ -57,22 +57,20 @@ EXTERN FILE *fx_debug = nullptr; * Create the fx window used to show the fit. **********************************************************************/ -void create_fx_win() { //make features win - fx_win = new ScrollView (FXDEMOWIN, - FXDEMOXPOS, FXDEMOYPOS, FXDEMOXSIZE, FXDEMOYSIZE, - WERDWIDTH*2, BLN_MAX*2, true); +void create_fx_win() { // make features win + fx_win = new ScrollView(FXDEMOWIN, FXDEMOXPOS, FXDEMOYPOS, FXDEMOXSIZE, + FXDEMOYSIZE, WERDWIDTH * 2, BLN_MAX * 2, true); } - /********************************************************************** * clear_fx_win * * Clear the fx window and draw on the base/mean lines. **********************************************************************/ -void clear_fx_win() { //make features win +void clear_fx_win() { // make features win fx_win->Clear(); - fx_win->Pen(64,64,64); + fx_win->Pen(64, 64, 64); fx_win->Line(-WERDWIDTH, kBlnBaselineOffset, WERDWIDTH, kBlnBaselineOffset); fx_win->Line(-WERDWIDTH, kBlnXHeight + kBlnBaselineOffset, WERDWIDTH, kBlnXHeight + kBlnBaselineOffset); @@ -86,5 +84,5 @@ void clear_fx_win() { //make features win * Create the fx window used to show the fit. **********************************************************************/ -void create_fxdebug_win() { //make gradients win +void create_fxdebug_win() { // make gradients win } diff --git a/src/wordrec/drawfx.h b/src/wordrec/drawfx.h index bf08c3b4e6..c604b9ec2f 100644 --- a/src/wordrec/drawfx.h +++ b/src/wordrec/drawfx.h @@ -17,16 +17,16 @@ * **********************************************************************/ -#ifndef DRAWFX_H -#define DRAWFX_H +#ifndef DRAWFX_H +#define DRAWFX_H -#include "params.h" -#include "scrollview.h" +#include "params.h" +#include "scrollview.h" -extern STRING_VAR_H (fx_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); +extern STRING_VAR_H(fx_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); extern ScrollView* fx_win; -extern FILE *fx_debug; -void create_fx_win(); //make features win -void clear_fx_win(); //make features win -void create_fxdebug_win(); //make gradients win +extern FILE* fx_debug; +void create_fx_win(); // make features win +void clear_fx_win(); // make features win +void create_fxdebug_win(); // make gradients win #endif diff --git a/src/wordrec/findseam.cpp b/src/wordrec/findseam.cpp index a86f8432aa..2fcaf98311 100644 --- a/src/wordrec/findseam.cpp +++ b/src/wordrec/findseam.cpp @@ -27,8 +27,8 @@ ----------------------------------------------------------------------*/ #include "findseam.h" #include "gradechop.h" -#include "plotedges.h" #include "outlines.h" +#include "plotedges.h" #include "seam.h" #include "wordrec.h" @@ -40,14 +40,14 @@ /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ -#define SPLIT_CLOSENESS 20/* Difference in x value */ - /* How many to keep */ -#define MAX_NUM_SEAMS 150 - /* How many to keep */ -#define MAX_OLD_SEAMS 150 -#define NO_FULL_PRIORITY -1/* Special marker for pri. */ - /* Evalute right away */ -#define BAD_PRIORITY 9999.0 +#define SPLIT_CLOSENESS 20 /* Difference in x value */ + /* How many to keep */ +#define MAX_NUM_SEAMS 150 +/* How many to keep */ +#define MAX_OLD_SEAMS 150 +#define NO_FULL_PRIORITY -1 /* Special marker for pri. */ + /* Evalute right away */ +#define BAD_PRIORITY 9999.0 /*---------------------------------------------------------------------- F u n c t i o n s @@ -60,7 +60,7 @@ namespace tesseract { * Adds the given new_seam to the seams priority queue, unless it is full * and the new seam is worse than the worst. **********************************************************************/ -void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, +void Wordrec::add_seam_to_queue(float new_priority, SEAM* new_seam, SeamQueue* seams) { if (new_seam == nullptr) return; if (chop_debug) { @@ -85,7 +85,6 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, seams->Push(&new_pair); } - /********************************************************************** * choose_best_seam * @@ -99,10 +98,10 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, * a split of nullptr, then no further splits can be supplied by the * caller. **********************************************************************/ -void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, - PRIORITY priority, SEAM **seam_result, - TBLOB *blob, SeamPile *seam_pile) { - SEAM *seam; +void Wordrec::choose_best_seam(SeamQueue* seam_queue, const SPLIT* split, + PRIORITY priority, SEAM** seam_result, + TBLOB* blob, SeamPile* seam_pile) { + SEAM* seam; char str[80]; float my_priority; /* Add seam of split */ @@ -115,8 +114,7 @@ void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, if (chop_debug > 1) seam->Print("Partial priority "); add_seam_to_queue(my_priority, seam, seam_queue); - if (my_priority > chop_good_split) - return; + if (my_priority > chop_good_split) return; } TBOX bbox = blob->bounding_box(); @@ -130,7 +128,7 @@ void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, chop_overlap_knob, chop_centered_maxwidth, chop_center_knob, chop_width_change_knob); if (chop_debug) { - sprintf (str, "Full my_priority %0.0f, ", my_priority); + sprintf(str, "Full my_priority %0.0f, ", my_priority); seam->Print(str); } @@ -151,7 +149,7 @@ void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, if (my_priority < chop_good_split) { delete seam; - return; /* Made good answer */ + return; /* Made good answer */ } if (seam) { @@ -176,15 +174,14 @@ void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, } } - my_priority = seam_queue->empty() ? NO_FULL_PRIORITY - : seam_queue->PeekTop().key(); + my_priority = + seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key(); if ((my_priority > chop_ok_split) || - (my_priority > chop_good_split && split)) + (my_priority > chop_good_split && split)) return; } } - /********************************************************************** * combine_seam * @@ -192,12 +189,12 @@ void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, * from this union should be added to the seam queue. The return value * tells whether or not any additional seams were added to the queue. **********************************************************************/ -void Wordrec::combine_seam(const SeamPile& seam_pile, - const SEAM* seam, SeamQueue* seam_queue) { +void Wordrec::combine_seam(const SeamPile& seam_pile, const SEAM* seam, + SeamQueue* seam_queue) { for (int x = 0; x < seam_pile.size(); ++x) { - const SEAM *this_one = seam_pile.get(x).data(); + const SEAM* this_one = seam_pile.get(x).data(); if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) { - SEAM *new_one = new SEAM(*seam); + SEAM* new_one = new SEAM(*seam); new_one->CombineWith(*this_one); if (chop_debug > 1) new_one->Print("Combo priority "); add_seam_to_queue(new_one->priority(), new_one, seam_queue); @@ -211,17 +208,16 @@ void Wordrec::combine_seam(const SeamPile& seam_pile, * Find and return a good seam that will split this blob into two pieces. * Work from the outlines provided. **********************************************************************/ -SEAM *Wordrec::pick_good_seam(TBLOB *blob) { +SEAM* Wordrec::pick_good_seam(TBLOB* blob) { SeamPile seam_pile(chop_seam_pile_size); - EDGEPT *points[MAX_NUM_POINTS]; + EDGEPT* points[MAX_NUM_POINTS]; EDGEPT_CLIST new_points; - SEAM *seam = nullptr; - TESSLINE *outline; + SEAM* seam = nullptr; + TESSLINE* outline; int16_t num_points = 0; #ifndef GRAPHICS_DISABLED - if (chop_debug > 2) - wordrec_display_splits.set_value(true); + if (chop_debug > 2) wordrec_display_splits.set_value(true); draw_blob_edges(blob); #endif @@ -239,11 +235,12 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) { SeamQueue seam_queue(MAX_NUM_SEAMS); try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob); - try_vertical_splits(points, num_points, &new_points, - &seam_queue, &seam_pile, &seam, blob); + try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, + &seam, blob); if (seam == nullptr) { - choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile); + choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, + &seam_pile); } else if (seam->priority() > chop_good_split) { choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile); @@ -251,7 +248,7 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) { EDGEPT_C_IT it(&new_points); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - EDGEPT *inserted_point = it.data(); + EDGEPT* inserted_point = it.data(); if (seam == nullptr || !seam->UsesPoint(inserted_point)) { for (outline = blob->outlines; outline; outline = outline->next) { if (outline->loop == inserted_point) { @@ -278,13 +275,11 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) { #endif } - if (chop_debug) - wordrec_display_splits.set_value(false); + if (chop_debug) wordrec_display_splits.set_value(false); return (seam); } - /********************************************************************** * try_point_pairs * @@ -292,12 +287,9 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) { * together. See if any of them are suitable for use. Use a seam * queue and seam pile that have already been initialized and used. **********************************************************************/ -void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS], - int16_t num_points, - SeamQueue* seam_queue, - SeamPile* seam_pile, - SEAM ** seam, - TBLOB * blob) { +void Wordrec::try_point_pairs(EDGEPT* points[MAX_NUM_POINTS], + int16_t num_points, SeamQueue* seam_queue, + SeamPile* seam_pile, SEAM** seam, TBLOB* blob) { int16_t x; int16_t y; PRIORITY priority; @@ -319,7 +311,6 @@ void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS], } } - /********************************************************************** * try_vertical_splits * @@ -330,23 +321,20 @@ void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS], * the blob while examining vertical splits and which may safely be * removed once a seam is chosen if they are not part of the seam. **********************************************************************/ -void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], - int16_t num_points, - EDGEPT_CLIST *new_points, - SeamQueue* seam_queue, - SeamPile* seam_pile, - SEAM ** seam, - TBLOB * blob) { - EDGEPT *vertical_point = nullptr; +void Wordrec::try_vertical_splits(EDGEPT* points[MAX_NUM_POINTS], + int16_t num_points, EDGEPT_CLIST* new_points, + SeamQueue* seam_queue, SeamPile* seam_pile, + SEAM** seam, TBLOB* blob) { + EDGEPT* vertical_point = nullptr; int16_t x; PRIORITY priority; - TESSLINE *outline; + TESSLINE* outline; for (x = 0; x < num_points; x++) { vertical_point = nullptr; for (outline = blob->outlines; outline; outline = outline->next) { - vertical_projection_point(points[x], outline->loop, - &vertical_point, new_points); + vertical_projection_point(points[x], outline->loop, &vertical_point, + new_points); } if (vertical_point && points[x] != vertical_point->next && @@ -360,4 +348,4 @@ void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], } } -} +} // namespace tesseract diff --git a/src/wordrec/findseam.h b/src/wordrec/findseam.h index 1e6f6defc4..2840b6a63b 100644 --- a/src/wordrec/findseam.h +++ b/src/wordrec/findseam.h @@ -29,10 +29,10 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ -#include "seam.h" +#include "chop.h" #include "genericheap.h" #include "kdpair.h" -#include "chop.h" +#include "seam.h" // The SeamPair elements own their SEAMs and delete them upon destruction. using SeamPair = tesseract::KDPtrPairInc; diff --git a/src/wordrec/gradechop.cpp b/src/wordrec/gradechop.cpp index 79a727d0a9..8cbe7b694f 100644 --- a/src/wordrec/gradechop.cpp +++ b/src/wordrec/gradechop.cpp @@ -26,11 +26,11 @@ I n c l u d e s ----------------------------------------------------------------------*/ #include "gradechop.h" -#include "wordrec.h" -#include "chop.h" -#include "ndminx.h" #include #include +#include "chop.h" +#include "ndminx.h" +#include "wordrec.h" /*---------------------------------------------------------------------- M a c r o s @@ -49,7 +49,8 @@ namespace tesseract { * 0 = "perfect" * 100 = "no way jay" **********************************************************************/ -PRIORITY Wordrec::grade_split_length(SPLIT *split) { +PRIORITY +Wordrec::grade_split_length(SPLIT* split) { PRIORITY grade; float split_length; @@ -59,12 +60,11 @@ PRIORITY Wordrec::grade_split_length(SPLIT *split) { if (split_length <= 0) grade = 0; else - grade = sqrt (split_length) * chop_split_dist_knob; + grade = sqrt(split_length) * chop_split_dist_knob; return (std::max(0.0f, grade)); } - /********************************************************************** * grade_sharpness * @@ -72,20 +72,20 @@ PRIORITY Wordrec::grade_split_length(SPLIT *split) { * 0 = "perfect" * 100 = "no way jay" **********************************************************************/ -PRIORITY Wordrec::grade_sharpness(SPLIT *split) { +PRIORITY +Wordrec::grade_sharpness(SPLIT* split) { PRIORITY grade; - grade = point_priority (split->point1) + point_priority (split->point2); + grade = point_priority(split->point1) + point_priority(split->point2); if (grade < -360.0) grade = 0; else grade += 360.0; - grade *= chop_sharpness_knob; /* Values 0 to -360 */ + grade *= chop_sharpness_knob; /* Values 0 to -360 */ return (grade); } - } // namespace tesseract diff --git a/src/wordrec/gradechop.h b/src/wordrec/gradechop.h index 01e5bf2641..d11527da82 100644 --- a/src/wordrec/gradechop.h +++ b/src/wordrec/gradechop.h @@ -29,8 +29,8 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ -#include "seam.h" #include "ndminx.h" +#include "seam.h" /*---------------------------------------------------------------------- M a c r o s @@ -43,8 +43,7 @@ * value of its goodness. **********************************************************************/ -#define partial_split_priority(split) \ -(grade_split_length (split) + \ - grade_sharpness (split)) \ +#define partial_split_priority(split) \ + (grade_split_length(split) + grade_sharpness(split)) #endif diff --git a/src/wordrec/language_model.cpp b/src/wordrec/language_model.cpp index b0ee4c3829..6e51f1798e 100644 --- a/src/wordrec/language_model.cpp +++ b/src/wordrec/language_model.cpp @@ -23,10 +23,10 @@ #include "language_model.h" #include "dawg.h" -#include "intproto.h" #include "helpers.h" -#include "lm_state.h" +#include "intproto.h" #include "lm_pain_points.h" +#include "lm_state.h" #include "matrix.h" #include "params.h" #include "params_training_featdef.h" @@ -34,15 +34,13 @@ namespace tesseract { #if (defined(_MSC_VER) && _MSC_VER < 1900) || defined(ANDROID) -static inline double log2(double n) { - return log(n) / log(2.0); -} +static inline double log2(double n) { return log(n) / log(2.0); } #endif // _MSC_VER const float LanguageModel::kMaxAvgNgramCost = 25.0f; -LanguageModel::LanguageModel(const UnicityTable *fontinfo_table, - Dict *dict) +LanguageModel::LanguageModel(const UnicityTable* fontinfo_table, + Dict* dict) : INT_MEMBER(language_model_debug_level, 0, "Language model debug level", dict->getCCUtil()->params()), BOOL_INIT_MEMBER(language_model_ngram_on, false, @@ -127,8 +125,8 @@ LanguageModel::LanguageModel(const UnicityTable *fontinfo_table, LanguageModel::~LanguageModel() { delete dawg_args_.updated_dawgs; } -void LanguageModel::InitForWord(const WERD_CHOICE *prev_word, - bool fixed_pitch, float max_char_wh_ratio, +void LanguageModel::InitForWord(const WERD_CHOICE* prev_word, bool fixed_pitch, + float max_char_wh_ratio, float rating_cert_scale) { fixed_pitch_ = fixed_pitch; max_char_wh_ratio_ = max_char_wh_ratio; @@ -151,8 +149,8 @@ void LanguageModel::InitForWord(const WERD_CHOICE *prev_word, } else { prev_word_str_ = " "; } - const char *str_ptr = prev_word_str_.string(); - const char *str_end = str_ptr + prev_word_str_.length(); + const char* str_ptr = prev_word_str_.string(); + const char* str_end = str_ptr + prev_word_str_.length(); int step; prev_word_unichar_step_len_ = 0; while (str_ptr != str_end && (step = UNICHAR::utf8_step(str_ptr))) { @@ -184,9 +182,11 @@ static void ScanParentsForCaseMix(const UNICHARSET& unicharset, // same BLOB_CHOICE, and that is what we will be using to decide // which to keep. ViterbiStateEntry_IT vit2(&parent_node->viterbi_state_entries); - for (vit2.mark_cycle_pt(); !vit2.cycled_list() && + for (vit2.mark_cycle_pt(); + !vit2.cycled_list() && vit2.data()->curr_b->unichar_id() != other_case; - vit2.forward()) {} + vit2.forward()) { + } if (!vit2.cycled_list()) { vse->competing_vse = vit2.data(); } @@ -244,31 +244,28 @@ static bool HasBetterCaseVariant(const UNICHARSET& unicharset, * lower case to continue an upper case initial, but it has to be detected * in the combiner so it knows which upper case letters are initial alphas. */ -bool LanguageModel::UpdateState( - bool just_classified, - int curr_col, int curr_row, - BLOB_CHOICE_LIST *curr_list, - LanguageModelState *parent_node, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle) { +bool LanguageModel::UpdateState(bool just_classified, int curr_col, + int curr_row, BLOB_CHOICE_LIST* curr_list, + LanguageModelState* parent_node, + LMPainPoints* pain_points, WERD_RES* word_res, + BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle) { if (language_model_debug_level > 0) { - tprintf("\nUpdateState: col=%d row=%d %s", - curr_col, curr_row, just_classified ? "just_classified" : ""); + tprintf("\nUpdateState: col=%d row=%d %s", curr_col, curr_row, + just_classified ? "just_classified" : ""); if (language_model_debug_level > 5) tprintf("(parent=%p)\n", parent_node); else tprintf("\n"); } // Initialize helper variables. - bool word_end = (curr_row+1 >= word_res->ratings->dimension()); + bool word_end = (curr_row + 1 >= word_res->ratings->dimension()); bool new_changed = false; float denom = (language_model_ngram_on) ? ComputeDenom(curr_list) : 1.0f; const UNICHARSET& unicharset = dict_->getUnicharset(); - BLOB_CHOICE *first_lower = nullptr; - BLOB_CHOICE *first_upper = nullptr; - BLOB_CHOICE *first_digit = nullptr; + BLOB_CHOICE* first_lower = nullptr; + BLOB_CHOICE* first_upper = nullptr; + BLOB_CHOICE* first_digit = nullptr; bool has_alnum_mix = false; if (parent_node != nullptr) { int result = SetTopParentLowerUpperDigit(parent_node); @@ -277,17 +274,17 @@ bool LanguageModel::UpdateState( tprintf("No parents found to process\n"); return false; } - if (result > 0) - has_alnum_mix = true; + if (result > 0) has_alnum_mix = true; } if (!GetTopLowerUpperDigit(curr_list, &first_lower, &first_upper, &first_digit)) - has_alnum_mix = false;; + has_alnum_mix = false; + ; ScanParentsForCaseMix(unicharset, parent_node); if (language_model_debug_level > 3 && parent_node != nullptr) { parent_node->Print("Parent viterbi list"); } - LanguageModelState *curr_state = best_choice_bundle->beam[curr_row]; + LanguageModelState* curr_state = best_choice_bundle->beam[curr_row]; // Call AddViterbiStateEntry() for each parent+child ViterbiStateEntry. ViterbiStateEntry_IT vit; @@ -297,7 +294,7 @@ bool LanguageModel::UpdateState( // TODO(antonova): make sure commenting this out if ok for ngram // model scoring (I think this was introduced to fix ngram model quirks). // Skip nullptr unichars unless it is the only choice. - //if (!curr_list->singleton() && c_it.data()->unichar_id() == 0) continue; + // if (!curr_list->singleton() && c_it.data()->unichar_id() == 0) continue; UNICHAR_ID unichar_id = choice->unichar_id(); if (unicharset.get_fragment(unichar_id)) { continue; // Skip fragments. @@ -322,15 +319,14 @@ bool LanguageModel::UpdateState( // increases the chances of choosing IPoc simply because it doesn't // include such a transition. iPoc will beat iPOC and ipoc because // the other words are baseline/x-height inconsistent. - if (HasBetterCaseVariant(unicharset, choice, curr_list)) - continue; + if (HasBetterCaseVariant(unicharset, choice, curr_list)) continue; // Upper counts as lower at the beginning of a word. if (blob_choice_flags & kUpperCaseFlag) blob_choice_flags |= kLowerCaseFlag; new_changed |= AddViterbiStateEntry( - blob_choice_flags, denom, word_end, curr_col, curr_row, - choice, curr_state, nullptr, pain_points, - word_res, best_choice_bundle, blamer_bundle); + blob_choice_flags, denom, word_end, curr_col, curr_row, choice, + curr_state, nullptr, pain_points, word_res, best_choice_bundle, + blamer_bundle); } else { // Get viterbi entries from each parent ViterbiStateEntry. vit.set_to_list(&parent_node->viterbi_state_entries); @@ -338,10 +334,10 @@ bool LanguageModel::UpdateState( vit.mark_cycle_pt(); ViterbiStateEntry* parent_vse = nullptr; LanguageModelFlagsType top_choice_flags; - while ((parent_vse = GetNextParentVSE(just_classified, has_alnum_mix, - c_it.data(), blob_choice_flags, - unicharset, word_res, &vit, - &top_choice_flags)) != nullptr) { + while ((parent_vse = + GetNextParentVSE(just_classified, has_alnum_mix, c_it.data(), + blob_choice_flags, unicharset, word_res, + &vit, &top_choice_flags)) != nullptr) { // Skip pruned entries and do not look at prunable entries if already // examined language_model_viterbi_list_max_num_prunable of those. if (PrunablePath(*parent_vse) && @@ -359,9 +355,9 @@ bool LanguageModel::UpdateState( // Create a new ViterbiStateEntry if BLOB_CHOICE in c_it.data() // looks good according to the Dawgs or character ngram model. new_changed |= AddViterbiStateEntry( - top_choice_flags, denom, word_end, curr_col, curr_row, - c_it.data(), curr_state, parent_vse, pain_points, - word_res, best_choice_bundle, blamer_bundle); + top_choice_flags, denom, word_end, curr_col, curr_row, c_it.data(), + curr_state, parent_vse, pain_points, word_res, best_choice_bundle, + blamer_bundle); } } } @@ -374,13 +370,13 @@ bool LanguageModel::UpdateState( * Uses the first character in the list in place of empty results. * Returns true if both alpha and digits are found. */ -bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, - BLOB_CHOICE **first_lower, - BLOB_CHOICE **first_upper, - BLOB_CHOICE **first_digit) const { +bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST* curr_list, + BLOB_CHOICE** first_lower, + BLOB_CHOICE** first_upper, + BLOB_CHOICE** first_digit) const { BLOB_CHOICE_IT c_it(curr_list); - const UNICHARSET &unicharset = dict_->getUnicharset(); - BLOB_CHOICE *first_unichar = nullptr; + const UNICHARSET& unicharset = dict_->getUnicharset(); + BLOB_CHOICE* first_unichar = nullptr; for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { UNICHAR_ID unichar_id = c_it.data()->unichar_id(); if (unicharset.get_fragment(unichar_id)) continue; // skip fragments @@ -398,7 +394,7 @@ bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, } ASSERT_HOST(first_unichar != nullptr); bool mixed = (*first_lower != nullptr || *first_upper != nullptr) && - *first_digit != nullptr; + *first_digit != nullptr; if (*first_lower == nullptr) *first_lower = first_unichar; if (*first_upper == nullptr) *first_upper = first_unichar; if (*first_digit == nullptr) *first_digit = first_unichar; @@ -415,7 +411,7 @@ bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, * parents are found at all (a legitimate case), and 0 otherwise. */ int LanguageModel::SetTopParentLowerUpperDigit( - LanguageModelState *parent_node) const { + LanguageModelState* parent_node) const { if (parent_node == nullptr) return -1; UNICHAR_ID top_id = INVALID_UNICHAR_ID; ViterbiStateEntry* top_lower = nullptr; @@ -426,7 +422,7 @@ int LanguageModel::SetTopParentLowerUpperDigit( float upper_rating = 0.0f; float digit_rating = 0.0f; float top_rating = 0.0f; - const UNICHARSET &unicharset = dict_->getUnicharset(); + const UNICHARSET& unicharset = dict_->getUnicharset(); ViterbiStateEntry_IT vit(&parent_node->viterbi_state_entries); for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { ViterbiStateEntry* vse = vit.data(); @@ -466,8 +462,8 @@ int LanguageModel::SetTopParentLowerUpperDigit( } } if (top_choice == nullptr) return -1; - bool mixed = (top_lower != nullptr || top_upper != nullptr) && - top_digit != nullptr; + bool mixed = + (top_lower != nullptr || top_upper != nullptr) && top_digit != nullptr; if (top_lower == nullptr) top_lower = top_choice; top_lower->top_choice_flags |= kLowerCaseFlag; if (top_upper == nullptr) top_upper = top_choice; @@ -477,7 +473,7 @@ int LanguageModel::SetTopParentLowerUpperDigit( top_choice->top_choice_flags |= kSmallestRatingFlag; if (top_id != INVALID_UNICHAR_ID && dict_->compound_marker(top_id) && (top_choice->top_choice_flags & - (kLowerCaseFlag | kUpperCaseFlag | kDigitFlag))) { + (kLowerCaseFlag | kUpperCaseFlag | kDigitFlag))) { // If the compound marker top choice carries any of the top alnum flags, // then give it all of them, allowing words like I-295 to be chosen. top_choice->top_choice_flags |= @@ -501,8 +497,7 @@ ViterbiStateEntry* LanguageModel::GetNextParentVSE( // Only consider the parent if it has been updated or // if the current ratings cell has just been classified. if (!just_classified && !parent_vse->updated) continue; - if (language_model_debug_level > 2) - parent_vse->Print("Considering"); + if (language_model_debug_level > 2) parent_vse->Print("Considering"); // If the parent is non-alnum, then upper counts as lower. *top_choice_flags = blob_choice_flags; if ((blob_choice_flags & kUpperCaseFlag) && @@ -542,7 +537,7 @@ ViterbiStateEntry* LanguageModel::GetNextParentVSE( if (bc->PosAndSizeAgree(*competing_b, word_res->x_height, language_model_debug_level >= 5) && !bc->PosAndSizeAgree(*parent_b, word_res->x_height, - language_model_debug_level >= 5)) + language_model_debug_level >= 5)) continue; // Competing blobchoice has a better vertical match. } } @@ -553,32 +548,26 @@ ViterbiStateEntry* LanguageModel::GetNextParentVSE( } bool LanguageModel::AddViterbiStateEntry( - LanguageModelFlagsType top_choice_flags, - float denom, - bool word_end, - int curr_col, int curr_row, - BLOB_CHOICE *b, - LanguageModelState *curr_state, - ViterbiStateEntry *parent_vse, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle) { + LanguageModelFlagsType top_choice_flags, float denom, bool word_end, + int curr_col, int curr_row, BLOB_CHOICE* b, LanguageModelState* curr_state, + ViterbiStateEntry* parent_vse, LMPainPoints* pain_points, + WERD_RES* word_res, BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle) { ViterbiStateEntry_IT vit; if (language_model_debug_level > 1) { - tprintf("AddViterbiStateEntry for unichar %s rating=%.4f" - " certainty=%.4f top_choice_flags=0x%x", - dict_->getUnicharset().id_to_unichar(b->unichar_id()), - b->rating(), b->certainty(), top_choice_flags); + tprintf( + "AddViterbiStateEntry for unichar %s rating=%.4f" + " certainty=%.4f top_choice_flags=0x%x", + dict_->getUnicharset().id_to_unichar(b->unichar_id()), b->rating(), + b->certainty(), top_choice_flags); if (language_model_debug_level > 5) tprintf(" parent_vse=%p\n", parent_vse); else tprintf("\n"); } // Check whether the list is full. - if (curr_state != nullptr && - curr_state->viterbi_state_entries_length >= - language_model_viterbi_list_max_size) { + if (curr_state != nullptr && curr_state->viterbi_state_entries_length >= + language_model_viterbi_list_max_size) { if (language_model_debug_level > 1) { tprintf("AddViterbiStateEntry: viterbi list is full!\n"); } @@ -586,21 +575,21 @@ bool LanguageModel::AddViterbiStateEntry( } // Invoke Dawg language model component. - LanguageModelDawgInfo *dawg_info = - GenerateDawgInfo(word_end, curr_col, curr_row, *b, parent_vse); + LanguageModelDawgInfo* dawg_info = + GenerateDawgInfo(word_end, curr_col, curr_row, *b, parent_vse); float outline_length = AssociateUtils::ComputeOutlineLength(rating_cert_scale_, *b); // Invoke Ngram language model component. - LanguageModelNgramInfo *ngram_info = nullptr; + LanguageModelNgramInfo* ngram_info = nullptr; if (language_model_ngram_on) { ngram_info = GenerateNgramInfo( dict_->getUnicharset().id_to_unichar(b->unichar_id()), b->certainty(), denom, curr_col, curr_row, outline_length, parent_vse); ASSERT_HOST(ngram_info != nullptr); } - bool liked_by_language_model = dawg_info != nullptr || - (ngram_info != nullptr && !ngram_info->pruned); + bool liked_by_language_model = + dawg_info != nullptr || (ngram_info != nullptr && !ngram_info->pruned); // Quick escape if not liked by the language model, can't be consistent // xheight, and not top choice. if (!liked_by_language_model && top_choice_flags == 0) { @@ -614,7 +603,7 @@ bool LanguageModel::AddViterbiStateEntry( // Check consistency of the path and set the relevant consistency_info. LMConsistencyInfo consistency_info( - parent_vse != nullptr ? &parent_vse->consistency_info : nullptr); + parent_vse != nullptr ? &parent_vse->consistency_info : nullptr); // Start with just the x-height consistency, as it provides significant // pruning opportunity. consistency_info.ComputeXheightConsistency( @@ -636,27 +625,28 @@ bool LanguageModel::AddViterbiStateEntry( } // Compute the rest of the consistency info. - FillConsistencyInfo(curr_col, word_end, b, parent_vse, - word_res, &consistency_info); + FillConsistencyInfo(curr_col, word_end, b, parent_vse, word_res, + &consistency_info); if (dawg_info != nullptr && consistency_info.invalid_punc) { consistency_info.invalid_punc = false; // do not penalize dict words } // Compute cost of associating the blobs that represent the current unichar. AssociateStats associate_stats; - ComputeAssociateStats(curr_col, curr_row, max_char_wh_ratio_, - parent_vse, word_res, &associate_stats); + ComputeAssociateStats(curr_col, curr_row, max_char_wh_ratio_, parent_vse, + word_res, &associate_stats); if (parent_vse != nullptr) { associate_stats.shape_cost += parent_vse->associate_stats.shape_cost; associate_stats.bad_shape |= parent_vse->associate_stats.bad_shape; } // Create the new ViterbiStateEntry compute the adjusted cost of the path. - ViterbiStateEntry *new_vse = new ViterbiStateEntry( - parent_vse, b, 0.0, outline_length, - consistency_info, associate_stats, top_choice_flags, dawg_info, - ngram_info, (language_model_debug_level > 0) ? - dict_->getUnicharset().id_to_unichar(b->unichar_id()) : nullptr); + ViterbiStateEntry* new_vse = new ViterbiStateEntry( + parent_vse, b, 0.0, outline_length, consistency_info, associate_stats, + top_choice_flags, dawg_info, ngram_info, + (language_model_debug_level > 0) + ? dict_->getUnicharset().id_to_unichar(b->unichar_id()) + : nullptr); new_vse->cost = ComputeAdjustedPathCost(new_vse); if (language_model_debug_level >= 3) tprintf("Adjusted cost = %g\n", new_vse->cost); @@ -699,8 +689,8 @@ bool LanguageModel::AddViterbiStateEntry( // Update best choice if needed. if (word_end) { - UpdateBestChoice(new_vse, pain_points, word_res, - best_choice_bundle, blamer_bundle); + UpdateBestChoice(new_vse, pain_points, word_res, best_choice_bundle, + blamer_bundle); // Discard the entry if UpdateBestChoice() found flaws in it. if (new_vse->cost >= WERD_CHOICE::kBadRating && new_vse != best_choice_bundle->best_vse) { @@ -729,7 +719,7 @@ bool LanguageModel::AddViterbiStateEntry( int prunable_counter = language_model_viterbi_list_max_num_prunable; vit.set_to_list(&(curr_state->viterbi_state_entries)); for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { - ViterbiStateEntry *curr_vse = vit.data(); + ViterbiStateEntry* curr_vse = vit.data(); // Clear the appropriate top choice flags of the entries in the // list that have cost higher thank new_entry->cost // (since they will not be top choices any more). @@ -760,12 +750,13 @@ bool LanguageModel::AddViterbiStateEntry( return true; } -void LanguageModel::GenerateTopChoiceInfo(ViterbiStateEntry *new_vse, - const ViterbiStateEntry *parent_vse, - LanguageModelState *lms) { +void LanguageModel::GenerateTopChoiceInfo(ViterbiStateEntry* new_vse, + const ViterbiStateEntry* parent_vse, + LanguageModelState* lms) { ViterbiStateEntry_IT vit(&(lms->viterbi_state_entries)); for (vit.mark_cycle_pt(); !vit.cycled_list() && new_vse->top_choice_flags && - new_vse->cost >= vit.data()->cost; vit.forward()) { + new_vse->cost >= vit.data()->cost; + vit.forward()) { // Clear the appropriate flags if the list already contains // a top choice entry with a lower cost. new_vse->top_choice_flags &= ~(vit.data()->top_choice_flags); @@ -776,18 +767,17 @@ void LanguageModel::GenerateTopChoiceInfo(ViterbiStateEntry *new_vse, } } -LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( - bool word_end, - int curr_col, int curr_row, - const BLOB_CHOICE &b, - const ViterbiStateEntry *parent_vse) { +LanguageModelDawgInfo* LanguageModel::GenerateDawgInfo( + bool word_end, int curr_col, int curr_row, const BLOB_CHOICE& b, + const ViterbiStateEntry* parent_vse) { // Initialize active_dawgs from parent_vse if it is not nullptr. // Otherwise use very_beginning_active_dawgs_. if (parent_vse == nullptr) { dawg_args_.active_dawgs = &very_beginning_active_dawgs_; dawg_args_.permuter = NO_PERM; } else { - if (parent_vse->dawg_info == nullptr) return nullptr; // not a dict word path + if (parent_vse->dawg_info == nullptr) + return nullptr; // not a dict word path dawg_args_.active_dawgs = &parent_vse->dawg_info->active_dawgs; dawg_args_.permuter = parent_vse->dawg_info->permuter; } @@ -800,7 +790,8 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( // Deal with compound words. if (dict_->compound_marker(b.unichar_id()) && - (parent_vse == nullptr || parent_vse->dawg_info->permuter != NUMBER_PERM)) { + (parent_vse == nullptr || + parent_vse->dawg_info->permuter != NUMBER_PERM)) { if (language_model_debug_level > 0) tprintf("Found compound marker\n"); // Do not allow compound operators at the beginning and end of the word. // Do not allow more than one compound operator per word. @@ -815,10 +806,11 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( // Check a that the path terminated before the current character is a word. bool has_word_ending = false; for (i = 0; i < parent_vse->dawg_info->active_dawgs.size(); ++i) { - const DawgPosition &pos = parent_vse->dawg_info->active_dawgs[i]; - const Dawg *pdawg = pos.dawg_index < 0 - ? nullptr : dict_->GetDawg(pos.dawg_index); - if (pdawg == nullptr || pos.back_to_punc) continue;; + const DawgPosition& pos = parent_vse->dawg_info->active_dawgs[i]; + const Dawg* pdawg = + pos.dawg_index < 0 ? nullptr : dict_->GetDawg(pos.dawg_index); + if (pdawg == nullptr || pos.back_to_punc) continue; + ; if (pdawg->type() == DAWG_TYPE_WORD && pos.dawg_ref != NO_EDGE && pdawg->end_of_word(pos.dawg_ref)) { has_word_ending = true; @@ -831,7 +823,7 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( return new LanguageModelDawgInfo(&beginning_active_dawgs_, COMPOUND_PERM); } // done dealing with compound words - LanguageModelDawgInfo *dawg_info = nullptr; + LanguageModelDawgInfo* dawg_info = nullptr; // Call LetterIsOkay(). // Use the normalized IDs so that all shapes of ' can be allowed in words @@ -841,8 +833,8 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( DawgPositionVector tmp_active_dawgs; for (int i = 0; i < normed_ids.size(); ++i) { if (language_model_debug_level > 2) - tprintf("Test Letter OK for unichar %d, normed %d\n", - b.unichar_id(), normed_ids[i]); + tprintf("Test Letter OK for unichar %d, normed %d\n", b.unichar_id(), + normed_ids[i]); dict_->LetterIsOkay(&dawg_args_, normed_ids[i], word_end && i == normed_ids.size() - 1); if (dawg_args_.permuter == NO_PERM) { @@ -852,8 +844,8 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( dawg_args_.active_dawgs = &tmp_active_dawgs; } if (language_model_debug_level > 2) - tprintf("Letter was OK for unichar %d, normed %d\n", - b.unichar_id(), normed_ids[i]); + tprintf("Letter was OK for unichar %d, normed %d\n", b.unichar_id(), + normed_ids[i]); } dawg_args_.active_dawgs = nullptr; if (dawg_args_.permuter != NO_PERM) { @@ -867,12 +859,11 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( return dawg_info; } -LanguageModelNgramInfo *LanguageModel::GenerateNgramInfo( - const char *unichar, float certainty, float denom, - int curr_col, int curr_row, float outline_length, - const ViterbiStateEntry *parent_vse) { +LanguageModelNgramInfo* LanguageModel::GenerateNgramInfo( + const char* unichar, float certainty, float denom, int curr_col, + int curr_row, float outline_length, const ViterbiStateEntry* parent_vse) { // Initialize parent context. - const char *pcontext_ptr = ""; + const char* pcontext_ptr = ""; int pcontext_unichar_step_len = 0; if (parent_vse == nullptr) { pcontext_ptr = prev_word_str_.string(); @@ -880,16 +871,15 @@ LanguageModelNgramInfo *LanguageModel::GenerateNgramInfo( } else { pcontext_ptr = parent_vse->ngram_info->context.string(); pcontext_unichar_step_len = - parent_vse->ngram_info->context_unichar_step_len; + parent_vse->ngram_info->context_unichar_step_len; } // Compute p(unichar | parent context). int unichar_step_len = 0; bool pruned = false; float ngram_cost; float ngram_and_classifier_cost = - ComputeNgramCost(unichar, certainty, denom, - pcontext_ptr, &unichar_step_len, - &pruned, &ngram_cost); + ComputeNgramCost(unichar, certainty, denom, pcontext_ptr, + &unichar_step_len, &pruned, &ngram_cost); // Normalize just the ngram_and_classifier_cost by outline_length. // The ngram_cost is used by the params_model, so it needs to be left as-is, // and the params model cost will be normalized by outline_length. @@ -915,27 +905,25 @@ LanguageModelNgramInfo *LanguageModel::GenerateNgramInfo( if (parent_vse != nullptr && parent_vse->ngram_info->pruned) pruned = true; // Construct and return the new LanguageModelNgramInfo. - LanguageModelNgramInfo *ngram_info = new LanguageModelNgramInfo( - pcontext_ptr, pcontext_unichar_step_len, pruned, ngram_cost, - ngram_and_classifier_cost); + LanguageModelNgramInfo* ngram_info = + new LanguageModelNgramInfo(pcontext_ptr, pcontext_unichar_step_len, + pruned, ngram_cost, ngram_and_classifier_cost); ngram_info->context += unichar; ngram_info->context_unichar_step_len += unichar_step_len; assert(ngram_info->context_unichar_step_len <= language_model_ngram_order); return ngram_info; } -float LanguageModel::ComputeNgramCost(const char *unichar, - float certainty, - float denom, - const char *context, - int *unichar_step_len, - bool *found_small_prob, - float *ngram_cost) { - const char *context_ptr = context; - char *modified_context = nullptr; - char *modified_context_end = nullptr; - const char *unichar_ptr = unichar; - const char *unichar_end = unichar_ptr + strlen(unichar_ptr); +float LanguageModel::ComputeNgramCost(const char* unichar, float certainty, + float denom, const char* context, + int* unichar_step_len, + bool* found_small_prob, + float* ngram_cost) { + const char* context_ptr = context; + char* modified_context = nullptr; + char* modified_context_end = nullptr; + const char* unichar_ptr = unichar; + const char* unichar_end = unichar_ptr + strlen(unichar_ptr); float prob = 0.0f; int step = 0; while (unichar_ptr < unichar_end && @@ -955,7 +943,7 @@ float LanguageModel::ComputeNgramCost(const char *unichar, if (modified_context == nullptr) { size_t context_len = strlen(context); modified_context = - new char[context_len + strlen(unichar_ptr) + step + 1]; + new char[context_len + strlen(unichar_ptr) + step + 1]; memcpy(modified_context, context, context_len); modified_context_end = modified_context + context_len; context_ptr = modified_context; @@ -971,20 +959,20 @@ float LanguageModel::ComputeNgramCost(const char *unichar, *found_small_prob = true; prob = language_model_ngram_small_prob; } - *ngram_cost = -1.0*log2(prob); + *ngram_cost = -1.0 * log2(prob); float ngram_and_classifier_cost = - -1.0*log2(CertaintyScore(certainty)/denom) + + -1.0 * log2(CertaintyScore(certainty) / denom) + *ngram_cost * language_model_ngram_scale_factor; if (language_model_debug_level > 1) { tprintf("-log [ p(%s) * p(%s | %s) ] = -log2(%g*%g) = %g\n", unichar, - unichar, context_ptr, CertaintyScore(certainty)/denom, prob, + unichar, context_ptr, CertaintyScore(certainty) / denom, prob, ngram_and_classifier_cost); } delete[] modified_context; return ngram_and_classifier_cost; } -float LanguageModel::ComputeDenom(BLOB_CHOICE_LIST *curr_list) { +float LanguageModel::ComputeDenom(BLOB_CHOICE_LIST* curr_list) { if (curr_list->empty()) return 1.0f; float denom = 0.0f; int len = 0; @@ -1000,19 +988,17 @@ float LanguageModel::ComputeDenom(BLOB_CHOICE_LIST *curr_list) { // Since we can not do this because of speed, we add a very crude estimate // of what these scores for the "missing" classifications would sum up to. denom += (dict_->getUnicharset().size() - len) * - CertaintyScore(language_model_ngram_nonmatch_score); + CertaintyScore(language_model_ngram_nonmatch_score); return denom; } -void LanguageModel::FillConsistencyInfo( - int curr_col, - bool word_end, - BLOB_CHOICE *b, - ViterbiStateEntry *parent_vse, - WERD_RES *word_res, - LMConsistencyInfo *consistency_info) { - const UNICHARSET &unicharset = dict_->getUnicharset(); +void LanguageModel::FillConsistencyInfo(int curr_col, bool word_end, + BLOB_CHOICE* b, + ViterbiStateEntry* parent_vse, + WERD_RES* word_res, + LMConsistencyInfo* consistency_info) { + const UNICHARSET& unicharset = dict_->getUnicharset(); UNICHAR_ID unichar_id = b->unichar_id(); BLOB_CHOICE* parent_b = parent_vse != nullptr ? parent_vse->curr_b : nullptr; @@ -1026,23 +1012,25 @@ void LanguageModel::FillConsistencyInfo( consistency_info->punc_ref = NO_EDGE; } else { bool is_apos = dict_->is_apostrophe(unichar_id); - bool prev_is_numalpha = (parent_b != nullptr && - (unicharset.get_isalpha(parent_b->unichar_id()) || - unicharset.get_isdigit(parent_b->unichar_id()))); + bool prev_is_numalpha = + (parent_b != nullptr && + (unicharset.get_isalpha(parent_b->unichar_id()) || + unicharset.get_isdigit(parent_b->unichar_id()))); UNICHAR_ID pattern_unichar_id = - (unicharset.get_isalpha(unichar_id) || - unicharset.get_isdigit(unichar_id) || - (is_apos && prev_is_numalpha)) ? - Dawg::kPatternUnicharID : unichar_id; + (unicharset.get_isalpha(unichar_id) || + unicharset.get_isdigit(unichar_id) || (is_apos && prev_is_numalpha)) + ? Dawg::kPatternUnicharID + : unichar_id; if (consistency_info->punc_ref == NO_EDGE || pattern_unichar_id != Dawg::kPatternUnicharID || dict_->GetPuncDawg()->edge_letter(consistency_info->punc_ref) != - Dawg::kPatternUnicharID) { + Dawg::kPatternUnicharID) { NODE_REF node = Dict::GetStartingNode(dict_->GetPuncDawg(), consistency_info->punc_ref); consistency_info->punc_ref = - (node != NO_EDGE) ? dict_->GetPuncDawg()->edge_char_of( - node, pattern_unichar_id, word_end) : NO_EDGE; + (node != NO_EDGE) ? dict_->GetPuncDawg()->edge_char_of( + node, pattern_unichar_id, word_end) + : NO_EDGE; if (consistency_info->punc_ref == NO_EDGE) { consistency_info->invalid_punc = true; } @@ -1051,12 +1039,12 @@ void LanguageModel::FillConsistencyInfo( } // Update case related counters. - if (parent_vse != nullptr && !word_end && dict_->compound_marker(unichar_id)) { + if (parent_vse != nullptr && !word_end && + dict_->compound_marker(unichar_id)) { // Reset counters if we are dealing with a compound word. consistency_info->num_lower = 0; consistency_info->num_non_first_upper = 0; - } - else if (unicharset.get_islower(unichar_id)) { + } else if (unicharset.get_islower(unichar_id)) { consistency_info->num_lower++; } else if ((parent_b != nullptr) && unicharset.get_isupper(unichar_id)) { if (unicharset.get_isupper(parent_b->unichar_id()) || @@ -1081,9 +1069,8 @@ void LanguageModel::FillConsistencyInfo( } } - if (parent_vse != nullptr && - (parent_vse->consistency_info.script_id != - dict_->getUnicharset().common_sid())) { + if (parent_vse != nullptr && (parent_vse->consistency_info.script_id != + dict_->getUnicharset().common_sid())) { int parent_script_id = parent_vse->consistency_info.script_id; // If script_id is Common, use script id of the parent instead. if (consistency_info->script_id == dict_->getUnicharset().common_sid()) { @@ -1110,17 +1097,20 @@ void LanguageModel::FillConsistencyInfo( parent_b->fontinfo_id2() == b->fontinfo_id()) { fontinfo_id = b->fontinfo_id(); } else if (parent_b->fontinfo_id() == b->fontinfo_id2() || - parent_b->fontinfo_id2() == b->fontinfo_id2()) { + parent_b->fontinfo_id2() == b->fontinfo_id2()) { fontinfo_id = b->fontinfo_id2(); } - if(language_model_debug_level > 1) { + if (language_model_debug_level > 1) { tprintf("pfont %s pfont %s font %s font2 %s common %s(%d)\n", - (parent_b->fontinfo_id() >= 0) ? - fontinfo_table_->get(parent_b->fontinfo_id()).name : "" , - (parent_b->fontinfo_id2() >= 0) ? - fontinfo_table_->get(parent_b->fontinfo_id2()).name : "", - (b->fontinfo_id() >= 0) ? - fontinfo_table_->get(b->fontinfo_id()).name : "", + (parent_b->fontinfo_id() >= 0) + ? fontinfo_table_->get(parent_b->fontinfo_id()).name + : "", + (parent_b->fontinfo_id2() >= 0) + ? fontinfo_table_->get(parent_b->fontinfo_id2()).name + : "", + (b->fontinfo_id() >= 0) + ? fontinfo_table_->get(b->fontinfo_id()).name + : "", (fontinfo_id >= 0) ? fontinfo_table_->get(fontinfo_id).name : "", (fontinfo_id >= 0) ? fontinfo_table_->get(fontinfo_id).name : "", fontinfo_id); @@ -1131,8 +1121,8 @@ void LanguageModel::FillConsistencyInfo( int temp_gap; if (fontinfo_id >= 0) { // found a common font ASSERT_HOST(fontinfo_id < fontinfo_table_->size()); - if (fontinfo_table_->get(fontinfo_id).get_spacing( - parent_b->unichar_id(), unichar_id, &temp_gap)) { + if (fontinfo_table_->get(fontinfo_id) + .get_spacing(parent_b->unichar_id(), unichar_id, &temp_gap)) { expected_gap = temp_gap; expected_gap_found = true; } @@ -1153,8 +1143,9 @@ void LanguageModel::FillConsistencyInfo( temp_fid = b->fontinfo_id2(); } ASSERT_HOST(temp_fid < 0 || fontinfo_table_->size()); - if (temp_fid >= 0 && fontinfo_table_->get(temp_fid).get_spacing( - parent_b->unichar_id(), unichar_id, &temp_gap)) { + if (temp_fid >= 0 && + fontinfo_table_->get(temp_fid).get_spacing( + parent_b->unichar_id(), unichar_id, &temp_gap)) { expected_gap += temp_gap; num_addends++; } @@ -1166,7 +1157,7 @@ void LanguageModel::FillConsistencyInfo( } if (expected_gap_found) { float actual_gap = - static_cast(word_res->GetBlobsGap(curr_col-1)); + static_cast(word_res->GetBlobsGap(curr_col - 1)); float gap_ratio = expected_gap / actual_gap; // TODO(rays) The gaps seem to be way off most of the time, saved by // the error here that the ratio was compared to 1/2, when it should @@ -1187,7 +1178,7 @@ void LanguageModel::FillConsistencyInfo( } } -float LanguageModel::ComputeAdjustedPathCost(ViterbiStateEntry *vse) { +float LanguageModel::ComputeAdjustedPathCost(ViterbiStateEntry* vse) { ASSERT_HOST(vse != nullptr); if (params_model_.Initialized()) { float features[PTRAIN_NUM_FEATURE_TYPES]; @@ -1204,39 +1195,39 @@ float LanguageModel::ComputeAdjustedPathCost(ViterbiStateEntry *vse) { return cost * vse->outline_length; } else { float adjustment = 1.0f; - if (vse->dawg_info == nullptr || vse->dawg_info->permuter != FREQ_DAWG_PERM) { + if (vse->dawg_info == nullptr || + vse->dawg_info->permuter != FREQ_DAWG_PERM) { adjustment += language_model_penalty_non_freq_dict_word; } if (vse->dawg_info == nullptr) { adjustment += language_model_penalty_non_dict_word; if (vse->length > language_model_min_compound_length) { adjustment += ((vse->length - language_model_min_compound_length) * - language_model_penalty_increment); + language_model_penalty_increment); } } if (vse->associate_stats.shape_cost > 0) { - adjustment += vse->associate_stats.shape_cost / - static_cast(vse->length); + adjustment += + vse->associate_stats.shape_cost / static_cast(vse->length); } if (language_model_ngram_on) { ASSERT_HOST(vse->ngram_info != nullptr); return vse->ngram_info->ngram_and_classifier_cost * adjustment; } else { - adjustment += ComputeConsistencyAdjustment(vse->dawg_info, - vse->consistency_info); + adjustment += + ComputeConsistencyAdjustment(vse->dawg_info, vse->consistency_info); return vse->ratings_sum * adjustment; } } } -void LanguageModel::UpdateBestChoice( - ViterbiStateEntry *vse, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle) { +void LanguageModel::UpdateBestChoice(ViterbiStateEntry* vse, + LMPainPoints* pain_points, + WERD_RES* word_res, + BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle) { bool truth_path; - WERD_CHOICE *word = ConstructWord(vse, word_res, &best_choice_bundle->fixpt, + WERD_CHOICE* word = ConstructWord(vse, word_res, &best_choice_bundle->fixpt, blamer_bundle, &truth_path); ASSERT_HOST(word != nullptr); if (dict_->stopper_debug_level >= 1) { @@ -1250,8 +1241,8 @@ void LanguageModel::UpdateBestChoice( // Record features from the current path if necessary. ParamsTrainingHypothesis curr_hyp; if (blamer_bundle != nullptr) { - if (vse->dawg_info != nullptr) vse->dawg_info->permuter = - static_cast(word->permuter()); + if (vse->dawg_info != nullptr) + vse->dawg_info->permuter = static_cast(word->permuter()); ExtractFeaturesFromPath(*vse, curr_hyp.features); word->string_and_lengths(&(curr_hyp.str), nullptr); curr_hyp.cost = vse->cost; // record cost for error rate computations @@ -1265,8 +1256,7 @@ void LanguageModel::UpdateBestChoice( } // Record the current hypothesis in params_training_bundle. blamer_bundle->AddHypothesis(curr_hyp); - if (truth_path) - blamer_bundle->UpdateBestRating(word->rating()); + if (truth_path) blamer_bundle->UpdateBestRating(word->rating()); } if (blamer_bundle != nullptr && blamer_bundle->GuidedSegsearchStillGoing()) { // The word was constructed solely for blamer_bundle->AddHypothesis, so @@ -1274,7 +1264,8 @@ void LanguageModel::UpdateBestChoice( delete word; return; } - if (word_res->chopped_word != nullptr && !word_res->chopped_word->blobs.empty()) + if (word_res->chopped_word != nullptr && + !word_res->chopped_word->blobs.empty()) word->SetScriptPositions(false, word_res->chopped_word); // Update and log new raw_choice if needed. if (word_res->raw_choice == nullptr || @@ -1289,8 +1280,8 @@ void LanguageModel::UpdateBestChoice( // ClassifyAdaptableWord() to compute adaption acceptance thresholds). // Note: the rating of the word is not adjusted. dict_->adjust_word(word, vse->dawg_info == nullptr, - vse->consistency_info.xht_decision, 0.0, - false, language_model_debug_level > 0); + vse->consistency_info.xht_decision, 0.0, false, + language_model_debug_level > 0); // Hand ownership of the word over to the word_res. if (!word_res->LogNewCookedChoice(dict_->tessedit_truncate_wordchoice_log, dict_->stopper_debug_level >= 1, word)) { @@ -1329,27 +1320,28 @@ void LanguageModel::UpdateBestChoice( } } -void LanguageModel::ExtractFeaturesFromPath( - const ViterbiStateEntry &vse, float features[]) { +void LanguageModel::ExtractFeaturesFromPath(const ViterbiStateEntry& vse, + float features[]) { memset(features, 0, sizeof(float) * PTRAIN_NUM_FEATURE_TYPES); // Record dictionary match info. - int len = vse.length <= kMaxSmallWordUnichars ? 0 : - vse.length <= kMaxMediumWordUnichars ? 1 : 2; + int len = vse.length <= kMaxSmallWordUnichars + ? 0 + : vse.length <= kMaxMediumWordUnichars ? 1 : 2; if (vse.dawg_info != nullptr) { int permuter = vse.dawg_info->permuter; if (permuter == NUMBER_PERM || permuter == USER_PATTERN_PERM) { if (vse.consistency_info.num_digits == vse.length) { - features[PTRAIN_DIGITS_SHORT+len] = 1.0; + features[PTRAIN_DIGITS_SHORT + len] = 1.0; } else { - features[PTRAIN_NUM_SHORT+len] = 1.0; + features[PTRAIN_NUM_SHORT + len] = 1.0; } } else if (permuter == DOC_DAWG_PERM) { - features[PTRAIN_DOC_SHORT+len] = 1.0; + features[PTRAIN_DOC_SHORT + len] = 1.0; } else if (permuter == SYSTEM_DAWG_PERM || permuter == USER_DAWG_PERM || - permuter == COMPOUND_PERM) { - features[PTRAIN_DICT_SHORT+len] = 1.0; + permuter == COMPOUND_PERM) { + features[PTRAIN_DICT_SHORT + len] = 1.0; } else if (permuter == FREQ_DAWG_PERM) { - features[PTRAIN_FREQ_SHORT+len] = 1.0; + features[PTRAIN_FREQ_SHORT + len] = 1.0; } } // Record shape cost feature (normalized by path length). @@ -1366,8 +1358,9 @@ void LanguageModel::ExtractFeaturesFromPath( // features[PTRAIN_NUM_BAD_PUNC] = vse.consistency_info.NumInconsistentPunc(); features[PTRAIN_NUM_BAD_CASE] = vse.consistency_info.NumInconsistentCase(); features[PTRAIN_XHEIGHT_CONSISTENCY] = vse.consistency_info.xht_decision; - features[PTRAIN_NUM_BAD_CHAR_TYPE] = vse.dawg_info == nullptr ? - vse.consistency_info.NumInconsistentChartype() : 0.0; + features[PTRAIN_NUM_BAD_CHAR_TYPE] = + vse.dawg_info == nullptr ? vse.consistency_info.NumInconsistentChartype() + : 0.0; features[PTRAIN_NUM_BAD_SPACING] = vse.consistency_info.NumInconsistentSpaces(); // Disabled this feature for now due to its poor performance. @@ -1378,19 +1371,16 @@ void LanguageModel::ExtractFeaturesFromPath( vse.ratings_sum / static_cast(vse.outline_length); } -WERD_CHOICE *LanguageModel::ConstructWord( - ViterbiStateEntry *vse, - WERD_RES *word_res, - DANGERR *fixpt, - BlamerBundle *blamer_bundle, - bool *truth_path) { +WERD_CHOICE* LanguageModel::ConstructWord(ViterbiStateEntry* vse, + WERD_RES* word_res, DANGERR* fixpt, + BlamerBundle* blamer_bundle, + bool* truth_path) { if (truth_path != nullptr) { - *truth_path = - (blamer_bundle != nullptr && - vse->length == blamer_bundle->correct_segmentation_length()); + *truth_path = (blamer_bundle != nullptr && + vse->length == blamer_bundle->correct_segmentation_length()); } - BLOB_CHOICE *curr_b = vse->curr_b; - ViterbiStateEntry *curr_vse = vse; + BLOB_CHOICE* curr_b = vse->curr_b; + ViterbiStateEntry* curr_vse = vse; int i; bool compound = dict_->hyphenated(); // treat hyphenated words as compound @@ -1406,13 +1396,13 @@ WERD_CHOICE *LanguageModel::ConstructWord( } // Construct a WERD_CHOICE by tracing parent pointers. - WERD_CHOICE *word = new WERD_CHOICE(word_res->uch_set, vse->length); + WERD_CHOICE* word = new WERD_CHOICE(word_res->uch_set, vse->length); word->set_length(vse->length); int total_blobs = 0; - for (i = (vse->length-1); i >= 0; --i) { + for (i = (vse->length - 1); i >= 0; --i) { if (blamer_bundle != nullptr && truth_path != nullptr && *truth_path && !blamer_bundle->MatrixPositionCorrect(i, curr_b->matrix_cell())) { - *truth_path = false; + *truth_path = false; } // The number of blobs used for this choice is row - col + 1. int num_blobs = curr_b->matrix_cell().row - curr_b->matrix_cell().col + 1; @@ -1425,10 +1415,10 @@ WERD_CHOICE *LanguageModel::ConstructWord( ((curr_vse != vse && curr_vse->parent_vse != nullptr) || !dict_->getUnicharset().get_ispunctuation(curr_b->unichar_id())))) { vse->associate_stats.full_wh_ratio_var += - pow(full_wh_ratio_mean - curr_vse->associate_stats.full_wh_ratio, 2); + pow(full_wh_ratio_mean - curr_vse->associate_stats.full_wh_ratio, 2); if (language_model_debug_level > 2) { - tprintf("full_wh_ratio_var += (%g-%g)^2\n", - full_wh_ratio_mean, curr_vse->associate_stats.full_wh_ratio); + tprintf("full_wh_ratio_var += (%g-%g)^2\n", full_wh_ratio_mean, + curr_vse->associate_stats.full_wh_ratio); } } @@ -1436,7 +1426,8 @@ WERD_CHOICE *LanguageModel::ConstructWord( // the unichars on the path (usually this will happen for unichars // that are compounding operators, like "-" and "/"). if (!compound && curr_vse->dawg_info && - curr_vse->dawg_info->permuter == COMPOUND_PERM) compound = true; + curr_vse->dawg_info->permuter == COMPOUND_PERM) + compound = true; // Update curr_* pointers. curr_vse = curr_vse->parent_vse; @@ -1463,8 +1454,8 @@ WERD_CHOICE *LanguageModel::ConstructWord( } else { word->set_permuter(NO_PERM); } - word->set_dangerous_ambig_found_(!dict_->NoDangerousAmbig(word, fixpt, true, - word_res->ratings)); + word->set_dangerous_ambig_found_( + !dict_->NoDangerousAmbig(word, fixpt, true, word_res->ratings)); return word; } diff --git a/src/wordrec/language_model.h b/src/wordrec/language_model.h index 9b5552e506..5b05c50713 100644 --- a/src/wordrec/language_model.h +++ b/src/wordrec/language_model.h @@ -31,8 +31,8 @@ #include "lm_pain_points.h" #include "lm_state.h" #include "matrix.h" -#include "params.h" #include "pageres.h" +#include "params.h" #include "params_model.h" namespace tesseract { @@ -52,7 +52,7 @@ class LanguageModel { // penalty adjustments. static const float kMaxAvgNgramCost; - LanguageModel(const UnicityTable *fontinfo_table, Dict *dict); + LanguageModel(const UnicityTable* fontinfo_table, Dict* dict); ~LanguageModel(); // Fills the given floats array with features extracted from path represented @@ -60,14 +60,13 @@ class LanguageModel { // for feature information. // Note: the function assumes that features points to an array of size // PTRAIN_NUM_FEATURE_TYPES. - static void ExtractFeaturesFromPath(const ViterbiStateEntry &vse, + static void ExtractFeaturesFromPath(const ViterbiStateEntry& vse, float features[]); // Updates data structures that are used for the duration of the segmentation // search on the current word; - void InitForWord(const WERD_CHOICE *prev_word, - bool fixed_pitch, float max_char_wh_ratio, - float rating_cert_scale); + void InitForWord(const WERD_CHOICE* prev_word, bool fixed_pitch, + float max_char_wh_ratio, float rating_cert_scale); // Updates language model state of the given BLOB_CHOICE_LIST (from // the ratings matrix) a its parent. Updates pain_points if new @@ -81,15 +80,11 @@ class LanguageModel { // of the list. // The list ordered by cost that is computed collectively by several // language model components (currently dawg and ngram components). - bool UpdateState( - bool just_classified, - int curr_col, int curr_row, - BLOB_CHOICE_LIST *curr_list, - LanguageModelState *parent_node, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle); + bool UpdateState(bool just_classified, int curr_col, int curr_row, + BLOB_CHOICE_LIST* curr_list, LanguageModelState* parent_node, + LMPainPoints* pain_points, WERD_RES* word_res, + BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle); // Returns true if an acceptable best choice was discovered. inline bool AcceptableChoiceFound() { return acceptable_choice_found_; } @@ -97,10 +92,9 @@ class LanguageModel { acceptable_choice_found_ = val; } // Returns the reference to ParamsModel. - inline ParamsModel &getParamsModel() { return params_model_; } + inline ParamsModel& getParamsModel() { return params_model_; } protected: - inline float CertaintyScore(float cert) { if (language_model_use_sigmoidal_certainty) { // cert is assumed to be between 0 and -dict_->certainty_scale. @@ -117,7 +111,7 @@ class LanguageModel { if (num_problems == 0) return 0.0f; if (num_problems == 1) return penalty; return (penalty + (language_model_penalty_increment * - static_cast(num_problems-1))); + static_cast(num_problems - 1))); } // Computes the adjustment to the ratings sum based on the given @@ -125,13 +119,14 @@ class LanguageModel { // case and character type are penalized proportionally to the number // of inconsistencies on the path. inline float ComputeConsistencyAdjustment( - const LanguageModelDawgInfo *dawg_info, - const LMConsistencyInfo &consistency_info) { + const LanguageModelDawgInfo* dawg_info, + const LMConsistencyInfo& consistency_info) { if (dawg_info != nullptr) { return ComputeAdjustment(consistency_info.NumInconsistentCase(), language_model_penalty_case) + - (consistency_info.inconsistent_script ? - language_model_penalty_script : 0.0f); + (consistency_info.inconsistent_script + ? language_model_penalty_script + : 0.0f); } return (ComputeAdjustment(consistency_info.NumInconsistentPunc(), language_model_penalty_punc) + @@ -141,24 +136,25 @@ class LanguageModel { language_model_penalty_chartype) + ComputeAdjustment(consistency_info.NumInconsistentSpaces(), language_model_penalty_spacing) + - (consistency_info.inconsistent_script ? - language_model_penalty_script : 0.0f) + - (consistency_info.inconsistent_font ? - language_model_penalty_font : 0.0f)); + (consistency_info.inconsistent_script + ? language_model_penalty_script + : 0.0f) + + (consistency_info.inconsistent_font ? language_model_penalty_font + : 0.0f)); } // Returns an adjusted ratings sum that includes inconsistency penalties, // penalties for non-dictionary paths and paths with dips in ngram // probability. - float ComputeAdjustedPathCost(ViterbiStateEntry *vse); + float ComputeAdjustedPathCost(ViterbiStateEntry* vse); // Finds the first lower and upper case letter and first digit in curr_list. // Uses the first character in the list in place of empty results. // Returns true if both alpha and digits are found. - bool GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, - BLOB_CHOICE **first_lower, - BLOB_CHOICE **first_upper, - BLOB_CHOICE **first_digit) const; + bool GetTopLowerUpperDigit(BLOB_CHOICE_LIST* curr_list, + BLOB_CHOICE** first_lower, + BLOB_CHOICE** first_upper, + BLOB_CHOICE** first_digit) const; // Forces there to be at least one entry in the overall set of the // viterbi_state_entries of each element of parent_node that has the // top_choice_flag set for lower, upper and digit using the same rules as @@ -166,46 +162,46 @@ class LanguageModel { // candidate, whether or not the flag is set on some other parent. // Returns 1 if both alpha and digits are found among the parents, -1 if no // parents are found at all (a legitimate case), and 0 otherwise. - int SetTopParentLowerUpperDigit(LanguageModelState *parent_node) const; + int SetTopParentLowerUpperDigit(LanguageModelState* parent_node) const; // Finds the next ViterbiStateEntry with which the given unichar_id can // combine sensibly, taking into account any mixed alnum/mixed case // situation, and whether this combination has been inspected before. ViterbiStateEntry* GetNextParentVSE( - bool just_classified, bool mixed_alnum, - const BLOB_CHOICE* bc, LanguageModelFlagsType blob_choice_flags, - const UNICHARSET& unicharset, WERD_RES* word_res, - ViterbiStateEntry_IT* vse_it, + bool just_classified, bool mixed_alnum, const BLOB_CHOICE* bc, + LanguageModelFlagsType blob_choice_flags, const UNICHARSET& unicharset, + WERD_RES* word_res, ViterbiStateEntry_IT* vse_it, LanguageModelFlagsType* top_choice_flags) const; // Helper function that computes the cost of the path composed of the // path in the given parent ViterbiStateEntry and the given BLOB_CHOICE. // If the new path looks good enough, adds a new ViterbiStateEntry to the // list of viterbi entries in the given BLOB_CHOICE and returns true. - bool AddViterbiStateEntry( - LanguageModelFlagsType top_choice_flags, float denom, bool word_end, - int curr_col, int curr_row, BLOB_CHOICE *b, - LanguageModelState *curr_state, ViterbiStateEntry *parent_vse, - LMPainPoints *pain_points, WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle); + bool AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags, + float denom, bool word_end, int curr_col, + int curr_row, BLOB_CHOICE* b, + LanguageModelState* curr_state, + ViterbiStateEntry* parent_vse, + LMPainPoints* pain_points, WERD_RES* word_res, + BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle); // Determines whether a potential entry is a true top choice and // updates changed accordingly. // // Note: The function assumes that b, top_choice_flags and changed // are not nullptr. - void GenerateTopChoiceInfo(ViterbiStateEntry *new_vse, - const ViterbiStateEntry *parent_vse, - LanguageModelState *lms); + void GenerateTopChoiceInfo(ViterbiStateEntry* new_vse, + const ViterbiStateEntry* parent_vse, + LanguageModelState* lms); // Calls dict_->LetterIsOk() with DawgArgs initialized from parent_vse and // unichar from b.unichar_id(). Constructs and returns LanguageModelDawgInfo // with updated active dawgs, constraints and permuter. // // Note: the caller is responsible for deleting the returned pointer. - LanguageModelDawgInfo *GenerateDawgInfo(bool word_end, - int curr_col, int curr_row, - const BLOB_CHOICE &b, - const ViterbiStateEntry *parent_vse); + LanguageModelDawgInfo* GenerateDawgInfo(bool word_end, int curr_col, + int curr_row, const BLOB_CHOICE& b, + const ViterbiStateEntry* parent_vse); // Computes p(unichar | parent context) and records it in ngram_cost. // If b.unichar_id() is an unlikely continuation of the parent context @@ -214,10 +210,9 @@ class LanguageModel { // updated context (that includes b.unichar_id() at the end) and returns it. // // Note: the caller is responsible for deleting the returned pointer. - LanguageModelNgramInfo *GenerateNgramInfo( - const char *unichar, float certainty, float denom, - int curr_col, int curr_row, float outline_length, - const ViterbiStateEntry *parent_vse); + LanguageModelNgramInfo* GenerateNgramInfo( + const char* unichar, float certainty, float denom, int curr_col, + int curr_row, float outline_length, const ViterbiStateEntry* parent_vse); // Computes -(log(prob(classifier)) + log(prob(ngram model))) // for the given unichar in the given context. If there are multiple @@ -226,21 +221,19 @@ class LanguageModel { // since probability_in_context() can only handle one at a time (while // unicharset might contain ngrams and glyphs composed from multiple UTF8 // characters). - float ComputeNgramCost(const char *unichar, float certainty, float denom, - const char *context, int *unichar_step_len, - bool *found_small_prob, float *ngram_prob); + float ComputeNgramCost(const char* unichar, float certainty, float denom, + const char* context, int* unichar_step_len, + bool* found_small_prob, float* ngram_prob); // Computes the normalization factors for the classifier confidences // (used by ComputeNgramCost()). - float ComputeDenom(BLOB_CHOICE_LIST *curr_list); + float ComputeDenom(BLOB_CHOICE_LIST* curr_list); // Fills the given consistenty_info based on parent_vse.consistency_info // and on the consistency of the given unichar_id with parent_vse. - void FillConsistencyInfo( - int curr_col, bool word_end, BLOB_CHOICE *b, - ViterbiStateEntry *parent_vse, - WERD_RES *word_res, - LMConsistencyInfo *consistency_info); + void FillConsistencyInfo(int curr_col, bool word_end, BLOB_CHOICE* b, + ViterbiStateEntry* parent_vse, WERD_RES* word_res, + LMConsistencyInfo* consistency_info); // Constructs WERD_CHOICE by recording unichar_ids of the BLOB_CHOICEs // on the path represented by the given BLOB_CHOICE and language model @@ -249,11 +242,10 @@ class LanguageModel { // constructed WERD_CHOICE is better than the best/raw choice recorded // in the best_choice_bundle, this function updates the corresponding // fields and sets best_choice_bunldle->updated to true. - void UpdateBestChoice(ViterbiStateEntry *vse, - LMPainPoints *pain_points, - WERD_RES *word_res, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle); + void UpdateBestChoice(ViterbiStateEntry* vse, LMPainPoints* pain_points, + WERD_RES* word_res, + BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle); // Constructs a WERD_CHOICE by tracing parent pointers starting with // the given LanguageModelStateEntry. Returns the constructed word. @@ -262,24 +254,21 @@ class LanguageModel { // length equal to lmse->length). // The caller is responsible for freeing memory associated with the // returned WERD_CHOICE. - WERD_CHOICE *ConstructWord(ViterbiStateEntry *vse, - WERD_RES *word_res, - DANGERR *fixpt, - BlamerBundle *blamer_bundle, - bool *truth_path); + WERD_CHOICE* ConstructWord(ViterbiStateEntry* vse, WERD_RES* word_res, + DANGERR* fixpt, BlamerBundle* blamer_bundle, + bool* truth_path); // Wrapper around AssociateUtils::ComputeStats(). - inline void ComputeAssociateStats(int col, int row, - float max_char_wh_ratio, - ViterbiStateEntry *parent_vse, - WERD_RES *word_res, - AssociateStats *associate_stats) { + inline void ComputeAssociateStats(int col, int row, float max_char_wh_ratio, + ViterbiStateEntry* parent_vse, + WERD_RES* word_res, + AssociateStats* associate_stats) { AssociateUtils::ComputeStats( col, row, (parent_vse != nullptr) ? &(parent_vse->associate_stats) : nullptr, - (parent_vse != nullptr) ? parent_vse->length : 0, - fixed_pitch_, max_char_wh_ratio, - word_res, language_model_debug_level > 2, associate_stats); + (parent_vse != nullptr) ? parent_vse->length : 0, fixed_pitch_, + max_char_wh_ratio, word_res, language_model_debug_level > 2, + associate_stats); } // Returns true if the path with such top_choice_flags and dawg_info @@ -288,17 +277,18 @@ class LanguageModel { // In non-space delimited languages all paths can be "somewhat" dictionary // words. In such languages we can not do dictionary-driven path pruning, // so paths with non-empty dawg_info are considered prunable. - inline bool PrunablePath(const ViterbiStateEntry &vse) { + inline bool PrunablePath(const ViterbiStateEntry& vse) { if (vse.top_choice_flags) return false; if (vse.dawg_info != nullptr && (vse.dawg_info->permuter == SYSTEM_DAWG_PERM || vse.dawg_info->permuter == USER_DAWG_PERM || - vse.dawg_info->permuter == FREQ_DAWG_PERM)) return false; + vse.dawg_info->permuter == FREQ_DAWG_PERM)) + return false; return true; } // Returns true if the given ViterbiStateEntry represents an acceptable path. - inline bool AcceptablePath(const ViterbiStateEntry &vse) { + inline bool AcceptablePath(const ViterbiStateEntry& vse) { return (vse.dawg_info != nullptr || vse.Consistent() || (vse.ngram_info != nullptr && !vse.ngram_info->pruned)); } @@ -355,7 +345,6 @@ class LanguageModel { BOOL_VAR_H(language_model_use_sigmoidal_certainty, false, "Use sigmoidal score for certainty"); - protected: // Member Variables. @@ -368,11 +357,11 @@ class LanguageModel { // The following variables are set at construction time. // Pointer to fontinfo table (not owned by LanguageModel). - const UnicityTable *fontinfo_table_; + const UnicityTable* fontinfo_table_; // Pointer to Dict class, that is used for querying the dictionaries // (the pointer is not owned by LanguageModel). - Dict *dict_; + Dict* dict_; // TODO(daria): the following variables should become LanguageModel params // when the old code in bestfirst.cpp and heuristic.cpp is deprecated. diff --git a/src/wordrec/lm_consistency.cpp b/src/wordrec/lm_consistency.cpp index bc7308a1b7..928c18cdf3 100644 --- a/src/wordrec/lm_consistency.cpp +++ b/src/wordrec/lm_consistency.cpp @@ -26,8 +26,8 @@ namespace tesseract { -void LMConsistencyInfo::ComputeXheightConsistency( - const BLOB_CHOICE *b, bool is_punc) { +void LMConsistencyInfo::ComputeXheightConsistency(const BLOB_CHOICE* b, + bool is_punc) { if (xht_decision == XH_INCONSISTENT) return; // It isn't going to get any better. @@ -54,9 +54,8 @@ void LMConsistencyInfo::ComputeXheightConsistency( // if (parent_vse == nullptr && sp == LanguageModelConsistencyInfo::kNORM) { // small_xht = 0; // } - IntersectRange(b->min_xheight(), b->max_xheight(), - &(xht_lo[xht_sp]), &(xht_hi[xht_sp])); - + IntersectRange(b->min_xheight(), b->max_xheight(), &(xht_lo[xht_sp]), + &(xht_hi[xht_sp])); // Compute xheight inconsistency kinds. if (parent_null) { diff --git a/src/wordrec/lm_consistency.h b/src/wordrec/lm_consistency.h index 8f2cf41872..bde06ea33f 100644 --- a/src/wordrec/lm_consistency.h +++ b/src/wordrec/lm_consistency.h @@ -28,7 +28,7 @@ namespace tesseract { -static const char * const XHeightConsistencyEnumName[] = { +static const char* const XHeightConsistencyEnumName[] = { "XH_GOOD", "XH_SUBNORMAL", "XH_INCONSISTENT", @@ -36,7 +36,7 @@ static const char * const XHeightConsistencyEnumName[] = { // Struct for keeping track of the consistency of the path. struct LMConsistencyInfo { - enum ChartypeEnum { CT_NONE, CT_ALPHA, CT_DIGIT, CT_OTHER}; + enum ChartypeEnum { CT_NONE, CT_ALPHA, CT_DIGIT, CT_OTHER }; // How much do characters have to be shifted away from normal parameters // before we say they're not normal? @@ -81,36 +81,30 @@ struct LMConsistencyInfo { *this = *parent_info; } } - inline int NumInconsistentPunc() const { - return invalid_punc ? num_punc : 0; - } + inline int NumInconsistentPunc() const { return invalid_punc ? num_punc : 0; } inline int NumInconsistentCase() const { return (num_non_first_upper > num_lower) ? num_lower : num_non_first_upper; } inline int NumInconsistentChartype() const { return (NumInconsistentPunc() + num_other + - ((num_alphas > num_digits) ? num_digits : num_alphas)); + ((num_alphas > num_digits) ? num_digits : num_alphas)); } inline bool Consistent() const { return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 && NumInconsistentChartype() == 0 && !inconsistent_script && !inconsistent_font && !InconsistentXHeight()); } - inline int NumInconsistentSpaces() const { - return num_inconsistent_spaces; - } + inline int NumInconsistentSpaces() const { return num_inconsistent_spaces; } inline int InconsistentXHeight() const { return xht_decision == XH_INCONSISTENT; } - void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc); + void ComputeXheightConsistency(const BLOB_CHOICE* b, bool is_punc); float BodyMinXHeight() const { - if (InconsistentXHeight()) - return 0.0f; + if (InconsistentXHeight()) return 0.0f; return xht_lo[kNORM]; } float BodyMaxXHeight() const { - if (InconsistentXHeight()) - return static_cast(INT16_MAX); + if (InconsistentXHeight()) return static_cast(INT16_MAX); return xht_hi[kNORM]; } diff --git a/src/wordrec/lm_pain_points.cpp b/src/wordrec/lm_pain_points.cpp index 03c8aef3e6..4b4c155860 100644 --- a/src/wordrec/lm_pain_points.cpp +++ b/src/wordrec/lm_pain_points.cpp @@ -36,7 +36,7 @@ namespace tesseract { const float LMPainPoints::kDefaultPainPointPriorityAdjustment = 2.0f; const float LMPainPoints::kLooseMaxCharWhRatio = 2.5f; -LMPainPointsType LMPainPoints::Deque(MATRIX_COORD *pp, float *priority) { +LMPainPointsType LMPainPoints::Deque(MATRIX_COORD* pp, float* priority) { for (int h = 0; h < LM_PPTYPE_NUM; ++h) { if (pain_points_heaps_[h].empty()) continue; *priority = pain_points_heaps_[h].PeekTop().key; @@ -47,31 +47,32 @@ LMPainPointsType LMPainPoints::Deque(MATRIX_COORD *pp, float *priority) { return LM_PPTYPE_NUM; } -void LMPainPoints::GenerateInitial(WERD_RES *word_res) { - MATRIX *ratings = word_res->ratings; +void LMPainPoints::GenerateInitial(WERD_RES* word_res) { + MATRIX* ratings = word_res->ratings; AssociateStats associate_stats; for (int col = 0; col < ratings->dimension(); ++col) { - int row_end = std::min(ratings->dimension(), col + ratings->bandwidth() + 1); + int row_end = + std::min(ratings->dimension(), col + ratings->bandwidth() + 1); for (int row = col + 1; row < row_end; ++row) { MATRIX_COORD coord(col, row); - if (coord.Valid(*ratings) && - ratings->get(col, row) != NOT_CLASSIFIED) continue; + if (coord.Valid(*ratings) && ratings->get(col, row) != NOT_CLASSIFIED) + continue; // Add an initial pain point if needed. if (ratings->Classified(col, row - 1, dict_->WildcardID()) || (col + 1 < ratings->dimension() && - ratings->Classified(col + 1, row, dict_->WildcardID()))) { - GeneratePainPoint(col, row, LM_PPTYPE_SHAPE, 0.0, - true, max_char_wh_ratio_, word_res); + ratings->Classified(col + 1, row, dict_->WildcardID()))) { + GeneratePainPoint(col, row, LM_PPTYPE_SHAPE, 0.0, true, + max_char_wh_ratio_, word_res); } } } } void LMPainPoints::GenerateFromPath(float rating_cert_scale, - ViterbiStateEntry *vse, - WERD_RES *word_res) { - ViterbiStateEntry *curr_vse = vse; - BLOB_CHOICE *curr_b = vse->curr_b; + ViterbiStateEntry* vse, + WERD_RES* word_res) { + ViterbiStateEntry* curr_vse = vse; + BLOB_CHOICE* curr_b = vse->curr_b; // The following pain point generation and priority calculation approaches // prioritize exploring paths with low average rating of the known part of // the path, while not relying on the ratings of the pieces to be combined. @@ -109,15 +110,16 @@ void LMPainPoints::GenerateFromPath(float rating_cert_scale, float ol_dif = vse->outline_length - ol_subtr; // priority is set to the average rating of the path per unit of outline, // not counting the ratings of the pieces to be joined. - float priority = ol_dif > 0 ? (vse->ratings_sum-rat_subtr)/ol_dif : 0.0; + float priority = + ol_dif > 0 ? (vse->ratings_sum - rat_subtr) / ol_dif : 0.0; GeneratePainPoint(pain_coord.col, pain_coord.row, LM_PPTYPE_PATH, priority, true, max_char_wh_ratio_, word_res); } else if (debug_level_ > 3) { tprintf("NO pain point (Classified) for col=%d row=%d type=%s\n", pain_coord.col, pain_coord.row, LMPainPointsTypeName[LM_PPTYPE_PATH]); - BLOB_CHOICE_IT b_it(word_res->ratings->get(pain_coord.col, - pain_coord.row)); + BLOB_CHOICE_IT b_it( + word_res->ratings->get(pain_coord.col, pain_coord.row)); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOB_CHOICE* choice = b_it.data(); choice->print_full(); @@ -129,34 +131,33 @@ void LMPainPoints::GenerateFromPath(float rating_cert_scale, } } -void LMPainPoints::GenerateFromAmbigs(const DANGERR &fixpt, - ViterbiStateEntry *vse, - WERD_RES *word_res) { +void LMPainPoints::GenerateFromAmbigs(const DANGERR& fixpt, + ViterbiStateEntry* vse, + WERD_RES* word_res) { // Begins and ends in DANGERR vector now record the blob indices as used // by the ratings matrix. for (int d = 0; d < fixpt.size(); ++d) { - const DANGERR_INFO &danger = fixpt[d]; + const DANGERR_INFO& danger = fixpt[d]; // Only use dangerous ambiguities. if (danger.dangerous) { - GeneratePainPoint(danger.begin, danger.end - 1, - LM_PPTYPE_AMBIG, vse->cost, true, - kLooseMaxCharWhRatio, word_res); + GeneratePainPoint(danger.begin, danger.end - 1, LM_PPTYPE_AMBIG, + vse->cost, true, kLooseMaxCharWhRatio, word_res); } } } -bool LMPainPoints::GeneratePainPoint( - int col, int row, LMPainPointsType pp_type, float special_priority, - bool ok_to_extend, float max_char_wh_ratio, - WERD_RES *word_res) { +bool LMPainPoints::GeneratePainPoint(int col, int row, LMPainPointsType pp_type, + float special_priority, bool ok_to_extend, + float max_char_wh_ratio, + WERD_RES* word_res) { MATRIX_COORD coord(col, row); if (coord.Valid(*word_res->ratings) && word_res->ratings->Classified(col, row, dict_->WildcardID())) { return false; } if (debug_level_ > 3) { - tprintf("Generating pain point for col=%d row=%d type=%s\n", - col, row, LMPainPointsTypeName[pp_type]); + tprintf("Generating pain point for col=%d row=%d type=%s\n", col, row, + LMPainPointsTypeName[pp_type]); } // Compute associate stats. AssociateStats associate_stats; @@ -211,8 +212,7 @@ bool LMPainPoints::GeneratePainPoint( void LMPainPoints::RemapForSplit(int index) { for (int i = 0; i < LM_PPTYPE_NUM; ++i) { GenericVector* heap = pain_points_heaps_[i].heap(); - for (int j = 0; j < heap->size(); ++j) - (*heap)[j].data.MapForSplit(index); + for (int j = 0; j < heap->size(); ++j) (*heap)[j].data.MapForSplit(index); } } diff --git a/src/wordrec/lm_pain_points.h b/src/wordrec/lm_pain_points.h index 68eca59316..35087a7397 100644 --- a/src/wordrec/lm_pain_points.h +++ b/src/wordrec/lm_pain_points.h @@ -43,7 +43,7 @@ enum LMPainPointsType { LM_PPTYPE_NUM }; -static const char * const LMPainPointsTypeName[] = { +static const char* const LMPainPointsTypeName[] = { "LM_PPTYPE_BLAMER", "LM_PPTYPE_AMBIGS", "LM_PPTYPE_PATH", @@ -52,7 +52,6 @@ static const char * const LMPainPointsTypeName[] = { class LMPainPoints { public: - static const float kDefaultPainPointPriorityAdjustment; // If there is a significant drop in character ngram probability or a // dangerous ambiguity make the thresholds on what blob combinations @@ -63,9 +62,12 @@ class LMPainPoints { return LMPainPointsTypeName[type]; } - LMPainPoints(int max, float rat, bool fp, const Dict *d, int deb) : - max_heap_size_(max), max_char_wh_ratio_(rat), fixed_pitch_(fp), - dict_(d), debug_level_(deb) {} + LMPainPoints(int max, float rat, bool fp, const Dict* d, int deb) + : max_heap_size_(max), + max_char_wh_ratio_(rat), + fixed_pitch_(fp), + dict_(d), + debug_level_(deb) {} ~LMPainPoints() {} // Returns true if the heap of pain points of pp_type is not empty(). @@ -76,7 +78,7 @@ class LMPainPoints { // Dequeues the next pain point from the pain points queue and copies // its contents and priority to *pp and *priority. // Returns LM_PPTYPE_NUM if pain points queue is empty, otherwise the type. - LMPainPointsType Deque(MATRIX_COORD *pp, float *priority); + LMPainPointsType Deque(MATRIX_COORD* pp, float* priority); // Clears pain points heap. void Clear() { @@ -85,19 +87,19 @@ class LMPainPoints { // For each cell, generate a "pain point" if the cell is not classified // and has a left or right neighbor that was classified. - void GenerateInitial(WERD_RES *word_res); + void GenerateInitial(WERD_RES* word_res); // Generate pain points from the given path. - void GenerateFromPath(float rating_cert_scale, ViterbiStateEntry *vse, - WERD_RES *word_res); + void GenerateFromPath(float rating_cert_scale, ViterbiStateEntry* vse, + WERD_RES* word_res); // Generate pain points from dangerous ambiguities in best choice. - void GenerateFromAmbigs(const DANGERR &fixpt, ViterbiStateEntry *vse, - WERD_RES *word_res); + void GenerateFromAmbigs(const DANGERR& fixpt, ViterbiStateEntry* vse, + WERD_RES* word_res); // Generate a pain point for the blamer. - bool GenerateForBlamer(double max_char_wh_ratio, WERD_RES *word_res, - int col, int row) { + bool GenerateForBlamer(double max_char_wh_ratio, WERD_RES* word_res, int col, + int row) { return GeneratePainPoint(col, row, LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res); } @@ -109,8 +111,7 @@ class LMPainPoints { // AssociateStats::gap_sum is used. bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, - float max_char_wh_ratio, - WERD_RES *word_res); + float max_char_wh_ratio, WERD_RES* word_res); // Adjusts the pain point coordinates to cope with expansion of the ratings // matrix due to a split of the blob with the given index. @@ -128,7 +129,7 @@ class LMPainPoints { // Set to true if fixed pitch should be assumed. bool fixed_pitch_; // Cached pointer to dictionary. - const Dict *dict_; + const Dict* dict_; // Debug level for print statements. int debug_level_; }; diff --git a/src/wordrec/lm_state.cpp b/src/wordrec/lm_state.cpp index a2004b7260..58df8db3b4 100644 --- a/src/wordrec/lm_state.cpp +++ b/src/wordrec/lm_state.cpp @@ -24,14 +24,14 @@ namespace tesseract { ELISTIZE(ViterbiStateEntry) -void ViterbiStateEntry::Print(const char *msg) const { +void ViterbiStateEntry::Print(const char* msg) const { tprintf("%s ViterbiStateEntry", msg); if (updated) tprintf("(NEW)"); if (this->debug_str != nullptr) { tprintf(" str=%s", this->debug_str->string()); } - tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", - this->ratings_sum, this->length, this->cost); + tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", this->ratings_sum, + this->length, this->cost); if (this->top_choice_flags) { tprintf(" top_choice_flags=0x%x", this->top_choice_flags); } @@ -47,8 +47,7 @@ void ViterbiStateEntry::Print(const char *msg) const { if (this->ngram_info) { tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d", this->ngram_info->ngram_and_classifier_cost, - this->ngram_info->context.string(), - this->ngram_info->pruned); + this->ngram_info->context.string(), this->ngram_info->pruned); } if (this->associate_stats.shape_cost > 0.0f) { tprintf(" shape_cost=%g", this->associate_stats.shape_cost); @@ -67,9 +66,9 @@ void LanguageModelState::Clear() { viterbi_state_entries_length = 0; } -void LanguageModelState::Print(const char *msg) { - tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", - msg, viterbi_state_entries_prunable_max_cost, +void LanguageModelState::Print(const char* msg) { + tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", msg, + viterbi_state_entries_prunable_max_cost, viterbi_state_entries_prunable_length, viterbi_state_entries_length); ViterbiStateEntry_IT vit(&viterbi_state_entries); for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { @@ -77,5 +76,4 @@ void LanguageModelState::Print(const char *msg) { } } - } // namespace tesseract diff --git a/src/wordrec/lm_state.h b/src/wordrec/lm_state.h index da377d0745..ee0eb3cac7 100644 --- a/src/wordrec/lm_state.h +++ b/src/wordrec/lm_state.h @@ -23,8 +23,8 @@ #define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ #include "associate.h" -#include "elst.h" #include "dawg.h" +#include "elst.h" #include "lm_consistency.h" #include "matrix.h" #include "ratngs.h" @@ -59,7 +59,7 @@ using LanguageModelFlagsType = unsigned char; /// component. It stores the set of active dawgs in which the sequence of /// letters on a path can be found. struct LanguageModelDawgInfo { - LanguageModelDawgInfo(const DawgPositionVector *a, PermuterType pt) + LanguageModelDawgInfo(const DawgPositionVector* a, PermuterType pt) : active_dawgs(*a), permuter(pt) {} DawgPositionVector active_dawgs; PermuterType permuter; @@ -68,9 +68,12 @@ struct LanguageModelDawgInfo { /// Struct for storing additional information used by Ngram language model /// component. struct LanguageModelNgramInfo { - LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc) - : context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc), - ngram_and_classifier_cost(ncc) {} + LanguageModelNgramInfo(const char* c, int l, bool p, float nc, float ncc) + : context(c), + context_unichar_step_len(l), + pruned(p), + ngram_cost(nc), + ngram_and_classifier_cost(ncc) {} STRING context; //< context string /// Length of the context measured by advancing using UNICHAR::utf8_step() /// (should be at most the order of the character ngram model used). @@ -89,20 +92,25 @@ struct LanguageModelNgramInfo { /// Struct for storing the information about a path in the segmentation graph /// explored by Viterbi search. struct ViterbiStateEntry : public ELIST_LINK { - ViterbiStateEntry(ViterbiStateEntry *pe, - BLOB_CHOICE *b, float c, float ol, - const LMConsistencyInfo &ci, - const AssociateStats &as, - LanguageModelFlagsType tcf, - LanguageModelDawgInfo *d, - LanguageModelNgramInfo *n, - const char *debug_uch) - : cost(c), curr_b(b), parent_vse(pe), competing_vse(nullptr), - ratings_sum(b->rating()), - min_certainty(b->certainty()), adapted(b->IsAdapted()), length(1), - outline_length(ol), consistency_info(ci), associate_stats(as), - top_choice_flags(tcf), dawg_info(d), ngram_info(n), - updated(true) { + ViterbiStateEntry(ViterbiStateEntry* pe, BLOB_CHOICE* b, float c, float ol, + const LMConsistencyInfo& ci, const AssociateStats& as, + LanguageModelFlagsType tcf, LanguageModelDawgInfo* d, + LanguageModelNgramInfo* n, const char* debug_uch) + : cost(c), + curr_b(b), + parent_vse(pe), + competing_vse(nullptr), + ratings_sum(b->rating()), + min_certainty(b->certainty()), + adapted(b->IsAdapted()), + length(1), + outline_length(ol), + consistency_info(ci), + associate_stats(as), + top_choice_flags(tcf), + dawg_info(d), + ngram_info(n), + updated(true) { debug_str = (debug_uch == nullptr) ? nullptr : new STRING(); if (pe != nullptr) { ratings_sum += pe->ratings_sum; @@ -123,11 +131,11 @@ struct ViterbiStateEntry : public ELIST_LINK { } /// Comparator function for sorting ViterbiStateEntry_LISTs in /// non-increasing order of costs. - static int Compare(const void *e1, const void *e2) { - const ViterbiStateEntry *ve1 = - *static_cast(e1); - const ViterbiStateEntry *ve2 = - *static_cast(e2); + static int Compare(const void* e1, const void* e2) { + const ViterbiStateEntry* ve1 = + *static_cast(e1); + const ViterbiStateEntry* ve2 = + *static_cast(e2); return (ve1->cost < ve2->cost) ? -1 : 1; } inline bool Consistent() const { @@ -140,24 +148,24 @@ struct ViterbiStateEntry : public ELIST_LINK { /// result. bool HasAlnumChoice(const UNICHARSET& unicharset) { if (curr_b == nullptr) return false; - UNICHAR_ID unichar_id = curr_b->unichar_id(); + UNICHAR_ID unichar_id = curr_b->unichar_id(); if (unicharset.get_isalpha(unichar_id) || unicharset.get_isdigit(unichar_id)) return true; return false; } - void Print(const char *msg) const; + void Print(const char* msg) const; /// The cost is an adjusted ratings sum, that is adjusted by all the language /// model components that use Viterbi search. float cost; /// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this). - BLOB_CHOICE *curr_b; - ViterbiStateEntry *parent_vse; + BLOB_CHOICE* curr_b; + ViterbiStateEntry* parent_vse; /// Pointer to a case-competing ViterbiStateEntry in the same list that /// represents a path ending in the same letter of the opposite case. - ViterbiStateEntry *competing_vse; + ViterbiStateEntry* competing_vse; /// Various information about the characters on the path represented /// by this ViterbiStateEntry. @@ -175,32 +183,32 @@ struct ViterbiStateEntry : public ELIST_LINK { /// Extra information maintained by Dawg language model component /// (owned by ViterbiStateEntry). - LanguageModelDawgInfo *dawg_info; + LanguageModelDawgInfo* dawg_info; /// Extra information maintained by Ngram language model component /// (owned by ViterbiStateEntry). - LanguageModelNgramInfo *ngram_info; + LanguageModelNgramInfo* ngram_info; bool updated; //< set to true if the entry has just been created/updated /// UTF8 string representing the path corresponding to this vse. /// Populated only in when language_model_debug_level > 0. - STRING *debug_str; + STRING* debug_str; }; ELISTIZEH(ViterbiStateEntry) /// Struct to store information maintained by various language model components. struct LanguageModelState { - LanguageModelState() : - viterbi_state_entries_prunable_length(0), - viterbi_state_entries_prunable_max_cost(MAX_FLOAT32), - viterbi_state_entries_length(0) {} + LanguageModelState() + : viterbi_state_entries_prunable_length(0), + viterbi_state_entries_prunable_max_cost(MAX_FLOAT32), + viterbi_state_entries_length(0) {} ~LanguageModelState() {} /// Clears the viterbi search state back to its initial conditions. void Clear(); - void Print(const char *msg); + void Print(const char* msg); /// Storage for the Viterbi state. ViterbiStateEntry_LIST viterbi_state_entries; @@ -214,7 +222,7 @@ struct LanguageModelState { /// Bundle together all the things pertaining to the best choice/state. struct BestChoiceBundle { explicit BestChoiceBundle(int matrix_dimension) - : updated(false), best_vse(nullptr) { + : updated(false), best_vse(nullptr) { beam.reserve(matrix_dimension); for (int i = 0; i < matrix_dimension; ++i) beam.push_back(new LanguageModelState); @@ -230,7 +238,7 @@ struct BestChoiceBundle { /// somewhere in the corresponding row. PointerVector beam; /// Best ViterbiStateEntry and BLOB_CHOICE. - ViterbiStateEntry *best_vse; + ViterbiStateEntry* best_vse; }; } // namespace tesseract diff --git a/src/wordrec/measure.h b/src/wordrec/measure.h index d7718cc57a..5c2ceb3ad7 100644 --- a/src/wordrec/measure.h +++ b/src/wordrec/measure.h @@ -41,8 +41,7 @@ ---------------------------------------------------------------------- */ -typedef struct -{ +typedef struct { long num_samples; float sum_of_samples; float sum_of_squares; @@ -89,8 +88,7 @@ typedef struct * Return the number of samples in a measurement. **********************************************************************/ -#define number_of_samples(m) \ -((m).num_samples) +#define number_of_samples(m) ((m).num_samples) /********************************************************************** * standard_deviation @@ -98,8 +96,7 @@ typedef struct * Return the standard deviation of the measurement. **********************************************************************/ -#define standard_deviation(m) \ -((float) sqrt (VARIANCE (m))) +#define standard_deviation(m) ((float)sqrt(VARIANCE(m))) /********************************************************************** * variance diff --git a/src/wordrec/outlines.cpp b/src/wordrec/outlines.cpp index 6fdfdbd976..0d5864be55 100644 --- a/src/wordrec/outlines.cpp +++ b/src/wordrec/outlines.cpp @@ -46,9 +46,8 @@ namespace tesseract { * the line segment. Return that point in near_pt. Returns whether * near_pt was newly created. **********************************************************************/ -bool Wordrec::near_point(EDGEPT *point, - EDGEPT *line_pt_0, EDGEPT *line_pt_1, - EDGEPT **near_pt) { +bool Wordrec::near_point(EDGEPT* point, EDGEPT* line_pt_0, EDGEPT* line_pt_1, + EDGEPT** near_pt) { TPOINT p; float slope; @@ -60,27 +59,26 @@ bool Wordrec::near_point(EDGEPT *point, float y1 = line_pt_1->pos.y; if (x0 == x1) { - /* Handle vertical line */ - p.x = (int16_t) x0; + /* Handle vertical line */ + p.x = (int16_t)x0; p.y = point->pos.y; - } - else { + } else { /* Slope and intercept */ slope = (y0 - y1) / (x0 - x1); intercept = y1 - x1 * slope; /* Find perpendicular */ - p.x = (int16_t) ((point->pos.x + (point->pos.y - intercept) * slope) / - (slope * slope + 1)); - p.y = (int16_t) (slope * p.x + intercept); + p.x = (int16_t)((point->pos.x + (point->pos.y - intercept) * slope) / + (slope * slope + 1)); + p.y = (int16_t)(slope * p.x + intercept); } - if (is_on_line (p, line_pt_0->pos, line_pt_1->pos) && - (!same_point (p, line_pt_0->pos)) && (!same_point (p, line_pt_1->pos))) { + if (is_on_line(p, line_pt_0->pos, line_pt_1->pos) && + (!same_point(p, line_pt_0->pos)) && (!same_point(p, line_pt_1->pos))) { /* Intersection on line */ *near_pt = make_edgept(p.x, p.y, line_pt_1, line_pt_0); return true; - } else { /* Intersection not on line */ + } else { /* Intersection not on line */ *near_pt = closest(point, line_pt_0, line_pt_1); return false; } diff --git a/src/wordrec/outlines.h b/src/wordrec/outlines.h index 14dac384af..86947ee4ea 100644 --- a/src/wordrec/outlines.h +++ b/src/wordrec/outlines.h @@ -33,9 +33,9 @@ /*---------------------------------------------------------------------- C o n s t a n t s ----------------------------------------------------------------------*/ -#define LARGE_DISTANCE 100000 /* Used for closest dist */ -#define MIN_BLOB_SIZE 10 /* Big units */ -#define MAX_ASPECT_RATIO 2.5 /* Widest character */ +#define LARGE_DISTANCE 100000 /* Used for closest dist */ +#define MIN_BLOB_SIZE 10 /* Big units */ +#define MAX_ASPECT_RATIO 2.5 /* Widest character */ /*---------------------------------------------------------------------- M a c r o s @@ -46,9 +46,9 @@ * Return TRUE if the point values are the same. The parameters must * be of type POINT. **********************************************************************/ -#define same_point(p1,p2) \ - ((abs (p1.x - p2.x) < chop_same_distance) && \ - (abs (p1.y - p2.y) < chop_same_distance)) +#define same_point(p1, p2) \ + ((abs(p1.x - p2.x) < chop_same_distance) && \ + (abs(p1.y - p2.y) < chop_same_distance)) /********************************************************************** * dist_square @@ -57,9 +57,8 @@ * parameters must be of type POINT. **********************************************************************/ -#define dist_square(p1,p2) \ - ((p2.x - p1.x) * (p2.x - p1.x) + \ - (p2.y - p1.y) * (p2.y - p1.y)) +#define dist_square(p1, p2) \ + ((p2.x - p1.x) * (p2.x - p1.x) + (p2.y - p1.y) * (p2.y - p1.y)) /********************************************************************** * closest @@ -68,15 +67,13 @@ * question. All three parameters must be of type EDGEPT. **********************************************************************/ -#define closest(test_p,p1,p2) \ -(p1 ? \ - (p2 ? \ - ((dist_square (test_p->pos, p1->pos) < \ - dist_square (test_p->pos, p2->pos)) ? \ - p1 : \ - p2) : \ - p1) : \ - p2) +#define closest(test_p, p1, p2) \ + (p1 ? (p2 ? ((dist_square(test_p->pos, p1->pos) < \ + dist_square(test_p->pos, p2->pos)) \ + ? p1 \ + : p2) \ + : p1) \ + : p2) /********************************************************************** * edgept_dist @@ -84,8 +81,7 @@ * Return the distance (squared) between the two edge points. **********************************************************************/ -#define edgept_dist(p1,p2) \ -(dist_square ((p1)->pos, (p2)->pos)) +#define edgept_dist(p1, p2) (dist_square((p1)->pos, (p2)->pos)) /********************************************************************** * is_exterior_point @@ -94,11 +90,12 @@ * outline. **********************************************************************/ -#define is_exterior_point(edge,point) \ -(same_point (edge->prev->pos, point->pos) || \ - same_point (edge->next->pos, point->pos) || \ - (angle_change (edge->prev, edge, edge->next) - \ - angle_change (edge->prev, edge, point) > 20)) +#define is_exterior_point(edge, point) \ + (same_point(edge->prev->pos, point->pos) || \ + same_point(edge->next->pos, point->pos) || \ + (angle_change(edge->prev, edge, edge->next) - \ + angle_change(edge->prev, edge, point) > \ + 20)) /********************************************************************** * is_equal @@ -106,8 +103,7 @@ * Return TRUE if the POINTs are equal. **********************************************************************/ -#define is_equal(p1,p2) \ -(((p1).x == (p2).x) && ((p1).y == (p2).y)) +#define is_equal(p1, p2) (((p1).x == (p2).x) && ((p1).y == (p2).y)) /********************************************************************** * is_on_line @@ -117,9 +113,8 @@ * parameters must be of type POINT. **********************************************************************/ -#define is_on_line(p,p0,p1) \ - (within_range ((p).x, (p0).x, (p1).x) && \ - within_range ((p).y, (p0).y, (p1).y)) +#define is_on_line(p, p0, p1) \ + (within_range((p).x, (p0).x, (p1).x) && within_range((p).y, (p0).y, (p1).y)) /********************************************************************** * within_range @@ -128,7 +123,7 @@ * Return FALSE otherwise. **********************************************************************/ -#define within_range(x,x0,x1) \ - (((x0 <= x) && (x <= x1)) || ((x1 <= x) && (x <= x0))) +#define within_range(x, x0, x1) \ + (((x0 <= x) && (x <= x1)) || ((x1 <= x) && (x <= x0))) #endif diff --git a/src/wordrec/params_model.cpp b/src/wordrec/params_model.cpp index 90975af86c..e4f1ab5f69 100644 --- a/src/wordrec/params_model.cpp +++ b/src/wordrec/params_model.cpp @@ -45,18 +45,16 @@ void ParamsModel::Print() { } } -void ParamsModel::Copy(const ParamsModel &other_model) { +void ParamsModel::Copy(const ParamsModel& other_model) { for (int p = 0; p < PTRAIN_NUM_PASSES; ++p) { - weights_vec_[p] = other_model.weights_for_pass( - static_cast(p)); + weights_vec_[p] = other_model.weights_for_pass(static_cast(p)); } } // Given a (modifiable) line, parse out a key / value pair. // Return true on success. -bool ParamsModel::ParseLine(char *line, char** key, float *val) { - if (line[0] == '#') - return false; +bool ParamsModel::ParseLine(char* line, char** key, float* val) { + if (line[0] == '#') return false; int end_of_key = 0; while (line[end_of_key] && !isspace(line[end_of_key])) end_of_key++; if (!line[end_of_key]) { @@ -65,8 +63,7 @@ bool ParamsModel::ParseLine(char *line, char** key, float *val) { } line[end_of_key++] = 0; *key = line; - if (sscanf(line + end_of_key, " %f", val) != 1) - return false; + if (sscanf(line + end_of_key, " %f", val) != 1) return false; return true; } @@ -80,11 +77,11 @@ float ParamsModel::ComputeCost(const float features[]) const { for (int f = 0; f < PTRAIN_NUM_FEATURE_TYPES; ++f) { unnorm_score += weights_vec_[pass_][f] * features[f]; } - return ClipToRange(-unnorm_score / kScoreScaleFactor, - kMinFinalCost, kMaxFinalCost); + return ClipToRange(-unnorm_score / kScoreScaleFactor, kMinFinalCost, + kMaxFinalCost); } -bool ParamsModel::Equivalent(const ParamsModel &that) const { +bool ParamsModel::Equivalent(const ParamsModel& that) const { float epsilon = 0.0001; for (int p = 0; p < PTRAIN_NUM_PASSES; ++p) { if (weights_vec_[p].size() != that.weights_vec_[p].size()) return false; @@ -97,9 +94,7 @@ bool ParamsModel::Equivalent(const ParamsModel &that) const { return true; } -bool ParamsModel::LoadFromFile( - const char *lang, - const char *full_path) { +bool ParamsModel::LoadFromFile(const char* lang, const char* full_path) { TFile fp; if (!fp.Open(full_path, nullptr)) { tprintf("Error opening file %s\n", full_path); @@ -108,21 +103,20 @@ bool ParamsModel::LoadFromFile( return LoadFromFp(lang, &fp); } -bool ParamsModel::LoadFromFp(const char *lang, TFile *fp) { +bool ParamsModel::LoadFromFp(const char* lang, TFile* fp) { const int kMaxLineSize = 100; char line[kMaxLineSize]; BitVector present; present.Init(PTRAIN_NUM_FEATURE_TYPES); lang_ = lang; // Load weights for passes with adaption on. - GenericVector &weights = weights_vec_[pass_]; + GenericVector& weights = weights_vec_[pass_]; weights.init_to_size(PTRAIN_NUM_FEATURE_TYPES, 0.0); while (fp->FGets(line, kMaxLineSize) != nullptr) { - char *key = nullptr; + char* key = nullptr; float value; - if (!ParseLine(line, &key, &value)) - continue; + if (!ParseLine(line, &key, &value)) continue; int idx = ParamsTrainingFeatureByName(key); if (idx < 0) { tprintf("ParamsModel::Unknown parameter %s\n", key); @@ -146,21 +140,21 @@ bool ParamsModel::LoadFromFp(const char *lang, TFile *fp) { return complete; } -bool ParamsModel::SaveToFile(const char *full_path) const { - const GenericVector &weights = weights_vec_[pass_]; +bool ParamsModel::SaveToFile(const char* full_path) const { + const GenericVector& weights = weights_vec_[pass_]; if (weights.size() != PTRAIN_NUM_FEATURE_TYPES) { tprintf("Refusing to save ParamsModel that has not been initialized.\n"); return false; } - FILE *fp = fopen(full_path, "wb"); + FILE* fp = fopen(full_path, "wb"); if (!fp) { tprintf("Could not open %s for writing.\n", full_path); return false; } bool all_good = true; for (int i = 0; i < weights.size(); i++) { - if (fprintf(fp, "%s %f\n", kParamsTrainingFeatureTypeName[i], weights[i]) - < 0) { + if (fprintf(fp, "%s %f\n", kParamsTrainingFeatureTypeName[i], weights[i]) < + 0) { all_good = false; } } diff --git a/src/wordrec/params_model.h b/src/wordrec/params_model.h index 6414d2b810..896bd32cc9 100644 --- a/src/wordrec/params_model.h +++ b/src/wordrec/params_model.h @@ -38,8 +38,10 @@ class ParamsModel { }; ParamsModel() : pass_(PTRAIN_PASS1) {} - ParamsModel(const char *lang, const GenericVector &weights) : - lang_(lang), pass_(PTRAIN_PASS1) { weights_vec_[pass_] = weights; } + ParamsModel(const char* lang, const GenericVector& weights) + : lang_(lang), pass_(PTRAIN_PASS1) { + weights_vec_[pass_] = weights; + } inline bool Initialized() { return weights_vec_[pass_].size() == PTRAIN_NUM_FEATURE_TYPES; } @@ -50,29 +52,27 @@ class ParamsModel { for (int p = 0; p < PTRAIN_NUM_PASSES; ++p) weights_vec_[p].clear(); } // Copies the weights of the given params model. - void Copy(const ParamsModel &other_model); + void Copy(const ParamsModel& other_model); // Applies params model weights to the given features. // Assumes that features is an array of size PTRAIN_NUM_FEATURE_TYPES. float ComputeCost(const float features[]) const; - bool Equivalent(const ParamsModel &that) const; + bool Equivalent(const ParamsModel& that) const; // Returns true on success. - bool SaveToFile(const char *full_path) const; + bool SaveToFile(const char* full_path) const; // Returns true on success. - bool LoadFromFile(const char *lang, const char *full_path); - bool LoadFromFp(const char *lang, TFile *fp); + bool LoadFromFile(const char* lang, const char* full_path); + bool LoadFromFp(const char* lang, TFile* fp); - const GenericVector& weights() const { - return weights_vec_[pass_]; - } + const GenericVector& weights() const { return weights_vec_[pass_]; } const GenericVector& weights_for_pass(PassEnum pass) const { return weights_vec_[pass]; } void SetPass(PassEnum pass) { pass_ = pass; } private: - bool ParseLine(char *line, char **key, float *val); + bool ParseLine(char* line, char** key, float* val); STRING lang_; // Set to the current pass type and used to determine which set of weights diff --git a/src/wordrec/pieces.cpp b/src/wordrec/pieces.cpp index 8c2409fc26..fcc3ecb20a 100644 --- a/src/wordrec/pieces.cpp +++ b/src/wordrec/pieces.cpp @@ -53,15 +53,13 @@ using tesseract::ScoredFont; * the collection of small pieces un modified. **********************************************************************/ namespace tesseract { -BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector& seams, - int16_t start, - int16_t end, - const char* description, - TWERD *word, - BlamerBundle *blamer_bundle) { +BLOB_CHOICE_LIST* Wordrec::classify_piece(const GenericVector& seams, + int16_t start, int16_t end, + const char* description, TWERD* word, + BlamerBundle* blamer_bundle) { if (end > start) SEAM::JoinPieces(seams, word->blobs, start, end); - BLOB_CHOICE_LIST *choices = classify_blob(word->blobs[start], description, - White, blamer_bundle); + BLOB_CHOICE_LIST* choices = + classify_blob(word->blobs[start], description, White, blamer_bundle); // Set the matrix_cell_ entries in all the BLOB_CHOICES. BLOB_CHOICE_IT bc_it(choices); for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { @@ -73,25 +71,23 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector& seams, return (choices); } -template -int SortByUnicharID(const void *void1, const void *void2) { - const BLOB_CHOICE *p1 = *static_cast(void1); - const BLOB_CHOICE *p2 = *static_cast(void2); +template +int SortByUnicharID(const void* void1, const void* void2) { + const BLOB_CHOICE* p1 = *static_cast(void1); + const BLOB_CHOICE* p2 = *static_cast(void2); return p1->unichar_id() - p2->unichar_id(); } -template -int SortByRating(const void *void1, const void *void2) { - const BLOB_CHOICE *p1 = *static_cast(void1); - const BLOB_CHOICE *p2 = *static_cast(void2); +template +int SortByRating(const void* void1, const void* void2) { + const BLOB_CHOICE* p1 = *static_cast(void1); + const BLOB_CHOICE* p2 = *static_cast(void2); - if (p1->rating() < p2->rating()) - return 1; + if (p1->rating() < p2->rating()) return 1; return -1; } - /********************************************************************** * fill_filtered_fragment_list * @@ -102,23 +98,22 @@ int SortByRating(const void *void1, const void *void2) { * total number of pieces. The result will be appended to * filtered_choices. **********************************************************************/ -void Wordrec::fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, - int fragment_pos, - int num_frag_parts, - BLOB_CHOICE_LIST *filtered_choices) { +void Wordrec::fill_filtered_fragment_list(BLOB_CHOICE_LIST* choices, + int fragment_pos, int num_frag_parts, + BLOB_CHOICE_LIST* filtered_choices) { BLOB_CHOICE_IT filtered_choices_it(filtered_choices); BLOB_CHOICE_IT choices_it(choices); for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) { UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id(); - const CHAR_FRAGMENT *frag = unicharset.get_fragment(choice_unichar_id); + const CHAR_FRAGMENT* frag = unicharset.get_fragment(choice_unichar_id); if (frag != nullptr && frag->get_pos() == fragment_pos && frag->get_total() == num_frag_parts) { // Recover the unichar_id of the unichar that this fragment is // a part of - BLOB_CHOICE *b = new BLOB_CHOICE(*choices_it.data()); + BLOB_CHOICE* b = new BLOB_CHOICE(*choices_it.data()); int original_unichar = unicharset.unichar_to_id(frag->get_unichar()); b->set_unichar_id(original_unichar); filtered_choices_it.add_to_end(b); @@ -128,7 +123,6 @@ void Wordrec::fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, filtered_choices->sort(SortByUnicharID); } - /********************************************************************** * merge_and_put_fragment_lists * @@ -137,18 +131,17 @@ void Wordrec::fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, **********************************************************************/ void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts, - BLOB_CHOICE_LIST *choice_lists, - MATRIX *ratings) { - BLOB_CHOICE_IT *choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts]; + BLOB_CHOICE_LIST* choice_lists, + MATRIX* ratings) { + BLOB_CHOICE_IT* choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts]; for (int i = 0; i < num_frag_parts; i++) { choice_lists_it[i].set_to_list(&choice_lists[i]); choice_lists_it[i].mark_cycle_pt(); } - BLOB_CHOICE_LIST *merged_choice = ratings->get(row, column); - if (merged_choice == nullptr) - merged_choice = new BLOB_CHOICE_LIST; + BLOB_CHOICE_LIST* merged_choice = ratings->get(row, column); + if (merged_choice == nullptr) merged_choice = new BLOB_CHOICE_LIST; bool end_of_list = false; BLOB_CHOICE_IT merged_choice_it(merged_choice); @@ -167,8 +160,7 @@ void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, // value greater than or equal to max_unichar_id for (int i = 0; i < num_frag_parts; i++) { UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id(); - while (!choice_lists_it[i].cycled_list() && - unichar_id < max_unichar_id) { + while (!choice_lists_it[i].cycled_list() && unichar_id < max_unichar_id) { choice_lists_it[i].forward(); unichar_id = choice_lists_it[i].data()->unichar_id(); } @@ -178,8 +170,7 @@ void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, } } - if (end_of_list) - break; + if (end_of_list) break; // Checks if the fragments are parts of the same character UNICHAR_ID first_unichar_id = choice_lists_it[0].data()->unichar_id(); @@ -213,8 +204,7 @@ void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, merged_rating += rating; choice_lists_it[i].forward(); - if (choice_lists_it[i].cycled_list()) - end_of_list = true; + if (choice_lists_it[i].cycled_list()) end_of_list = true; IntersectRange(choice_lists_it[i].data()->min_xheight(), choice_lists_it[i].data()->max_xheight(), &merged_min_xheight, &merged_max_xheight); @@ -227,9 +217,11 @@ void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, choice_lists_it[i].data()->fonts(); for (int f = 0; f < frag_fonts.size(); ++f) { int merged_f = 0; - for (merged_f = 0; merged_f < merged_fonts.size() && + for (merged_f = 0; + merged_f < merged_fonts.size() && merged_fonts[merged_f].fontinfo_id != frag_fonts[f].fontinfo_id; - ++merged_f) {} + ++merged_f) { + } if (merged_f == merged_fonts.size()) { merged_fonts.push_back(frag_fonts[f]); } else if (merged_fonts[merged_f].score > frag_fonts[f].score) { @@ -239,31 +231,25 @@ void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, } float merged_yshift = positive_yshift != 0 - ? (negative_yshift != 0 ? 0 : positive_yshift) - : negative_yshift; - BLOB_CHOICE* choice = new BLOB_CHOICE(merged_unichar_id, - merged_rating, - merged_certainty, - merged_script_id, - merged_min_xheight, - merged_max_xheight, - merged_yshift, - classifier); + ? (negative_yshift != 0 ? 0 : positive_yshift) + : negative_yshift; + BLOB_CHOICE* choice = new BLOB_CHOICE( + merged_unichar_id, merged_rating, merged_certainty, merged_script_id, + merged_min_xheight, merged_max_xheight, merged_yshift, classifier); choice->set_fonts(merged_fonts); merged_choice_it.add_to_end(choice); } } if (classify_debug_level) - print_ratings_list("Merged Fragments", merged_choice, - unicharset); + print_ratings_list("Merged Fragments", merged_choice, unicharset); if (merged_choice->empty()) delete merged_choice; else ratings->put(row, column, merged_choice); - delete [] choice_lists_it; + delete[] choice_lists_it; } /********************************************************************** @@ -280,8 +266,8 @@ void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, **********************************************************************/ void Wordrec::get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, - int16_t num_blobs, MATRIX *ratings, - BLOB_CHOICE_LIST *choice_lists) { + int16_t num_blobs, MATRIX* ratings, + BLOB_CHOICE_LIST* choice_lists) { if (current_frag == num_frag_parts) { merge_and_put_fragment_lists(start, current_row - 1, num_frag_parts, choice_lists, ratings); @@ -289,9 +275,8 @@ void Wordrec::get_fragment_lists(int16_t current_frag, int16_t current_row, } for (int16_t x = current_row; x < num_blobs; x++) { - BLOB_CHOICE_LIST *choices = ratings->get(current_row, x); - if (choices == nullptr) - continue; + BLOB_CHOICE_LIST* choices = ratings->get(current_row, x); + if (choices == nullptr) continue; fill_filtered_fragment_list(choices, current_frag, num_frag_parts, &choice_lists[current_frag]); @@ -303,41 +288,38 @@ void Wordrec::get_fragment_lists(int16_t current_frag, int16_t current_row, } } - /********************************************************************** * merge_fragments * * Try to merge fragments in the ratings matrix and put the result in * the corresponding row and column **********************************************************************/ -void Wordrec::merge_fragments(MATRIX *ratings, int16_t num_blobs) { +void Wordrec::merge_fragments(MATRIX* ratings, int16_t num_blobs) { BLOB_CHOICE_LIST choice_lists[CHAR_FRAGMENT::kMaxChunks]; for (int16_t start = 0; start < num_blobs; start++) { for (int frag_parts = 2; frag_parts <= CHAR_FRAGMENT::kMaxChunks; frag_parts++) { - get_fragment_lists(0, start, start, frag_parts, num_blobs, - ratings, choice_lists); + get_fragment_lists(0, start, start, frag_parts, num_blobs, ratings, + choice_lists); } } // Delete fragments from the rating matrix for (int16_t x = 0; x < num_blobs; x++) { for (int16_t y = x; y < num_blobs; y++) { - BLOB_CHOICE_LIST *choices = ratings->get(x, y); + BLOB_CHOICE_LIST* choices = ratings->get(x, y); if (choices != nullptr) { BLOB_CHOICE_IT choices_it(choices); for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) { UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id(); - const CHAR_FRAGMENT *frag = + const CHAR_FRAGMENT* frag = unicharset.get_fragment(choice_unichar_id); - if (frag != nullptr) - delete choices_it.extract(); + if (frag != nullptr) delete choices_it.extract(); } } } } } - } // namespace tesseract diff --git a/src/wordrec/plotedges.cpp b/src/wordrec/plotedges.cpp index 43ac0d65f3..7d98d9b298 100644 --- a/src/wordrec/plotedges.cpp +++ b/src/wordrec/plotedges.cpp @@ -40,7 +40,7 @@ /*---------------------------------------------------------------------- V a r i a b l e s ----------------------------------------------------------------------*/ -ScrollView *edge_window = nullptr; +ScrollView* edge_window = nullptr; /*---------------------------------------------------------------------- F u n c t i o n s @@ -51,49 +51,46 @@ ScrollView *edge_window = nullptr; * Macro to display edge points in a window. **********************************************************************/ void display_edgepts(LIST outlines) { - void *window; + void* window; /* Set up window */ if (edge_window == nullptr) { - edge_window = c_create_window ("Edges", 750, 150, - 400, 128, -400.0, 400.0, 0.0, 256.0); - } - else { + edge_window = + c_create_window("Edges", 750, 150, 400, 128, -400.0, 400.0, 0.0, 256.0); + } else { c_clear_window(edge_window); } /* Render the outlines */ window = edge_window; /* Reclaim old memory */ iterate(outlines) { - render_edgepts (window, (EDGEPT *) first_node (outlines), White); + render_edgepts(window, (EDGEPT*)first_node(outlines), White); } } - /********************************************************************** * draw_blob_edges * * Display the edges of this blob in the edges window. **********************************************************************/ -void draw_blob_edges(TBLOB *blob) { - TESSLINE *ol; +void draw_blob_edges(TBLOB* blob) { + TESSLINE* ol; LIST edge_list = NIL_LIST; if (wordrec_display_splits) { for (ol = blob->outlines; ol != nullptr; ol = ol->next) - push_on (edge_list, ol->loop); + push_on(edge_list, ol->loop); display_edgepts(edge_list); destroy(edge_list); } } - /********************************************************************** * mark_outline * * Make a mark on the edges window at a particular location. **********************************************************************/ -void mark_outline(EDGEPT *edgept) { /* Start of point list */ - void *window = edge_window; +void mark_outline(EDGEPT* edgept) { /* Start of point list */ + void* window = edge_window; float x = edgept->pos.x; float y = edgept->pos.y; diff --git a/src/wordrec/plotedges.h b/src/wordrec/plotedges.h index 91521de734..892871751d 100644 --- a/src/wordrec/plotedges.h +++ b/src/wordrec/plotedges.h @@ -25,14 +25,14 @@ #ifndef PLOTEDGES_H #define PLOTEDGES_H +#include "blobs.h" #include "callcpp.h" #include "oldlist.h" -#include "blobs.h" /*---------------------------------------------------------------------- V a r i a b l e s ----------------------------------------------------------------------*/ -extern ScrollView *edge_window; /* Window for edges */ +extern ScrollView* edge_window; /* Window for edges */ /*---------------------------------------------------------------------- Macros @@ -42,11 +42,10 @@ extern ScrollView *edge_window; /* Window for edges */ * * Refresh the display of the edge window. **********************************************************************/ -#define update_edge_window() \ -if (wordrec_display_splits) { \ - c_make_current (edge_window); \ -} \ - +#define update_edge_window() \ + if (wordrec_display_splits) { \ + c_make_current(edge_window); \ + } /********************************************************************** * edge_window_wait @@ -54,16 +53,16 @@ if (wordrec_display_splits) { \ * Wait for someone to click in the edges window. **********************************************************************/ -#define edge_window_wait() \ -if (wordrec_display_splits) window_wait (edge_window) +#define edge_window_wait() \ + if (wordrec_display_splits) window_wait(edge_window) /*---------------------------------------------------------------------- F u n c t i o n s ---------------------------------------------------------------------*/ void display_edgepts(LIST outlines); -void draw_blob_edges(TBLOB *blob); +void draw_blob_edges(TBLOB* blob); -void mark_outline(EDGEPT *edgept); +void mark_outline(EDGEPT* edgept); #endif diff --git a/src/wordrec/render.cpp b/src/wordrec/render.cpp index 20529fe599..72b212b213 100644 --- a/src/wordrec/render.cpp +++ b/src/wordrec/render.cpp @@ -40,11 +40,9 @@ /*---------------------------------------------------------------------- V a r i a b l e s ----------------------------------------------------------------------*/ -ScrollView *blob_window = nullptr; +ScrollView* blob_window = nullptr; -C_COL color_list[] = { - Red, Cyan, Yellow, Blue, Green, White -}; +C_COL color_list[] = {Red, Cyan, Yellow, Blue, Green, White}; BOOL_VAR(wordrec_display_all_blobs, 0, "Display Blobs"); @@ -61,13 +59,12 @@ BOOL_VAR(wordrec_blob_pause, 0, "Blob pause"); * * Macro to display blob in a window. **********************************************************************/ -void display_blob(TBLOB *blob, C_COL color) { +void display_blob(TBLOB* blob, C_COL color) { /* Size of drawable */ if (blob_window == nullptr) { - blob_window = c_create_window ("Blobs", 520, 10, - 500, 256, -1000.0, 1000.0, 0.0, 256.0); - } - else { + blob_window = c_create_window("Blobs", 520, 10, 500, 256, -1000.0, 1000.0, + 0.0, 256.0); + } else { c_clear_window(blob_window); } @@ -80,28 +77,25 @@ void display_blob(TBLOB *blob, C_COL color) { * Create a list of line segments that represent the expanded outline * that was supplied as input. **********************************************************************/ -void render_blob(void *window, TBLOB *blob, C_COL color) { +void render_blob(void* window, TBLOB* blob, C_COL color) { /* No outline */ - if (!blob) - return; + if (!blob) return; - render_outline (window, blob->outlines, color); + render_outline(window, blob->outlines, color); } - /********************************************************************** * render_edgepts * * Create a list of line segments that represent the expanded outline * that was supplied as input. **********************************************************************/ -void render_edgepts(void *window, EDGEPT *edgept, C_COL color) { - if (!edgept) - return; +void render_edgepts(void* window, EDGEPT* edgept, C_COL color) { + if (!edgept) return; float x = edgept->pos.x; float y = edgept->pos.y; - EDGEPT *this_edge = edgept; + EDGEPT* this_edge = edgept; c_line_color_index(window, color); c_move(window, x, y); @@ -110,28 +104,22 @@ void render_edgepts(void *window, EDGEPT *edgept, C_COL color) { x = this_edge->pos.x; y = this_edge->pos.y; c_draw(window, x, y); - } - while (edgept != this_edge); + } while (edgept != this_edge); } - /********************************************************************** * render_outline * * Create a list of line segments that represent the expanded outline * that was supplied as input. **********************************************************************/ -void render_outline(void *window, - TESSLINE *outline, - C_COL color) { +void render_outline(void* window, TESSLINE* outline, C_COL color) { /* No outline */ - if (!outline) - return; + if (!outline) return; /* Draw Compact outline */ - if (outline->loop) - render_edgepts (window, outline->loop, color); + if (outline->loop) render_edgepts(window, outline->loop, color); /* Add on next outlines */ - render_outline (window, outline->next, color); + render_outline(window, outline->next, color); } #endif // GRAPHICS_DISABLED diff --git a/src/wordrec/render.h b/src/wordrec/render.h index 3faa398d42..272074b850 100644 --- a/src/wordrec/render.h +++ b/src/wordrec/render.h @@ -25,15 +25,15 @@ #ifndef RENDER_H #define RENDER_H -#include "host.h" -#include "callcpp.h" #include "blobs.h" +#include "callcpp.h" +#include "host.h" /*---------------------------------------------------------------------- V a r i a b l e s ----------------------------------------------------------------------*/ -extern ScrollView *blob_window; /* Window for blobs */ -extern C_COL color_list[]; /* Colors for outlines */ +extern ScrollView* blob_window; /* Window for blobs */ +extern C_COL color_list[]; /* Colors for outlines */ extern BOOL_VAR_H(wordrec_display_all_blobs, 0, "Display Blobs"); @@ -46,14 +46,12 @@ extern BOOL_VAR_H(wordrec_blob_pause, 0, "Blob pause"); /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -void display_blob(TBLOB *blob, C_COL color); +void display_blob(TBLOB* blob, C_COL color); -void render_blob(void *window, TBLOB *blob, C_COL color); +void render_blob(void* window, TBLOB* blob, C_COL color); -void render_edgepts(void *window, EDGEPT *edgept, C_COL color); +void render_edgepts(void* window, EDGEPT* edgept, C_COL color); -void render_outline(void *window, - TESSLINE *outline, - C_COL color); +void render_outline(void* window, TESSLINE* outline, C_COL color); #endif diff --git a/src/wordrec/segsearch.cpp b/src/wordrec/segsearch.cpp index 21c23a181c..60cf096dbe 100644 --- a/src/wordrec/segsearch.cpp +++ b/src/wordrec/segsearch.cpp @@ -21,9 +21,9 @@ #include "associate.h" #include "language_model.h" +#include "lm_pain_points.h" #include "matrix.h" #include "params.h" -#include "lm_pain_points.h" #include "ratngs.h" #include @@ -39,10 +39,9 @@ void Wordrec::DoSegSearch(WERD_RES* word_res) { void Wordrec::SegSearch(WERD_RES* word_res, BestChoiceBundle* best_choice_bundle, BlamerBundle* blamer_bundle) { - LMPainPoints pain_points(segsearch_max_pain_points, - segsearch_max_char_wh_ratio, - assume_fixed_pitch_char_segment, - &getDict(), segsearch_debug_level); + LMPainPoints pain_points( + segsearch_max_pain_points, segsearch_max_char_wh_ratio, + assume_fixed_pitch_char_segment, &getDict(), segsearch_debug_level); // Compute scaling factor that will help us recover blob outline length // from classifier rating and certainty for the blob. float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale; @@ -69,17 +68,17 @@ void Wordrec::SegSearch(WERD_RES* word_res, int num_futile_classifications = 0; STRING blamer_debug; while (wordrec_enable_assoc && - (!SegSearchDone(num_futile_classifications) || + (!SegSearchDone(num_futile_classifications) || (blamer_bundle != nullptr && - blamer_bundle->GuidedSegsearchStillGoing()))) { + blamer_bundle->GuidedSegsearchStillGoing()))) { // Get the next valid "pain point". bool found_nothing = true; LMPainPointsType pp_type; while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) != - LM_PPTYPE_NUM) { + LM_PPTYPE_NUM) { if (!pain_point.Valid(*word_res->ratings)) { - word_res->ratings->IncreaseBandSize( - pain_point.row - pain_point.col + 1); + word_res->ratings->IncreaseBandSize(pain_point.row - pain_point.col + + 1); } if (pain_point.Valid(*word_res->ratings) && !word_res->ratings->Classified(pain_point.col, pain_point.row, @@ -96,9 +95,8 @@ void Wordrec::SegSearch(WERD_RES* word_res, LMPainPoints::PainPointDescription(pp_type), &pending, word_res, &pain_points, blamer_bundle); - UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending, - word_res, &pain_points, best_choice_bundle, - blamer_bundle); + UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending, word_res, + &pain_points, best_choice_bundle, blamer_bundle); if (!best_choice_bundle->updated) ++num_futile_classifications; if (segsearch_debug_level > 0) { @@ -109,16 +107,15 @@ void Wordrec::SegSearch(WERD_RES* word_res, // See if it's time to terminate SegSearch or time for starting a guided // search for the true path to find the blame for the incorrect best_choice. - if (SegSearchDone(num_futile_classifications) && - blamer_bundle != nullptr && + if (SegSearchDone(num_futile_classifications) && blamer_bundle != nullptr && blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) { InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle, &blamer_debug); } } // end while loop exploring alternative paths if (blamer_bundle != nullptr) { - blamer_bundle->FinishSegSearch(word_res->best_choice, - wordrec_debug_blamer, &blamer_debug); + blamer_bundle->FinishSegSearch(word_res->best_choice, wordrec_debug_blamer, + &blamer_debug); } if (segsearch_debug_level > 0) { @@ -170,26 +167,24 @@ void Wordrec::InitialSegSearch(WERD_RES* word_res, LMPainPoints* pain_points, // Search the ratings matrix for the initial best path. (*pending)[0].SetColumnClassified(); - UpdateSegSearchNodes(rating_cert_scale, 0, pending, word_res, - pain_points, best_choice_bundle, blamer_bundle); + UpdateSegSearchNodes(rating_cert_scale, 0, pending, word_res, pain_points, + best_choice_bundle, blamer_bundle); } -void Wordrec::UpdateSegSearchNodes( - float rating_cert_scale, - int starting_col, - GenericVector* pending, - WERD_RES *word_res, - LMPainPoints *pain_points, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle) { - MATRIX *ratings = word_res->ratings; +void Wordrec::UpdateSegSearchNodes(float rating_cert_scale, int starting_col, + GenericVector* pending, + WERD_RES* word_res, + LMPainPoints* pain_points, + BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle) { + MATRIX* ratings = word_res->ratings; ASSERT_HOST(ratings->dimension() == pending->size()); ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size()); for (int col = starting_col; col < ratings->dimension(); ++col) { if (!(*pending)[col].WorkToDo()) continue; int first_row = col; - int last_row = std::min(ratings->dimension() - 1, - col + ratings->bandwidth() - 1); + int last_row = + std::min(ratings->dimension() - 1, col + ratings->bandwidth() - 1); if ((*pending)[col].SingleRow() >= 0) { first_row = last_row = (*pending)[col].SingleRow(); } @@ -201,8 +196,8 @@ void Wordrec::UpdateSegSearchNodes( // Iterate over the pending list for this column. for (int row = first_row; row <= last_row; ++row) { // Update language model state of this child+parent pair. - BLOB_CHOICE_LIST *current_node = ratings->get(col, row); - LanguageModelState *parent_node = + BLOB_CHOICE_LIST* current_node = ratings->get(col, row); + LanguageModelState* parent_node = col == 0 ? nullptr : best_choice_bundle->beam[col - 1]; if (current_node != nullptr && language_model_->UpdateState((*pending)[col].IsRowJustClassified(row), @@ -217,8 +212,8 @@ void Wordrec::UpdateSegSearchNodes( tprintf("Added child col=%d to pending\n", row + 1); } } // end if UpdateState. - } // end for row. - } // end for col. + } // end for row. + } // end for col. if (best_choice_bundle->best_vse != nullptr) { ASSERT_HOST(word_res->StatesAllValid()); if (best_choice_bundle->best_vse->updated) { @@ -234,8 +229,8 @@ void Wordrec::UpdateSegSearchNodes( // all pendings. for (int col = 0; col < pending->size(); ++col) { (*pending)[col].Clear(); - ViterbiStateEntry_IT - vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries); + ViterbiStateEntry_IT vse_it( + &best_choice_bundle->beam[col]->viterbi_state_entries); for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) { vse_it.data()->updated = false; } @@ -243,28 +238,26 @@ void Wordrec::UpdateSegSearchNodes( } void Wordrec::ProcessSegSearchPainPoint( - float pain_point_priority, - const MATRIX_COORD &pain_point, const char* pain_point_type, - GenericVector* pending, WERD_RES *word_res, - LMPainPoints *pain_points, BlamerBundle *blamer_bundle) { + float pain_point_priority, const MATRIX_COORD& pain_point, + const char* pain_point_type, GenericVector* pending, + WERD_RES* word_res, LMPainPoints* pain_points, + BlamerBundle* blamer_bundle) { if (segsearch_debug_level > 0) { tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n", - pain_point_type, pain_point_priority, - pain_point.col, pain_point.row); + pain_point_type, pain_point_priority, pain_point.col, + pain_point.row); } ASSERT_HOST(pain_points != nullptr); - MATRIX *ratings = word_res->ratings; + MATRIX* ratings = word_res->ratings; // Classify blob [pain_point.col pain_point.row] if (!pain_point.Valid(*ratings)) { ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col); } ASSERT_HOST(pain_point.Valid(*ratings)); - BLOB_CHOICE_LIST *classified = classify_piece(word_res->seam_array, - pain_point.col, pain_point.row, - pain_point_type, - word_res->chopped_word, - blamer_bundle); - BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row); + BLOB_CHOICE_LIST* classified = + classify_piece(word_res->seam_array, pain_point.col, pain_point.row, + pain_point_type, word_res->chopped_word, blamer_bundle); + BLOB_CHOICE_LIST* lst = ratings->get(pain_point.col, pain_point.row); if (lst == nullptr) { ratings->put(pain_point.col, pain_point.row, classified); } else { @@ -289,14 +282,14 @@ void Wordrec::ProcessSegSearchPainPoint( // with its left and right neighbors. if (classified != nullptr && !classified->empty()) { if (pain_point.col > 0) { - pain_points->GeneratePainPoint( - pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0, - true, segsearch_max_char_wh_ratio, word_res); + pain_points->GeneratePainPoint(pain_point.col - 1, pain_point.row, + LM_PPTYPE_SHAPE, 0.0, true, + segsearch_max_char_wh_ratio, word_res); } if (pain_point.row + 1 < ratings->dimension()) { - pain_points->GeneratePainPoint( - pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0, - true, segsearch_max_char_wh_ratio, word_res); + pain_points->GeneratePainPoint(pain_point.col, pain_point.row + 1, + LM_PPTYPE_SHAPE, 0.0, true, + segsearch_max_char_wh_ratio, word_res); } } (*pending)[pain_point.col].SetBlobClassified(pain_point.row); @@ -318,14 +311,13 @@ void Wordrec::ResetNGramSearch(WERD_RES* word_res, best_choice_bundle->best_vse = nullptr; // Clear out all existing pendings and add a new one for the first column. (*pending)[0].SetColumnClassified(); - for (int i = 1; i < pending->size(); ++i) - (*pending)[i].Clear(); + for (int i = 1; i < pending->size(); ++i) (*pending)[i].Clear(); } -void Wordrec::InitBlamerForSegSearch(WERD_RES *word_res, - LMPainPoints *pain_points, - BlamerBundle *blamer_bundle, - STRING *blamer_debug) { +void Wordrec::InitBlamerForSegSearch(WERD_RES* word_res, + LMPainPoints* pain_points, + BlamerBundle* blamer_bundle, + STRING* blamer_debug) { pain_points->Clear(); // Clear pain points heap. TessResultCallback2* pp_cb = NewPermanentTessCallback( pain_points, &LMPainPoints::GenerateForBlamer, diff --git a/src/wordrec/tface.cpp b/src/wordrec/tface.cpp index 5b2083def9..76cd9e7438 100644 --- a/src/wordrec/tface.cpp +++ b/src/wordrec/tface.cpp @@ -21,19 +21,18 @@ #include "chop.h" #include "chopper.h" #include "danerror.h" +#include "featdefs.h" #include "globals.h" #include "gradechop.h" #include "pageres.h" -#include "wordrec.h" -#include "featdefs.h" #include "params_model.h" +#include "wordrec.h" #include #ifdef __UNIX__ #include #endif - namespace tesseract { /** @@ -43,9 +42,9 @@ namespace tesseract { * init_permute determines whether to initialize the permute functions * and Dawg models. */ -void Wordrec::program_editup(const char *textbase, - TessdataManager *init_classifier, - TessdataManager *init_dict) { +void Wordrec::program_editup(const char* textbase, + TessdataManager* init_classifier, + TessdataManager* init_dict) { if (textbase != nullptr) imagefile = textbase; InitFeatureDefs(&feature_defs_); InitAdaptiveClassifier(init_classifier); @@ -63,12 +62,11 @@ void Wordrec::program_editup(const char *textbase, * Cleanup and exit the recog program. */ int Wordrec::end_recog() { - program_editdown (0); + program_editdown(0); return (0); } - /** * @name program_editdown * @@ -80,7 +78,6 @@ void Wordrec::program_editdown(int32_t elasped_time) { getDict().End(); } - /** * @name set_pass1 * @@ -92,7 +89,6 @@ void Wordrec::set_pass1() { SettupPass1(); } - /** * @name set_pass2 * @@ -104,13 +100,12 @@ void Wordrec::set_pass2() { SettupPass2(); } - /** * @name cc_recog * * Recognize a word. */ -void Wordrec::cc_recog(WERD_RES *word) { +void Wordrec::cc_recog(WERD_RES* word) { getDict().reset_hyphen_vars(word->word->flag(W_EOL)); chop_word_main(word); word->DebugWordChoices(getDict().stopper_debug_level >= 1, @@ -118,14 +113,13 @@ void Wordrec::cc_recog(WERD_RES *word) { ASSERT_HOST(word->StatesAllValid()); } - /** * @name dict_word() * * Test the dictionaries, returning NO_PERM (0) if not found, or one * of the PermuterType values if found, according to the dictionary. */ -int Wordrec::dict_word(const WERD_CHOICE &word) { +int Wordrec::dict_word(const WERD_CHOICE& word) { return getDict().valid_word(word); } @@ -135,13 +129,13 @@ int Wordrec::dict_word(const WERD_CHOICE &word) { * Called from Tess with a blob in tess form. * The blob may need rotating to the correct orientation for classification. */ -BLOB_CHOICE_LIST *Wordrec::call_matcher(TBLOB *tessblob) { +BLOB_CHOICE_LIST* Wordrec::call_matcher(TBLOB* tessblob) { // Rotate the blob for classification if necessary. TBLOB* rotated_blob = tessblob->ClassifyNormalizeIfNeeded(); if (rotated_blob == nullptr) { rotated_blob = tessblob; } - BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); // matcher result + BLOB_CHOICE_LIST* ratings = new BLOB_CHOICE_LIST(); // matcher result AdaptiveClassifier(rotated_blob, ratings); if (rotated_blob != tessblob) { delete rotated_blob; @@ -149,5 +143,4 @@ BLOB_CHOICE_LIST *Wordrec::call_matcher(TBLOB *tessblob) { return ratings; } - } // namespace tesseract diff --git a/src/wordrec/wordclass.cpp b/src/wordrec/wordclass.cpp index 4f72dc1dee..e6cb4066dc 100644 --- a/src/wordrec/wordclass.cpp +++ b/src/wordrec/wordclass.cpp @@ -29,8 +29,8 @@ #include #include "associate.h" -#include "render.h" #include "callcpp.h" +#include "render.h" #include "wordrec.h" // Include automatically generated configuration file if running autoconf. @@ -53,12 +53,11 @@ namespace tesseract { * @param string The string to display in ScrollView * @param color The colour to use when displayed with ScrollView */ -BLOB_CHOICE_LIST *Wordrec::classify_blob(TBLOB *blob, - const char *string, C_COL color, - BlamerBundle *blamer_bundle) { +BLOB_CHOICE_LIST* Wordrec::classify_blob(TBLOB* blob, const char* string, + C_COL color, + BlamerBundle* blamer_bundle) { #ifndef GRAPHICS_DISABLED - if (wordrec_display_all_blobs) - display_blob(blob, color); + if (wordrec_display_all_blobs) display_blob(blob, color); #endif // TODO(rays) collapse with call_matcher and move all to wordrec.cpp. BLOB_CHOICE_LIST* choices = call_matcher(blob); @@ -67,19 +66,17 @@ BLOB_CHOICE_LIST *Wordrec::classify_blob(TBLOB *blob, // blame character classifier for incorrect answer. if (blamer_bundle != nullptr) { blamer_bundle->BlameClassifier(getDict().getUnicharset(), - blob->bounding_box(), - *choices, + blob->bounding_box(), *choices, wordrec_debug_blamer); } - #ifndef GRAPHICS_DISABLED +#ifndef GRAPHICS_DISABLED if (classify_debug_level && string) print_ratings_list(string, choices, getDict().getUnicharset()); - if (wordrec_blob_pause) - window_wait(blob_window); + if (wordrec_blob_pause) window_wait(blob_window); #endif return choices; } -} // namespace tesseract; +} // namespace tesseract diff --git a/src/wordrec/wordrec.cpp b/src/wordrec/wordrec.cpp index 03beedc098..82caad45f1 100644 --- a/src/wordrec/wordrec.cpp +++ b/src/wordrec/wordrec.cpp @@ -21,107 +21,94 @@ #include "language_model.h" #include "params.h" - namespace tesseract { -Wordrec::Wordrec() : - // control parameters - BOOL_MEMBER(merge_fragments_in_matrix, TRUE, - "Merge the fragments in the ratings matrix and delete them" - " after merging", params()), - BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information", - params()), - BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable", - params()), - BOOL_MEMBER(force_word_assoc, FALSE, - "force associator to run regardless of what enable_assoc is." - " This is used for CJK where component grouping is necessary.", - CCUtil::params()), - double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state", - params()), - BOOL_MEMBER(fragments_guide_chopper, FALSE, - "Use information from fragments to guide chopping process", - params()), - INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", - params()), - double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", - params()), - INT_MEMBER(chop_debug, 0, "Chop debug", - params()), - BOOL_MEMBER(chop_enable, 1, "Chop enable", - params()), - BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", - params()), - INT_MEMBER(chop_split_length, 10000, "Split Length", - params()), - INT_MEMBER(chop_same_distance, 2, "Same distance", - params()), - INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", - params()), - INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile", - params()), - BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()), - INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", - params()), - INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", - params()), - double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", - params()), - double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", - params()), - double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", - params()), - INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs " - "above which we don't care that a chop is not near the center.", - params()), - double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", - params()), - double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", - params()), - double_MEMBER(chop_ok_split, 100.0, "OK split limit", - params()), - double_MEMBER(chop_good_split, 50.0, "Good split limit", - params()), - INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", - params()), - INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug", - params()), - BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE, - "include fixed-pitch heuristics in char segmentation", - params()), - INT_MEMBER(wordrec_debug_level, 0, - "Debug level for wordrec", params()), - INT_MEMBER(wordrec_max_join_chunks, 4, - "Max number of broken pieces to associate", params()), - BOOL_MEMBER(wordrec_skip_no_truth_words, false, - "Only run OCR for words that had truth recorded in BlamerBundle", - params()), - BOOL_MEMBER(wordrec_debug_blamer, false, - "Print blamer debug messages", params()), - BOOL_MEMBER(wordrec_run_blamer, false, - "Try to set the blame for errors", params()), - INT_MEMBER(segsearch_debug_level, 0, - "SegSearch debug level", params()), - INT_MEMBER(segsearch_max_pain_points, 2000, - "Maximum number of pain points stored in the queue", - params()), - INT_MEMBER(segsearch_max_futile_classifications, 20, - "Maximum number of pain point classifications per chunk that" - " did not result in finding a better word choice.", - params()), - double_MEMBER(segsearch_max_char_wh_ratio, 2.0, - "Maximum character width-to-height ratio", params()), - BOOL_MEMBER(save_alt_choices, true, - "Save alternative paths found during chopping" - " and segmentation search", - params()) { +Wordrec::Wordrec() + : // control parameters + BOOL_MEMBER(merge_fragments_in_matrix, TRUE, + "Merge the fragments in the ratings matrix and delete them" + " after merging", + params()), + BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information", + params()), + BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable", params()), + BOOL_MEMBER( + force_word_assoc, FALSE, + "force associator to run regardless of what enable_assoc is." + " This is used for CJK where component grouping is necessary.", + CCUtil::params()), + double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state", + params()), + BOOL_MEMBER(fragments_guide_chopper, FALSE, + "Use information from fragments to guide chopping process", + params()), + INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", + params()), + double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", + params()), + INT_MEMBER(chop_debug, 0, "Chop debug", params()), + BOOL_MEMBER(chop_enable, 1, "Chop enable", params()), + BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", params()), + INT_MEMBER(chop_split_length, 10000, "Split Length", params()), + INT_MEMBER(chop_same_distance, 2, "Same distance", params()), + INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", + params()), + INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile", + params()), + BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()), + INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", params()), + INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", params()), + double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", + params()), + double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", + params()), + double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", + params()), + INT_MEMBER( + chop_centered_maxwidth, 90, + "Width of (smaller) chopped blobs " + "above which we don't care that a chop is not near the center.", + params()), + double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", + params()), + double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", + params()), + double_MEMBER(chop_ok_split, 100.0, "OK split limit", params()), + double_MEMBER(chop_good_split, 50.0, "Good split limit", params()), + INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", params()), + INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug", + params()), + BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE, + "include fixed-pitch heuristics in char segmentation", + params()), + INT_MEMBER(wordrec_debug_level, 0, "Debug level for wordrec", params()), + INT_MEMBER(wordrec_max_join_chunks, 4, + "Max number of broken pieces to associate", params()), + BOOL_MEMBER( + wordrec_skip_no_truth_words, false, + "Only run OCR for words that had truth recorded in BlamerBundle", + params()), + BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages", + params()), + BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors", + params()), + INT_MEMBER(segsearch_debug_level, 0, "SegSearch debug level", params()), + INT_MEMBER(segsearch_max_pain_points, 2000, + "Maximum number of pain points stored in the queue", params()), + INT_MEMBER(segsearch_max_futile_classifications, 20, + "Maximum number of pain point classifications per chunk that" + " did not result in finding a better word choice.", + params()), + double_MEMBER(segsearch_max_char_wh_ratio, 2.0, + "Maximum character width-to-height ratio", params()), + BOOL_MEMBER(save_alt_choices, true, + "Save alternative paths found during chopping" + " and segmentation search", + params()) { prev_word_best_choice_ = nullptr; - language_model_ = new LanguageModel(&get_fontinfo_table(), - &(getDict())); + language_model_ = new LanguageModel(&get_fontinfo_table(), &(getDict())); fill_lattice_ = nullptr; } -Wordrec::~Wordrec() { - delete language_model_; -} +Wordrec::~Wordrec() { delete language_model_; } } // namespace tesseract diff --git a/src/wordrec/wordrec.h b/src/wordrec/wordrec.h index 7c79defa32..ba07428050 100644 --- a/src/wordrec/wordrec.h +++ b/src/wordrec/wordrec.h @@ -20,15 +20,15 @@ #define TESSERACT_WORDREC_WORDREC_H_ #include "associate.h" +#include "callcpp.h" #include "classify.h" #include "dict.h" +#include "findseam.h" +#include "gradechop.h" #include "language_model.h" -#include "ratngs.h" #include "matrix.h" -#include "gradechop.h" +#include "ratngs.h" #include "seam.h" -#include "findseam.h" -#include "callcpp.h" class WERD_RES; @@ -43,27 +43,21 @@ namespace tesseract { class SegSearchPending { public: SegSearchPending() - : classified_row_(-1), - revisit_whole_column_(false), - column_classified_(false) {} + : classified_row_(-1), + revisit_whole_column_(false), + column_classified_(false) {} // Marks the whole column as just classified. Used to start a search on // a newly initialized ratings matrix. - void SetColumnClassified() { - column_classified_ = true; - } + void SetColumnClassified() { column_classified_ = true; } // Marks the matrix entry at the given row as just classified. // Used after classifying a new matrix cell. // Additional to, not overriding a previous RevisitWholeColumn. - void SetBlobClassified(int row) { - classified_row_ = row; - } + void SetBlobClassified(int row) { classified_row_ = row; } // Marks the whole column as needing work, but not just classified. // Used when the parent vse list is updated. // Additional to, not overriding a previous SetBlobClassified. - void RevisitWholeColumn() { - revisit_whole_column_ = true; - } + void RevisitWholeColumn() { revisit_whole_column_ = true; } // Clears *this to indicate no work to do. void Clear() { @@ -102,24 +96,21 @@ class SegSearchPending { bool column_classified_; }; - /* ccmain/tstruct.cpp *********************************************************/ -class FRAGMENT:public ELIST_LINK -{ - public: - FRAGMENT() { //constructor - } - FRAGMENT(EDGEPT *head_pt, //start - EDGEPT *tail_pt); //end - - ICOORD head; //coords of start - ICOORD tail; //coords of end - EDGEPT *headpt; //start point - EDGEPT *tailpt; //end point +class FRAGMENT : public ELIST_LINK { + public: + FRAGMENT() { // constructor + } + FRAGMENT(EDGEPT* head_pt, // start + EDGEPT* tail_pt); // end + + ICOORD head; // coords of start + ICOORD tail; // coords of end + EDGEPT* headpt; // start point + EDGEPT* tailpt; // end point }; ELISTIZEH(FRAGMENT) - class Wordrec : public Classify { public: // config parameters ******************************************************* @@ -149,7 +140,8 @@ class Wordrec : public Classify { double_VAR_H(chop_split_dist_knob, 0.5, "Split length adjustment"); double_VAR_H(chop_overlap_knob, 0.9, "Split overlap adjustment"); double_VAR_H(chop_center_knob, 0.15, "Split center adjustment"); - INT_VAR_H(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs " + INT_VAR_H(chop_centered_maxwidth, 90, + "Width of (smaller) chopped blobs " "above which we don't care that a chop is not near the center."); double_VAR_H(chop_sharpness_knob, 0.06, "Split sharpness adjustment"); double_VAR_H(chop_width_change_knob, 5.0, "Width change adjustment"); @@ -183,37 +175,35 @@ class Wordrec : public Classify { // Fills word->alt_choices with alternative paths found during // chopping/segmentation search that are kept in best_choices. - void SaveAltChoices(const LIST &best_choices, WERD_RES *word); + void SaveAltChoices(const LIST& best_choices, WERD_RES* word); // Fills character choice lattice in the given BlamerBundle // using the given ratings matrix and best choice list. - void FillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, BlamerBundle *blamer_bundle); + void FillLattice(const MATRIX& ratings, const WERD_CHOICE_LIST& best_choices, + const UNICHARSET& unicharset, BlamerBundle* blamer_bundle); // Calls fill_lattice_ member function // (assumes that fill_lattice_ is not nullptr). - void CallFillLattice(const MATRIX &ratings, - const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, - BlamerBundle *blamer_bundle) { + void CallFillLattice(const MATRIX& ratings, + const WERD_CHOICE_LIST& best_choices, + const UNICHARSET& unicharset, + BlamerBundle* blamer_bundle) { (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle); } // tface.cpp - void program_editup(const char *textbase, TessdataManager *init_classifier, - TessdataManager *init_dict); - void cc_recog(WERD_RES *word); + void program_editup(const char* textbase, TessdataManager* init_classifier, + TessdataManager* init_dict); + void cc_recog(WERD_RES* word); void program_editdown(int32_t elasped_time); void set_pass1(); void set_pass2(); int end_recog(); - BLOB_CHOICE_LIST *call_matcher(TBLOB* blob); - int dict_word(const WERD_CHOICE &word); + BLOB_CHOICE_LIST* call_matcher(TBLOB* blob); + int dict_word(const WERD_CHOICE& word); // wordclass.cpp - BLOB_CHOICE_LIST *classify_blob(TBLOB *blob, - const char *string, - C_COL color, - BlamerBundle *blamer_bundle); + BLOB_CHOICE_LIST* classify_blob(TBLOB* blob, const char* string, C_COL color, + BlamerBundle* blamer_bundle); // segsearch.cpp // SegSearch works on the lower diagonal matrix of BLOB_CHOICE_LISTs. @@ -267,8 +257,7 @@ class Wordrec : public Classify { // // Note: this function assumes that word_res, best_choice_bundle arguments // are not nullptr. - void SegSearch(WERD_RES* word_res, - BestChoiceBundle* best_choice_bundle, + void SegSearch(WERD_RES* word_res, BestChoiceBundle* best_choice_bundle, BlamerBundle* blamer_bundle); // Setup and run just the initial segsearch on an established matrix, @@ -284,91 +273,75 @@ class Wordrec : public Classify { void DoSegSearch(WERD_RES* word_res); // chop.cpp - PRIORITY point_priority(EDGEPT *point); - void add_point_to_list(PointHeap* point_heap, EDGEPT *point); + PRIORITY point_priority(EDGEPT* point); + void add_point_to_list(PointHeap* point_heap, EDGEPT* point); // Returns true if the edgept supplied as input is an inside angle. This // is determined by the angular change of the vectors from point to point. - bool is_inside_angle(EDGEPT *pt); - int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3); - EDGEPT *pick_close_point(EDGEPT *critical_point, - EDGEPT *vertical_point, - int *best_dist); - void prioritize_points(TESSLINE *outline, PointHeap* points); - void new_min_point(EDGEPT *local_min, PointHeap* points); - void new_max_point(EDGEPT *local_max, PointHeap* points); - void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, - EDGEPT** best_point, - EDGEPT_CLIST *new_points); + bool is_inside_angle(EDGEPT* pt); + int angle_change(EDGEPT* point1, EDGEPT* point2, EDGEPT* point3); + EDGEPT* pick_close_point(EDGEPT* critical_point, EDGEPT* vertical_point, + int* best_dist); + void prioritize_points(TESSLINE* outline, PointHeap* points); + void new_min_point(EDGEPT* local_min, PointHeap* points); + void new_max_point(EDGEPT* local_max, PointHeap* points); + void vertical_projection_point(EDGEPT* split_point, EDGEPT* target_point, + EDGEPT** best_point, EDGEPT_CLIST* new_points); // chopper.cpp - SEAM *attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, + SEAM* attempt_blob_chop(TWERD* word, TBLOB* blob, int32_t blob_number, bool italic_blob, const GenericVector& seams); - SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number, - bool italic_blob, const GenericVector& seams); - SEAM *chop_overlapping_blob(const GenericVector& boxes, - bool italic_blob, - WERD_RES *word_res, int *blob_number); - SEAM *improve_one_blob(const GenericVector &blob_choices, - DANGERR *fixpt, - bool split_next_to_fragment, - bool italic_blob, - WERD_RES *word, - int *blob_number); - SEAM *chop_one_blob(const GenericVector &boxes, - const GenericVector &blob_choices, - WERD_RES *word_res, - int *blob_number); - void chop_word_main(WERD_RES *word); - void improve_by_chopping(float rating_cert_scale, - WERD_RES *word, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle, - LMPainPoints *pain_points, + SEAM* chop_numbered_blob(TWERD* word, int32_t blob_number, bool italic_blob, + const GenericVector& seams); + SEAM* chop_overlapping_blob(const GenericVector& boxes, + bool italic_blob, WERD_RES* word_res, + int* blob_number); + SEAM* improve_one_blob(const GenericVector& blob_choices, + DANGERR* fixpt, bool split_next_to_fragment, + bool italic_blob, WERD_RES* word, int* blob_number); + SEAM* chop_one_blob(const GenericVector& boxes, + const GenericVector& blob_choices, + WERD_RES* word_res, int* blob_number); + void chop_word_main(WERD_RES* word); + void improve_by_chopping(float rating_cert_scale, WERD_RES* word, + BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle, + LMPainPoints* pain_points, GenericVector* pending); - int select_blob_to_split(const GenericVector &blob_choices, - float rating_ceiling, - bool split_next_to_fragment); - int select_blob_to_split_from_fixpt(DANGERR *fixpt); + int select_blob_to_split(const GenericVector& blob_choices, + float rating_ceiling, bool split_next_to_fragment); + int select_blob_to_split_from_fixpt(DANGERR* fixpt); // findseam.cpp - void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue* seams); - void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, - PRIORITY priority, SEAM **seam_result, TBLOB *blob, - SeamPile *seam_pile); - void combine_seam(const SeamPile& seam_pile, - const SEAM* seam, SeamQueue* seam_queue); - SEAM *pick_good_seam(TBLOB *blob); - void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], - int16_t num_points, - SeamQueue* seam_queue, - SeamPile* seam_pile, - SEAM ** seam, TBLOB * blob); - void try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], - int16_t num_points, - EDGEPT_CLIST *new_points, - SeamQueue* seam_queue, - SeamPile* seam_pile, - SEAM ** seam, TBLOB * blob); + void add_seam_to_queue(float new_priority, SEAM* new_seam, SeamQueue* seams); + void choose_best_seam(SeamQueue* seam_queue, const SPLIT* split, + PRIORITY priority, SEAM** seam_result, TBLOB* blob, + SeamPile* seam_pile); + void combine_seam(const SeamPile& seam_pile, const SEAM* seam, + SeamQueue* seam_queue); + SEAM* pick_good_seam(TBLOB* blob); + void try_point_pairs(EDGEPT* points[MAX_NUM_POINTS], int16_t num_points, + SeamQueue* seam_queue, SeamPile* seam_pile, SEAM** seam, + TBLOB* blob); + void try_vertical_splits(EDGEPT* points[MAX_NUM_POINTS], int16_t num_points, + EDGEPT_CLIST* new_points, SeamQueue* seam_queue, + SeamPile* seam_pile, SEAM** seam, TBLOB* blob); // gradechop.cpp - PRIORITY grade_split_length(SPLIT *split); - PRIORITY grade_sharpness(SPLIT *split); + PRIORITY grade_split_length(SPLIT* split); + PRIORITY grade_sharpness(SPLIT* split); // outlines.cpp - bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, - EDGEPT **near_pt); + bool near_point(EDGEPT* point, EDGEPT* line_pt_0, EDGEPT* line_pt_1, + EDGEPT** near_pt); // pieces.cpp - virtual BLOB_CHOICE_LIST *classify_piece(const GenericVector& seams, - int16_t start, - int16_t end, - const char* description, - TWERD *word, - BlamerBundle *blamer_bundle); + virtual BLOB_CHOICE_LIST* classify_piece(const GenericVector& seams, + int16_t start, int16_t end, + const char* description, TWERD* word, + BlamerBundle* blamer_bundle); // Try to merge fragments in the ratings matrix and put the result in // the corresponding row and column - void merge_fragments(MATRIX *ratings, - int16_t num_blobs); + void merge_fragments(MATRIX* ratings, int16_t num_blobs); // Recursively go through the ratings matrix to find lists of fragments // to be merged in the function merge_and_put_fragment_lists. // current_frag is the position of the piece we are looking for. @@ -377,52 +350,46 @@ class Wordrec : public Classify { // to append the results to the matrix. num_frag_parts is the total // number of pieces we are looking for and num_blobs is the size of the // ratings matrix. - void get_fragment_lists(int16_t current_frag, - int16_t current_row, - int16_t start, - int16_t num_frag_parts, - int16_t num_blobs, - MATRIX *ratings, - BLOB_CHOICE_LIST *choice_lists); + void get_fragment_lists(int16_t current_frag, int16_t current_row, + int16_t start, int16_t num_frag_parts, + int16_t num_blobs, MATRIX* ratings, + BLOB_CHOICE_LIST* choice_lists); // Merge the fragment lists in choice_lists and append it to the // ratings matrix - void merge_and_put_fragment_lists(int16_t row, - int16_t column, + void merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts, - BLOB_CHOICE_LIST *choice_lists, - MATRIX *ratings); + BLOB_CHOICE_LIST* choice_lists, + MATRIX* ratings); // Filter the fragment list so that the filtered_choices only contain // fragments that are in the correct position. choices is the list // that we are going to filter. fragment_pos is the position in the // fragment that we are looking for and num_frag_parts is the the // total number of pieces. The result will be appended to // filtered_choices. - void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, - int fragment_pos, + void fill_filtered_fragment_list(BLOB_CHOICE_LIST* choices, int fragment_pos, int num_frag_parts, - BLOB_CHOICE_LIST *filtered_choices); + BLOB_CHOICE_LIST* filtered_choices); // Member variables. - LanguageModel *language_model_; + LanguageModel* language_model_; PRIORITY pass2_ok_split; // Stores the best choice for the previous word in the paragraph. // This variable is modified by PAGE_RES_IT when iterating over // words to OCR on the page. - WERD_CHOICE *prev_word_best_choice_; + WERD_CHOICE* prev_word_best_choice_; // Sums of blame reasons computed by the blamer. GenericVector blame_reasons_; // Function used to fill char choice lattices. - void (Wordrec::*fill_lattice_)(const MATRIX &ratings, - const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, - BlamerBundle *blamer_bundle); + void (Wordrec::*fill_lattice_)(const MATRIX& ratings, + const WERD_CHOICE_LIST& best_choices, + const UNICHARSET& unicharset, + BlamerBundle* blamer_bundle); protected: inline bool SegSearchDone(int num_futile_classifications) { return (language_model_->AcceptableChoiceFound() || - num_futile_classifications >= - segsearch_max_futile_classifications); + num_futile_classifications >= segsearch_max_futile_classifications); } // Updates the language model state recorded for the child entries specified @@ -450,24 +417,20 @@ class Wordrec : public Classify { // best_choice_bundle: a collection of variables that should be updated // if a new best choice is found // - void UpdateSegSearchNodes( - float rating_cert_scale, - int starting_col, - GenericVector* pending, - WERD_RES *word_res, - LMPainPoints *pain_points, - BestChoiceBundle *best_choice_bundle, - BlamerBundle *blamer_bundle); + void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, + GenericVector* pending, + WERD_RES* word_res, LMPainPoints* pain_points, + BestChoiceBundle* best_choice_bundle, + BlamerBundle* blamer_bundle); // Process the given pain point: classify the corresponding blob, enqueue // new pain points to join the newly classified blob with its neighbors. void ProcessSegSearchPainPoint(float pain_point_priority, - const MATRIX_COORD &pain_point, + const MATRIX_COORD& pain_point, const char* pain_point_type, GenericVector* pending, - WERD_RES *word_res, - LMPainPoints *pain_points, - BlamerBundle *blamer_bundle); + WERD_RES* word_res, LMPainPoints* pain_points, + BlamerBundle* blamer_bundle); // Resets enough of the results so that the Viterbi search is re-run. // Needed when the n-gram model is enabled, as the multi-length comparison // implementation will re-value existing paths to worse values. @@ -478,13 +441,11 @@ class Wordrec : public Classify { // Add pain points for classifying blobs on the correct segmentation path // (so that we can evaluate correct segmentation path and discover the reason // for incorrect result). - void InitBlamerForSegSearch(WERD_RES *word_res, - LMPainPoints *pain_points, - BlamerBundle *blamer_bundle, - STRING *blamer_debug); + void InitBlamerForSegSearch(WERD_RES* word_res, LMPainPoints* pain_points, + BlamerBundle* blamer_bundle, + STRING* blamer_debug); }; - } // namespace tesseract #endif // TESSERACT_WORDREC_WORDREC_H_