From da03e4e9105b6262706d40ef2b4436eae4ebe19f Mon Sep 17 00:00:00 2001
From: Ray Smith <rays@google.com>
Date: Fri, 14 Jul 2017 09:30:14 -0700
Subject: [PATCH] Fixes from pull of cleanups: clang tidied, reviewed, fixed
 new bugs, undeleted needed code. Probably breaks the build, due to some
 inclusion of changes in utf8/32 conversion

---
 api/baseapi.cpp                        |  21 +-
 api/pdfrenderer.cpp                    |  35 +-
 api/renderer.cpp                       |   2 +-
 api/tesseractmain.cpp                  |  38 +-
 ccmain/control.cpp                     |  12 +-
 ccmain/docqual.cpp                     |   6 +-
 ccmain/equationdetect.cpp              |   8 +-
 ccmain/paragraphs.cpp                  |   2 +-
 ccmain/paramsd.cpp                     |   6 +-
 ccmain/pgedit.cpp                      |   3 +-
 ccmain/thresholder.cpp                 |   4 +-
 ccstruct/boxread.cpp                   |   2 +-
 ccstruct/coutln.cpp                    |  20 +-
 ccstruct/coutln.h                      |   6 +-
 ccstruct/ocrblock.cpp                  |  10 +-
 ccstruct/otsuthr.cpp                   |   3 +-
 ccstruct/pdblock.cpp                   |  15 +-
 ccstruct/polyblk.cpp                   |  11 +-
 ccstruct/ratngs.h                      |   6 +-
 ccstruct/rejctmap.cpp                  |  78 +-
 ccstruct/rejctmap.h                    |  37 +-
 ccstruct/statistc.cpp                  |   8 +-
 ccstruct/stepblob.h                    |   4 +-
 ccutil/ambigs.cpp                      |   4 +-
 ccutil/ambigs.h                        |   6 +-
 ccutil/basedir.cpp                     |   5 +-
 ccutil/genericvector.h                 |  26 +-
 ccutil/helpers.h                       |   2 +-
 ccutil/host.h                          |   8 +-
 ccutil/memry.cpp                       |   8 -
 ccutil/memry.h                         |   4 -
 ccutil/params.cpp                      |   4 +-
 ccutil/strngs.h                        |  10 +-
 ccutil/tessdatamanager.cpp             |  16 +
 ccutil/tessdatamanager.h               |   5 +
 ccutil/unichar.cpp                     |  19 +-
 ccutil/unichar.h                       |  19 +-
 ccutil/unicharcompress.cpp             |   6 +-
 ccutil/unicharset.cpp                  |  16 +-
 classify/adaptive.cpp                  |  10 +-
 classify/adaptmatch.cpp                |  10 +-
 classify/cluster.cpp                   |  24 +-
 classify/clusttool.cpp                 |   2 +-
 classify/featdefs.cpp                  |   6 +-
 classify/featdefs.h                    |   4 +-
 classify/kdtree.cpp                    |  22 +-
 classify/mfoutline.cpp                 |   3 +-
 classify/ocrfeatures.cpp               |   9 +-
 classify/protos.cpp                    |   3 +-
 classify/shapetable.h                  |   8 +-
 cutil/oldlist.cpp                      | 175 ++---
 cutil/oldlist.h                        |   6 +-
 dict/dawg.h                            |   2 +-
 dict/dict.cpp                          |   6 +-
 dict/dict.h                            |   4 +-
 dict/permdawg.cpp                      |   2 +-
 dict/trie.cpp                          |   4 +-
 opencl/openclwrapper.cpp               | 950 ++++++++++++-------------
 opencl/openclwrapper.h                 |   2 +-
 textord/bbgrid.h                       |  12 +-
 textord/colpartition.h                 |   6 +-
 textord/drawedg.cpp                    |   7 +-
 textord/makerow.cpp                    |   2 +-
 textord/scanedg.cpp                    |  11 +-
 textord/tabvector.h                    |   4 +-
 training/boxchar.cpp                   |  35 +-
 training/commontraining.cpp            |  14 +-
 training/normstrngs.cpp                |  98 +--
 training/normstrngs.h                  |   6 +-
 training/pango_font_info.cpp           |  39 +
 training/pango_font_info.h             |  10 +
 training/stringrenderer.cpp            |  24 +-
 training/stringrenderer.h              |   2 +-
 training/unicharset_extractor.cpp      |   2 +
 training/unicharset_training_utils.cpp |  38 +-
 wordrec/language_model.cpp             | 166 +++--
 wordrec/lm_pain_points.cpp             |   2 +-
 wordrec/lm_state.h                     |   6 +-
 wordrec/outlines.cpp                   |   2 +-
 wordrec/pieces.cpp                     |   8 +-
 80 files changed, 1061 insertions(+), 1180 deletions(-)

diff --git a/api/baseapi.cpp b/api/baseapi.cpp
index 6cdf305f9b..d88bdbfd92 100644
--- a/api/baseapi.cpp
+++ b/api/baseapi.cpp
@@ -41,11 +41,11 @@
 #include <string.h>
 #endif  // _WIN32
 
+#include <fstream>
 #include <iostream>
-#include <string>
 #include <iterator>
-#include <fstream>
-#include <memory> // std::unique_ptr
+#include <memory>  // std::unique_ptr
+#include <string>
 
 #include "allheaders.h"
 
@@ -1540,7 +1540,8 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
     if (bold) hocr_str += "<strong>";
     if (italic) hocr_str += "<em>";
     do {
-      const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
+      const std::unique_ptr<const char[]> grapheme(
+          res_it->GetUTF8Text(RIL_SYMBOL));
       if (grapheme && grapheme[0] != 0) {
         hocr_str += HOcrEscape(grapheme.get());
       }
@@ -1662,7 +1663,8 @@ char* TessBaseAPI::GetTSVText(int page_number) {
     if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
 
     do {
-      tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
+      tsv_str +=
+          std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
       res_it->Next(RIL_SYMBOL);
     } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
     tsv_str += "\n";  // end of row
@@ -1720,7 +1722,8 @@ char* TessBaseAPI::GetBoxText(int page_number) {
   do {
     int left, top, right, bottom;
     if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
-      const std::unique_ptr</*non-const*/ char[]> text(it->GetUTF8Text(RIL_SYMBOL));
+      const std::unique_ptr</*non-const*/ char[]> text(
+          it->GetUTF8Text(RIL_SYMBOL));
       // Tesseract uses space for recognition failure. Fix to a reject
       // character, kTesseractReject so we don't create illegal box files.
       for (int i = 0; text[i] != '\0'; ++i) {
@@ -1728,8 +1731,7 @@ char* TessBaseAPI::GetBoxText(int page_number) {
           text[i] = kTesseractReject;
       }
       snprintf(result + output_length, total_length - output_length,
-               "%s %d %d %d %d %d\n",
-               text.get(), left, image_height_ - bottom,
+               "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
                right, image_height_ - top, page_number);
       output_length += strlen(result + output_length);
       // Just in case...
@@ -2063,8 +2065,7 @@ void TessBaseAPI::End() {
     delete paragraph_models_;
     paragraph_models_ = NULL;
   }
-  if (osd_tesseract_ == tesseract_)
-    osd_tesseract_ = nullptr;
+  if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
   delete tesseract_;
   tesseract_ = nullptr;
   delete osd_tesseract_;
diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp
index 36383c29e3..14eac7ec54 100644
--- a/api/pdfrenderer.cpp
+++ b/api/pdfrenderer.cpp
@@ -20,7 +20,7 @@
 #include "config_auto.h"
 #endif
 
-#include <memory> // std::unique_ptr
+#include <memory>  // std::unique_ptr
 #include "allheaders.h"
 #include "baseapi.h"
 #include "math.h"
@@ -457,13 +457,12 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
     STRING pdf_word("");
     int pdf_word_len = 0;
     do {
-      const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
+      const std::unique_ptr<const char[]> grapheme(
+          res_it->GetUTF8Text(RIL_SYMBOL));
       if (grapheme && grapheme[0] != '\0') {
-        GenericVector<int> unicodes;
-        UNICHAR::UTF8ToUnicode(grapheme.get(), &unicodes);
+        std::vector<char32> unicodes = UNICHAR::UTF8ToUTF32(grapheme.get());
         char utf16[kMaxBytesPerCodepoint];
-        for (int i = 0; i < unicodes.length(); i++) {
-          int code = unicodes[i];
+        for (char32 code : unicodes) {
           if (CodepointToUtf16be(code, utf16)) {
             pdf_word += utf16;
             pdf_word_len++;
@@ -566,13 +565,13 @@ bool TessPDFRenderer::BeginDocumentHandler() {
 
   // CIDTOGIDMAP
   const int kCIDToGIDMapSize = 2 * (1 << 16);
-  const std::unique_ptr</*non-const*/ unsigned char[]> cidtogidmap(new unsigned char[kCIDToGIDMapSize]);
+  const std::unique_ptr<unsigned char[]> cidtogidmap(
+      new unsigned char[kCIDToGIDMapSize]);
   for (int i = 0; i < kCIDToGIDMapSize; i++) {
     cidtogidmap[i] = (i % 2) ? 1 : 0;
   }
   size_t len;
-  unsigned char *comp =
-      zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len);
+  unsigned char *comp = zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len);
   n = snprintf(buf, sizeof(buf),
                "5 0 obj\n"
                "<<\n"
@@ -665,8 +664,8 @@ bool TessPDFRenderer::BeginDocumentHandler() {
   fseek(fp, 0, SEEK_END);
   long int size = ftell(fp);
   fseek(fp, 0, SEEK_SET);
-  const std::unique_ptr</*non-const*/ char[]> buffer(new char[size]);
-  if (fread(buffer.get(), 1, size, fp) != static_cast<unsigned long>(size)) {
+  const std::unique_ptr<char[]> buffer(new char[size]);
+  if (fread(buffer.get(), 1, size, fp) != static_cast<size_t>(size)) {
     fclose(fp);
     return false;
   }
@@ -879,11 +878,11 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
   AppendPDFObject(buf);
 
   // CONTENTS
-  const std::unique_ptr</*non-const*/ char[]> pdftext(GetPDFTextObjects(api, width, height));
-  const long pdftext_len = strlen(pdftext.get());
+  const std::unique_ptr<char[]> pdftext(GetPDFTextObjects(api, width, height));
+  const size_t pdftext_len = strlen(pdftext.get());
   size_t len;
-  unsigned char *comp_pdftext =
-      zlibCompress(reinterpret_cast<unsigned char *>(pdftext.get()), pdftext_len, &len);
+  unsigned char *comp_pdftext = zlibCompress(
+      reinterpret_cast<unsigned char *>(pdftext.get()), pdftext_len, &len);
   long comp_pdftext_len = len;
   n = snprintf(buf, sizeof(buf),
                "%ld 0 obj\n"
@@ -960,11 +959,9 @@ bool TessPDFRenderer::EndDocumentHandler() {
 
   // INFO
   STRING utf16_title = "FEFF";  // byte_order_marker
-  GenericVector<int> unicodes;
-  UNICHAR::UTF8ToUnicode(title(), &unicodes);
+  std::vector<char32> unicodes = UNICHAR::UTF8ToUTF32(title());
   char utf16[kMaxBytesPerCodepoint];
-  for (int i = 0; i < unicodes.length(); i++) {
-    int code = unicodes[i];
+  for (char32 code : unicodes) {
     if (CodepointToUtf16be(code, utf16)) {
       utf16_title += utf16;
     }
diff --git a/api/renderer.cpp b/api/renderer.cpp
index 429d302097..a71f2c7245 100644
--- a/api/renderer.cpp
+++ b/api/renderer.cpp
@@ -19,8 +19,8 @@
 #include "config_auto.h"
 #endif
 
-#include <memory> // std::unique_ptr
 #include <string.h>
+#include <memory>  // std::unique_ptr
 #include "baseapi.h"
 #include "genericvector.h"
 #include "renderer.h"
diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp
index 3448f39df8..9a326c3ead 100644
--- a/api/tesseractmain.cpp
+++ b/api/tesseractmain.cpp
@@ -1,21 +1,21 @@
 /**********************************************************************
-* File:        tesseractmain.cpp  (Formerly tessedit.c)
-* Description: Main program for merge of tess and editor.
-* Author:                  Ray Smith
-* Created:                 Tue Jan 07 15:21:46 GMT 1992
-*
-* (C) Copyright 1992, Hewlett-Packard Ltd.
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-** http://www.apache.org/licenses/LICENSE-2.0
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*
-**********************************************************************/
+ * File:        tesseractmain.cpp  (Formerly tessedit.c)
+ * Description: Main program for merge of tess and editor.
+ * Author:                  Ray Smith
+ * Created:                 Tue Jan 07 15:21:46 GMT 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
 
 // Include automatically generated configuration file if running autoconf
 #ifdef HAVE_CONFIG_H
@@ -404,7 +404,7 @@ int main(int argc, char** argv) {
   static GenericVector<STRING> vars_vec;
   static GenericVector<STRING> vars_values;
 
-#ifdef NDEBUG
+#if !defined(DEBUG)
   // Disable debugging and informational messages from Leptonica.
   setMsgSeverity(L_SEVERITY_ERROR);
 #endif
@@ -431,7 +431,7 @@ int main(int argc, char** argv) {
   // first TessBaseAPI must be destructed, DawgCache must be the last object.
   tesseract::Dict::GlobalDawgCache();
 
-  // Avoid memory leak caused by auto variable when exit() is called.
+  // Avoid memory leak caused by auto variable when main() returns.
   static tesseract::TessBaseAPI api;
 
   api.SetOutputName(outputbase);
diff --git a/ccmain/control.cpp b/ccmain/control.cpp
index 4749a80291..d16dad983c 100644
--- a/ccmain/control.cpp
+++ b/ccmain/control.cpp
@@ -1878,11 +1878,11 @@ BOOL8 Tesseract::check_debug_pt(WERD_RES *word, int location) {
  *
  * Find the modal font and remove from the stats.
  */
-static void find_modal_font(           //good chars in word
-                     STATS *fonts,     //font stats
-                     inT16 *font_out,   //output font
-                     int8_t *font_count  //output count
-                    ) {
+static void find_modal_font(  // good chars in word
+    STATS* fonts,             // font stats
+    inT16* font_out,          // output font
+    int8_t* font_count        // output count
+) {
   inT16 font;                     //font index
   inT32 count;                   //pile couat
 
@@ -1999,7 +1999,7 @@ void Tesseract::font_recognition_pass(PAGE_RES* page_res) {
     }
   }
   inT16 doc_font;                 // modal font
-  int8_t doc_font_count;           // modal font
+  int8_t doc_font_count;          // modal font count
   find_modal_font(&doc_fonts, &doc_font, &doc_font_count);
   if (doc_font_count == 0)
     return;
diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp
index ad7f228053..2a54b98308 100644
--- a/ccmain/docqual.cpp
+++ b/ccmain/docqual.cpp
@@ -511,9 +511,9 @@ BOOL8 Tesseract::terrible_word_crunch(WERD_RES *word,
   int adjusted_len;
   int crunch_mode = 0;
 
-  if ((word->best_choice->unichar_string().length () == 0) ||
-    (strspn (word->best_choice->unichar_string().string(), " ") ==
-    word->best_choice->unichar_string().unsigned_size ()))
+  if ((word->best_choice->unichar_string().length() == 0) ||
+      (strspn(word->best_choice->unichar_string().string(), " ") ==
+       word->best_choice->unichar_string().unsigned_size()))
     crunch_mode = 1;
   else {
     adjusted_len = word->reject_map.length ();
diff --git a/ccmain/equationdetect.cpp b/ccmain/equationdetect.cpp
index 3c73418ae0..393b0e81e6 100644
--- a/ccmain/equationdetect.cpp
+++ b/ccmain/equationdetect.cpp
@@ -116,9 +116,7 @@ EquationDetect::EquationDetect(const char* equ_datapath,
   cps_super_bbox_ = NULL;
 }
 
-EquationDetect::~EquationDetect() {
-  delete(cps_super_bbox_);
-}
+EquationDetect::~EquationDetect() { delete (cps_super_bbox_); }
 
 void EquationDetect::SetLangTesseract(Tesseract* lang_tesseract) {
   lang_tesseract_ = lang_tesseract;
@@ -258,8 +256,8 @@ BlobSpecialTextType EquationDetect::EstimateTypeForUnichar(
 
 void EquationDetect::IdentifySpecialText() {
   // Set configuration for Tesseract::AdaptiveClassifier.
-  equ_tesseract_.tess_cn_matching.set_value(true);  // turn it on
-  equ_tesseract_.tess_bn_matching.set_value(false);
+  equ_tesseract_.tess_cn_matching.set_value(1);  // turn it on
+  equ_tesseract_.tess_bn_matching.set_value(0);
 
   // Set the multiplier to zero for lang_tesseract_ to improve the accuracy.
   int classify_class_pruner = lang_tesseract_->classify_class_pruner_multiplier;
diff --git a/ccmain/paragraphs.cpp b/ccmain/paragraphs.cpp
index a8ef87be2c..6ea4eb4bf8 100644
--- a/ccmain/paragraphs.cpp
+++ b/ccmain/paragraphs.cpp
@@ -21,7 +21,7 @@
 #endif
 
 #include <ctype.h>
-#include <memory> // std::unique_ptr
+#include <memory>  // std::unique_ptr
 
 #include "genericvector.h"
 #include "helpers.h"
diff --git a/ccmain/paramsd.cpp b/ccmain/paramsd.cpp
index dc1d124377..a5fccf88e8 100644
--- a/ccmain/paramsd.cpp
+++ b/ccmain/paramsd.cpp
@@ -183,10 +183,8 @@ void ParamsEditor::GetPrefixes(const char* s, STRING* level_one,
 
 // Compare two VC objects by their name.
 int ParamContent::Compare(const void* v1, const void* v2) {
-  const ParamContent* one =
-    *static_cast<const ParamContent* const *>(v1);
-  const ParamContent* two =
-    *static_cast<const ParamContent* const *>(v2);
+  const ParamContent* one = *static_cast<const ParamContent* const*>(v1);
+  const ParamContent* two = *static_cast<const ParamContent* const*>(v2);
   return strcmp(one->GetName(), two->GetName());
 }
 
diff --git a/ccmain/pgedit.cpp b/ccmain/pgedit.cpp
index a59cef168a..e8262159d2 100644
--- a/ccmain/pgedit.cpp
+++ b/ccmain/pgedit.cpp
@@ -544,7 +544,8 @@ BOOL8 Tesseract::process_cmd_win_event(                 // UI command semantics
       break;
 
     default:
-      sprintf(msg, "Unrecognised event %" PRId32 "(%s)", cmd_event, new_value);
+      snprintf(msg, sizeof(msg), "Unrecognised event %" PRId32 "(%s)",
+               cmd_event, new_value);
       image_win->AddMessage(msg);
     break;
   }
diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp
index f7eed196ab..a4be3db3a5 100644
--- a/ccmain/thresholder.cpp
+++ b/ccmain/thresholder.cpp
@@ -311,8 +311,8 @@ void ImageThresholder::ThresholdRectToPix(Pix* src_pix,
     for (int x = 0; x < rect_width_; ++x) {
       bool white_result = true;
       for (int ch = 0; ch < num_channels; ++ch) {
-        int pixel = GET_DATA_BYTE(linedata,
-                                  (x + rect_left_) * num_channels + ch);
+        int pixel =
+            GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
         if (hi_values[ch] >= 0 &&
             (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
           white_result = false;
diff --git a/ccstruct/boxread.cpp b/ccstruct/boxread.cpp
index d6ceebb4db..e2233e691e 100644
--- a/ccstruct/boxread.cpp
+++ b/ccstruct/boxread.cpp
@@ -206,7 +206,7 @@ bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
   // Validate UTF8 by making unichars with it.
   int used = 0;
   while (used < uch_len) {
-    UNICHAR ch(uch + used, uch_len - used);
+    tesseract::UNICHAR ch(uch + used, uch_len - used);
     int new_used = ch.utf8_len();
     if (new_used == 0) {
       tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n",
diff --git a/ccstruct/coutln.cpp b/ccstruct/coutln.cpp
index ccd4b2faeb..974e452929 100644
--- a/ccstruct/coutln.cpp
+++ b/ccstruct/coutln.cpp
@@ -652,22 +652,10 @@ static void ComputeGradient(const l_uint32* data, int wpl,
                             int x, int y, int width, int height,
                             ICOORD* gradient) {
   const l_uint32* line = data + y * wpl;
-  int pix_x_y =
-      x < width && y < height
-          ? GET_DATA_BYTE(line, x)
-          : 255;
-  int pix_x_prevy =
-      x < width && y > 0
-          ? GET_DATA_BYTE(line - wpl, x)
-          : 255;
-  int pix_prevx_prevy =
-      x > 0 && y > 0
-          ? GET_DATA_BYTE(line - wpl, x - 1)
-          : 255;
-  int pix_prevx_y =
-      x > 0 && y < height
-          ? GET_DATA_BYTE(line, x - 1)
-          : 255;
+  int pix_x_y = x < width && y < height ? GET_DATA_BYTE(line, x) : 255;
+  int pix_x_prevy = x < width && y > 0 ? GET_DATA_BYTE(line - wpl, x) : 255;
+  int pix_prevx_prevy = x > 0 && y > 0 ? GET_DATA_BYTE(line - wpl, x - 1) : 255;
+  int pix_prevx_y = x > 0 && y < height ? GET_DATA_BYTE(line, x - 1) : 255;
   gradient->set_x(pix_x_y + pix_x_prevy - (pix_prevx_y + pix_prevx_prevy));
   gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y));
 }
diff --git a/ccstruct/coutln.h b/ccstruct/coutln.h
index fbb63174c8..7b9265c717 100644
--- a/ccstruct/coutln.h
+++ b/ccstruct/coutln.h
@@ -1,7 +1,7 @@
 /**********************************************************************
- * File:					coutln.h      (Formerly:  coutline.c)
- * Description: Code for the C_OUTLINE class.
- * Author:					Ray Smith
+ * File:        coutln.h  (Formerly: coutline.c)
+ * Description: Code for the C_OUTLINE class.
+ * Author:      Ray Smith
  * Created:					Mon Oct 07 16:01:57 BST 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
diff --git a/ccstruct/ocrblock.cpp b/ccstruct/ocrblock.cpp
index 19f2aecbfd..61b6d1c969 100644
--- a/ccstruct/ocrblock.cpp
+++ b/ccstruct/ocrblock.cpp
@@ -17,10 +17,10 @@
  *
  **********************************************************************/
 
+#include "ocrblock.h"
 #include <stdlib.h>
-#include <memory> // std::unique_ptr
+#include <memory>  // std::unique_ptr
 #include "blckerr.h"
-#include "ocrblock.h"
 #include "stepblob.h"
 #include "tprintf.h"
 
@@ -381,7 +381,8 @@ void BLOCK::compute_row_margins() {
     TBOX row_box = row->bounding_box();
     int left_y = row->base_line(row_box.left()) + row->x_height();
     int left_margin;
-    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_left(lines.get_line(left_y));
+    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_left(
+        lines.get_line(left_y));
     LeftMargin(segments_left.get(), row_box.left(), &left_margin);
 
     if (row_box.top() >= drop_cap_bottom) {
@@ -394,7 +395,8 @@ void BLOCK::compute_row_margins() {
 
     int right_y = row->base_line(row_box.right()) + row->x_height();
     int right_margin;
-    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_right(lines.get_line(right_y));
+    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_right(
+        lines.get_line(right_y));
     RightMargin(segments_right.get(), row_box.right(), &right_margin);
     row->set_lmargin(left_margin);
     row->set_rmargin(right_margin);
diff --git a/ccstruct/otsuthr.cpp b/ccstruct/otsuthr.cpp
index e8113b65ac..386d91bd24 100644
--- a/ccstruct/otsuthr.cpp
+++ b/ccstruct/otsuthr.cpp
@@ -161,8 +161,7 @@ void HistogramRect(Pix* src_pix, int channel,
   for (int y = top; y < bottom; ++y) {
     const l_uint32* linedata = srcdata + y * src_wpl;
     for (int x = 0; x < width; ++x) {
-      int pixel = GET_DATA_BYTE(linedata,
-                                (x + left) * num_channels + channel);
+      int pixel = GET_DATA_BYTE(linedata, (x + left) * num_channels + channel);
       ++histogram[pixel];
     }
   }
diff --git a/ccstruct/pdblock.cpp b/ccstruct/pdblock.cpp
index d0be9d2860..648608c164 100644
--- a/ccstruct/pdblock.cpp
+++ b/ccstruct/pdblock.cpp
@@ -17,11 +17,11 @@
  *
  **********************************************************************/
 
-#include          <stdlib.h>
-#include          <memory> // std::unique_ptr
-#include          "allheaders.h"
-#include          "blckerr.h"
-#include          "pdblock.h"
+#include "pdblock.h"
+#include <stdlib.h>
+#include <memory>  // std::unique_ptr
+#include "allheaders.h"
+#include "blckerr.h"
 
 // Include automatically generated configuration file if running autoconf.
 #ifdef HAVE_CONFIG_H
@@ -141,7 +141,8 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) {
     // rasterized interior. (Runs of interior pixels on a line.)
     PB_LINE_IT *lines = new PB_LINE_IT(&image_block);
     for (int y = box.bottom(); y < box.top(); ++y) {
-      const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line(y));
+      const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(
+          lines->get_line(y));
       if (!segments->empty()) {
         ICOORDELT_IT s_it(segments.get());
         // Each element of segments is a start x and x size of the
@@ -196,7 +197,7 @@ void PDBLK::plot(                //draw outline
     //                      serial,startpt.x(),startpt.y());
     char temp_buff[34];
     #if defined(__UNIX__) || defined(MINGW)
-    sprintf(temp_buff, "%" PRId32, serial);
+    snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, serial);
     #else
     ultoa (serial, temp_buff, 10);
     #endif
diff --git a/ccstruct/polyblk.cpp b/ccstruct/polyblk.cpp
index 984b82afef..c4d8211ed1 100644
--- a/ccstruct/polyblk.cpp
+++ b/ccstruct/polyblk.cpp
@@ -17,12 +17,12 @@
  *
  **********************************************************************/
 
+#include "polyblk.h"
 #include <ctype.h>
 #include <math.h>
 #include <stdio.h>
-#include <memory> // std::unique_ptr
+#include <memory>  // std::unique_ptr
 #include "elst.h"
-#include "polyblk.h"
 
 // Include automatically generated configuration file if running autoconf.
 #ifdef HAVE_CONFIG_H
@@ -254,7 +254,7 @@ void POLY_BLOCK::plot(ScrollView* window, inT32 num) {
     window->TextAttributes("Times", 80, false, false, false);
     char temp_buff[34];
     #if defined(__UNIX__) || defined(MINGW)
-    sprintf(temp_buff, "%" PRId32, num);
+    snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, num);
     #else
     ltoa (num, temp_buff, 10);
     #endif
@@ -281,9 +281,10 @@ void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) {
 
   for (y = this->bounding_box ()->bottom ();
   y <= this->bounding_box ()->top (); y++) {
-    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line (y));
+    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(
+        lines->get_line(y));
     if (!segments->empty ()) {
-      s_it.set_to_list (segments.get());
+      s_it.set_to_list(segments.get());
       for (s_it.mark_cycle_pt (); !s_it.cycled_list (); s_it.forward ()) {
         // Note different use of ICOORDELT, x coord is x coord of pixel
         // at the start of line segment, y coord is length of line segment
diff --git a/ccstruct/ratngs.h b/ccstruct/ratngs.h
index 7e658fa894..4d79b39ff1 100644
--- a/ccstruct/ratngs.h
+++ b/ccstruct/ratngs.h
@@ -190,10 +190,8 @@ class BLOB_CHOICE: public ELIST_LINK
     }
     // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
     static int SortByRating(const void *p1, const void *p2) {
-      const BLOB_CHOICE *bc1 =
-          *static_cast<const BLOB_CHOICE * const *>(p1);
-      const BLOB_CHOICE *bc2 =
-          *static_cast<const BLOB_CHOICE * const *>(p2);
+      const BLOB_CHOICE *bc1 = *static_cast<const BLOB_CHOICE *const *>(p1);
+      const BLOB_CHOICE *bc2 = *static_cast<const BLOB_CHOICE *const *>(p2);
       return (bc1->rating_ < bc2->rating_) ? -1 : 1;
     }
 
diff --git a/ccstruct/rejctmap.cpp b/ccstruct/rejctmap.cpp
index 6870ce9a38..aee7fbc28a 100644
--- a/ccstruct/rejctmap.cpp
+++ b/ccstruct/rejctmap.cpp
@@ -264,65 +264,17 @@ void REJ::full_print(FILE *fp) {
     flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
 }
 
-
-//The REJMAP class has been hacked to use malloc instead of new [].
-//This is to reduce memory fragmentation only as it is rather kludgy.
-// malloc by-passes the call to the constructor of REJ on each
-// array element. Although the constructor is empty, the BITS16 members
-// do have a constructor which sets all the flags to 0. The memset
-// replaces this functionality.
-
-REJMAP::REJMAP(  //classwise copy
-               const REJMAP &source) {
-  REJ *to;
-  REJ *from = source.ptr;
-  int i;
-
-  len = source.length ();
-
-  if (len > 0) {
-    ptr = (REJ *) malloc(len * sizeof (REJ));
-    to = ptr;
-    for (i = 0; i < len; i++) {
-      *to = *from;
-      to++;
-      from++;
-    }
-  }
-  else
-    ptr = NULL;
-}
-
-
-REJMAP & REJMAP::operator= (     //assign REJMAP
-const REJMAP & source            //from this
-) {
-  REJ *
-    to;
-  REJ *
-    from = source.ptr;
-  int
-    i;
-
-  initialise (source.len);
-  to = ptr;
-  for (i = 0; i < len; i++) {
-    *to = *from;
-    to++;
-    from++;
+REJMAP &REJMAP::operator=(const REJMAP &source) {
+  initialise(source.len);
+  for (int i = 0; i < len; i++) {
+    ptr[i] = source.ptr[i];
   }
   return *this;
 }
 
-
-void REJMAP::initialise(  //Redefine map
-                        inT16 length) {
-  free(ptr);
+void REJMAP::initialise(inT16 length) {
+  ptr.reset(new REJ[length]);
   len = length;
-  if (len > 0)
-    ptr = (REJ *) calloc(len, sizeof(REJ));
-  else
-    ptr = NULL;
 }
 
 
@@ -363,28 +315,12 @@ BOOL8 REJMAP::quality_recoverable_rejects() {  //Any potential rejs?
 void REJMAP::remove_pos(           //Cut out an element
                         inT16 pos  //element to remove
                        ) {
-  REJ *new_ptr;                  //new, smaller map
-  int i;
-
   ASSERT_HOST (pos >= 0);
   ASSERT_HOST (pos < len);
   ASSERT_HOST (len > 0);
 
   len--;
-  if (len > 0)
-    new_ptr = (REJ *) malloc(len * sizeof(REJ));
-  else
-    new_ptr = NULL;
-
-  for (i = 0; i < pos; i++)
-    new_ptr[i] = ptr[i];         //copy pre pos
-
-  for (; pos < len; pos++)
-    new_ptr[pos] = ptr[pos + 1]; //copy post pos
-
-                                 //delete old map
-  free(ptr);
-  ptr = new_ptr;
+  for (; pos < len; pos++) ptr[pos] = ptr[pos + 1];
 }
 
 
diff --git a/ccstruct/rejctmap.h b/ccstruct/rejctmap.h
index 84b5009b34..732661db17 100644
--- a/ccstruct/rejctmap.h
+++ b/ccstruct/rejctmap.h
@@ -44,7 +44,7 @@ OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
 #ifdef __UNIX__
 #include          <assert.h>
 #endif
-#include          "memry.h"
+#include <memory>
 #include          "bits16.h"
 #include                   "params.h"
 
@@ -203,33 +203,24 @@ class REJ
 
 class REJMAP
 {
-  REJ *ptr;                      //ptr to the chars
-  inT16 len;                     //Number of chars
+  std::unique_ptr<REJ[]> ptr;  // ptr to the chars
+  inT16 len;                   // Number of chars
 
-  public:
-    REJMAP() {  //constructor
-      ptr = NULL;
-      len = 0;
-    }
+ public:
+  REJMAP() : len(0) {}
 
-    REJMAP(  //classwise copy
-           const REJMAP &rejmap);
+  REJMAP(const REJMAP &rejmap) { *this = rejmap; }
 
-    REJMAP & operator= (         //assign REJMAP
-      const REJMAP & source);    //from this
-
-    ~REJMAP () {                 //destructor
-      free(ptr);
-    }
+  REJMAP &operator=(const REJMAP &source);
 
-    void initialise(  //Redefine map
-                    inT16 length);
+  // Sets up the ptr array to length, whatever it was before.
+  void initialise(inT16 length);
 
-    REJ & operator[](            //access function
-      inT16 index) const         //map index
-    {
-      ASSERT_HOST (index < len);
-      return ptr[index];         //no bounds checks
+  REJ &operator[](        // access function
+      inT16 index) const  // map index
+  {
+    ASSERT_HOST(index < len);
+    return ptr[index];  // only the upper bound is asserted above
     }
 
     inT32 length() const {  //map length
diff --git a/ccstruct/statistc.cpp b/ccstruct/statistc.cpp
index e192ab52a2..bf275fdb25 100644
--- a/ccstruct/statistc.cpp
+++ b/ccstruct/statistc.cpp
@@ -89,9 +89,7 @@ void STATS::clear() {  // clear out buckets
  *
  * Destructor for a stats class.
  **********************************************************************/
-STATS::~STATS () {
-  delete [] buckets_;
-}
+STATS::~STATS() { delete[] buckets_; }
 
 /**********************************************************************
  * STATS::add
@@ -772,8 +770,8 @@ void swap_entries(void *array,   // array of entries
   char *ptr2;
   size_t count;                  // of bytes
 
-  ptr1 = static_cast<char*>(array) + index1 * size;
-  ptr2 = static_cast<char*>(array) + index2 * size;
+  ptr1 = static_cast<char *>(array) + index1 * size;
+  ptr2 = static_cast<char *>(array) + index2 * size;
   for (count = 0; count < size; count++) {
     tmp = *ptr1;
     *ptr1++ = *ptr2;
diff --git a/ccstruct/stepblob.h b/ccstruct/stepblob.h
index a62c5bb0c9..5c63c13b16 100644
--- a/ccstruct/stepblob.h
+++ b/ccstruct/stepblob.h
@@ -117,8 +117,8 @@ class C_BLOB:public ELIST_LINK
     }
 
     static int SortByXMiddle(const void *v1, const void *v2) {
-      const C_BLOB* blob1 = *static_cast<const C_BLOB* const *>(v1);
-      const C_BLOB* blob2 = *static_cast<const C_BLOB* const *>(v2);
+      const C_BLOB* blob1 = *static_cast<const C_BLOB* const*>(v1);
+      const C_BLOB* blob2 = *static_cast<const C_BLOB* const*>(v2);
       return blob1->bounding_box().x_middle() -
              blob2->bounding_box().x_middle();
     }
diff --git a/ccutil/ambigs.cpp b/ccutil/ambigs.cpp
index 4fe3b883a6..f65a6df8fb 100644
--- a/ccutil/ambigs.cpp
+++ b/ccutil/ambigs.cpp
@@ -25,8 +25,8 @@
 #include "universalambigs.h"
 
 #if defined(_WIN32) && !defined(__GNUC__)
-# define strtok_r(str, delim, saveptr) strtok_s(str, delim, saveptr)
-#endif  /* _WIN32 && !__GNUC__ */
+#define strtok_r(str, delim, saveptr) strtok_s(str, delim, saveptr)
+#endif /* _WIN32 && !__GNUC__ */
 
 namespace tesseract {
 
diff --git a/ccutil/ambigs.h b/ccutil/ambigs.h
index bc5965d80b..786d46073e 100644
--- a/ccutil/ambigs.h
+++ b/ccutil/ambigs.h
@@ -120,10 +120,8 @@ class AmbigSpec : public ELIST_LINK {
   // be sorted by their wrong_ngram arrays. Example of wrong_ngram vectors
   // in a a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1].
   static int compare_ambig_specs(const void *spec1, const void *spec2) {
-    const AmbigSpec *s1 =
-      *static_cast<const AmbigSpec * const *>(spec1);
-    const AmbigSpec *s2 =
-      *static_cast<const AmbigSpec * const *>(spec2);
+    const AmbigSpec *s1 = *static_cast<const AmbigSpec *const *>(spec1);
+    const AmbigSpec *s2 = *static_cast<const AmbigSpec *const *>(spec2);
     int result = UnicharIdArrayUtils::compare(s1->wrong_ngram, s2->wrong_ngram);
     if (result != 0) return result;
     return UnicharIdArrayUtils::compare(s1->correct_fragments,
diff --git a/ccutil/basedir.cpp b/ccutil/basedir.cpp
index 67214b0e3d..12099af602 100644
--- a/ccutil/basedir.cpp
+++ b/ccutil/basedir.cpp
@@ -1,8 +1,9 @@
 /**********************************************************************
  * File:        basedir.cpp  (Formerly getpath.c)
- * Description: Find the directory location of the current executable using PATH.
+ * Description: Find the directory location of the current executable using
+ *              PATH.
  * Author:      Ray Smith
  * Created:     Mon Jul 09 09:06:39 BST 1990
  *
  * (C) Copyright 1990, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/ccutil/genericvector.h b/ccutil/genericvector.h
index bdea83d221..3c37b4aa71 100644
--- a/ccutil/genericvector.h
+++ b/ccutil/genericvector.h
@@ -37,9 +37,9 @@
 template <typename T>
 class GenericVector {
  public:
-  GenericVector() : size_used_(0), size_reserved_(0), data_(NULL),
-                    clear_cb_(NULL), compare_cb_(NULL) {}
-
+  GenericVector() {
+    init(kDefaultVectorSize);
+  }
   GenericVector(int size, T init_val) {
     init(size);
     init_to_size(size, init_val);
@@ -73,10 +73,11 @@ class GenericVector {
     return size_used_;
   }
   // Workaround to avoid g++ -Wsign-compare warnings.
-  unsigned int unsigned_size() const {
-    static_assert(sizeof(size_used_) <= sizeof(unsigned int), "");
+  size_t unsigned_size() const {
+    static_assert(sizeof(size_used_) <= sizeof(size_t),
+                  "Wow! sizeof(size_t) < sizeof(int32_t)!!");
     assert(0 <= size_used_);
-    return static_cast<unsigned int>(size_used_);
+    return static_cast<size_t>(size_used_);
   }
   int size_reserved() const {
     return size_reserved_;
@@ -364,8 +365,7 @@ typedef bool (*FileWriter)(const GenericVector<char>& data,
                            const STRING& filename);
 // The default FileReader loads the whole file into the vector of char,
 // returning false on error.
-inline bool LoadDataFromFile(const char *filename,
-                             GenericVector<char>* data) {
+inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
   bool result = false;
   FILE* fp = fopen(filename, "rb");
   if (fp != NULL) {
@@ -437,8 +437,8 @@ int sort_cmp(const void* t1, const void* t2) {
 // return > 0 if t1 > t2
 template <typename T>
 int sort_ptr_cmp(const void* t1, const void* t2) {
-  const T* a = *static_cast<T * const *>(t1);
-  const T* b = *static_cast<T * const *>(t2);
+  const T* a = *static_cast<T* const*>(t1);
+  const T* b = *static_cast<T* const*>(t2);
   if (*a < *b) {
     return -1;
   } else if (*b < *a) {
@@ -896,7 +896,8 @@ bool GenericVector<T>::write(
     }
     delete cb;
   } else {
-    if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) return false;
+    if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size())
+      return false;
   }
   return true;
 }
@@ -928,7 +929,8 @@ bool GenericVector<T>::read(
 template <typename T>
 bool GenericVector<T>::Serialize(FILE* fp) const {
   if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
-  if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) return false;
+  if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size())
+    return false;
   return true;
 }
 template <typename T>
diff --git a/ccutil/helpers.h b/ccutil/helpers.h
index 6b9249fb5d..3fd0edcf57 100644
--- a/ccutil/helpers.h
+++ b/ccutil/helpers.h
@@ -182,7 +182,7 @@ inline int IntCastRounded(double x) {
 
 // Reverse the order of bytes in a n byte quantity for big/little-endian switch.
 inline void ReverseN(void* ptr, int num_bytes) {
-  char *cptr = static_cast<char *>(ptr);
+  char* cptr = static_cast<char*>(ptr);
   int halfsize = num_bytes / 2;
   for (int i = 0; i < halfsize; ++i) {
     char tmp = cptr[i];
diff --git a/ccutil/host.h b/ccutil/host.h
index 5207495ed3..248e6cb994 100644
--- a/ccutil/host.h
+++ b/ccutil/host.h
@@ -27,8 +27,8 @@
 #undef max
 #endif
 
-#include <cinttypes>    // PRId32, ...
-#include <cstdint>      // int32_t, ...
+#include <cinttypes>  // PRId32, ...
+#include <cstdint>    // int32_t, ...
 
 // definitions of portable data types (numbers and characters)
 typedef int8_t inT8;
@@ -48,11 +48,11 @@ typedef unsigned char BOOL8;
 /* MinGW defines the standard PRI... macros, but MSVS doesn't. */
 
 #if !defined(PRId32)
-# define PRId32 "d"
+#define PRId32 "d"
 #endif
 
 #if !defined(PRId64)
-# define PRId64 "I64d"
+#define PRId64 "I64d"
 #endif
 
 #endif /* _WIN32 */
diff --git a/ccutil/memry.cpp b/ccutil/memry.cpp
index d0dfa231d0..2e4e1244a8 100644
--- a/ccutil/memry.cpp
+++ b/ccutil/memry.cpp
@@ -40,14 +40,6 @@ void *alloc_mem(inT32 count) {
   return malloc(static_cast<size_t>(count));
 }
 
-void *alloc_big_zeros(inT32 count) {
-  return calloc(static_cast<size_t>(count), 1);
-}
-
 void free_mem(void *oldchunk) {
   free(oldchunk);
 }
-
-void free_big_mem(void *oldchunk) {
-  free(oldchunk);
-}
diff --git a/ccutil/memry.h b/ccutil/memry.h
index 0ec275718e..d179aa2cf2 100644
--- a/ccutil/memry.h
+++ b/ccutil/memry.h
@@ -29,11 +29,7 @@ extern char *alloc_string(inT32 count);
 extern void free_string(char *string);
 // get some memory
 extern void *alloc_mem(inT32 count);
-// get some memory initialized to 0.
-extern void *alloc_big_zeros(inT32 count);
 // free mem from alloc_mem
 extern void free_mem(void *oldchunk);
-// free mem from alloc_big_zeros
-extern void free_big_mem(void *oldchunk);
 
 #endif
diff --git a/ccutil/params.cpp b/ccutil/params.cpp
index 1ec2b6dc5b..7ea2189a5c 100644
--- a/ccutil/params.cpp
+++ b/ccutil/params.cpp
@@ -101,8 +101,8 @@ bool ParamUtils::SetParam(const char *name, const char* value,
   int intval;
   IntParam *ip = FindParam<IntParam>(name, GlobalParams()->int_params,
                                      member_params->int_params);
-  if (ip && ip->constraint_ok(constraint) &&
-      sscanf(value, "%d", &intval) == 1) ip->set_value(intval);
+  if (ip && ip->constraint_ok(constraint) && sscanf(value, "%d", &intval) == 1)
+    ip->set_value(intval);
 
   // Look for the parameter among bool parameters.
   BoolParam *bp = FindParam<BoolParam>(name, GlobalParams()->bool_params,
diff --git a/ccutil/strngs.h b/ccutil/strngs.h
index 17169e21ae..36224305c4 100644
--- a/ccutil/strngs.h
+++ b/ccutil/strngs.h
@@ -20,11 +20,11 @@
 #ifndef           STRNGS_H
 #define           STRNGS_H
 
-#include          <assert.h>
-#include          <stdio.h>
-#include          <string.h>
-#include          "platform.h"
-#include          "memry.h"
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include "memry.h"
+#include "platform.h"
 
 namespace tesseract {
 class TFile;
diff --git a/ccutil/tessdatamanager.cpp b/ccutil/tessdatamanager.cpp
index c0bd370460..048ff15824 100644
--- a/ccutil/tessdatamanager.cpp
+++ b/ccutil/tessdatamanager.cpp
@@ -33,6 +33,13 @@
 
 namespace tesseract {
 
+// Lazily loads from the given filename. Won't actually read the file
+// until it needs it.
+void TessdataManager::LoadFileLater(const char *data_file_name) {
+  Clear();
+  data_file_name_ = data_file_name;
+}
+
 bool TessdataManager::Init(const char *data_file_name) {
   GenericVector<char> data;
   if (reader_ == nullptr) {
@@ -46,6 +53,7 @@ bool TessdataManager::Init(const char *data_file_name) {
 // Loads from the given memory buffer as if a file.
 bool TessdataManager::LoadMemBuffer(const char *name, const char *data,
                                     int size) {
+  Clear();
   data_file_name_ = name;
   TFile fp;
   fp.Open(data, size);
@@ -74,6 +82,14 @@ bool TessdataManager::LoadMemBuffer(const char *name, const char *data,
   return true;
 }
 
+// Overwrites a single entry of the given type. Safe for size == 0.
+void TessdataManager::OverwriteEntry(TessdataType type, const char *data,
+                                     int size) {
+  is_loaded_ = true;
+  entries_[type].resize_no_init(size);
+  if (size > 0) memcpy(&entries_[type][0], data, size);
+}
+
 // Saves to the given filename.
 bool TessdataManager::SaveFile(const STRING &filename,
                                FileWriter writer) const {
diff --git a/ccutil/tessdatamanager.h b/ccutil/tessdatamanager.h
index 1c736663cd..db9c5583f7 100644
--- a/ccutil/tessdatamanager.h
+++ b/ccutil/tessdatamanager.h
@@ -128,6 +128,9 @@ class TessdataManager {
   bool swap() const { return swap_; }
   bool is_loaded() const { return is_loaded_; }
 
+  // Lazily loads from the given filename. Won't actually read the file
+  // until it needs it.
+  void LoadFileLater(const char *data_file_name);
   /**
    * Opens and reads the given data file right now.
    * @return true on success.
@@ -136,6 +139,8 @@ class TessdataManager {
   // Loads from the given memory buffer as if a file, remembering name as some
   // arbitrary source id for caching.
   bool LoadMemBuffer(const char *name, const char *data, int size);
+  // Overwrites a single entry of the given type.
+  void OverwriteEntry(TessdataType type, const char *data, int size);
 
   // Saves to the given filename.
   bool SaveFile(const STRING &filename, FileWriter writer) const;
diff --git a/ccutil/unichar.cpp b/ccutil/unichar.cpp
index 0ceced13f0..255136f3ff 100644
--- a/ccutil/unichar.cpp
+++ b/ccutil/unichar.cpp
@@ -24,6 +24,8 @@
 
 #define UNI_MAX_LEGAL_UTF32 0x0010FFFF
 
+namespace tesseract {
+
 // Construct from a utf8 string. If len<0 then the string is null terminated.
 // If the string is too long to fit in the UNICHAR then it takes only what
 // will fit. Checks for illegal input and stops at an illegal sequence.
@@ -206,20 +208,21 @@ UNICHAR::const_iterator UNICHAR::end(const char* utf8_str, const int len) {
 }
 
 // Converts a utf-8 string to a vector of unicodes.
-// Returns false if the input contains invalid UTF-8, and replaces
-// the rest of the string with a single space.
-bool UNICHAR::UTF8ToUnicode(const char* utf8_str,
-                            GenericVector<int>* unicodes) {
+// Returns an empty vector if the input contains invalid UTF-8.
+/* static */
+std::vector<char32> UNICHAR::UTF8ToUTF32(const char* utf8_str) {
   const int utf8_length = strlen(utf8_str);
+  std::vector<char32> unicodes;
+  unicodes.reserve(utf8_length);
   const_iterator end_it(end(utf8_str, utf8_length));
   for (const_iterator it(begin(utf8_str, utf8_length)); it != end_it; ++it) {
     if (it.is_legal()) {
-      unicodes->push_back(*it);
+      unicodes.push_back(*it);
     } else {
-      unicodes->push_back(' ');
-      return false;
+      unicodes.clear();
+      return unicodes;
     }
   }
-  return true;
+  return unicodes;
 }
 
diff --git a/ccutil/unichar.h b/ccutil/unichar.h
index 85dde6f268..a1aef5a897 100644
--- a/ccutil/unichar.h
+++ b/ccutil/unichar.h
@@ -22,13 +22,14 @@
 
 #include <memory.h>
 #include <string.h>
-
-template <typename T> class GenericVector;
+#include <string>
+#include <vector>
 
 // Maximum number of characters that can be stored in a UNICHAR. Must be
 // at least 4. Must not exceed 31 without changing the coding of length.
 #define UNICHAR_LEN 30
 
+// TODO(rays) Move these to the tesseract namespace.
 // A UNICHAR_ID is the unique id of a unichar.
 typedef int UNICHAR_ID;
 
@@ -45,6 +46,10 @@ enum StrongScriptDirection {
                           // and right-to-left characters.
 };
 
+namespace tesseract {
+
+typedef signed int char32;
+
 // The UNICHAR class holds a single classification result. This may be
 // a single Unicode character (stored as between 1 and 4 utf8 bytes) or
 // multiple Unicode characters representing the NFKC expansion of a ligature
@@ -151,9 +156,11 @@ class UNICHAR {
   static const_iterator end(const char* utf8_str, const int byte_length);
 
   // Converts a utf-8 string to a vector of unicodes.
-  // Returns false if the input contains invalid UTF-8, and replaces
-  // the rest of the string with a single space.
-  static bool UTF8ToUnicode(const char* utf8_str, GenericVector<int>* unicodes);
+  // Returns an empty vector if the input contains invalid UTF-8.
+  static std::vector<char32> UTF8ToUTF32(const char* utf8_str);
+  // Converts a vector of unicodes to a utf8 string.
+  // Returns an empty string if the input contains an invalid unicode.
+  static std::string UTF32ToUTF8(const std::vector<char32>& str32);
 
  private:
   // A UTF-8 representation of 1 or more Unicode characters.
@@ -162,4 +169,6 @@ class UNICHAR {
   char chars[UNICHAR_LEN];
 };
 
+}  // namespace tesseract
+
 #endif  // TESSERACT_CCUTIL_UNICHAR_H_
diff --git a/ccutil/unicharcompress.cpp b/ccutil/unicharcompress.cpp
index 3b8595cac2..969016e0ed 100644
--- a/ccutil/unicharcompress.cpp
+++ b/ccutil/unicharcompress.cpp
@@ -148,10 +148,10 @@ bool UnicharCompress::ComputeEncoding(const UNICHARSET& unicharset, int null_id,
     }
     RecodedCharID code;
     // Convert to unicodes.
-    GenericVector<int> unicodes;
+    std::vector<char32> unicodes;
     if (u < unicharset.size() &&
-        UNICHAR::UTF8ToUnicode(unicharset.get_normed_unichar(u), &unicodes) &&
-        unicodes.size() == 1) {
+        (unicodes = UNICHAR::UTF8ToUTF32(unicharset.get_normed_unichar(u)))
+                .size() == 1) {
       // Check single unicodes for Hangul/Han and encode if so.
       int unicode = unicodes[0];
       int leading, vowel, trailing;
diff --git a/ccutil/unicharset.cpp b/ccutil/unicharset.cpp
index f36ac039de..aa87c127a4 100644
--- a/ccutil/unicharset.cpp
+++ b/ccutil/unicharset.cpp
@@ -29,6 +29,10 @@
 #include "tprintf.h"
 #include "unichar.h"
 
+// TODO(rays) Move UNICHARSET to tesseract namespace.
+using tesseract::char32;
+using tesseract::UNICHAR;
+
 // Special character used in representing character fragments.
 static const char kSeparator = '|';
 // Special character used in representing 'natural' character fragments.
@@ -990,12 +994,9 @@ bool UNICHARSET::AnyRepeatedUnicodes() const {
   if (has_special_codes()) start_id = SPECIAL_UNICHAR_CODES_COUNT;
   for (int id = start_id; id < size_used; ++id) {
     // Convert to unicodes.
-    GenericVector<int> unicodes;
-    if (UNICHAR::UTF8ToUnicode(get_normed_unichar(id), &unicodes) &&
-        unicodes.size() > 1) {
-      for (int u = 1; u < unicodes.size(); ++u) {
-        if (unicodes[u - 1] == unicodes[u]) return true;
-      }
+    std::vector<char32> unicodes = UNICHAR::UTF8ToUTF32(get_normed_unichar(id));
+    for (size_t u = 1; u < unicodes.size(); ++u) {  // size_t matches size()
+      if (unicodes[u - 1] == unicodes[u]) return true;
     }
   }
   return false;
@@ -1013,7 +1014,8 @@ int UNICHARSET::add_script(const char* script) {
     assert(script_table_size_used == script_table_size_reserved);
     script_table_size_reserved += script_table_size_reserved;
     char** new_script_table = new char*[script_table_size_reserved];
-    memcpy(new_script_table, script_table, script_table_size_used * sizeof(char*));
+    memcpy(new_script_table, script_table,
+           script_table_size_used * sizeof(char*));
     delete[] script_table;
     script_table = new_script_table;
   }
diff --git a/classify/adaptive.cpp b/classify/adaptive.cpp
index 7483a74fee..54157d0e80 100644
--- a/classify/adaptive.cpp
+++ b/classify/adaptive.cpp
@@ -221,7 +221,7 @@ void free_adapted_templates(ADAPT_TEMPLATES templates) {
 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
   int NumProtos = MaxProtoId + 1;
 
-  TEMP_CONFIG Config = (TEMP_CONFIG) malloc(sizeof(TEMP_CONFIG_STRUCT));
+  TEMP_CONFIG Config = (TEMP_CONFIG)malloc(sizeof(TEMP_CONFIG_STRUCT));
   Config->Protos = NewBitVector (NumProtos);
 
   Config->NumTimesSeen = 1;
@@ -246,7 +246,7 @@ TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
  * @note History: Thu Mar 14 13:31:31 1991, DSJ, Created.
  */
 TEMP_PROTO NewTempProto() {
-  return (TEMP_PROTO) malloc(sizeof(TEMP_PROTO_STRUCT));
+  return (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT));
 }                                /* NewTempProto */
 
 
@@ -325,7 +325,7 @@ ADAPT_CLASS ReadAdaptedClass(TFile *fp) {
   fp->FRead(&NumTempProtos, sizeof(int), 1);
   Class->TempProtos = NIL_LIST;
   for (i = 0; i < NumTempProtos; i++) {
-    TEMP_PROTO TempProto = (TEMP_PROTO) malloc(sizeof(TEMP_PROTO_STRUCT));
+    TEMP_PROTO TempProto = (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT));
     fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
     Class->TempProtos = push_last (Class->TempProtos, TempProto);
   }
@@ -390,7 +390,7 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) {
  * @note History: Tue Mar 19 14:25:26 1991, DSJ, Created.
  */
 PERM_CONFIG ReadPermConfig(TFile *fp) {
-  PERM_CONFIG Config = (PERM_CONFIG) malloc(sizeof(PERM_CONFIG_STRUCT));
+  PERM_CONFIG Config = (PERM_CONFIG)malloc(sizeof(PERM_CONFIG_STRUCT));
   uinT8 NumAmbigs;
   fp->FRead(&NumAmbigs, sizeof(uinT8), 1);
   Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
@@ -416,7 +416,7 @@ PERM_CONFIG ReadPermConfig(TFile *fp) {
  * @note History: Tue Mar 19 14:29:59 1991, DSJ, Created.
  */
 TEMP_CONFIG ReadTempConfig(TFile *fp) {
-  TEMP_CONFIG Config = (TEMP_CONFIG) malloc(sizeof(TEMP_CONFIG_STRUCT));
+  TEMP_CONFIG Config = (TEMP_CONFIG)malloc(sizeof(TEMP_CONFIG_STRUCT));
   fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
 
   Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp
index ce62ee89b5..b796774d75 100644
--- a/classify/adaptmatch.cpp
+++ b/classify/adaptmatch.cpp
@@ -1980,7 +1980,7 @@ void Classify::MakePermanent(ADAPT_TEMPLATES Templates,
 
   // Initialize permanent config.
   Ambigs = GetAmbiguities(Blob, ClassId);
-  PERM_CONFIG Perm = (PERM_CONFIG) malloc(sizeof(PERM_CONFIG_STRUCT));
+  PERM_CONFIG Perm = (PERM_CONFIG)malloc(sizeof(PERM_CONFIG_STRUCT));
   Perm->Ambigs = Ambigs;
   Perm->FontinfoId = Config->FontinfoId;
 
@@ -2241,11 +2241,9 @@ void Classify::ShowBestMatchFor(int shape_id,
 
   tprintf("Static Shape ID: %d\n", shape_id);
   ShowMatchDisplay();
-  im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
-            AllProtosOn, &config_mask, // TODO: or reinterpret_cast<BIT_VECTOR>(&config_mask) anyway?
-            num_features, features, &cn_result,
-            classify_adapt_feature_threshold,
-            matcher_debug_flags,
+  im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), AllProtosOn,
+            &config_mask, num_features, features, &cn_result,
+            classify_adapt_feature_threshold, matcher_debug_flags,
             matcher_debug_separate_windows);
   UpdateMatchDisplay();
 #endif  // GRAPHICS_DISABLED
diff --git a/classify/cluster.cpp b/classify/cluster.cpp
index 9221b54c97..678f3aa674 100644
--- a/classify/cluster.cpp
+++ b/classify/cluster.cpp
@@ -592,12 +592,12 @@ void FreePrototype(void *arg) {  //PROTOTYPE     *Prototype)
     Prototype->Cluster->Prototype = FALSE;
 
   // deallocate the prototype statistics and then the prototype itself
-  free (Prototype->Distrib);
-  free (Prototype->Mean);
+  free(Prototype->Distrib);
+  free(Prototype->Mean);
   if (Prototype->Style != spherical) {
-    free (Prototype->Variance.Elliptical);
-    free (Prototype->Magnitude.Elliptical);
-    free (Prototype->Weight.Elliptical);
+    free(Prototype->Variance.Elliptical);
+    free(Prototype->Magnitude.Elliptical);
+    free(Prototype->Weight.Elliptical);
   }
   free(Prototype);
 }                                // FreePrototype
@@ -1123,9 +1123,9 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
   if (TotalDims < N + 1 || TotalDims < 2)
     return NULL;
   const int kMatrixSize = N * N * sizeof(FLOAT32);
-  FLOAT32* Covariance = static_cast<FLOAT32 *>(Emalloc(kMatrixSize));
-  FLOAT32* Inverse = static_cast<FLOAT32 *>(Emalloc(kMatrixSize));
-  FLOAT32* Delta = static_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
+  FLOAT32 *Covariance = static_cast<FLOAT32 *>(Emalloc(kMatrixSize));
+  FLOAT32 *Inverse = static_cast<FLOAT32 *>(Emalloc(kMatrixSize));
+  FLOAT32 *Delta = static_cast<FLOAT32 *>(Emalloc(N * sizeof(FLOAT32)));
   // Compute a new covariance matrix that only uses essential features.
   for (int i = 0; i < N; ++i) {
     int row_offset = i * N;
@@ -1749,13 +1749,13 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
   BOOL8 Symmetrical;
 
   // allocate memory needed for data structure
-  Buckets = static_cast<BUCKETS*>(Emalloc(sizeof(BUCKETS)));
+  Buckets = static_cast<BUCKETS *>(Emalloc(sizeof(BUCKETS)));
   Buckets->NumberOfBuckets = OptimumNumberOfBuckets(SampleCount);
   Buckets->SampleCount = SampleCount;
   Buckets->Confidence = Confidence;
-  Buckets->Count = static_cast<uinT32*>(
-      Emalloc(Buckets->NumberOfBuckets * sizeof(uinT32)));
-  Buckets->ExpectedCount = static_cast<FLOAT32*>(
+  Buckets->Count =
+      static_cast<uinT32 *>(Emalloc(Buckets->NumberOfBuckets * sizeof(uinT32)));
+  Buckets->ExpectedCount = static_cast<FLOAT32 *>(
       Emalloc(Buckets->NumberOfBuckets * sizeof(FLOAT32)));
 
   // initialize simple fields
diff --git a/classify/clusttool.cpp b/classify/clusttool.cpp
index b1a3d4b1e4..37cb7c49d1 100644
--- a/classify/clusttool.cpp
+++ b/classify/clusttool.cpp
@@ -227,7 +227,7 @@ FLOAT32 *ReadNFloats(TFile *fp, uinT16 N, FLOAT32 Buffer[]) {
   bool needs_free = false;
 
   if (Buffer == NULL) {
-    Buffer = static_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
+    Buffer = static_cast<FLOAT32 *>(Emalloc(N * sizeof(FLOAT32)));
     needs_free = true;
   }
 
diff --git a/classify/featdefs.cpp b/classify/featdefs.cpp
index 2e81290025..66c66ea9c8 100644
--- a/classify/featdefs.cpp
+++ b/classify/featdefs.cpp
@@ -290,13 +290,13 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
  * - ILLEGAL_SHORT_NAME
  * @note History: Wed May 23 15:36:05 1990, DSJ, Created.
  */
-int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
-                           const char *ShortName) {
+uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
+                                const char *ShortName) {
   int i;
 
   for (i = 0; i < FeatureDefs.NumFeatureTypes; i++)
     if (!strcmp ((FeatureDefs.FeatureDesc[i]->ShortName), ShortName))
-      return (i);
+      return static_cast<uint32_t>(i);
   DoError (ILLEGAL_SHORT_NAME, "Illegal short name for a feature");
   return 0;
 
diff --git a/classify/featdefs.h b/classify/featdefs.h
index 7c168f3daa..1478efa189 100644
--- a/classify/featdefs.h
+++ b/classify/featdefs.h
@@ -70,8 +70,8 @@ void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
 CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
                               FILE *File);
 
-int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
-                           const char *ShortName);
+uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
+                                const char *ShortName);
 
 /**----------------------------------------------------------------------------
         Global Data Definitions and Declarations
diff --git a/classify/kdtree.cpp b/classify/kdtree.cpp
index e98228a754..8fe2e15c40 100644
--- a/classify/kdtree.cpp
+++ b/classify/kdtree.cpp
@@ -137,10 +137,8 @@ class KDTreeSearch {
   MinK<FLOAT32, void *> results_;
 };
 
-KDTreeSearch::KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest) :
-    tree_(tree),
-    query_point_(query_point),
-    results_(MAXSEARCH, k_closest) {
+KDTreeSearch::KDTreeSearch(KDTREE *tree, FLOAT32 *query_point, int k_closest)
+    : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
   sb_min_ = new FLOAT32[tree->KeySize];
   sb_max_ = new FLOAT32[tree->KeySize];
 }
@@ -166,8 +164,9 @@ void KDTreeSearch::Search(int *result_count,
     int count = results_.elements_count();
     *result_count = count;
     for (int j = 0; j < count; j++) {
-      // TODO: why FLOAT64 here?
-      distances[j] = (FLOAT32) sqrt((FLOAT64)results_.elements()[j].key);
+      // Pre-cast to float64 as key is a template type and we have no control
+      // over its actual type.
+      distances[j] = (FLOAT32)sqrt((FLOAT64)results_.elements()[j].key);
       results[j] = results_.elements()[j].value;
     }
   }
@@ -387,10 +386,7 @@ KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) {
 
 
 /*---------------------------------------------------------------------------*/
-void FreeKDNode(KDNODE *Node) {
-  free(Node);
-}
-
+void FreeKDNode(KDNODE *Node) { free(Node); }
 
 /*---------------------------------------------------------------------------*/
 /**
@@ -405,8 +401,8 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
   if (!BoxIntersectsSearch(sb_min_, sb_max_))
     return;
 
-  results_.insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc,
-                                  query_point_, sub_tree->Key),
+  results_.insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc, query_point_,
+                                  sub_tree->Key),
                   sub_tree->Data);
 
   if (query_point_[level] < sub_tree->BranchPoint) {
@@ -479,7 +475,7 @@ FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) {
 /// one wrap distance away from the query.
 bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) {
   FLOAT32 *query = query_point_;
-  // Why FLOAT64?
+  // Compute the sum in higher precision.
   FLOAT64 total_distance = 0.0;
   FLOAT64 radius_squared =
       results_.max_insertable_key() * results_.max_insertable_key();
diff --git a/classify/mfoutline.cpp b/classify/mfoutline.cpp
index 3bd0916edd..3f41571b8f 100644
--- a/classify/mfoutline.cpp
+++ b/classify/mfoutline.cpp
@@ -219,10 +219,9 @@ void MarkDirectionChanges(MFOUTLINE Outline) {
 /*---------------------------------------------------------------------------*/
 /** Return a new edge point for a micro-feature outline. */
 MFEDGEPT *NewEdgePoint() {
-  return (MFEDGEPT *) malloc(sizeof(MFEDGEPT));
+  return static_cast<MFEDGEPT *>(malloc(sizeof(MFEDGEPT)));
 }
 
-
 /*---------------------------------------------------------------------------*/
 /**
  * This routine returns the next point in the micro-feature
diff --git a/classify/ocrfeatures.cpp b/classify/ocrfeatures.cpp
index 2dfd693043..8008d4aff7 100644
--- a/classify/ocrfeatures.cpp
+++ b/classify/ocrfeatures.cpp
@@ -56,9 +56,7 @@ BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) {
  * @return none
  * @note History: Mon May 21 13:33:27 1990, DSJ, Created.
  */
-void FreeFeature(FEATURE Feature) {
-  free(Feature);
-}                                /* FreeFeature */
+void FreeFeature(FEATURE Feature) { free(Feature); } /* FreeFeature */
 
 /**
  * Release the memory consumed by the specified feature
@@ -88,9 +86,8 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) {
 FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) {
   FEATURE Feature;
 
-  Feature = (FEATURE) malloc(sizeof(FEATURE_STRUCT) +
-    (FeatureDesc->NumParams - 1) *
-    sizeof (FLOAT32));
+  Feature = (FEATURE)malloc(sizeof(FEATURE_STRUCT) +
+                            (FeatureDesc->NumParams - 1) * sizeof(FLOAT32));
   Feature->Type = FeatureDesc;
   return (Feature);
 
diff --git a/classify/protos.cpp b/classify/protos.cpp
index ab604de9a8..ee887563be 100644
--- a/classify/protos.cpp
+++ b/classify/protos.cpp
@@ -228,8 +228,7 @@ void FreeClassFields(CLASS_TYPE Class) {
   int i;
 
   if (Class) {
-    if (Class->MaxNumProtos > 0)
-      free(Class->Prototypes);
+    if (Class->MaxNumProtos > 0) free(Class->Prototypes);
     if (Class->MaxNumConfigs > 0) {
       for (i = 0; i < Class->NumConfigs; i++)
         FreeBitVector (Class->Configurations[i]);
diff --git a/classify/shapetable.h b/classify/shapetable.h
index 5d3f64cc97..f399989999 100644
--- a/classify/shapetable.h
+++ b/classify/shapetable.h
@@ -54,8 +54,8 @@ struct UnicharRating {
 
   // Sort function to sort ratings appropriately by descending rating.
   static int SortDescendingRating(const void* t1, const void* t2) {
-    const UnicharRating* a = static_cast<const UnicharRating *>(t1);
-    const UnicharRating* b = static_cast<const UnicharRating *>(t2);
+    const UnicharRating* a = static_cast<const UnicharRating*>(t1);
+    const UnicharRating* b = static_cast<const UnicharRating*>(t2);
     if (a->rating > b->rating) {
       return -1;
     } else if (a->rating < b->rating) {
@@ -100,8 +100,8 @@ struct ShapeRating {
 
   // Sort function to sort ratings appropriately by descending rating.
   static int SortDescendingRating(const void* t1, const void* t2) {
-    const ShapeRating* a = static_cast<const ShapeRating *>(t1);
-    const ShapeRating* b = static_cast<const ShapeRating *>(t2);
+    const ShapeRating* a = static_cast<const ShapeRating*>(t1);
+    const ShapeRating* b = static_cast<const ShapeRating*>(t2);
     if (a->rating > b->rating) {
       return -1;
     } else if (a->rating < b->rating) {
diff --git a/cutil/oldlist.cpp b/cutil/oldlist.cpp
index 01988abcc2..54a0ea779f 100644
--- a/cutil/oldlist.cpp
+++ b/cutil/oldlist.cpp
@@ -40,40 +40,40 @@
 
   To implement a STACK use:
 
-  push       to add to the Stack                  l = push (l, (LIST) "jim");
-  pop          to remove items from the Stack     l = pop (l);
-  first_node        to access the head                 name = (char *) first_node (l);
+  push         to add to the Stack             l = push(l, (LIST)"jim");
+  pop          to remove items from the Stack  l = pop(l);
+  first_node   to access the head              name = (char *)first_node(l);
 
   To implement a QUEUE use:
 
-  push_last    to add to the Queue                 l = push_last (l, (LIST) "jim");
-  pop                  remove items from the Queue l = pop (l);
-  first_node                to access the head          name = (char *) first_node (l);
+  push_last    to add to the Queue              l = push_last(l, (LIST)"x");
+  pop          remove items from the Queue      l = pop(l);
+  first_node   to access the head               name = (char *)first_node(l);
 
   To implement LISP like functions use:
 
-  first_node           CAR                              x = (int) first_node (l);
-  rest            CDR                              l = list_rest (l);
-  push            CONS                             l = push (l, (LIST) this);
-  last            LAST                             x = last (l);
-  concat          APPEND                           l = concat (r, s);
-  count           LENGTH                           x = count (l);
-  search          MEMBER                           if (search (l, x, NULL))
+  first_node   CAR                              x = (int)first_node(l);
+  rest         CDR                              l = list_rest(l);
+  push         CONS                             l = push(l, (LIST)this);
+  last         LAST                             x = last(l);
+  concat       APPEND                           l = concat(r, s);
+  count        LENGTH                           x = count(l);
+  search       MEMBER                           if (search(l, x, NULL))
 
   To implement SETS use:
 
-  adjoin                                           l  = adjoin (l, x);
-  set_union                                        l = set_union (r, s);
-  intersection                                     l = intersection (r, s);
-  set_difference                                   l = set_difference (r, s);
-  delete                                           l = delete (s, x, NULL);
-  search                                           if (search (l, x, NULL))
+  adjoin                                        l = adjoin(l, x);
+  set_union                                     l = set_union(r, s);
+  intersection                                  l = intersection(r, s);
+  set_difference                                l = set_difference(r, s);
+  delete                                        l = delete(s, x, NULL);
+  search                                        if (search(l, x, NULL))
 
   To Implement Associated LISTS use:
 
-  lpush                                            l = lpush (l, p);
-  assoc                                            s = assoc (l, x);
-  adelete                                          l = adelete (l, x);
+  lpush                                         l = lpush(l, p);
+  assoc                                         s = assoc(l, x);
+  adelete                                       l = adelete(l, x);
 
   The following rules of closure exist for the functions provided.
   a = first_node (push (a, b))
@@ -83,14 +83,14 @@
 
 ******************************************************************************/
 #include "oldlist.h"
-#include "structures.h"
 #include <stdio.h>
+#include "structures.h"
 
 /*----------------------------------------------------------------------
               M a c r o s
 ----------------------------------------------------------------------*/
-#define add_on(l,x)     l = push (l,first_node (x))
-#define next_one(l)     l = list_rest (l)
+#define add_on(l, x) l = push(l, first_node(x))
+#define next_one(l) l = list_rest(l)
 
 /*----------------------------------------------------------------------
               F u n c t i o n s
@@ -103,11 +103,10 @@
 int count(LIST var_list) {
   int temp = 0;
 
-  iterate (var_list) temp += 1;
+  iterate(var_list) temp += 1;
   return (temp);
 }
 
-
 /**********************************************************************
  *  d e l e t e    d
  *
@@ -121,59 +120,53 @@ LIST delete_d(LIST list, void *key, int_compare is_equal) {
   LIST result = NIL_LIST;
   LIST last_one = NIL_LIST;
 
-  if (is_equal == NULL)
-    is_equal = is_same;
+  if (is_equal == NULL) is_equal = is_same;
 
   while (list != NIL_LIST) {
-    if (!(*is_equal) (first_node (list), key)) {
+    if (!(*is_equal)(first_node(list), key)) {
       if (last_one == NIL_LIST) {
         last_one = list;
-        list = list_rest (list);
+        list = list_rest(list);
         result = last_one;
         set_rest(last_one, NIL_LIST);
-      }
-      else {
+      } else {
         set_rest(last_one, list);
         last_one = list;
-        list = list_rest (list);
+        list = list_rest(list);
         set_rest(last_one, NIL_LIST);
       }
-    }
-    else {
-      list = pop (list);
+    } else {
+      list = pop(list);
     }
   }
   return (result);
 }
 
 LIST delete_d(LIST list, void *key,
-              TessResultCallback2<int, void*, void*>* is_equal) {
+              TessResultCallback2<int, void *, void *> *is_equal) {
   LIST result = NIL_LIST;
   LIST last_one = NIL_LIST;
 
   while (list != NIL_LIST) {
-    if (!(*is_equal).Run (first_node (list), key)) {
+    if (!(*is_equal).Run(first_node(list), key)) {
       if (last_one == NIL_LIST) {
         last_one = list;
-        list = list_rest (list);
+        list = list_rest(list);
         result = last_one;
         set_rest(last_one, NIL_LIST);
-      }
-      else {
+      } else {
         set_rest(last_one, list);
         last_one = list;
-        list = list_rest (list);
+        list = list_rest(list);
         set_rest(last_one, NIL_LIST);
       }
-    }
-    else {
-      list = pop (list);
+    } else {
+      list = pop(list);
     }
   }
   return (result);
 }
 
-
 /**********************************************************************
  *  d e s t r o y
  *
@@ -183,21 +176,20 @@ LIST destroy(LIST list) {
   LIST next;
 
   while (list != NIL_LIST) {
-    next = list_rest (list);
+    next = list_rest(list);
     free_cell(list);
     list = next;
   }
   return (NIL_LIST);
 }
 
-
 /**********************************************************************
  *  d e s t r o y   n o d e s
  *
  *  Return the space taken by the LISTs of a list to the heap.
  **********************************************************************/
 void destroy_nodes(LIST list, void_dest destructor) {
-  ASSERT_HOST(destructor != NULL);
+  ASSERT_HOST(destructor != nullptr);
 
   while (list != NIL_LIST) {
     if (first_node(list) != NULL) (*destructor)(first_node(list));
@@ -205,7 +197,6 @@ void destroy_nodes(LIST list, void_dest destructor) {
   }
 }
 
-
 /**********************************************************************
  *  i n s e r t
  *
@@ -216,27 +207,15 @@ void insert(LIST list, void *node) {
   LIST element;
 
   if (list != NIL_LIST) {
-    element = push (NIL_LIST, node);
-    set_rest (element, list_rest (list));
+    element = push(NIL_LIST, node);
+    set_rest(element, list_rest(list));
     set_rest(list, element);
-    node = first_node (list);
-    list->node = first_node (list_rest (list));
-    list->next->node = (LIST) node;
+    node = first_node(list);
+    list->node = first_node(list_rest(list));
+    list->next->node = (LIST)node;
   }
 }
 
-
-/**********************************************************************
- *  i s   s a m e   n o d e
- *
- *  Compare the list node with the key value return TRUE (non-zero)
- *  if they are equivalent strings.  (Return FALSE if not)
- **********************************************************************/
-int is_same_node(void *item1, void *item2) {
-  return (item1 == item2);
-}
-
-
 /**********************************************************************
  *  i s   s a m e
  *
@@ -244,10 +223,9 @@ int is_same_node(void *item1, void *item2) {
  *  if they are equivalent strings.  (Return FALSE if not)
  **********************************************************************/
 int is_same(void *item1, void *item2) {
-  return (!strcmp ((char *) item1, (char *) item2));
+  return strcmp((char *)item1, (char *)item2) == 0 ? 1 : 0;
 }
 
-
 /**********************************************************************
  *  j o i n
  *
@@ -256,25 +234,21 @@ int is_same(void *item1, void *item2) {
  *  first list updated.
  **********************************************************************/
 LIST join(LIST list1, LIST list2) {
-  if (list1 == NIL_LIST)
-    return (list2);
-  set_rest (last (list1), list2);
+  if (list1 == NIL_LIST) return (list2);
+  set_rest(last(list1), list2);
   return (list1);
 }
 
-
 /**********************************************************************
  *  l a s t
  *
  *  Return the last list item (this is list type).
  **********************************************************************/
 LIST last(LIST var_list) {
-  while (list_rest (var_list) != NIL_LIST)
-    var_list = list_rest (var_list);
+  while (list_rest(var_list) != NIL_LIST) var_list = list_rest(var_list);
   return (var_list);
 }
 
-
 /**********************************************************************
  *  n t h   c e l l
  *
@@ -283,13 +257,11 @@ LIST last(LIST var_list) {
 void *nth_cell(LIST var_list, int item_num) {
   int x = 0;
   iterate(var_list) {
-    if (x++ == item_num)
-      return (var_list);
+    if (x++ == item_num) return (var_list);
   }
   return (var_list);
 }
 
-
 /**********************************************************************
  *  p o p
  *
@@ -299,7 +271,7 @@ void *nth_cell(LIST var_list, int item_num) {
 LIST pop(LIST list) {
   LIST temp;
 
-  temp = list_rest (list);
+  temp = list_rest(list);
 
   if (list != NIL_LIST) {
     free_cell(list);
@@ -307,7 +279,6 @@ LIST pop(LIST list) {
   return (temp);
 }
 
-
 /**********************************************************************
  *  p u s h
  *
@@ -317,13 +288,12 @@ LIST pop(LIST list) {
 LIST push(LIST list, void *element) {
   LIST t;
 
-  t = new_cell ();
-  t->node = (LIST) element;
+  t = new_cell();
+  t->node = (LIST)element;
   set_rest(t, list);
   return (t);
 }
 
-
 /**********************************************************************
  *  p u s h   l a s t
  *
@@ -333,15 +303,13 @@ LIST push_last(LIST list, void *item) {
   LIST t;
 
   if (list != NIL_LIST) {
-    t = last (list);
-    t->next = push (NIL_LIST, item);
+    t = last(list);
+    t->next = push(NIL_LIST, item);
     return (list);
-  }
-  else
-    return (push (NIL_LIST, item));
+  } else
+    return (push(NIL_LIST, item));
 }
 
-
 /**********************************************************************
  *  r e v e r s e
  *
@@ -351,11 +319,10 @@ LIST push_last(LIST list, void *item) {
 LIST reverse(LIST list) {
   LIST newlist = NIL_LIST;
 
-  iterate (list) copy_first (list, newlist);
+  iterate(list) copy_first(list, newlist);
   return (newlist);
 }
 
-
 /**********************************************************************
  *  r e v e r s e   d
  *
@@ -363,12 +330,11 @@ LIST reverse(LIST list) {
  *  destroyed.
  **********************************************************************/
 LIST reverse_d(LIST list) {
-  LIST result = reverse (list);
+  LIST result = reverse(list);
   destroy(list);
   return (result);
 }
 
-
 /**********************************************************************
  *  s   a d j o i n
  *
@@ -379,12 +345,11 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) {
   LIST l;
   int result;
 
-  if (compare == NULL)
-    compare = (int_compare) strcmp;
+  if (compare == NULL) compare = (int_compare)strcmp;
 
   l = var_list;
   iterate(l) {
-    result = (*compare) (variable, first_node (l));
+    result = (*compare)(variable, first_node(l));
     if (result == 0)
       return (var_list);
     else if (result < 0) {
@@ -392,7 +357,7 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) {
       return (var_list);
     }
   }
-  return (push_last (var_list, variable));
+  return (push_last(var_list, variable));
 }
 
 /**********************************************************************
@@ -404,16 +369,14 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) {
  *  for is_equal, the is_key routine will be used.
  **********************************************************************/
 LIST search(LIST list, void *key, int_compare is_equal) {
-  if (is_equal == NULL)
-    is_equal = is_same;
+  if (is_equal == NULL) is_equal = is_same;
 
-  iterate (list) if ((*is_equal) (first_node (list), key))
-  return (list);
+  iterate(list) if ((*is_equal)(first_node(list), key)) return (list);
   return (NIL_LIST);
 }
 
-LIST search(LIST list, void *key, TessResultCallback2<int, void*, void*>* is_equal) {
-  iterate (list) if ((*is_equal).Run(first_node (list), key))
-  return (list);
+LIST search(LIST list, void *key,
+            TessResultCallback2<int, void *, void *> *is_equal) {
+  iterate(list) if ((*is_equal).Run(first_node(list), key)) return (list);
   return (NIL_LIST);
 }
diff --git a/cutil/oldlist.h b/cutil/oldlist.h
index 508a69a4f2..bd4fdfa16f 100644
--- a/cutil/oldlist.h
+++ b/cutil/oldlist.h
@@ -70,8 +70,8 @@
  * join              - Concatenates list 1 and list 2.
  * delete_d          - Removes the requested elements from the list.
  * transform_d       - Modifies the list by applying a function to each node.
- * insert            - Add a new element into this spot in a list. (not NIL_LIST)
- * push_last         - Add a new element onto the end of a list.
+ * insert            - Add a new element into this spot in a list. (not NIL_LIST)
+ * push_last         - Add a new element onto the end of a list.
  * reverse_d         - Reverse a list and destroy the old one.
  *
  * ASSOCIATED LISTS:
@@ -249,8 +249,6 @@ void destroy_nodes(LIST list, void_dest destructor);
 
 void insert(LIST list, void *node);
 
-int is_same_node(void *item1, void *item2);
-
 int is_same(void *item1, void *item2);
 
 LIST join(LIST list1, LIST list2);
diff --git a/dict/dawg.h b/dict/dawg.h
index 23ac22168f..c36e7ba4fe 100644
--- a/dict/dawg.h
+++ b/dict/dawg.h
@@ -432,7 +432,7 @@ class SquishedDawg : public Dawg {
     num_forward_edges_in_node0 = num_forward_edges(0);
     if (debug_level > 3) print_all("SquishedDawg:");
   }
-  ~SquishedDawg();
+  virtual ~SquishedDawg();
 
   // Loads using the given TFile. Returns false on failure.
   bool Load(TFile *fp) {
diff --git a/dict/dict.cpp b/dict/dict.cpp
index 96a1d4531f..4364eae4bd 100644
--- a/dict/dict.cpp
+++ b/dict/dict.cpp
@@ -241,7 +241,8 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) {
   if (load_bigram_dawg) {
     bigram_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_BIGRAM_DAWG,
                                                 dawg_debug_level, data_file);
-    if (bigram_dawg_) dawgs_ += bigram_dawg_;
+    // NOTE: bigram_dawg_ is deliberately NOT added to dawgs_. It is not used
+    // like the other dawgs, and it is released separately in Dict::End().
   }
   if (load_freq_dawg) {
     freq_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_FREQ_DAWG,
@@ -352,6 +353,7 @@ void Dict::End() {
       delete dawgs_[i];
     }
   }
+  dawg_cache_->FreeDawg(bigram_dawg_);
   if (dawg_cache_is_ours_) {
     delete dawg_cache_;
     dawg_cache_ = NULL;
@@ -370,7 +372,7 @@ void Dict::End() {
 int Dict::def_letter_is_okay(void* void_dawg_args,
                              UNICHAR_ID unichar_id,
                              bool word_end) const {
-  DawgArgs *dawg_args = static_cast<DawgArgs*>(void_dawg_args);
+  DawgArgs *dawg_args = static_cast<DawgArgs *>(void_dawg_args);
 
   if (dawg_debug_level >= 3) {
     tprintf("def_letter_is_okay: current unichar=%s word_end=%d"
diff --git a/dict/dict.h b/dict/dict.h
index ffba0c8c2d..a5b0817ea2 100644
--- a/dict/dict.h
+++ b/dict/dict.h
@@ -530,14 +530,14 @@ class Dict {
   DawgVector dawgs_;
   SuccessorListsVector successors_;
   Trie *pending_words_;
+  /// The following pointers are only cached for convenience. Apart from
+  /// bigram_dawg_ (freed separately in End()), they are deleted with dawgs_.
   // bigram_dawg_ points to a dawg of two-word bigrams which always supercede if
   // any of them are present on the best choices list for a word pair.
   // the bigrams are stored as space-separated words where:
   // (1) leading and trailing punctuation has been removed from each word and
   // (2) any digits have been replaced with '?' marks.
   Dawg *bigram_dawg_;
-  /// The following pointers are only cached for convenience.
-  /// The dawgs will be deleted when dawgs_ vector is destroyed.
   // TODO(daria): need to support multiple languages in the future,
   // so maybe will need to maintain a list of dawgs of each kind.
   Dawg *freq_dawg_;
diff --git a/dict/permdawg.cpp b/dict/permdawg.cpp
index f12a35b03a..57f1300094 100644
--- a/dict/permdawg.cpp
+++ b/dict/permdawg.cpp
@@ -53,7 +53,7 @@ void Dict::go_deeper_dawg_fxn(
     int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info,
     bool word_ending, WERD_CHOICE *word, float certainties[], float *limit,
     WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args) {
-  DawgArgs *more_args = static_cast<DawgArgs*>(void_more_args);
+  DawgArgs *more_args = static_cast<DawgArgs *>(void_more_args);
   word_ending = (char_choice_index == char_choices.size()-1);
   int word_index = word->length() - 1;
   if (best_choice->rating() < *limit) return;
diff --git a/dict/trie.cpp b/dict/trie.cpp
index 5d2e3b4af6..a4406664d0 100644
--- a/dict/trie.cpp
+++ b/dict/trie.cpp
@@ -281,8 +281,8 @@ NODE_REF Trie::new_dawg_node() {
 
 // Sort function to sort words by decreasing order of length.
 static int sort_strings_by_dec_length(const void* v1, const void* v2) {
-  const STRING* s1 = static_cast<const STRING*>(v1);
-  const STRING* s2 = static_cast<const STRING*>(v2);
+  const STRING *s1 = static_cast<const STRING *>(v1);
+  const STRING *s2 = static_cast<const STRING *>(v2);
   return s2->length() - s1->length();
 }
 
diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp
index 79f95922ac..88e6d495c9 100644
--- a/opencl/openclwrapper.cpp
+++ b/opencl/openclwrapper.cpp
@@ -58,8 +58,9 @@ static const l_uint32 rmask32[] = {
     0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
     0x7fffffff, 0xffffffff};
 
-static cl_mem pixsCLBuffer, pixdCLBuffer, pixdCLIntermediate; //Morph operations buffers
-static cl_mem pixThBuffer; //output from thresholdtopix calculation
+static cl_mem pixsCLBuffer, pixdCLBuffer,
+    pixdCLIntermediate;     // Morph operations buffers
+static cl_mem pixThBuffer;  // output from thresholdtopix calculation
 static cl_int clStatus;
 static KernelEnv rEnv;
 
@@ -580,58 +581,58 @@ static ds_status writeProfileToFile(ds_profile *profile,
 }
 
 // substitute invalid characters in device name with _
-static void legalizeFileName( char *fileName) {
-    //printf("fileName: %s\n", fileName);
-    const char *invalidChars =
-        "/\?:*\"><| ";  // space is valid but can cause headaches
-    // for each invalid char
-    for (unsigned i = 0; i < strlen(invalidChars); i++) {
-        char invalidStr[4];
-        invalidStr[0] = invalidChars[i];
-        invalidStr[1] = '\0';
-        //printf("eliminating %s\n", invalidStr);
-        //char *pos = strstr(fileName, invalidStr);
-        // initial ./ is valid for present directory
-        //if (*pos == '.') pos++;
-        //if (*pos == '/') pos++;
-        for (char *pos = strstr(fileName, invalidStr); pos != nullptr;
-             pos = strstr(pos + 1, invalidStr)) {
-          // printf("\tfound: %s, ", pos);
-          pos[0] = '_';
-          // printf("fileName: %s\n", fileName);
-        }
+static void legalizeFileName(char *fileName) {
+  // printf("fileName: %s\n", fileName);
+  const char *invalidChars =
+      "/\?:*\"><| ";  // space is valid but can cause headaches
+  // for each invalid char
+  for (unsigned i = 0; i < strlen(invalidChars); i++) {
+    char invalidStr[4];
+    invalidStr[0] = invalidChars[i];
+    invalidStr[1] = '\0';
+    // printf("eliminating %s\n", invalidStr);
+    // char *pos = strstr(fileName, invalidStr);
+    // initial ./ is valid for present directory
+    // if (*pos == '.') pos++;
+    // if (*pos == '/') pos++;
+    for (char *pos = strstr(fileName, invalidStr); pos != nullptr;
+         pos = strstr(pos + 1, invalidStr)) {
+      // printf("\tfound: %s, ", pos);
+      pos[0] = '_';
+      // printf("fileName: %s\n", fileName);
     }
+  }
 }
 
-static void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) {
-    //printf("[DS] populateGPUEnvFromDevice\n");
-    size_t size;
-    gpuInfo->mnIsUserCreated = 1;
-    // device
-    gpuInfo->mpDevID = device;
-    gpuInfo->mpArryDevsID = new cl_device_id[1];
-    gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID;
-    clStatus =
-        clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE,
-                        sizeof(cl_device_type), &gpuInfo->mDevType, &size);
-    CHECK_OPENCL( clStatus, "populateGPUEnv::getDeviceInfo(TYPE)");
-    // platform
-    clStatus =
-        clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM,
-                        sizeof(cl_platform_id), &gpuInfo->mpPlatformID, &size);
-    CHECK_OPENCL( clStatus, "populateGPUEnv::getDeviceInfo(PLATFORM)");
-    // context
-    cl_context_properties props[3];
-    props[0] = CL_CONTEXT_PLATFORM;
-    props[1] = (cl_context_properties) gpuInfo->mpPlatformID;
-    props[2] = 0;
-    gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID, nullptr,
-                                         nullptr, &clStatus);
-    CHECK_OPENCL( clStatus, "populateGPUEnv::createContext");
-    // queue
-    cl_command_queue_properties queueProperties = 0;
-    gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus );
-    CHECK_OPENCL( clStatus, "populateGPUEnv::createCommandQueue");
+static void populateGPUEnvFromDevice(GPUEnv *gpuInfo, cl_device_id device) {
+  // printf("[DS] populateGPUEnvFromDevice\n");
+  size_t size;
+  gpuInfo->mnIsUserCreated = 1;
+  // device
+  gpuInfo->mpDevID = device;
+  gpuInfo->mpArryDevsID = new cl_device_id[1];
+  gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID;
+  clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE,
+                             sizeof(cl_device_type), &gpuInfo->mDevType, &size);
+  CHECK_OPENCL(clStatus, "populateGPUEnv::getDeviceInfo(TYPE)");
+  // platform
+  clStatus =
+      clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM,
+                      sizeof(cl_platform_id), &gpuInfo->mpPlatformID, &size);
+  CHECK_OPENCL(clStatus, "populateGPUEnv::getDeviceInfo(PLATFORM)");
+  // context
+  cl_context_properties props[3];
+  props[0] = CL_CONTEXT_PLATFORM;
+  props[1] = (cl_context_properties)gpuInfo->mpPlatformID;
+  props[2] = 0;
+  gpuInfo->mpContext =
+      clCreateContext(props, 1, &gpuInfo->mpDevID, nullptr, nullptr, &clStatus);
+  CHECK_OPENCL(clStatus, "populateGPUEnv::createContext");
+  // queue
+  cl_command_queue_properties queueProperties = 0;
+  gpuInfo->mpCmdQueue = clCreateCommandQueue(
+      gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus);
+  CHECK_OPENCL(clStatus, "populateGPUEnv::createCommandQueue");
 }
 
 int OpenclDevice::LoadOpencl()
@@ -662,27 +663,26 @@ int OpenclDevice::SetKernelEnv( KernelEnv *envInfo )
 
 static cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer,
                                      size_t nElements, cl_mem_flags flags,
-                                     cl_int *pStatus)
-{
-    cl_mem membuffer = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (flags),
-                                        nElements * sizeof(l_uint32), hostbuffer, pStatus);
+                                     cl_int *pStatus) {
+  cl_mem membuffer =
+      clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(flags),
+                     nElements * sizeof(l_uint32), hostbuffer, pStatus);
 
-    return membuffer;
+  return membuffer;
 }
 
-static
-Pix *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, Pix *pixd, Pix *pixs,
-                       int elements, cl_mem_flags flags, bool memcopy = false,
-                       bool sync = true) {
+static Pix *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, Pix *pixd,
+                              Pix *pixs, int elements, cl_mem_flags flags,
+                              bool memcopy = false, bool sync = true) {
   PROCNAME("mapOutputCLBuffer");
   if (!pixd) {
     if (memcopy) {
       if ((pixd = pixCreateTemplate(pixs)) == nullptr)
-        (Pix *)ERROR_PTR("pixd not made", procName, nullptr);
+        tprintf("pixd not made\n");
     } else {
       if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs),
                                   pixGetDepth(pixs))) == nullptr)
-        (Pix *)ERROR_PTR("pixd not made", procName, nullptr);
+        tprintf("pixd not made\n");
     }
   }
   l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(
@@ -714,35 +714,34 @@ void OpenclDevice::releaseMorphCLBuffers()
   pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr;
 }
 
-int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs)
-{
-    SetKernelEnv( &rEnv );
+int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs) {
+  SetKernelEnv(&rEnv);
 
-    if (pixThBuffer != nullptr) {
-      pixsCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h,
-                                            CL_MEM_ALLOC_HOST_PTR, &clStatus);
+  if (pixThBuffer != nullptr) {
+    pixsCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h,
+                                          CL_MEM_ALLOC_HOST_PTR, &clStatus);
 
-      // Get the output from ThresholdToPix operation
-      clStatus =
-          clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0,
-                              sizeof(l_uint32) * wpl * h, 0, nullptr, nullptr);
-    }
-    else
-    {
-        //Get data from the source image
-        l_uint32* srcdata = (l_uint32*) malloc(wpl*h*sizeof(l_uint32));
-        memcpy(srcdata, pixGetData(pixs), wpl*h*sizeof(l_uint32));
+    // Get the output from ThresholdToPix operation
+    clStatus =
+        clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0,
+                            sizeof(l_uint32) * wpl * h, 0, nullptr, nullptr);
+  } else {
+    // Get data from the source image
+    l_uint32 *srcdata =
+        reinterpret_cast<l_uint32 *>(malloc(wpl * h * sizeof(l_uint32)));
+    memcpy(srcdata, pixGetData(pixs), wpl * h * sizeof(l_uint32));
 
-        pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl*h, CL_MEM_USE_HOST_PTR, &clStatus);
-    }
+    pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl * h,
+                                          CL_MEM_USE_HOST_PTR, &clStatus);
+  }
 
-    pixdCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h,
-                                          CL_MEM_ALLOC_HOST_PTR, &clStatus);
+  pixdCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h,
+                                        CL_MEM_ALLOC_HOST_PTR, &clStatus);
 
-    pixdCLIntermediate = allocateZeroCopyBuffer(
-        rEnv, nullptr, wpl * h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
+  pixdCLIntermediate = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h,
+                                              CL_MEM_ALLOC_HOST_PTR, &clStatus);
 
-    return (int)clStatus;
+  return (int)clStatus;
 }
 
 int OpenclDevice::InitEnv()
@@ -1255,254 +1254,222 @@ PERF_COUNT_END
 }
 
 //Morphology Dilate operation for 5x5 structuring element. Invokes the relevant OpenCL kernels
-static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h)
-{
-    size_t globalThreads[2];
-    cl_mem pixtemp;
-    cl_int status;
-    int gsize;
-    size_t localThreads[2];
+static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h) {
+  size_t globalThreads[2];
+  cl_mem pixtemp;
+  cl_int status;
+  int gsize;
+  size_t localThreads[2];
 
-    //Horizontal pass
-    gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
-    globalThreads[0] = gsize;
-    globalThreads[1] = GROUPSIZE_HMORY;
-    localThreads[0] = GROUPSIZE_HMORX;
-    localThreads[1] = GROUPSIZE_HMORY;
-
-    rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor_5x5", &status );
-    CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_5x5");
-
-    status = clSetKernelArg(rEnv.mpkKernel,
-        0,
-        sizeof(cl_mem),
-        &pixsCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        1,
-        sizeof(cl_mem),
-        &pixdCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
-    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+  // Horizontal pass
+  gsize = (wpl * h + GROUPSIZE_HMORX - 1) / GROUPSIZE_HMORX * GROUPSIZE_HMORX;
+  globalThreads[0] = gsize;
+  globalThreads[1] = GROUPSIZE_HMORY;
+  localThreads[0] = GROUPSIZE_HMORX;
+  localThreads[1] = GROUPSIZE_HMORY;
 
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
-                                    nullptr, globalThreads, localThreads, 0,
-                                    nullptr, nullptr);
+  rEnv.mpkKernel =
+      clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_5x5", &status);
+  CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_5x5");
 
-    //Swap source and dest buffers
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixtemp;
+  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
 
-    //Vertical
-    gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
-    globalThreads[0] = gsize;
-    gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
-    globalThreads[1] = gsize;
-    localThreads[0] = GROUPSIZE_X;
-    localThreads[1] = GROUPSIZE_Y;
+  status =
+      clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
+                             globalThreads, localThreads, 0, nullptr, nullptr);
 
-    rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateVer_5x5", &status );
-    CHECK_OPENCL(status, "clCreateKernel morphoDilateVer_5x5");
-
-    status = clSetKernelArg(rEnv.mpkKernel,
-        0,
-        sizeof(cl_mem),
-        &pixsCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        1,
-        sizeof(cl_mem),
-        &pixdCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
-    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
-                                    nullptr, globalThreads, localThreads, 0,
-                                    nullptr, nullptr);
+  // Swap source and dest buffers
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixtemp;
 
-    return status;
+  // Vertical pass
+  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
+  globalThreads[0] = gsize;
+  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
+  globalThreads[1] = gsize;
+  localThreads[0] = GROUPSIZE_X;
+  localThreads[1] = GROUPSIZE_Y;
+
+  rEnv.mpkKernel =
+      clCreateKernel(rEnv.mpkProgram, "morphoDilateVer_5x5", &status);
+  CHECK_OPENCL(status, "clCreateKernel morphoDilateVer_5x5");
+
+  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+  status =
+      clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
+                             globalThreads, localThreads, 0, nullptr, nullptr);
+
+  return status;
 }
 
 //Morphology Erode operation for 5x5 structuring element. Invokes the relevant OpenCL kernels
-static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h)
-{
-    size_t globalThreads[2];
-    cl_mem pixtemp;
-    cl_int status;
-    int gsize;
-    l_uint32 fwmask, lwmask;
-    size_t localThreads[2];
+static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h) {
+  size_t globalThreads[2];
+  cl_mem pixtemp;
+  cl_int status;
+  int gsize;
+  l_uint32 fwmask, lwmask;
+  size_t localThreads[2];
 
-    lwmask = lmask32[31 - 2];
-    fwmask = rmask32[31 - 2];
+  lwmask = lmask32[31 - 2];
+  fwmask = rmask32[31 - 2];
 
-    //Horizontal pass
-    gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
-    globalThreads[0] = gsize;
-    globalThreads[1] = GROUPSIZE_HMORY;
-    localThreads[0] = GROUPSIZE_HMORX;
-    localThreads[1] = GROUPSIZE_HMORY;
-
-    rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor_5x5", &status );
-    CHECK_OPENCL(status, "clCreateKernel morphoErodeHor_5x5");
-
-    status = clSetKernelArg(rEnv.mpkKernel,
-        0,
-        sizeof(cl_mem),
-        &pixsCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        1,
-        sizeof(cl_mem),
-        &pixdCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
-    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+  // Horizontal pass
+  gsize = (wpl * h + GROUPSIZE_HMORX - 1) / GROUPSIZE_HMORX * GROUPSIZE_HMORX;
+  globalThreads[0] = gsize;
+  globalThreads[1] = GROUPSIZE_HMORY;
+  localThreads[0] = GROUPSIZE_HMORX;
+  localThreads[1] = GROUPSIZE_HMORY;
 
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
-                                    nullptr, globalThreads, localThreads, 0,
-                                    nullptr, nullptr);
+  rEnv.mpkKernel =
+      clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_5x5", &status);
+  CHECK_OPENCL(status, "clCreateKernel morphoErodeHor_5x5");
 
-    //Swap source and dest buffers
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixtemp;
+  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
 
-    //Vertical
-    gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
-    globalThreads[0] = gsize;
-    gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
-    globalThreads[1] = gsize;
-    localThreads[0] = GROUPSIZE_X;
-    localThreads[1] = GROUPSIZE_Y;
+  status =
+      clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
+                             globalThreads, localThreads, 0, nullptr, nullptr);
 
-    rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeVer_5x5", &status );
-    CHECK_OPENCL(status, "clCreateKernel morphoErodeVer_5x5");
-
-    status = clSetKernelArg(rEnv.mpkKernel,
-        0,
-        sizeof(cl_mem),
-        &pixsCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        1,
-        sizeof(cl_mem),
-        &pixdCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
-    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
-    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(fwmask), &fwmask);
-    status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(lwmask), &lwmask);
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
-                                    nullptr, globalThreads, localThreads, 0,
-                                    nullptr, nullptr);
+  // Swap source and dest buffers
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixtemp;
 
-    return status;
+  // Vertical pass
+  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
+  globalThreads[0] = gsize;
+  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
+  globalThreads[1] = gsize;
+  localThreads[0] = GROUPSIZE_X;
+  localThreads[1] = GROUPSIZE_Y;
+
+  rEnv.mpkKernel =
+      clCreateKernel(rEnv.mpkProgram, "morphoErodeVer_5x5", &status);
+  CHECK_OPENCL(status, "clCreateKernel morphoErodeVer_5x5");
+
+  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+  status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(fwmask), &fwmask);
+  status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(lwmask), &lwmask);
+  status =
+      clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
+                             globalThreads, localThreads, 0, nullptr, nullptr);
+
+  return status;
 }
 
 //Morphology Dilate operation. Invokes the relevant OpenCL kernels
-static cl_int
-pixDilateCL(l_int32  hsize, l_int32  vsize, l_int32  wpl, l_int32  h)
-{
-    l_int32  xp, yp, xn, yn;
-    SEL* sel;
-    size_t globalThreads[2];
-    cl_mem pixtemp;
-    cl_int status;
-    int gsize;
-    size_t localThreads[2];
-    char isEven;
+static cl_int pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl,
+                          l_int32 h) {
+  l_int32 xp, yp, xn, yn;
+  SEL *sel;
+  size_t globalThreads[2];
+  cl_mem pixtemp;
+  cl_int status;
+  int gsize;
+  size_t localThreads[2];
+  char isEven;
 
-    OpenclDevice::SetKernelEnv( &rEnv );
+  OpenclDevice::SetKernelEnv(&rEnv);
 
-    if (hsize == 5 && vsize == 5)
-    {
-        //Specific case for 5x5
-        status = pixDilateCL_55(wpl, h);
-        return status;
-    }
+  if (hsize == 5 && vsize == 5) {
+    // Specific case for 5x5
+    status = pixDilateCL_55(wpl, h);
+    return status;
+  }
 
-    sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
+  sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
 
-    selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
-    selDestroy(&sel);
-    //global and local work dimensions for Horizontal pass
-    gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
-    globalThreads[0] = gsize;
-    gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
-    globalThreads[1] = gsize;
-    localThreads[0] = GROUPSIZE_X;
-    localThreads[1] = GROUPSIZE_Y;
+  selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
+  selDestroy(&sel);
+  // global and local work dimensions for Horizontal pass
+  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
+  globalThreads[0] = gsize;
+  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
+  globalThreads[1] = gsize;
+  localThreads[0] = GROUPSIZE_X;
+  localThreads[1] = GROUPSIZE_Y;
 
-    if (xp > 31 || xn > 31)
-    {
-      // Generic case.
-      rEnv.mpkKernel =
-          clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status);
-      CHECK_OPENCL(status, "clCreateKernel morphoDilateHor");
-
-      status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
-      status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
-      status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
-      status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn);
-      status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl);
-      status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h);
-      status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
-                                      nullptr, globalThreads, localThreads, 0,
-                                      nullptr, nullptr);
-
-      if (yp > 0 || yn > 0) {
-        pixtemp = pixsCLBuffer;
-        pixsCLBuffer = pixdCLBuffer;
-        pixdCLBuffer = pixtemp;
-        }
-    }
-    else if (xp > 0 || xn > 0 )
-    {
-      // Specific Horizontal pass kernel for half width < 32
-      rEnv.mpkKernel =
-          clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status);
-      CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_32word");
-      isEven = (xp != xn);
-
-      status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
-      status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
-      status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
-      status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
-      status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
-      status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), &isEven);
-      status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
-                                      nullptr, globalThreads, localThreads, 0,
-                                      nullptr, nullptr);
-
-      if (yp > 0 || yn > 0) {
-        pixtemp = pixsCLBuffer;
-        pixsCLBuffer = pixdCLBuffer;
-        pixdCLBuffer = pixtemp;
-      }
+  if (xp > 31 || xn > 31) {
+    // Generic case.
+    rEnv.mpkKernel =
+        clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status);
+    CHECK_OPENCL(status, "clCreateKernel morphoDilateHor");
+
+    status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    nullptr, globalThreads, localThreads, 0,
+                                    nullptr, nullptr);
+
+    if (yp > 0 || yn > 0) {
+      pixtemp = pixsCLBuffer;
+      pixsCLBuffer = pixdCLBuffer;
+      pixdCLBuffer = pixtemp;
     }
+  } else if (xp > 0 || xn > 0) {
+    // Specific Horizontal pass kernel for half width < 32
+    rEnv.mpkKernel =
+        clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status);
+    CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_32word");
+    isEven = (xp != xn);
 
-    if (yp > 0 || yn > 0)
-    {
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateVer", &status );
-        CHECK_OPENCL(status, "clCreateKernel morphoDilateVer");
-
-        status = clSetKernelArg(rEnv.mpkKernel,
-            0,
-            sizeof(cl_mem),
-            &pixsCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-            1,
-            sizeof(cl_mem),
-            &pixdCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp);
-        status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
-        status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
-        status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(yn), &yn);
-        status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
-                                        nullptr, globalThreads, localThreads, 0,
-                                        nullptr, nullptr);
+    status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
+    status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), &isEven);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    nullptr, globalThreads, localThreads, 0,
+                                    nullptr, nullptr);
+
+    if (yp > 0 || yn > 0) {
+      pixtemp = pixsCLBuffer;
+      pixsCLBuffer = pixdCLBuffer;
+      pixdCLBuffer = pixtemp;
     }
+  }
 
-    return status;
+  if (yp > 0 || yn > 0) {
+    rEnv.mpkKernel =
+        clCreateKernel(rEnv.mpkProgram, "morphoDilateVer", &status);
+    CHECK_OPENCL(status, "clCreateKernel morphoDilateVer");
+
+    status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
+    status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(yn), &yn);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    nullptr, globalThreads, localThreads, 0,
+                                    nullptr, nullptr);
+  }
+
+  return status;
 }
 
 //Morphology Erode operation. Invokes the relevant OpenCL kernels
-static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) {
+static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl,
+                         l_uint32 h) {
   l_int32 xp, yp, xn, yn;
   SEL *sel;
   size_t globalThreads[2];
@@ -1609,45 +1576,42 @@ static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h)
 }
 
 //Morphology Open operation. Invokes the relevant OpenCL kernels
-static cl_int pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
-{
-    cl_int status;
-    cl_mem pixtemp;
+static cl_int pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) {
+  cl_int status;
+  cl_mem pixtemp;
 
-    //Erode followed by Dilate
-    status = pixErodeCL(hsize, vsize, wpl, h);
+  // Erode followed by Dilate
+  status = pixErodeCL(hsize, vsize, wpl, h);
 
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixtemp;
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixtemp;
 
-    status = pixDilateCL(hsize, vsize, wpl, h);
+  status = pixDilateCL(hsize, vsize, wpl, h);
 
-    return status;
+  return status;
 }
 
 //Morphology Close operation. Invokes the relevant OpenCL kernels
-static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
-{
-    cl_int status;
-    cl_mem pixtemp;
+static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) {
+  cl_int status;
+  cl_mem pixtemp;
 
-    //Dilate followed by Erode
-    status = pixDilateCL(hsize, vsize, wpl, h);
+  // Dilate followed by Erode
+  status = pixDilateCL(hsize, vsize, wpl, h);
 
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixtemp;
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixtemp;
 
-    status = pixErodeCL(hsize, vsize, wpl, h);
+  status = pixErodeCL(hsize, vsize, wpl, h);
 
-    return status;
+  return status;
 }
 
 //output = buffer1 & ~(buffer2)
-static
-cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1,
-                          cl_mem buffer2, cl_mem outBuffer = nullptr) {
+static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1,
+                                 cl_mem buffer2, cl_mem outBuffer = nullptr) {
   cl_int status;
   size_t globalThreads[2];
   int gsize;
@@ -2055,83 +2019,91 @@ typedef struct _TessScoreEvaluationInputData {
     Pix *pix;
 } TessScoreEvaluationInputData;
 
-static void populateTessScoreEvaluationInputData(TessScoreEvaluationInputData *input) {
-    srand(1);
-    // 8.5x11 inches @ 300dpi rounded to clean multiples
-    int height = 3328; // %256
-    int width = 2560; // %512
-    int numChannels = 4;
-    input->height = height;
-    input->width = width;
-    input->numChannels = numChannels;
-    unsigned char (*imageData4)[4] = (unsigned char (*)[4]) malloc(height*width*numChannels*sizeof(unsigned char)); // new unsigned char[4][height*width];
-    input->imageData = (unsigned char *) &imageData4[0];
-
-    // zero out image
-    unsigned char pixelWhite[4] = {  0,   0,   0, 255};
-    unsigned char pixelBlack[4] = {255, 255, 255, 255};
-    for (int p = 0; p < height*width; p++) {
-        //unsigned char tmp[4] = imageData4[0];
-        imageData4[p][0] = pixelWhite[0];
-        imageData4[p][1] = pixelWhite[1];
-        imageData4[p][2] = pixelWhite[2];
-        imageData4[p][3] = pixelWhite[3];
-    }
-    // random lines to be eliminated
-    int maxLineWidth = 64; // pixels wide
-    int numLines = 10;
-    // vertical lines
-    for (int i = 0; i < numLines; i++) {
-        int lineWidth = rand()%maxLineWidth;
-        int vertLinePos = lineWidth + rand()%(width-2*lineWidth);
-        //printf("[PI] VerticalLine @ %i (w=%i)\n", vertLinePos, lineWidth);
-        for (int row = vertLinePos-lineWidth/2; row < vertLinePos+lineWidth/2; row++) {
-            for (int col = 0; col < height; col++) {
-                //imageData4[row*width+col] = pixelBlack;
-                imageData4[row*width+col][0] = pixelBlack[0];
-                imageData4[row*width+col][1] = pixelBlack[1];
-                imageData4[row*width+col][2] = pixelBlack[2];
-                imageData4[row*width+col][3] = pixelBlack[3];
-            }
-        }
+static void populateTessScoreEvaluationInputData(
+    TessScoreEvaluationInputData *input) {
+  srand(1);
+  // 8.5x11 inches @ 300dpi rounded to clean multiples
+  int height = 3328;  // multiple of 256
+  int width = 2560;   // multiple of 512
+  int numChannels = 4;
+  input->height = height;
+  input->width = width;
+  input->numChannels = numChannels;
+  unsigned char(*imageData4)[4] = (unsigned char(*)[4])malloc(
+      height * width * numChannels *
+      sizeof(unsigned char));  // i.e. unsigned char[height * width][4]
+  input->imageData = (unsigned char *)&imageData4[0];
+
+  // initialize every pixel to pixelWhite
+  unsigned char pixelWhite[4] = {0, 0, 0, 255};
+  unsigned char pixelBlack[4] = {255, 255, 255, 255};
+  for (int p = 0; p < height * width; p++) {
+    // unsigned char tmp[4] = imageData4[0];
+    imageData4[p][0] = pixelWhite[0];
+    imageData4[p][1] = pixelWhite[1];
+    imageData4[p][2] = pixelWhite[2];
+    imageData4[p][3] = pixelWhite[3];
+  }
+  // random lines to be eliminated
+  int maxLineWidth = 64;  // pixels wide
+  int numLines = 10;
+  // vertical lines
+  for (int i = 0; i < numLines; i++) {
+    int lineWidth = rand() % maxLineWidth;
+    int vertLinePos = lineWidth + rand() % (width - 2 * lineWidth);
+    // printf("[PI] VerticalLine @ %i (w=%i)\n", vertLinePos, lineWidth);
+    for (int row = vertLinePos - lineWidth / 2;
+         row < vertLinePos + lineWidth / 2; row++) {
+      for (int col = 0; col < height; col++) {
+        // imageData4[row*width+col] = pixelBlack;
+        imageData4[row * width + col][0] = pixelBlack[0];
+        imageData4[row * width + col][1] = pixelBlack[1];
+        imageData4[row * width + col][2] = pixelBlack[2];
+        imageData4[row * width + col][3] = pixelBlack[3];
+      }
     }
-    // horizontal lines
-    for (int i = 0; i < numLines; i++) {
-        int lineWidth = rand()%maxLineWidth;
-        int horLinePos = lineWidth + rand()%(height-2*lineWidth);
-        //printf("[PI] HorizontalLine @ %i (w=%i)\n", horLinePos, lineWidth);
-        for (int row = 0; row < width; row++) {
-            for (int col = horLinePos-lineWidth/2; col < horLinePos+lineWidth/2; col++) { // for (int row = vertLinePos-lineWidth/2; row < vertLinePos+lineWidth/2; row++) {
-                //printf("[PI] HoizLine pix @ (%3i, %3i)\n", row, col);
-                //imageData4[row*width+col] = pixelBlack;
-                imageData4[row*width+col][0] = pixelBlack[0];
-                imageData4[row*width+col][1] = pixelBlack[1];
-                imageData4[row*width+col][2] = pixelBlack[2];
-                imageData4[row*width+col][3] = pixelBlack[3];
-            }
-        }
+  }
+  // horizontal lines
+  for (int i = 0; i < numLines; i++) {
+    int lineWidth = rand() % maxLineWidth;
+    int horLinePos = lineWidth + rand() % (height - 2 * lineWidth);
+    // printf("[PI] HorizontalLine @ %i (w=%i)\n", horLinePos, lineWidth);
+    for (int row = 0; row < width; row++) {
+      for (int col = horLinePos - lineWidth / 2;
+           col < horLinePos + lineWidth / 2;
+           col++) {  // for (int row = vertLinePos-lineWidth/2; row <
+                     // vertLinePos+lineWidth/2; row++) {
+        // printf("[PI] HorizLine pix @ (%3i, %3i)\n", row, col);
+        // imageData4[row*width+col] = pixelBlack;
+        imageData4[row * width + col][0] = pixelBlack[0];
+        imageData4[row * width + col][1] = pixelBlack[1];
+        imageData4[row * width + col][2] = pixelBlack[2];
+        imageData4[row * width + col][3] = pixelBlack[3];
+      }
     }
-    // spots (noise, squares)
-    float fractionBlack = 0.1; // how much of the image should be blackened
-    int numSpots = (height*width)*fractionBlack/(maxLineWidth*maxLineWidth/2/2);
-    for (int i = 0; i < numSpots; i++) {
-        int lineWidth = rand()%maxLineWidth;
-        int col = lineWidth + rand()%(width-2*lineWidth);
-        int row = lineWidth + rand()%(height-2*lineWidth);
-        //printf("[PI] Spot[%i/%i] @ (%3i, %3i)\n", i, numSpots, row, col );
-        for (int r = row-lineWidth/2; r < row+lineWidth/2; r++) {
-            for (int c = col-lineWidth/2; c < col+lineWidth/2; c++) {
-                //printf("[PI] \tSpot[%i/%i] @ (%3i, %3i)\n", i, numSpots, r, c );
-                //imageData4[row*width+col] = pixelBlack;
-                imageData4[r*width+c][0] = pixelBlack[0];
-                imageData4[r*width+c][1] = pixelBlack[1];
-                imageData4[r*width+c][2] = pixelBlack[2];
-                imageData4[r*width+c][3] = pixelBlack[3];
-            }
-        }
+  }
+  // spots (noise, squares)
+  float fractionBlack = 0.1;  // how much of the image should be blackened
+  int numSpots =
+      (height * width) * fractionBlack / (maxLineWidth * maxLineWidth / 2 / 2);
+  for (int i = 0; i < numSpots; i++) {
+    int lineWidth = rand() % maxLineWidth;
+    int col = lineWidth + rand() % (width - 2 * lineWidth);
+    int row = lineWidth + rand() % (height - 2 * lineWidth);
+    // printf("[PI] Spot[%i/%i] @ (%3i, %3i)\n", i, numSpots, row, col );
+    for (int r = row - lineWidth / 2; r < row + lineWidth / 2; r++) {
+      for (int c = col - lineWidth / 2; c < col + lineWidth / 2; c++) {
+        // printf("[PI] \tSpot[%i/%i] @ (%3i, %3i)\n", i, numSpots, r, c );
+        // imageData4[row*width+col] = pixelBlack;
+        imageData4[r * width + c][0] = pixelBlack[0];
+        imageData4[r * width + c][1] = pixelBlack[1];
+        imageData4[r * width + c][2] = pixelBlack[2];
+        imageData4[r * width + c][3] = pixelBlack[3];
+      }
     }
+  }
 
-    input->pix = pixCreate(input->width, input->height, 1);
+  input->pix = pixCreate(input->width, input->height, 1);
 }
 
 typedef struct _TessDeviceScore {
@@ -2144,8 +2116,10 @@ typedef struct _TessDeviceScore {
  * Micro Benchmarks for Device Selection
  *****************************************************************************/
 
-static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type) {
-    double time = 0;
+static double composeRGBPixelMicroBench(GPUEnv *env,
+                                        TessScoreEvaluationInputData input,
+                                        ds_device_type type) {
+  double time = 0;
 #if ON_WINDOWS
     LARGE_INTEGER freq, time_funct_start, time_funct_end;
     QueryPerformanceFrequency(&freq);
@@ -2226,8 +2200,10 @@ static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputDat
     return time;
 }
 
-static double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
-    double time;
+static double histogramRectMicroBench(GPUEnv *env,
+                                      TessScoreEvaluationInputData input,
+                                      ds_device_type type) {
+  double time;
 #if ON_WINDOWS
     LARGE_INTEGER freq, time_funct_start, time_funct_end;
     QueryPerformanceFrequency(&freq);
@@ -2305,16 +2281,14 @@ static double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData
 }
 
 //Reproducing the ThresholdRectToPix native version
-static void ThresholdRectToPix_Native(const unsigned char* imagedata,
-                                          int bytes_per_pixel,
-                                          int bytes_per_line,
-                                          const int* thresholds,
-                                          const int* hi_values,
-                                          Pix** pix) {
-    int top = 0;
-    int left = 0;
-    int width = pixGetWidth(*pix);
-    int height = pixGetHeight(*pix);
+static void ThresholdRectToPix_Native(const unsigned char *imagedata,
+                                      int bytes_per_pixel, int bytes_per_line,
+                                      const int *thresholds,
+                                      const int *hi_values, Pix **pix) {
+  int top = 0;
+  int left = 0;
+  int width = pixGetWidth(*pix);
+  int height = pixGetHeight(*pix);
 
   *pix = pixCreate(width, height, 1);
   uint32_t *pixdata = pixGetData(*pix);
@@ -2342,8 +2316,10 @@ static void ThresholdRectToPix_Native(const unsigned char* imagedata,
   }
 }
 
-static double thresholdRectToPixMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type) {
-    double time;
+static double thresholdRectToPixMicroBench(GPUEnv *env,
+                                           TessScoreEvaluationInputData input,
+                                           ds_device_type type) {
+  double time;
 #if ON_WINDOWS
     LARGE_INTEGER freq, time_funct_start, time_funct_end;
     QueryPerformanceFrequency(&freq);
@@ -2436,9 +2412,10 @@ static double thresholdRectToPixMicroBench(GPUEnv *env, TessScoreEvaluationInput
     return time;
 }
 
-static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type) {
-
-    double time = 0;
+static double getLineMasksMorphMicroBench(GPUEnv *env,
+                                          TessScoreEvaluationInputData input,
+                                          ds_device_type type) {
+  double time = 0;
 #if ON_WINDOWS
     LARGE_INTEGER freq, time_funct_start, time_funct_end;
     QueryPerformanceFrequency(&freq);
@@ -2533,19 +2510,22 @@ static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputD
 #include "stdlib.h"
 
 // encode score object as byte string
-static ds_status serializeScore( ds_device* device, void **serializedScore, unsigned int* serializedScoreSize ) {
-    *serializedScoreSize = sizeof(TessDeviceScore);
-    *serializedScore = new unsigned char[*serializedScoreSize];
-    memcpy(*serializedScore, device->score, *serializedScoreSize);
-    return DS_SUCCESS;
+static ds_status serializeScore(ds_device *device, void **serializedScore,
+                                unsigned int *serializedScoreSize) {
+  *serializedScoreSize = sizeof(TessDeviceScore);
+  *serializedScore = new unsigned char[*serializedScoreSize];
+  memcpy(*serializedScore, device->score, *serializedScoreSize);
+  return DS_SUCCESS;
 }
 
 // parses byte string and stores in score object
-static ds_status deserializeScore( ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize ) {
-    // check that serializedScoreSize == sizeof(TessDeviceScore);
-    device->score = new TessDeviceScore;
-    memcpy(device->score, serializedScore, serializedScoreSize);
-    return DS_SUCCESS;
+static ds_status deserializeScore(ds_device *device,
+                                  const unsigned char *serializedScore,
+                                  unsigned int serializedScoreSize) {
+  // TODO: verify serializedScoreSize == sizeof(TessDeviceScore) before memcpy.
+  device->score = new TessDeviceScore;
+  memcpy(device->score, serializedScore, serializedScoreSize);
+  return DS_SUCCESS;
 }
 
 static ds_status releaseScore(void *score) {
@@ -2554,58 +2534,68 @@ static ds_status releaseScore(void *score) {
 }
 
 // evaluate devices
-static ds_status evaluateScoreForDevice( ds_device *device, void *inputData) {
-    // overwrite statuc gpuEnv w/ current device
-    // so native opencl calls can be used; they use static gpuEnv
-    printf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native" );
-    GPUEnv *env = nullptr;
-    if (device->type == DS_DEVICE_OPENCL_DEVICE) {
-        env = new GPUEnv;
-        //printf("[DS] populating tmp GPUEnv from device\n");
-        populateGPUEnvFromDevice( env, device->oclDeviceID);
-        env->mnFileCount = 0; //argc;
-        env->mnKernelCount = 0UL;
-        //printf("[DS] compiling kernels for tmp GPUEnv\n");
-        OpenclDevice::gpuEnv = *env;
-        OpenclDevice::CompileKernelFile(env, "");
-    }
-
-    TessScoreEvaluationInputData *input = (TessScoreEvaluationInputData *)inputData;
-
-    // pixReadTiff
-    double composeRGBPixelTime = composeRGBPixelMicroBench( env, *input, device->type );
-
-    // HistogramRect
-    double histogramRectTime = histogramRectMicroBench( env, *input, device->type );
-
-    // ThresholdRectToPix
-    double thresholdRectToPixTime = thresholdRectToPixMicroBench( env, *input, device->type );
-
-    // getLineMasks
-    double getLineMasksMorphTime = getLineMasksMorphMicroBench( env, *input, device->type );
-
-
-    // weigh times (% of cpu time)
-    // these weights should be the % execution time that the native cpu code took
-    float composeRGBPixelWeight     = 1.2f;
-    float histogramRectWeight       = 2.4f;
-    float thresholdRectToPixWeight  = 4.5f;
-    float getLineMasksMorphWeight = 5.0f;
-
-    float weightedTime = composeRGBPixelWeight * composeRGBPixelTime +
-                         histogramRectWeight * histogramRectTime +
-                         thresholdRectToPixWeight * thresholdRectToPixTime +
-                         getLineMasksMorphWeight * getLineMasksMorphTime;
-    device->score = new TessDeviceScore;
-    ((TessDeviceScore *)device->score)->time = weightedTime;
+static ds_status evaluateScoreForDevice(ds_device *device, void *inputData) {
+  // overwrite static gpuEnv w/ current device
+  // so native opencl calls can be used; they use static gpuEnv
+  printf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName,
+         device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native");
+  GPUEnv *env = nullptr;
+  if (device->type == DS_DEVICE_OPENCL_DEVICE) {
+    env = new GPUEnv;
+    // printf("[DS] populating tmp GPUEnv from device\n");
+    populateGPUEnvFromDevice(env, device->oclDeviceID);
+    env->mnFileCount = 0;  // argc;
+    env->mnKernelCount = 0UL;
+    // printf("[DS] compiling kernels for tmp GPUEnv\n");
+    OpenclDevice::gpuEnv = *env;
+    OpenclDevice::CompileKernelFile(env, "");
+  }
 
-    printf("[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native" );
-    printf("[DS]%25s: %f (w=%.1f)\n", "composeRGBPixel", composeRGBPixelTime, composeRGBPixelWeight );
-    printf("[DS]%25s: %f (w=%.1f)\n", "HistogramRect", histogramRectTime, histogramRectWeight );
-    printf("[DS]%25s: %f (w=%.1f)\n", "ThresholdRectToPix", thresholdRectToPixTime, thresholdRectToPixWeight );
-    printf("[DS]%25s: %f (w=%.1f)\n", "getLineMasksMorph", getLineMasksMorphTime, getLineMasksMorphWeight );
-    printf("[DS]%25s: %f\n", "Score", ((TessDeviceScore *)device->score)->time );
-    return DS_SUCCESS;
+  TessScoreEvaluationInputData *input =
+      static_cast<TessScoreEvaluationInputData *>(inputData);
+
+  // pixReadTiff
+  double composeRGBPixelTime =
+      composeRGBPixelMicroBench(env, *input, device->type);
+
+  // HistogramRect
+  double histogramRectTime = histogramRectMicroBench(env, *input, device->type);
+
+  // ThresholdRectToPix
+  double thresholdRectToPixTime =
+      thresholdRectToPixMicroBench(env, *input, device->type);
+
+  // getLineMasks
+  double getLineMasksMorphTime =
+      getLineMasksMorphMicroBench(env, *input, device->type);
+
+  // weigh times (% of cpu time)
+  // these weights should be the % execution time that the native cpu code took
+  float composeRGBPixelWeight = 1.2f;
+  float histogramRectWeight = 2.4f;
+  float thresholdRectToPixWeight = 4.5f;
+  float getLineMasksMorphWeight = 5.0f;
+
+  float weightedTime = composeRGBPixelWeight * composeRGBPixelTime +
+                       histogramRectWeight * histogramRectTime +
+                       thresholdRectToPixWeight * thresholdRectToPixTime +
+                       getLineMasksMorphWeight * getLineMasksMorphTime;
+  device->score = new TessDeviceScore;
+  ((TessDeviceScore *)device->score)->time = weightedTime;
+
+  printf("[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName,
+         device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native");
+  printf("[DS]%25s: %f (w=%.1f)\n", "composeRGBPixel", composeRGBPixelTime,
+         composeRGBPixelWeight);
+  printf("[DS]%25s: %f (w=%.1f)\n", "HistogramRect", histogramRectTime,
+         histogramRectWeight);
+  printf("[DS]%25s: %f (w=%.1f)\n", "ThresholdRectToPix",
+         thresholdRectToPixTime, thresholdRectToPixWeight);
+  printf("[DS]%25s: %f (w=%.1f)\n", "getLineMasksMorph", getLineMasksMorphTime,
+         getLineMasksMorphWeight);
+  printf("[DS]%25s: %f\n", "Score",
+         static_cast<TessDeviceScore *>(device->score)->time);
+  return DS_SUCCESS;
 }
 
 // initial call to select device
diff --git a/opencl/openclwrapper.h b/opencl/openclwrapper.h
index f150e6b5a1..5fe1a50023 100644
--- a/opencl/openclwrapper.h
+++ b/opencl/openclwrapper.h
@@ -242,7 +242,7 @@ class OpenclDevice
 /* OpenCL implementations of Morphological operations*/
 
     //Initialiation of OCL buffers used in Morph operations
-    static int initMorphCLAllocations(l_int32  wpl, l_int32  h, Pix* pixs);
+    static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs);
     static void releaseMorphCLBuffers();
 
     static void pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline,
diff --git a/textord/bbgrid.h b/textord/bbgrid.h
index ba3b9d28c2..dcc017d4cf 100644
--- a/textord/bbgrid.h
+++ b/textord/bbgrid.h
@@ -372,8 +372,8 @@ template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {
 template<class BBC>
 int SortByBoxLeft(const void* void1, const void* void2) {
   // The void*s are actually doubly indirected, so get rid of one level.
-  const BBC* p1 = *static_cast<const BBC* const *>(void1);
-  const BBC* p2 = *static_cast<const BBC* const *>(void2);
+  const BBC* p1 = *static_cast<const BBC* const*>(void1);
+  const BBC* p2 = *static_cast<const BBC* const*>(void2);
   int result = p1->bounding_box().left() - p2->bounding_box().left();
   if (result != 0)
     return result;
@@ -390,8 +390,8 @@ int SortByBoxLeft(const void* void1, const void* void2) {
 template<class BBC>
 int SortRightToLeft(const void* void1, const void* void2) {
   // The void*s are actually doubly indirected, so get rid of one level.
-  const BBC* p1 = *static_cast<const BBC* const *>(void1);
-  const BBC* p2 = *static_cast<const BBC* const *>(void2);
+  const BBC* p1 = *static_cast<const BBC* const*>(void1);
+  const BBC* p2 = *static_cast<const BBC* const*>(void2);
   int result = p2->bounding_box().right() - p1->bounding_box().right();
   if (result != 0)
     return result;
@@ -408,8 +408,8 @@ int SortRightToLeft(const void* void1, const void* void2) {
 template<class BBC>
 int SortByBoxBottom(const void* void1, const void* void2) {
   // The void*s are actually doubly indirected, so get rid of one level.
-  const BBC* p1 = *static_cast<const BBC* const *>(void1);
-  const BBC* p2 = *static_cast<const BBC* const *>(void2);
+  const BBC* p1 = *static_cast<const BBC* const*>(void1);
+  const BBC* p2 = *static_cast<const BBC* const*>(void2);
   int result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
   if (result != 0)
     return result;
diff --git a/textord/colpartition.h b/textord/colpartition.h
index 811175076c..7d799a5cec 100644
--- a/textord/colpartition.h
+++ b/textord/colpartition.h
@@ -706,10 +706,8 @@ class ColPartition : public ELIST2_LINK {
 
   // Sort function to sort by bounding box.
   static int SortByBBox(const void* p1, const void* p2) {
-    const ColPartition* part1 =
-        *static_cast<const ColPartition* const*>(p1);
-    const ColPartition* part2 =
-        *static_cast<const ColPartition* const*>(p2);
+    const ColPartition* part1 = *static_cast<const ColPartition* const*>(p1);
+    const ColPartition* part2 = *static_cast<const ColPartition* const*>(p2);
     int mid_y1 = part1->bounding_box_.y_middle();
     int mid_y2 = part2->bounding_box_.y_middle();
     if ((part2->bounding_box_.bottom() <= mid_y1 &&
diff --git a/textord/drawedg.cpp b/textord/drawedg.cpp
index 96c4b55472..0a429b5483 100644
--- a/textord/drawedg.cpp
+++ b/textord/drawedg.cpp
@@ -1,8 +1,9 @@
 /**********************************************************************
  * File:        drawedg.cpp  (Formerly drawedge.c)
- * Description: Collection of functions to draw things to do with edge detection.
- * Author:                  Ray Smith
- * Created:                 Thu Jun 06 13:29:20 BST 1991
+ * Description: Collection of functions to draw things to do with edge
+ *              detection.
+ *  Author:     Ray Smith
+ *  Created:    Thu Jun 06 13:29:20 BST 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/textord/makerow.cpp b/textord/makerow.cpp
index c4aa55ba58..76b76818ac 100644
--- a/textord/makerow.cpp
+++ b/textord/makerow.cpp
@@ -813,7 +813,7 @@ void compute_line_occupation(                    //project blobs
       int32_t width = blob_box.right() - blob_box.left();
       index = blob_box.bottom() - min_y;
       ASSERT_HOST(index >= 0 && index < line_count);
-                                 //count transitions
+      // count transitions
       deltas[index] += width;
       index = blob_box.top() - min_y;
       ASSERT_HOST(index >= 0 && index < line_count);
diff --git a/textord/scanedg.cpp b/textord/scanedg.cpp
index a9f27d55b5..84d7656a97 100644
--- a/textord/scanedg.cpp
+++ b/textord/scanedg.cpp
@@ -19,14 +19,14 @@
 
 #include "scanedg.h"
 
-#include <memory> // std::unique_ptr
+#include <memory>  // std::unique_ptr
 
 #include "allheaders.h"
 #include "edgloop.h"
 
 #define WHITE_PIX     1          /*thresholded colours */
 #define BLACK_PIX     0
-                                 /*W->B->W */
+// Flips between WHITE_PIX and BLACK_PIX.
 #define FLIP_COLOUR(pix)  (1-(pix))
 
 /**********************************************************************
@@ -102,9 +102,10 @@ void make_margins(                         //get a line
 
   if (block->poly_block () != NULL) {
     lines = new PB_LINE_IT (block->poly_block ());
-    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line (y));
+    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(
+        lines->get_line(y));
     if (!segments->empty ()) {
-      seg_it.set_to_list (segments.get());
+      seg_it.set_to_list(segments.get());
       seg_it.mark_cycle_pt ();
       start = seg_it.data ()->x ();
       xext = seg_it.data ()->y ();
@@ -335,7 +336,7 @@ void join_edges(CRACKEDGE *edge1,  // edges to join
   if (edge1->pos.x() + edge1->stepx != edge2->pos.x()
   || edge1->pos.y() + edge1->stepy != edge2->pos.y()) {
     CRACKEDGE *tempedge = edge1;
-    edge1 = edge2;               // swap around
+    edge1 = edge2;  // swap around
     edge2 = tempedge;
   }
 
diff --git a/textord/tabvector.h b/textord/tabvector.h
index b3e37ca601..949fdb1978 100644
--- a/textord/tabvector.h
+++ b/textord/tabvector.h
@@ -292,8 +292,8 @@ class TabVector : public ELIST2_LINK {
 
   // Sort function for E2LIST::sort to sort by sort_key_.
   static int SortVectorsByKey(const void* v1, const void* v2) {
-    const TabVector* tv1 = *static_cast<const TabVector* const *>(v1);
-    const TabVector* tv2 = *static_cast<const TabVector* const *>(v2);
+    const TabVector* tv1 = *static_cast<const TabVector* const*>(v1);
+    const TabVector* tv2 = *static_cast<const TabVector* const*>(v2);
     return tv1->sort_key_ - tv2->sort_key_;
   }
 
diff --git a/training/boxchar.cpp b/training/boxchar.cpp
index 0d51178ed1..a8e9c6c313 100644
--- a/training/boxchar.cpp
+++ b/training/boxchar.cpp
@@ -24,6 +24,7 @@
 
 #include <stddef.h>
 #include <algorithm>
+#include <vector>
 
 #include "fileio.h"
 #include "genericvector.h"
@@ -82,17 +83,16 @@ void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules,
                              std::vector<BoxChar*>* boxes) {
   int prev_i = -1;
   int max_shift = 0;
-  for (int i = 0; static_cast<unsigned int>(i) < boxes->size(); ++i) {
+  for (size_t i = 0; i < boxes->size(); ++i) {
     Box* box = (*boxes)[i]->box_;
     if (box == nullptr) {
-      if (prev_i < 0 || prev_i < i - 1 || static_cast<unsigned int>(i) + 1 == boxes->size()) {
+      if (prev_i < 0 || prev_i + 1 < i || i + 1 == boxes->size()) {
         // Erase null boxes at the start of a line and after another null box.
         do {
           delete (*boxes)[i];
           boxes->erase(boxes->begin() + i);
-          --i;
-        } while (i >= 0 && static_cast<unsigned int>(i) + 1 == boxes->size() &&
-                 (*boxes)[i]->box_ == nullptr);
+          if (i-- == 0) break;  // Wraps; the loop's ++i returns to index 0.
+        } while (i + 1 == boxes->size() && (*boxes)[i]->box_ == nullptr);
       }
       continue;
     }
@@ -120,7 +120,7 @@ void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules,
             x = 0;
           }
         }
-        if (prev_i == i - 1) {
+        if (prev_i + 1 == i) {
           // New character needed.
           BoxChar* new_box = new BoxChar("\t", 1);
           new_box->AddBox(x, y, width, height);
@@ -146,7 +146,7 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
                            std::vector<BoxChar*>* boxes) {
   // After InsertNewlines, any remaining null boxes are not newlines, and are
   // singletons, so add a box to each remaining null box.
-  for (int i = 1; static_cast<unsigned int>(i) + 1 < boxes->size(); ++i) {
+  for (size_t i = 1; i + 1 < boxes->size(); ++i) {
     Box* box = (*boxes)[i]->box_;
     if (box == nullptr) {
       Box* prev = (*boxes)[i - 1]->box_;
@@ -178,8 +178,9 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
         }
         // Left becomes the max right of all next boxes forward to the first
         // space or newline.
-        for (size_t j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != nullptr &&
-                               (*boxes)[j]->ch_ != "\t";
+        for (size_t j = i + 2;
+             j < boxes->size() && (*boxes)[j]->box_ != nullptr &&
+             (*boxes)[j]->ch_ != "\t";
              ++j) {
           next = (*boxes)[j]->box_;
           if (next->x + next->w > left) {
@@ -215,11 +216,12 @@ void BoxChar::ReorderRTLText(std::vector<BoxChar*>* boxes) {
 /* static */
 bool BoxChar::ContainsMostlyRTL(const std::vector<BoxChar*>& boxes) {
   int num_rtl = 0, num_ltr = 0;
-  for (unsigned int i = 0; i < boxes.size(); ++i) {
+  for (size_t i = 0; i < boxes.size(); ++i) {
     // Convert the unichar to UTF32 representation
-    GenericVector<char32> uni_vector;
-    if (!UNICHAR::UTF8ToUnicode(boxes[i]->ch_.c_str(), &uni_vector)) {
-      tprintf("Illegal utf8 in boxchar %u string:%s = ", i,
+    std::vector<char32> uni_vector =
+        UNICHAR::UTF8ToUTF32(boxes[i]->ch_.c_str());
+    if (uni_vector.empty()) {
+      tprintf("Illegal utf8 in boxchar %d string:%s = ", static_cast<int>(i),
               boxes[i]->ch_.c_str());
       for (size_t c = 0; c < boxes[i]->ch_.size(); ++c) {
         tprintf(" 0x%x", boxes[i]->ch_[c]);
@@ -227,8 +229,8 @@ bool BoxChar::ContainsMostlyRTL(const std::vector<BoxChar*>& boxes) {
       tprintf("\n");
       continue;
     }
-    for (int j = 0; j < uni_vector.size(); ++j) {
-      UCharDirection dir = u_charDirection(uni_vector[j]);
+    for (char32 ch : uni_vector) {
+      UCharDirection dir = u_charDirection(ch);
       if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC ||
           dir == U_ARABIC_NUMBER) {
         ++num_rtl;
@@ -263,7 +265,8 @@ bool BoxChar::MostlyVertical(const std::vector<BoxChar*>& boxes) {
 /* static */
 int BoxChar::TotalByteLength(const std::vector<BoxChar*>& boxes) {
   int total_length = 0;
-  for (size_t i = 0; i < boxes.size(); ++i) total_length += boxes[i]->ch_.size();
+  for (size_t i = 0; i < boxes.size(); ++i)
+    total_length += boxes[i]->ch_.size();
   return total_length;
 }
 
diff --git a/training/commontraining.cpp b/training/commontraining.cpp
index e080087909..36b79b3ff7 100644
--- a/training/commontraining.cpp
+++ b/training/commontraining.cpp
@@ -13,6 +13,9 @@
 
 #include "commontraining.h"
 
+#include <assert.h>
+#include <math.h>
+
 #include "allheaders.h"
 #include "ccutil.h"
 #include "classify.h"
@@ -35,9 +38,6 @@
 #include "tprintf.h"
 #include "unicity_table.h"
 
-#include <assert.h>
-#include <math.h>
-
 using tesseract::CCUtil;
 using tesseract::IntFeatureSpace;
 using tesseract::ParamUtils;
@@ -369,9 +369,8 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
   LABELEDLIST char_sample;
   FEATURE_SET feature_samples;
   CHAR_DESC char_desc;
-  int ShortNameToFeatureType_res = ShortNameToFeatureType(feature_defs, feature_name);
-  assert(0 <= ShortNameToFeatureType_res);
-  unsigned int feature_type = static_cast<unsigned int>(ShortNameToFeatureType_res);
+  uint32_t feature_type = ShortNameToFeatureType(feature_defs, feature_name);
+
   // Zero out the font_sample_count for all the classes.
   LIST it = *training_samples;
   iterate(it) {
@@ -485,7 +484,8 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
   LIST FeatureList = nullptr;
   FEATURE_SET FeatureSet = nullptr;
 
-  int desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type);
+  int32_t desc_index =
+      ShortNameToFeatureType(FeatureDefs, program_feature_type);
   N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
   Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);
 
diff --git a/training/normstrngs.cpp b/training/normstrngs.cpp
index 99f84b78e4..17fe5cf8b7 100644
--- a/training/normstrngs.cpp
+++ b/training/normstrngs.cpp
@@ -25,37 +25,12 @@
 #include "unichar.h"
 #include "unicode/normalizer2.h"  // From libicu
 #include "unicode/translit.h"     // From libicu
+#include "unicode/uchar.h"        // From libicu
 #include "unicode/unorm2.h"       // From libicu
+#include "unicode/uscript.h"      // From libicu
 
 namespace tesseract {
 
-void UTF8ToUTF32(const char* utf8_str, GenericVector<char32>* str32) {
-  str32->clear();
-  str32->reserve(strlen(utf8_str));
-  int len = strlen(utf8_str);
-  int step = 0;
-  for (int ch = 0; ch < len; ch += step) {
-    step = UNICHAR::utf8_step(utf8_str + ch);
-    if (step > 0) {
-      UNICHAR uni_ch(utf8_str + ch, step);
-      (*str32) += uni_ch.first_uni();
-    }
-  }
-}
-
-void UTF32ToUTF8(const GenericVector<char32>& str32, STRING* utf8_str) {
-  utf8_str->ensure(str32.length());
-  utf8_str->assign("", 0);
-  for (int i = 0; i < str32.length(); ++i) {
-    UNICHAR uni_ch(str32[i]);
-    char *utf8 = uni_ch.utf8_str();
-    if (utf8 != nullptr) {
-      (*utf8_str) += utf8;
-      delete[] utf8;
-    }
-  }
-}
-
 bool is_hyphen_punc(const char32 ch) {
   static const int kNumHyphenPuncUnicodes = 13;
   static const char32 kHyphenPuncUnicodes[kNumHyphenPuncUnicodes] = {
@@ -171,42 +146,33 @@ bool IsOCREquivalent(char32 ch1, char32 ch2) {
 
 bool IsValidCodepoint(const char32 ch) {
   // In the range [0, 0xD800) or [0xE000, 0x10FFFF]
-  return (static_cast<uinT32>(ch) < 0xD800)
-      || (ch >= 0xE000 && ch <= 0x10FFFF);
+  return (static_cast<uinT32>(ch) < 0xD800) || (ch >= 0xE000 && ch <= 0x10FFFF);
 }
 
 bool IsWhitespace(const char32 ch) {
-  ASSERT_HOST_MSG(IsValidCodepoint(ch),
-                  "Invalid Unicode codepoint: 0x%x\n", ch);
+  ASSERT_HOST_MSG(IsValidCodepoint(ch), "Invalid Unicode codepoint: 0x%x\n",
+                  ch);
   return u_isUWhiteSpace(static_cast<UChar32>(ch));
 }
 
 bool IsUTF8Whitespace(const char* text) {
-#if 0 // intent
   return SpanUTF8Whitespace(text) == strlen(text);
-#else // avoiding g++ -Wsign-compare warning
-  const int res = SpanUTF8Whitespace(text);
-  assert(0 <= res);
-  return static_cast<unsigned int>(res) == strlen(text);
-#endif
 }
 
-int SpanUTF8Whitespace(const char* text) {
+unsigned int SpanUTF8Whitespace(const char* text) {
   int n_white = 0;
   for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
-       it != UNICHAR::end(text, strlen(text));
-       ++it) {
+       it != UNICHAR::end(text, strlen(text)); ++it) {
     if (!IsWhitespace(*it)) break;
     n_white += it.utf8_len();
   }
   return n_white;
 }
 
-int SpanUTF8NotWhitespace(const char* text) {
+unsigned int SpanUTF8NotWhitespace(const char* text) {
   int n_notwhite = 0;
   for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
-       it != UNICHAR::end(text, strlen(text));
-       ++it) {
+       it != UNICHAR::end(text, strlen(text)); ++it) {
     if (IsWhitespace(*it)) break;
     n_notwhite += it.utf8_len();
   }
@@ -215,33 +181,31 @@ int SpanUTF8NotWhitespace(const char* text) {
 
 bool IsInterchangeValid(const char32 ch) {
   return IsValidCodepoint(ch) &&
-      !(ch >= 0xFDD0 && ch <= 0xFDEF) &&  // Noncharacters.
-      !(ch >= 0xFFFE && ch <= 0xFFFF) &&
-      !(ch >= 0x1FFFE && ch <= 0x1FFFF) &&
-      !(ch >= 0x2FFFE && ch <= 0x2FFFF) &&
-      !(ch >= 0x3FFFE && ch <= 0x3FFFF) &&
-      !(ch >= 0x4FFFE && ch <= 0x4FFFF) &&
-      !(ch >= 0x5FFFE && ch <= 0x5FFFF) &&
-      !(ch >= 0x6FFFE && ch <= 0x6FFFF) &&
-      !(ch >= 0x7FFFE && ch <= 0x7FFFF) &&
-      !(ch >= 0x8FFFE && ch <= 0x8FFFF) &&
-      !(ch >= 0x9FFFE && ch <= 0x9FFFF) &&
-      !(ch >= 0xAFFFE && ch <= 0xAFFFF) &&
-      !(ch >= 0xBFFFE && ch <= 0xBFFFF) &&
-      !(ch >= 0xCFFFE && ch <= 0xCFFFF) &&
-      !(ch >= 0xDFFFE && ch <= 0xDFFFF) &&
-      !(ch >= 0xEFFFE && ch <= 0xEFFFF) &&
-      !(ch >= 0xFFFFE && ch <= 0xFFFFF) &&
-      !(ch >= 0x10FFFE && ch <= 0x10FFFF) &&
-      (!u_isISOControl(static_cast<UChar32>(ch)) ||
-       ch == '\n' || ch == '\f' || ch == '\t' || ch == '\r');
+         !(ch >= 0xFDD0 && ch <= 0xFDEF) &&  // Noncharacters.
+         !(ch >= 0xFFFE && ch <= 0xFFFF) && !(ch >= 0x1FFFE && ch <= 0x1FFFF) &&
+         !(ch >= 0x2FFFE && ch <= 0x2FFFF) &&
+         !(ch >= 0x3FFFE && ch <= 0x3FFFF) &&
+         !(ch >= 0x4FFFE && ch <= 0x4FFFF) &&
+         !(ch >= 0x5FFFE && ch <= 0x5FFFF) &&
+         !(ch >= 0x6FFFE && ch <= 0x6FFFF) &&
+         !(ch >= 0x7FFFE && ch <= 0x7FFFF) &&
+         !(ch >= 0x8FFFE && ch <= 0x8FFFF) &&
+         !(ch >= 0x9FFFE && ch <= 0x9FFFF) &&
+         !(ch >= 0xAFFFE && ch <= 0xAFFFF) &&
+         !(ch >= 0xBFFFE && ch <= 0xBFFFF) &&
+         !(ch >= 0xCFFFE && ch <= 0xCFFFF) &&
+         !(ch >= 0xDFFFE && ch <= 0xDFFFF) &&
+         !(ch >= 0xEFFFE && ch <= 0xEFFFF) &&
+         !(ch >= 0xFFFFE && ch <= 0xFFFFF) &&
+         !(ch >= 0x10FFFE && ch <= 0x10FFFF) &&
+         (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' ||
+          ch == '\f' || ch == '\t' || ch == '\r');
 }
 
 bool IsInterchangeValid7BitAscii(const char32 ch) {
-  return IsValidCodepoint(ch) &&
-      ch <= 128 &&
-      (!u_isISOControl(static_cast<UChar32>(ch)) ||
-       ch == '\n' || ch == '\f' || ch == '\t' || ch == '\r');
+  return IsValidCodepoint(ch) && ch <= 128 &&
+         (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' ||
+          ch == '\f' || ch == '\t' || ch == '\r');
 }
 
 char32 FullwidthToHalfwidth(const char32 ch) {
diff --git a/training/normstrngs.h b/training/normstrngs.h
index 6fca3193ab..27f36e0981 100644
--- a/training/normstrngs.h
+++ b/training/normstrngs.h
@@ -50,7 +50,7 @@ inline STRING NormalizeUTF8String(const char* str8) {
   return NormalizeUTF8String(false, str8);
 }
 
-// Apply just the OCR-specific normalizations and return the normalized char.
+// Applies just the OCR-specific normalizations and returns the normalized char.
 char32 OCRNormalize(char32 ch);
 
 // Returns true if the OCRNormalized ch1 and ch2 are the same.
@@ -67,11 +67,11 @@ bool IsUTF8Whitespace(const char* text);
 
 // Returns the length of bytes of the prefix of 'text' that have the White_Space
 // unicode property.
-int SpanUTF8Whitespace(const char* text);
+unsigned int SpanUTF8Whitespace(const char* text);
 
 // Returns the length of bytes of the prefix of 'text' that DO NOT have the
 // White_Space unicode property.
-int SpanUTF8NotWhitespace(const char* text);
+unsigned int SpanUTF8NotWhitespace(const char* text);
 
 // Returns true if the char is interchange valid i.e. no C0 or C1 control codes
 // (other than CR LF HT FF) and no non-characters.
diff --git a/training/pango_font_info.cpp b/training/pango_font_info.cpp
index b0474575ad..07d9077717 100644
--- a/training/pango_font_info.cpp
+++ b/training/pango_font_info.cpp
@@ -88,6 +88,11 @@ PangoFontInfo::PangoFontInfo(const string& desc)
 
 void PangoFontInfo::Clear() {
   font_size_ = 0;
+  is_bold_ = false;
+  is_italic_ = false;
+  is_smallcaps_ = false;
+  is_monospace_ = false;
+  is_fraktur_ = false;
   family_name_.clear();
   font_type_ = UNKNOWN;
   if (desc_) {
@@ -168,6 +173,29 @@ static void ListFontFamilies(PangoFontFamily*** families,
   pango_font_map_list_families(font_map, families, n_families);
 }
 
+// Inspects whether a given font family is monospace. If the font is not
+// available, it cannot make a decision and returns false by default.
+static bool IsMonospaceFontFamily(const char* family_name) {
+  PangoFontFamily** families = nullptr;
+  int n_families = 0;
+  bool is_monospace = false;
+  ListFontFamilies(&families, &n_families);
+  ASSERT_HOST(n_families > 0);
+  bool found = false;
+  for (int i = 0; i < n_families; ++i) {
+    if (!strcasecmp(family_name, pango_font_family_get_name(families[i]))) {
+      is_monospace = pango_font_family_is_monospace(families[i]);
+      found = true;
+      break;
+    }
+  }
+  if (!found) {
+    tlog(1, "Could not find monospace property of family %s\n", family_name);
+  }
+  g_free(families);
+  return is_monospace;
+}
+
 bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) {
   Clear();
   const char* family = pango_font_description_get_family(desc);
@@ -180,6 +208,7 @@ bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) {
   }
   family_name_ = string(family);
   desc_ = pango_font_description_copy(desc);
+  is_monospace_ = IsMonospaceFontFamily(family);
 
   // Set font size in points
   font_size_ = pango_font_description_get_size(desc);
@@ -187,6 +216,17 @@ bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) {
     font_size_ /= PANGO_SCALE;
   }
 
+  PangoStyle style = pango_font_description_get_style(desc);
+  is_italic_ = (PANGO_STYLE_ITALIC == style ||
+                PANGO_STYLE_OBLIQUE == style);
+  is_smallcaps_ = (pango_font_description_get_variant(desc)
+                   == PANGO_VARIANT_SMALL_CAPS);
+
+  is_bold_ = (pango_font_description_get_weight(desc) >= PANGO_WEIGHT_BOLD);
+  // We don't have a way to detect whether a font is of type Fraktur. The fonts
+  // we currently use all have "Fraktur" in their family name, so we do a
+  // fragile but functional check for that here.
+  is_fraktur_ = (strcasestr(family, "Fraktur") != nullptr);
   return true;
 }
 
diff --git a/training/pango_font_info.h b/training/pango_font_info.h
index f435d04af0..af6ee98512 100644
--- a/training/pango_font_info.h
+++ b/training/pango_font_info.h
@@ -105,6 +105,11 @@ class PangoFontInfo {
   const string& family_name() const    { return family_name_; }
   // Size in points (1/72"), rounded to the nearest integer.
   int font_size() const { return font_size_; }
+  bool is_bold() const { return is_bold_; }
+  bool is_italic() const { return is_italic_; }
+  bool is_smallcaps() const { return is_smallcaps_; }
+  bool is_monospace() const { return is_monospace_; }
+  bool is_fraktur() const { return is_fraktur_; }
   FontTypeEnum font_type() const { return font_type_; }
 
   int resolution() const { return resolution_; }
@@ -123,6 +128,11 @@ class PangoFontInfo {
   // Font properties set automatically from parsing the font description name.
   string family_name_;
   int font_size_;
+  bool is_bold_;
+  bool is_italic_;
+  bool is_smallcaps_;
+  bool is_monospace_;
+  bool is_fraktur_;
   FontTypeEnum font_type_;
   // The Pango description that was used to initialize the instance.
   PangoFontDescription* desc_;
diff --git a/training/stringrenderer.cpp b/training/stringrenderer.cpp
index 35aca8baee..382b292d81 100644
--- a/training/stringrenderer.cpp
+++ b/training/stringrenderer.cpp
@@ -141,11 +141,11 @@ void StringRenderer::set_resolution(const int resolution) {
 }
 
 void StringRenderer::set_underline_start_prob(const double frac) {
-  underline_start_prob_ = min(max(frac, 0.0), 1.0);
+  underline_start_prob_ = std::min(std::max(frac, 0.0), 1.0);
 }
 
 void StringRenderer::set_underline_continuation_prob(const double frac) {
-  underline_continuation_prob_ = min(max(frac, 0.0), 1.0);
+  underline_continuation_prob_ = std::min(std::max(frac, 0.0), 1.0);
 }
 
 StringRenderer::~StringRenderer() {
@@ -191,6 +191,7 @@ void StringRenderer::SetLayoutProperties() {
   int max_height = page_height_ - 2 * v_margin_;
   tlog(3, "max_width = %d, max_height = %d\n", max_width, max_height);
   if (vertical_text_) {
+    using std::swap;
     swap(max_width, max_height);
   }
   pango_layout_set_width(layout_, max_width * PANGO_SCALE);
@@ -340,8 +341,7 @@ void StringRenderer::RotatePageBoxes(float rotation) {
 
 
 void StringRenderer::ClearBoxes() {
-  for (size_t i = 0; i < boxchars_.size(); ++i)
-    delete boxchars_[i];
+  for (size_t i = 0; i < boxchars_.size(); ++i) delete boxchars_[i];
   boxchars_.clear();
   boxaDestroy(&page_boxes_);
 }
@@ -433,10 +433,10 @@ static void MergeBoxCharsToWords(std::vector<BoxChar*>* boxchars) {
       // Compute bounding box union
       const Box* box = boxchars->at(i)->box();
       Box* last_box = last_boxchar->mutable_box();
-      int left = min(last_box->x, box->x);
-      int right = max(last_box->x + last_box->w, box->x + box->w);
-      int top = min(last_box->y, box->y);
-      int bottom = max(last_box->y + last_box->h, box->y + box->h);
+      int left = std::min(last_box->x, box->x);
+      int right = std::max(last_box->x + last_box->w, box->x + box->w);
+      int top = std::min(last_box->y, box->y);
+      int bottom = std::max(last_box->y + last_box->h, box->y + box->h);
       // Conclude that the word was broken to span multiple lines based on the
       // size of the merged bounding box in relation to those of the individual
       // characters seen so far.
@@ -523,9 +523,9 @@ void StringRenderer::ComputeClusterBoxes() {
                     "cluster_text:%s  start_byte_index:%d\n",
                     cluster_text.c_str(), start_byte_index);
     if (box_padding_) {
-      cluster_rect.x = max(0, cluster_rect.x - box_padding_);
+      cluster_rect.x = std::max(0, cluster_rect.x - box_padding_);
       cluster_rect.width += 2 * box_padding_;
-      cluster_rect.y = max(0, cluster_rect.y - box_padding_);
+      cluster_rect.y = std::max(0, cluster_rect.y - box_padding_);
       cluster_rect.height += 2 * box_padding_;
     }
     if (add_ligatures_) {
@@ -865,8 +865,8 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage,
     tprintf("Total chars = %d\n", total_chars_);
   }
   const std::vector<string>& all_fonts = FontUtils::ListAvailableFonts();
-  assert(0 <= font_index_);
-  for (unsigned int i = static_cast<unsigned int>(font_index_); i < all_fonts.size(); ++i) {
+
+  for (size_t i = font_index_; i < all_fonts.size(); ++i) {
     ++font_index_;
     int raw_score = 0;
     int ok_chars =
diff --git a/training/stringrenderer.h b/training/stringrenderer.h
index e1144d4ee7..b6189ced6b 100644
--- a/training/stringrenderer.h
+++ b/training/stringrenderer.h
@@ -212,7 +212,7 @@ class StringRenderer {
   // Objects cached for subsequent calls to RenderAllFontsToImage()
   std::unordered_map<char32, inT64> char_map_;  // Time-saving char histogram.
   int total_chars_;   // Number in the string to be rendered.
-  int font_index_;    // Index of next font to use in font list.
+  unsigned int font_index_;  // Index of next font to use in font list.
   int last_offset_;   // Offset returned from last successful rendering
 
  private:
diff --git a/training/unicharset_extractor.cpp b/training/unicharset_extractor.cpp
index 1e6c35afb3..e9954a4e87 100644
--- a/training/unicharset_extractor.cpp
+++ b/training/unicharset_extractor.cpp
@@ -38,6 +38,8 @@
 #include "unichar.h"
 #include "unicharset.h"
 
+using tesseract::UNICHAR;
+
 static const char* const kUnicharsetFileName = "unicharset";
 
 UNICHAR_ID wc_to_unichar_id(const UNICHARSET &unicharset, int wc) {
diff --git a/training/unicharset_training_utils.cpp b/training/unicharset_training_utils.cpp
index 10582f027f..d16e919af8 100644
--- a/training/unicharset_training_utils.cpp
+++ b/training/unicharset_training_utils.cpp
@@ -22,13 +22,13 @@
 #include <stdlib.h>
 #include <string.h>
 #include <string>
+#include <vector>
 
 #include "fileio.h"
-#include "genericvector.h"
 #include "icuerrorcode.h"
 #include "normstrngs.h"
 #include "statistc.h"
-#include "strngs.h"
+#include "unichar.h"
 #include "unicharset.h"
 #include "unicode/uchar.h"    // from libicu
 #include "unicode/uscript.h"  // from libicu
@@ -50,8 +50,7 @@ void SetupBasicProperties(bool report_errors, bool decompose,
     }
 
     // Convert the unichar to UTF32 representation
-    GenericVector<char32> uni_vector;
-    tesseract::UTF8ToUTF32(unichar_str, &uni_vector);
+    std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);
 
     // Assume that if the property is true for any character in the string,
     // then it holds for the whole "character".
@@ -61,17 +60,12 @@ void SetupBasicProperties(bool report_errors, bool decompose,
     bool unichar_isdigit = false;
     bool unichar_ispunct = false;
 
-    for (int i = 0; i < uni_vector.size(); ++i) {
-      if (u_isalpha(uni_vector[i]))
-        unichar_isalpha = true;
-      if (u_islower(uni_vector[i]))
-        unichar_islower = true;
-      if (u_isupper(uni_vector[i]))
-        unichar_isupper = true;
-      if (u_isdigit(uni_vector[i]))
-        unichar_isdigit = true;
-      if (u_ispunct(uni_vector[i]))
-        unichar_ispunct = true;
+    for (char32 u_ch : uni_vector) {
+      if (u_isalpha(u_ch)) unichar_isalpha = true;
+      if (u_islower(u_ch)) unichar_islower = true;
+      if (u_isupper(u_ch)) unichar_isupper = true;
+      if (u_isdigit(u_ch)) unichar_isdigit = true;
+      if (u_ispunct(u_ch)) unichar_ispunct = true;
     }
 
     unicharset->set_isalpha(unichar_id, unichar_isalpha);
@@ -88,7 +82,7 @@ void SetupBasicProperties(bool report_errors, bool decompose,
     // Obtain the lower/upper case if needed and record it in the properties.
     unicharset->set_other_case(unichar_id, unichar_id);
     if (unichar_islower || unichar_isupper) {
-      GenericVector<char32> other_case(num_code_points, 0);
+      std::vector<char32> other_case(num_code_points, 0);
       for (int i = 0; i < num_code_points; ++i) {
         // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used.
         // However since they deal with UChars (so need a conversion function
@@ -97,8 +91,7 @@ void SetupBasicProperties(bool report_errors, bool decompose,
         other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) :
           u_tolower(uni_vector[i]);
       }
-      STRING other_case_uch;
-      tesseract::UTF32ToUTF8(other_case, &other_case_uch);
+      string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);
       UNICHAR_ID other_case_id =
           unicharset->unichar_to_id(other_case_uch.c_str());
       if (other_case_id != INVALID_UNICHAR_ID) {
@@ -110,7 +103,7 @@ void SetupBasicProperties(bool report_errors, bool decompose,
     }
 
     // Set RTL property and obtain mirror unichar ID from ICU.
-    GenericVector<char32> mirrors(num_code_points, 0);
+    std::vector<char32> mirrors(num_code_points, 0);
     for (int i = 0; i < num_code_points; ++i) {
       mirrors[i] = u_charMirror(uni_vector[i]);
       if (i == 0) {  // set directionality to that of the 1st code point
@@ -119,8 +112,7 @@ void SetupBasicProperties(bool report_errors, bool decompose,
                                       u_charDirection(uni_vector[i])));
       }
     }
-    STRING mirror_uch;
-    tesseract::UTF32ToUTF8(mirrors, &mirror_uch);
+    string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);
     UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());
     if (mirror_uch_id != INVALID_UNICHAR_ID) {
       unicharset->set_mirror(unichar_id, mirror_uch_id);
@@ -130,8 +122,8 @@ void SetupBasicProperties(bool report_errors, bool decompose,
     }
 
     // Record normalized version of this unichar.
-    STRING normed_str = tesseract::NormalizeUTF8String(decompose, unichar_str);
-    if (unichar_id != 0 && normed_str.length() > 0) {
+    string normed_str = tesseract::NormalizeUTF8String(decompose, unichar_str);
+    if (unichar_id != 0 && !normed_str.empty()) {
       unicharset->set_normed(unichar_id, normed_str.c_str());
     } else {
       unicharset->set_normed(unichar_id, unichar_str);
diff --git a/wordrec/language_model.cpp b/wordrec/language_model.cpp
index 7075f3d783..17ce78b9df 100644
--- a/wordrec/language_model.cpp
+++ b/wordrec/language_model.cpp
@@ -43,91 +43,89 @@ const float LanguageModel::kMaxAvgNgramCost = 25.0f;
 
 LanguageModel::LanguageModel(const UnicityTable<FontInfo> *fontinfo_table,
                              Dict *dict)
-  : INT_MEMBER(language_model_debug_level, 0, "Language model debug level",
-               dict->getCCUtil()->params()),
-    BOOL_INIT_MEMBER(language_model_ngram_on, false,
-                     "Turn on/off the use of character ngram model",
-                     dict->getCCUtil()->params()),
-    INT_MEMBER(language_model_ngram_order, 8,
-               "Maximum order of the character ngram model",
-               dict->getCCUtil()->params()),
-    INT_MEMBER(language_model_viterbi_list_max_num_prunable, 10,
-               "Maximum number of prunable (those for which"
-               " PrunablePath() is true) entries in each viterbi list"
-               " recorded in BLOB_CHOICEs",
-               dict->getCCUtil()->params()),
-    INT_MEMBER(language_model_viterbi_list_max_size, 500,
-               "Maximum size of viterbi lists recorded in BLOB_CHOICEs",
-               dict->getCCUtil()->params()),
-    double_MEMBER(language_model_ngram_small_prob, 0.000001,
-                  "To avoid overly small denominators use this as the "
-                  "floor of the probability returned by the ngram model.",
+    : INT_MEMBER(language_model_debug_level, 0, "Language model debug level",
+                 dict->getCCUtil()->params()),
+      BOOL_INIT_MEMBER(language_model_ngram_on, false,
+                       "Turn on/off the use of character ngram model",
+                       dict->getCCUtil()->params()),
+      INT_MEMBER(language_model_ngram_order, 8,
+                 "Maximum order of the character ngram model",
+                 dict->getCCUtil()->params()),
+      INT_MEMBER(language_model_viterbi_list_max_num_prunable, 10,
+                 "Maximum number of prunable (those for which"
+                 " PrunablePath() is true) entries in each viterbi list"
+                 " recorded in BLOB_CHOICEs",
+                 dict->getCCUtil()->params()),
+      INT_MEMBER(language_model_viterbi_list_max_size, 500,
+                 "Maximum size of viterbi lists recorded in BLOB_CHOICEs",
+                 dict->getCCUtil()->params()),
+      double_MEMBER(language_model_ngram_small_prob, 0.000001,
+                    "To avoid overly small denominators use this as the "
+                    "floor of the probability returned by the ngram model.",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_ngram_nonmatch_score, -40.0,
+                    "Average classifier score of a non-matching unichar.",
+                    dict->getCCUtil()->params()),
+      BOOL_MEMBER(language_model_ngram_use_only_first_uft8_step, false,
+                  "Use only the first UTF8 step of the given string"
+                  " when computing log probabilities.",
                   dict->getCCUtil()->params()),
-    double_MEMBER(language_model_ngram_nonmatch_score, -40.0,
-                  "Average classifier score of a non-matching unichar.",
-                  dict->getCCUtil()->params()),
-    BOOL_MEMBER(language_model_ngram_use_only_first_uft8_step, false,
-                "Use only the first UTF8 step of the given string"
-                " when computing log probabilities.",
-                dict->getCCUtil()->params()),
-    double_MEMBER(language_model_ngram_scale_factor, 0.03,
-                  "Strength of the character ngram model relative to the"
-                  " character classifier ",
-                  dict->getCCUtil()->params()),
-    double_MEMBER(language_model_ngram_rating_factor, 16.0,
-                  "Factor to bring log-probs into the same range as ratings"
-                  " when multiplied by outline length ",
-                  dict->getCCUtil()->params()),
-    BOOL_MEMBER(language_model_ngram_space_delimited_language, true,
-                "Words are delimited by space",
-                dict->getCCUtil()->params()),
-    INT_MEMBER(language_model_min_compound_length, 3,
-               "Minimum length of compound words",
-               dict->getCCUtil()->params()),
-    double_MEMBER(language_model_penalty_non_freq_dict_word, 0.1,
-                  "Penalty for words not in the frequent word dictionary",
-                  dict->getCCUtil()->params()),
-    double_MEMBER(language_model_penalty_non_dict_word, 0.15,
-                  "Penalty for non-dictionary words",
-                  dict->getCCUtil()->params()),
-    double_MEMBER(language_model_penalty_punc, 0.2,
-                  "Penalty for inconsistent punctuation",
-                  dict->getCCUtil()->params()),
-    double_MEMBER(language_model_penalty_case, 0.1,
-                  "Penalty for inconsistent case",
-                  dict->getCCUtil()->params()),
-    double_MEMBER(language_model_penalty_script, 0.5,
-                  "Penalty for inconsistent script",
-                  dict->getCCUtil()->params()),
-    double_MEMBER(language_model_penalty_chartype, 0.3,
-                  "Penalty for inconsistent character type",
-                  dict->getCCUtil()->params()),
-    // TODO(daria, rays): enable font consistency checking
-    // after improving font analysis.
-    double_MEMBER(language_model_penalty_font, 0.00,
-                  "Penalty for inconsistent font",
-                  dict->getCCUtil()->params()),
-    double_MEMBER(language_model_penalty_spacing, 0.05,
-                  "Penalty for inconsistent spacing",
-                  dict->getCCUtil()->params()),
-    double_MEMBER(language_model_penalty_increment, 0.01,
-                  "Penalty increment",
-                  dict->getCCUtil()->params()),
-    INT_MEMBER(wordrec_display_segmentations, 0, "Display Segmentations",
-               dict->getCCUtil()->params()),
-    BOOL_INIT_MEMBER(language_model_use_sigmoidal_certainty, false,
-                     "Use sigmoidal score for certainty",
-                     dict->getCCUtil()->params()),
-  dawg_args_(nullptr, new DawgPositionVector(), NO_PERM),
-  fontinfo_table_(fontinfo_table), dict_(dict),
-  fixed_pitch_(false), max_char_wh_ratio_(0.0),
-  acceptable_choice_found_(false) {
+      double_MEMBER(language_model_ngram_scale_factor, 0.03,
+                    "Strength of the character ngram model relative to the"
+                    " character classifier ",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_ngram_rating_factor, 16.0,
+                    "Factor to bring log-probs into the same range as ratings"
+                    " when multiplied by outline length ",
+                    dict->getCCUtil()->params()),
+      BOOL_MEMBER(language_model_ngram_space_delimited_language, true,
+                  "Words are delimited by space", dict->getCCUtil()->params()),
+      INT_MEMBER(language_model_min_compound_length, 3,
+                 "Minimum length of compound words",
+                 dict->getCCUtil()->params()),
+      double_MEMBER(language_model_penalty_non_freq_dict_word, 0.1,
+                    "Penalty for words not in the frequent word dictionary",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_penalty_non_dict_word, 0.15,
+                    "Penalty for non-dictionary words",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_penalty_punc, 0.2,
+                    "Penalty for inconsistent punctuation",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_penalty_case, 0.1,
+                    "Penalty for inconsistent case",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_penalty_script, 0.5,
+                    "Penalty for inconsistent script",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_penalty_chartype, 0.3,
+                    "Penalty for inconsistent character type",
+                    dict->getCCUtil()->params()),
+      // TODO(daria, rays): enable font consistency checking
+      // after improving font analysis.
+      double_MEMBER(language_model_penalty_font, 0.00,
+                    "Penalty for inconsistent font",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_penalty_spacing, 0.05,
+                    "Penalty for inconsistent spacing",
+                    dict->getCCUtil()->params()),
+      double_MEMBER(language_model_penalty_increment, 0.01, "Penalty increment",
+                    dict->getCCUtil()->params()),
+      INT_MEMBER(wordrec_display_segmentations, 0, "Display Segmentations",
+                 dict->getCCUtil()->params()),
+      BOOL_INIT_MEMBER(language_model_use_sigmoidal_certainty, false,
+                       "Use sigmoidal score for certainty",
+                       dict->getCCUtil()->params()),
+      dawg_args_(nullptr, new DawgPositionVector(), NO_PERM),
+      fontinfo_table_(fontinfo_table),
+      dict_(dict),
+      fixed_pitch_(false),
+      max_char_wh_ratio_(0.0),
+      acceptable_choice_found_(false) {
   ASSERT_HOST(dict_ != NULL);
 }
 
-LanguageModel::~LanguageModel() {
-  delete dawg_args_.updated_dawgs;
-}
+LanguageModel::~LanguageModel() { delete dawg_args_.updated_dawgs; }
 
 void LanguageModel::InitForWord(const WERD_CHOICE *prev_word,
                                 bool fixed_pitch, float max_char_wh_ratio,
@@ -797,8 +795,7 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
   // Deal with hyphenated words.
   if (word_end && dict_->has_hyphen_end(b.unichar_id(), curr_col == 0)) {
     if (language_model_debug_level > 0) tprintf("Hyphenated word found\n");
-    return new LanguageModelDawgInfo(dawg_args_.active_dawgs,
-                                     COMPOUND_PERM);
+    return new LanguageModelDawgInfo(dawg_args_.active_dawgs, COMPOUND_PERM);
   }
 
   // Deal with compound words.
@@ -811,7 +808,8 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
     // language_model_min_compound_length
     if (parent_vse == NULL || word_end ||
         dawg_args_.permuter == COMPOUND_PERM ||
-        parent_vse->length < language_model_min_compound_length) return NULL;
+        parent_vse->length < language_model_min_compound_length)
+      return NULL;
 
     int i;
     // Check a that the path terminated before the current character is a word.
diff --git a/wordrec/lm_pain_points.cpp b/wordrec/lm_pain_points.cpp
index 46a878ba1f..12c70ea712 100644
--- a/wordrec/lm_pain_points.cpp
+++ b/wordrec/lm_pain_points.cpp
@@ -1,5 +1,5 @@
 ///////////////////////////////////////////////////////////////////////
-// File:        lm_pain_points.cpp
+// File:        lm_pain_points.cpp
 // Description: Functions that utilize the knowledge about the properties
 //              of the paths explored by the segmentation search in order
 //              to "pain points" - the locations in the ratings matrix
diff --git a/wordrec/lm_state.h b/wordrec/lm_state.h
index 987b9818b3..9c41fb240b 100644
--- a/wordrec/lm_state.h
+++ b/wordrec/lm_state.h
@@ -60,7 +60,7 @@ typedef unsigned char LanguageModelFlagsType;
 /// letters on a path can be found.
 struct LanguageModelDawgInfo {
   LanguageModelDawgInfo(const DawgPositionVector *a, PermuterType pt)
-    : active_dawgs(*a), permuter(pt) {}
+      : active_dawgs(*a), permuter(pt) {}
   DawgPositionVector active_dawgs;
   PermuterType permuter;
 };
@@ -125,9 +125,9 @@ struct ViterbiStateEntry : public ELIST_LINK {
   /// non-increasing order of costs.
   static int Compare(const void *e1, const void *e2) {
     const ViterbiStateEntry *ve1 =
-      *static_cast<const ViterbiStateEntry * const *>(e1);
+        *static_cast<const ViterbiStateEntry *const *>(e1);
     const ViterbiStateEntry *ve2 =
-      *static_cast<const ViterbiStateEntry * const *>(e2);
+        *static_cast<const ViterbiStateEntry *const *>(e2);
     return (ve1->cost < ve2->cost) ? -1 : 1;
   }
   inline bool Consistent() const {
diff --git a/wordrec/outlines.cpp b/wordrec/outlines.cpp
index f4e6cc17d1..bcbd8adb93 100644
--- a/wordrec/outlines.cpp
+++ b/wordrec/outlines.cpp
@@ -24,7 +24,7 @@
  ********************************************************************************
  * Revision 1.2  89/09/15  09:24:41  09:24:41  marks (Mark Seaman)
  * First released version of Combinatorial splitter code
-**/
+ **/
 /*----------------------------------------------------------------------
               I n c l u d e s
 ----------------------------------------------------------------------*/
diff --git a/wordrec/pieces.cpp b/wordrec/pieces.cpp
index 7e5770ce1f..16f7dd2e65 100644
--- a/wordrec/pieces.cpp
+++ b/wordrec/pieces.cpp
@@ -75,16 +75,16 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector<SEAM*>& seams,
 
 template<class BLOB_CHOICE>
 int SortByUnicharID(const void *void1, const void *void2) {
-  const BLOB_CHOICE *p1 = *static_cast<const BLOB_CHOICE * const *>(void1);
-  const BLOB_CHOICE *p2 = *static_cast<const BLOB_CHOICE * const *>(void2);
+  const BLOB_CHOICE *p1 = *static_cast<const BLOB_CHOICE *const *>(void1);
+  const BLOB_CHOICE *p2 = *static_cast<const BLOB_CHOICE *const *>(void2);
 
   return p1->unichar_id() - p2->unichar_id();
 }
 
 template<class BLOB_CHOICE>
 int SortByRating(const void *void1, const void *void2) {
-  const BLOB_CHOICE *p1 = *static_cast<const BLOB_CHOICE * const *>(void1);
-  const BLOB_CHOICE *p2 = *static_cast<const BLOB_CHOICE * const *>(void2);
+  const BLOB_CHOICE *p1 = *static_cast<const BLOB_CHOICE *const *>(void1);
+  const BLOB_CHOICE *p2 = *static_cast<const BLOB_CHOICE *const *>(void2);
 
   if (p1->rating() < p2->rating())
     return 1;