diff --git a/.gitignore b/.gitignore
index 6fa0865df7..8a3d338b50 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,6 +52,7 @@ training/wordlist2dawg
 *.patch
 
 # ignore compilation files
+build/*
 */.deps/*
 */.libs/*
 *.lo
@@ -74,4 +75,8 @@ kernel*.bin
 # build dirs
 /build*
 /cppan
-/win*
\ No newline at end of file
+/.cppan
+/win*
+*.dll
+*.exe
+*.lnk
diff --git a/AUTHORS b/AUTHORS
index 4252027d51..4d9c75c4aa 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -2,12 +2,14 @@ Ray Smith (lead developer) <theraysmith@gmail.com>
 Ahmad Abdulkader
 Rika Antonova
 Nicholas Beato
+Jeff Breidenbach
 Samuel Charron
 Phil Cheatle
 Simon Crouch
 David Eger
 Sheelagh Huddleston
 Dan Johnson
+Rajesh Katikam
 Thomas Kielbus
 Dar-Shyang Lee
 Zongyi (Joe) Liu
@@ -26,3 +28,15 @@ Joern Wanke
 Ping Ping Xiu
 Andrew Ziem
 Oscar Zuniga
+
+Community Contributors:
+Zdenko Podobný (Maintainer)
+Jim Regan (Maintainer)
+James R Barlow
+Amit Dovev
+Martin Ettl
+Tom Morris
+Tobias Müller
+Egor Pugin
+Sundar M. Vaidya
+Stefan Weil
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e4bc5cbb9e..68473fb4ca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,19 +46,25 @@ set(VERSION_PLAIN ${VERSION_MAJOR}.${VERSION_MINOR})
 
 set(MINIMUM_LEPTONICA_VERSION 1.71)
 
-if(NOT EXISTS ${PROJECT_SOURCE_DIR}/cppan)
+if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.cppan)  # no .cppan directory in the source tree: locate Leptonica on the system
     if (NOT Leptonica_DIR AND NOT MSVC)
         find_package(PkgConfig REQUIRED)
-        pkg_check_modules(Leptonica REQUIRED lept)
+        pkg_check_modules(Leptonica REQUIRED lept>=${MINIMUM_LEPTONICA_VERSION})  # fail unless pkg-config reports at least the minimum version
     else()
         find_package(Leptonica ${MINIMUM_LEPTONICA_VERSION} REQUIRED CONFIG)
     endif()
 else()
-    add_subdirectory(cppan)
+    if (STATIC)  # translate STATIC into CPPAN_BUILD_SHARED_LIBS before .cppan is added
+        set(CPPAN_BUILD_SHARED_LIBS 0)
+    else()
+        set(CPPAN_BUILD_SHARED_LIBS 1)
+    endif()
+    add_subdirectory(.cppan)
 endif()
 
 find_package(OpenCL QUIET)
-find_package(PkgConfig)
+
+option(BUILD_TRAINING_TOOLS "Build training tools" ON)
 
 ###############################################################################
 #
@@ -76,6 +82,9 @@ if (WIN32)
         add_definitions(-D_CRT_SECURE_NO_WARNINGS)
 
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
+        if (APPVEYOR)
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W0")
+        endif()
     endif()
 
     set(LIB_Ws2_32 Ws2_32)
@@ -201,25 +210,33 @@ set(tesseract_src ${tesseract_src}
     api/pdfrenderer.cpp
 )
 
-add_library                     (tesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr})
+add_library                     (libtesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr})
 if (NOT STATIC)
-target_compile_definitions      (tesseract PUBLIC -DTESS_EXPORTS)
+target_compile_definitions      (libtesseract
+    PRIVATE -DTESS_EXPORTS      # applied only when compiling the library itself
+    INTERFACE -DTESS_IMPORTS    # applied only to targets that link libtesseract
+)
+set_target_properties           (libtesseract PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS True)
 endif()
-target_link_libraries           (tesseract ${LIB_Ws2_32} ${LIB_pthread})
-set_target_properties           (tesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
-set_target_properties           (tesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
+target_link_libraries           (libtesseract ${LIB_Ws2_32} ${LIB_pthread})
+set_target_properties           (libtesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
+set_target_properties           (libtesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
 if (WIN32)
-set_target_properties           (tesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR})
-set_target_properties           (tesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d)
+set_target_properties           (libtesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR})
+set_target_properties           (libtesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d)
+else()
+# The target is now called libtesseract, so without an explicit OUTPUT_NAME the
+# default "lib" prefix would produce liblibtesseract.* on non-Windows platforms.
+set_target_properties           (libtesseract PROPERTIES OUTPUT_NAME tesseract)
 endif()
 
 if (NOT CPPAN_BUILD)
-    target_link_libraries       (tesseract ${Leptonica_LIBRARIES})
-    export(TARGETS tesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
+    target_link_libraries       (libtesseract ${Leptonica_LIBRARIES})
+    export(TARGETS libtesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
 else()
-    target_link_libraries       (tesseract cppan)
+    target_link_libraries       (libtesseract pvt.cppan.demo.danbloomberg.leptonica)
     file(WRITE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake "include(${CMAKE_BINARY_DIR}/cppan.cmake)\n")
-    export(TARGETS tesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
+    export(TARGETS libtesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
 endif()
 
 ########################################
@@ -231,12 +248,13 @@ set(tesseractmain_src
     vs2010/tesseract/resource.h
     vs2010/tesseract/tesseract.rc
 )
-add_executable                  (tesseractmain ${tesseractmain_src})
-target_link_libraries           (tesseractmain tesseract)
-set_target_properties           (tesseractmain PROPERTIES OUTPUT_NAME tesseract)
+add_executable                  (tesseract ${tesseractmain_src})  # the executable now owns the plain "tesseract" target name
+target_link_libraries           (tesseract libtesseract)
 
 ########################################
 
+if (BUILD_TRAINING_TOOLS)
 add_subdirectory(training)
+endif()
 
 ###############################################################################
diff --git a/ChangeLog b/ChangeLog
index 492d6984c9..4836aeca73 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2017-02-16 - V3.05.00
+  * Fine-tuned the hOCR output.
+  * Added TSV as another optional output format.
+  * Fixed ABI break introduced in 3.04.00 with the AnalyseLayout() method.
+  * text2image tool - Enable all OpenType ligatures available in a font. This feature requires Pango 1.38 or newer.
+  * Training tools - Replaced asserts with tprintf() and exit(1).
+  * Fixed Cygwin compatibility.
+  * Improved multipage tiff processing.
+  * Improved the embedded pdf font (pdf.ttf).
+  * Enable selection of OCR engine mode from command line.
+  * Changed tesseract command line parameter '-psm' to '--psm'.
+  * Write output of tesseract --help, --version and --list-langs to stdout instead of stderr.
+  * Added new C API for orientation and script detection, removed the old one.
+  * Increased minimum autoconf version to 2.59.
+  * Removed dead code.
+  * Fixed many compiler warnings.
+  * Fixed memory and resource leaks.
+  * Fixed some issues with the 'Cube' OCR engine.
+  * Fixed some OpenCL issues.
+  * Added option to build Tesseract with CMake build system.
+  * Implemented CPPAN support for easy Windows building.
+
 2016-02-17 - V3.04.01
   * Added OSD renderer for psm 0. Works for single page and multi-page images.
   * Improve tesstrain.sh script.
diff --git a/INSTALL.GIT.md b/INSTALL.GIT.md
index 07acbb0b01..31277e15fb 100644
--- a/INSTALL.GIT.md
+++ b/INSTALL.GIT.md
@@ -24,7 +24,7 @@ So, the steps for making Tesseract are:
 You need to install at least English language and OSD data files to TESSDATA_PREFIX
 directory. You can retrieve single file with tools like [wget](https://www.gnu.org/software/wget/), [curl](https://curl.haxx.se/), [GithubDownloader](https://github.com/intezer/GithubDownloader) or browser.
 
-All language data files can be retrieved from git repository (usefull only for packagers!):
+All language data files can be retrieved from git repository (useful only for packagers!):
 
     $ git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata
 
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/Makefile.am b/Makefile.am
index a4aa1dd915..8e2dbcf42f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -4,9 +4,12 @@ ACLOCAL_AMFLAGS = -I m4
 if ENABLE_TRAINING
 TRAINING_SUBDIR = training
 training:
+	$(MAKE)
 	@cd "$(top_builddir)/training" && $(MAKE)
 training-install:
 	@cd "$(top_builddir)/training" && $(MAKE) install
+training-uninstall:
+	@cd "$(top_builddir)/training" && $(MAKE) uninstall
 clean-local:
 	@cd "$(top_builddir)/training" && $(MAKE) clean
 else
diff --git a/README.md b/README.md
index fd64016d78..9dbdd8d2aa 100644
--- a/README.md
+++ b/README.md
@@ -5,25 +5,26 @@ For the latest online version of the README.md see:
     
   https://github.com/tesseract-ocr/tesseract/blob/master/README.md
 
-#About
+# About
 
 This package contains an OCR engine - `libtesseract` and a command line program - `tesseract`.
 
 The lead developer is Ray Smith. The maintainer is Zdenko Podobny. 
-For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS) and github's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
+For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS)
+and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
 
 Tesseract has unicode (UTF-8) support, and can recognize more than 100
 languages "out of the box". It can be trained to recognize other languages. See [Tesseract Training](https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract) for more information. 
 
 Tesseract supports various output formats: plain-text, hocr(html), pdf.
 
-This project does not include a GUI application. If you need one, please see the [3rdParty](https://github.com/tesseract-ocr/tesseract/wiki/3rdParty) wiki page.
+This project does not include a GUI application. If you need one, please see the [3rdParty](https://github.com/tesseract-ocr/tesseract/wiki/User-Projects-%E2%80%93-3rdParty) wiki page.
 
 You should note that in many cases, in order to get better OCR results, you'll need to [improve the quality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) of the image you are giving Tesseract.
 
-The latest stable version is 3.04.01, released in February 2016.
+The latest stable version is 3.05.00, released in February 2017.
 
-#Brief history
+# Brief history
 
 Tesseract was originally developed at Hewlett-Packard Laboratories Bristol and
 at Hewlett-Packard Co, Greeley Colorado between 1985 and 1994, with some
@@ -33,13 +34,13 @@ In 2005 Tesseract was open sourced by HP. Since 2006 it is developed by Google.
 
 [Release Notes](https://github.com/tesseract-ocr/tesseract/wiki/ReleaseNotes)
 
-#For developers
+# For developers
 
 Developers can use `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/master/api/capi.h) or [C++](https://github.com/tesseract-ocr/tesseract/blob/master/api/baseapi.h) API to build their own application. If you need bindings to `libtesseract` for other programming languages, please see the [wrapper](https://github.com/tesseract-ocr/tesseract/wiki/AddOns#tesseract-wrappers) section on AddOns wiki page.
 
 Documentation of Tesseract generated from source code by doxygen can be found on [tesseract-ocr.github.io](http://tesseract-ocr.github.io/).
 
-#License
+# License
 
     The code in this repository is licensed under the Apache License, Version 2.0 (the "License");
     you may not use this file except in compliance with the License.
@@ -55,19 +56,19 @@ Documentation of Tesseract generated from source code by doxygen can be found on
 
 **NOTE**: This software depends on other packages that may be licensed under different open source licenses.
 
-#Installing Tesseract
+# Installing Tesseract
 
 You can either [Install Tesseract via pre-built binary package](https://github.com/tesseract-ocr/tesseract/wiki) or [build it from source](https://github.com/tesseract-ocr/tesseract/wiki/Compiling).
 
-#Running Tesseract
+# Running Tesseract
 
 Basic command line usage:
 
-    tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...]
+    tesseract imagename outputbase [-l lang] [--psm pagesegmode] [configfiles...]
 
 For more information about the various command line options use `tesseract --help` or `man tesseract`. 
 
-#Support
+# Support
 
 Mailing-lists:
 * [tesseract-ocr](https://groups.google.com/d/forum/tesseract-ocr) - For tesseract users. 
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index 4624801469..fd0e5112f0 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -4,7 +4,7 @@ include $(CLEAR_VARS)
 LOCAL_MODULE := tesseract-$(APP_ABI)
 
 LOCAL_STATIC_LIBRARIES := \
-    mobile_base \
+    base \
     leptonica-$(APP_ABI)
 
 LOCAL_C_INCLUDES := $(APP_C_INCLUDES)
@@ -51,7 +51,6 @@ LOCAL_CFLAGS := -DANDROID_BUILD -DNO_CUBE_BUILD -DGRAPHICS_DISABLED
 
 include $(BUILD_SHARED_LIBRARY)
 
-$(call import-module,mobile/base)
-$(call import-module,mobile/base)
+$(call import-module,base/port)
 $(call import-module,mobile/util/hash)
 $(call import-module,third_party/leptonica/android/jni)
diff --git a/api/Makefile.am b/api/Makefile.am
index 9d20919b2e..df4bcd6368 100644
--- a/api/Makefile.am
+++ b/api/Makefile.am
@@ -81,15 +81,10 @@ tesseract_LDADD = libtesseract.la
 
 tesseract_LDFLAGS = $(OPENCL_LDFLAGS)
 
-if OPENMP
-tesseract_LDADD += $(OPENMP_CFLAGS)
-endif
-
 if T_WIN
-tesseract_LDADD += -lws2_32
+tesseract_LDADD += -lws2_32 -ltiff
 libtesseract_la_LDFLAGS += -no-undefined -Wl,--as-needed -lws2_32
 endif
 if ADD_RT
 tesseract_LDADD += -lrt
 endif
-
diff --git a/api/baseapi.cpp b/api/baseapi.cpp
index a0689978e5..d552b8d1c3 100644
--- a/api/baseapi.cpp
+++ b/api/baseapi.cpp
@@ -34,8 +34,6 @@
 // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
 #undef __STRICT_ANSI__
 #endif  // _MSC_VER
-#include <stdlib.h>
-#include <windows.h>
 #include <fcntl.h>
 #include <io.h>
 #else
@@ -121,7 +119,6 @@ TessBaseAPI::TessBaseAPI()
     block_list_(NULL),
     page_res_(NULL),
     input_file_(NULL),
-    input_image_(NULL),
     output_file_(NULL),
     datapath_(NULL),
     language_(NULL),
@@ -130,6 +127,7 @@ TessBaseAPI::TessBaseAPI()
     truth_cb_(NULL),
     rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0),
     image_width_(0), image_height_(0) {
+    unknown_title_ = "";
 }
 
 TessBaseAPI::~TessBaseAPI() {
@@ -515,9 +513,7 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
 
 /**
  * Provide an image for Tesseract to recognize. Format is as
- * TesseractRect above. Does not copy the image buffer, or take
- * ownership. The source image may be destroyed after Recognize is called,
- * either explicitly or implicitly via one of the Get*Text functions.
+ * TesseractRect above. Copies the image buffer and converts to Pix.
  * SetImage clears all recognition results, and sets the rectangle to the
  * full image, so it may be followed immediately by a GetUTF8Text, and it
  * will automatically perform recognition.
@@ -525,9 +521,11 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
 void TessBaseAPI::SetImage(const unsigned char* imagedata,
                            int width, int height,
                            int bytes_per_pixel, int bytes_per_line) {
-  if (InternalSetImage())
+  if (InternalSetImage()) {
     thresholder_->SetImage(imagedata, width, height,
                            bytes_per_pixel, bytes_per_line);
+    SetInputImage(thresholder_->GetPixRect());
+  }
 }
 
 void TessBaseAPI::SetSourceResolution(int ppi) {
@@ -539,18 +537,17 @@ void TessBaseAPI::SetSourceResolution(int ppi) {
 
 /**
  * Provide an image for Tesseract to recognize. As with SetImage above,
- * Tesseract doesn't take a copy or ownership or pixDestroy the image, so
- * it must persist until after Recognize.
+ * Tesseract takes its own copy of the image, so it need not persist until
+ * after Recognize.
  * Pix vs raw, which to use?
- * Use Pix where possible. A future version of Tesseract may choose to use Pix
- * as its internal representation and discard IMAGE altogether.
- * Because of that, an implementation that sources and targets Pix may end up
- * with less copies than an implementation that does not.
+ * Use Pix where possible. Tesseract uses Pix as its internal representation
+ * and it is therefore more efficient to provide a Pix directly.
  */
 void TessBaseAPI::SetImage(Pix* pix) {
-  if (InternalSetImage())
+  if (InternalSetImage()) {
     thresholder_->SetImage(pix);
-  SetInputImage(pix);
+    SetInputImage(thresholder_->GetPixRect());
+  }
 }
 
 /**
@@ -693,8 +690,8 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
       if (pixa != NULL) {
         Pix* pix = NULL;
         if (raw_image) {
-          pix = page_it->GetImage(level, raw_padding, input_image_,
-                                  &left, &top);
+          pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
+                                  &top);
         } else {
           pix = page_it->GetBinaryImage(level);
         }
@@ -809,9 +806,7 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
  * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
-PageIterator* TessBaseAPI::AnalyseLayout() {
-  return AnalyseLayout(false);
-}
+PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); }
 
 PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
   if (FindLines() == 0) {
@@ -836,8 +831,7 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
     return -1;
   if (FindLines() != 0)
     return -1;
-  if (page_res_ != NULL)
-    delete page_res_;
+  delete page_res_;
   if (block_list_->empty()) {
     page_res_ = new PAGE_RES(false, block_list_,
                              &tesseract_->prev_word_best_choice_);
@@ -940,17 +934,10 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
   return 0;
 }
 
-void TessBaseAPI::SetInputImage(Pix *pix) {
-  if (input_image_)
-    pixDestroy(&input_image_);
-  input_image_ = NULL;
-  if (pix)
-    input_image_ = pixCopy(NULL, pix);
-}
+// Takes ownership of the input pix, passing it to tesseract_->set_pix_original.
+void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); }
 
-Pix* TessBaseAPI::GetInputImage() {
-  return input_image_;
-}
+Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); }
 
 const char * TessBaseAPI::GetInputName() {
   if (input_file_)
@@ -994,8 +981,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
   }
 
   // Begin producing output
-  const char* kUnknownTitle = "";
-  if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
+  if (renderer && !renderer->BeginDocument(unknown_title_)) {
     return false;
   }
 
@@ -1042,6 +1028,7 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
   OpenclDevice od;
 #endif  // USE_OPENCL
   int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
+  size_t offset = 0;
   for (; ; ++page) {
     if (tessedit_page_number >= 0)
       page = tessedit_page_number;
@@ -1052,9 +1039,8 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
           od.pixReadTiffCl(filename, page);
     } else {
 #endif  // USE_OPENCL
-      pix = (data) ?
-          pixReadMemTiff(data, size, page) :
-          pixReadTiff(filename, page);
+    pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
+                 : pixReadFromMultipageTiff(filename, &offset);
 #ifdef USE_OPENCL
     }
 #endif  // USE_OPENCL
@@ -1068,6 +1054,7 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
     pixDestroy(&pix);
     if (!r) return false;
     if (tessedit_page_number >= 0) break;
+    if (!offset) break;
   }
   return true;
 #else
@@ -1107,7 +1094,6 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
                                        const char* retry_config,
                                        int timeout_millisec,
                                        TessResultRenderer* renderer) {
-#ifndef ANDROID_BUILD
   PERF_COUNT_START("ProcessPages")
   bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
   if (stdInput) {
@@ -1142,7 +1128,15 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
 
   // Maybe we have a filelist
   if (r != 0 || format == IFF_UNKNOWN) {
-    STRING s(buf.c_str());
+    STRING s;
+    if (stdInput) {
+      s = buf.c_str();
+    } else {
+      std::ifstream t(filename);
+      std::string u((std::istreambuf_iterator<char>(t)),
+                    std::istreambuf_iterator<char>());
+      s = u.c_str();
+    }
     return ProcessPagesFileList(NULL, &s, retry_config,
                                 timeout_millisec, renderer,
                                 tesseract_->tessedit_page_number);
@@ -1164,8 +1158,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
   }
 
   // Begin the output
-  const char* kUnknownTitle = "";
-  if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
+  if (renderer && !renderer->BeginDocument(unknown_title_)) {
     pixDestroy(&pix);
     return false;
   }
@@ -1187,9 +1180,6 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
   }
   PERF_COUNT_END
   return true;
-#else
-  return false;
-#endif
 }
 
 bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
@@ -1379,8 +1369,9 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it,
   hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
 }
 
-static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2) {
-  const unsigned long BUFSIZE = 64;
+static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
+                        int num2) {
+  const size_t BUFSIZE = 64;
   char id_buffer[BUFSIZE];
   if (num2 >= 0) {
     snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
@@ -1393,8 +1384,7 @@ static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int
   *hocr_str += "'";
 }
 
-static void AddBoxTohOCR(const ResultIterator *it,
-                         PageIteratorLevel level,
+static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
                          STRING* hocr_str) {
   int left, top, right, bottom;
   it->BoundingBox(level, &left, &top, &right, &bottom);
@@ -1410,7 +1400,7 @@ static void AddBoxTohOCR(const ResultIterator *it,
     // add custom height measures
     float row_height, descenders, ascenders;  // row attributes
     it->RowAttributes(&row_height, &descenders, &ascenders);
-    // TODO: Do we want to limit these to a single decimal place?
+    // TODO(rays): Do we want to limit these to a single decimal place?
     hocr_str->add_str_double("; x_size ", row_height);
     hocr_str->add_str_double("; x_descenders ", descenders * -1);
     hocr_str->add_str_double("; x_ascenders ", ascenders);
@@ -1418,9 +1408,8 @@ static void AddBoxTohOCR(const ResultIterator *it,
   *hocr_str += "\">";
 }
 
-static void AddBoxToTSV(const PageIterator *it,
-                         PageIteratorLevel level,
-                         STRING* hocr_str) {
+static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
+                        STRING* hocr_str) {
   int left, top, right, bottom;
   it->BoundingBox(level, &left, &top, &right, &bottom);
   hocr_str->add_str_int("\t", left);
@@ -1429,8 +1418,6 @@ static void AddBoxToTSV(const PageIterator *it,
   hocr_str->add_str_int("\t", bottom - top);
 }
 
-
-
 /**
  * Make a HTML-formatted string with hOCR markup from the internal
  * data structures.
@@ -1440,7 +1427,7 @@ static void AddBoxToTSV(const PageIterator *it,
  * STL removed from original patch submission and refactored by rays.
  */
 char* TessBaseAPI::GetHOCRText(int page_number) {
-  return GetHOCRText(NULL,page_number);
+  return GetHOCRText(NULL, page_number);
 }
 
 /**
@@ -1452,13 +1439,12 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
  * STL removed from original patch submission and refactored by rays.
  */
 char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
-  if (tesseract_ == NULL ||
-      (page_res_ == NULL && Recognize(monitor) < 0))
+  if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0))
     return NULL;
 
   int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
   int page_id = page_number + 1;  // hOCR uses 1-based page numbers.
-  bool para_is_ltr = true; // Default direction is LTR
+  bool para_is_ltr = true;        // Default direction is LTR
   const char* paragraph_lang = NULL;
   bool font_info = false;
   GetBoolVariable("hocr_font_info", &font_info);
@@ -1470,13 +1456,13 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
 
 #ifdef _WIN32
   // convert input name from ANSI encoding to utf-8
-  int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
-                                      NULL, 0);
+  int str16_len =
+      MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, 0);
   wchar_t *uni16_str = new WCHAR[str16_len];
   str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
                                   uni16_str, str16_len);
-  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL,
-                                     0, NULL, NULL);
+  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 0,
+                                     NULL, NULL);
   char *utf8_str = new char[utf8_len];
   WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
                       utf8_len, NULL, NULL);
@@ -1509,7 +1495,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
 
     // Open any new block/paragraph/textline.
     if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-      para_is_ltr = true; // reset to default direction
+      para_is_ltr = true;  // reset to default direction
       hocr_str += "   <div class='ocr_carea'";
       AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
       AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
@@ -1523,9 +1509,9 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
       AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
       paragraph_lang = res_it->WordRecognitionLanguage();
       if (paragraph_lang) {
-          hocr_str += " lang='";
-          hocr_str += paragraph_lang;
-          hocr_str += "'";
+        hocr_str += " lang='";
+        hocr_str += paragraph_lang;
+        hocr_str += "'";
       }
       AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
     }
@@ -1567,8 +1553,12 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
     }
     switch (res_it->WordDirection()) {
       // Only emit direction if different from current paragraph direction
-      case DIR_LEFT_TO_RIGHT: if (!para_is_ltr) hocr_str += " dir='ltr'"; break;
-      case DIR_RIGHT_TO_LEFT: if (para_is_ltr) hocr_str += " dir='rtl'"; break;
+      case DIR_LEFT_TO_RIGHT:
+        if (!para_is_ltr) hocr_str += " dir='ltr'";
+        break;
+      case DIR_RIGHT_TO_LEFT:
+        if (para_is_ltr) hocr_str += " dir='rtl'";
+        break;
       case DIR_MIX:
       case DIR_NEUTRAL:
       default:  // Do nothing.
@@ -1600,7 +1590,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
     if (last_word_in_para) {
       hocr_str += "\n    </p>\n";
       pcnt++;
-      para_is_ltr = true; // back to default direction
+      para_is_ltr = true;  // back to default direction
     }
     if (last_word_in_block) {
       hocr_str += "   </div>\n";
@@ -1620,8 +1610,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
  * page_number is 0-based but will appear in the output as 1-based.
  */
 char* TessBaseAPI::GetTSVText(int page_number) {
-  if (tesseract_ == NULL ||
-      (page_res_ == NULL && Recognize(NULL) < 0))
+  if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0))
     return NULL;
 
   int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
@@ -1629,9 +1618,10 @@ char* TessBaseAPI::GetTSVText(int page_number) {
 
   STRING tsv_str("");
 
-  int page_num = page_id, block_num = 0, par_num = 0, line_num = 0, word_num = 0;
+  int page_num = page_id, block_num = 0, par_num = 0, line_num = 0,
+      word_num = 0;
 
-  tsv_str.add_str_int("1\t", page_num); // level 1 - page
+  tsv_str.add_str_int("1\t", page_num);  // level 1 - page
   tsv_str.add_str_int("\t", block_num);
   tsv_str.add_str_int("\t", par_num);
   tsv_str.add_str_int("\t", line_num);
@@ -1642,7 +1632,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
   tsv_str.add_str_int("\t", rect_height_);
   tsv_str += "\t-1\t\n";
 
-  ResultIterator *res_it = GetIterator();
+  ResultIterator* res_it = GetIterator();
   while (!res_it->Empty(RIL_BLOCK)) {
     if (res_it->Empty(RIL_WORD)) {
       res_it->Next(RIL_WORD);
@@ -1652,46 +1642,46 @@ char* TessBaseAPI::GetTSVText(int page_number) {
     // Add rows for any new block/paragraph/textline.
     if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
       block_num++, par_num = 0, line_num = 0, word_num = 0;
-      tsv_str.add_str_int("2\t", page_num); // level 2 - block
+      tsv_str.add_str_int("2\t", page_num);  // level 2 - block
       tsv_str.add_str_int("\t", block_num);
       tsv_str.add_str_int("\t", par_num);
       tsv_str.add_str_int("\t", line_num);
       tsv_str.add_str_int("\t", word_num);
       AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
-      tsv_str += "\t-1\t\n"; // end of row for block
+      tsv_str += "\t-1\t\n";  // end of row for block
     }
     if (res_it->IsAtBeginningOf(RIL_PARA)) {
       par_num++, line_num = 0, word_num = 0;
-      tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
+      tsv_str.add_str_int("3\t", page_num);  // level 3 - paragraph
       tsv_str.add_str_int("\t", block_num);
       tsv_str.add_str_int("\t", par_num);
       tsv_str.add_str_int("\t", line_num);
       tsv_str.add_str_int("\t", word_num);
       AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
-      tsv_str += "\t-1\t\n"; // end of row for para
+      tsv_str += "\t-1\t\n";  // end of row for para
     }
     if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
       line_num++, word_num = 0;
-      tsv_str.add_str_int("4\t", page_num); // level 4 - line
+      tsv_str.add_str_int("4\t", page_num);  // level 4 - line
       tsv_str.add_str_int("\t", block_num);
       tsv_str.add_str_int("\t", par_num);
       tsv_str.add_str_int("\t", line_num);
       tsv_str.add_str_int("\t", word_num);
       AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
-      tsv_str += "\t-1\t\n"; // end of row for line
+      tsv_str += "\t-1\t\n";  // end of row for line
     }
 
     // Now, process the word...
     int left, top, right, bottom;
     bool bold, italic, underlined, monospace, serif, smallcaps;
     int pointsize, font_id;
-    const char *font_name;
+    const char* font_name;
     res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-    font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
-                                           &monospace, &serif, &smallcaps,
-                                           &pointsize, &font_id);
+    font_name =
+        res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
+                                   &serif, &smallcaps, &pointsize, &font_id);
     word_num++;
-    tsv_str.add_str_int("5\t", page_num); // level 5 - word
+    tsv_str.add_str_int("5\t", page_num);  // level 5 - word
     tsv_str.add_str_int("\t", block_num);
     tsv_str.add_str_int("\t", par_num);
     tsv_str.add_str_int("\t", line_num);
@@ -1712,11 +1702,11 @@ char* TessBaseAPI::GetTSVText(int page_number) {
       tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
       res_it->Next(RIL_SYMBOL);
     } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
-    tsv_str += "\n"; // end of row
+    tsv_str += "\n";  // end of row
     wcnt++;
   }
 
-  char *ret = new char[tsv_str.length() + 1];
+  char* ret = new char[tsv_str.length() + 1];
   strcpy(ret, tsv_str.string());
   delete res_it;
   return ret;
@@ -1760,7 +1750,7 @@ char* TessBaseAPI::GetBoxText(int page_number) {
   int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
       kMaxBytesPerLine;
   char* result = new char[total_length];
-  strcpy(result, "\0");
+  result[0] = '\0';
   int output_length = 0;
   LTRResultIterator* it = GetLTRIterator();
   do {
@@ -1907,43 +1897,70 @@ char* TessBaseAPI::GetUNLVText() {
   return result;
 }
 
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-char* TessBaseAPI::GetOsdText(int page_number) {
+/**
+ * Detect the orientation of the input image and apparent script (alphabet).
+ * orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270)
+ * orient_conf is the confidence (15.0 is reasonably confident)
+ * script_name is an ASCII string, the name of the script, e.g. "Latin"
+ * script_conf is confidence level in the script
+ * Returns true on success, writing a value through each non-NULL output pointer.
+ */
+bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) {
   OSResults osr;
 
   bool osd = DetectOS(&osr);
   if (!osd) {
-     return NULL;
+    return false;
   }
 
   int orient_id = osr.best_result.orientation_id;
   int script_id = osr.get_best_script(orient_id);
-  float orient_conf = osr.best_result.oconfidence;
-  float script_conf = osr.best_result.sconfidence;
-  const char* script_name =
+  if (orient_conf)
+    *orient_conf = osr.best_result.oconfidence;
+  if (orient_deg)
+    *orient_deg = orient_id * 90; // convert quadrant to degrees
+
+  if (script_name) {
+    const char* script =
       osr.unicharset->get_script_from_script_id(script_id);
 
-  // clockwise orientation of the input image, in degrees
-  int orient_deg = orient_id * 90;
+    *script_name = script;
+  }
+
+  if (script_conf)
+    *script_conf = osr.best_result.sconfidence;
+  
+  return true;
+}
+
+/**
+ * The recognized text is returned as a char* which is coded
+ * as UTF8 and must be freed with the delete [] operator.
+ * page_number is a 0-based page index that will appear in the osd file.
+ */
+char* TessBaseAPI::GetOsdText(int page_number) {
+  int orient_deg;
+  float orient_conf;
+  const char* script_name;
+  float script_conf;
+
+  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf))
+    return NULL;
 
   // clockwise rotation needed to make the page upright
-  int rotate =  OrientationIdToValue(orient_id);
-
-  char* osd_buf = new char[255];
-  snprintf(osd_buf, 255,
-          "Page number: %d\n"
-          "Orientation in degrees: %d\n"
-          "Rotate: %d\n"
-          "Orientation confidence: %.2f\n"
-          "Script: %s\n"
-          "Script confidence: %.2f\n",
-          page_number,
-          orient_deg, rotate, orient_conf,
-          script_name, script_conf);
+  int rotate = OrientationIdToValue(orient_deg / 90);
+
+  const int kOsdBufsize = 255;
+  char* osd_buf = new char[kOsdBufsize];
+  snprintf(osd_buf, kOsdBufsize,
+           "Page number: %d\n"
+           "Orientation in degrees: %d\n"
+           "Rotate: %d\n"
+           "Orientation confidence: %.2f\n"
+           "Script: %s\n"
+           "Script confidence: %.2f\n",
+           page_number, orient_deg, rotate, orient_conf, script_name,
+           script_conf);
 
   return osd_buf;
 }
@@ -2063,7 +2080,7 @@ void TessBaseAPI::Clear() {
   if (thresholder_ != NULL)
     thresholder_->Clear();
   ClearResults();
-  SetInputImage(NULL);
+  if (tesseract_ != NULL) SetInputImage(NULL);
 }
 
 /**
@@ -2109,10 +2126,6 @@ void TessBaseAPI::End() {
     delete input_file_;
     input_file_ = NULL;
   }
-  if (input_image_ != NULL) {
-    pixDestroy(&input_image_);
-    input_image_ = NULL;
-  }
   if (output_file_ != NULL) {
     delete output_file_;
     output_file_ = NULL;
@@ -2243,6 +2256,8 @@ void TessBaseAPI::Threshold(Pix** pix) {
   if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
     // Use the minimum default resolution, as it is safer to under-estimate
     // than over-estimate resolution.
+    tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
+            y_res, kMinCredibleResolution);
     thresholder_->SetSourceYResolution(kMinCredibleResolution);
   }
   PageSegMode pageseg_mode =
@@ -2767,7 +2782,7 @@ void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
   INT_FX_RESULT_STRUCT fx_info;
   tesseract_->ExtractFeatures(*blob, false, &bl_features,
                               &cn_features, &fx_info, &outline_counts);
-  if (cn_features.size() == 0 || cn_features.size() > MAX_NUM_INT_FEATURES) {
+  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
     *num_features = 0;
     return;  // Feature extraction failed.
   }
diff --git a/api/baseapi.h b/api/baseapi.h
index 3b0d3f67ce..ab51ef20d6 100644
--- a/api/baseapi.h
+++ b/api/baseapi.h
@@ -20,7 +20,7 @@
 #ifndef TESSERACT_API_BASEAPI_H__
 #define TESSERACT_API_BASEAPI_H__
 
-#define TESSERACT_VERSION_STR "3.05.00dev"
+#define TESSERACT_VERSION_STR "3.05.00"
 #define TESSERACT_VERSION 0x030500
 #define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
                                             (patch))
@@ -142,6 +142,7 @@ class TESS_API TessBaseAPI {
    * is stored in the PDF so we need that as well.
    */
   const char* GetInputName();
+  // Takes ownership of the input pix.
   void SetInputImage(Pix *pix);
   Pix* GetInputImage();
   int GetSourceYResolution();
@@ -333,9 +334,7 @@ class TESS_API TessBaseAPI {
 
   /**
    * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Does not copy the image buffer, or take
-   * ownership. The source image may be destroyed after Recognize is called,
-   * either explicitly or implicitly via one of the Get*Text functions.
+   * TesseractRect above. Copies the image buffer and converts to Pix.
    * SetImage clears all recognition results, and sets the rectangle to the
    * full image, so it may be followed immediately by a GetUTF8Text, and it
    * will automatically perform recognition.
@@ -345,13 +344,11 @@ class TESS_API TessBaseAPI {
 
   /**
    * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract doesn't take a copy or ownership or pixDestroy the image, so
-   * it must persist until after Recognize.
+   * Tesseract takes its own copy of the image, so it need not persist until
+   * after Recognize.
    * Pix vs raw, which to use?
-   * Use Pix where possible. A future version of Tesseract may choose to use Pix
-   * as its internal representation and discard IMAGE altogether.
-   * Because of that, an implementation that sources and targets Pix may end up
-   * with less copies than an implementation that does not.
+   * Use Pix where possible. Tesseract uses Pix as its internal representation
+   * and it is therefore more efficient to provide a Pix directly.
    */
   void SetImage(Pix* pix);
 
@@ -376,8 +373,7 @@ class TESS_API TessBaseAPI {
    * delete it when it it is replaced or the API is destructed.
    */
   void SetThresholder(ImageThresholder* thresholder) {
-    if (thresholder_ != NULL)
-      delete thresholder_;
+    delete thresholder_;
     thresholder_ = thresholder;
     ClearResults();
   }
@@ -588,8 +584,8 @@ class TESS_API TessBaseAPI {
    * data structures.
    * page_number is 0-based but will appear in the output as 1-based.
    * monitor can be used to
-   * 	cancel the recognition
-   * 	receive progress callbacks
+   *  cancel the recognition
+   *  receive progress callbacks
    */
   char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
 
@@ -622,6 +618,16 @@ class TESS_API TessBaseAPI {
    */
   char* GetUNLVText();
 
+  /**
+   * Detect the orientation of the input image and apparent script (alphabet).
+   * orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270)
+   * orient_conf is the confidence (15.0 is reasonably confident)
+   * script_name is an ASCII string, the name of the script, e.g. "Latin"
+   * script_conf is confidence level in the script
+   * Returns true on success, writing a value through each non-NULL output pointer.
+   */
+  bool DetectOrientationScript(int* orient_deg, float* orient_conf, const char** script_name, float* script_conf);
+
   /**
    * The recognized text is returned as a char* which is coded
    * as UTF8 and must be freed with the delete [] operator.
@@ -750,13 +756,9 @@ class TESS_API TessBaseAPI {
    */
   static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
 
-  Tesseract* tesseract() const {
-    return tesseract_;
-  }
+  Tesseract* tesseract() const { return tesseract_; }
 
-  OcrEngineMode oem() const {
-    return last_oem_requested_;
-  }
+  OcrEngineMode oem() const { return last_oem_requested_; }
 
   void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
 
@@ -855,9 +857,7 @@ class TESS_API TessBaseAPI {
                                     int** y1,
                                     PAGE_RES* page_res);
 
-  TESS_LOCAL const PAGE_RES* GetPageRes() const {
-    return page_res_;
-  };
+  TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
   /* @} */
 
 
@@ -870,7 +870,6 @@ class TESS_API TessBaseAPI {
   BLOCK_LIST*       block_list_;      ///< The page layout.
   PAGE_RES*         page_res_;        ///< The page-level data.
   STRING*           input_file_;      ///< Name used by training code.
-  Pix*              input_image_;     ///< Image used for searchable PDF
   STRING*           output_file_;     ///< Name used by debug code.
   STRING*           datapath_;        ///< Current location of tessdata.
   STRING*           language_;        ///< Last initialized language.
@@ -898,7 +897,7 @@ class TESS_API TessBaseAPI {
                             const char* retry_config, int timeout_millisec,
                             TessResultRenderer* renderer,
                             int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration
+  // TIFF supports multipage so gets special consideration.
   bool ProcessPagesMultipageTiff(const unsigned char *data,
                                  size_t size,
                                  const char* filename,
@@ -906,6 +905,12 @@ class TESS_API TessBaseAPI {
                                  int timeout_millisec,
                                  TessResultRenderer* renderer,
                                  int tessedit_page_number);
+  // There's currently no way to pass a document title from the
+  // Tesseract command line, and we have multiple places that choose
+  // to set the title to an empty string. Using a single named
+  // variable will hopefully reduce confusion if the situation changes
+  // in the future.
+  const char *unknown_title_;
 };  // class TessBaseAPI.
 
 /** Escape a char string - remove &<>"' with HTML codes. */
diff --git a/api/capi.cpp b/api/capi.cpp
index 849d296104..57bed872df 100644
--- a/api/capi.cpp
+++ b/api/capi.cpp
@@ -538,9 +538,18 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand
 
 TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results)
 {
-    return handle->DetectOS(results) ? TRUE : FALSE;
+    return FALSE; // Unsafe ABI, return FALSE always
 }
 
+TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
+                                                            int* orient_deg, float* orient_conf, const char** script_name, float* script_conf)
+{
+    bool success;
+    success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf);
+    return (BOOL)success;
+}
+
+
 TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
                                                             int* num_features, int* FeatureOutlineIndex)
 {
diff --git a/api/capi.h b/api/capi.h
index a0c54a20e4..93d43f9483 100644
--- a/api/capi.h
+++ b/api/capi.h
@@ -285,8 +285,14 @@ TESS_API void  TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
 TESS_API void  TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f);
 
 TESS_API void  TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f);
+
+// Deprecated: always returns FALSE (unsafe ABI); use TessBaseAPIDetectOrientationScript instead.
 TESS_API BOOL  TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results);
 
+// On success *script_name points to a string owned by the library's unicharset; do not free it with TessDeleteText.
+TESS_API BOOL  TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
+                                                            int* orient_deg, float* orient_conf, const char **script_name, float* script_conf);
+
 TESS_API void  TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
                                                        int* num_features, int* FeatureOutlineIndex);
 
diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp
index 4708300492..001c86ce41 100644
--- a/api/pdfrenderer.cpp
+++ b/api/pdfrenderer.cpp
@@ -20,12 +20,12 @@
 #include "config_auto.h"
 #endif
 
+#include "allheaders.h"
 #include "baseapi.h"
-#include "renderer.h"
 #include "math.h"
+#include "renderer.h"
 #include "strngs.h"
 #include "tprintf.h"
-#include "allheaders.h"
 
 #ifdef _MSC_VER
 #include "mathfix.h"
@@ -282,7 +282,7 @@ void AffineMatrix(int writing_direction,
   }
 }
 
-// There are some really stupid PDF viewers in the wild, such as
+// There are some really awkward PDF viewers in the wild, such as
 // 'Preview' which ships with the Mac. They do a better job with text
 // selection and highlighting when given perfectly flat baseline
 // instead of very slightly tilted. We clip small tilts to appease
@@ -441,8 +441,8 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
           int code = unicodes[i];
           // Convert to UTF-16BE https://en.wikipedia.org/wiki/UTF-16
           if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) {
-                tprintf("Dropping invalid codepoint %d\n", code);
-                continue;
+            tprintf("Dropping invalid codepoint %d\n", code);
+            continue;
           }
           if (code < 0x10000) {
             snprintf(utf16, sizeof(utf16), "<%04X>", code);
@@ -567,7 +567,8 @@ bool TessPDFRenderer::BeginDocumentHandler() {
                "<<\n"
                "  /Length %lu /Filter /FlateDecode\n"
                ">>\n"
-               "stream\n", (unsigned long)len);
+               "stream\n",
+               (unsigned long)len);
   if (n >= sizeof(buf)) {
     lept_free(comp);
     return false;
@@ -619,7 +620,6 @@ bool TessPDFRenderer::BeginDocumentHandler() {
   AppendPDFObject(buf);
 
   // FONT DESCRIPTOR
-  const int kCharHeight = 2;  // Effect: highlights are half height
   n = snprintf(buf, sizeof(buf),
                "7 0 obj\n"
                "<<\n"
@@ -635,10 +635,10 @@ bool TessPDFRenderer::BeginDocumentHandler() {
                "  /Type /FontDescriptor\n"
                ">>\n"
                "endobj\n",
-               1000 / kCharHeight,
-               1000 / kCharHeight,
+               1000,
+               1000,
                1000 / kCharWidth,
-               1000 / kCharHeight,
+               1000,
                8L      // Font data
                );
   if (n >= sizeof(buf)) return false;
@@ -819,10 +819,6 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
   *pdf_object_size =
       b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len;
   *pdf_object = new char[*pdf_object_size];
-  if (!pdf_object) {
-    l_CIDataDestroy(&cid);
-    return false;
-  }
 
   char *p = *pdf_object;
   memcpy(p, b1, b1_len);
diff --git a/api/renderer.cpp b/api/renderer.cpp
index 4a88a24608..e683149381 100644
--- a/api/renderer.cpp
+++ b/api/renderer.cpp
@@ -155,11 +155,11 @@ TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
 
 bool TessHOcrRenderer::BeginDocumentHandler() {
   AppendString(
-        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
-        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
-        "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
-        "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
-        "lang=\"en\">\n <head>\n  <title>");
+      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+      "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
+      "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
+      "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
+      "lang=\"en\">\n <head>\n  <title>");
   AppendString(title());
   AppendString(
       "</title>\n"
@@ -198,25 +198,25 @@ bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
 /**********************************************************************
  * TSV Text Renderer interface implementation
  **********************************************************************/
-TessTsvRenderer::TessTsvRenderer(const char *outputbase)
+TessTsvRenderer::TessTsvRenderer(const char* outputbase)
     : TessResultRenderer(outputbase, "tsv") {
-    font_info_ = false;
+  font_info_ = false;
 }
 
-TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info)
+TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
     : TessResultRenderer(outputbase, "tsv") {
-    font_info_ = font_info;
+  font_info_ = font_info;
 }
 
 bool TessTsvRenderer::BeginDocumentHandler() {
   // Output TSV column headings
-  AppendString("level\tpage_num\tblock_num\tpar_num\tline_num\tword_num\tleft\ttop\twidth\theight\tconf\ttext\n");
+  AppendString(
+      "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
+      "num\tleft\ttop\twidth\theight\tconf\ttext\n");
   return true;
 }
 
-bool TessTsvRenderer::EndDocumentHandler() {
-  return true;
-}
+bool TessTsvRenderer::EndDocumentHandler() { return true; }
 
 bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
   char* tsv = api->GetTSVText(imagenum());
@@ -266,8 +266,7 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
  * Osd Text Renderer interface implementation
  **********************************************************************/
 TessOsdRenderer::TessOsdRenderer(const char* outputbase)
-    : TessResultRenderer(outputbase, "osd") {
-}
+    : TessResultRenderer(outputbase, "osd") {}
 
 bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
   char* osd = api->GetOsdText(imagenum());
diff --git a/api/renderer.h b/api/renderer.h
index 6b47813f7b..d868f267fa 100644
--- a/api/renderer.h
+++ b/api/renderer.h
@@ -77,7 +77,7 @@ class TESS_API TessResultRenderer {
     bool EndDocument();
 
     const char* file_extension() const { return file_extension_; }
-    const char* title() const { return title_; }
+    const char* title() const { return title_.c_str(); }
 
     /**
      * Returns the index of the last image given to AddImage
@@ -126,7 +126,7 @@ class TESS_API TessResultRenderer {
 
   private:
     const char* file_extension_;  // standard extension for generated output
-    const char* title_;           // title of document being renderered
+    STRING title_;                // title of document being rendered
     int imagenum_;                // index of last image added
 
     FILE* fout_;                  // output file pointer
@@ -153,13 +153,13 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer {
   explicit TessHOcrRenderer(const char *outputbase, bool font_info);
   explicit TessHOcrRenderer(const char *outputbase);
 
-protected:
+ protected:
   virtual bool BeginDocumentHandler();
   virtual bool AddImageHandler(TessBaseAPI* api);
   virtual bool EndDocumentHandler();
 
-private:
-  bool font_info_;              // whether to print font information
+ private:
+  bool font_info_;  // whether to print font information
 };
 
 /**
@@ -167,15 +167,15 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer {
  */
 class TESS_API TessTsvRenderer : public TessResultRenderer {
  public:
-  explicit TessTsvRenderer(const char *outputbase, bool font_info);
-  explicit TessTsvRenderer(const char *outputbase);
+  explicit TessTsvRenderer(const char* outputbase, bool font_info);
+  explicit TessTsvRenderer(const char* outputbase);
 
-protected:
+ protected:
   virtual bool BeginDocumentHandler();
   virtual bool AddImageHandler(TessBaseAPI* api);
   virtual bool EndDocumentHandler();
 
-private:
+ private:
   bool font_info_;              // whether to print font information
 };
 
@@ -188,12 +188,12 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
   // we load a custom PDF font from this location.
   TessPDFRenderer(const char *outputbase, const char *datadir);
 
-protected:
+ protected:
   virtual bool BeginDocumentHandler();
   virtual bool AddImageHandler(TessBaseAPI* api);
   virtual bool EndDocumentHandler();
 
-private:
+ private:
   // We don't want to have every image in memory at once,
   // so we store some metadata as we go along producing
   // PDFs one page at a time. At the end that metadata is
diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp
index 3fe8dc8c53..7cd4021fbe 100644
--- a/api/tesseractmain.cpp
+++ b/api/tesseractmain.cpp
@@ -36,32 +36,31 @@
 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
 
 #include <tiffio.h>
-#include <windows.h>
 
 static void Win32WarningHandler(const char* module, const char* fmt,
                                 va_list ap) {
-    if (module != NULL) {
-        fprintf(stderr, "%s: ", module);
-    }
-    fprintf(stderr, "Warning, ");
-    vfprintf(stderr, fmt, ap);
-    fprintf(stderr, ".\n");
+  if (module != NULL) {
+    fprintf(stderr, "%s: ", module);
+  }
+  fprintf(stderr, "Warning, ");
+  vfprintf(stderr, fmt, ap);
+  fprintf(stderr, ".\n");
 }
 
 #endif /* HAVE_TIFFIO_H &&  _WIN32 */
 
 void PrintVersionInfo() {
-    char *versionStrP;
+  char* versionStrP;
 
-    printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
+  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
 
-    versionStrP = getLeptonicaVersion();
-    printf(" %s\n", versionStrP);
-    lept_free(versionStrP);
+  versionStrP = getLeptonicaVersion();
+  printf(" %s\n", versionStrP);
+  lept_free(versionStrP);
 
-    versionStrP = getImagelibVersions();
-    printf("  %s\n", versionStrP);
-    lept_free(versionStrP);
+  versionStrP = getImagelibVersions();
+  printf("  %s\n", versionStrP);
+  lept_free(versionStrP);
 
 #ifdef USE_OPENCL
     cl_platform_id platform;
@@ -82,7 +81,7 @@ void PrintVersionInfo() {
     printf("  Found %d devices.\n", num_devices);
     for (i = 0; i < num_devices; ++i) {
       clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0);
-      printf("    Device %d name: %s.\n", i+1, info);
+      printf("    Device %d name: %s.\n", i + 1, info);
     }
 #endif
 }
@@ -90,7 +89,7 @@ void PrintVersionInfo() {
 void PrintUsage(const char* program) {
   printf(
       "Usage:\n"
-      "  %s --help | --help-psm | --version\n"
+      "  %s --help | --help-psm | --help-oem | --version\n"
       "  %s --list-langs [--tessdata-dir PATH]\n"
       "  %s --print-parameters [options...] [configfile...]\n"
       "  %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
@@ -100,27 +99,33 @@ void PrintUsage(const char* program) {
 void PrintHelpForPSM() {
   const char* msg =
       "Page segmentation modes:\n"
-        "  0    Orientation and script detection (OSD) only.\n"
-        "  1    Automatic page segmentation with OSD.\n"
-        "  2    Automatic page segmentation, but no OSD, or OCR.\n"
-        "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
-        "  4    Assume a single column of text of variable sizes.\n"
-        "  5    Assume a single uniform block of vertically aligned text.\n"
-        "  6    Assume a single uniform block of text.\n"
-        "  7    Treat the image as a single text line.\n"
-        "  8    Treat the image as a single word.\n"
-        "  9    Treat the image as a single word in a circle.\n"
-        " 10    Treat the image as a single character.\n"
-
-        //TODO: Consider publishing these modes.
-        #if 0
-        " 11    Sparse text. Find as much text as possible in no"
-          " particular order.\n"
-        " 12    Sparse text with OSD.\n"
-        " 13    Raw line. Treat the image as a single text line,\n"
-          "\t\t\tbypassing hacks that are Tesseract-specific.\n"
-        #endif
-        ;
+      "  0    Orientation and script detection (OSD) only.\n"
+      "  1    Automatic page segmentation with OSD.\n"
+      "  2    Automatic page segmentation, but no OSD, or OCR.\n"
+      "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
+      "  4    Assume a single column of text of variable sizes.\n"
+      "  5    Assume a single uniform block of vertically aligned text.\n"
+      "  6    Assume a single uniform block of text.\n"
+      "  7    Treat the image as a single text line.\n"
+      "  8    Treat the image as a single word.\n"
+      "  9    Treat the image as a single word in a circle.\n"
+      " 10    Treat the image as a single character.\n"
+      " 11    Sparse text. Find as much text as possible in no"
+      " particular order.\n"
+      " 12    Sparse text with OSD.\n"
+      " 13    Raw line. Treat the image as a single text line,\n"
+      "\t\t\tbypassing hacks that are Tesseract-specific.\n";
+
+  printf("%s", msg);
+}
+
+void PrintHelpForOEM() {
+  const char* msg =
+      "OCR Engine modes:\n"
+      "  0    Original Tesseract only.\n"
+      "  1    Cube only.\n"
+      "  2    Tesseract + cube.\n"
+      "  3    Default, based on what is available.\n";
 
   printf("%s", msg);
 }
@@ -136,32 +141,34 @@ void PrintHelpMessage(const char* program) {
       "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
       "  -c VAR=VALUE          Set value for config variables.\n"
       "                        Multiple -c arguments are allowed.\n"
-      "  -psm NUM              Specify page segmentation mode.\n"
-      "NOTE: These options must occur before any configfile.\n"
-     ;
+      "  --psm NUM             Specify page segmentation mode.\n"
+      "  --oem NUM             Specify OCR Engine mode.\n"
+      "NOTE: These options must occur before any configfile.\n";
 
   printf("\n%s\n", ocr_options);
   PrintHelpForPSM();
+  PrintHelpForOEM();
 
-  const char *single_options =
+  const char* single_options =
       "Single options:\n"
       "  -h, --help            Show this help message.\n"
       "  --help-psm            Show page segmentation modes.\n"
+      "  --help-oem            Show OCR Engine modes.\n"
       "  -v, --version         Show version information.\n"
       "  --list-langs          List available languages for tesseract engine.\n"
-      "  --print-parameters    Print tesseract parameters to stdout.\n"
-      ;
+      "  --print-parameters    Print tesseract parameters to stdout.\n";
 
   printf("\n%s", single_options);
 }
 
-void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) {
+void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
+                            char** argv) {
   char opt1[256], opt2[255];
   for (int i = 0; i < argc; i++) {
     if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
       strncpy(opt1, argv[i + 1], 255);
       opt1[255] = '\0';
-      char *p = strchr(opt1, '=');
+      char* p = strchr(opt1, '=');
       if (!p) {
         fprintf(stderr, "Missing = in configvar assignment\n");
         exit(1);
@@ -190,8 +197,8 @@ void PrintLangsList(tesseract::TessBaseAPI* api) {
 }
 
 void PrintBanner() {
-    tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
-           tesseract::TessBaseAPI::Version());
+  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
+          tesseract::TessBaseAPI::Version());
 }
 
 /**
@@ -209,31 +216,26 @@ void PrintBanner() {
  * but that doesn't work.
  */
 void FixPageSegMode(tesseract::TessBaseAPI* api,
-              tesseract::PageSegMode pagesegmode) {
+                    tesseract::PageSegMode pagesegmode) {
   if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
-     api->SetPageSegMode(pagesegmode);
+    api->SetPageSegMode(pagesegmode);
 }
 
 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
-void ParseArgs(const int argc, char** argv,
-                  const char** lang,
-                  const char** image,
-                  const char** outputbase,
-                  const char** datapath,
-                  bool* list_langs,
-                  bool* print_parameters,
-                  GenericVector<STRING>* vars_vec,
-                  GenericVector<STRING>* vars_values,
-                  int* arg_i,
-                  tesseract::PageSegMode* pagesegmode) {
+void ParseArgs(const int argc, char** argv, const char** lang,
+               const char** image, const char** outputbase,
+               const char** datapath, bool* list_langs, bool* print_parameters,
+               GenericVector<STRING>* vars_vec,
+               GenericVector<STRING>* vars_values, int* arg_i,
+               tesseract::PageSegMode* pagesegmode,
+               tesseract::OcrEngineMode* enginemode) {
   if (argc == 1) {
     PrintHelpMessage(argv[0]);
     exit(0);
   }
 
   if (argc == 2) {
-    if ((strcmp(argv[1], "-h") == 0) ||
-         (strcmp(argv[1], "--help") == 0)) {
+    if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
       PrintHelpMessage(argv[0]);
       exit(0);
     }
@@ -241,8 +243,11 @@ void ParseArgs(const int argc, char** argv,
       PrintHelpForPSM();
       exit(0);
     }
-    if ((strcmp(argv[1], "-v") == 0) ||
-         (strcmp(argv[1], "--version") == 0)) {
+    if ((strcmp(argv[1], "--help-oem") == 0)) {
+      PrintHelpForOEM();
+      exit(0);
+    }
+    if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) {
       PrintVersionInfo();
       exit(0);
     }
@@ -269,8 +274,16 @@ void ParseArgs(const int argc, char** argv,
       noocr = true;
       *list_langs = true;
     } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
+      // The parameter -psm is deprecated and was replaced by --psm.
+      // It is still supported for compatibility reasons.
+      *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
+      ++i;
+    } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
       *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
       ++i;
+    } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
+      *enginemode = static_cast<tesseract::OcrEngineMode>(atoi(argv[i + 1]));
+      ++i;
     } else if (strcmp(argv[i], "--print-parameters") == 0) {
       noocr = true;
       *print_parameters = true;
@@ -298,10 +311,10 @@ void ParseArgs(const int argc, char** argv,
   }
 }
 
-void PreloadRenderers(tesseract::TessBaseAPI* api,
-          tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
-          tesseract::PageSegMode pagesegmode,
-          const char* outputbase) {
+void PreloadRenderers(
+    tesseract::TessBaseAPI* api,
+    tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
+    tesseract::PageSegMode pagesegmode, const char* outputbase) {
   if (pagesegmode == tesseract::PSM_OSD_ONLY) {
     renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
   } else {
@@ -311,7 +324,7 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
       bool font_info;
       api->GetBoolVariable("hocr_font_info", &font_info);
       renderers->push_back(
-                     new tesseract::TessHOcrRenderer(outputbase, font_info));
+          new tesseract::TessHOcrRenderer(outputbase, font_info));
     }
 
     api->GetBoolVariable("tessedit_create_tsv", &b);
@@ -324,8 +337,8 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
 
     api->GetBoolVariable("tessedit_create_pdf", &b);
     if (b) {
-      renderers->push_back(new tesseract::TessPDFRenderer(outputbase,
-                                                        api->GetDatapath()));
+      renderers->push_back(
+          new tesseract::TessPDFRenderer(outputbase, api->GetDatapath()));
     }
 
     api->GetBoolVariable("tessedit_write_unlv", &b);
@@ -359,8 +372,7 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
  *
  **********************************************************************/
 
-
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
   const char* lang = "eng";
   const char* image = NULL;
   const char* outputbase = NULL;
@@ -369,21 +381,26 @@ int main(int argc, char **argv) {
   bool print_parameters = false;
   int arg_i = 1;
   tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
+  tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
   /* main() calls functions like ParseArgs which call exit().
    * This results in memory leaks if vars_vec and vars_values are
    * declared as auto variables (destructor is not called then). */
   static GenericVector<STRING> vars_vec;
   static GenericVector<STRING> vars_values;
 
+#if !defined(DEBUG)
+  // Disable debugging and informational messages from Leptonica.
+  setMsgSeverity(L_SEVERITY_WARNING);
+#endif
+
 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
   /* Show libtiff warnings on console (not in GUI). */
   TIFFSetWarningHandler(Win32WarningHandler);
 #endif /* HAVE_TIFFIO_H &&  _WIN32 */
 
-  ParseArgs(argc, argv,
-          &lang, &image, &outputbase, &datapath,
-          &list_langs, &print_parameters,
-          &vars_vec, &vars_values, &arg_i, &pagesegmode);
+  ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
+            &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
+            &enginemode);
 
   bool banner = false;
   if (outputbase != NULL && strcmp(outputbase, "-") &&
@@ -396,8 +413,8 @@ int main(int argc, char **argv) {
 
   api.SetOutputName(outputbase);
 
-  int init_failed = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
-                &(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values, false);
+  int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
+                             argc - arg_i, &vars_vec, &vars_values, false);
   if (init_failed) {
     fprintf(stderr, "Could not initialize tesseract.\n");
     exit(1);
@@ -406,8 +423,8 @@ int main(int argc, char **argv) {
   SetVariablesFromCLArgs(&api, argc, argv);
 
   if (list_langs) {
-     PrintLangsList(&api);
-     exit(0);
+    PrintLangsList(&api);
+    exit(0);
   }
 
   if (print_parameters) {
@@ -436,12 +453,13 @@ int main(int argc, char **argv) {
     tesseract::TextlineOrder order;
     float deskew_angle;
 
-    tesseract::PageIterator* it =  api.AnalyseLayout();
+    tesseract::PageIterator* it = api.AnalyseLayout();
     if (it) {
       it->Orientation(&orientation, &direction, &order, &deskew_angle);
-      tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
-             "Deskew angle: %.4f\n",
-              orientation, direction, order, deskew_angle);
+      tprintf(
+          "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
+          "Deskew angle: %.4f\n",
+          orientation, direction, order, deskew_angle);
     } else {
       ret_val = 1;
     }
@@ -456,14 +474,12 @@ int main(int argc, char **argv) {
   // ambigs.train, box.train, box.train.stderr, linebox, rebox
   bool b = false;
   bool in_training_mode =
-        (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
-        (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
-        (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
+      (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
+      (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
+      (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
 
   tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
 
-
-
   if (in_training_mode) {
     renderers.push_back(NULL);
   } else {
diff --git a/appveyor.yml b/appveyor.yml
index 020331c461..c6aeee9221 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,25 +1,45 @@
-os: Visual Studio 2015
+os: Visual Studio 2017
 
 platform:
   - Win32
   - Win64
 
 configuration:
-  - Release
+  - Release
+
+# for curl: put Git's bundled mingw64 tools on PATH (curl is used in before_build)
+install:
+  - set PATH=C:\Program Files\Git\mingw64\bin;%PATH%
 
 before_build:
-  - if %platform%==Win32 set generator=Visual Studio 14
-  - if %platform%==Win64 set generator=Visual Studio 14 Win64
+  - if %platform%==Win32 set generator=Visual Studio 15 2017
+  - if %platform%==Win64 set generator=Visual Studio 15 2017 Win64
   - if %platform%==Win32 set vcplatform=Win32
   - if %platform%==Win64 set vcplatform=x64
-  
-  - curl -fsS -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip
+
+  - curl -fsS -L -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip
   - 7z x cppan.zip
   - set PATH=%PATH%;%cd%
-  
+
+  - cppan # dummy run to create %USERPROFILE%\.cppan\cppan.yml
+  - ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nvar_check_jobs: 1`n"'
+  - ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nbuild_warning_level: 0`n"'
+  - ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nbuild_system_verbose: false`n"'
+
 build_script:
-  - cppan
   - mkdir build
+  - mkdir build\bin
+  - mkdir build\bin\Release
   - cd build
-  - cmake .. -G "%generator%" -DSTATIC=1
-  - msbuild tesseract.sln /p:Platform=%vcplatform% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
+  #- cmd: 'echo local_settings: > cppan.yml'
+  #- cmd: 'echo     generator: %generator% >> cppan.yml'
+  #- cmd: 'echo     use_shared_libs: true >> cppan.yml'
+  #- cppan --build ..
+  - cmake .. -G "%generator%" -DBUILD_TRAINING_TOOLS=Off -DAPPVEYOR=1
+  - cmake --build . --config Release > bin\Release\log.txt 2>&1
+
+artifacts:
+  - path: build\bin\Release
+  #- path: build
+    name: tesseract-$(APPVEYOR_BUILD_VERSION)
+    
diff --git a/autogen.sh b/autogen.sh
index ac44d35770..5319afbc86 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,4 +1,13 @@
 #!/bin/sh
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 # This is a simple script which is meant to help developers
 # better deal with the GNU autotools, specifically:
@@ -37,6 +46,19 @@ if [ "$1" = "clean" ]; then
     find . -iname "Makefile.in" -type f -exec rm '{}' +
 fi
 
+# Locate `libtoolize` or `glibtoolize`, whichever is present on your system,
+# before it is needed, so that a missing tool is reported right away.
+# The resolved path is cached in $LIBTOOLIZE and used later on in 'Step 2:'.
+# If neither tool can be found in PATH, print an error and bail out.
+if command -v libtoolize >/dev/null 2>&1; then
+  LIBTOOLIZE="$(command -v libtoolize)"
+elif command -v glibtoolize >/dev/null 2>&1; then
+  LIBTOOLIZE="$(command -v glibtoolize)"
+else
+  echo "Unable to find a valid copy of libtoolize or glibtoolize in your PATH!"
+  bail_out
+fi
+
 # create m4 directory if it not exists
 if [ ! -d m4 ];  then
     mkdir m4
@@ -61,9 +83,9 @@ aclocal -I config || bail_out
 
 # --- Step 2:
 
-echo "Running libtoolize"
-libtoolize -f -c || glibtoolize -f -c || bail_out
-libtoolize --automake || glibtoolize --automake || bail_out
+echo "Running $LIBTOOLIZE"
+"$LIBTOOLIZE" -f -c || bail_out  # quoted: the resolved path may contain spaces
+"$LIBTOOLIZE" --automake || bail_out
 
 # --- Step 3: Generate config.h.in from:
 #             . configure.ac (look for AM_CONFIG_HEADER tag or AC_CONFIG_HEADER tag)
diff --git a/ccmain/control.cpp b/ccmain/control.cpp
index 3970c5429e..cdc2bb5a6e 100644
--- a/ccmain/control.cpp
+++ b/ccmain/control.cpp
@@ -1,8 +1,8 @@
 /******************************************************************
  * File:        control.cpp  (Formerly control.c)
  * Description: Module-independent matcher controller.
- * Author:					Ray Smith
- * Created:					Thu Apr 23 11:09:58 BST 1992
+ * Author:          Ray Smith
+ * Created:         Thu Apr 23 11:09:58 BST 1992
  * ReHacked:    Tue Sep 22 08:42:49 BST 1992 Phil Cheatle
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
@@ -73,7 +73,6 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res,
   }
 }
 
-
 /**
  * Recognize a single word in interactive mode.
  *
@@ -219,16 +218,14 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
       if (pass_n == 1) {
         monitor->progress = 70 * w / words->size();
         if (monitor->progress_callback != NULL) {
-            TBOX box = pr_it->word()->word->bounding_box();
-            (*monitor->progress_callback)(monitor->progress,
-                                          box.left(), box.right(),
-                                          box.top(), box.bottom());
+          TBOX box = pr_it->word()->word->bounding_box();
+          (*monitor->progress_callback)(monitor->progress, box.left(),
+                                        box.right(), box.top(), box.bottom());
         }
       } else {
         monitor->progress = 70 + 30 * w / words->size();
-        if (monitor->progress_callback!=NULL) {
-                      (*monitor->progress_callback)(monitor->progress,
-                                                    0, 0, 0, 0);
+        if (monitor->progress_callback != NULL) {
+          (*monitor->progress_callback)(monitor->progress, 0, 0, 0, 0);
         }
       }
       if (monitor->deadline_exceeded() ||
@@ -539,7 +536,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
         }
       }
     }
-    if (overrides_word1.size() >= 1) {
+    if (!overrides_word1.empty()) {
       // Excellent, we have some bigram matches.
       if (EqualIgnoringCaseAndTerminalPunct(*w_prev->best_choice,
                                             *overrides_word1[best_idx]) &&
diff --git a/ccmain/cube_control.cpp b/ccmain/cube_control.cpp
index 1430debc5b..50f7512dd1 100644
--- a/ccmain/cube_control.cpp
+++ b/ccmain/cube_control.cpp
@@ -5,6 +5,16 @@
  * Author:      Raquel Romano
  * Created:     September 2009
  *
+ * (C) Copyright 2009, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
  **********************************************************************/
 
 // Include automatically generated configuration file if running autoconf.
@@ -169,13 +179,11 @@ bool Tesseract::init_cube_objects(bool load_combiner,
   // Create the combiner object and load the combiner net for target languages.
   if (load_combiner) {
     tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
-    if (!tess_cube_combiner_ || !tess_cube_combiner_->LoadCombinerNet()) {
+    if (!tess_cube_combiner_->LoadCombinerNet()) {
       delete cube_cntxt_;
       cube_cntxt_ = NULL;
-      if (tess_cube_combiner_ != NULL) {
-        delete tess_cube_combiner_;
-        tess_cube_combiner_ = NULL;
-      }
+      delete tess_cube_combiner_;
+      tess_cube_combiner_ = NULL;
       if (cube_debug_level > 0)
         tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
       return false;
diff --git a/ccmain/cube_reco_context.cpp b/ccmain/cube_reco_context.cpp
index fed53f0954..dadc0624a4 100644
--- a/ccmain/cube_reco_context.cpp
+++ b/ccmain/cube_reco_context.cpp
@@ -55,40 +55,26 @@ CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
 }
 
 CubeRecoContext::~CubeRecoContext() {
-  if (char_classifier_ != NULL) {
-    delete char_classifier_;
-    char_classifier_ = NULL;
-  }
+  delete char_classifier_;
+  char_classifier_ = NULL;
 
-  if (word_size_model_ != NULL) {
-    delete word_size_model_;
-    word_size_model_ = NULL;
-  }
+  delete word_size_model_;
+  word_size_model_ = NULL;
 
-  if (char_set_ != NULL) {
-    delete char_set_;
-    char_set_ = NULL;
-  }
+  delete char_set_;
+  char_set_ = NULL;
 
-  if (char_bigrams_ != NULL) {
-    delete char_bigrams_;
-    char_bigrams_ = NULL;
-  }
+  delete char_bigrams_;
+  char_bigrams_ = NULL;
 
-  if (word_unigrams_ != NULL) {
-    delete word_unigrams_;
-    word_unigrams_ = NULL;
-  }
+  delete word_unigrams_;
+  word_unigrams_ = NULL;
 
-  if (lang_mod_ != NULL) {
-    delete lang_mod_;
-    lang_mod_ = NULL;
-  }
+  delete lang_mod_;
+  lang_mod_ = NULL;
 
-  if (params_ != NULL) {
-    delete params_;
-    params_ = NULL;
-  }
+  delete params_;
+  params_ = NULL;
 }
 
 /**
@@ -145,11 +131,6 @@ bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
   lang_mod_ = new TessLangModel(lm_params, data_file_path,
                                 tess_obj_->getDict().load_system_dawg,
                                 tessdata_manager, this);
-  if (lang_mod_ == NULL) {
-    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create "
-            "TessLangModel\n");
-    return false;
-  }
 
   // Create the optional char bigrams object.
   char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
@@ -190,11 +171,6 @@ CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
                                           UNICHARSET *tess_unicharset) {
   // create the object
   CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
-  if (cntxt == NULL) {
-    fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create "
-            "CubeRecoContext object\n");
-    return NULL;
-  }
   // load the necessary components
   if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
     fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
diff --git a/ccmain/cubeclassifier.h b/ccmain/cubeclassifier.h
index 98bdb5cf00..3ae00f7974 100644
--- a/ccmain/cubeclassifier.h
+++ b/ccmain/cubeclassifier.h
@@ -23,6 +23,7 @@
 #define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
 
 #include "shapeclassifier.h"
+#include "platform.h"
 
 namespace tesseract {
 
@@ -35,7 +36,7 @@ class TrainingSample;
 struct UnicharRating;
 
 // Cube implementation of a ShapeClassifier.
-class CubeClassifier : public ShapeClassifier {
+class TESS_API CubeClassifier : public ShapeClassifier {
  public:
   explicit CubeClassifier(Tesseract* tesseract);
   virtual ~CubeClassifier();
@@ -55,7 +56,7 @@ class CubeClassifier : public ShapeClassifier {
 };
 
 // Combination of Tesseract class pruner with scoring by cube.
-class CubeTessClassifier : public ShapeClassifier {
+class TESS_API CubeTessClassifier : public ShapeClassifier {
  public:
   explicit CubeTessClassifier(Tesseract* tesseract);
   virtual ~CubeTessClassifier();
diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp
index c6e7f17e0f..4706fb3b26 100644
--- a/ccmain/docqual.cpp
+++ b/ccmain/docqual.cpp
@@ -1,8 +1,8 @@
 /******************************************************************
  * File:        docqual.cpp  (Formerly docqual.c)
  * Description: Document Quality Metrics
- * Author:		Phil Cheatle
- * Created:		Mon May  9 11:27:28 BST 1994
+ * Author:    Phil Cheatle
+ * Created:   Mon May  9 11:27:28 BST 1994
  *
  * (C) Copyright 1994, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -98,8 +98,8 @@ void Tesseract::word_char_quality(WERD_RES *word,
                                   ROW *row,
                                   inT16 *match_count,
                                   inT16 *accepted_match_count) {
-  if (word->bln_boxes == NULL ||
-    word->rebuild_word == NULL || word->rebuild_word->blobs.empty()) {
+  if (word->bln_boxes == NULL || word->rebuild_word == NULL ||
+      word->rebuild_word->blobs.empty()) {
     *match_count = 0;
     *accepted_match_count = 0;
     return;
@@ -132,7 +132,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) {
   int expected_outline_count;
 
   if (STRING (outlines_odd).contains (c))
-    return 0;                    //Don't use this char
+    return 0;  // Don't use this char
   else if (STRING (outlines_2).contains (c))
     expected_outline_count = 2;
   else
@@ -151,17 +151,16 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it,
   }
 }
 
-
 /*************************************************************************
  * unrej_good_quality_words()
  * Accept potential rejects in words which pass the following checks:
  *    - Contains a potential reject
  *    - Word looks like a sensible alpha word.
  *    - Word segmentation is the same as the original image
- *		- All characters have the expected number of outlines
+ *    - All characters have the expected number of outlines
  * NOTE - the rejection counts are recalculated after unrejection
  *      - CAN'T do it in a single pass without a bit of fiddling
- *		- keep it simple but inefficient
+ *    - keep it simple but inefficient
  *************************************************************************/
 void Tesseract::unrej_good_quality_words(  //unreject potential
                                          PAGE_RES_IT &page_res_it) {
@@ -403,7 +402,6 @@ void Tesseract::doc_and_block_rejection(  //reject big chunks
 
 }  // namespace tesseract
 
-
 /*************************************************************************
  * reject_whole_page()
  * Don't believe any of it - set the reject map to 00..00 in all words
diff --git a/ccmain/equationdetect.cpp b/ccmain/equationdetect.cpp
index 06aab24923..5fa955e432 100644
--- a/ccmain/equationdetect.cpp
+++ b/ccmain/equationdetect.cpp
@@ -20,6 +20,7 @@
 #ifdef _MSC_VER
 #pragma warning(disable:4244)  // Conversion warnings
 #include <mathfix.h>
+#include <windows.h>
 #endif
 
 #ifdef __MINGW32__
diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp
index f58c9610fa..5fbe8c9a4e 100644
--- a/ccmain/fixspace.cpp
+++ b/ccmain/fixspace.cpp
@@ -3,8 +3,8 @@
  * Description: Implements a pass over the page res, exploring the alternative
  *              spacing possibilities, trying to use context to improve the
  *              word spacing
-* Author:		Phil Cheatle
-* Created:		Thu Oct 21 11:38:43 BST 1993
+* Author:   Phil Cheatle
+* Created:    Thu Oct 21 11:38:43 BST 1993
 *
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -211,7 +211,6 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
   }
 }
 
-
 /**
  * @name eval_word_spacing()
  * The basic measure is the number of characters in contextually confirmed
diff --git a/ccmain/ltrresultiterator.cpp b/ccmain/ltrresultiterator.cpp
index d5b8594667..f80e594518 100644
--- a/ccmain/ltrresultiterator.cpp
+++ b/ccmain/ltrresultiterator.cpp
@@ -145,13 +145,12 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const {
   return 0.0f;
 }
 
-void LTRResultIterator::RowAttributes(float* row_height,
-                                      float* descenders,
+void LTRResultIterator::RowAttributes(float* row_height, float* descenders,
                                       float* ascenders) const {
-    *row_height = it_->row()->row->x_height() + it_->row()-> row->ascenders()
-                  - it_->row()->row->descenders();
-    *descenders = it_->row()->row->descenders();
-    *ascenders = it_->row()->row->ascenders();
+  *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
+                it_->row()->row->descenders();
+  *descenders = it_->row()->row->descenders();
+  *ascenders = it_->row()->row->ascenders();
 }
 
 // Returns the font attributes of the current word. If iterating at a higher
diff --git a/ccmain/ltrresultiterator.h b/ccmain/ltrresultiterator.h
index 8819c2a0ee..f2605b52d2 100644
--- a/ccmain/ltrresultiterator.h
+++ b/ccmain/ltrresultiterator.h
@@ -92,8 +92,7 @@ class TESS_API LTRResultIterator : public PageIterator {
   float Confidence(PageIteratorLevel level) const;
 
   // Returns the attributes of the current row.
-  void RowAttributes(float* row_height,
-                     float* descenders,
+  void RowAttributes(float* row_height, float* descenders,
                      float* ascenders) const;
 
   // ============= Functions that refer to words only ============.
diff --git a/ccmain/osdetect.cpp b/ccmain/osdetect.cpp
index f2fe94a1c3..69e626222f 100644
--- a/ccmain/osdetect.cpp
+++ b/ccmain/osdetect.cpp
@@ -164,8 +164,14 @@ void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
   int vertical_y = 1;
   tesseract::TabVector_LIST v_lines;
   tesseract::TabVector_LIST h_lines;
-  int resolution = (kMinCredibleResolution > pixGetXRes(pix)) ?
-      kMinCredibleResolution : pixGetXRes(pix);
+  int resolution;
+  if (kMinCredibleResolution > pixGetXRes(pix)) {
+    resolution = kMinCredibleResolution;
+    tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
+            pixGetXRes(pix), resolution);
+  } else {
+    resolution = pixGetXRes(pix);
+  }
 
   tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
                                             &vertical_x, &vertical_y,
diff --git a/ccmain/output.cpp b/ccmain/output.cpp
index ddfcfc54b6..6fca63e420 100644
--- a/ccmain/output.cpp
+++ b/ccmain/output.cpp
@@ -1,8 +1,8 @@
 /******************************************************************
  * File:        output.cpp  (Formerly output.c)
  * Description: Output pass
- * Author:					Phil Cheatle
- * Created:					Thu Aug  4 10:56:08 BST 1994
+ * Author:          Phil Cheatle
+ * Created:         Thu Aug  4 10:56:08 BST 1994
  *
  * (C) Copyright 1994, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -78,18 +78,16 @@ void Tesseract::output_pass(  //Tess output pass //send to api
   while (page_res_it.word () != NULL) {
     check_debug_pt (page_res_it.word (), 120);
 
-	if (target_word_box)
-	{
-
-		TBOX current_word_box=page_res_it.word ()->word->bounding_box();
-		FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2);
-		if (!target_word_box->contains(center_pt))
-		{
-			page_res_it.forward ();
-			continue;
-		}
-
-	}
+    if (target_word_box) {
+      TBOX current_word_box = page_res_it.word()->word->bounding_box();
+      FCOORD center_pt(
+          (current_word_box.right() + current_word_box.left()) / 2,
+          (current_word_box.bottom() + current_word_box.top()) / 2);
+      if (!target_word_box->contains(center_pt)) {
+        page_res_it.forward();
+        continue;
+      }
+    }
     if (tessedit_write_block_separators &&
     block_of_last_word != page_res_it.block ()) {
       block_of_last_word = page_res_it.block ();
@@ -337,7 +335,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
   rating_per_ch = word.rating() / word_res->reject_map.length();
 
   if (rating_per_ch >= suspect_rating_per_ch)
-    return;                      //Don't touch bad ratings
+    return;  // Don't touch bad ratings
 
   if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
     /* Unreject any Tess Acceptable word - but NOT tess reject chs*/
diff --git a/ccmain/pageiterator.cpp b/ccmain/pageiterator.cpp
index fc15840c44..7d7865ae30 100644
--- a/ccmain/pageiterator.cpp
+++ b/ccmain/pageiterator.cpp
@@ -87,7 +87,7 @@ const PageIterator& PageIterator::operator=(const PageIterator& src) {
   rect_top_ = src.rect_top_;
   rect_width_ = src.rect_width_;
   rect_height_ = src.rect_height_;
-  if (it_ != NULL) delete it_;
+  delete it_;
   it_ = new PAGE_RES_IT(*src.it_);
   BeginWord(src.blob_index_);
   return *this;
@@ -597,10 +597,8 @@ void PageIterator::BeginWord(int offset) {
     }
     word_ = NULL;
     // We will be iterating the box_word.
-    if (cblob_it_ != NULL) {
-      delete cblob_it_;
-      cblob_it_ = NULL;
-    }
+    delete cblob_it_;
+    cblob_it_ = NULL;
   } else {
     // No recognition yet, so a "symbol" is a cblob.
     word_ = word_res->word;
diff --git a/ccmain/pagesegmain.cpp b/ccmain/pagesegmain.cpp
index 4e3c342070..d815365137 100644
--- a/ccmain/pagesegmain.cpp
+++ b/ccmain/pagesegmain.cpp
@@ -18,9 +18,6 @@
  **********************************************************************/
 
 #ifdef _WIN32
-#ifndef __GNUC__
-#include <windows.h>
-#endif  // __GNUC__
 #ifndef unlink
 #include <io.h>
 #endif
@@ -412,9 +409,10 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
                   "Don't rotate.\n", osd_margin);
           osd_orientation = 0;
         } else {
-          tprintf("OSD: Weak margin (%.2f) for %d blob text block, "
-                  "but using orientation anyway: %d\n",
-                  osd_margin, osd_blobs.length(), osd_orientation);
+          tprintf(
+              "OSD: Weak margin (%.2f) for %d blob text block, "
+              "but using orientation anyway: %d\n",
+              osd_margin, osd_blobs.length(), osd_orientation);
         }
       }
     }
diff --git a/ccmain/par_control.cpp b/ccmain/par_control.cpp
index 7a7d0415d6..6797a5a2a7 100644
--- a/ccmain/par_control.cpp
+++ b/ccmain/par_control.cpp
@@ -18,9 +18,6 @@
 ///////////////////////////////////////////////////////////////////////
 
 #include "tesseractclass.h"
-#ifdef OPENMP
-#include <omp.h>
-#endif  // OPENMP
 
 namespace tesseract {
 
@@ -53,7 +50,6 @@ void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
   }
   // Pre-classify all the blobs.
   if (tessedit_parallelize > 1) {
-    #pragma omp parallel for num_threads(10)
     for (int b = 0; b < blobs.size(); ++b) {
       *blobs[b].choices =
           blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
diff --git a/ccmain/paragraphs.cpp b/ccmain/paragraphs.cpp
index 7459940dd0..c7d21a9192 100644
--- a/ccmain/paragraphs.cpp
+++ b/ccmain/paragraphs.cpp
@@ -2052,7 +2052,7 @@ void ConvertHypothesizedModelRunsToParagraphs(
     bool single_line_paragraph = false;
     SetOfModels models;
     rows[start].NonNullHypotheses(&models);
-    if (models.size() > 0) {
+    if (!models.empty()) {
       model = models[0];
       if (rows[start].GetLineType(model) != LT_BODY)
         single_line_paragraph = true;
@@ -2190,17 +2190,17 @@ void LeftoverSegments(const GenericVector<RowScratchRegisters> &rows,
     SetOfModels models_w_crowns;
     rows[i].StrongHypotheses(&models);
     rows[i].NonNullHypotheses(&models_w_crowns);
-    if (models.empty() && models_w_crowns.size() > 0) {
+    if (models.empty() && !models_w_crowns.empty()) {
       // Crown paragraph.  Is it followed by a modeled line?
       for (int end = i + 1; end < rows.size(); end++) {
         SetOfModels end_models;
         SetOfModels strong_end_models;
         rows[end].NonNullHypotheses(&end_models);
         rows[end].StrongHypotheses(&strong_end_models);
-        if (end_models.size() == 0) {
+        if (end_models.empty()) {
           needs_fixing = true;
           break;
-        } else if (strong_end_models.size() > 0) {
+        } else if (!strong_end_models.empty()) {
           needs_fixing = false;
           break;
         }
@@ -2485,7 +2485,7 @@ void InitializeRowInfo(bool after_recognition,
   info->ltr = ltr >= rtl;
   info->has_leaders = num_leaders > 3;
   info->num_words = werds.size();
-  if (werds.size() > 0) {
+  if (!werds.empty()) {
     WERD_RES *lword = werds[0], *rword = werds[werds.size() - 1];
     info->lword_text = lword->best_choice->unichar_string().string();
     info->rword_text = rword->best_choice->unichar_string().string();
@@ -2538,7 +2538,7 @@ void DetectParagraphs(int debug_level,
 
   // If we're called before text recognition, we might not have
   // tight block bounding boxes, so trim by the minimum on each side.
-  if (row_infos.size() > 0) {
+  if (!row_infos.empty()) {
     int min_lmargin = row_infos[0].pix_ldistance;
     int min_rmargin = row_infos[0].pix_rdistance;
     for (int i = 1; i < row_infos.size(); i++) {
diff --git a/ccmain/paramsd.cpp b/ccmain/paramsd.cpp
index 7784f85361..e0e60539f6 100644
--- a/ccmain/paramsd.cpp
+++ b/ccmain/paramsd.cpp
@@ -329,13 +329,19 @@ void ParamsEditor::WriteParams(char *filename,
     fclose(fp);
     sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename);
     int a = sv_window_->ShowYesNoDialog(msg_str);
-    if (a == 'n') { return; }  // don't write
+    if (a == 'n') {
+      return;
+    }  // don't write
   }
 
 
   fp = fopen (filename, "wb");  // can we write to it?
   if (fp == NULL) {
-    sv_window_->AddMessage("Can't write to file " "%s" "", filename);
+    sv_window_->AddMessage(
+        "Can't write to file "
+        "%s"
+        "",
+        filename);
     return;
   }
 
diff --git a/ccmain/pgedit.cpp b/ccmain/pgedit.cpp
index d78c0dacc0..5e23595422 100644
--- a/ccmain/pgedit.cpp
+++ b/ccmain/pgedit.cpp
@@ -191,7 +191,7 @@ ScrollView* bln_word_window_handle() {  // return handle
  */
 
 void build_image_window(int width, int height) {
-  if (image_win != NULL) { delete image_win; }
+  delete image_win;
   image_win = new ScrollView(editor_image_win_name.string(),
                              editor_image_xpos, editor_image_ypos,
                              width + 1,
diff --git a/ccmain/reject.cpp b/ccmain/reject.cpp
index aacc80dd6e..72f9d873d1 100644
--- a/ccmain/reject.cpp
+++ b/ccmain/reject.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        reject.cpp  (Formerly reject.c)
  * Description: Rejection functions used in tessedit
- * Author:		Phil Cheatle
- * Created:		Wed Sep 23 16:50:21 BST 1992
+ * Author:    Phil Cheatle
+ * Created:   Wed Sep 23 16:50:21 BST 1992
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/ccmain/tessedit.cpp b/ccmain/tessedit.cpp
index dd96ba0ebd..b2fa02196a 100644
--- a/ccmain/tessedit.cpp
+++ b/ccmain/tessedit.cpp
@@ -44,7 +44,7 @@
 #include "params.h"
 
 #define VARDIR        "configs/" /*variables files */
-                                 //config under api
+                                 // config under api
 #define API_CONFIG      "configs/api_config"
 
 ETEXT_DESC *global_monitor = NULL;  // progress monitor
@@ -468,7 +468,9 @@ int Tesseract::init_tesseract_lm(const char *arg0,
   if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
                                 NULL, 0, NULL, NULL, false))
     return -1;
-  getDict().Load(Dict::GlobalDawgCache());
+  getDict().SetupForLoad(Dict::GlobalDawgCache());
+  getDict().Load(tessdata_manager.GetDataFileName().string(), lang);
+  getDict().FinishLoad();
   tessdata_manager.End();
   return 0;
 }
diff --git a/ccmain/tesseract_cube_combiner.cpp b/ccmain/tesseract_cube_combiner.cpp
index e17bd04c2a..2b91f5259d 100644
--- a/ccmain/tesseract_cube_combiner.cpp
+++ b/ccmain/tesseract_cube_combiner.cpp
@@ -21,8 +21,6 @@
 // the recognition results of Tesseract and Cube at the word level
 
 #include <algorithm>
-#include <string>
-#include <vector>
 #include <wctype.h>
 
 #include "tesseract_cube_combiner.h"
diff --git a/ccmain/tesseract_cube_combiner.h b/ccmain/tesseract_cube_combiner.h
index 49a0e2f4b0..9a6eb35302 100644
--- a/ccmain/tesseract_cube_combiner.h
+++ b/ccmain/tesseract_cube_combiner.h
@@ -27,16 +27,6 @@
 #include <vector>
 #include "pageres.h"
 
-#ifdef _WIN32
-#include <windows.h>
-using namespace std;
-#endif
-
-#ifdef USE_STD_NAMESPACE
-using std::string;
-using std::vector;
-#endif
-
 namespace tesseract {
 
 class CubeObject;
@@ -72,15 +62,15 @@ class TesseractCubeCombiner {
   // output parameter will be true if both answers are identical,
   // false otherwise. Modifies the cube_alt_list, so no assumptions
   // should be made about its state upon return.
-  bool ComputeCombinerFeatures(const string &tess_res,
+  bool ComputeCombinerFeatures(const std::string &tess_res,
                                int tess_confidence,
                                CubeObject *cube_obj,
                                WordAltList *cube_alt_list,
-                               vector<double> *features,
+                               std::vector<double> *features,
                                bool *agreement);
 
   // Is the word valid according to Tesseract's language model
-  bool ValidWord(const string &str);
+  bool ValidWord(const std::string &str);
 
   // Loads the combiner neural network from file, using cube_cntxt_
   // to find path.
@@ -88,11 +78,11 @@ class TesseractCubeCombiner {
  private:
   // Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally
   // strips punc and/or normalizes case and then converts back
-  string NormalizeString(const string &str, bool remove_punc, bool norm_case);
+  std::string NormalizeString(const std::string &str, bool remove_punc, bool norm_case);
 
   // Compares 2 strings after optionally normalizing them and or stripping
   // punctuation
-  int CompareStrings(const string &str1, const string &str2, bool ignore_punc,
+  int CompareStrings(const std::string &str1, const std::string &str2, bool ignore_punc,
                      bool norm_case);
 
   NeuralNet *combiner_net_;  // pointer to the combiner NeuralNet object
diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp
index 8db50fbd54..5cbf70c8c2 100644
--- a/ccmain/tesseractclass.cpp
+++ b/ccmain/tesseractclass.cpp
@@ -398,8 +398,8 @@ Tesseract::Tesseract()
                  "Don't suspect dict wds longer than this", this->params()),
       BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected",
                   this->params()),
-      double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit",
-                    this->params()),
+      double_MEMBER(suspect_rating_per_ch, 999.9,
+                    "Don't touch bad rating limit", this->params()),
       double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit",
                     this->params()),
       BOOL_MEMBER(tessedit_minimal_rejection, false,
@@ -512,7 +512,6 @@ Tesseract::Tesseract()
                     "Page separator (default is form feed control character)",
                     this->params()),
 
-
       // The following parameters were deprecated and removed from their
       // original
       // locations. The parameters are temporarily kept here to give Tesseract
@@ -606,6 +605,7 @@ Tesseract::Tesseract()
       pix_binary_(NULL),
       cube_binary_(NULL),
       pix_grey_(NULL),
+      pix_original_(NULL),
       pix_thresholds_(NULL),
       source_resolution_(0),
       textord_(this),
@@ -625,6 +625,7 @@ Tesseract::Tesseract()
 
 Tesseract::~Tesseract() {
   Clear();
+  pixDestroy(&pix_original_);
   end_tesseract();
   sub_langs_.delete_data_pointers();
 #ifndef NO_CUBE_BUILD
diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h
index 91d25bc8ae..e01625e354 100644
--- a/ccmain/tesseractclass.h
+++ b/ccmain/tesseractclass.h
@@ -38,7 +38,6 @@
 
 class BLOB_CHOICE_LIST_CLIST;
 class BLOCK_LIST;
-class CharSamp;
 struct OSResults;
 class PAGE_RES;
 class PAGE_RES_IT;
@@ -98,6 +97,7 @@ namespace tesseract {
 
 class ColumnFinder;
 #ifndef NO_CUBE_BUILD
+class CharSamp;
 class CubeLineObject;
 class CubeObject;
 class CubeRecoContext;
@@ -189,7 +189,7 @@ class Tesseract : public Wordrec {
   }
   // Destroy any existing pix and return a pointer to the pointer.
   Pix** mutable_pix_binary() {
-    Clear();
+    pixDestroy(&pix_binary_);
     return &pix_binary_;
   }
   Pix* pix_binary() const {
@@ -202,16 +202,20 @@ class Tesseract : public Wordrec {
     pixDestroy(&pix_grey_);
     pix_grey_ = grey_pix;
   }
-  // Returns a pointer to a Pix representing the best available image of the
-  // page. The image will be 8-bit grey if the input was grey or color. Note
-  // that in grey 0 is black and 255 is white. If the input was binary, then
-  // the returned Pix will be binary. Note that here black is 1 and white is 0.
-  // To tell the difference pixGetDepth() will return 8 or 1.
-  // In either case, the return value is a borrowed Pix, and should not be
-  // deleted or pixDestroyed.
-  Pix* BestPix() const {
-    return pix_grey_ != NULL ? pix_grey_ : pix_binary_;
+  Pix* pix_original() const { return pix_original_; }
+  // Takes ownership of the given original_pix.
+  void set_pix_original(Pix* original_pix) {
+    pixDestroy(&pix_original_);
+    pix_original_ = original_pix;
   }
+  // Returns a pointer to a Pix representing the best available (original) image
+  // of the page. Can be of any bit depth, but never color-mapped, as that has
+  // always been dealt with. Note that in grey and color, 0 is black and 255 is
+  // white. If the input was binary, then black is 1 and white is 0.
+  // To tell the difference pixGetDepth() will return 32, 8 or 1.
+  // In any case, the return value is a borrowed Pix, and should not be
+  // deleted or pixDestroyed.
+  Pix* BestPix() const { return pix_original_; }
   void set_pix_thresholds(Pix* thresholds) {
     pixDestroy(&pix_thresholds_);
     pix_thresholds_ = thresholds;
@@ -1010,8 +1014,7 @@ class Tesseract : public Wordrec {
   INT_VAR_H(suspect_level, 99, "Suspect marker level");
   INT_VAR_H(suspect_space_level, 100,
             "Min suspect level for rejecting spaces");
-  INT_VAR_H(suspect_short_words, 2,
-            "Don't Suspect dict wds longer than this");
+  INT_VAR_H(suspect_short_words, 2, "Don't Suspect dict wds longer than this");
   BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");
   double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit");
   double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");
@@ -1175,6 +1178,8 @@ class Tesseract : public Wordrec {
   Pix* cube_binary_;
   // Grey-level input image if the input was not binary, otherwise NULL.
   Pix* pix_grey_;
+  // Original input image. Color if the input was color.
+  Pix* pix_original_;
   // Thresholds that were used to generate the thresholded image from grey.
   Pix* pix_thresholds_;
   // Input image resolution after any scaling. The resolution is not well
diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp
index df6abd01eb..4208c65ba2 100644
--- a/ccmain/thresholder.cpp
+++ b/ccmain/thresholder.cpp
@@ -152,19 +152,27 @@ void ImageThresholder::SetImage(const Pix* pix) {
   int depth;
   pixGetDimensions(src, &image_width_, &image_height_, &depth);
   // Convert the image as necessary so it is one of binary, plain RGB, or
-  // 8 bit with no colormap.
-  if (depth > 1 && depth < 8) {
+  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
+  // not just a clone of the input.
+  if (pixGetColormap(src)) {
+    Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
+    depth = pixGetDepth(tmp);
+    if (depth > 1 && depth < 8) {
+      pix_ = pixConvertTo8(tmp, false);
+      pixDestroy(&tmp);
+    } else {
+      pix_ = tmp;
+    }
+  } else if (depth > 1 && depth < 8) {
     pix_ = pixConvertTo8(src, false);
-  } else if (pixGetColormap(src)) {
-    pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
   } else {
-    pix_ = pixClone(src);
+    pix_ = pixCopy(NULL, src);
   }
   depth = pixGetDepth(pix_);
   pix_channels_ = depth / 8;
   pix_wpl_ = pixGetWpl(pix_);
   scale_ = 1;
-  estimated_res_ = yres_ = pixGetYRes(src);
+  estimated_res_ = yres_ = pixGetYRes(pix_);
   Init();
 }
 
@@ -173,8 +181,11 @@ void ImageThresholder::SetImage(const Pix* pix) {
 // Caller must use pixDestroy to free the created Pix.
 void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
   if (pix_channels_ == 0) {
-    // We have a binary image, so it just has to be cloned.
-    *pix = GetPixRect();
+    // We have a binary image, but it still has to be copied, as this API
+    // allows the caller to modify the output.
+    Pix* original = GetPixRect();
+    *pix = pixCopy(NULL, original);
+    pixDestroy(&original);
   } else {
     OtsuThresholdRectToPix(pix_, pix);
   }
@@ -257,10 +268,10 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
   OpenclDevice od;
   if ((num_channels == 4 || num_channels == 1) &&
       od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) {
-    od.ThresholdRectToPixOCL((const unsigned char*)pixGetData(src_pix),
-                             num_channels, pixGetWpl(src_pix) * 4,
-                             thresholds, hi_values, out_pix /*pix_OCL*/,
-                             rect_height_, rect_width_, rect_top_, rect_left_);
+    od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels,
+                             pixGetWpl(src_pix) * 4, thresholds, hi_values,
+                             out_pix /*pix_OCL*/, rect_height_, rect_width_,
+                             rect_top_, rect_left_);
   } else {
 #endif
     ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
diff --git a/ccstruct/blamer.cpp b/ccstruct/blamer.cpp
index 5d2837d084..4573e9b3f0 100644
--- a/ccstruct/blamer.cpp
+++ b/ccstruct/blamer.cpp
@@ -317,7 +317,7 @@ void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) {
   int num_blobs = word->chopped_word->blobs.size();
   int box_index = 0;
   int blob_index = 0;
-  inT16 truth_x;
+  inT16 truth_x = -1;
   while (box_index < truth_word_.length() && blob_index < num_blobs) {
     truth_x = norm_truth_word_.BlobBox(box_index).right();
     TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
diff --git a/ccstruct/blobbox.cpp b/ccstruct/blobbox.cpp
index 280096b5d3..3ffb9dc930 100644
--- a/ccstruct/blobbox.cpp
+++ b/ccstruct/blobbox.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        blobbox.cpp  (Formerly blobnbox.c)
  * Description: Code for the textord blob class.
- * Author:					Ray Smith
- * Created:					Thu Jul 30 09:08:51 BST 1992
+ * Author:          Ray Smith
+ * Created:         Thu Jul 30 09:08:51 BST 1992
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,7 +31,9 @@
 #define PROJECTION_MARGIN 10     //arbitrary
 #define EXTERN
 
-ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK)
+ELISTIZE(BLOBNBOX)
+ELIST2IZE(TO_ROW)
+ELISTIZE(TO_BLOCK)
 
 // Up to 30 degrees is allowed for rotations of diacritic blobs.
 const double kCosSmallAngle = 0.866;
@@ -176,7 +178,7 @@ void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
     gaps[dir] = MAX_INT16;
     BLOBNBOX* neighbour = neighbours_[dir];
     if (neighbour != NULL) {
-      TBOX n_box = neighbour->bounding_box();
+      const TBOX& n_box = neighbour->bounding_box();
       if (dir == BND_LEFT || dir == BND_RIGHT) {
         gaps[dir] = box.x_gap(n_box);
       } else {
diff --git a/ccstruct/boxread.cpp b/ccstruct/boxread.cpp
index f4aedca5b3..fee0aa9aef 100644
--- a/ccstruct/boxread.cpp
+++ b/ccstruct/boxread.cpp
@@ -34,8 +34,7 @@ FILE* OpenBoxFile(const STRING& fname) {
   STRING filename = BoxFileName(fname);
   FILE* box_file = NULL;
   if (!(box_file = fopen(filename.string(), "rb"))) {
-    CANTOPENFILE.error("read_next_box", TESSEXIT,
-                       "Can't open box file %s",
+    CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s",
                        filename.string());
   }
   return box_file;
diff --git a/ccstruct/boxword.h b/ccstruct/boxword.h
index 742bbb8e4d..c1fab068bb 100644
--- a/ccstruct/boxword.h
+++ b/ccstruct/boxword.h
@@ -82,9 +82,7 @@ class BoxWord {
   const TBOX& bounding_box() const {
     return bbox_;
   }
-  int length() const {
-    return length_;
-  }
+  int length() const { return length_; }
   const TBOX& BlobBox(int index) const {
     return boxes_[index];
   }
diff --git a/ccstruct/coutln.cpp b/ccstruct/coutln.cpp
index bc2b119d8c..238272d2c6 100644
--- a/ccstruct/coutln.cpp
+++ b/ccstruct/coutln.cpp
@@ -48,9 +48,9 @@ ICOORD C_OUTLINE::step_coords[4] = {
  * @param length length of loop
  */
 
-C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left, 
-                      ICOORD top_right, inT16 length)
-    : box (bot_left, top_right), start (startpt->pos), offsets(NULL) {
+C_OUTLINE::C_OUTLINE(CRACKEDGE* startpt, ICOORD bot_left, ICOORD top_right,
+                     inT16 length)
+    : box(bot_left, top_right), start(startpt->pos), offsets(NULL) {
   inT16 stepindex;               //index to step
   CRACKEDGE *edgept;             //current point
 
@@ -71,7 +71,6 @@ C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left,
   }
 }
 
-
 /**
  * @name C_OUTLINE::C_OUTLINE
  *
@@ -139,7 +138,7 @@ inT16 length                     //length of loop
  * @param rotation rotate to coord
  */
 
-C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(NULL) {
+C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(NULL) {
   TBOX new_box;                   //easy bounding
   inT16 stepindex;               //index to step
   inT16 dirdiff;                 //direction change
@@ -300,7 +299,6 @@ inT32 C_OUTLINE::perimeter() const {
   return total_steps;
 }
 
-
 /**
  * @name C_OUTLINE::outer_area
  *
@@ -332,7 +330,6 @@ inT32 C_OUTLINE::outer_area() const {
   return total;
 }
 
-
 /**
  * @name C_OUTLINE::count_transitions
  *
@@ -459,7 +456,6 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) {
   return total;
 }
 
-
 /**
  * @name C_OUTLINE::operator<
  *
@@ -468,8 +464,7 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) {
  */
 
 BOOL8
-C_OUTLINE::operator< (const C_OUTLINE & other) const
-{
+C_OUTLINE::operator<(const C_OUTLINE& other) const {
   inT16 count = 0;               //winding count
   ICOORD pos;                    //position of point
   inT32 stepindex;               //index to cstep
@@ -495,7 +490,6 @@ C_OUTLINE::operator< (const C_OUTLINE & other) const
   return count != 0;
 }
 
-
 /**
  * @name C_OUTLINE::winding_number
  *
@@ -534,7 +528,6 @@ inT16 C_OUTLINE::winding_number(ICOORD point) const {
   return count;                  //winding number
 }
 
-
 /**
  * C_OUTLINE::turn_direction
  *
@@ -563,7 +556,6 @@ inT16 C_OUTLINE::turn_direction() const {  //winding number
   return count;                  //winding number
 }
 
-
 /**
  * @name C_OUTLINE::reverse
  *
@@ -586,7 +578,6 @@ void C_OUTLINE::reverse() {  //reverse drection
   }
 }
 
-
 /**
  * @name C_OUTLINE::move
  *
@@ -661,14 +652,27 @@ static void ComputeGradient(const l_uint32* data, int wpl,
                             int x, int y, int width, int height,
                             ICOORD* gradient) {
   const l_uint32* line = data + y * wpl;
-  int pix_x_y = x < width && y < height ?
-      GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x) : 255;
-  int pix_x_prevy = x < width && y > 0 ?
-      GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line - wpl)), x) : 255;
-  int pix_prevx_prevy = x > 0 && y > 0 ?
-      GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<void const*>(line - wpl)), x - 1) : 255;
-  int pix_prevx_y = x > 0 && y < height ?
-      GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x - 1) : 255;
+  int pix_x_y =
+      x < width && y < height
+          ? GET_DATA_BYTE(
+                const_cast<void*>(reinterpret_cast<const void*>(line)), x)
+          : 255;
+  int pix_x_prevy =
+      x < width && y > 0
+          ? GET_DATA_BYTE(
+                const_cast<void*>(reinterpret_cast<const void*>(line - wpl)), x)
+          : 255;
+  int pix_prevx_prevy =
+      x > 0 && y > 0
+          ? GET_DATA_BYTE(
+                const_cast<void*>(reinterpret_cast<void const*>(line - wpl)),
+                x - 1)
+          : 255;
+  int pix_prevx_y =
+      x > 0 && y < height
+          ? GET_DATA_BYTE(
+                const_cast<void*>(reinterpret_cast<const void*>(line)), x - 1)
+          : 255;
   gradient->set_x(pix_x_y + pix_x_prevy - (pix_prevx_y + pix_prevx_prevy));
   gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y));
 }
@@ -684,8 +688,10 @@ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign,
   if (y <= 0 || y >= height)
     return false;
   const l_uint32* line = data + y * wpl;
-  int pixel1 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line - wpl)), x);
-  int pixel2 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x);
+  int pixel1 = GET_DATA_BYTE(
+      const_cast<void*>(reinterpret_cast<const void*>(line - wpl)), x);
+  int pixel2 =
+      GET_DATA_BYTE(const_cast<void*>(reinterpret_cast<const void*>(line)), x);
   int diff = (pixel2 - pixel1) * diff_sign;
   if (diff > *best_diff) {
     *best_diff = diff;
@@ -705,8 +711,10 @@ static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign,
                                    int* best_diff, int* best_sum, int* best_x) {
   if (x <= 0 || x >= width)
     return false;
-  int pixel1 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x - 1);
-  int pixel2 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x);
+  int pixel1 = GET_DATA_BYTE(
+      const_cast<void*>(reinterpret_cast<const void*>(line)), x - 1);
+  int pixel2 =
+      GET_DATA_BYTE(const_cast<void*>(reinterpret_cast<const void*>(line)), x);
   int diff = (pixel2 - pixel1) * diff_sign;
   if (diff > *best_diff) {
     *best_diff = diff;
@@ -954,8 +962,7 @@ void C_OUTLINE::render_outline(int left, int top, Pix* pix) const {
  */
 
 #ifndef GRAPHICS_DISABLED
-void C_OUTLINE::plot(ScrollView* window,
-                     ScrollView::Color colour) const {
+void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const {
   inT16 stepindex;               // index to cstep
   ICOORD pos;                    // current position
   DIR128 stepdir;                // direction of step
@@ -1016,7 +1023,6 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
 }
 #endif
 
-
 /**
  * @name C_OUTLINE::operator=
  *
@@ -1024,7 +1030,7 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
  * @param source assign from this
  */
 
-C_OUTLINE & C_OUTLINE::operator= (const C_OUTLINE & source) {
+C_OUTLINE& C_OUTLINE::operator=(const C_OUTLINE& source) {
   box = source.box;
   start = source.start;
   if (steps != NULL)
diff --git a/ccstruct/fontinfo.cpp b/ccstruct/fontinfo.cpp
index d3e6f3756e..536ac280de 100644
--- a/ccstruct/fontinfo.cpp
+++ b/ccstruct/fontinfo.cpp
@@ -241,7 +241,7 @@ bool read_set(FILE* f, FontSet* fs, bool swap) {
   if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false;
   if (swap)
     Reverse32(&fs->size);
-  fs->configs = new int[fs->size];
+  fs->configs = new int32_t[fs->size];
   for (int i = 0; i < fs->size; ++i) {
     if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false;
     if (swap)
diff --git a/ccstruct/fontinfo.h b/ccstruct/fontinfo.h
index 5f2d420852..597a179c97 100644
--- a/ccstruct/fontinfo.h
+++ b/ccstruct/fontinfo.h
@@ -25,6 +25,8 @@
 #include "host.h"
 #include "unichar.h"
 
+#include <stdint.h>
+
 template <typename T> class UnicityTable;
 
 namespace tesseract {
@@ -135,8 +137,8 @@ struct FontInfo {
 // the FontInfo in the FontSet structure, it's better to share FontInfos among
 // FontSets (Classify::fontinfo_table_).
 struct FontSet {
-  int           size;
-  int*          configs;  // FontInfo ids
+  int32_t       size;
+  int32_t*      configs;  // FontInfo ids
 };
 
 // Class that adds a bit of functionality on top of GenericVector to
diff --git a/ccstruct/hpdsizes.h b/ccstruct/hpdsizes.h
index 2670e21b07..f4d886a0b4 100644
--- a/ccstruct/hpdsizes.h
+++ b/ccstruct/hpdsizes.h
@@ -1,3 +1,12 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef           HPDSIZES_H
 #define           HPDSIZES_H
 
diff --git a/ccstruct/imagedata.cpp b/ccstruct/imagedata.cpp
index 3c244c7724..86a2d74deb 100644
--- a/ccstruct/imagedata.cpp
+++ b/ccstruct/imagedata.cpp
@@ -30,6 +30,16 @@
 #include "helpers.h"
 #include "tprintf.h"
 
+#if defined(__MINGW32__)
+# include <unistd.h>
+#elif __cplusplus > 199711L   // in C++11
+# include <thread>
+#endif
+
+// Number of documents to read ahead while training. Doesn't need to be very
+// large.
+const int kMaxReadAhead = 8;
+
 namespace tesseract {
 
 WordFeature::WordFeature() : x_(0), y_(0), dir_(0) {
@@ -182,6 +192,19 @@ bool ImageData::DeSerialize(bool swap, TFile* fp) {
   return true;
 }
 
+// As DeSerialize, but only seeks past the data - hence a static method.
+bool ImageData::SkipDeSerialize(bool swap, TFile* fp) {
+  if (!STRING::SkipDeSerialize(swap, fp)) return false;
+  inT32 page_number;
+  if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false;
+  if (!GenericVector<char>::SkipDeSerialize(swap, fp)) return false;
+  if (!STRING::SkipDeSerialize(swap, fp)) return false;
+  if (!GenericVector<TBOX>::SkipDeSerialize(swap, fp)) return false;
+  if (!GenericVector<STRING>::SkipDeSerializeClasses(swap, fp)) return false;
+  inT8 vertical = 0;
+  return fp->FRead(&vertical, sizeof(vertical), 1) == 1;
+}
+
 // Saves the given Pix as a PNG-encoded string and destroys it.
 void ImageData::SetPix(Pix* pix) {
   SetPixInternal(pix, &image_data_);
@@ -195,37 +218,34 @@ Pix* ImageData::GetPix() const {
 // Gets anything and everything with a non-NULL pointer, prescaled to a
 // given target_height (if 0, then the original image height), and aligned.
 // Also returns (if not NULL) the width and height of the scaled image.
-// The return value is the scale factor that was applied to the image to
-// achieve the target_height.
-float ImageData::PreScale(int target_height, Pix** pix,
-                          int* scaled_width, int* scaled_height,
-                          GenericVector<TBOX>* boxes) const {
+// The return value is the scaled Pix, which must be pixDestroyed after use,
+// and scale_factor (if not NULL) is set to the scale factor that was applied
+// to the image to achieve the target_height.
+Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor,
+                         int* scaled_width, int* scaled_height,
+                         GenericVector<TBOX>* boxes) const {
   int input_width = 0;
   int input_height = 0;
   Pix* src_pix = GetPix();
   ASSERT_HOST(src_pix != NULL);
   input_width = pixGetWidth(src_pix);
   input_height = pixGetHeight(src_pix);
-  if (target_height == 0)
-    target_height = input_height;
+  if (target_height == 0) {
+    target_height = MIN(input_height, max_height);
+  }
   float im_factor = static_cast<float>(target_height) / input_height;
   if (scaled_width != NULL)
     *scaled_width = IntCastRounded(im_factor * input_width);
   if (scaled_height != NULL)
     *scaled_height = target_height;
-  if (pix != NULL) {
-    // Get the scaled image.
-    pixDestroy(pix);
-    *pix = pixScale(src_pix, im_factor, im_factor);
-    if (*pix == NULL) {
-      tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
-              input_width, input_height, im_factor);
-    }
-    if (scaled_width != NULL)
-      *scaled_width = pixGetWidth(*pix);
-    if (scaled_height != NULL)
-      *scaled_height = pixGetHeight(*pix);
+  // Get the scaled image.
+  Pix* pix = pixScale(src_pix, im_factor, im_factor);
+  if (pix == NULL) {
+    tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
+            input_width, input_height, im_factor);
   }
+  if (scaled_width != NULL) *scaled_width = pixGetWidth(pix);
+  if (scaled_height != NULL) *scaled_height = pixGetHeight(pix);
   pixDestroy(&src_pix);
   if (boxes != NULL) {
     // Get the boxes.
@@ -241,7 +261,8 @@ float ImageData::PreScale(int target_height, Pix** pix,
       boxes->push_back(box);
     }
   }
-  return im_factor;
+  if (scale_factor != NULL) *scale_factor = im_factor;
+  return pix;
 }
 
 int ImageData::MemoryUsed() const {
@@ -266,19 +287,20 @@ void ImageData::Display() const {
   // Draw the boxes.
   win->Pen(ScrollView::RED);
   win->Brush(ScrollView::NONE);
-  win->TextAttributes("Arial", kTextSize, false, false, false);
-  for (int b = 0; b < boxes_.size(); ++b) {
-    boxes_[b].plot(win);
-    win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string());
-    TBOX scaled(boxes_[b]);
-    scaled.scale(256.0 / height);
-    scaled.plot(win);
+  int text_size = kTextSize;
+  if (!boxes_.empty() && boxes_[0].height() * 2 < text_size)
+    text_size = boxes_[0].height() * 2;
+  win->TextAttributes("Arial", text_size, false, false, false);
+  if (!boxes_.empty()) {
+    for (int b = 0; b < boxes_.size(); ++b) {
+      boxes_[b].plot(win);
+      win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string());
+    }
+  } else {
+    // The full transcription.
+    win->Pen(ScrollView::CYAN);
+    win->Text(0, height + kTextSize * 2, transcription_.string());
   }
-  // The full transcription.
-  win->Pen(ScrollView::CYAN);
-  win->Text(0, height + kTextSize * 2, transcription_.string());
-  // Add the features.
-  win->Pen(ScrollView::GREEN);
   win->Update();
   window_wait(win);
 #endif
@@ -340,27 +362,51 @@ bool ImageData::AddBoxes(const char* box_text) {
   return false;
 }
 
+// Thread function to call ReCachePages.
+void* ReCachePagesFunc(void* data) {
+  DocumentData* document_data = reinterpret_cast<DocumentData*>(data);
+  document_data->ReCachePages();
+  return NULL;
+}
+
 DocumentData::DocumentData(const STRING& name)
-  : document_name_(name), pages_offset_(0), total_pages_(0),
-    memory_used_(0), max_memory_(0), reader_(NULL) {}
+    : document_name_(name),
+      pages_offset_(-1),
+      total_pages_(-1),
+      memory_used_(0),
+      max_memory_(0),
+      reader_(NULL) {}
 
-DocumentData::~DocumentData() {}
+DocumentData::~DocumentData() {
+  SVAutoLock lock_p(&pages_mutex_);
+  SVAutoLock lock_g(&general_mutex_);
+}
 
 // Reads all the pages in the given lstmf filename to the cache. The reader
 // is used to read the file.
 bool DocumentData::LoadDocument(const char* filename, const char* lang,
                                 int start_page, inT64 max_memory,
                                 FileReader reader) {
+  SetDocument(filename, lang, max_memory, reader);
+  pages_offset_ = start_page;
+  return ReCachePages();
+}
+
+// Sets up the document, without actually loading it.
+void DocumentData::SetDocument(const char* filename, const char* lang,
+                               inT64 max_memory, FileReader reader) {
+  SVAutoLock lock_p(&pages_mutex_);
+  SVAutoLock lock(&general_mutex_);
   document_name_ = filename;
   lang_ = lang;
-  pages_offset_ = start_page;
+  pages_offset_ = -1;
   max_memory_ = max_memory;
   reader_ = reader;
-  return ReCachePages();
 }
 
 // Writes all the pages to the given filename. Returns false on error.
 bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
+  SVAutoLock lock(&pages_mutex_);
   TFile fp;
   fp.OpenWrite(NULL);
   if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
@@ -370,112 +416,174 @@ bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
   return true;
 }
 bool DocumentData::SaveToBuffer(GenericVector<char>* buffer) {
+  SVAutoLock lock(&pages_mutex_);
   TFile fp;
   fp.OpenWrite(buffer);
   return pages_.Serialize(&fp);
 }
 
+// Adds the given page data to this document, counting up memory.
+void DocumentData::AddPageToDocument(ImageData* page) {
+  SVAutoLock lock(&pages_mutex_);
+  pages_.push_back(page);
+  set_memory_used(memory_used() + page->MemoryUsed());
+}
+
+// If the given index is not currently loaded, loads it using a separate
+// thread.
+void DocumentData::LoadPageInBackground(int index) {
+  ImageData* page = NULL;
+  if (IsPageAvailable(index, &page)) return;
+  SVAutoLock lock(&pages_mutex_);
+  if (pages_offset_ == index) return;
+  pages_offset_ = index;
+  pages_.clear();
+  #ifndef GRAPHICS_DISABLED
+  SVSync::StartThread(ReCachePagesFunc, this);
+  #endif  // GRAPHICS_DISABLED
+}
+
 // Returns a pointer to the page with the given index, modulo the total
-// number of pages, recaching if needed.
+// number of pages. Blocks until the background load is completed.
 const ImageData* DocumentData::GetPage(int index) {
-  index = Modulo(index, total_pages_);
-  if (index < pages_offset_ || index >= pages_offset_ + pages_.size()) {
-    pages_offset_ = index;
-    if (!ReCachePages()) return NULL;
+  ImageData* page = NULL;
+  while (!IsPageAvailable(index, &page)) {
+    // If there is no background load scheduled, schedule one now.
+    pages_mutex_.Lock();
+    bool needs_loading = pages_offset_ != index;
+    pages_mutex_.Unlock();
+    if (needs_loading) LoadPageInBackground(index);
+    // We can't directly load the page, or the background load will delete it
+    // while the caller is using it, so give it a chance to work.
+#if __cplusplus > 199711L
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+#elif _WIN32  // MSVS
+    Sleep(1000);
+#else
+    sleep(1);
+#endif
+  }
+  return page;
+}
+
+// Returns true if the requested page is available, and provides a pointer,
+// which may be NULL if the document is empty. May block, even though it
+// doesn't guarantee to return true.
+bool DocumentData::IsPageAvailable(int index, ImageData** page) {
+  SVAutoLock lock(&pages_mutex_);
+  int num_pages = NumPages();
+  if (num_pages == 0 || index < 0) {
+    *page = NULL;  // Empty Document.
+    return true;
+  }
+  if (num_pages > 0) {
+    index = Modulo(index, num_pages);
+    if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
+      *page = pages_[index - pages_offset_];  // Page is available already.
+      return true;
+    }
   }
-  return pages_[index - pages_offset_];
+  return false;
+}
+
+// Removes all pages from memory and frees the memory, but does not forget
+// the document metadata. Returns the memory saved.
+inT64 DocumentData::UnCache() {
+  SVAutoLock lock(&pages_mutex_);
+  inT64 memory_saved = memory_used();
+  pages_.clear();
+  pages_offset_ = -1;
+  set_total_pages(-1);
+  set_memory_used(0);
+  tprintf("Unloaded document %s, saving %lld memory\n",
+          document_name_.string(), static_cast<long long>(memory_saved));
+  return memory_saved;
+}
 
-// Loads as many pages can fit in max_memory_ starting at index pages_offset_.
+// Locks the pages_mutex_ and loads as many pages as can fit in max_memory_
+// starting at index pages_offset_.
 bool DocumentData::ReCachePages() {
+  SVAutoLock lock(&pages_mutex_);
   // Read the file.
+  set_total_pages(0);
+  set_memory_used(0);
+  int loaded_pages = 0;
+  pages_.truncate(0);
   TFile fp;
-  if (!fp.Open(document_name_, reader_)) return false;
-  memory_used_ = 0;
-  if (!pages_.DeSerialize(false, &fp)) {
-    tprintf("Deserialize failed: %s\n", document_name_.string());
-    pages_.truncate(0);
+  if (!fp.Open(document_name_, reader_) ||
+      !PointerVector<ImageData>::DeSerializeSize(false, &fp, &loaded_pages) ||
+      loaded_pages <= 0) {
+    tprintf("Deserialize header failed: %s\n", document_name_.string());
     return false;
   }
-  total_pages_ = pages_.size();
-  pages_offset_ %= total_pages_;
-  // Delete pages before the first one we want, and relocate the rest.
+  pages_offset_ %= loaded_pages;
+  // Skip pages before the first one we want, and load the rest until max
+  // memory and skip the rest after that.
   int page;
-  for (page = 0; page < pages_.size(); ++page) {
-    if (page < pages_offset_) {
-      delete pages_[page];
-      pages_[page] = NULL;
+  for (page = 0; page < loaded_pages; ++page) {
+    if (page < pages_offset_ ||
+        (max_memory_ > 0 && memory_used() > max_memory_)) {
+      if (!PointerVector<ImageData>::DeSerializeSkip(false, &fp)) break;
     } else {
-      ImageData* image_data = pages_[page];
-      if (max_memory_ > 0 && page > pages_offset_ &&
-          memory_used_ + image_data->MemoryUsed() > max_memory_)
-        break;  // Don't go over memory quota unless the first image.
+      if (!pages_.DeSerializeElement(false, &fp)) break;
+      ImageData* image_data = pages_.back();
       if (image_data->imagefilename().length() == 0) {
         image_data->set_imagefilename(document_name_);
         image_data->set_page_number(page);
       }
       image_data->set_language(lang_);
-      memory_used_ += image_data->MemoryUsed();
-      if (pages_offset_ != 0) {
-        pages_[page - pages_offset_] = image_data;
-        pages_[page] = NULL;
-      }
+      set_memory_used(memory_used() + image_data->MemoryUsed());
     }
   }
-  pages_.truncate(page - pages_offset_);
-  tprintf("Loaded %d/%d pages (%d-%d) of document %s\n",
-          pages_.size(), total_pages_, pages_offset_,
-          pages_offset_ + pages_.size(), document_name_.string());
+  if (page < loaded_pages) {
+    tprintf("Deserialize failed: %s read %d/%d pages\n",
+            document_name_.string(), page, loaded_pages);
+    pages_.truncate(0);
+  } else {
+    tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", pages_.size(),
+            loaded_pages, pages_offset_, pages_offset_ + pages_.size(),
+            document_name_.string());
+  }
+  set_total_pages(loaded_pages);
   return !pages_.empty();
 }
 
-// Adds the given page data to this document, counting up memory.
-void DocumentData::AddPageToDocument(ImageData* page) {
-  pages_.push_back(page);
-  memory_used_ += page->MemoryUsed();
-}
-
 // A collection of DocumentData that knows roughly how much memory it is using.
 DocumentCache::DocumentCache(inT64 max_memory)
-  : total_pages_(0), memory_used_(0), max_memory_(max_memory) {}
+    : num_pages_per_doc_(0), max_memory_(max_memory) {}
 DocumentCache::~DocumentCache() {}
 
 // Adds all the documents in the list of filenames, counting memory.
 // The reader is used to read the files.
 bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
-                                  const char* lang, FileReader reader) {
-  inT64 fair_share_memory = max_memory_ / filenames.size();
+                                  const char* lang,
+                                  CachingStrategy cache_strategy,
+                                  FileReader reader) {
+  cache_strategy_ = cache_strategy;
+  inT64 fair_share_memory = 0;
+  // Round-robin: each DocumentData restricts its content to its fair share of
+  // memory (left at 0 for an empty list to avoid dividing by zero). Sequential:
+  // DocumentCache determines which DocumentDatas are held entirely in memory.
+  if (cache_strategy_ == CS_ROUND_ROBIN && !filenames.empty())
+    fair_share_memory = max_memory_ / filenames.size();
   for (int arg = 0; arg < filenames.size(); ++arg) {
     STRING filename = filenames[arg];
     DocumentData* document = new DocumentData(filename);
-    if (document->LoadDocument(filename.string(), lang, 0,
-                               fair_share_memory, reader)) {
-      AddToCache(document);
-    } else {
-      tprintf("Failed to load image %s!\n", filename.string());
-      delete document;
-    }
+    document->SetDocument(filename.string(), lang, fair_share_memory, reader);
+    AddToCache(document);
   }
-  tprintf("Loaded %d pages, total %gMB\n",
-          total_pages_, memory_used_ / 1048576.0);
-  return total_pages_ > 0;
+  if (!documents_.empty()) {
+    // Try to get the first page now to verify the list of filenames.
+    if (GetPageBySerial(0) != NULL) return true;
+    tprintf("Load of page 0 failed!\n");
+  }
+  return false;
 }
 
-// Adds document to the cache, throwing out other documents if needed.
+// Adds document to the cache.
 bool DocumentCache::AddToCache(DocumentData* data) {
   inT64 new_memory = data->memory_used();
-  memory_used_ += new_memory;
   documents_.push_back(data);
-  total_pages_ += data->NumPages();
-  // Delete the first item in the array, and other pages of the same name
-  // while memory is full.
-  while (memory_used_ >= max_memory_ && max_memory_ > 0) {
-    tprintf("Memory used=%lld vs max=%lld, discarding doc of size %lld\n",
-            memory_used_ , max_memory_, documents_[0]->memory_used());
-    memory_used_ -= documents_[0]->memory_used();
-    total_pages_ -= documents_[0]->NumPages();
-    documents_.remove(0);
-  }
   return true;
 }
 
@@ -488,11 +596,104 @@ DocumentData* DocumentCache::FindDocument(const STRING& document_name) const {
   return NULL;
 }
 
+// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
+// strategy, could take a long time.
+int DocumentCache::TotalPages() {
+  if (cache_strategy_ == CS_SEQUENTIAL) {
+    // In sequential mode, we assume each doc has the same number of pages
+    // whether it is true or not.
+    if (num_pages_per_doc_ == 0) GetPageSequential(0);
+    return num_pages_per_doc_ * documents_.size();
+  }
+  int total_pages = 0;
+  int num_docs = documents_.size();
+  for (int d = 0; d < num_docs; ++d) {
+    // We have to load a page to make NumPages() valid.
+    documents_[d]->GetPage(0);
+    total_pages += documents_[d]->NumPages();
+  }
+  return total_pages;
+}
+
 // Returns a page by serial number, selecting them in a round-robin fashion
-// from all the documents.
-const ImageData* DocumentCache::GetPageBySerial(int serial) {
-  int document_index = serial % documents_.size();
-  return documents_[document_index]->GetPage(serial / documents_.size());
+// from all the documents. Highly disk-intensive, but doesn't need samples
+// to be shuffled between files to begin with.
+const ImageData* DocumentCache::GetPageRoundRobin(int serial) {
+  int num_docs = documents_.size();
+  int doc_index = serial % num_docs;
+  const ImageData* doc = documents_[doc_index]->GetPage(serial / num_docs);
+  for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) {
+    doc_index = (serial + offset) % num_docs;
+    int page = (serial + offset) / num_docs;
+    documents_[doc_index]->LoadPageInBackground(page);
+  }
+  return doc;
+}
+
+// Returns a page by serial number, selecting them in sequence from each file.
+// Requires the samples to be shuffled between the files to give a random or
+// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
+const ImageData* DocumentCache::GetPageSequential(int serial) {
+  int num_docs = documents_.size();
+  ASSERT_HOST(num_docs > 0);
+  if (num_pages_per_doc_ == 0) {
+    // Use the pages in the first doc as the number of pages in each doc.
+    documents_[0]->GetPage(0);
+    num_pages_per_doc_ = documents_[0]->NumPages();
+    if (num_pages_per_doc_ == 0) {
+      tprintf("First document cannot be empty!!\n");
+      ASSERT_HOST(num_pages_per_doc_ > 0);
+    }
+    // Get rid of zero now if we don't need it.
+    if (serial / num_pages_per_doc_ % num_docs > 0) documents_[0]->UnCache();
+  }
+  int doc_index = serial / num_pages_per_doc_ % num_docs;
+  const ImageData* doc =
+      documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
+  // Count up total memory. Background loading makes it more complicated to
+  // keep a running count.
+  inT64 total_memory = 0;
+  for (int d = 0; d < num_docs; ++d) {
+    total_memory += documents_[d]->memory_used();
+  }
+  if (total_memory >= max_memory_) {
+    // Find something to un-cache.
+    // If there are more than 3 in front, then serial is from the back reader
+    // of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then
+    // we create a hole between them and then un-caching the backmost occupied
+    // will work for both.
+    int num_in_front = CountNeighbourDocs(doc_index, 1);
+    for (int offset = num_in_front - 2;
+         offset > 1 && total_memory >= max_memory_; --offset) {
+      int next_index = (doc_index + offset) % num_docs;
+      total_memory -= documents_[next_index]->UnCache();
+    }
+    // If that didn't work, the best solution is to un-cache from the back. If
+    // we take away the document that a 2nd reader is using, it will put it
+    // back and make a hole between.
+    int num_behind = CountNeighbourDocs(doc_index, -1);
+    for (int offset = num_behind; offset < 0 && total_memory >= max_memory_;
+         ++offset) {
+      int next_index = (doc_index + offset + num_docs) % num_docs;
+      total_memory -= documents_[next_index]->UnCache();
+    }
+  }
+  int next_index = (doc_index + 1) % num_docs;
+  if (!documents_[next_index]->IsCached() && total_memory < max_memory_) {
+    documents_[next_index]->LoadPageInBackground(0);
+  }
+  return doc;
+}
+
+// Helper counts the number of adjacent cached neighbours of index looking in
+// direction dir, ie index+dir, index+2*dir etc.
+int DocumentCache::CountNeighbourDocs(int index, int dir) {
+  int num_docs = documents_.size();
+  for (int offset = dir; abs(offset) < num_docs; offset += dir) {
+    int offset_index = (index + offset + num_docs) % num_docs;
+    if (!documents_[offset_index]->IsCached()) return offset - dir;
+  }
+  return num_docs;
 }
 
 }  // namespace tesseract.
diff --git a/ccstruct/imagedata.h b/ccstruct/imagedata.h
index 6321f121b1..ae6722934e 100644
--- a/ccstruct/imagedata.h
+++ b/ccstruct/imagedata.h
@@ -25,6 +25,7 @@
 #include "normalis.h"
 #include "rect.h"
 #include "strngs.h"
+#include "svutil.h"
 
 struct Pix;
 
@@ -34,8 +35,22 @@ namespace tesseract {
 const int kFeaturePadding = 2;
 // Number of pixels to pad around text boxes.
 const int kImagePadding = 4;
-// Number of training images to combine into a mini-batch for training.
-const int kNumPagesPerMiniBatch = 100;
+
+// Enum to determine the caching and data sequencing strategy.
+enum CachingStrategy {
+  // Reads all of one file before moving on to the next. Requires samples to be
+  // shuffled across files. Uses the count of samples in the first file as
+  // the count in all the files to achieve high-speed random access. As a
+  // consequence, if subsequent files are smaller, they get entries used more
+  // than once, and if subsequent files are larger, some entries are not used.
+  // Best for larger data sets that don't fit in memory.
+  CS_SEQUENTIAL,
+  // Reads one sample from each file in rotation. Does not require shuffled
+  // samples, but is extremely disk-intensive. Samples in smaller files also
+  // get used more often than samples in larger files.
+  // Best for smaller data sets that mostly fit in memory.
+  CS_ROUND_ROBIN,
+};
 
 class WordFeature {
  public:
@@ -103,6 +118,8 @@ class ImageData {
   // Reads from the given file. Returns false in case of error.
   // If swap is true, assumes a big/little-endian swap is needed.
   bool DeSerialize(bool swap, TFile* fp);
+  // As DeSerialize, but only seeks past the data - hence a static method.
+  static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
 
   // Other accessors.
   const STRING& imagefilename() const {
@@ -145,11 +162,12 @@ class ImageData {
   // Gets anything and everything with a non-NULL pointer, prescaled to a
   // given target_height (if 0, then the original image height), and aligned.
   // Also returns (if not NULL) the width and height of the scaled image.
-  // The return value is the scale factor that was applied to the image to
-  // achieve the target_height.
-  float PreScale(int target_height, Pix** pix,
-                 int* scaled_width, int* scaled_height,
-                 GenericVector<TBOX>* boxes) const;
+  // The return value is the scaled Pix, which must be pixDestroyed after use,
+  // and scale_factor (if not NULL) is set to the scale factor that was applied
+  // to the image to achieve the target_height.
+  Pix* PreScale(int target_height, int max_height, float* scale_factor,
+                int* scaled_width, int* scaled_height,
+                GenericVector<TBOX>* boxes) const;
 
   int MemoryUsed() const;
 
@@ -184,6 +202,8 @@ class ImageData {
 
 // A collection of ImageData that knows roughly how much memory it is using.
 class DocumentData {
+  friend void* ReCachePagesFunc(void* data);
+
  public:
   explicit DocumentData(const STRING& name);
   ~DocumentData();
@@ -192,6 +212,9 @@ class DocumentData {
   // is used to read the file.
   bool LoadDocument(const char* filename, const char* lang, int start_page,
                     inT64 max_memory, FileReader reader);
+  // Sets up the document, without actually loading it.
+  void SetDocument(const char* filename, const char* lang, inT64 max_memory,
+                   FileReader reader);
   // Writes all the pages to the given filename. Returns false on error.
   bool SaveDocument(const char* filename, FileWriter writer);
   bool SaveToBuffer(GenericVector<char>* buffer);
@@ -200,26 +223,62 @@ class DocumentData {
   void AddPageToDocument(ImageData* page);
 
   const STRING& document_name() const {
+    SVAutoLock lock(&general_mutex_);
     return document_name_;
   }
   int NumPages() const {
+    SVAutoLock lock(&general_mutex_);
     return total_pages_;
   }
   inT64 memory_used() const {
+    SVAutoLock lock(&general_mutex_);
     return memory_used_;
   }
+  // If the given index is not currently loaded, loads it using a separate
+  // thread. Note: there are 4 cases:
+  // Document uncached: IsCached() returns false, total_pages_ < 0.
+  // Required page is available: IsPageAvailable returns true. In this case,
+  // total_pages_ > 0 and
+  // pages_offset_ <= index%total_pages_ < pages_offset_+pages_.size()
+  // Pages are loaded, but the required one is not.
+  // The requested page is being loaded by LoadPageInBackground. In this case,
+  // index == pages_offset_. Once the loading starts, the pages lock is held
+  // until it completes, at which point IsPageAvailable will unblock and return
+  // true.
+  void LoadPageInBackground(int index);
   // Returns a pointer to the page with the given index, modulo the total
-  // number of pages, recaching if needed.
+  // number of pages. Blocks until the background load is completed.
   const ImageData* GetPage(int index);
+  // Returns true if the requested page is available, and provides a pointer,
+  // which may be NULL if the document is empty. May block, even though it
+  // doesn't guarantee to return true.
+  bool IsPageAvailable(int index, ImageData** page);
   // Takes ownership of the given page index. The page is made NULL in *this.
   ImageData* TakePage(int index) {
+    SVAutoLock lock(&pages_mutex_);
     ImageData* page = pages_[index];
     pages_[index] = NULL;
     return page;
   }
+  // Returns true if the document is currently loaded or in the process of
+  // loading.
+  bool IsCached() const { return NumPages() >= 0; }
+  // Removes all pages from memory and frees the memory, but does not forget
+  // the document metadata. Returns the memory saved.
+  inT64 UnCache();
 
  private:
-  // Loads as many pages can fit in max_memory_ starting at index pages_offset_.
+  // Sets the value of total_pages_ behind a mutex.
+  void set_total_pages(int total) {
+    SVAutoLock lock(&general_mutex_);
+    total_pages_ = total;
+  }
+  void set_memory_used(inT64 memory_used) {
+    SVAutoLock lock(&general_mutex_);
+    memory_used_ = memory_used;
+  }
+  // Locks the pages_mutex_ and loads as many pages as can fit in max_memory_
+  // starting at index pages_offset_.
   bool ReCachePages();
 
  private:
@@ -239,43 +298,77 @@ class DocumentData {
   inT64 max_memory_;
   // Saved reader from LoadDocument to allow re-caching.
   FileReader reader_;
+  // Mutex that protects pages_ and pages_offset_ against multiple parallel
+  // loads, and provides a wait for page.
+  SVMutex pages_mutex_;
+  // Mutex that protects other data members that callers want to access without
+  // waiting for a load operation.
+  mutable SVMutex general_mutex_;
 };
 
 // A collection of DocumentData that knows roughly how much memory it is using.
+// Note that while it supports background read-ahead, it assumes that a single
+// thread is accessing documents, ie it is not safe for multiple threads to
+// access different documents in parallel, as one may de-cache the other's
+// content.
 class DocumentCache {
  public:
   explicit DocumentCache(inT64 max_memory);
   ~DocumentCache();
 
+  // Deletes all existing documents from the cache.
+  void Clear() {
+    documents_.clear();
+    num_pages_per_doc_ = 0;
+  }
   // Adds all the documents in the list of filenames, counting memory.
   // The reader is used to read the files.
   bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang,
-                     FileReader reader);
+                     CachingStrategy cache_strategy, FileReader reader);
 
-  // Adds document to the cache, throwing out other documents if needed.
+  // Adds document to the cache.
   bool AddToCache(DocumentData* data);
 
   // Finds and returns a document by name.
   DocumentData* FindDocument(const STRING& document_name) const;
 
-  // Returns a page by serial number, selecting them in a round-robin fashion
-  // from all the documents.
-  const ImageData* GetPageBySerial(int serial);
+  // Returns a page by serial number using the current cache_strategy_ to
+  // determine the mapping from serial number to page.
+  const ImageData* GetPageBySerial(int serial) {
+    if (cache_strategy_ == CS_SEQUENTIAL)
+      return GetPageSequential(serial);
+    else
+      return GetPageRoundRobin(serial);
+  }
 
   const PointerVector<DocumentData>& documents() const {
     return documents_;
   }
-  int total_pages() const {
-    return total_pages_;
-  }
+  // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
+  // strategy, could take a long time.
+  int TotalPages();
 
  private:
+  // Returns a page by serial number, selecting them in a round-robin fashion
+  // from all the documents. Highly disk-intensive, but doesn't need samples
+  // to be shuffled between files to begin with.
+  const ImageData* GetPageRoundRobin(int serial);
+  // Returns a page by serial number, selecting them in sequence from each file.
+  // Requires the samples to be shuffled between the files to give a random or
+  // uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
+  const ImageData* GetPageSequential(int serial);
+
+  // Helper counts the number of adjacent cached neighbour documents_ of index
+  // looking in direction dir, ie index+dir, index+2*dir etc.
+  int CountNeighbourDocs(int index, int dir);
+
   // A group of pages that corresponds in some loose way to a document.
   PointerVector<DocumentData> documents_;
-  // Total of all pages.
-  int total_pages_;
-  // Total of all memory used by the cache.
-  inT64 memory_used_;
+  // Strategy to use for caching and serializing data samples.
+  CachingStrategy cache_strategy_;
+  // Number of pages in the first document, used as a divisor in
+  // GetPageSequential to determine the document index.
+  int num_pages_per_doc_;
   // Max memory allowed in this cache.
   inT64 max_memory_;
 };
diff --git a/ccstruct/matrix.h b/ccstruct/matrix.h
index e13ef31899..4b5b242a43 100644
--- a/ccstruct/matrix.h
+++ b/ccstruct/matrix.h
@@ -1,8 +1,12 @@
 /* -*-C-*-
  ******************************************************************************
+ * File:         matrix.h  (Formerly matrix.h)
+ * Description:  Generic 2-d array/matrix and banded triangular matrix class.
+ * Author:       Ray Smith
+ * TODO(rays) Separate from ratings matrix, which it also contains:
  *
- * File:        matrix.h  (Formerly matrix.h)
- * Description:  Ratings matrix code. (Used by associator)
+ * Description:  Ratings matrix class (specialization of banded matrix).
+ *               Segmentation search matrix of lists of BLOB_CHOICE.
  * Author:       Mark Seaman, OCR Technology
  * Created:      Wed May 16 13:22:06 1990
  * Modified:     Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt
@@ -25,9 +29,13 @@
 #ifndef TESSERACT_CCSTRUCT_MATRIX_H__
 #define TESSERACT_CCSTRUCT_MATRIX_H__
 
+#include <math.h>
 #include "kdpair.h"
+#include "points.h"
+#include "serialis.h"
 #include "unicharset.h"
 
+class BLOB_CHOICE;
 class BLOB_CHOICE_LIST;
 
 #define NOT_CLASSIFIED reinterpret_cast<BLOB_CHOICE_LIST*>(0)
@@ -44,34 +52,60 @@ class GENERIC_2D_ARRAY {
   // either pass the memory in, or allocate after by calling Resize().
   GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array)
     : empty_(empty), dim1_(dim1), dim2_(dim2), array_(array)  {
+    size_allocated_ = dim1 * dim2;
   }
   // Original constructor for a full rectangular matrix DOES allocate memory
   // and initialize it to empty.
   GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty)
     : empty_(empty), dim1_(dim1), dim2_(dim2)  {
-    array_ = new T[dim1_ * dim2_];
-    for (int x = 0; x < dim1_; x++)
-      for (int y = 0; y < dim2_; y++)
-        this->put(x, y, empty_);
+    int new_size = dim1 * dim2;
+    array_ = new T[new_size];
+    size_allocated_ = new_size;
+    for (int i = 0; i < size_allocated_; ++i)
+      array_[i] = empty_;
+  }
+  // Default constructor for array allocation. Use Resize to set the size.
+  GENERIC_2D_ARRAY()
+    : array_(NULL), empty_(static_cast<T>(0)), dim1_(0), dim2_(0),
+      size_allocated_(0) {
+  }
+  GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY<T>& src)
+    : array_(NULL), empty_(static_cast<T>(0)), dim1_(0), dim2_(0),
+      size_allocated_(0) {
+    *this = src;
   }
   virtual ~GENERIC_2D_ARRAY() { delete[] array_; }
 
+  void operator=(const GENERIC_2D_ARRAY<T>& src) {
+    ResizeNoInit(src.dim1(), src.dim2());
+    int size = num_elements();  // 0 => NULL arrays possible: skip the copy.
+    if (size > 0) memmove(array_, src.array_, size * sizeof(array_[0]));
+  }
+  // Reallocate the array to the given size. Does not keep old data, but does
+  // not initialize the array either.
+  void ResizeNoInit(int size1, int size2) {
+    int new_size = size1 * size2;
+    if (new_size > size_allocated_) {
+      delete [] array_;
+      array_ = new T[new_size];
+      size_allocated_ = new_size;
+    }
+    dim1_ = size1;
+    dim2_ = size2;
+  }
+
   // Reallocate the array to the given size. Does not keep old data.
   void Resize(int size1, int size2, const T& empty) {
     empty_ = empty;
-    if (size1 != dim1_ || size2 != dim2_) {
-      dim1_ = size1;
-      dim2_ = size2;
-      delete [] array_;
-      array_ = new T[dim1_ * dim2_];
-    }
+    ResizeNoInit(size1, size2);
     Clear();
   }
 
   // Reallocate the array to the given size, keeping old data.
   void ResizeWithCopy(int size1, int size2) {
     if (size1 != dim1_ || size2 != dim2_) {
-      T* new_array = new T[size1 * size2];
+      int new_size = size1 * size2;
+      T* new_array = new T[new_size];
       for (int col = 0; col < size1; ++col) {
         for (int row = 0; row < size2; ++row) {
           int old_index = col * dim2() + row;
@@ -87,6 +121,7 @@ class GENERIC_2D_ARRAY {
       array_ = new_array;
       dim1_ = size1;
       dim2_ = size2;
+      size_allocated_ = new_size;
     }
   }
 
@@ -106,9 +141,16 @@ class GENERIC_2D_ARRAY {
     if (fwrite(array_, sizeof(*array_), size, fp) != size) return false;
     return true;
   }
+  bool Serialize(tesseract::TFile* fp) const {
+    if (!SerializeSize(fp)) return false;
+    if (fp->FWrite(&empty_, sizeof(empty_), 1) != 1) return false;
+    int size = num_elements();
+    if (fp->FWrite(array_, sizeof(*array_), size) != size) return false;
+    return true;
+  }
 
   // Reads from the given file. Returns false in case of error.
-  // Only works with bitwise-serializeable typ
+  // Only works with bitwise-serializeable types!
   // If swap is true, assumes a big/little-endian swap is needed.
   bool DeSerialize(bool swap, FILE* fp) {
     if (!DeSerializeSize(swap, fp)) return false;
@@ -122,6 +164,18 @@ class GENERIC_2D_ARRAY {
     }
     return true;
   }
+  bool DeSerialize(bool swap, tesseract::TFile* fp) {
+    if (!DeSerializeSize(swap, fp)) return false;
+    if (fp->FRead(&empty_, sizeof(empty_), 1) != 1) return false;
+    if (swap) ReverseN(&empty_, sizeof(empty_));
+    int size = num_elements();
+    if (fp->FRead(array_, sizeof(*array_), size) != size) return false;
+    if (swap) {
+      for (int i = 0; i < size; ++i)
+        ReverseN(&array_[i], sizeof(array_[i]));
+    }
+    return true;
+  }
 
   // Writes to the given file. Returns false in case of error.
   // Assumes a T::Serialize(FILE*) const function.
@@ -163,11 +217,17 @@ class GENERIC_2D_ARRAY {
   }
 
   // Put a list element into the matrix at a specific location.
+  void put(ICOORD pos, const T& thing) {
+    array_[this->index(pos.x(), pos.y())] = thing;
+  }
   void put(int column, int row, const T& thing) {
     array_[this->index(column, row)] = thing;
   }
 
   // Get the item at a specified location from the matrix.
+  T get(ICOORD pos) const {
+    return array_[this->index(pos.x(), pos.y())];
+  }
   T get(int column, int row) const {
     return array_[this->index(column, row)];
   }
@@ -187,6 +247,207 @@ class GENERIC_2D_ARRAY {
     return &array_[this->index(column, 0)];
   }
 
+  // Adds addend to *this, element-by-element.
+  void operator+=(const GENERIC_2D_ARRAY<T>& addend) {
+    if (dim2_ == addend.dim2_) {
+      // Faster if equal size in the major dimension.
+      int size = MIN(num_elements(), addend.num_elements());
+      for (int i = 0; i < size; ++i) {
+        array_[i] += addend.array_[i];
+      }
+    } else {
+      for (int x = 0; x < dim1_; x++) {
+        for (int y = 0; y < dim2_; y++) {
+          (*this)(x, y) += addend(x, y);
+        }
+      }
+    }
+  }
+  // Subtracts subtrahend from *this, element-by-element.
+  void operator-=(const GENERIC_2D_ARRAY<T>& subtrahend) {
+    if (dim2_ == subtrahend.dim2_) {
+      // Faster if equal size in the major dimension.
+      int size = MIN(num_elements(), subtrahend.num_elements());
+      for (int i = 0; i < size; ++i) {
+        array_[i] -= subtrahend.array_[i];
+      }
+    } else {
+      for (int x = 0; x < dim1_; x++) {
+        for (int y = 0; y < dim2_; y++) {
+          (*this)(x, y) -= subtrahend(x, y);
+        }
+      }
+    }
+  }
+  // Adds addend to all elements.
+  void operator+=(const T& addend) {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      array_[i] += addend;
+    }
+  }
+  // Multiplies *this by factor, element-by-element.
+  void operator*=(const T& factor) {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      array_[i] *= factor;
+    }
+  }
+  // Clips *this to the given range.
+  void Clip(const T& rangemin, const T& rangemax) {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      array_[i] = ClipToRange(array_[i], rangemin, rangemax);
+    }
+  }
+  // Returns true if all elements of *this are within the given range.
+  // Only uses operator<
+  bool WithinBounds(const T& rangemin, const T& rangemax) const {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      const T& value = array_[i];
+      if (value < rangemin || rangemax < value)
+        return false;
+    }
+    return true;
+  }
+  // Normalize the whole array.
+  double Normalize() {
+    int size = num_elements();
+    if (size <= 0) return 0.0;
+    // Compute the mean.
+    double mean = 0.0;
+    for (int i = 0; i < size; ++i) {
+      mean += array_[i];
+    }
+    mean /= size;
+    // Subtract the mean and compute the standard deviation.
+    double sd = 0.0;
+    for (int i = 0; i < size; ++i) {
+      double normed = array_[i] - mean;
+      array_[i] = normed;
+      sd += normed * normed;
+    }
+    sd = sqrt(sd / size);
+    if (sd > 0.0) {
+      // Divide by the sd.
+      for (int i = 0; i < size; ++i) {
+        array_[i] /= sd;
+      }
+    }
+    return sd;
+  }
+
+  // Returns the maximum value of the array.
+  T Max() const {
+    int size = num_elements();
+    if (size <= 0) return empty_;
+    // Compute the max.
+    T max_value = array_[0];
+    for (int i = 1; i < size; ++i) {
+      const T& value = array_[i];
+      if (value > max_value) max_value = value;
+    }
+    return max_value;
+  }
+
+  // Returns the maximum absolute value of the array.
+  T MaxAbs() const {
+    int size = num_elements();
+    if (size <= 0) return empty_;
+    // Compute the max.
+    T max_abs = static_cast<T>(0);
+    for (int i = 0; i < size; ++i) {
+      T value = static_cast<T>(fabs(array_[i]));
+      if (value > max_abs) max_abs = value;
+    }
+    return max_abs;
+  }
+
+  // Accumulates the element-wise sums of squares of src into *this.
+  void SumSquares(const GENERIC_2D_ARRAY<T>& src) {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      array_[i] += src.array_[i] * src.array_[i];
+    }
+  }
+
+  // Scales each element using the ada-grad algorithm, ie array_[i] by
+  // sqrt(num_samples/max(1,sqsum[i])).
+  void AdaGradScaling(const GENERIC_2D_ARRAY<T>& sqsum, int num_samples) {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      array_[i] *= sqrt(num_samples / MAX(1.0, sqsum.array_[i]));
+    }
+  }
+
+  void AssertFinite() const {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      ASSERT_HOST(isfinite(array_[i]));
+    }
+  }
+
+  // REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a
+  // num_dims-dimensional array/tensor with dimensions given by dims, (ordered
+  // from most significant to least significant, the same as standard C arrays)
+  // and moves src_dim to dest_dim, with the initial dest_dim and any dimensions
+  // in between shifted towards the hole left by src_dim. Example:
+  // Current data content: array_=[0, 1, 2, ....119]
+  //   perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]...
+  //   but the current dimensions are irrelevant.
+  // num_dims = 4, dims=[5, 4, 3, 2]
+  // src_dim=3, dest_dim=1
+  // tensor=[[[[0, 1][2, 3][4, 5]]
+  //          [[6, 7][8, 9][10, 11]]
+  //          [[12, 13][14, 15][16, 17]]
+  //          [[18, 19][20, 21][22, 23]]]
+  //         [[[24, 25]...
+  // output dims =[5, 2, 4, 3]
+  // output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]]
+  //                 [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]]
+  //                [[[24, 26, 28]...
+  // which is stored in the array_ as:
+  //   [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...]
+  // NOTE: the 2 stored matrix dimensions are simply copied from *this. To
+  // change the dimensions after the transpose, use ResizeNoInit.
+  // Higher dimensions above 2 are strictly the responsibility of the caller.
+  void RotatingTranspose(const int* dims, int num_dims, int src_dim,
+                         int dest_dim, GENERIC_2D_ARRAY<T>* result) const {
+    int max_d = MAX(src_dim, dest_dim);
+    int min_d = MIN(src_dim, dest_dim);
+    // In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the
+    // ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1]
+    // being contiguous blocks of data that will move together, and
+    // [d0, min_d -1] being replicas of the transpose operation.
+    // num_replicas represents the large dimensions unchanged by the operation.
+    // move_size represents the small dimensions unchanged by the operation.
+    // src_step represents the stride in the src between each adjacent group
+    // in the destination.
+    int num_replicas = 1, move_size = 1, src_step = 1;
+    for (int d = 0; d < min_d; ++d) num_replicas *= dims[d];
+    for (int d = max_d + 1; d < num_dims; ++d) move_size *= dims[d];
+    for (int d = src_dim + 1; d < num_dims; ++d) src_step *= dims[d];
+    if (src_dim > dest_dim) src_step *= dims[src_dim];
+    // wrap_size is the size of a single replica, being the amount that is
+    // handled num_replicas times.
+    int wrap_size = move_size;
+    for (int d = min_d; d <= max_d; ++d) wrap_size *= dims[d];
+    result->ResizeNoInit(dim1_, dim2_);
+    result->empty_ = empty_;
+    const T* src = array_;
+    T* dest = result->array_;
+    for (int replica = 0; replica < num_replicas; ++replica) {
+      for (int start = 0; start < src_step; start += move_size) {
+        for (int pos = start; pos < wrap_size; pos += src_step) {
+          memcpy(dest, src + pos, sizeof(*dest) * move_size);
+          dest += move_size;
+        }
+      }
+      src += wrap_size;
+    }
+  }
+
   // Delete objects pointed to by array_[i].
   void delete_matrix_pointers() {
     int size = num_elements();
@@ -206,6 +467,13 @@ class GENERIC_2D_ARRAY {
     if (fwrite(&size, sizeof(size), 1, fp) != 1) return false;
     return true;
   }
+  bool SerializeSize(tesseract::TFile* fp) const {
+    inT32 size = dim1_;
+    if (fp->FWrite(&size, sizeof(size), 1) != 1) return false;
+    size = dim2_;
+    if (fp->FWrite(&size, sizeof(size), 1) != 1) return false;
+    return true;
+  }
   // Factored helper to deserialize the size.
   // If swap is true, assumes a big/little-endian swap is needed.
   bool DeSerializeSize(bool swap, FILE* fp) {
@@ -219,11 +487,26 @@ class GENERIC_2D_ARRAY {
     Resize(size1, size2, empty_);
     return true;
   }
+  // As above, but reads from a TFile and rejects negative dimensions.
+  bool DeSerializeSize(bool swap, tesseract::TFile* fp) {
+    inT32 size1, size2;
+    if (fp->FRead(&size1, sizeof(size1), 1) != 1) return false;
+    if (fp->FRead(&size2, sizeof(size2), 1) != 1) return false;
+    if (swap) ReverseN(&size1, sizeof(size1));
+    if (swap) ReverseN(&size2, sizeof(size2));
+    if (size1 < 0 || size2 < 0) return false;  // Corrupt data: don't allocate.
+    Resize(size1, size2, empty_);
+    return true;
+  }
 
   T* array_;
   T empty_;   // The unused cell.
   int dim1_;  // Size of the 1st dimension in indexing functions.
   int dim2_;  // Size of the 2nd dimension in indexing functions.
+  // The total size to which the array can be expanded before a realloc is
+  // needed. If Resize is used, memory is retained so it can be re-expanded
+  // without a further alloc, and this stores the allocated size.
+  int size_allocated_;
 };
 
 // A generic class to store a banded triangular matrix with entries of type T.
diff --git a/ccstruct/mod128.cpp b/ccstruct/mod128.cpp
index ee4aa6c3c6..4e5f4bd325 100644
--- a/ccstruct/mod128.cpp
+++ b/ccstruct/mod128.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        mod128.c  (Formerly dir128.c)
  * Description: Code to convert a DIR128 to an ICOORD.
- * Author:					Ray Smith
- * Created:					Tue Oct 22 11:56:09 BST 1991
+ * Author:          Ray Smith
+ * Created:         Tue Oct 22 11:56:09 BST 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -86,16 +86,3 @@ DIR128::DIR128(                 //from fcoord
   while (high - low > 1);
   dir = low;
 }
-
-
-/**********************************************************************
- * dir_to_gradient
- *
- * Convert a direction to a vector.
- **********************************************************************/
-
-#if 0 // code is buggy for negative dir and unused
-ICOORD DIR128::vector() const {  //convert to vector
-  return dirtab[dir];            //easy really
-}
-#endif
diff --git a/ccstruct/mod128.h b/ccstruct/mod128.h
index 592264ba62..c0e71a423c 100644
--- a/ccstruct/mod128.h
+++ b/ccstruct/mod128.h
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        mod128.h  (Formerly dir128.h)
  * Description: Header for class which implements modulo arithmetic.
- * Author:					Ray Smith
- * Created:					Tue Mar 26 17:48:13 GMT 1991
+ * Author:          Ray Smith
+ * Created:         Tue Mar 26 17:48:13 GMT 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -77,7 +77,6 @@ class DLLSYM DIR128
     inT8 get_dir() const {  //access function
       return dir;
     }
-    ICOORD vector() const;  //turn to vector
 
   private:
     inT8 dir;                    //a direction
diff --git a/ccstruct/otsuthr.cpp b/ccstruct/otsuthr.cpp
index 071b0d2345..8822ce2943 100644
--- a/ccstruct/otsuthr.cpp
+++ b/ccstruct/otsuthr.cpp
@@ -51,23 +51,16 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height,
 
   // only use opencl if compiled w/ OpenCL and selected device is opencl
 #ifdef USE_OPENCL
-    // all of channel 0 then all of channel 1...
-    int *histogramAllChannels = new int[kHistogramSize * num_channels];
+  // all of channel 0 then all of channel 1...
+  int* histogramAllChannels = new int[kHistogramSize * num_channels];
 
-    // Calculate Histogram on GPU
-    OpenclDevice od;
-    if (od.selectedDeviceIsOpenCL() &&
-        (num_channels == 1 || num_channels == 4) && top == 0 && left == 0 ) {
-      od.HistogramRectOCL(
-          (const unsigned char*)pixGetData(src_pix),
-          num_channels,
-          pixGetWpl(src_pix) * 4,
-          left,
-          top,
-          width,
-          height,
-          kHistogramSize,
-          histogramAllChannels);
+  // Calculate Histogram on GPU
+  OpenclDevice od;
+  if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) &&
+      top == 0 && left == 0) {
+    od.HistogramRectOCL((unsigned char*)pixGetData(src_pix), num_channels,
+                        pixGetWpl(src_pix) * 4, left, top, width, height,
+                        kHistogramSize, histogramAllChannels);
 
     // Calculate Threshold from Histogram on cpu
     for (int ch = 0; ch < num_channels; ++ch) {
@@ -143,7 +136,6 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height,
   delete[] histogramAllChannels;
 #endif  // USE_OPENCL
 
-
   if (!any_good_hivalue) {
     // Use the best of the ones that were not good enough.
     (*hi_values)[best_hi_index] = best_hi_value;
diff --git a/ccstruct/pageres.cpp b/ccstruct/pageres.cpp
index b66e5636ff..32635365e4 100644
--- a/ccstruct/pageres.cpp
+++ b/ccstruct/pageres.cpp
@@ -882,17 +882,17 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
     choice_it.add_after_then_move(choices[c]);
     ratings->put(c, c, choice_list);
   }
-  FakeWordFromRatings();
+  FakeWordFromRatings(TOP_CHOICE_PERM);
   reject_map.initialise(blob_count);
   done = true;
 }
 
 // Creates a WERD_CHOICE for the word using the top choices from the leading
 // diagonal of the ratings matrix.
-void WERD_RES::FakeWordFromRatings() {
+void WERD_RES::FakeWordFromRatings(PermuterType permuter) {
   int num_blobs = ratings->dimension();
   WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
-  word_choice->set_permuter(TOP_CHOICE_PERM);
+  word_choice->set_permuter(permuter);
   for (int b = 0; b < num_blobs; ++b) {
     UNICHAR_ID unichar_id = UNICHAR_SPACE;
     float rating = MAX_INT32;
@@ -1105,6 +1105,7 @@ void WERD_RES::InitNonPointers() {
   x_height = 0.0;
   caps_height = 0.0;
   baseline_shift = 0.0f;
+  space_certainty = 0.0f;
   guessed_x_ht = TRUE;
   guessed_caps_ht = TRUE;
   combination = FALSE;
diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h
index 7329bc89ea..33b9f4cb35 100644
--- a/ccstruct/pageres.h
+++ b/ccstruct/pageres.h
@@ -1,7 +1,7 @@
 /**********************************************************************
  * File:        pageres.h  (Formerly page_res.h)
  * Description: Results classes used by control.c
- * Author:		Phil Cheatle
+ * Author:    Phil Cheatle
  * Created:     Tue Sep 22 08:42:49 BST 1992
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
@@ -295,6 +295,9 @@ class WERD_RES : public ELIST_LINK {
   float x_height;              // post match estimate
   float caps_height;           // post match estimate
   float baseline_shift;        // post match estimate.
+  // Certainty score for the spaces either side of this word (LSTM mode).
+  // MIN this value with the actual word certainty.
+  float space_certainty;
 
   /*
     To deal with fuzzy spaces we need to be able to combine "words" to form
@@ -327,7 +330,7 @@ class WERD_RES : public ELIST_LINK {
   }
   // Deep copies everything except the ratings MATRIX.
   // To get that use deep_copy below.
-  WERD_RES(const WERD_RES &source) : ELIST_LINK(source) {
+  WERD_RES(const WERD_RES& source) : ELIST_LINK(source) {
     InitPointers();
     *this = source;            // see operator=
   }
@@ -590,7 +593,7 @@ class WERD_RES : public ELIST_LINK {
 
   // Creates a WERD_CHOICE for the word using the top choices from the leading
   // diagonal of the ratings matrix.
-  void FakeWordFromRatings();
+  void FakeWordFromRatings(PermuterType permuter);
 
   // Copies the best_choice strings to the correct_text for adaption/training.
   void BestChoiceToCorrectText();
@@ -630,7 +633,7 @@ class WERD_RES : public ELIST_LINK {
   static WERD_RES* deep_copy(const WERD_RES* src) {
     WERD_RES* result = new WERD_RES(*src);
     // That didn't copy the ratings, but we want a copy if there is one to
-    // begin width.
+    // begin with.
     if (src->ratings != NULL)
       result->ratings = src->ratings->DeepCopy();
     return result;
diff --git a/ccstruct/params_training_featdef.h b/ccstruct/params_training_featdef.h
index ff76480be0..6e021f0b30 100644
--- a/ccstruct/params_training_featdef.h
+++ b/ccstruct/params_training_featdef.h
@@ -126,7 +126,7 @@ typedef GenericVector<ParamsTrainingHypothesis> ParamsTrainingHypothesisList;
 // explored on PASS1, PASS2, fix xheight pass, etc).
 class ParamsTrainingBundle {
  public:
-  ParamsTrainingBundle() {};
+  ParamsTrainingBundle() {}
   // Starts a new hypothesis list.
   // Should be called at the beginning of a new run of the segmentation search.
   void StartHypothesisList() {
diff --git a/ccstruct/pdblock.h b/ccstruct/pdblock.h
index b64eff36d0..cf29b782cb 100644
--- a/ccstruct/pdblock.h
+++ b/ccstruct/pdblock.h
@@ -29,90 +29,74 @@ struct Pix;
 
 CLISTIZEH (PDBLK)
 ///page block
-class PDBLK
-{
+class PDBLK {
   friend class BLOCK_RECT_IT;    //< block iterator
 
-  public:
-    ///empty constructor
-    PDBLK() {
-      hand_poly = NULL;
-      index_ = 0;
-    }
-    ///simple constructor
-    PDBLK(inT16 xmin,  //< bottom left
-          inT16 ymin,
-          inT16 xmax,  //< top right
-          inT16 ymax);
-
-    ///set vertex lists
-    ///@param left list of left vertices
-    ///@param right list of right vertices
-    void set_sides(ICOORDELT_LIST *left,
-                   ICOORDELT_LIST *right);
-
-    ///destructor
-    ~PDBLK () {
-      if (hand_poly) delete hand_poly;
-    }
-
-    POLY_BLOCK *poly_block() const {
-      return hand_poly;
-    }
-    ///set the poly block
-    void set_poly_block(POLY_BLOCK *blk) {
-      hand_poly = blk;
-    }
-    ///get box
-    void bounding_box(ICOORD &bottom_left,        //bottom left
-                      ICOORD &top_right) const {  //topright
-      bottom_left = box.botleft ();
-      top_right = box.topright ();
-    }
-    ///get real box
-    const TBOX &bounding_box() const {
-      return box;
-    }
-
-    int index() const {
-      return index_;
-    }
-    void set_index(int value) {
-      index_ = value;
-    }
-
-    ///is pt inside block
-    BOOL8 contains(ICOORD pt);
-
-    /// reposition block
-    void move(const ICOORD vec);  // by vector
-
-    // Returns a binary Pix mask with a 1 pixel for every pixel within the
-    // block. Rotates the coordinate system by rerotation prior to rendering.
-    // If not NULL, mask_box is filled with the position box of the returned
-    // mask image.
-    Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box);
-
-    #ifndef GRAPHICS_DISABLED
-    ///draw histogram
-    ///@param window window to draw in
-    ///@param serial serial number
-    ///@param colour colour to draw in
-    void plot(ScrollView* window,
-              inT32 serial,
-              ScrollView::Color colour);
-    #endif  // GRAPHICS_DISABLED
-
-    ///assignment
-    ///@param source from this
-    PDBLK & operator= (const PDBLK & source);
-
-  protected:
-    POLY_BLOCK *hand_poly;       //< weird as well
-    ICOORDELT_LIST leftside;     //< left side vertices
-    ICOORDELT_LIST rightside;    //< right side vertices
-    TBOX box;                    //< bounding box
-    int index_;                  //< Serial number of this block.
+ public:
+  /// empty constructor
+  PDBLK() {
+    hand_poly = NULL;
+    index_ = 0;
+  }
+  /// simple constructor
+  PDBLK(inT16 xmin,  //< bottom left
+        inT16 ymin,
+        inT16 xmax,  //< top right
+        inT16 ymax);
+
+  /// set vertex lists
+  ///@param left list of left vertices
+  ///@param right list of right vertices
+  void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right);
+
+  /// destructor
+  ~PDBLK() { delete hand_poly; }
+
+  POLY_BLOCK *poly_block() const { return hand_poly; }
+  /// set the poly block
+  void set_poly_block(POLY_BLOCK *blk) { hand_poly = blk; }
+  /// get box
+  void bounding_box(ICOORD &bottom_left,        // bottom left
+                    ICOORD &top_right) const {  // topright
+    bottom_left = box.botleft();
+    top_right = box.topright();
+  }
+  /// get real box
+  const TBOX &bounding_box() const { return box; }
+
+  int index() const { return index_; }
+  void set_index(int value) { index_ = value; }
+
+  /// is pt inside block
+  BOOL8 contains(ICOORD pt);
+
+  /// reposition block
+  void move(const ICOORD vec);  // by vector
+
+  // Returns a binary Pix mask with a 1 pixel for every pixel within the
+  // block. Rotates the coordinate system by rerotation prior to rendering.
+  // If not NULL, mask_box is filled with the position box of the returned
+  // mask image.
+  Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box);
+
+#ifndef GRAPHICS_DISABLED
+  /// draw histogram
+  ///@param window window to draw in
+  ///@param serial serial number
+  ///@param colour colour to draw in
+  void plot(ScrollView *window, inT32 serial, ScrollView::Color colour);
+#endif  // GRAPHICS_DISABLED
+
+  /// assignment
+  ///@param source from this
+  PDBLK &operator=(const PDBLK &source);
+
+ protected:
+  POLY_BLOCK *hand_poly;     //< weird as well
+  ICOORDELT_LIST leftside;   //< left side vertices
+  ICOORDELT_LIST rightside;  //< right side vertices
+  TBOX box;                  //< bounding box
+  int index_;                //< Serial number of this block.
 };
 
 class DLLSYM BLOCK_RECT_IT       //rectangle iterator
diff --git a/ccstruct/polyaprx.cpp b/ccstruct/polyaprx.cpp
index 81b8500a0e..7597349543 100644
--- a/ccstruct/polyaprx.cpp
+++ b/ccstruct/polyaprx.cpp
@@ -214,7 +214,7 @@ EDGEPT edgepts[]                 //output is array
 void fix2(                //polygonal approx
           EDGEPT *start,  /*loop to approimate */
           int area) {
-  EDGEPT *edgept;                /*current point */
+  EDGEPT *edgept; /*current point */
   EDGEPT *edgept1;
   EDGEPT *loopstart;             /*modified start of loop */
   EDGEPT *linestart;             /*start of line segment */
diff --git a/ccstruct/polyblk.cpp b/ccstruct/polyblk.cpp
index e0a455905f..b5ca2e1212 100644
--- a/ccstruct/polyblk.cpp
+++ b/ccstruct/polyblk.cpp
@@ -1,7 +1,7 @@
 /**********************************************************************
  * File:        polyblk.c  (Formerly poly_block.c)
  * Description: Polygonal blocks
- * Author:					Sheelagh Lloyd?
+ * Author:          Sheelagh Lloyd?
  * Created:
  *
  * (C) Copyright 1993, Hewlett-Packard Ltd.
diff --git a/ccstruct/quspline.cpp b/ccstruct/quspline.cpp
index f50cfe50ee..82107e1e25 100644
--- a/ccstruct/quspline.cpp
+++ b/ccstruct/quspline.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        quspline.cpp  (Formerly qspline.c)
  * Description: Code for the QSPLINE class.
- * Author:	Ray Smith
- * Created:	Tue Oct 08 17:16:12 BST 1991
+ * Author:  Ray Smith
+ * Created: Tue Oct 08 17:16:12 BST 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/ccstruct/ratngs.h b/ccstruct/ratngs.h
index 31b27cfb37..2ee9c94a30 100644
--- a/ccstruct/ratngs.h
+++ b/ccstruct/ratngs.h
@@ -268,7 +268,7 @@ const char *ScriptPosToString(tesseract::ScriptPos script_pos);
 
 }  // namespace tesseract.
 
-class WERD_CHOICE : public ELIST_LINK {
+class TESS_API WERD_CHOICE : public ELIST_LINK {
  public:
   static const float kBadRating;
   static const char *permuter_name(uinT8 permuter);
@@ -288,7 +288,8 @@ class WERD_CHOICE : public ELIST_LINK {
                src_certainty, src_permuter);
   }
   WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
-  WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) {
+  WERD_CHOICE(const WERD_CHOICE &word)
+      : ELIST_LINK(word), unicharset_(word.unicharset_) {
     this->init(word.length());
     this->operator=(word);
   }
diff --git a/ccstruct/rect.cpp b/ccstruct/rect.cpp
index 22417485db..4a9fe00b34 100644
--- a/ccstruct/rect.cpp
+++ b/ccstruct/rect.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        rect.c  (Formerly box.c)
  * Description: Bounding box class definition.
- * Author:					Phil Cheatle
- * Created:					Wed Oct 16 15:18:45 BST 1991
+ * Author:          Phil Cheatle
+ * Created:         Wed Oct 16 15:18:45 BST 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -29,10 +29,10 @@
  *
  **********************************************************************/
 
-TBOX::TBOX(                   //constructor
-         const ICOORD pt1,  //one corner
-         const ICOORD pt2   //the other corner
-        ) {
+TBOX::TBOX(            // constructor
+    const ICOORD pt1,  // one corner
+    const ICOORD pt2   // the other corner
+    ) {
   if (pt1.x () <= pt2.x ()) {
     if (pt1.y () <= pt2.y ()) {
       bot_left = pt1;
diff --git a/ccstruct/rect.h b/ccstruct/rect.h
index d9b90642f4..f31247a1af 100644
--- a/ccstruct/rect.h
+++ b/ccstruct/rect.h
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        rect.h  (Formerly box.h)
  * Description: Bounding box class definition.
- * Author:					Phil Cheatle
- * Created:					Wed Oct 16 15:18:45 BST 1991
+ * Author:          Phil Cheatle
+ * Created:         Wed Oct 16 15:18:45 BST 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -307,9 +307,9 @@ class DLLSYM TBOX  {  // bounding box
  *
  **********************************************************************/
 
-inline TBOX::TBOX(               // constructor
-                const FCOORD pt  // floating centre
-               ) {
+inline TBOX::TBOX(   // constructor
+    const FCOORD pt  // floating centre
+    ) {
   bot_left = ICOORD ((inT16) floor (pt.x ()), (inT16) floor (pt.y ()));
   top_right = ICOORD ((inT16) ceil (pt.x ()), (inT16) ceil (pt.y ()));
 }
diff --git a/ccstruct/rejctmap.cpp b/ccstruct/rejctmap.cpp
index 9c9ff2e503..a2910675a8 100644
--- a/ccstruct/rejctmap.cpp
+++ b/ccstruct/rejctmap.cpp
@@ -267,7 +267,7 @@ void REJ::full_print(FILE *fp) {
 
 //The REJMAP class has been hacked to use alloc_struct instead of new [].
 //This is to reduce memory fragmentation only as it is rather kludgy.
-//alloc_struct by-passes the call to the contsructor of REJ on each
+//alloc_struct by-passes the call to the constructor of REJ on each
 //array element. Although the constructor is empty, the BITS16 members
 //do have a constructor which sets all the flags to 0. The memset
 //replaces this functionality.
diff --git a/ccstruct/rejctmap.h b/ccstruct/rejctmap.h
index d945dda1fa..009ba58a78 100644
--- a/ccstruct/rejctmap.h
+++ b/ccstruct/rejctmap.h
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        rejctmap.h  (Formerly rejmap.h)
  * Description: REJ and REJMAP class functions.
- * Author:		Phil Cheatle
- * Created:		Thu Jun  9 13:46:38 BST 1994
+ * Author:    Phil Cheatle
+ * Created:   Thu Jun  9 13:46:38 BST 1994
  *
  * (C) Copyright 1994, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -48,46 +48,45 @@ OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
 #include          "bits16.h"
 #include                   "params.h"
 
-enum REJ_FLAGS
-{
+enum REJ_FLAGS {
   /* Reject modes which are NEVER overridden */
-  R_TESS_FAILURE,                // PERM Tess didn't classify
-  R_SMALL_XHT,                   // PERM Xht too small
-  R_EDGE_CHAR,                   // PERM Too close to edge of image
-  R_1IL_CONFLICT,                // PERM 1Il confusion
-  R_POSTNN_1IL,                  // PERM 1Il unrejected by NN
-  R_REJ_CBLOB,                   // PERM Odd blob
-  R_MM_REJECT,                   // PERM Matrix match rejection (m's)
-  R_BAD_REPETITION,              // TEMP Repeated char which doesn't match trend
+  R_TESS_FAILURE,    // PERM Tess didn't classify
+  R_SMALL_XHT,       // PERM Xht too small
+  R_EDGE_CHAR,       // PERM Too close to edge of image
+  R_1IL_CONFLICT,    // PERM 1Il confusion
+  R_POSTNN_1IL,      // PERM 1Il unrejected by NN
+  R_REJ_CBLOB,       // PERM Odd blob
+  R_MM_REJECT,       // PERM Matrix match rejection (m's)
+  R_BAD_REPETITION,  // TEMP Repeated char which doesn't match trend
 
   /* Initial reject modes (pre NN_ACCEPT) */
-  R_POOR_MATCH,                  // TEMP Ray's original heuristic (Not used)
-  R_NOT_TESS_ACCEPTED,           // TEMP Tess didn't accept WERD
-  R_CONTAINS_BLANKS,             // TEMP Tess failed on other chs in WERD
-  R_BAD_PERMUTER,                // POTENTIAL Bad permuter for WERD
+  R_POOR_MATCH,         // TEMP Ray's original heuristic (Not used)
+  R_NOT_TESS_ACCEPTED,  // TEMP Tess didn't accept WERD
+  R_CONTAINS_BLANKS,    // TEMP Tess failed on other chs in WERD
+  R_BAD_PERMUTER,       // POTENTIAL Bad permuter for WERD
 
   /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
-  R_HYPHEN,                      // TEMP Post NN dodgy hyphen or full stop
-  R_DUBIOUS,                     // TEMP Post NN dodgy chars
-  R_NO_ALPHANUMS,                // TEMP No alphanumerics in word after NN
-  R_MOSTLY_REJ,                  // TEMP Most of word rejected so rej the rest
-  R_XHT_FIXUP,                   // TEMP Xht tests unsure
+  R_HYPHEN,        // TEMP Post NN dodgy hyphen or full stop
+  R_DUBIOUS,       // TEMP Post NN dodgy chars
+  R_NO_ALPHANUMS,  // TEMP No alphanumerics in word after NN
+  R_MOSTLY_REJ,    // TEMP Most of word rejected so rej the rest
+  R_XHT_FIXUP,     // TEMP Xht tests unsure
 
   /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
-  R_BAD_QUALITY,                 // TEMP Quality metrics bad for WERD
+  R_BAD_QUALITY,  // TEMP Quality metrics bad for WERD
 
   /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/
-  R_DOC_REJ,                     // TEMP Document rejection
-  R_BLOCK_REJ,                   // TEMP Block rejection
-  R_ROW_REJ,                     // TEMP Row rejection
-  R_UNLV_REJ,                    // TEMP ~ turned to - or ^ turned to space
+  R_DOC_REJ,    // TEMP Document rejection
+  R_BLOCK_REJ,  // TEMP Block rejection
+  R_ROW_REJ,    // TEMP Row rejection
+  R_UNLV_REJ,   // TEMP ~ turned to - or ^ turned to space
 
   /* Accept modes which occur between the above rejection groups */
-  R_NN_ACCEPT,                   //NN acceptance
-  R_HYPHEN_ACCEPT,               //Hyphen acceptance
-  R_MM_ACCEPT,                   //Matrix match acceptance
-  R_QUALITY_ACCEPT,              //Accept word in good quality doc
-  R_MINIMAL_REJ_ACCEPT           //Accept EVERYTHING except tess failures
+  R_NN_ACCEPT,          // NN acceptance
+  R_HYPHEN_ACCEPT,      // Hyphen acceptance
+  R_MM_ACCEPT,          // Matrix match acceptance
+  R_QUALITY_ACCEPT,     // Accept word in good quality doc
+  R_MINIMAL_REJ_ACCEPT  // Accept EVERYTHING except tess failures
 };
 
 /* REJECT MAP VALUES */
diff --git a/ccstruct/statistc.cpp b/ccstruct/statistc.cpp
index 39d5edd180..8b1ba8c9a1 100644
--- a/ccstruct/statistc.cpp
+++ b/ccstruct/statistc.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        statistc.c  (Formerly stats.c)
  * Description: Simple statistical package for integer values.
- * Author:					Ray Smith
- * Created:					Mon Feb 04 16:56:05 GMT 1991
+ * Author:          Ray Smith
+ * Created:         Mon Feb 04 16:56:05 GMT 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -215,7 +215,6 @@ inT32 STATS::min_bucket() const {  // Find min
   return rangemin_ + min;
 }
 
-
 /**********************************************************************
  * STATS::max_bucket
  *
diff --git a/ccutil/ambigs.h b/ccutil/ambigs.h
index b278f9f39d..faab21989b 100644
--- a/ccutil/ambigs.h
+++ b/ccutil/ambigs.h
@@ -59,17 +59,18 @@ class UnicharIdArrayUtils {
   // less than length of array2, if any array1[i] is less than array2[i].
   // Returns 0 if the arrays are equal, 1 otherwise.
   // The function assumes that the arrays are terminated by INVALID_UNICHAR_ID.
-  static inline int compare(const UNICHAR_ID array1[],
-                            const UNICHAR_ID array2[]) {
-    const UNICHAR_ID *ptr1 = array1;
-    const UNICHAR_ID *ptr2 = array2;
-    while (*ptr1 != INVALID_UNICHAR_ID && *ptr2 != INVALID_UNICHAR_ID) {
-      if (*ptr1 != *ptr2) return *ptr1 < *ptr2 ? -1 : 1;
-      ++ptr1;
-      ++ptr2;
+  static inline int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2) {
+    for (;;) {
+      const UNICHAR_ID val1 = *ptr1++;
+      const UNICHAR_ID val2 = *ptr2++;
+      if (val1 != val2) {
+        if (val1 == INVALID_UNICHAR_ID) return -1;
+        if (val2 == INVALID_UNICHAR_ID) return 1;
+        if (val1 < val2) return -1;
+        return 1;
+      }
+      if (val1 == INVALID_UNICHAR_ID) return 0;
     }
-    if (*ptr1 == INVALID_UNICHAR_ID && *ptr2 == INVALID_UNICHAR_ID) return 0;
-    return *ptr1 == INVALID_UNICHAR_ID ? -1 : 1;
   }
 
   // Look uid in the vector of uids.  If found, the index of the matched
diff --git a/ccutil/bits16.h b/ccutil/bits16.h
index 352b48bee2..6bbec4c0f1 100644
--- a/ccutil/bits16.h
+++ b/ccutil/bits16.h
@@ -37,12 +37,12 @@ class DLLSYM BITS16
     void turn_on_bit(                  // flip specified bit
                      uinT8 bit_num) {  // bit to flip 0..7
       val = val | 01 << bit_num;
-    };
+    }
 
     void turn_off_bit(                  // flip specified bit
                       uinT8 bit_num) {  // bit to flip 0..7
       val = val & ~(01 << bit_num);
-    };
+    }
 
     void set_bit(                // flip specified bit
                  uinT8 bit_num,  // bit to flip 0..7
@@ -51,11 +51,11 @@ class DLLSYM BITS16
         val = val | 01 << bit_num;
       else
         val = val & ~(01 << bit_num);
-    };
+    }
 
     BOOL8 bit(                        // access bit
               uinT8 bit_num) const {  // bit to access
       return (val >> bit_num) & 01;
-    };
+    }
 };
 #endif
diff --git a/ccutil/ccutil.cpp b/ccutil/ccutil.cpp
index ecf2cb5e36..8f965bb8c1 100644
--- a/ccutil/ccutil.cpp
+++ b/ccutil/ccutil.cpp
@@ -1,5 +1,14 @@
 // Copyright 2008 Google Inc. All Rights Reserved.
 // Author: scharron@google.com (Samuel Charron)
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "ccutil.h"
 
diff --git a/ccutil/ccutil.h b/ccutil/ccutil.h
index 974ef7d225..faba0aef06 100644
--- a/ccutil/ccutil.h
+++ b/ccutil/ccutil.h
@@ -26,9 +26,7 @@
 #include "params.h"
 #include "unicharset.h"
 
-#ifdef _WIN32
-#include <windows.h>
-#else
+#ifndef _WIN32
 #include <pthread.h>
 #include <semaphore.h>
 #endif
@@ -51,7 +49,7 @@ class CCUtilMutex {
 };
 
 
-class CCUtil {
+class TESS_API CCUtil {
  public:
   CCUtil();
   virtual ~CCUtil();
diff --git a/ccutil/clst.cpp b/ccutil/clst.cpp
index fbbb561fad..52caadf38f 100644
--- a/ccutil/clst.cpp
+++ b/ccutil/clst.cpp
@@ -26,7 +26,7 @@
  **********************************************************************/
 
 /***********************************************************************
- *							CLIST::internal_deep_clear
+ *              CLIST::internal_deep_clear
  *
  *  Used by the "deep_clear" member function of derived list
  *  classes to destroy all the elements on the list.
@@ -56,9 +56,8 @@ void (*zapper) (void *)) {       //ptr to zapper functn
   }
 }
 
-
 /***********************************************************************
- *							CLIST::shallow_clear
+ *              CLIST::shallow_clear
  *
  *  Used by the destructor and the "shallow_clear" member function of derived
  *  list classes to destroy the list.
@@ -83,7 +82,7 @@ void CLIST::shallow_clear() {  //destroy all links
 }
 
 /***********************************************************************
- *							CLIST::assign_to_sublist
+ *              CLIST::assign_to_sublist
  *
  *  The list is set to a sublist of another list.  "This" list must be empty
  *  before this function is invoked.  The two iterators passed must refer to
@@ -107,9 +106,8 @@ void CLIST::assign_to_sublist(                           //to this list
   last = start_it->extract_sublist (end_it);
 }
 
-
 /***********************************************************************
- *							CLIST::length
+ *              CLIST::length
  *
  *  Return count of elements on list
  **********************************************************************/
@@ -123,9 +121,8 @@ inT32 CLIST::length() const {  //count elements
   return count;
 }
 
-
 /***********************************************************************
- *							CLIST::sort
+ *              CLIST::sort
  *
  *  Sort elements on list
  **********************************************************************/
@@ -239,7 +236,7 @@ void CLIST::set_subtract(int comparator(const void*, const void*),
  **********************************************************************/
 
 /***********************************************************************
- *							CLIST_ITERATOR::forward
+ *              CLIST_ITERATOR::forward
  *
  *  Move the iterator to the next element of the list.
  *  REMEMBER: ALL LISTS ARE CIRCULAR.
@@ -276,9 +273,8 @@ void *CLIST_ITERATOR::forward() {
   return current->data;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::data_relative
+ *              CLIST_ITERATOR::data_relative
  *
  *  Return the data pointer to the element "offset" elements from current.
  *  "offset" must not be less than -1.
@@ -312,9 +308,8 @@ void *CLIST_ITERATOR::data_relative(                //get data + or - ...
   return ptr->data;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::move_to_last()
+ *              CLIST_ITERATOR::move_to_last()
  *
  *  Move current so that it is set to the end of the list.
  *  Return data just in case anyone wants it.
@@ -336,9 +331,8 @@ void *CLIST_ITERATOR::move_to_last() {
     return current->data;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::exchange()
+ *              CLIST_ITERATOR::exchange()
  *
  *  Given another iterator, whose current element is a different element on
  *  the same list list OR an element of another list, exchange the two current
@@ -434,9 +428,8 @@ void CLIST_ITERATOR::exchange(                             //positions of 2 link
   other_it->current = old_current;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::extract_sublist()
+ *              CLIST_ITERATOR::extract_sublist()
  *
  *  This is a private member, used only by CLIST::assign_to_sublist.
  *  Given another iterator for the same list, extract the links from THIS to
@@ -478,7 +471,7 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist(                             //from
 
   temp_it.mark_cycle_pt ();
   do {                           //walk sublist
-    if (temp_it.cycled_list ())  //can't find end pt
+    if (temp_it.cycled_list())   // can't find end pt
       BAD_SUBLIST.error ("CLIST_ITERATOR.extract_sublist", ABORT, NULL);
 
     if (temp_it.at_last ()) {
diff --git a/ccutil/clst.h b/ccutil/clst.h
index a5a42a6e40..f93d75afcb 100644
--- a/ccutil/clst.h
+++ b/ccutil/clst.h
@@ -28,9 +28,9 @@
 class CLIST_ITERATOR;
 
 /**********************************************************************
- *							CLASS - CLIST_LINK
+ *              CLASS - CLIST_LINK
  *
- *							Generic link class for singly linked CONS cell lists
+ *              Generic link class for singly linked CONS cell lists
  *
  *  Note:  No destructor - elements are assumed to be destroyed EITHER after
  *  they have been extracted from a list OR by the CLIST destructor which
@@ -50,13 +50,13 @@ class DLLSYM CLIST_LINK
       data = next = NULL;
     }
 
-    CLIST_LINK(                       //copy constructor
-               const CLIST_LINK &) {  //don't copy link
+    CLIST_LINK(                // copy constructor
+        const CLIST_LINK &) {  // don't copy link
       data = next = NULL;
     }
 
-    void operator= (             //don't copy links
-    const CLIST_LINK &) {
+    void operator=(  // don't copy links
+        const CLIST_LINK &) {
       data = next = NULL;
     }
 };
@@ -89,8 +89,8 @@ class DLLSYM CLIST
     void internal_deep_clear (   //destroy all links
       void (*zapper) (void *));  //ptr to zapper functn
 
-    void shallow_clear();  //clear list but don't
-    //delete data elements
+    void shallow_clear();  // clear list but don't
+    // delete data elements
 
     bool empty() const {  //is list empty?
       return !last;
@@ -136,9 +136,10 @@ class DLLSYM CLIST
 };
 
 /***********************************************************************
- *							CLASS - CLIST_ITERATOR
+ *              CLASS - CLIST_ITERATOR
  *
- *							Generic iterator class for singly linked lists with embedded links
+ *              Generic iterator class for singly linked lists with embedded
+ *              links
  **********************************************************************/
 
 class DLLSYM CLIST_ITERATOR
@@ -231,8 +232,8 @@ class DLLSYM CLIST_ITERATOR
 
     BOOL8 cycled_list();  //Completed a cycle?
 
-    void add_to_end(                  //add at end &
-                    void *new_data);  //don't move
+    void add_to_end(      // add at end &
+        void *new_data);  // don't move
 
     void exchange(                            //positions of 2 links
                   CLIST_ITERATOR *other_it);  //other iterator
@@ -246,7 +247,7 @@ class DLLSYM CLIST_ITERATOR
 };
 
 /***********************************************************************
- *							CLIST_ITERATOR::set_to_list
+ *              CLIST_ITERATOR::set_to_list
  *
  *  (Re-)initialise the iterator to point to the start of the list_to_iterate
  *  over.
@@ -270,9 +271,8 @@ inline void CLIST_ITERATOR::set_to_list(  //change list
   ex_current_was_cycle_pt = FALSE;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::CLIST_ITERATOR
+ *              CLIST_ITERATOR::CLIST_ITERATOR
  *
  *  CONSTRUCTOR - set iterator to specified list;
  **********************************************************************/
@@ -281,9 +281,8 @@ inline CLIST_ITERATOR::CLIST_ITERATOR(CLIST *list_to_iterate) {
   set_to_list(list_to_iterate);
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::add_after_then_move
+ *              CLIST_ITERATOR::add_after_then_move
  *
  *  Add a new element to the list after the current element and move the
  *  iterator to the new element.
@@ -329,9 +328,8 @@ inline void CLIST_ITERATOR::add_after_then_move(  // element to add
   current = new_element;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::add_after_stay_put
+ *              CLIST_ITERATOR::add_after_stay_put
  *
  *  Add a new element to the list after the current element but do not move
  *  the iterator to the new element.
@@ -380,9 +378,8 @@ inline void CLIST_ITERATOR::add_after_stay_put(  // element to add
   }
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::add_before_then_move
+ *              CLIST_ITERATOR::add_before_then_move
  *
  *  Add a new element to the list before the current element and move the
  *  iterator to the new element.
@@ -425,9 +422,8 @@ inline void CLIST_ITERATOR::add_before_then_move(  // element to add
   current = new_element;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::add_before_stay_put
+ *              CLIST_ITERATOR::add_before_stay_put
  *
  *  Add a new element to the list before the current element but don't move the
  *  iterator to the new element.
@@ -471,11 +467,11 @@ inline void CLIST_ITERATOR::add_before_stay_put(  // element to add
   }
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::add_list_after
+ *              CLIST_ITERATOR::add_list_after
  *
- *  Insert another list to this list after the current element but don't move the
+ *  Insert another list into this list after the current element but don't
+ *  move the
  *  iterator.
  **********************************************************************/
 
@@ -518,9 +514,8 @@ inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) {
   }
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::add_list_before
+ *              CLIST_ITERATOR::add_list_before
  *
  *  Insert another list to this list before the current element. Move the
  *  iterator to the start of the inserted elements
@@ -563,9 +558,8 @@ inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) {
   }
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::extract
+ *              CLIST_ITERATOR::extract
  *
  *  Do extraction by removing current from the list, deleting the cons cell
  *  and returning the data to the caller, but NOT updating the iterator.  (So
@@ -606,9 +600,8 @@ inline void *CLIST_ITERATOR::extract() {
   return extracted_data;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::move_to_first()
+ *              CLIST_ITERATOR::move_to_first()
  *
  *  Move current so that it is set to the start of the list.
  *  Return data just in case anyone wants it.
@@ -626,9 +619,8 @@ inline void *CLIST_ITERATOR::move_to_first() {
   return current != NULL ? current->data : NULL;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::mark_cycle_pt()
+ *              CLIST_ITERATOR::mark_cycle_pt()
  *
  *  Remember the current location so that we can tell whether we've returned
  *  to this point later.
@@ -651,9 +643,8 @@ inline void CLIST_ITERATOR::mark_cycle_pt() {
   started_cycling = FALSE;
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::at_first()
+ *              CLIST_ITERATOR::at_first()
  *
  *  Are we at the start of the list?
  *
@@ -671,9 +662,8 @@ inline BOOL8 CLIST_ITERATOR::at_first() {
     !ex_current_was_last));      //first and last
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::at_last()
+ *              CLIST_ITERATOR::at_last()
  *
  *  Are we at the end of the list?
  *
@@ -691,9 +681,8 @@ inline BOOL8 CLIST_ITERATOR::at_last() {
     ex_current_was_last));       //first and last
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::cycled_list()
+ *              CLIST_ITERATOR::cycled_list()
  *
  *  Have we returned to the cycle_pt since it was set?
  *
@@ -709,9 +698,8 @@ inline BOOL8 CLIST_ITERATOR::cycled_list() {
 
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::length()
+ *              CLIST_ITERATOR::length()
  *
  *  Return the length of the list
  *
@@ -726,9 +714,8 @@ inline inT32 CLIST_ITERATOR::length() {
   return list->length ();
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::sort()
+ *              CLIST_ITERATOR::sort()
  *
  *  Sort the elements of the list, then reposition at the start.
  *
@@ -747,9 +734,8 @@ const void *, const void *)) {
   move_to_first();
 }
 
-
 /***********************************************************************
- *							CLIST_ITERATOR::add_to_end
+ *              CLIST_ITERATOR::add_to_end
  *
  *  Add a new element to the end of the list without moving the iterator.
  *  This is provided because a single linked list cannot move to the last as
@@ -811,7 +797,7 @@ The macro generates:
   - An element deletion function:      CLASSNAME##_c1_zapper
   - An element copier function:
               CLASSNAME##_c1_copier
-  - A CLIST subclass:		CLASSNAME##_CLIST
+  - A CLIST subclass:   CLASSNAME##_CLIST
   - A CLIST_ITERATOR subclass:
               CLASSNAME##_C_IT
 
@@ -830,114 +816,116 @@ CLISTIZEH is a concatenation of 3 fragments CLISTIZEH_A, CLISTIZEH_B and
 CLISTIZEH_C.
 ***********************************************************************/
 
-#define CLISTIZEH_A( CLASSNAME )												\
-																				\
-extern DLLSYM void			CLASSNAME##_c1_zapper(		/*delete a link*/		\
-void*						link);						/*link to delete*/		\
-																				\
-extern DLLSYM void*			CLASSNAME##_c1_copier(		/*deep copy a link*/	\
-void*						old_element);   /*source link */
-
-#define CLISTIZEH_B( CLASSNAME )												\
-																				\
-/***********************************************************************		\
-*							CLASS - CLASSNAME##_CLIST							\
-*																				\
-*							List class for class CLASSNAME						\
-*																				\
-**********************************************************************/			\
-																				\
-class DLLSYM				CLASSNAME##_CLIST : public CLIST					\
-{																				\
-public:																			\
-							CLASSNAME##_CLIST():CLIST() {}						\
-														/* constructor */		\
-																				\
-							CLASSNAME##_CLIST(	/* don't construct */			\
-	const CLASSNAME##_CLIST&)							/*by initial assign*/	\
-	{ DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_CLIST ),			\
-														ABORT, NULL ); }		\
-																				\
-void						deep_clear()				/* delete elements */	\
-	{ CLIST::internal_deep_clear( &CLASSNAME##_c1_zapper ); }					\
-																				\
-void						operator=(					/* prevent assign */	\
-	const CLASSNAME##_CLIST&)													\
-	{ DONT_ASSIGN_LISTS.error( QUOTE_IT( CLASSNAME##_CLIST ),					\
-											ABORT, NULL ); }
-
-#define CLISTIZEH_C( CLASSNAME )												\
-																				\
-};																				\
-																				\
-																				\
-																				\
-/***********************************************************************		\
-*							CLASS - CLASSNAME##_C_IT							\
-*																				\
-*							Iterator class for class CLASSNAME##_CLIST			\
-*																				\
-*  Note: We don't need to coerce pointers to member functions input				\
-*  parameters as these are automatically converted to the type of the base		\
-*  type. ("A ptr to a class may be converted to a pointer to a public base		\
-*  class of that class")														\
-**********************************************************************/			\
-																				\
-class DLLSYM				CLASSNAME##_C_IT : public CLIST_ITERATOR			\
-{																				\
-public:																			\
-							CLASSNAME##_C_IT():CLIST_ITERATOR(){}				\
-																				\
-							CLASSNAME##_C_IT(									\
-	CLASSNAME##_CLIST*		list):CLIST_ITERATOR(list){}						\
-																				\
-	CLASSNAME*			data()												\
-		{ return (CLASSNAME*) CLIST_ITERATOR::data(); }						\
-																				\
-	CLASSNAME*			data_relative(										\
-	inT8					offset)												\
-		{ return (CLASSNAME*) CLIST_ITERATOR::data_relative( offset ); }		\
-																				\
-	CLASSNAME*			forward()											\
-		{ return (CLASSNAME*) CLIST_ITERATOR::forward(); }					\
-																				\
-	CLASSNAME*			extract()											\
-		{ return (CLASSNAME*) CLIST_ITERATOR::extract(); }					\
-																				\
-	CLASSNAME*			move_to_first()										\
-		{ return (CLASSNAME*) CLIST_ITERATOR::move_to_first(); }				\
-																				\
-	CLASSNAME*			move_to_last()										\
-		{ return (CLASSNAME*) CLIST_ITERATOR::move_to_last(); }				\
-};
+#define CLISTIZEH_A(CLASSNAME)                                             \
+                                                                           \
+  extern DLLSYM void CLASSNAME##_c1_zapper(             /*delete a link*/  \
+                                           void *link); /*link to delete*/ \
+                                                                           \
+  extern DLLSYM void                                                       \
+      *CLASSNAME##_c1_copier(                    /*deep copy a link*/      \
+                             void *old_element); /*source link */
+
+#define CLISTIZEH_B(CLASSNAME)                                              \
+                                                                            \
+  /***********************************************************************  \
+  *             CLASS -                                                     \
+  *             CLASSNAME##_CLIST                                           \
+  *                                                                         \
+  *             List class for class                                        \
+  *             CLASSNAME                                                   \
+  *                                                                         \
+  **********************************************************************/   \
+                                                                            \
+  class DLLSYM CLASSNAME##_CLIST : public CLIST {                           \
+   public:                                                                  \
+    CLASSNAME##_CLIST() : CLIST() {}                                        \
+    /* constructor */                                                       \
+                                                                            \
+    CLASSNAME##_CLIST(                           /* don't construct */      \
+                      const CLASSNAME##_CLIST &) /*by initial assign*/      \
+    {                                                                       \
+      DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, \
+                                        NULL);                              \
+    }                                                                       \
+                                                                            \
+    void deep_clear() /* delete elements */                                 \
+    {                                                                       \
+      CLIST::internal_deep_clear(&CLASSNAME##_c1_zapper);                   \
+    }                                                                       \
+                                                                            \
+    void operator=(/* prevent assign */                                     \
+                   const CLASSNAME##_CLIST &) {                             \
+      DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, NULL);    \
+    }
 
-#define CLISTIZEH( CLASSNAME )						\
-									\
-CLISTIZEH_A( CLASSNAME )						\
-									\
-CLISTIZEH_B( CLASSNAME )						\
-									\
-CLISTIZEH_C( CLASSNAME )
+#define CLISTIZEH_C(CLASSNAME)                                               \
+  }                                                                          \
+  ;                                                                          \
+                                                                             \
+  /***********************************************************************   \
+  *             CLASS - CLASSNAME##_C_IT                                     \
+  *                                                                          \
+  *             Iterator class for class CLASSNAME##_CLIST                   \
+  *                                                                          \
+  *  Note: We don't need to coerce pointers to member functions input        \
+  *  parameters as these are automatically converted to the type of the base \
+  *  type. ("A ptr to a class may be converted to a pointer to a public base \
+  *  class of that class")                                                   \
+  **********************************************************************/    \
+                                                                             \
+  class DLLSYM CLASSNAME##_C_IT : public CLIST_ITERATOR {                    \
+   public:                                                                   \
+    CLASSNAME##_C_IT() : CLIST_ITERATOR() {}                                 \
+                                                                             \
+    CLASSNAME##_C_IT(CLASSNAME##_CLIST *list) : CLIST_ITERATOR(list) {}      \
+                                                                             \
+    CLASSNAME *data() { return (CLASSNAME *)CLIST_ITERATOR::data(); }        \
+                                                                             \
+    CLASSNAME *data_relative(inT8 offset) {                                  \
+      return (CLASSNAME *)CLIST_ITERATOR::data_relative(offset);             \
+    }                                                                        \
+                                                                             \
+    CLASSNAME *forward() { return (CLASSNAME *)CLIST_ITERATOR::forward(); }  \
+                                                                             \
+    CLASSNAME *extract() { return (CLASSNAME *)CLIST_ITERATOR::extract(); }  \
+                                                                             \
+    CLASSNAME *move_to_first() {                                             \
+      return (CLASSNAME *)CLIST_ITERATOR::move_to_first();                   \
+    }                                                                        \
+                                                                             \
+    CLASSNAME *move_to_last() {                                              \
+      return (CLASSNAME *)CLIST_ITERATOR::move_to_last();                    \
+    }                                                                        \
+  };
+
+#define CLISTIZEH(CLASSNAME) \
+                             \
+  CLISTIZEH_A(CLASSNAME)     \
+                             \
+  CLISTIZEH_B(CLASSNAME)     \
+                             \
+  CLISTIZEH_C(CLASSNAME)
 
 /***********************************************************************
   CLISTIZE( CLASSNAME )  MACRO
 ***********************************************************************/
 
-#define CLISTIZE( CLASSNAME )													\
-																				\
-/***********************************************************************		\
-*							CLASSNAME##_c1_zapper								\
-*																				\
-*  A function which can delete a CLASSNAME element.  This is passed to the		\
-*  generic deep_clear list member function so that when a list is cleared the	\
-*  elements on the list are properly destroyed from the base class, even		\
-*  though we don't use a virtual destructor function.							\
-**********************************************************************/			\
-																				\
-DLLSYM void					CLASSNAME##_c1_zapper(		/*delete a link*/		\
-void*						link)						/*link to delete*/		\
-{																				\
-delete (CLASSNAME *) link;														\
-}																				\
+#define CLISTIZE(CLASSNAME)                                                  \
+                                                                             \
+  /***********************************************************************   \
+  *             CLASSNAME##_c1_zapper                                        \
+  *                                                                          \
+  *  A function which can delete a CLASSNAME element.  This is passed to the \
+  *  generic deep_clear list member function so that when a list is cleared  \
+  *  the                                                                     \
+  *  elements on the list are properly destroyed from the base class, even   \
+  *  though we don't use a virtual destructor function.                      \
+  **********************************************************************/    \
+                                                                             \
+  DLLSYM void CLASSNAME##_c1_zapper(            /*delete a link*/            \
+                                    void *link) /*link to delete*/           \
+  {                                                                          \
+    delete (CLASSNAME *)link;                                                \
+  }
 
 #endif
diff --git a/ccutil/elst.cpp b/ccutil/elst.cpp
index 8ad999b5ba..2d2c9ad65d 100644
--- a/ccutil/elst.cpp
+++ b/ccutil/elst.cpp
@@ -26,7 +26,7 @@
  **********************************************************************/
 
 /***********************************************************************
- *							ELIST::internal_clear
+ *              ELIST::internal_clear
  *
  *  Used by the destructor and the "clear" member function of derived list
  *  classes to destroy all the elements on the list.
@@ -57,7 +57,7 @@ void (*zapper) (ELIST_LINK *)) {
 }
 
 /***********************************************************************
- *							ELIST::assign_to_sublist
+ *              ELIST::assign_to_sublist
  *
  *  The list is set to a sublist of another list.  "This" list must be empty
  *  before this function is invoked.  The two iterators passed must refer to
@@ -81,9 +81,8 @@ void ELIST::assign_to_sublist(                           //to this list
   last = start_it->extract_sublist (end_it);
 }
 
-
 /***********************************************************************
- *							ELIST::length
+ *              ELIST::length
  *
  *  Return count of elements on list
  **********************************************************************/
@@ -97,9 +96,8 @@ inT32 ELIST::length() const {  // count elements
   return count;
 }
 
-
 /***********************************************************************
- *							ELIST::sort
+ *              ELIST::sort
  *
  *  Sort elements on list
  *  NB If you don't like the const declarations in the comparator, coerce yours:
@@ -187,7 +185,7 @@ ELIST_LINK *ELIST::add_sorted_and_find(
  **********************************************************************/
 
 /***********************************************************************
- *							ELIST_ITERATOR::forward
+ *              ELIST_ITERATOR::forward
  *
  *  Move the iterator to the next element of the list.
  *  REMEMBER: ALL LISTS ARE CIRCULAR.
@@ -224,9 +222,8 @@ ELIST_LINK *ELIST_ITERATOR::forward() {
   return current;
 }
 
-
 /***********************************************************************
- *							ELIST_ITERATOR::data_relative
+ *              ELIST_ITERATOR::data_relative
  *
  *  Return the data pointer to the element "offset" elements from current.
  *  "offset" must not be less than -1.
@@ -260,9 +257,8 @@ ELIST_LINK *ELIST_ITERATOR::data_relative(                //get data + or - ...
   return ptr;
 }
 
-
 /***********************************************************************
- *							ELIST_ITERATOR::move_to_last()
+ *              ELIST_ITERATOR::move_to_last()
  *
  *  Move current so that it is set to the end of the list.
  *  Return data just in case anyone wants it.
@@ -281,9 +277,8 @@ ELIST_LINK *ELIST_ITERATOR::move_to_last() {
   return current;
 }
 
-
 /***********************************************************************
- *							ELIST_ITERATOR::exchange()
+ *              ELIST_ITERATOR::exchange()
  *
  *  Given another iterator, whose current element is a different element on
  *  the same list list OR an element of another list, exchange the two current
@@ -379,9 +374,8 @@ void ELIST_ITERATOR::exchange(                             //positions of 2 link
   other_it->current = old_current;
 }
 
-
 /***********************************************************************
- *							ELIST_ITERATOR::extract_sublist()
+ *              ELIST_ITERATOR::extract_sublist()
  *
  *  This is a private member, used only by ELIST::assign_to_sublist.
  *  Given another iterator for the same list, extract the links from THIS to
@@ -425,7 +419,7 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist(                             //from
 
   temp_it.mark_cycle_pt ();
   do {                           //walk sublist
-    if (temp_it.cycled_list ())  //can't find end pt
+    if (temp_it.cycled_list())   // can't find end pt
       BAD_SUBLIST.error ("ELIST_ITERATOR.extract_sublist", ABORT, NULL);
 
     if (temp_it.at_last ()) {
diff --git a/ccutil/elst.h b/ccutil/elst.h
index e239577419..d53a7c34f0 100644
--- a/ccutil/elst.h
+++ b/ccutil/elst.h
@@ -98,8 +98,8 @@ class DLLSYM ELIST_LINK
       next = NULL;
     }
 
-    void operator= (             //don't copy links
-    const ELIST_LINK &) {
+    void operator=(  // don't copy links
+        const ELIST_LINK &) {
       next = NULL;
     }
 };
@@ -273,8 +273,8 @@ class DLLSYM ELIST_ITERATOR
 
     bool cycled_list();  //Completed a cycle?
 
-    void add_to_end(                        //add at end &
-                    ELIST_LINK *new_link);  //don't move
+    void add_to_end(            // add at end &
+        ELIST_LINK *new_link);  // don't move
 
     void exchange(                            //positions of 2 links
                   ELIST_ITERATOR *other_it);  //other iterator
@@ -458,7 +458,6 @@ inline void ELIST_ITERATOR::add_before_then_move(  // element to add
   current = new_element;
 }
 
-
 /***********************************************************************
  *                          ELIST_ITERATOR::add_before_stay_put
  *
@@ -501,11 +500,11 @@ inline void ELIST_ITERATOR::add_before_stay_put(  // element to add
   }
 }
 
-
 /***********************************************************************
  *                          ELIST_ITERATOR::add_list_after
  *
- *  Insert another list to this list after the current element but don't move the
+ *  Insert another list to this list after the current element but don't move
+ *  the
  *  iterator.
  **********************************************************************/
 
@@ -959,30 +958,29 @@ ELISTIZEH_C( CLASSNAME )
   ELISTIZE( CLASSNAME ) MACRO
 ***********************************************************************/
 
-#define ELISTIZE(CLASSNAME)                                                 \
-                                                                            \
-/***********************************************************************    \
-*                           CLASSNAME##_zapper                              \
-*                                                                           \
-*  A function which can delete a CLASSNAME element.  This is passed to the  \
-*  generic clear list member function so that when a list is cleared the    \
-*  elements on the list are properly destroyed from the base class, even    \
-*  though we don't use a virtual destructor function.                       \
-**********************************************************************/     \
-                                                                            \
-DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link) {                          \
-  delete reinterpret_cast<CLASSNAME*>(link);                                \
-}                                                                           \
-                                                                            \
-/* Become a deep copy of src_list*/                                         \
-void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST* src_list,          \
-               CLASSNAME* (*copier)(const CLASSNAME*)) {                    \
-                                                                            \
-  CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST*>(src_list));          \
-  CLASSNAME##_IT to_it(this);                                               \
-                                                                            \
-  for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward())  \
-    to_it.add_after_then_move((*copier)(from_it.data()));                   \
-}
+#define ELISTIZE(CLASSNAME)                                                   \
+                                                                              \
+  /***********************************************************************    \
+  *                           CLASSNAME##_zapper                              \
+  *                                                                           \
+  *  A function which can delete a CLASSNAME element.  This is passed to the  \
+  *  generic clear list member function so that when a list is cleared the    \
+  *  elements on the list are properly destroyed from the base class, even    \
+  *  though we don't use a virtual destructor function.                       \
+  **********************************************************************/     \
+                                                                              \
+  DLLSYM void CLASSNAME##_zapper(ELIST_LINK *link) {                          \
+    delete reinterpret_cast<CLASSNAME *>(link);                               \
+  }                                                                           \
+                                                                              \
+  /* Become a deep copy of src_list*/                                         \
+  void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list,          \
+                                   CLASSNAME *(*copier)(const CLASSNAME *)) { \
+    CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST *>(src_list));         \
+    CLASSNAME##_IT to_it(this);                                               \
+                                                                              \
+    for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward())  \
+      to_it.add_after_then_move((*copier)(from_it.data()));                   \
+  }
 
 #endif
diff --git a/ccutil/elst2.cpp b/ccutil/elst2.cpp
index 30cedec17b..0d4960ed49 100644
--- a/ccutil/elst2.cpp
+++ b/ccutil/elst2.cpp
@@ -27,7 +27,7 @@
  **********************************************************************/
 
 /***********************************************************************
- *							ELIST2::internal_clear
+ *              ELIST2::internal_clear
  *
  *  Used by the destructor and the "clear" member function of derived list
  *  classes to destroy all the elements on the list.
@@ -58,7 +58,7 @@ void (*zapper) (ELIST2_LINK *)) {
 }
 
 /***********************************************************************
- *							ELIST2::assign_to_sublist
+ *              ELIST2::assign_to_sublist
  *
  *  The list is set to a sublist of another list.  "This" list must be empty
  *  before this function is invoked.  The two iterators passed must refer to
@@ -82,9 +82,8 @@ void ELIST2::assign_to_sublist(                            //to this list
   last = start_it->extract_sublist (end_it);
 }
 
-
 /***********************************************************************
- *							ELIST2::length
+ *              ELIST2::length
  *
  *  Return count of elements on list
  **********************************************************************/
@@ -98,9 +97,8 @@ inT32 ELIST2::length() const {  // count elements
   return count;
 }
 
-
 /***********************************************************************
- *							ELIST2::sort
+ *              ELIST2::sort
  *
  *  Sort elements on list
  *  NB If you don't like the const declarations in the comparator, coerce yours:
@@ -180,7 +178,7 @@ void ELIST2::add_sorted(int comparator(const void*, const void*),
  **********************************************************************/
 
 /***********************************************************************
- *							ELIST2_ITERATOR::forward
+ *              ELIST2_ITERATOR::forward
  *
  *  Move the iterator to the next element of the list.
  *  REMEMBER: ALL LISTS ARE CIRCULAR.
@@ -218,9 +216,8 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() {
   return current;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::backward
+ *              ELIST2_ITERATOR::backward
  *
  *  Move the iterator to the previous element of the list.
  *  REMEMBER: ALL LISTS ARE CIRCULAR.
@@ -257,9 +254,8 @@ ELIST2_LINK *ELIST2_ITERATOR::backward() {
   return current;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::data_relative
+ *              ELIST2_ITERATOR::data_relative
  *
  *  Return the data pointer to the element "offset" elements from current.
  *  (This function can't be INLINEd because it contains a loop)
@@ -289,9 +285,8 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative(                //get data + or - ..
   return ptr;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::exchange()
+ *              ELIST2_ITERATOR::exchange()
  *
  *  Given another iterator, whose current element is a different element on
  *  the same list list OR an element of another list, exchange the two current
@@ -399,9 +394,8 @@ void ELIST2_ITERATOR::exchange(                              //positions of 2 li
   other_it->current = old_current;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::extract_sublist()
+ *              ELIST2_ITERATOR::extract_sublist()
  *
  *  This is a private member, used only by ELIST2::assign_to_sublist.
  *  Given another iterator for the same list, extract the links from THIS to
@@ -445,7 +439,7 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist(                              //fr
 
   temp_it.mark_cycle_pt ();
   do {                           //walk sublist
-    if (temp_it.cycled_list ())  //can't find end pt
+    if (temp_it.cycled_list())   // can't find end pt
       BAD_SUBLIST.error ("ELIST2_ITERATOR.extract_sublist", ABORT, NULL);
 
     if (temp_it.at_last ()) {
diff --git a/ccutil/elst2.h b/ccutil/elst2.h
index 364abd86bc..bf078fbd56 100644
--- a/ccutil/elst2.h
+++ b/ccutil/elst2.h
@@ -46,9 +46,9 @@ i)  The duplication in source does not affect the run time code size - the
 **********************************************************************/
 
 /**********************************************************************
- *							CLASS - ELIST2_LINK
+ *              CLASS - ELIST2_LINK
  *
- *							Generic link class for doubly linked lists with embedded links
+ *              Generic link class for doubly linked lists with embedded links
  *
  *  Note:  No destructor - elements are assumed to be destroyed EITHER after
  *  they have been extracted from a list OR by the ELIST2 destructor which
@@ -68,13 +68,13 @@ class DLLSYM ELIST2_LINK
       prev = next = NULL;
     }
 
-    ELIST2_LINK(                        //copy constructor
-                const ELIST2_LINK &) {  //don't copy link
+    ELIST2_LINK(                // copy constructor
+        const ELIST2_LINK &) {  // don't copy link
       prev = next = NULL;
     }
 
-    void operator= (             //don't copy links
-    const ELIST2_LINK &) {
+    void operator=(  // don't copy links
+        const ELIST2_LINK &) {
       prev = next = NULL;
     }
 };
@@ -142,9 +142,10 @@ class DLLSYM ELIST2
 };
 
 /***********************************************************************
- *							CLASS - ELIST2_ITERATOR
+ *              CLASS - ELIST2_ITERATOR
  *
- *							Generic iterator class for doubly linked lists with embedded links
+ *              Generic iterator class for doubly linked lists with embedded
+ *              links
  **********************************************************************/
 
 class DLLSYM ELIST2_ITERATOR
@@ -240,8 +241,8 @@ class DLLSYM ELIST2_ITERATOR
 
     BOOL8 cycled_list();  //Completed a cycle?
 
-    void add_to_end(                         //add at end &
-                    ELIST2_LINK *new_link);  //don't move
+    void add_to_end(             // add at end &
+        ELIST2_LINK *new_link);  // don't move
 
     void exchange(                             //positions of 2 links
                   ELIST2_ITERATOR *other_it);  //other iterator
@@ -255,7 +256,7 @@ class DLLSYM ELIST2_ITERATOR
 };
 
 /***********************************************************************
- *							ELIST2_ITERATOR::set_to_list
+ *              ELIST2_ITERATOR::set_to_list
  *
  *  (Re-)initialise the iterator to point to the start of the list_to_iterate
  *  over.
@@ -279,9 +280,8 @@ inline void ELIST2_ITERATOR::set_to_list(  //change list
   ex_current_was_cycle_pt = FALSE;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::ELIST2_ITERATOR
+ *              ELIST2_ITERATOR::ELIST2_ITERATOR
  *
  *  CONSTRUCTOR - set iterator to specified list;
  **********************************************************************/
@@ -290,9 +290,8 @@ inline ELIST2_ITERATOR::ELIST2_ITERATOR(ELIST2 *list_to_iterate) {
   set_to_list(list_to_iterate);
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::add_after_then_move
+ *              ELIST2_ITERATOR::add_after_then_move
  *
  *  Add a new element to the list after the current element and move the
  *  iterator to the new element.
@@ -339,9 +338,8 @@ inline void ELIST2_ITERATOR::add_after_then_move(  // element to add
   current = new_element;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::add_after_stay_put
+ *              ELIST2_ITERATOR::add_after_stay_put
  *
  *  Add a new element to the list after the current element but do not move
  *  the iterator to the new element.
@@ -391,9 +389,8 @@ inline void ELIST2_ITERATOR::add_after_stay_put(  // element to add
   }
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::add_before_then_move
+ *              ELIST2_ITERATOR::add_before_then_move
  *
  *  Add a new element to the list before the current element and move the
  *  iterator to the new element.
@@ -438,9 +435,8 @@ inline void ELIST2_ITERATOR::add_before_then_move(  // element to add
   current = new_element;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::add_before_stay_put
+ *              ELIST2_ITERATOR::add_before_stay_put
  *
  *  Add a new element to the list before the current element but don't move the
  *  iterator to the new element.
@@ -486,11 +482,11 @@ inline void ELIST2_ITERATOR::add_before_stay_put(  // element to add
   }
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::add_list_after
+ *              ELIST2_ITERATOR::add_list_after
  *
- *  Insert another list to this list after the current element but don't move the
+ *  Insert another list to this list after the current element but don't move
+ *  the
  *  iterator.
  **********************************************************************/
 
@@ -537,9 +533,8 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) {
   }
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::add_list_before
+ *              ELIST2_ITERATOR::add_list_before
  *
  *  Insert another list to this list before the current element. Move the
  *  iterator to the start of the inserted elements
@@ -586,9 +581,8 @@ inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) {
   }
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::extract
+ *              ELIST2_ITERATOR::extract
  *
  *  Do extraction by removing current from the list, returning it to the
  *  caller, but NOT updating the iterator.  (So that any calling loop can do
@@ -631,9 +625,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() {
   return extracted_link;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::move_to_first()
+ *              ELIST2_ITERATOR::move_to_first()
  *
  *  Move current so that it is set to the start of the list.
  *  Return data just in case anyone wants it.
@@ -651,9 +644,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() {
   return current;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::move_to_last()
+ *              ELIST2_ITERATOR::move_to_last()
  *
  *  Move current so that it is set to the end of the list.
  *  Return data just in case anyone wants it.
@@ -671,9 +663,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() {
   return current;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::mark_cycle_pt()
+ *              ELIST2_ITERATOR::mark_cycle_pt()
  *
  *  Remember the current location so that we can tell whether we've returned
  *  to this point later.
@@ -696,9 +687,8 @@ inline void ELIST2_ITERATOR::mark_cycle_pt() {
   started_cycling = FALSE;
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::at_first()
+ *              ELIST2_ITERATOR::at_first()
  *
  *  Are we at the start of the list?
  *
@@ -716,9 +706,8 @@ inline BOOL8 ELIST2_ITERATOR::at_first() {
     !ex_current_was_last));      //first and last
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::at_last()
+ *              ELIST2_ITERATOR::at_last()
  *
  *  Are we at the end of the list?
  *
@@ -736,9 +725,8 @@ inline BOOL8 ELIST2_ITERATOR::at_last() {
     ex_current_was_last));       //first and last
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::cycled_list()
+ *              ELIST2_ITERATOR::cycled_list()
  *
  *  Have we returned to the cycle_pt since it was set?
  *
@@ -754,9 +742,8 @@ inline BOOL8 ELIST2_ITERATOR::cycled_list() {
 
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::length()
+ *              ELIST2_ITERATOR::length()
  *
  *  Return the length of the list
  *
@@ -771,9 +758,8 @@ inline inT32 ELIST2_ITERATOR::length() {
   return list->length ();
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::sort()
+ *              ELIST2_ITERATOR::sort()
  *
  *  Sort the elements of the list, then reposition at the start.
  *
@@ -792,9 +778,8 @@ const void *, const void *)) {
   move_to_first();
 }
 
-
 /***********************************************************************
- *							ELIST2_ITERATOR::add_to_end
+ *              ELIST2_ITERATOR::add_to_end
  *
  *  Add a new element to the end of the list without moving the iterator.
  *  This is provided because a single linked list cannot move to the last as
@@ -854,7 +839,7 @@ will NOT work correctly for classes derived from this.
 
 The macro generates:
   - An element deletion function:      CLASSNAME##_zapper
-  - An E_LIST2 subclass:	CLASSNAME##_LIST
+  - An E_LIST2 subclass:  CLASSNAME##_LIST
   - An E_LIST2_ITERATOR subclass:
               CLASSNAME##_IT
 
@@ -873,132 +858,132 @@ ELIST2IZEH is a concatenation of 3 fragments ELIST2IZEH_A, ELIST2IZEH_B and
 ELIST2IZEH_C.
 ***********************************************************************/
 
-#define ELIST2IZEH_A( CLASSNAME )													\
-																										\
-extern DLLSYM void			CLASSNAME##_zapper(			/*delete a link*/		\
-ELIST2_LINK*				link);						/*link to delete*/
-
-#define ELIST2IZEH_B( CLASSNAME )													\
-																										\
-/***********************************************************************		\
-*							CLASS - CLASSNAME##_LIST																	\
-*																										\
-*							List class for class CLASSNAME															\
-*																										\
-**********************************************************************/			\
-																										\
-class DLLSYM				CLASSNAME##_LIST : public ELIST2							\
-{																										\
-public:																								\
-							CLASSNAME##_LIST():ELIST2() {} \
-														/* constructor */		\
-																										\
-							CLASSNAME##_LIST(			/* don't construct */ \
-	const CLASSNAME##_LIST&)							/*by initial assign*/\
-	{ DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_LIST ),      \
-														ABORT, NULL ); }							\
-																										\
-void						clear()						/* delete elements */\
-	{ ELIST2::internal_clear( &CLASSNAME##_zapper ); }								\
-																										\
-									~CLASSNAME##_LIST()	/* destructor */		\
-	{ clear(); }																				\
-\
-/* Become a deep copy of src_list*/ \
-void deep_copy(const CLASSNAME##_LIST* src_list, \
-               CLASSNAME* (*copier)(const CLASSNAME*)); \
-\
-void						operator=(					/* prevent assign */	\
-	const CLASSNAME##_LIST&)																\
-	{ DONT_ASSIGN_LISTS.error( QUOTE_IT( CLASSNAME##_LIST ),						\
-											ABORT, NULL ); }
-
-#define ELIST2IZEH_C( CLASSNAME )													\
-};																										\
-																										\
-																										\
-																										\
-/***********************************************************************		\
-*							CLASS - CLASSNAME##_IT																		\
-*																										\
-*							Iterator class for class CLASSNAME##_LIST											\
-*																										\
-*  Note: We don't need to coerce pointers to member functions input				\
-*  parameters as these are automatically converted to the type of the base		\
-*  type. ("A ptr to a class may be converted to a pointer to a public base		\
-*  class of that class")																		\
-**********************************************************************/			\
-																										\
-class DLLSYM				CLASSNAME##_IT : public ELIST2_ITERATOR				\
-{																										\
-public:																								\
-								CLASSNAME##_IT():ELIST2_ITERATOR(){}					\
-																										\
-								CLASSNAME##_IT(												\
-CLASSNAME##_LIST*			list):ELIST2_ITERATOR(list){}								\
-																										\
-	CLASSNAME*			data()															\
-		{ return (CLASSNAME*) ELIST2_ITERATOR::data(); }								\
-																										\
-	CLASSNAME*			data_relative(													\
-	inT8					offset)															\
-		{ return (CLASSNAME*) ELIST2_ITERATOR::data_relative( offset ); }		\
-																										\
-	CLASSNAME*			forward()														\
-		{ return (CLASSNAME*) ELIST2_ITERATOR::forward(); }							\
-																										\
-	CLASSNAME*			backward()														\
-		{ return (CLASSNAME*) ELIST2_ITERATOR::backward(); }						\
-																										\
-	CLASSNAME*			extract()														\
-		{ return (CLASSNAME*) ELIST2_ITERATOR::extract(); }							\
-																										\
-	CLASSNAME*			move_to_first()												\
-		{ return (CLASSNAME*) ELIST2_ITERATOR::move_to_first(); }					\
-																										\
-	CLASSNAME*			move_to_last()													\
-		{ return (CLASSNAME*) ELIST2_ITERATOR::move_to_last(); }					\
-};
-
-#define ELIST2IZEH( CLASSNAME )														\
-																										\
-ELIST2IZEH_A( CLASSNAME )																		\
-																										\
-ELIST2IZEH_B( CLASSNAME )																		\
-																										\
-ELIST2IZEH_C( CLASSNAME )
+#define ELIST2IZEH_A(CLASSNAME)                                               \
+                                                                              \
+  extern DLLSYM void CLASSNAME##_zapper(                    /*delete a link*/ \
+                                        ELIST2_LINK *link); /*link to delete*/
+
+#define ELIST2IZEH_B(CLASSNAME)                                            \
+                                                                           \
+  /*********************************************************************** \
+  *             CLASS - CLASSNAME##_LIST                                   \
+  *                                                                        \
+  *                                                                        \
+  *             List class for class CLASSNAME                             \
+  *                                                                        \
+  *                                                                        \
+  **********************************************************************/  \
+                                                                           \
+  class DLLSYM CLASSNAME##_LIST : public ELIST2 {                          \
+   public:                                                                 \
+    CLASSNAME##_LIST() : ELIST2() {}                                       \
+    /* constructor */                                                      \
+                                                                           \
+    CLASSNAME##_LIST(                          /* don't construct */       \
+                     const CLASSNAME##_LIST &) /*by initial assign*/       \
+    {                                                                      \
+      DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, \
+                                        NULL);                             \
+    }                                                                      \
+                                                                           \
+    void clear() /* delete elements */                                     \
+    {                                                                      \
+      ELIST2::internal_clear(&CLASSNAME##_zapper);                         \
+    }                                                                      \
+                                                                           \
+    ~CLASSNAME##_LIST() /* destructor */                                   \
+    {                                                                      \
+      clear();                                                             \
+    }                                                                      \
+                                                                           \
+    /* Become a deep copy of src_list*/                                    \
+    void deep_copy(const CLASSNAME##_LIST *src_list,                       \
+                   CLASSNAME *(*copier)(const CLASSNAME *));               \
+                                                                           \
+    void operator=(/* prevent assign */                                    \
+                   const CLASSNAME##_LIST &) {                             \
+      DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, NULL);    \
+    }
 
+#define ELIST2IZEH_C(CLASSNAME)                                                \
+  }                                                                            \
+  ;                                                                            \
+                                                                               \
+  /***********************************************************************     \
+  *             CLASS - CLASSNAME##_IT                                         \
+  *                                                                            \
+  *             Iterator class for class CLASSNAME##_LIST                      \
+  *                                                                            \
+  *  Note: We don't need to coerce pointers to member functions input          \
+  *  parameters as these are automatically converted to the type of the base   \
+  *  type. ("A ptr to a class may be converted to a pointer to a public base   \
+  *  class of that class")                                                     \
+  **********************************************************************/      \
+                                                                               \
+  class DLLSYM CLASSNAME##_IT : public ELIST2_ITERATOR {                       \
+   public:                                                                     \
+    CLASSNAME##_IT() : ELIST2_ITERATOR() {}                                    \
+                                                                               \
+    CLASSNAME##_IT(CLASSNAME##_LIST *list) : ELIST2_ITERATOR(list) {}          \
+    /* Each accessor below casts the base-class result to CLASSNAME *. */      \
+    CLASSNAME *data() { return (CLASSNAME *)ELIST2_ITERATOR::data(); }         \
+                                                                               \
+    CLASSNAME *data_relative(inT8 offset) {                                    \
+      return (CLASSNAME *)ELIST2_ITERATOR::data_relative(offset);              \
+    }                                                                          \
+                                                                               \
+    CLASSNAME *forward() { return (CLASSNAME *)ELIST2_ITERATOR::forward(); }   \
+                                                                               \
+    CLASSNAME *backward() { return (CLASSNAME *)ELIST2_ITERATOR::backward(); } \
+                                                                               \
+    CLASSNAME *extract() { return (CLASSNAME *)ELIST2_ITERATOR::extract(); }   \
+                                                                               \
+    CLASSNAME *move_to_first() {                                               \
+      return (CLASSNAME *)ELIST2_ITERATOR::move_to_first();                    \
+    }                                                                          \
+                                                                               \
+    CLASSNAME *move_to_last() {                                                \
+      return (CLASSNAME *)ELIST2_ITERATOR::move_to_last();                     \
+    }                                                                          \
+  };
+
+#define ELIST2IZEH(CLASSNAME) \
+  /* Emits parts A, B, C. */  \
+  ELIST2IZEH_A(CLASSNAME)     \
+                              \
+  ELIST2IZEH_B(CLASSNAME)     \
+                              \
+  ELIST2IZEH_C(CLASSNAME)
 
 /***********************************************************************
   ELIST2IZE( CLASSNAME ) MACRO
 ***********************************************************************/
 
-#define ELIST2IZE( CLASSNAME )                                                \
-																										\
-/***********************************************************************		\
-*							CLASSNAME##_zapper																			\
-*																										\
-*  A function which can delete a CLASSNAME element.  This is passed to the		\
-*  generic clear list member function so that when a list is cleared the		\
-*  elements on the list are properly destroyed from the base class, even		\
-*  though we don't use a virtual destructor function.									\
-**********************************************************************/			\
-																										\
-DLLSYM void					CLASSNAME##_zapper(			/*delete a link*/		\
-ELIST2_LINK*				link)						/*link to delete*/	\
-{																										\
-delete (CLASSNAME *) link;																	\
-}																										\
-\
-/* Become a deep copy of src_list*/ \
-void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST* src_list, \
-               CLASSNAME* (*copier)(const CLASSNAME*)) { \
-\
-  CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST*>(src_list)); \
-  CLASSNAME##_IT to_it(this); \
-\
-  for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \
-    to_it.add_after_then_move((*copier)(from_it.data())); \
-}
+#define ELIST2IZE(CLASSNAME)                                                  \
+                                                                              \
+  /***********************************************************************    \
+  *             CLASSNAME##_zapper                                            \
+  *                                                                           \
+  *  A function which can delete a CLASSNAME element.  This is passed to the  \
+  *  generic clear list member function so that when a list is cleared the    \
+  *  elements on the list are properly destroyed from the base class, even    \
+  *  though we don't use a virtual destructor function.                       \
+  **********************************************************************/     \
+                                                                              \
+  DLLSYM void CLASSNAME##_zapper(                   /*delete a link*/         \
+                                 ELIST2_LINK *link) /*link to delete*/        \
+  {                                                                           \
+    delete (CLASSNAME *)link;                                                 \
+  }                                                                           \
+                                                                              \
+  /* Inserts a copier-made copy of each src_list element into this list. */   \
+  void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list,          \
+                                   CLASSNAME *(*copier)(const CLASSNAME *)) { \
+    CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST *>(src_list));         \
+    CLASSNAME##_IT to_it(this);                                               \
+                                                                              \
+    for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward())  \
+      to_it.add_after_then_move((*copier)(from_it.data()));                   \
+  }
 
 #endif
diff --git a/ccutil/errcode.h b/ccutil/errcode.h
index d690240036..2f31a7b9ae 100644
--- a/ccutil/errcode.h
+++ b/ccutil/errcode.h
@@ -87,11 +87,10 @@ const ERRCODE ASSERT_FAILED = "Assert failed";
                         __FILE__, __LINE__);                            \
   }
 
-#define ASSERT_HOST_MSG(x, ...) if (!(x))                               \
-  {                                                                     \
-    tprintf(__VA_ARGS__);                                               \
-    ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d",               \
-                        __FILE__, __LINE__);                            \
+#define ASSERT_HOST_MSG(x, ...)                                                \
+  if (!(x)) { /* Bare if: a following else would bind to this macro. */        \
+    tprintf(__VA_ARGS__);                                                      \
+    ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", __FILE__, __LINE__); \
   }
 
 void signal_exit(int signal_code);
diff --git a/ccutil/genericheap.h b/ccutil/genericheap.h
index bb5f8ddc79..ccf273b33a 100644
--- a/ccutil/genericheap.h
+++ b/ccutil/genericheap.h
@@ -108,6 +108,8 @@ class GenericHeap {
   const Pair& PeekTop() const {
     return heap_[0];
   }
+  // Returns the worst (largest by operator<) element. Heap must be non-empty.
+  const Pair& PeekWorst() const { return heap_[IndexOfWorst()]; }
 
   // Removes the top element of the heap. If entry is not NULL, the element
   // is copied into *entry, otherwise it is discarded.
@@ -136,22 +138,12 @@ class GenericHeap {
   // not NULL, the element is copied into *entry, otherwise it is discarded.
   // Time = O(n). Returns false if the heap was already empty.
   bool PopWorst(Pair* entry) {
-    int heap_size = heap_.size();
-    if (heap_size == 0) return false;  // It cannot be empty!
-
-    // Find the maximum element. Its index is guaranteed to be greater than
-    // the index of the parent of the last element, since by the heap invariant
-    // the parent must be less than or equal to the children.
-    int worst_index = heap_size - 1;
-    int end_parent = ParentNode(worst_index);
-    for (int i = worst_index - 1; i > end_parent; --i) {
-      if (heap_[worst_index] < heap_[i])
-        worst_index = i;
-    }
+    int worst_index = IndexOfWorst();
+    if (worst_index < 0) return false;  // It cannot be empty!
     // Extract the worst element from the heap, leaving a hole at worst_index.
     if (entry != NULL)
       *entry = heap_[worst_index];
-    --heap_size;
+    int heap_size = heap_.size() - 1;
     if (heap_size > 0) {
       // Sift the hole upwards to match the last element of the heap_
       Pair hole_pair = heap_[heap_size];
@@ -162,6 +154,22 @@ class GenericHeap {
     return true;
   }
 
+  // Returns the index of the worst element, or -1 if empty. Time = O(n).
+  int IndexOfWorst() const {
+    const int last = heap_.size() - 1;
+    if (last < 0) return -1;  // Empty heap: there is no worst element.
+
+    // The largest element must be a leaf, and every leaf has an index above
+    // that of the last element's parent (the heap invariant keeps a parent
+    // less than or equal to its children), so only that range is scanned.
+    const int stop = ParentNode(last);
+    int candidate = last;
+    for (int probe = last - 1; probe > stop; --probe) {
+      if (heap_[candidate] < heap_[probe]) candidate = probe;
+    }
+    return candidate;
+  }
+
   // The pointed-to Pair has changed its key value, so the location of pair
   // is reshuffled to maintain the heap invariant.
   // Must be a valid pointer to an element of the heap_!
diff --git a/ccutil/genericvector.h b/ccutil/genericvector.h
index d867d8929b..3a70e21ce0 100644
--- a/ccutil/genericvector.h
+++ b/ccutil/genericvector.h
@@ -174,6 +174,8 @@ class GenericVector {
   // If swap is true, assumes a big/little-endian swap is needed.
   bool DeSerialize(bool swap, FILE* fp);
   bool DeSerialize(bool swap, tesseract::TFile* fp);
+  // Skips the deserialization of the vector.
+  static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
   // Writes a vector of classes to the given file. Assumes the existence of
   // bool T::Serialize(FILE* fp) const that returns false in case of error.
   // Returns false in case of error.
@@ -186,6 +188,8 @@ class GenericVector {
   // If swap is true, assumes a big/little-endian swap is needed.
   bool DeSerializeClasses(bool swap, FILE* fp);
   bool DeSerializeClasses(bool swap, tesseract::TFile* fp);
+  // Calls SkipDeSerialize on the elements of the vector.
+  static bool SkipDeSerializeClasses(bool swap, tesseract::TFile* fp);
 
   // Allocates a new array of double the current_size, copies over the
   // information from data to the new location, deletes data and returns
@@ -238,14 +242,13 @@ class GenericVector {
   int binary_search(const T& target) const {
     int bottom = 0;
     int top = size_used_;
-    do {
+    while (top - bottom > 1) {
       int middle = (bottom + top) / 2;
       if (data_[middle] > target)
         top = middle;
       else
         bottom = middle;
     }
-    while (top - bottom > 1);
     return bottom;
   }
 
@@ -361,7 +364,7 @@ inline bool LoadDataFromFile(const STRING& filename,
   size_t size = ftell(fp);
   fseek(fp, 0, SEEK_SET);
   // Pad with a 0, just in case we treat the result as a string.
-  data->init_to_size((int)size + 1, 0);
+  data->init_to_size(static_cast<int>(size) + 1, 0);
   bool result = fread(&(*data)[0], 1, size, fp) == size;
   fclose(fp);
   return result;
@@ -556,34 +559,54 @@ class PointerVector : public GenericVector<T*> {
   }
   bool DeSerialize(bool swap, TFile* fp) {
     inT32 reserved;
-    if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
-    if (swap) Reverse32(&reserved);
+    if (!DeSerializeSize(swap, fp, &reserved)) return false;
     GenericVector<T*>::reserve(reserved);
     truncate(0);
     for (int i = 0; i < reserved; ++i) {
-      inT8 non_null;
-      if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
-      T* item = NULL;
-      if (non_null) {
-        item = new T;
-        if (!item->DeSerialize(swap, fp)) {
-          delete item;
-          return false;
-        }
-        this->push_back(item);
-      } else {
-        // Null elements should keep their place in the vector.
-        this->push_back(NULL);
+      if (!DeSerializeElement(swap, fp)) return false;
+    }
+    return true;
+  }
+  // Reads only the element count, so that the caller can then choose which
+  // elements to load. To keep the stream consistent after a true return, the
+  // caller must follow up with exactly *size calls, each to either
+  // DeSerializeElement or DeSerializeSkip.
+  static bool DeSerializeSize(bool swap, TFile* fp, inT32* size) {
+    const bool count_read = fp->FRead(size, sizeof(*size), 1) == 1;
+    if (count_read && swap) Reverse32(size);
+    return count_read;
+  }
+  // Reads the next serialized element and appends it. False on read error.
+  bool DeSerializeElement(bool swap, TFile* fp) {
+    inT8 non_null;  // Flag byte read ahead of the element; zero means NULL.
+    if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
+    T* item = NULL;
+    if (non_null) {
+      item = new T;
+      if (!item->DeSerialize(swap, fp)) {
+        delete item;  // Nothing is appended when the element fails to load.
+        return false;
       }
+      this->push_back(item);
+    } else {
+      // Null elements should keep their place in the vector.
+      this->push_back(NULL);
+    }
+    return true;
+  }
+  // Skips the next element of the serialization without loading it.
+  static bool DeSerializeSkip(bool swap, TFile* fp) {
+    inT8 non_null;  // Flag byte read ahead of the element; zero means NULL.
+    if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
+    if (non_null) {  // Only non-NULL entries have data that must be skipped.
+      if (!T::SkipDeSerialize(swap, fp)) return false;
     }
     return true;
   }
 
   // Sorts the items pointed to by the members of this vector using
   // t::operator<().
-  void sort() {
-    sort(&sort_ptr_cmp<T>);
-  }
+  void sort() { this->GenericVector<T*>::sort(&sort_ptr_cmp<T>); }
 };
 
 }  // namespace tesseract
@@ -926,6 +949,13 @@ bool GenericVector<T>::DeSerialize(bool swap, tesseract::TFile* fp) {
   }
   return true;
 }
+template <typename T>  // Reads the item count, then seeks past the raw items.
+bool GenericVector<T>::SkipDeSerialize(bool swap, tesseract::TFile* fp) {
+  inT32 num_items;
+  const bool count_read = fp->FRead(&num_items, sizeof(num_items), 1) == 1;
+  if (count_read && swap) Reverse32(&num_items);
+  return count_read && fp->FRead(NULL, sizeof(T), num_items) == num_items;
+}
 
 // Writes a vector of classes to the given file. Assumes the existence of
 // bool T::Serialize(FILE* fp) const that returns false in case of error.
@@ -976,6 +1006,16 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, tesseract::TFile* fp) {
   }
   return true;
 }
+template <typename T>  // Reads the element count, then skips each element.
+bool GenericVector<T>::SkipDeSerializeClasses(bool swap, tesseract::TFile* fp) {
+  uinT32 reserved;
+  if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
+  if (swap) Reverse32(&reserved);
+  for (uinT32 i = 0; i < reserved; ++i) {  // Unsigned, to match reserved.
+    if (!T::SkipDeSerialize(swap, fp)) return false;
+  }
+  return true;
+}
 
 // This method clear the current object, then, does a shallow copy of
 // its argument, and finally invalidates its argument.
diff --git a/ccutil/globaloc.h b/ccutil/globaloc.h
index 41438194c8..60d6b73ae2 100644
--- a/ccutil/globaloc.h
+++ b/ccutil/globaloc.h
@@ -29,7 +29,7 @@ void SavePixForCrash(int resolution, Pix* pix);
 
 void signal_exit(int signal_code);
 
-void err_exit(); 
+void TESS_API err_exit(); 
 
 void set_global_loc_code(int loc_code); 
 
diff --git a/ccutil/hashfn.h b/ccutil/hashfn.h
index be211b0731..73e15be9a8 100644
--- a/ccutil/hashfn.h
+++ b/ccutil/hashfn.h
@@ -20,16 +20,15 @@
 #ifndef           HASHFN_H
 #define           HASHFN_H
 
-#ifdef USE_STD_NAMESPACE
 #if (__cplusplus >= 201103L) || defined(_MSC_VER)  // Visual Studio
 #include <unordered_map>
 #include <unordered_set>
-#define hash_map std::unordered_map
-#if (_MSC_VER >= 1500 && _MSC_VER < 1600)  // Visual Studio 2008
-using namespace std::tr1;
+#if defined(_MSC_VER) && (_MSC_VER >= 1500 && _MSC_VER < 1600)  // VS 2008
+#define TessHashMap std::tr1::unordered_map
+#define TessHashSet std::tr1::unordered_set
 #else  // _MSC_VER
-using std::unordered_map;
-using std::unordered_set;
+#define TessHashMap std::unordered_map
+#define TessHashSet std::unordered_set
 #include <memory>
 #define SmartPtr std::unique_ptr
 #define HAVE_UNIQUE_PTR
@@ -41,23 +40,14 @@ using std::unordered_set;
 #include <ext/hash_set>
 using __gnu_cxx::hash_map;
 using __gnu_cxx::hash_set;
-#define unordered_map hash_map
-#define unordered_set hash_set
+#define TessHashMap __gnu_cxx::hash_map
+#define TessHashSet __gnu_cxx::hash_set
 #else
 #include <hash_map>
 #include <hash_set>
+#define TessHashMap hash_map
+#define TessHashSet hash_set  // Unqualified name, like TessHashMap above.
 #endif  // gcc
-#elif (__clang__)
-#include <unordered_map>
-#include <unordered_set>
-#define hash_map std::unordered_map
-#define unordered_set std::unordered_set
-#else  // USE_STD_NAMESPACE
-#include <hash_map>
-#include <hash_set>
-#define unordered_map hash_map
-#define unordered_set hash_set
-#endif  // USE_STD_NAMESPACE
 
 #ifndef HAVE_UNIQUE_PTR
 // Trivial smart ptr. Expand to add features of std::unique_ptr as required.
@@ -73,7 +63,7 @@ template<class T> class SmartPtr {
     return ptr_;
   }
   void reset(T* ptr) {
-    if (ptr_ != NULL) delete ptr_;
+    delete ptr_;
     ptr_ = ptr;
   }
   bool operator==(const T* ptr) const {
diff --git a/ccutil/helpers.h b/ccutil/helpers.h
index 51dd3b0b44..a2276bc451 100644
--- a/ccutil/helpers.h
+++ b/ccutil/helpers.h
@@ -73,7 +73,7 @@ class TRand {
 
 // Remove newline (if any) at the end of the string.
 inline void chomp_string(char *str) {
-  int last_index = (int)strlen(str) - 1;
+  int last_index = static_cast<int>(strlen(str)) - 1;
   while (last_index >= 0 &&
          (str[last_index] == '\n' || str[last_index] == '\r')) {
     str[last_index--] = '\0';
diff --git a/ccutil/lsterr.h b/ccutil/lsterr.h
index 42ed07e326..82497ae80d 100644
--- a/ccutil/lsterr.h
+++ b/ccutil/lsterr.h
@@ -38,6 +38,6 @@ const ERRCODE NULL_PREV = "Previous element on the list is NULL";
 const ERRCODE EMPTY_LIST = "List is empty";
 const ERRCODE BAD_PARAMETER = "List parameter error";
 const ERRCODE STILL_LINKED =
-"Attempting to add an element with non NULL links, to a list";
+    "Attempting to add an element with non NULL links, to a list";
 #endif
 #endif
diff --git a/ccutil/mainblk.cpp b/ccutil/mainblk.cpp
index aa73c55002..36becce9ec 100644
--- a/ccutil/mainblk.cpp
+++ b/ccutil/mainblk.cpp
@@ -74,7 +74,7 @@ void CCUtil::main_setup(const char *argv0, const char *basename) {
 #endif /* _WIN32 */
 #if defined(TESSDATA_PREFIX)
   } else {
-    /* Use tessdata prefix which was compiled in. */
+/* Use tessdata prefix which was compiled in. */
 #define _STR(a) #a
 #define _XSTR(a) _STR(a)
     datadir = _XSTR(TESSDATA_PREFIX);
diff --git a/ccutil/ocrclass.h b/ccutil/ocrclass.h
index 3175a6d29a..cb83c6d6b6 100644
--- a/ccutil/ocrclass.h
+++ b/ccutil/ocrclass.h
@@ -1,7 +1,7 @@
 /**********************************************************************
  * File:        ocrclass.h
  * Description: Class definitions and constants for the OCR API.
- * Author:					Hewlett-Packard Co
+ * Author:          Hewlett-Packard Co
  *
  * (C) Copyright 1996, Hewlett-Packard Co.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -29,7 +29,6 @@
 
 #ifndef __GNUC__
 #ifdef _WIN32
-#include          <windows.h>
 #include          "gettimeofday.h"
 #endif
 #else
@@ -110,28 +109,35 @@ typedef struct {                  /*single character */
  * user words found. If it returns true then operation is cancelled.
  **********************************************************************/
 typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);
-typedef bool (*PROGRESS_FUNC)(int progress,
-                              int left, int right, int top, int bottom);
+typedef bool (*PROGRESS_FUNC)(int progress, int left, int right, int top,
+                              int bottom);
 
 class ETEXT_DESC {             // output header
  public:
-  inT16 count;                 /// chars in this buffer(0)
-  inT16 progress;              /// percent complete increasing (0-100)
+  inT16 count;     /// chars in this buffer(0)
+  inT16 progress;  /// percent complete increasing (0-100)
   /** Progress monitor covers word recognition and it does not cover layout
   * analysis.
   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
-  inT8 more_to_come;           /// true if not last
-  volatile inT8 ocr_alive;     /// ocr sets to 1, HP 0
-  inT8 err_code;               /// for errcode use
-  CANCEL_FUNC cancel;          /// returns true to cancel
-  PROGRESS_FUNC progress_callback; /// called whenever progress increases
-  void* cancel_this;           /// this or other data for cancel
-  struct timeval end_time;     /** time to stop. expected to be set only by call
-                               * to set_deadline_msecs() */
-  EANYCODE_CHAR text[1];       /// character data
+  inT8 more_to_come;                /// true if not last
+  volatile inT8 ocr_alive;          /// ocr sets to 1, HP 0
+  inT8 err_code;                    /// for errcode use
+  CANCEL_FUNC cancel;               /// returns true to cancel
+  PROGRESS_FUNC progress_callback;  /// called whenever progress increases
+  void* cancel_this;                /// this or other data for cancel
+  struct timeval end_time;          /// Time to stop. Expected to be set only
+                                    /// by call to set_deadline_msecs().
+  EANYCODE_CHAR text[1];            /// character data
 
-  ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0),
-                   err_code(0), cancel(NULL), cancel_this(NULL) {
+  ETEXT_DESC()
+      : count(0),
+        progress(0),
+        more_to_come(0),
+        ocr_alive(0),
+        err_code(0),
+        cancel(NULL),
+        progress_callback(NULL),
+        cancel_this(NULL) {
     end_time.tv_sec = 0;
     end_time.tv_usec = 0;
   }
diff --git a/ccutil/params.cpp b/ccutil/params.cpp
index 9b604fbef4..30604f1129 100644
--- a/ccutil/params.cpp
+++ b/ccutil/params.cpp
@@ -31,8 +31,7 @@
 #define EQUAL         '='
 
 tesseract::ParamsVectors *GlobalParams() {
-  static tesseract::ParamsVectors global_params =
-    tesseract::ParamsVectors();
+  static tesseract::ParamsVectors global_params = tesseract::ParamsVectors();
   return &global_params;
 }
 
@@ -73,7 +72,7 @@ bool ParamUtils::ReadParamsFromFp(FILE *fp, inT64 end_offset,
 
   while ((end_offset < 0 || ftell(fp) < end_offset) &&
          fgets(line, MAX_PATH, fp)) {
-    if (line[0] != '\n' && line[0] != '#') {
+    if (line[0] != '\r' && line[0] != '\n' && line[0] != '#') {
       chomp_string(line);  // remove newline
       for (valptr = line; *valptr && *valptr != ' ' && *valptr != '\t';
         valptr++);
diff --git a/ccutil/params.h b/ccutil/params.h
index d49ce3ff8a..c168dbe4c6 100644
--- a/ccutil/params.h
+++ b/ccutil/params.h
@@ -55,7 +55,7 @@ class ParamUtils {
   // ORed or ANDed with any current values.
   // Blank lines and lines beginning # are ignored.
   // Values may have any whitespace after the name and are the rest of line.
-  static bool ReadParamsFile(
+  static bool TESS_API ReadParamsFile(
       const char *file,   // filename to read
       SetParamConstraint constraint,
       ParamsVectors *member_params);
@@ -252,7 +252,7 @@ class DoubleParam : public Param {
 //
 // TODO(daria): remove GlobalParams() when all global Tesseract
 // parameters are converted to members.
-tesseract::ParamsVectors *GlobalParams();
+tesseract::ParamsVectors TESS_API *GlobalParams();
 
 /*************************************************************************
  * Note on defining parameters.
diff --git a/ccutil/platform.h b/ccutil/platform.h
index 219f9e31e0..dd9be87ac6 100644
--- a/ccutil/platform.h
+++ b/ccutil/platform.h
@@ -24,6 +24,12 @@
 
 #define DLLSYM
 #ifdef _WIN32
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif /* NOMINMAX */
+#ifndef WIN32_LEAN_AND_MEAN /* may already be set by the build */
+#define WIN32_LEAN_AND_MEAN
+#endif /* WIN32_LEAN_AND_MEAN */
 #ifdef __GNUC__
 #define ultoa _ultoa
 #endif  /* __GNUC__ */
diff --git a/ccutil/serialis.cpp b/ccutil/serialis.cpp
index ff3b278a7e..d1eed58465 100644
--- a/ccutil/serialis.cpp
+++ b/ccutil/serialis.cpp
@@ -95,7 +95,7 @@ int TFile::FRead(void* buffer, int size, int count) {
   char* char_buffer = reinterpret_cast<char*>(buffer);
   if (data_->size() - offset_ < required_size)
     required_size = data_->size() - offset_;
-  if (required_size > 0)
+  if (required_size > 0 && char_buffer != NULL)
     memcpy(char_buffer, &(*data_)[offset_], required_size);
   offset_ += required_size;
   return required_size / size;
diff --git a/ccutil/strngs.cpp b/ccutil/strngs.cpp
index ff3bbac287..5a9cfd0d48 100644
--- a/ccutil/strngs.cpp
+++ b/ccutil/strngs.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        strngs.c  (Formerly strings.c)
  * Description: STRING class functions.
- * Author:					Ray Smith
- * Created:					Fri Feb 15 09:13:30 GMT 1991
+ * Author:          Ray Smith
+ * Created:         Fri Feb 15 09:13:30 GMT 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -181,6 +181,14 @@ bool STRING::DeSerialize(bool swap, TFile* fp) {
   return true;
 }
 
+// Static counterpart of DeSerialize: reads the length, then skips the chars.
+bool STRING::SkipDeSerialize(bool swap, tesseract::TFile* fp) {
+  inT32 num_bytes;
+  const bool have_len = fp->FRead(&num_bytes, sizeof(num_bytes), 1) == 1;
+  if (have_len && swap) ReverseN(&num_bytes, sizeof(num_bytes));
+  return have_len && fp->FRead(NULL, 1, num_bytes) == num_bytes;
+}
+
 BOOL8 STRING::contains(const char c) const {
   return (c != '\0') && (strchr (GetCStr(), c) != NULL);
 }
diff --git a/ccutil/strngs.h b/ccutil/strngs.h
index 9308cc67c8..2e65463efd 100644
--- a/ccutil/strngs.h
+++ b/ccutil/strngs.h
@@ -60,6 +60,8 @@ class TESS_API STRING
     // Reads from the given file. Returns false in case of error.
     // If swap is true, assumes a big/little-endian swap is needed.
     bool DeSerialize(bool swap, tesseract::TFile* fp);
+    // As DeSerialize, but only seeks past the data - hence a static method.
+    static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
 
     BOOL8 contains(const char c) const;
     inT32 length() const;
@@ -145,13 +147,11 @@ class TESS_API STRING
     }
 
     // returns the string data part of storage
-    inline char* GetCStr() {
-      return ((char *)data_) + sizeof(STRING_HEADER);
-    };
+    inline char* GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); }
 
     inline const char* GetCStr() const {
       return ((const char *)data_) + sizeof(STRING_HEADER);
-    };
+    }
     inline bool InvariantOk() const {
 #if STRING_IS_PROTECTED
       return (GetHeader()->used_ == 0) ?
diff --git a/ccutil/tessdatamanager.h b/ccutil/tessdatamanager.h
index fd2685a1d8..e583b70049 100644
--- a/ccutil/tessdatamanager.h
+++ b/ccutil/tessdatamanager.h
@@ -76,24 +76,24 @@ enum TessdataType {
  * kTessdataFileSuffixes[i] indicates the file suffix for
  * tessdata of type i (from TessdataType enum).
  */
-static const char * const kTessdataFileSuffixes[] = {
-  kLangConfigFileSuffix,        // 0
-  kUnicharsetFileSuffix,        // 1
-  kAmbigsFileSuffix,            // 2
-  kBuiltInTemplatesFileSuffix,  // 3
-  kBuiltInCutoffsFileSuffix,    // 4
-  kNormProtoFileSuffix,         // 5
-  kPuncDawgFileSuffix,          // 6
-  kSystemDawgFileSuffix,        // 7
-  kNumberDawgFileSuffix,        // 8
-  kFreqDawgFileSuffix,          // 9
-  kFixedLengthDawgsFileSuffix,  // 10  // deprecated
-  kCubeUnicharsetFileSuffix,    // 11
-  kCubeSystemDawgFileSuffix,    // 12
-  kShapeTableFileSuffix,        // 13
-  kBigramDawgFileSuffix,        // 14
-  kUnambigDawgFileSuffix,       // 15
-  kParamsModelFileSuffix,       // 16
+static const char *const kTessdataFileSuffixes[] = {
+    kLangConfigFileSuffix,        // 0
+    kUnicharsetFileSuffix,        // 1
+    kAmbigsFileSuffix,            // 2
+    kBuiltInTemplatesFileSuffix,  // 3
+    kBuiltInCutoffsFileSuffix,    // 4
+    kNormProtoFileSuffix,         // 5
+    kPuncDawgFileSuffix,          // 6
+    kSystemDawgFileSuffix,        // 7
+    kNumberDawgFileSuffix,        // 8
+    kFreqDawgFileSuffix,          // 9
+    kFixedLengthDawgsFileSuffix,  // 10  // deprecated
+    kCubeUnicharsetFileSuffix,    // 11
+    kCubeSystemDawgFileSuffix,    // 12
+    kShapeTableFileSuffix,        // 13
+    kBigramDawgFileSuffix,        // 14
+    kUnambigDawgFileSuffix,       // 15
+    kParamsModelFileSuffix,       // 16
 };
 
 /**
@@ -101,23 +101,23 @@ static const char * const kTessdataFileSuffixes[] = {
  * of type i (from TessdataType enum) is text, and is binary otherwise.
  */
 static const bool kTessdataFileIsText[] = {
-  true,                         // 0
-  true,                         // 1
-  true,                         // 2
-  false,                        // 3
-  true,                         // 4
-  true,                         // 5
-  false,                        // 6
-  false,                        // 7
-  false,                        // 8
-  false,                        // 9
-  false,                        // 10  // deprecated
-  true,                         // 11
-  false,                        // 12
-  false,                        // 13
-  false,                        // 14
-  false,                        // 15
-  true,                         // 16
+    true,   // 0
+    true,   // 1
+    true,   // 2
+    false,  // 3
+    true,   // 4
+    true,   // 5
+    false,  // 6
+    false,  // 7
+    false,  // 8
+    false,  // 9
+    false,  // 10  // deprecated
+    true,   // 11
+    false,  // 12
+    false,  // 13
+    false,  // 14
+    false,  // 15
+    true,   // 16
 };
 
 /**
diff --git a/ccutil/unicharset.h b/ccutil/unicharset.h
index 684655affb..eb9f463068 100644
--- a/ccutil/unicharset.h
+++ b/ccutil/unicharset.h
@@ -141,7 +141,7 @@ class UNICHARSET {
   // Custom list of characters and their ligature forms (UTF8)
   // These map to unicode values in the private use area (PUC) and are supported
   // by only few font families (eg. Wyld, Adobe Caslon Pro).
-  static const char* kCustomLigatures[][2];
+  static TESS_API const char* kCustomLigatures[][2];
 
   // List of strings for the SpecialUnicharCodes. Keep in sync with the enum.
   static const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT];
@@ -177,12 +177,11 @@ class UNICHARSET {
 
   // Return the UNICHAR_ID of a given unichar representation within the
   // UNICHARSET.
-  UNICHAR_ID unichar_to_id(const char* const unichar_repr) const;
+  UNICHAR_ID TESS_API unichar_to_id(const char* const unichar_repr) const;
 
   // Return the UNICHAR_ID of a given unichar representation within the
   // UNICHARSET. Only the first length characters from unichar_repr are used.
-  UNICHAR_ID unichar_to_id(const char* const unichar_repr,
-                                 int length) const;
+  UNICHAR_ID unichar_to_id(const char* const unichar_repr, int length) const;
 
   // Return the minimum number of bytes that matches a legal UNICHAR_ID,
   // while leaving the rest of the string encodable. Returns 0 if the
@@ -235,7 +234,7 @@ class UNICHARSET {
   }
 
   // Add a unichar representation to the set.
-  void unichar_insert(const char* const unichar_repr);
+  void TESS_API unichar_insert(const char* const unichar_repr);
 
   // Return true if the given unichar id exists within the set.
   // Relies on the fact that unichar ids are contiguous in the unicharset.
@@ -245,7 +244,7 @@ class UNICHARSET {
   }
 
   // Return true if the given unichar representation exists within the set.
-  bool contains_unichar(const char* const unichar_repr) const;
+  bool TESS_API contains_unichar(const char* const unichar_repr) const;
   bool contains_unichar(const char* const unichar_repr, int length) const;
 
   // Return true if the given unichar representation corresponds to the given
@@ -328,7 +327,7 @@ class UNICHARSET {
 
   // Saves the content of the UNICHARSET to the given STRING.
   // Returns true if the operation is successful.
-  bool save_to_string(STRING *str) const;
+  bool TESS_API save_to_string(STRING *str) const;
 
   // Load a unicharset from a unicharset file that has been loaded into
   // the given memory buffer.
diff --git a/classify/adaptive.cpp b/classify/adaptive.cpp
index 019befb4f6..a713fedc27 100644
--- a/classify/adaptive.cpp
+++ b/classify/adaptive.cpp
@@ -311,8 +311,8 @@ void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
  * @note History: Tue Mar 19 14:11:01 1991, DSJ, Created.
  */
 ADAPT_CLASS ReadAdaptedClass(FILE *File) {
-  int NumTempProtos;
-  int NumConfigs;
+  int32_t NumTempProtos;
+  int32_t NumConfigs;
   int i;
   ADAPT_CLASS Class;
   TEMP_PROTO TempProto;
@@ -330,7 +330,7 @@ ADAPT_CLASS ReadAdaptedClass(FILE *File) {
     WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
 
   /* then read in the list of temporary protos */
-  fread ((char *) &NumTempProtos, sizeof (int), 1, File);
+  fread (&NumTempProtos, sizeof(NumTempProtos), 1, File);
   Class->TempProtos = NIL_LIST;
   for (i = 0; i < NumTempProtos; i++) {
     TempProto =
@@ -341,7 +341,7 @@ ADAPT_CLASS ReadAdaptedClass(FILE *File) {
   }
 
   /* then read in the adapted configs */
-  fread ((char *) &NumConfigs, sizeof (int), 1, File);
+  fread (&NumConfigs, sizeof(NumConfigs), 1, File);
   for (i = 0; i < NumConfigs; i++)
     if (test_bit (Class->PermConfigs, i))
       Class->Config[i].Perm = ReadPermConfig (File);
diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp
index 6e995c2159..e4713a04f5 100644
--- a/classify/adaptmatch.cpp
+++ b/classify/adaptmatch.cpp
@@ -819,7 +819,7 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob,
   Features = ExtractPicoFeatures(Blob);
 
   NumFeatures = Features->NumFeatures;
-  if (NumFeatures > UNLIKELY_NUM_FEAT) {
+  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
     FreeFeatureSet(Features);
     return 0;
   }
@@ -908,8 +908,7 @@ void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId,
 
     NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
     if (NumFeatures <= 0) {
-      FreeFeatureSet(FloatFeatures);
-      return;
+      return;  // Features already freed by GetAdaptiveFeatures.
     }
 
     // Only match configs with the matching font.
@@ -1008,8 +1007,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
 #endif
 }
 
-
-
 /**
  * This routine adds the result of a classification into
  * Results.  If the new rating is much worse than the current
diff --git a/classify/classify.cpp b/classify/classify.cpp
index 436efd1f2d..7c11c51f6e 100644
--- a/classify/classify.cpp
+++ b/classify/classify.cpp
@@ -151,8 +151,8 @@ Classify::Classify()
       INT_MEMBER(classify_integer_matcher_multiplier, 10,
                  "Integer Matcher Multiplier  0-255:   ", this->params()),
       EnableLearning(true),
-      INT_MEMBER(il1_adaption_test, 0, "Don't adapt to i/I at beginning of word",
-                 this->params()),
+      INT_MEMBER(il1_adaption_test, 0,
+                 "Don't adapt to i/I at beginning of word", this->params()),
       BOOL_MEMBER(classify_bln_numeric_mode, 0,
                   "Assume the input is numbers [0-9].", this->params()),
       double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
diff --git a/classify/cluster.cpp b/classify/cluster.cpp
index b723bfa82e..1f82349552 100644
--- a/classify/cluster.cpp
+++ b/classify/cluster.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:	cluster.c
- **	Purpose:	Routines for clustering points in N-D space
- **	Author:		Dan Johnson
- **	History:	5/29/89, DSJ, Created.
+ ** Filename: cluster.cpp
+ ** Purpose:  Routines for clustering points in N-D space
+ ** Author:   Dan Johnson
+ ** History:  5/29/89, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -390,11 +390,11 @@ double InvertMatrix(const float* input, int size, float* inv);
  * This routine creates a new clusterer data structure,
  * initializes it, and returns a pointer to it.
  *
- * @param SampleSize	number of dimensions in feature space
- * @param ParamDesc	description of each dimension
- * @return	pointer to the new clusterer data structure
- * @note Exceptions:	None
- * @note History:	5/29/89, DSJ, Created.
+ * @param SampleSize  number of dimensions in feature space
+ * @param ParamDesc description of each dimension
+ * @return  pointer to the new clusterer data structure
+ * @note Exceptions:  None
+ * @note History: 5/29/89, DSJ, Created.
  */
 CLUSTERER *
 MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) {
@@ -437,7 +437,6 @@ MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) {
   return Clusterer;
 }                                // MakeClusterer
 
-
 /**
  * This routine creates a new sample data structure to hold
  * the specified feature.  This sample is added to the clusterer
@@ -445,14 +444,14 @@ MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) {
  * clustered later), and a pointer to the sample is returned to
  * the caller.
  *
- * @param Clusterer	clusterer data structure to add sample to
- * @param Feature	feature to be added to clusterer
- * @param CharID	unique ident. of char that sample came from
+ * @param Clusterer clusterer data structure to add sample to
+ * @param Feature feature to be added to clusterer
+ * @param CharID  unique ident. of char that sample came from
  *
- * @return 		Pointer to the new sample data structure
- * @note Exceptions:	ALREADYCLUSTERED	MakeSample can't be called after
+ * @return    Pointer to the new sample data structure
+ * @note Exceptions:  ALREADYCLUSTERED  MakeSample can't be called after
  *    ClusterSamples has been called
- * @note History:	5/29/89, DSJ, Created.
+ * @note History: 5/29/89, DSJ, Created.
  */
 SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature,
                    inT32 CharID) {
@@ -490,7 +489,6 @@ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature,
   return (Sample);
 }                                // MakeSample
 
-
 /**
  * This routine first checks to see if the samples in this
  * clusterer have already been clustered before; if so, it does
@@ -505,12 +503,12 @@ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature,
  * list of prototypes that best represent the samples given
  * the constraints specified in Config.
  *
- * @param Clusterer	data struct containing samples to be clustered
- * @param Config	parameters which control clustering process
+ * @param Clusterer data struct containing samples to be clustered
+ * @param Config  parameters which control clustering process
  *
  * @return Pointer to a list of prototypes
- * @note Exceptions:	None
- * @note History:	5/29/89, DSJ, Created.
+ * @note Exceptions:  None
+ * @note History: 5/29/89, DSJ, Created.
  */
 LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
   //only create cluster tree if samples have never been clustered before
@@ -523,10 +521,16 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
 
   //compute prototypes starting at the root node in the tree
   ComputePrototypes(Clusterer, Config);
-  return (Clusterer->ProtoList);
+  // We don't need the cluster pointers in the protos any more, so null them
+  // out, which makes it safe to delete the clusterer.
+  LIST proto_list = Clusterer->ProtoList;
+  iterate(proto_list) {
+    PROTOTYPE *proto = reinterpret_cast<PROTOTYPE *>(first_node(proto_list));
+    proto->Cluster = NULL;
+  }
+  return Clusterer->ProtoList;
 }                                // ClusterSamples
 
-
 /**
  * This routine frees all of the memory allocated to the
  * specified data structure.  It will not, however, free
@@ -535,10 +539,10 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
  * to NULL to indicate that the cluster data structures no
  * longer exist.  Any sample lists that have been obtained
  * via calls to GetSamples are no longer valid.
- * @param Clusterer	pointer to data structure to be freed
+ * @param Clusterer pointer to data structure to be freed
  * @return None
- * @note Exceptions:	None
- * @note History:	6/6/89, DSJ, Created.
+ * @note Exceptions:  None
+ * @note History: 6/6/89, DSJ, Created.
  */
 void FreeClusterer(CLUSTERER *Clusterer) {
   if (Clusterer != NULL) {
@@ -558,21 +562,19 @@ void FreeClusterer(CLUSTERER *Clusterer) {
   }
 }                                // FreeClusterer
 
-
 /**
  * This routine frees all of the memory allocated to the
  * specified list of prototypes.  The clusters which are
  * pointed to by the prototypes are not freed.
- * @param ProtoList	pointer to list of prototypes to be freed
+ * @param ProtoList pointer to list of prototypes to be freed
  * @return None
- * @note Exceptions:	None
- * @note History:	6/6/89, DSJ, Created.
+ * @note Exceptions:  None
+ * @note History: 6/6/89, DSJ, Created.
  */
 void FreeProtoList(LIST *ProtoList) {
   destroy_nodes(*ProtoList, FreePrototype);
 }                                // FreeProtoList
 
-
 /**
  * This routine deallocates the memory consumed by the specified
  * prototype and modifies the corresponding cluster so that it
@@ -606,7 +608,6 @@ void FreePrototype(void *arg) {  //PROTOTYPE     *Prototype)
   memfree(Prototype);
 }                                // FreePrototype
 
-
 /**
  * This routine is used to find all of the samples which
  * belong to a cluster.  It starts by removing the top
@@ -617,10 +618,10 @@ void FreePrototype(void *arg) {  //PROTOTYPE     *Prototype)
  * If all samples have been found, NULL is returned.
  * InitSampleSearch() must be called
  * before NextSample() to initialize the search.
- * @param SearchState	ptr to list containing clusters to be searched
- * @return	Pointer to the next leaf cluster (sample) or NULL.
- * @note Exceptions:	None
- * @note History:	6/16/89, DSJ, Created.
+ * @param SearchState ptr to list containing clusters to be searched
+ * @return  Pointer to the next leaf cluster (sample) or NULL.
+ * @note Exceptions:  None
+ * @note History: 6/16/89, DSJ, Created.
  */
 CLUSTER *NextSample(LIST *SearchState) {
   CLUSTER *Cluster;
@@ -637,29 +638,27 @@ CLUSTER *NextSample(LIST *SearchState) {
   }
 }                                // NextSample
 
-
 /**
  * This routine returns the mean of the specified
  * prototype in the indicated dimension.
- * @param Proto	prototype to return mean of
- * @param Dimension	dimension whose mean is to be returned
- * @return	Mean of Prototype in Dimension
+ * @param Proto prototype to return mean of
+ * @param Dimension dimension whose mean is to be returned
+ * @return  Mean of Prototype in Dimension
  * @note Exceptions: none
- * @note History:	7/6/89, DSJ, Created.
+ * @note History: 7/6/89, DSJ, Created.
  */
 FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension) {
   return (Proto->Mean[Dimension]);
 }                                // Mean
 
-
 /**
  * This routine returns the standard deviation of the
  * prototype in the indicated dimension.
- * @param Proto		prototype to return standard deviation of
- * @param Dimension	dimension whose stddev is to be returned
- * @return	Standard deviation of Prototype in Dimension
+ * @param Proto   prototype to return standard deviation of
+ * @param Dimension dimension whose stddev is to be returned
+ * @return  Standard deviation of Prototype in Dimension
  * @note Exceptions: none
- * @note History:	7/6/89, DSJ, Created.
+ * @note History: 7/6/89, DSJ, Created.
  */
 FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) {
   switch (Proto->Style) {
@@ -697,10 +696,10 @@ FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) {
  * tree are the individual samples themselves; they have no
  * sub-clusters.  The root node of the tree conceptually contains
  * all of the samples.
- * @param Clusterer	data structure holdings samples to be clustered
- * @return	None (the Clusterer data structure is changed)
- * @note Exceptions:	None
- * @note History:	5/29/89, DSJ, Created.
+ * @param Clusterer data structure holding samples to be clustered
+ * @return  None (the Clusterer data structure is changed)
+ * @note Exceptions:  None
+ * @note History: 5/29/89, DSJ, Created.
  */
 void CreateClusterTree(CLUSTERER *Clusterer) {
   ClusteringContext context;
@@ -760,7 +759,6 @@ void CreateClusterTree(CLUSTERER *Clusterer) {
   memfree(context.candidates);
 }                                // CreateClusterTree
 
-
 /**
  * This routine is designed to be used in concert with the
  * KDWalk routine.  It will create a potential cluster for
@@ -786,7 +784,6 @@ void MakePotentialClusters(ClusteringContext *context,
   }
 }                                // MakePotentialClusters
 
-
 /**
  * This routine searches the specified kd-tree for the nearest
  * neighbor of the specified cluster.  It actually uses the
@@ -795,12 +792,12 @@ void MakePotentialClusters(ClusteringContext *context,
  * neighbor is returned, if it can be found, otherwise NULL is
  * returned.  The distance between the 2 nodes is placed
  * in the specified variable.
- * @param Tree		kd-tree to search in for nearest neighbor
- * @param Cluster	cluster whose nearest neighbor is to be found
- * @param Distance	ptr to variable to report distance found
- * @return	Pointer to the nearest neighbor of Cluster, or NULL
+ * @param Tree    kd-tree to search in for nearest neighbor
+ * @param Cluster cluster whose nearest neighbor is to be found
+ * @param Distance  ptr to variable to report distance found
+ * @return  Pointer to the nearest neighbor of Cluster, or NULL
  * @note Exceptions: none
- * @note History:	5/29/89, DSJ, Created.
+ * @note History: 5/29/89, DSJ, Created.
  *  7/13/89, DSJ, Removed visibility of kd-tree node data struct
  */
 CLUSTER *
@@ -830,17 +827,16 @@ FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance)
   return BestNeighbor;
 }                                // FindNearestNeighbor
 
-
 /**
  * This routine creates a new permanent cluster from the
  * clusters specified in TempCluster.  The 2 clusters in
  * TempCluster are marked as "clustered" and deleted from
  * the kd-tree.  The new cluster is then added to the kd-tree.
- * @param Clusterer	current clustering environment
- * @param TempCluster	potential cluster to make permanent
+ * @param Clusterer current clustering environment
+ * @param TempCluster potential cluster to make permanent
  * @return Pointer to the new permanent cluster
- * @note Exceptions:	none
- * @note History:	5/29/89, DSJ, Created.
+ * @note Exceptions:  none
+ * @note History: 5/29/89, DSJ, Created.
  *    7/13/89, DSJ, Removed visibility of kd-tree node data struct
  */
 CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) {
@@ -872,21 +868,20 @@ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) {
   return Cluster;
 }                                // MakeNewCluster
 
-
 /**
  * This routine merges two clusters into one larger cluster.
  * To do this it computes the number of samples in the new
  * cluster and the mean of the new cluster.  The ParamDesc
  * information is used to ensure that circular dimensions
  * are handled correctly.
- * @param N	# of dimensions (size of arrays)
- * @param ParamDesc	array of dimension descriptions
- * @param n1, n2	number of samples in each old cluster
- * @param m	array to hold mean of new cluster
- * @param m1, m2	arrays containing means of old clusters
- * @return	The number of samples in the new cluster.
- * @note Exceptions:	None
- * @note History:	5/31/89, DSJ, Created.
+ * @param N # of dimensions (size of arrays)
+ * @param ParamDesc array of dimension descriptions
+ * @param n1, n2  number of samples in each old cluster
+ * @param m array to hold mean of new cluster
+ * @param m1, m2  arrays containing means of old clusters
+ * @return  The number of samples in the new cluster.
+ * @note Exceptions:  None
+ * @note History: 5/31/89, DSJ, Created.
  */
 inT32 MergeClusters(inT16 N,
                     PARAM_DESC ParamDesc[],
@@ -921,17 +916,16 @@ inT32 MergeClusters(inT16 N,
   return n;
 }                                // MergeClusters
 
-
 /**
  * This routine decides which clusters in the cluster tree
  * should be represented by prototypes, forms a list of these
  * prototypes, and places the list in the Clusterer data
  * structure.
- * @param Clusterer	data structure holding cluster tree
- * @param Config		parameters used to control prototype generation
- * @return	None
- * @note Exceptions:	None
- * @note History:	5/30/89, DSJ, Created.
+ * @param Clusterer data structure holding cluster tree
+ * @param Config    parameters used to control prototype generation
+ * @return  None
+ * @note Exceptions:  None
+ * @note History: 5/30/89, DSJ, Created.
  */
 void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
   LIST ClusterStack = NIL_LIST;
@@ -961,8 +955,7 @@ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
   }
 }                                // ComputePrototypes
 
-
-/** 
+/**
  * This routine attempts to create a prototype from the
  * specified cluster that conforms to the distribution
  * specified in Config.  If there are too few samples in the
@@ -972,12 +965,12 @@ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
  * is generated and NULL is returned.  If a prototype can be
  * found that matches the desired distribution then a pointer
  * to it is returned, otherwise NULL is returned.
- * @param Clusterer	data structure holding cluster tree
- * @param Config	parameters used to control prototype generation
- * @param Cluster	cluster to be made into a prototype
- * @return	Pointer to new prototype or NULL
- * @note Exceptions:	None
- * @note History:	6/19/89, DSJ, Created.
+ * @param Clusterer data structure holding cluster tree
+ * @param Config  parameters used to control prototype generation
+ * @param Cluster cluster to be made into a prototype
+ * @return  Pointer to new prototype or NULL
+ * @note Exceptions:  None
+ * @note History: 6/19/89, DSJ, Created.
  */
 PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,
                          CLUSTERCONFIG *Config,
@@ -1050,7 +1043,6 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,
   return Proto;
 }                                // MakePrototype
 
-
 /**
  * This routine checks for clusters which are degenerate and
  * therefore cannot be analyzed in a statistically valid way.
@@ -1063,14 +1055,14 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,
  *
  * If the cluster is not degenerate, NULL is returned.
  *
- * @param N		number of dimensions
- * @param Cluster		cluster being analyzed
- * @param Statistics	statistical info about cluster
- * @param Style		type of prototype to be generated
- * @param MinSamples	minimum number of samples in a cluster
- * @return	Pointer to degenerate prototype or NULL.
- * @note Exceptions:	None
- * @note History:	6/20/89, DSJ, Created.
+ * @param N   number of dimensions
+ * @param Cluster   cluster being analyzed
+ * @param Statistics  statistical info about cluster
+ * @param Style   type of prototype to be generated
+ * @param MinSamples  minimum number of samples in a cluster
+ * @return  Pointer to degenerate prototype or NULL.
+ * @note Exceptions:  None
+ * @note History: 6/20/89, DSJ, Created.
  *    7/12/89, DSJ, Changed name and added check for 0 stddev.
  *    8/8/89, DSJ, Removed check for 0 stddev (handled elsewhere).
  */
@@ -1110,10 +1102,10 @@ PROTOTYPE *MakeDegenerateProto(  //this was MinSample
  * be split. If not, then a new prototype is formed and
  * returned to the caller. If there is, then NULL is returned
  * to the caller.
- * @param Clusterer	data struct containing samples being clustered
+ * @param Clusterer data struct containing samples being clustered
  * @param Config provides the magic number of samples that make a good cluster
- * @param Cluster		cluster to be made into an elliptical prototype
- * @param Statistics	statistical info about cluster
+ * @param Cluster   cluster to be made into an elliptical prototype
+ * @param Statistics  statistical info about cluster
  * @return Pointer to new elliptical prototype or NULL.
  */
 PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
@@ -1215,13 +1207,13 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
  * be approximated by a spherical normal distribution.  If it
  * can be, then a new prototype is formed and returned to the
  * caller.  If it can't be, then NULL is returned to the caller.
- * @param Clusterer	data struct containing samples being clustered
- * @param Cluster		cluster to be made into a spherical prototype
- * @param Statistics	statistical info about cluster
- * @param Buckets		histogram struct used to analyze distribution
- * @return	Pointer to new spherical prototype or NULL.
- * @note Exceptions:	None
- * @note History:	6/1/89, DSJ, Created.
+ * @param Clusterer data struct containing samples being clustered
+ * @param Cluster   cluster to be made into a spherical prototype
+ * @param Statistics  statistical info about cluster
+ * @param Buckets   histogram struct used to analyze distribution
+ * @return  Pointer to new spherical prototype or NULL.
+ * @note Exceptions:  None
+ * @note History: 6/1/89, DSJ, Created.
  */
 PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,
                               CLUSTER *Cluster,
@@ -1247,19 +1239,18 @@ PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,
   return (Proto);
 }                                // MakeSphericalProto
 
-
 /**
  * This routine tests the specified cluster to see if it can
  * be approximated by an elliptical normal distribution.  If it
  * can be, then a new prototype is formed and returned to the
  * caller.  If it can't be, then NULL is returned to the caller.
- * @param Clusterer	data struct containing samples being clustered
- * @param Cluster		cluster to be made into an elliptical prototype
- * @param Statistics	statistical info about cluster
- * @param Buckets		histogram struct used to analyze distribution
- * @return	Pointer to new elliptical prototype or NULL.
- * @note Exceptions:	None
- * @note History:	6/12/89, DSJ, Created.
+ * @param Clusterer data struct containing samples being clustered
+ * @param Cluster   cluster to be made into an elliptical prototype
+ * @param Statistics  statistical info about cluster
+ * @param Buckets   histogram struct used to analyze distribution
+ * @return  Pointer to new elliptical prototype or NULL.
+ * @note Exceptions:  None
+ * @note History: 6/12/89, DSJ, Created.
  */
 PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,
                                CLUSTER *Cluster,
@@ -1286,7 +1277,6 @@ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,
   return (Proto);
 }                                // MakeEllipticalProto
 
-
 /**
  * This routine tests each dimension of the specified cluster to
  * see what distribution would best approximate that dimension.
@@ -1295,14 +1285,14 @@ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,
  * be represented by one of these distributions,
  * then a new prototype is formed and returned to the
  * caller.  If it can't be, then NULL is returned to the caller.
- * @param Clusterer	data struct containing samples being clustered
- * @param Cluster		cluster to be made into a prototype
- * @param Statistics	statistical info about cluster
- * @param NormalBuckets	histogram struct used to analyze distribution
- * @param Confidence	confidence level for alternate distributions
- * @return	Pointer to new mixed prototype or NULL.
- * @note Exceptions:	None
- * @note History:	6/12/89, DSJ, Created.
+ * @param Clusterer data struct containing samples being clustered
+ * @param Cluster   cluster to be made into a prototype
+ * @param Statistics  statistical info about cluster
+ * @param NormalBuckets histogram struct used to analyze distribution
+ * @param Confidence  confidence level for alternate distributions
+ * @return  Pointer to new mixed prototype or NULL.
+ * @note Exceptions:  None
+ * @note History: 6/12/89, DSJ, Created.
  */
 PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,
                           CLUSTER *Cluster,
@@ -1355,16 +1345,15 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,
   return (Proto);
 }                                // MakeMixedProto
 
-
 /**
  * This routine alters the ith dimension of the specified
  * mixed prototype to be D_random.
- * @param i	index of dimension to be changed
- * @param Proto	prototype whose dimension is to be altered
- * @param ParamDesc	description of specified dimension
- * @return	None
- * @note Exceptions:	None
- * @note History:	6/20/89, DSJ, Created.
+ * @param i index of dimension to be changed
+ * @param Proto prototype whose dimension is to be altered
+ * @param ParamDesc description of specified dimension
+ * @return  None
+ * @note Exceptions:  None
+ * @note History: 6/20/89, DSJ, Created.
  */
 void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) {
   Proto->Distrib[i] = D_random;
@@ -1380,16 +1369,15 @@ void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) {
   // note that the proto Weight is irrelevant for D_random protos
 }                                // MakeDimRandom
 
-
 /**
  * This routine alters the ith dimension of the specified
  * mixed prototype to be uniform.
- * @param i	index of dimension to be changed
- * @param Proto		prototype whose dimension is to be altered
- * @param Statistics	statistical info about prototype
- * @return	None
- * @note Exceptions:	None
- * @note History:	6/20/89, DSJ, Created.
+ * @param i index of dimension to be changed
+ * @param Proto   prototype whose dimension is to be altered
+ * @param Statistics  statistical info about prototype
+ * @return  None
+ * @note Exceptions:  None
+ * @note History: 6/20/89, DSJ, Created.
  */
 void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) {
   Proto->Distrib[i] = uniform;
@@ -1410,7 +1398,6 @@ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) {
   // note that the proto Weight is irrelevant for uniform protos
 }                                // MakeDimUniform
 
-
 /**
  * This routine searches the cluster tree for all leaf nodes
  * which are samples in the specified cluster.  It computes
@@ -1420,12 +1407,12 @@ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) {
  * return this information to the caller.  An incremental
  * algorithm for computing statistics is not used because
  * it will not work with circular dimensions.
- * @param N	number of dimensions
- * @param ParamDesc	array of dimension descriptions
- * @param Cluster	cluster whose stats are to be computed
- * @return	Pointer to new data structure containing statistics
- * @note Exceptions:	None
- * @note History:	6/2/89, DSJ, Created.
+ * @param N number of dimensions
+ * @param ParamDesc array of dimension descriptions
+ * @param Cluster cluster whose stats are to be computed
+ * @return  Pointer to new data structure containing statistics
+ * @note Exceptions:  None
+ * @note History: 6/2/89, DSJ, Created.
  */
 STATISTICS *
 ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {
@@ -1502,19 +1489,18 @@ ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {
   return (Statistics);
 }                                // ComputeStatistics
 
-
 /**
  * This routine creates a spherical prototype data structure to
  * approximate the samples in the specified cluster.
  * Spherical prototypes have a single variance which is
  * common across all dimensions.  All dimensions are normally
  * distributed and independent.
- * @param N	number of dimensions
- * @param Cluster	cluster to be made into a spherical prototype
- * @param Statistics	statistical info about samples in cluster
- * @return	Pointer to a new spherical prototype data structure
- * @note Exceptions:	None
- * @note History:	6/19/89, DSJ, Created.
+ * @param N number of dimensions
+ * @param Cluster cluster to be made into a spherical prototype
+ * @param Statistics  statistical info about samples in cluster
+ * @return  Pointer to a new spherical prototype data structure
+ * @note Exceptions:  None
+ * @note History: 6/19/89, DSJ, Created.
  */
 PROTOTYPE *NewSphericalProto(uinT16 N,
                              CLUSTER *Cluster,
@@ -1537,18 +1523,17 @@ PROTOTYPE *NewSphericalProto(uinT16 N,
   return (Proto);
 }                                // NewSphericalProto
 
-
 /**
  * This routine creates an elliptical prototype data structure to
  * approximate the samples in the specified cluster.
  * Elliptical prototypes have a variance for each dimension.
  * All dimensions are normally distributed and independent.
- * @param N	number of dimensions
- * @param Cluster	cluster to be made into an elliptical prototype
- * @param Statistics	statistical info about samples in cluster
- * @return	Pointer to a new elliptical prototype data structure
- * @note Exceptions:	None
- * @note History:	6/19/89, DSJ, Created.
+ * @param N number of dimensions
+ * @param Cluster cluster to be made into an elliptical prototype
+ * @param Statistics  statistical info about samples in cluster
+ * @return  Pointer to a new elliptical prototype data structure
+ * @note Exceptions:  None
+ * @note History: 6/19/89, DSJ, Created.
  */
 PROTOTYPE *NewEllipticalProto(inT16 N,
                               CLUSTER *Cluster,
@@ -1579,7 +1564,6 @@ PROTOTYPE *NewEllipticalProto(inT16 N,
   return (Proto);
 }                                // NewEllipticalProto
 
-
 /**
  * This routine creates a mixed prototype data structure to
  * approximate the samples in the specified cluster.
@@ -1588,12 +1572,12 @@ PROTOTYPE *NewEllipticalProto(inT16 N,
  * structure is initially filled in as though it were an
  * elliptical prototype.  The actual distributions of the
  * dimensions can be altered by other routines.
- * @param N	number of dimensions
- * @param Cluster	cluster to be made into a mixed prototype
- * @param Statistics	statistical info about samples in cluster
- * @return	Pointer to a new mixed prototype data structure
- * @note Exceptions:	None
- * @note History:	6/19/89, DSJ, Created.
+ * @param N number of dimensions
+ * @param Cluster cluster to be made into a mixed prototype
+ * @param Statistics  statistical info about samples in cluster
+ * @return  Pointer to a new mixed prototype data structure
+ * @note Exceptions:  None
+ * @note History: 6/19/89, DSJ, Created.
  */
 PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) {
   PROTOTYPE *Proto;
@@ -1609,16 +1593,15 @@ PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) {
   return (Proto);
 }                                // NewMixedProto
 
-
 /**
  * This routine allocates memory to hold a simple prototype
  * data structure, i.e. one without independent distributions
  * and variances for each dimension.
- * @param N	number of dimensions
- * @param Cluster	cluster to be made into a prototype
- * @return	Pointer to new simple prototype
- * @note Exceptions:	None
- * @note History:	6/19/89, DSJ, Created.
+ * @param N number of dimensions
+ * @param Cluster cluster to be made into a prototype
+ * @return  Pointer to new simple prototype
+ * @note Exceptions:  None
+ * @note History: 6/19/89, DSJ, Created.
  */
 PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) {
   PROTOTYPE *Proto;
@@ -1640,7 +1623,6 @@ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) {
   return (Proto);
 }                                // NewSimpleProto
 
-
 /**
  * This routine returns TRUE if the specified covariance
  * matrix indicates that all N dimensions are independent of
@@ -1653,13 +1635,13 @@ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) {
  * coeff[ij] = stddev[ij] / sqrt (stddev[ii] * stddev[jj])
  * The covariance matrix is assumed to be symmetric (which
  * should always be true).
- * @param ParamDesc	descriptions of each feature space dimension
- * @param N	number of dimensions
- * @param CoVariance	ptr to a covariance matrix
- * @param Independence	max off-diagonal correlation coefficient
- * @return	TRUE if dimensions are independent, FALSE otherwise
- * @note Exceptions:	None
- * @note History:	6/4/89, DSJ, Created.
+ * @param ParamDesc descriptions of each feature space dimension
+ * @param N number of dimensions
+ * @param CoVariance  ptr to a covariance matrix
+ * @param Independence  max off-diagonal correlation coefficient
+ * @return  TRUE if dimensions are independent, FALSE otherwise
+ * @note Exceptions:  None
+ * @note History: 6/4/89, DSJ, Created.
  */
 BOOL8
 Independent (PARAM_DESC ParamDesc[],
@@ -1692,7 +1674,6 @@ inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) {
   return (TRUE);
 }                                // Independent
 
-
 /**
  * This routine returns a histogram data structure which can
  * be used by other routines to place samples into histogram
@@ -1703,12 +1684,12 @@ inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) {
  * created so that it minimizes the computation time needed
  * to create a new bucket.
  * @param clusterer  which keeps a bucket_cache for us.
- * @param Distribution	type of probability distribution to test for
- * @param SampleCount	number of samples that are available
- * @param Confidence	probability of a Type I error
- * @return	Bucket data structure
+ * @param Distribution  type of probability distribution to test for
+ * @param SampleCount number of samples that are available
+ * @param Confidence  probability of a Type I error
+ * @return  Bucket data structure
  * @note Exceptions: none
- * @note History:	Thu Aug  3 12:58:10 1989, DSJ, Created.
+ * @note History: Thu Aug  3 12:58:10 1989, DSJ, Created.
  */
 BUCKETS *GetBuckets(CLUSTERER* clusterer,
                     DISTRIBUTION Distribution,
@@ -1739,7 +1720,6 @@ BUCKETS *GetBuckets(CLUSTERER* clusterer,
   return Buckets;
 }                                // GetBuckets
 
-
 /**
  * This routine creates a histogram data structure which can
  * be used by other routines to place samples into histogram
@@ -1751,12 +1731,12 @@ BUCKETS *GetBuckets(CLUSTERER* clusterer,
  * order to make this possible, a mapping table is
  * computed which maps "normalized" samples into the
  * appropriate bucket.
- * @param Distribution	type of probability distribution to test for
- * @param SampleCount	number of samples that are available
- * @param Confidence	probability of a Type I error
+ * @param Distribution  type of probability distribution to test for
+ * @param SampleCount number of samples that are available
+ * @param Confidence  probability of a Type I error
  * @return Pointer to new histogram data structure
- * @note Exceptions:	None
- * @note History:	6/4/89, DSJ, Created.
+ * @note Exceptions:  None
+ * @note History: 6/4/89, DSJ, Created.
  */
 BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
                      uinT32 SampleCount,
@@ -1840,7 +1820,6 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
   return Buckets;
 }                                // MakeBuckets
 
-
 /**
  * This routine computes the optimum number of histogram
  * buckets that should be used in a chi-squared goodness of
@@ -1851,7 +1830,7 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
  * values.  The table is intended for a 0.05 level of
  * significance (alpha).  This routine assumes that it is
  * equally valid for other alpha's, which may not be true.
- * @param SampleCount	number of samples to be tested
+ * @param SampleCount number of samples to be tested
  * @return Optimum number of histogram buckets
  * @note Exceptions: None
  * @note History: 6/5/89, DSJ, Created.
@@ -1874,7 +1853,6 @@ uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) {
   return kBucketsTable[Last];
 }                                // OptimumNumberOfBuckets
 
-
 /**
  * This routine computes the chi-squared value which will
  * leave a cumulative probability of Alpha in the right tail
@@ -1887,8 +1865,8 @@ uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) {
  * chi-squared value.  Therefore, once a particular chi-squared
  * value is computed, it is stored in the list and never
  * needs to be computed again.
- * @param DegreesOfFreedom	determines shape of distribution
- * @param Alpha	probability of right tail
+ * @param DegreesOfFreedom  determines shape of distribution
+ * @param Alpha probability of right tail
  * @return Desired chi-squared value
  * @note Exceptions: none
  * @note History: 6/5/89, DSJ, Created.
@@ -1932,19 +1910,19 @@ ComputeChiSquared (uinT16 DegreesOfFreedom, FLOAT64 Alpha)
 
 }                                // ComputeChiSquared
 
-
 /**
  * This routine computes the probability density function
  * of a discrete normal distribution defined by the global
  * variables kNormalMean, kNormalVariance, and kNormalMagnitude.
  * Normal magnitude could, of course, be computed in terms of
  * the normal variance but it is precomputed for efficiency.
- * @param x	number to compute the normal probability density for
+ * @param x number to compute the normal probability density for
  * @note Globals:
- *		kNormalMean	mean of a discrete normal distribution
- *		kNormalVariance	variance of a discrete normal distribution
- *		kNormalMagnitude	magnitude of a discrete normal distribution
- * @return	The value of the normal distribution at x.
+ *    kNormalMean mean of a discrete normal distribution
+ *    kNormalVariance variance of a discrete normal distribution
+ *    kNormalMagnitude  magnitude of a discrete normal
+ *    distribution
+ * @return  The value of the normal distribution at x.
  * @note Exceptions: None
  * @note History: 6/4/89, DSJ, Created.
  */
@@ -1955,12 +1933,11 @@ FLOAT64 NormalDensity(inT32 x) {
   return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance);
 }                                // NormalDensity
 
-
 /**
  * This routine computes the probability density function
  * of a uniform distribution at the specified point.  The
  * range of the distribution is from 0 to BUCKETTABLESIZE.
- * @param x	number to compute the uniform probability density for
+ * @param x number to compute the uniform probability density for
  * @return The value of the uniform distribution at x.
  * @note Exceptions: None
  * @note History: 6/5/89, DSJ, Created.
@@ -1974,13 +1951,12 @@ FLOAT64 UniformDensity(inT32 x) {
     return (FLOAT64) 0.0;
 }                                // UniformDensity
 
-
 /**
  * This routine computes a trapezoidal approximation to the
  * integral of a function over a small delta in x.
- * @param f1	value of function at x1
- * @param f2	value of function at x2
- * @param Dx	x2 - x1 (should always be positive)
+ * @param f1  value of function at x1
+ * @param f2  value of function at x2
+ * @param Dx  x2 - x1 (should always be positive)
  * @return Approximation of the integral of the function from x1 to x2.
  * @note Exceptions: None
  * @note History: 6/5/89, DSJ, Created.
@@ -1989,7 +1965,6 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) {
   return (f1 + f2) * Dx / 2.0;
 }                                // Integral
 
-
 /**
  * This routine counts the number of cluster samples which
  * fall within the various histogram buckets in Buckets.  Only
@@ -2002,12 +1977,12 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) {
  * range and the StdDev is 1/2 the range.  A dimension with
  * zero standard deviation cannot be statistically analyzed.
  * In this case, a pseudo-analysis is used.
- * @param Buckets	histogram buckets to count samples
- * @param Cluster	cluster whose samples are being analyzed
- * @param Dim	dimension of samples which is being analyzed
- * @param ParamDesc	description of the dimension
- * @param Mean	"mean" of the distribution
- * @param StdDev	"standard deviation" of the distribution
+ * @param Buckets histogram buckets to count samples
+ * @param Cluster cluster whose samples are being analyzed
+ * @param Dim dimension of samples which is being analyzed
+ * @param ParamDesc description of the dimension
+ * @param Mean  "mean" of the distribution
+ * @param StdDev  "standard deviation" of the distribution
  * @return None (the Buckets data structure is filled in)
  * @note Exceptions: None
  * @note History: 6/5/89, DSJ, Created.
@@ -2071,16 +2046,15 @@ void FillBuckets(BUCKETS *Buckets,
   }
 }                                // FillBuckets
 
-
 /**
  * This routine determines which bucket x falls into in the
  * discrete normal distribution defined by kNormalMean
  * and kNormalStdDev.  x values which exceed the range of
  * the discrete distribution are clipped.
- * @param ParamDesc	used to identify circular dimensions
- * @param x	value to be normalized
- * @param Mean	mean of normal distribution
- * @param StdDev	standard deviation of normal distribution
+ * @param ParamDesc used to identify circular dimensions
+ * @param x value to be normalized
+ * @param Mean  mean of normal distribution
+ * @param StdDev  standard deviation of normal distribution
  * @return Bucket number into which x falls
  * @note Exceptions: None
  * @note History: 6/5/89, DSJ, Created.
@@ -2107,16 +2081,15 @@ uinT16 NormalBucket(PARAM_DESC *ParamDesc,
   return (uinT16) floor((FLOAT64) X);
 }                                // NormalBucket
 
-
 /**
  * This routine determines which bucket x falls into in the
  * discrete uniform distribution defined by
  * BUCKETTABLESIZE.  x values which exceed the range of
  * the discrete distribution are clipped.
- * @param ParamDesc	used to identify circular dimensions
- * @param x	value to be normalized
- * @param Mean	center of range of uniform distribution
- * @param StdDev	1/2 the range of the uniform distribution
+ * @param ParamDesc used to identify circular dimensions
+ * @param x value to be normalized
+ * @param Mean  center of range of uniform distribution
+ * @param StdDev  1/2 the range of the uniform distribution
  * @return Bucket number into which x falls
  * @note Exceptions: None
  * @note History: 6/5/89, DSJ, Created.
@@ -2143,7 +2116,6 @@ uinT16 UniformBucket(PARAM_DESC *ParamDesc,
   return (uinT16) floor((FLOAT64) X);
 }                                // UniformBucket
 
-
 /**
  * This routine performs a chi-square goodness of fit test
  * on the histogram data in the Buckets data structure.  TRUE
@@ -2151,7 +2123,7 @@ uinT16 UniformBucket(PARAM_DESC *ParamDesc,
  * distribution which was specified when the Buckets
  * structure was originally created.  Otherwise FALSE is
  * returned.
- * @param Buckets		histogram data to perform chi-square test on
+ * @param Buckets   histogram data to perform chi-square test on
  * @return TRUE if samples match distribution, FALSE otherwise
  * @note Exceptions: None
  * @note History: 6/5/89, DSJ, Created.
@@ -2176,11 +2148,10 @@ BOOL8 DistributionOK(BUCKETS *Buckets) {
     return TRUE;
 }                                // DistributionOK
 
-
 /**
  * This routine frees the memory used by the statistics
  * data structure.
- * @param Statistics	pointer to data structure to be freed
+ * @param Statistics  pointer to data structure to be freed
  * @return None
  * @note Exceptions: None
  * @note History: 6/5/89, DSJ, Created.
@@ -2192,7 +2163,6 @@ void FreeStatistics(STATISTICS *Statistics) {
   memfree(Statistics);
 }                                // FreeStatistics
 
-
 /**
  * This routine properly frees the memory used by a BUCKETS.
  *
@@ -2204,13 +2174,12 @@ void FreeBuckets(BUCKETS *buckets) {
   Efree(buckets);
 }                                // FreeBuckets
 
-
 /**
  * This routine frees the memory consumed by the specified
  * cluster and all of its subclusters.  This is done by
  * recursive calls to FreeCluster().
  *
- * @param Cluster	pointer to cluster to be freed
+ * @param Cluster pointer to cluster to be freed
  *
  * @return None
  *
@@ -2225,7 +2194,6 @@ void FreeCluster(CLUSTER *Cluster) {
   }
 }                                // FreeCluster
 
-
 /**
  * This routine computes the degrees of freedom that should
  * be used in a chi-squared test with the specified number of
@@ -2234,8 +2202,8 @@ void FreeCluster(CLUSTER *Cluster) {
  * computed more easily.  This will cause the value of
  * chi-squared to be higher than the optimum value, resulting
  * in the chi-square test being more lenient than optimum.
- * @param Distribution		distribution being tested for
- * @param HistogramBuckets	number of buckets in chi-square test
+ * @param Distribution    distribution being tested for
+ * @param HistogramBuckets  number of buckets in chi-square test
  * @return The number of degrees of freedom for a chi-square test
  * @note Exceptions: none
  * @note History: Thu Aug  3 14:04:18 1989, DSJ, Created.
@@ -2252,7 +2220,6 @@ uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets) {
 
 }                                // DegreesOfFreedom
 
-
 /**
  * This routine is used to search a list of histogram data
  * structures to find one with the specified number of
@@ -2272,7 +2239,6 @@ int NumBucketsMatch(void *arg1,    // BUCKETS *Histogram,
 
 }                                // NumBucketsMatch
 
-
 /**
  * This routine is used to search a list for a list node
  * whose contents match Key.  It is called by the list
@@ -2287,13 +2253,12 @@ int ListEntryMatch(void *arg1,    //ListNode
 
 }                                // ListEntryMatch
 
-
 /**
  * This routine multiplies each ExpectedCount histogram entry
  * by NewSampleCount/OldSampleCount so that the histogram
  * is now adjusted to the new sample count.
- * @param Buckets	histogram data structure to adjust
- * @param NewSampleCount	new sample count to adjust to
+ * @param Buckets histogram data structure to adjust
+ * @param NewSampleCount  new sample count to adjust to
  * @return none
  * @note Exceptions: none
  * @note History: Thu Aug  3 14:31:14 1989, DSJ, Created.
@@ -2313,11 +2278,10 @@ void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount) {
 
 }                                // AdjustBuckets
 
-
 /**
  * This routine sets the bucket counts in the specified histogram
  * to zero.
- * @param Buckets	histogram data structure to init
+ * @param Buckets histogram data structure to init
  * @return none
  * @note Exceptions: none
  * @note History: Thu Aug  3 14:31:14 1989, DSJ, Created.
@@ -2331,7 +2295,6 @@ void InitBuckets(BUCKETS *Buckets) {
 
 }                                // InitBuckets
 
-
 /**
  * This routine is used to search a list of structures which
  * hold pre-computed chi-squared values for a chi-squared
@@ -2355,14 +2318,13 @@ int AlphaMatch(void *arg1,    //CHISTRUCT                             *ChiStruct
 
 }                                // AlphaMatch
 
-
 /**
  * This routine allocates a new data structure which is used
  * to hold a chi-squared value along with its associated
  * number of degrees of freedom and alpha value.
  *
- * @param DegreesOfFreedom	degrees of freedom for new chi value
- * @param Alpha			confidence level for new chi value
+ * @param DegreesOfFreedom  degrees of freedom for new chi value
+ * @param Alpha     confidence level for new chi value
  * @return none
  * @note Exceptions: none
  * @note History: Fri Aug  4 11:04:59 1989, DSJ, Created.
@@ -2377,7 +2339,6 @@ CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) {
 
 }                                // NewChiStruct
 
-
 /**
  * This routine attempts to find an x value at which Function
  * goes to zero (i.e. a root of the function ).  It will only
@@ -2385,10 +2346,10 @@ CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) {
  * are no extrema between the solution and the InitialGuess.
  * The algorithms used are extremely primitive.
  *
- * @param Function	function whose zero is to be found
- * @param FunctionParams	arbitrary data to pass to function
- * @param InitialGuess	point to start solution search at
- * @param Accuracy	maximum allowed error
+ * @param Function  function whose zero is to be found
+ * @param FunctionParams  arbitrary data to pass to function
+ * @param InitialGuess  point to start solution search at
+ * @param Accuracy  maximum allowed error
  * @return Solution of function ( x for which f(x) = 0 ).
  * @note Exceptions: none
  * @note History: Fri Aug  4 11:08:59 1989, DSJ, Created.
@@ -2440,7 +2401,6 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy)
 
 }                                // Solve
 
-
 /**
  * This routine computes the area under a chi density curve
  * from 0 to x, minus the desired area under the curve.  The
@@ -2455,8 +2415,8 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy)
  * integrating the chi density curve in parts to obtain
  * a series that can be used to compute the area under the
  * curve.
- * @param ChiParams	contains degrees of freedom and alpha
- * @param x		value of chi-squared to evaluate
+ * @param ChiParams contains degrees of freedom and alpha
+ * @param x   value of chi-squared to evaluate
  * @return Error between actual and desired area under the chi curve.
  * @note Exceptions: none
  * @note History: Fri Aug  4 12:48:41 1989, DSJ, Created.
@@ -2480,7 +2440,6 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) {
 
 }                                // ChiArea
 
-
 /**
  * This routine looks at all samples in the specified cluster.
  * It computes a running estimate of the percentage of the
@@ -2498,10 +2457,10 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) {
  * contained in the same cluster, then the cluster should be
  * split.
  *
- * @param Clusterer	data structure holding cluster tree
- * @param Cluster		cluster containing samples to be tested
- * @param MaxIllegal	max percentage of samples allowed to have
- *				more than 1 feature in the cluster
+ * @param Clusterer data structure holding cluster tree
+ * @param Cluster   cluster containing samples to be tested
+ * @param MaxIllegal  max percentage of samples allowed to have
+ *        more than 1 feature in the cluster
  * @return TRUE if the cluster should be split, FALSE otherwise.
  * @note Exceptions: none
  * @note History: Wed Aug 30 11:13:05 1989, DSJ, Created.
@@ -2562,7 +2521,7 @@ CLUSTER * Cluster, FLOAT32 MaxIllegal)
 }                                // MultipleCharSamples
 
 /**
- * Compute the inverse of a matrix using LU decomposition with partial pivoting. 
+ * Compute the inverse of a matrix using LU decomposition with partial pivoting.
  * The return value is the sum of norms of the off-diagonal terms of the
  * product of a and inv. (A measure of the error.)
  */
diff --git a/classify/cluster.h b/classify/cluster.h
index 53ddf87dad..a068a5d58c 100644
--- a/classify/cluster.h
+++ b/classify/cluster.h
@@ -107,15 +107,15 @@ typedef struct {
 /*--------------------------------------------------------------------------
         Public Function Prototypes
 --------------------------------------------------------------------------*/
-CLUSTERER *MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]);
+CLUSTERER TESS_API *MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]);
 
-SAMPLE *MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, inT32 CharID);
+SAMPLE TESS_API *MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, inT32 CharID);
 
 LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
 
-void FreeClusterer(CLUSTERER *Clusterer);
+void TESS_API FreeClusterer(CLUSTERER *Clusterer);
 
-void FreeProtoList(LIST *ProtoList);
+void TESS_API FreeProtoList(LIST *ProtoList);
 
 void FreePrototype(void *arg);  // PROTOTYPE *Prototype);
 
@@ -125,7 +125,7 @@ FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension);
 
 FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension);
 
-inT32 MergeClusters(inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2,
+inT32 TESS_API MergeClusters(inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2,
                     FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]);
 
 //--------------Global Data Definitions and Declarations---------------------------
diff --git a/classify/clusttool.cpp b/classify/clusttool.cpp
index d86c3a2407..02e619d273 100644
--- a/classify/clusttool.cpp
+++ b/classify/clusttool.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:	clustertool.c
- **	Purpose:	Misc. tools for use with the clustering routines
- **	Author:		Dan Johnson
- **	History:	6/6/89, DSJ, Created.
+ ** Filename: clusttool.cpp
+ ** Purpose:  Misc. tools for use with the clustering routines
+ ** Author:   Dan Johnson
+ ** History:  6/6/89, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -26,9 +26,10 @@
 #include <math.h>
 
 //---------------Global Data Definitions and Declarations--------------------
-#define TOKENSIZE 80             //< max size of tokens read from an input file
-#define MAXSAMPLESIZE 65535      //< max num of dimensions in feature space
-//#define MAXBLOCKSIZE  65535   //< max num of samples in a character (block size)
+#define TOKENSIZE 80         //< max size of tokens read from an input file
+#define MAXSAMPLESIZE 65535  //< max num of dimensions in feature space
+// #define MAXBLOCKSIZE 65535  //< max num of samples in a character
+//                             //< (block size)
 
 /**
  * This routine reads a single integer from the specified
@@ -37,7 +38,7 @@
  * @param File open text file to read sample size from
  * @return Sample size
  * @note Globals: None
- * @note Exceptions: ILLEGALSAMPLESIZE	illegal format or range
+ * @note Exceptions: ILLEGALSAMPLESIZE  illegal format or range
  * @note History: 6/6/89, DSJ, Created.
  */
 uinT16 ReadSampleSize(FILE *File) {
@@ -293,7 +294,7 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
     if (NumFloatsRead != 1) {
       if ((NumFloatsRead == EOF) && (i == 0)) {
         if (needs_free) {
-            Efree(Buffer);
+          Efree(Buffer);
         }
         return NULL;
       } else {
@@ -315,8 +316,7 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
  * @note Exceptions: None
  * @note History: 6/6/89, DSJ, Created.
  */
-void
-WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
+void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[]) {
   int i;
 
   for (i = 0; i < N; i++) {
@@ -446,15 +446,10 @@ void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
  * @note History: 6/12/89, DSJ, Created.
 */
 
-void WriteProtoList(
-     FILE	*File,
-     uinT16	N,
-     PARAM_DESC	ParamDesc[],
-     LIST	ProtoList,
-     BOOL8	WriteSigProtos,
-     BOOL8	WriteInsigProtos)
-{
-  PROTOTYPE	*Proto;
+void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[],
+                    LIST ProtoList, BOOL8 WriteSigProtos,
+                    BOOL8 WriteInsigProtos) {
+  PROTOTYPE *Proto;
 
   /* write file header */
   fprintf(File,"%0d\n",N);
@@ -464,8 +459,8 @@ void WriteProtoList(
   iterate(ProtoList)
     {
       Proto = (PROTOTYPE *) first_node ( ProtoList );
-      if (( Proto->Significant && WriteSigProtos )	||
-	  ( ! Proto->Significant && WriteInsigProtos ) )
-	WritePrototype( File, N, Proto );
+      if ((Proto->Significant && WriteSigProtos) ||
+          (!Proto->Significant && WriteInsigProtos))
+        WritePrototype(File, N, Proto);
     }
 }
diff --git a/classify/clusttool.h b/classify/clusttool.h
index e82fa1ef48..a6fe38ea48 100644
--- a/classify/clusttool.h
+++ b/classify/clusttool.h
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:	clusttool.h
- **	Purpose:	Definition of clustering utility tools
- **	Author:		Dan Johnson
- **	History:	6/6/89, DSJ, Created.
+ ** Filename: clusttool.h
+ ** Purpose:  Definition of clustering utility tools
+ ** Author:   Dan Johnson
+ ** History:  6/6/89, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -36,7 +36,7 @@ PROTOSTYLE ReadProtoStyle(FILE *File);
 
 FLOAT32 *ReadNFloats (FILE * File, uinT16 N, FLOAT32 Buffer[]);
 
-void WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]);
+void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[]);
 
 void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto);
 
@@ -44,13 +44,9 @@ void WriteNFloats (FILE * File, uinT16 N, FLOAT32 Array[]);
 
 void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle);
 
-void WriteProtoList(
-     FILE	*File,
-     uinT16	N,
-     PARAM_DESC	ParamDesc[],
-     LIST	ProtoList,
-     BOOL8	WriteSigProtos,
-     BOOL8	WriteInsigProtos);
+void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[],
+                    LIST ProtoList, BOOL8 WriteSigProtos,
+                    BOOL8 WriteInsigProtos);
 
 //--------------Global Data Definitions and Declarations---------------------
 // define errors that can be trapped
diff --git a/classify/cutoffs.cpp b/classify/cutoffs.cpp
index 4f6417149a..ffb8692ef1 100644
--- a/classify/cutoffs.cpp
+++ b/classify/cutoffs.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    cutoffs.c
- **	Purpose:     Routines to manipulate an array of class cutoffs.
- **	Author:      Dan Johnson
- **	History:     Wed Feb 20 09:28:51 1991, DSJ, Created.
+ ** Filename:    cutoffs.c
+ ** Purpose:     Routines to manipulate an array of class cutoffs.
+ ** Author:      Dan Johnson
+ ** History:     Wed Feb 20 09:28:51 1991, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
diff --git a/classify/featdefs.cpp b/classify/featdefs.cpp
index ad7b799675..dd31f91d86 100644
--- a/classify/featdefs.cpp
+++ b/classify/featdefs.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    featdefs.c
- **	Purpose:     Definitions of currently defined feature types.
- **	Author:      Dan Johnson
- **	History:     Mon May 21 10:26:21 1990, DSJ, Created.
+ ** Filename:    featdefs.c
+ ** Purpose:     Definitions of currently defined feature types.
+ ** Author:      Dan Johnson
+ ** History:     Mon May 21 10:26:21 1990, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -289,13 +289,13 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
  * the feature type for the feature with the specified short
  * name.  Trap an error if the specified name is not found.
  *
- * Globals: 
+ * Globals:
  * - none
  *
  * @param FeatureDefs    definitions of feature types/extractors
  * @param ShortName short name of a feature type
  * @return Feature type which corresponds to ShortName.
- * @note Exceptions: 
+ * @note Exceptions:
  * - ILLEGAL_SHORT_NAME
  * @note History: Wed May 23 15:36:05 1990, DSJ, Created.
  */
diff --git a/classify/featdefs.h b/classify/featdefs.h
index 704bbdfde2..eb0c71ce45 100644
--- a/classify/featdefs.h
+++ b/classify/featdefs.h
@@ -55,7 +55,7 @@ typedef FEATURE_DEFS_STRUCT *FEATURE_DEFS;
 /*----------------------------------------------------------------------
     Generic functions for manipulating character descriptions
 ----------------------------------------------------------------------*/
-void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs);
+void TESS_API InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs);
 
 void FreeCharDescription(CHAR_DESC CharDesc);
 
@@ -67,17 +67,17 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
 void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
                           CHAR_DESC CharDesc, STRING* str);
 
-CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
+CHAR_DESC TESS_API ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
                               FILE *File);
 
-int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
+int TESS_API ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
                            const char *ShortName);
 
 /**----------------------------------------------------------------------------
         Global Data Definitions and Declarations
 ----------------------------------------------------------------------------**/
 extern const FEATURE_DESC_STRUCT MicroFeatureDesc;
-extern const FEATURE_DESC_STRUCT PicoFeatDesc;
+extern TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc;
 extern const FEATURE_DESC_STRUCT CharNormDesc;
 extern const FEATURE_DESC_STRUCT OutlineFeatDesc;
 extern const FEATURE_DESC_STRUCT IntFeatDesc;
diff --git a/classify/fpoint.cpp b/classify/fpoint.cpp
index 854bea7b7e..ff5b7b7cf7 100644
--- a/classify/fpoint.cpp
+++ b/classify/fpoint.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    fpoint.c
- **	Purpose:     Abstract data type for a 2D point (floating point coords)
- **	Author:      Dan Johnson
- **	History:     Thu Apr 12 10:44:15 1990, DSJ, Created.
+ ** Filename:    fpoint.c
+ ** Purpose:     Abstract data type for a 2D point (floating point coords)
+ ** Author:      Dan Johnson
+ ** History:     Thu Apr 12 10:44:15 1990, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -58,5 +58,4 @@ FLOAT32 NormalizedAngleFrom(FPOINT *Point1,
   if (Angle < 0.0 || Angle >= FullScale)
     Angle = 0.0;
   return (Angle);
-
 }
diff --git a/classify/intfeaturemap.h b/classify/intfeaturemap.h
index 55c5b5cf5e..93bc896c0e 100644
--- a/classify/intfeaturemap.h
+++ b/classify/intfeaturemap.h
@@ -75,7 +75,7 @@ class IntFeatureMap {
 
   // Copies the given feature_space and uses it as the index feature map
   // from INT_FEATURE_STRUCT.
-  void Init(const IntFeatureSpace& feature_space);
+  void TESS_API Init(const IntFeatureSpace& feature_space);
 
   // Helper to return an offset index feature. In this context an offset
   // feature with a dir of +/-1 is a feature of a similar direction,
diff --git a/classify/intfeaturespace.h b/classify/intfeaturespace.h
index e1e8e6ec9b..b72c42bdd1 100644
--- a/classify/intfeaturespace.h
+++ b/classify/intfeaturespace.h
@@ -35,7 +35,7 @@ class IndexMap;
 
 // Down-sampling quantization of the INT_FEATURE_STRUCT feature space and
 // conversion to a single scalar index value, used as a binary feature space.
-class IntFeatureSpace {
+class TESS_API IntFeatureSpace {
  public:
   IntFeatureSpace();
   // Default copy constructors and assignment OK!
diff --git a/classify/intfx.cpp b/classify/intfx.cpp
index 78aa59bbc9..9c9870a2e9 100644
--- a/classify/intfx.cpp
+++ b/classify/intfx.cpp
@@ -520,7 +520,7 @@ bool ExtractIntFeat(const TBLOB& blob,
   tesseract::Classify::ExtractFeatures(blob, nonlinear_norm,
                                        &bl_features, &cn_features, results,
                                        NULL);
-  if (bl_features.size() == 0 || cn_features.size() == 0 ||
+  if (bl_features.empty() || cn_features.empty() ||
       bl_features.size() > MAX_NUM_INT_FEATURES ||
       cn_features.size() > MAX_NUM_INT_FEATURES) {
     return false;  // Feature extraction failed.
diff --git a/classify/intfx.h b/classify/intfx.h
index 26c435374c..1cd51a3022 100644
--- a/classify/intfx.h
+++ b/classify/intfx.h
@@ -48,7 +48,7 @@ const double kStandardFeatureLength = 64.0 / 5;
 /**----------------------------------------------------------------------------
           Public Function Prototypes
 ----------------------------------------------------------------------------**/
-void InitIntegerFX();
+void TESS_API InitIntegerFX();
 
 // Returns a vector representing the direction of a feature with the given
 // theta direction in an INT_FEATURE_STRUCT.
diff --git a/classify/intmatcher.cpp b/classify/intmatcher.cpp
index 8fc135ea45..ff999608f2 100644
--- a/classify/intmatcher.cpp
+++ b/classify/intmatcher.cpp
@@ -295,7 +295,8 @@ class ClassPruner {
       HeapSort(num_classes_, sort_key_, sort_index_);
   }
 
-  /** Prints debug info on the class pruner matches for the pruned classes only. */
+  /** Prints debug info on the class pruner matches for the pruned classes only.
+   */
   void DebugMatch(const Classify& classify,
                   const INT_TEMPLATES_STRUCT* int_templates,
                   const INT_FEATURE_STRUCT* features) const {
@@ -370,8 +371,9 @@ class ClassPruner {
  private:
   /** Array[rounded_classes_] of initial counts for each class. */
   int *class_count_;
-  /// Array[rounded_classes_] of modified counts for each class after normalizing
-  /// for expected number of features, disabled classes, fragments, and xheights.
+  /// Array[rounded_classes_] of modified counts for each class after
+  /// normalizing for expected number of features, disabled classes, fragments,
+  /// and xheights.
   int *norm_count_;
   /** Array[rounded_classes_ +1] of pruned counts that gets sorted */
   int *sort_key_;
@@ -402,8 +404,9 @@ class ClassPruner {
  *                               normalization process (by CLASS_INDEX)
  * @param expected_num_features  Array of expected number of features
  *                               for each class (by CLASS_INDEX)
- * @param results                Sorted Array of pruned classes. Must be an array
- *                               of size at least int_templates->NumClasses.
+ * @param results                Sorted Array of pruned classes. Must be an
+ *                               array of size at least
+ *                               int_templates->NumClasses.
  * @param keep_this
  */
 int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
@@ -606,7 +609,6 @@ int IntegerMatcher::FindGoodProtos(
   return NumGoodProtos;
 }
 
-
 /**
  * FindBadFeatures finds all features with maximum feature-evidence <
  * AdaptFeatureThresh. The list is ordered by increasing feature number.
@@ -701,7 +703,6 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
   evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
 }
 
-
 /*----------------------------------------------------------------------------
               Private Code
 ----------------------------------------------------------------------------*/
@@ -717,8 +718,6 @@ void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
          class_template->NumConfigs * sizeof(feature_evidence_[0]));
 }
 
-
-
 /**
  * Print debugging information for Configuations
  * @return none
@@ -742,7 +741,6 @@ void IMDebugConfiguration(int FeatureNum,
   cprintf ("\n");
 }
 
-
 /**
  * Print debugging information for Configuations
  * @return none
@@ -795,10 +793,10 @@ int IntegerMatcher::UpdateTablesForFeature(
   uinT32 XFeatureAddress;
   uinT32 YFeatureAddress;
   uinT32 ThetaFeatureAddress;
-  uinT8 *UINT8Pointer;
+  uinT8* UINT8Pointer;
   int ProtoIndex;
   uinT8 Temp;
-  int *IntPointer;
+  int* IntPointer;
   int ConfigNum;
   inT32 M3;
   inT32 A3;
@@ -916,7 +914,6 @@ int IntegerMatcher::UpdateTablesForFeature(
   return SumOverConfigs;
 }
 
-
 /**
  * Print debugging information for Configuations
  * @return none
@@ -1165,8 +1162,6 @@ void ScratchEvidence::UpdateSumOfProtoEvidences(
   }
 }
 
-
-
 /**
  * Normalize Sum of Proto and Feature Evidence by dividing by the sum of
  * the Feature Lengths and the Proto Lengths for each configuration.
@@ -1180,7 +1175,6 @@ void ScratchEvidence::NormalizeSums(
   }
 }
 
-
 /**
  * Find the best match for the current class and update the Result
  * with the configuration and match rating.
diff --git a/classify/intmatcher.h b/classify/intmatcher.h
index 46dbfc5a8d..df678d75ed 100644
--- a/classify/intmatcher.h
+++ b/classify/intmatcher.h
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    intmatcher.h
- **	Purpose:     Interface to high level generic classifier routines.
- **	Author:      Robert Moss
- **	History:     Wed Feb 13 15:24:15 MST 1991, RWM, Created.
+ ** Filename:    intmatcher.h
+ ** Purpose:     Interface to high level generic classifier routines.
+ ** Author:      Robert Moss
+ ** History:     Wed Feb 13 15:24:15 MST 1991, RWM, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
diff --git a/classify/intproto.cpp b/classify/intproto.cpp
index 4c2f0d9536..473ffd96db 100644
--- a/classify/intproto.cpp
+++ b/classify/intproto.cpp
@@ -46,9 +46,7 @@
 #include "config_auto.h"
 #endif
 
-using tesseract::FontInfo;
 using tesseract::FontSet;
-using tesseract::FontSpacingInfo;
 
 /* match debug display constants*/
 #define PROTO_PRUNER_SCALE  (4.0)
@@ -326,10 +324,8 @@ int AddIntProto(INT_CLASS Class) {
        Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0);
 
   return (Index);
-
 }
 
-
 /**
  * This routine adds Proto to the class pruning tables
  * for the specified class in Templates.
@@ -372,7 +368,6 @@ void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId,
   }
 }                                /* AddProtoToClassPruner */
 
-
 /**
  * This routine updates the proto pruner lookup tables
  * for Class to include a new proto identified by ProtoId
@@ -432,7 +427,6 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId,
   FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug);
 }                                /* AddProtoToProtoPruner */
 
-
 /**
  * Returns a quantized bucket for the given param shifted by offset,
  * notionally (param + offset) * num_buckets, but clipped and casted to the
@@ -550,7 +544,6 @@ void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) {
             P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);
 }                                /* ConvertProto */
 
-
 /**
  * This routine converts from the old floating point format
  * to the new integer format.
@@ -627,7 +620,7 @@ INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos,
  * @note Exceptions: none
  * @note History: Thu Mar 21 14:45:04 1991, DSJ, Created.
  */
-void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) {
+void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, FLOAT32 Evidence) {
   ScrollView::Color color = GetMatchColorFor(Evidence);
   RenderIntFeature(IntMatchWindow, Feature, color);
   if (FeatureDisplayWindow) {
@@ -635,7 +628,6 @@ void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) {
   }
 }                                /* DisplayIntFeature */
 
-
 /**
  * This routine renders the specified proto into a
  * global display list.
@@ -720,7 +712,6 @@ void free_int_class(INT_CLASS int_class) {
   Efree(int_class);
 }
 
-
 /**
  * This routine allocates a new set of integer templates
  * initialized to hold 0 classes.
@@ -1218,7 +1209,6 @@ FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) {
 
 }                                /* BucketStart */
 
-
 /**
  * This routine returns the parameter value which
  * corresponds to the end of the specified bucket.
@@ -1236,7 +1226,6 @@ FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) {
   return (((FLOAT32) (Bucket + 1) / NumBuckets) - Offset);
 }                                /* BucketEnd */
 
-
 /**
  * This routine fills in the section of a class pruner
  * corresponding to a single x value for a single proto of
@@ -1284,7 +1273,6 @@ void DoFill(FILL_SPEC *FillSpec,
     }
 }                                /* DoFill */
 
-
 /**
  * Return TRUE if the specified table filler is done, i.e.
  * if it has no more lines to fill.
@@ -1306,7 +1294,6 @@ BOOL8 FillerDone(TABLE_FILLER *Filler) {
 
 }                                /* FillerDone */
 
-
 /**
  * This routine sets Bit in each bit vector whose
  * bucket lies within the range Center +- Spread.  The fill
@@ -1349,7 +1336,6 @@ void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
 
 }                                /* FillPPCircularBits */
 
-
 /**
  * This routine sets Bit in each bit vector whose
  * bucket lies within the range Center +- Spread.  The fill
@@ -1516,7 +1502,6 @@ void GetCPPadsForLevel(int Level,
 
 }                                /* GetCPPadsForLevel */
 
-
 /**
  * @param Evidence  evidence value to return color for
  * @return Color which corresponds to specified Evidence value.
@@ -1538,7 +1523,6 @@ ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) {
     return ScrollView::BLUE;
 }                                /* GetMatchColorFor */
 
-
 /**
  * This routine returns (in Fill) the specification of
  * the next line to be filled from Filler.  FillerDone() should
@@ -1589,7 +1573,6 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
 
 }                                /* GetNextFill */
 
-
 /**
  * This routine computes a data structure (Filler)
  * which can be used to fill in a rectangle surrounding
@@ -1723,8 +1706,10 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad,
 
       /* translate into bucket positions and deltas */
       Filler->X = Bucket8For(Start.x, XS, NB);
-      Filler->StartDelta = -(inT16) ((Sin / Cos) * 256);
-      Filler->EndDelta = (inT16) ((Cos / Sin) * 256);
+      Filler->StartDelta = static_cast<inT16>(ClipToRange<int>(
+          -IntCastRounded((Sin / Cos) * 256), MIN_INT16, MAX_INT16));
+      Filler->EndDelta = static_cast<inT16>(ClipToRange<int>(
+          IntCastRounded((Cos / Sin) * 256), MIN_INT16, MAX_INT16));
 
       XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x;
       YAdjust = XAdjust * Sin / Cos;
@@ -1787,7 +1772,6 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature,
   window->DrawTo(X + Dx, Y + Dy);
 }                                /* RenderIntFeature */
 
-
 /**
  * This routine extracts the parameters of the specified
  * proto from the class description and adds a rendering of
diff --git a/classify/intproto.h b/classify/intproto.h
index d2c07147a9..262974b83e 100644
--- a/classify/intproto.h
+++ b/classify/intproto.h
@@ -52,7 +52,7 @@ class FCOORD;
 #define NUM_CP_BUCKETS    24
 #define CLASSES_PER_CP    32
 #define NUM_BITS_PER_CLASS  2
-#define CLASS_PRUNER_CLASS_MASK (~(~0 << NUM_BITS_PER_CLASS))
+#define CLASS_PRUNER_CLASS_MASK (~(~0u << NUM_BITS_PER_CLASS))
 #define CLASSES_PER_CP_WERD (CLASSES_PER_CP / NUM_BITS_PER_CLASS)
 #define PROTOS_PER_PP_WERD  BITS_PER_WERD
 #define BITS_PER_CP_VECTOR  (CLASSES_PER_CP * NUM_BITS_PER_CLASS)
diff --git a/classify/kdtree.cpp b/classify/kdtree.cpp
index 61a94f66cc..6ba7086d23 100644
--- a/classify/kdtree.cpp
+++ b/classify/kdtree.cpp
@@ -70,11 +70,11 @@ class MinK {
   const Element* elements() { return elements_; }
 
  private:
-  const Key max_key_;  //< the maximum possible Key
-  Element* elements_;  //< unsorted array of elements
+  const Key max_key_;   //< the maximum possible Key
+  Element *elements_;   //< unsorted array of elements
   int elements_count_;  //< the number of results collected so far
-  int k_;  //< the number of results we want from the search
-  int max_index_;  //< the index of the result with the largest key
+  int k_;               //< the number of results we want from the search
+  int max_index_;       //< the index of the result with the largest key
 };
 
 template<typename Key, typename Value>
@@ -117,7 +117,8 @@ bool MinK<Key, Value>::insert(Key key, Value value) {
 
 
 //-----------------------------------------------------------------------------
-/** Helper class for searching for the k closest points to query_point in tree. */
+/** Helper class for searching for the k closest points to query_point in tree.
+ */
 class KDTreeSearch {
  public:
   KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest);
@@ -241,14 +242,13 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
   *PtrToNode = MakeKDNode(Tree, Key, (void *) Data, Level);
 }                                /* KDStore */
 
-
 /**
- * This routine deletes a node from Tree.  The node to be	
- * deleted is specified by the Key for the node and the Data	
- * contents of the node.  These two pointers must be identical	
- * to the pointers that were used for the node when it was	
- * originally stored in the tree.  A node will be deleted from	
- * the tree only if its key and data pointers are identical	
+ * This routine deletes a node from Tree.  The node to be
+ * deleted is specified by the Key for the node and the Data
+ * contents of the node.  These two pointers must be identical
+ * to the pointers that were used for the node when it was
+ * originally stored in the tree.  A node will be deleted from
+ * the tree only if its key and data pointers are identical
  * to Key and Data respectively.  The tree is re-formed by removing
  * the affected subtree and inserting all elements but the root.
  *
@@ -298,7 +298,6 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) {
   }
 }                                /* KDDelete */
 
-
 /**
  * This routine searches the K-D tree specified by Tree and
  * finds the QuerySize nearest neighbors of Query.  All neighbors
@@ -442,7 +441,7 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
 
 
 /*---------------------------------------------------------------------------*/
-/** 
+/**
  *Returns the Euclidean distance squared between p1 and p2 for all essential
  * dimensions.
  * @param k      keys are in k-space
@@ -541,7 +540,6 @@ void Walk(KDTREE *tree, void_proc action, void *context,
     Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
 }
 
-
 /** Given a subtree nodes, insert all of its elements into tree. */
 void InsertNodes(KDTREE *tree, KDNODE *nodes) {
   if (nodes == NULL)
diff --git a/classify/kdtree.h b/classify/kdtree.h
index 1294ea3c6a..259d7078d0 100644
--- a/classify/kdtree.h
+++ b/classify/kdtree.h
@@ -83,7 +83,7 @@ void FreeKDNode(KDNODE *Node);
 
 FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]);
 
-FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]);
+FLOAT32 TESS_API ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]);
 
 int QueryInSearch(KDTREE *tree);
 
diff --git a/classify/mastertrainer.cpp b/classify/mastertrainer.cpp
index 866a617a22..849fb06010 100644
--- a/classify/mastertrainer.cpp
+++ b/classify/mastertrainer.cpp
@@ -214,10 +214,14 @@ void MasterTrainer::AddSample(bool verification, const char* unichar,
 // Must be called after ReadTrainingSamples, as the current number of images
 // is used as an offset for page numbers in the samples.
 void MasterTrainer::LoadPageImages(const char* filename) {
+  size_t offset = 0;  // 0 makes the first read start at the first page.
   int page;
   Pix* pix;
-  for (page = 0; (pix = pixReadTiff(filename, page)) != NULL; ++page) {
+  for (page = 0; ; page++) {
+    pix = pixReadFromMultipageTiff(filename, &offset);
+    if (!pix) break;  // Read failure or no image: page == images loaded.
     page_images_.push_back(pix);
+    if (!offset) { ++page; break; }  // Last page: count it before leaving.
   }
   tprintf("Loaded %d page images from %s\n", page, filename);
 }
@@ -362,9 +366,11 @@ bool MasterTrainer::LoadFontInfo(const char* filename) {
     fontinfo.name = font_name;
     fontinfo.properties = 0;
     fontinfo.universal_id = 0;
-    if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name,
-                &italic, &bold, &fixed, &serif, &fraktur) != 6)
+    if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold,
+                &fixed, &serif, &fraktur) != 6) {
+      delete[] font_name;
       continue;
+    }
     fontinfo.properties =
         (italic << 0) +
         (bold << 1) +
@@ -373,6 +379,8 @@ bool MasterTrainer::LoadFontInfo(const char* filename) {
         (fraktur << 4);
     if (!fontinfo_table_.contains(fontinfo)) {
       fontinfo_table_.push_back(fontinfo);
+    } else {
+      delete[] font_name;
     }
   }
   fclose(fp);
diff --git a/classify/mastertrainer.h b/classify/mastertrainer.h
index 8cc7158acf..25119d2382 100644
--- a/classify/mastertrainer.h
+++ b/classify/mastertrainer.h
@@ -66,7 +66,7 @@ struct ShapeDist {
 // Initially supports shape clustering and mftrainining.
 // Other important features of the MasterTrainer are conditioning the data
 // by outlier elimination, replication with perturbation, and serialization.
-class MasterTrainer {
+class TESS_API MasterTrainer {
  public:
   MasterTrainer(NormalizationMode norm_mode, bool shape_analysis,
                 bool replicate_samples, int debug_level);
diff --git a/classify/mf.cpp b/classify/mf.cpp
index d0c59487e6..37cd2eca0a 100644
--- a/classify/mf.cpp
+++ b/classify/mf.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    mf.c
- **	Purpose:     Micro-feature interface to flexible feature extractor.
- **	Author:      Dan Johnson
- **	History:     Thu May 24 09:08:38 1990, DSJ, Created.
+ ** Filename:    mf.c
+ ** Purpose:     Micro-feature interface to flexible feature extractor.
+ ** Author:      Dan Johnson
+ ** History:     Thu May 24 09:08:38 1990, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -36,7 +36,7 @@
  * Call the old micro-feature extractor and then copy
  * the features into the new format.  Then deallocate the
  * old micro-features.
- * @param Blob	blob to extract micro-features from
+ * @param Blob  blob to extract micro-features from
  * @param cn_denorm  control parameter to feature extractor.
  * @return Micro-features for Blob.
  * @note Exceptions: none
diff --git a/classify/mfdefs.cpp b/classify/mfdefs.cpp
index abe8d0c71a..0f225e8b08 100644
--- a/classify/mfdefs.cpp
+++ b/classify/mfdefs.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    mfdefs.c
- **	Purpose:     Basic routines for manipulating micro-features
- **	Author:      Dan Johnson
- **	History:     Mon Jan 22 08:48:58 1990, DSJ, Created.
+ ** Filename:    mfdefs.c
+ ** Purpose:     Basic routines for manipulating micro-features
+ ** Author:      Dan Johnson
+ ** History:     Mon Jan 22 08:48:58 1990, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -32,7 +32,7 @@
  * @return New MICROFEATURE
  * @note History: 7/27/89, DSJ, Created.
  */
-MICROFEATURE NewMicroFeature() { 
+MICROFEATURE NewMicroFeature() {
   return ((MICROFEATURE) Emalloc (sizeof (MFBLOCK)));
 }                                /* NewMicroFeature */
 
@@ -41,10 +41,10 @@ MICROFEATURE NewMicroFeature() {
 /**
  * This routine deallocates all of the memory consumed by
  * a list of micro-features.
- * @param MicroFeatures	list of micro-features to be freed
+ * @param MicroFeatures list of micro-features to be freed
  * @return  none
  * @note History: 7/27/89, DSJ, Created.
  */
-void FreeMicroFeatures(MICROFEATURES MicroFeatures) { 
+void FreeMicroFeatures(MICROFEATURES MicroFeatures) {
   destroy_nodes(MicroFeatures, Efree); 
 }                                /* FreeMicroFeatures */
diff --git a/classify/mfoutline.cpp b/classify/mfoutline.cpp
index 511c34d41f..59593a8523 100644
--- a/classify/mfoutline.cpp
+++ b/classify/mfoutline.cpp
@@ -35,7 +35,8 @@
 ----------------------------------------------------------------------------*/
 
 /*---------------------------------------------------------------------------*/
-/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). */
+/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format).
+ */
 LIST ConvertBlob(TBLOB *blob) {
   LIST outlines = NIL_LIST;
   return (blob == NULL)
@@ -344,7 +345,6 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
 
 }                                /* ChangeDirection */
 
-
 /**
  * This routine normalizes each point in Outline by
  * translating it to the specified center and scaling it
@@ -378,7 +378,6 @@ void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) {
 
 }                                /* CharNormalizeOutline */
 
-
 /**
  * This routine computes the slope from Start to Finish and
  * and then computes the approximate direction of the line
diff --git a/classify/mfx.cpp b/classify/mfx.cpp
index 3da4fb3d0f..6fd8ed5da5 100644
--- a/classify/mfx.cpp
+++ b/classify/mfx.cpp
@@ -128,7 +128,6 @@ FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) {
   return (Orientation);
 }                                /* ComputeOrientation */
 
-
 /**
  * Convert Outline to MicroFeatures
  * @param Outline         outline to extract micro-features from
@@ -164,7 +163,6 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
   return (MicroFeatures);
 }                                /* ConvertToMicroFeatures */
 
-
 /**
  * This routine computes the feature parameters which describe
  * the micro-feature that starts and Start and ends at End.
@@ -178,7 +176,7 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
  * @return New micro-feature or NULL if the feature was rejected.
  * @note Globals: none
  * @note Exceptions: none
- * @note History: 
+ * @note History:
  * - 7/26/89, DSJ, Created.
  * - 11/17/89, DSJ, Added handling for Start and End same point.
  */
diff --git a/classify/mfx.h b/classify/mfx.h
index 05ce29cee5..5ed006dcc7 100644
--- a/classify/mfx.h
+++ b/classify/mfx.h
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:	mfx.h
- **	Purpose:	Definition of micro-feature extraction routines
- **	Author:		Dan Johnson
- **	History:	5/29/89, DSJ, Created.
+ ** Filename: mfx.h
+ ** Purpose:  Definition of micro-feature extraction routines
+ ** Author:   Dan Johnson
+ ** History:  5/29/89, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
diff --git a/classify/normfeat.cpp b/classify/normfeat.cpp
index a4ac672a11..f297b3b05d 100644
--- a/classify/normfeat.cpp
+++ b/classify/normfeat.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    normfeat.c
- **	Purpose:     Definition of char normalization features.
- **	Author:      Dan Johnson
- **	History:     12/14/90, DSJ, Created.
+ ** Filename:    normfeat.c
+ ** Purpose:     Definition of char normalization features.
+ ** Author:      Dan Johnson
+ ** History:     12/14/90, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -33,7 +33,6 @@ FLOAT32 ActualOutlineLength(FEATURE Feature) {
   return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
 }
 
-
 /**
  * Return the character normalization feature for a blob.
  *
diff --git a/classify/normmatch.cpp b/classify/normmatch.cpp
index 488cd1652f..b0e066a89d 100644
--- a/classify/normmatch.cpp
+++ b/classify/normmatch.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    normmatch.c
- **	Purpose:     Simple matcher based on character normalization features.
- **	Author:      Dan Johnson
- **	History:     Wed Dec 19 16:18:06 1990, DSJ, Created.
+ ** Filename:    normmatch.c
+ ** Purpose:     Simple matcher based on character normalization features.
+ ** Author:      Dan Johnson
+ ** History:     Wed Dec 19 16:18:06 1990, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -197,10 +197,10 @@ double NormEvidenceOf(register double NormAdj) {
 /*---------------------------------------------------------------------------*/
 /**
  * This routine dumps out detailed normalization match info.
- * @param File		open text file to dump match debug info to
- * @param NumParams	# of parameters in proto and feature
- * @param Proto[]		array of prototype parameters
- * @param Feature[]	array of feature parameters
+ * @param File      open text file to dump match debug info to
+ * @param NumParams # of parameters in proto and feature
+ * @param Proto[]   array of prototype parameters
+ * @param Feature[] array of feature parameters
  * Globals: none
  * @return  none
  * @note Exceptions: none
diff --git a/classify/ocrfeatures.cpp b/classify/ocrfeatures.cpp
index 0895ed0886..7df8135048 100644
--- a/classify/ocrfeatures.cpp
+++ b/classify/ocrfeatures.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    features.c
- **	Purpose:     Generic definition of a feature.
- **	Author:      Dan Johnson
- **	History:     Mon May 21 10:49:04 1990, DSJ, Created.
+ ** Filename:    features.c
+ ** Purpose:     Generic definition of a feature.
+ ** Author:      Dan Johnson
+ ** History:     Mon May 21 10:49:04 1990, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -66,12 +66,11 @@ void FreeFeature(FEATURE Feature) {
 
 }                                /* FreeFeature */
 
-
 /**
  * Release the memory consumed by the specified feature
  * set.  This routine also frees the memory consumed by the
  * features contained in the set.
- * @param FeatureSet	set of features to be freed
+ * @param FeatureSet  set of features to be freed
  * @return none
  * @note History: Mon May 21 13:59:46 1990, DSJ, Created.
  */
@@ -85,11 +84,10 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) {
   }
 }                                /* FreeFeatureSet */
 
-
 /**
  * Allocate and return a new feature of the specified
  * type.
- * @param FeatureDesc	description of feature to be created.
+ * @param FeatureDesc description of feature to be created.
  * @return New #FEATURE.
  * @note History: Mon May 21 14:06:42 1990, DSJ, Created.
  */
@@ -105,11 +103,10 @@ FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) {
 
 }                                /* NewFeature */
 
-
 /**
  * Allocate and return a new feature set large enough to
  * hold the specified number of features.
- * @param NumFeatures	maximum # of features to be put in feature set
+ * @param NumFeatures maximum # of features to be put in feature set
  * @return New #FEATURE_SET.
  * @note History: Mon May 21 14:22:40 1990, DSJ, Created.
  */
@@ -124,7 +121,6 @@ FEATURE_SET NewFeatureSet(int NumFeatures) {
 
 }                                /* NewFeatureSet */
 
-
 /**
  * Create a new feature of the specified type and read in
  * the value of its parameters from File.  The extra penalty
@@ -135,10 +131,11 @@ FEATURE_SET NewFeatureSet(int NumFeatures) {
  * @param File open text file to read feature from
  * @param FeatureDesc specifies type of feature to read from File
  * @return New #FEATURE read from File.
- * @note Exceptions: #ILLEGAL_FEATURE_PARAM if text file doesn't match expected format
+ * @note Exceptions: #ILLEGAL_FEATURE_PARAM if text file doesn't match expected
+ * format
  * @note History: Wed May 23 08:53:16 1990, DSJ, Created.
  */
-FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
+FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) {
   FEATURE Feature;
   int i;
 
@@ -153,7 +150,6 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
   return (Feature);
 }                                /* ReadFeature */
 
-
 /**
  * Create a new feature set of the specified type and read in
  * the features from File.  The correct text representation
@@ -165,7 +161,7 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
  * @return New feature set read from File.
  * @note History: Wed May 23 09:17:31 1990, DSJ, Created.
  */
-FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
+FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) {
   FEATURE_SET FeatureSet;
   int NumFeatures;
   int i;
@@ -180,7 +176,6 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
   return (FeatureSet);
 }                                /* ReadFeatureSet */
 
-
 /**
  * Appends a textual representation of Feature to str.
  * This representation is simply a list of the N parameters
@@ -203,7 +198,6 @@ void WriteFeature(FEATURE Feature, STRING* str) {
   *str += "\n";
 }                                /* WriteFeature */
 
-
 /**
  * Write a textual representation of FeatureSet to File.
  * This representation is an integer specifying the number of
@@ -224,7 +218,6 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
   }
 }                                /* WriteFeatureSet */
 
-
 /**
  * Write a textual representation of FeatureDesc to File
  * in the old format (i.e. the format used by the clusterer).
@@ -240,7 +233,7 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
  * @return none
  * @note History: Fri May 25 15:27:18 1990, DSJ, Created.
  */
-void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
+void WriteOldParamDesc(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) {
   int i;
 
   fprintf (File, "%d\n", FeatureDesc->NumParams);
diff --git a/classify/ocrfeatures.h b/classify/ocrfeatures.h
index 31a4794ca6..ae00e5a834 100644
--- a/classify/ocrfeatures.h
+++ b/classify/ocrfeatures.h
@@ -108,7 +108,7 @@ BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature);
 
 void FreeFeature(FEATURE Feature);
 
-void FreeFeatureSet(FEATURE_SET FeatureSet);
+void TESS_API FreeFeatureSet(FEATURE_SET FeatureSet);
 
 FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc);
 
diff --git a/classify/outfeat.cpp b/classify/outfeat.cpp
index b1a4a9be90..76597f7c15 100644
--- a/classify/outfeat.cpp
+++ b/classify/outfeat.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    outfeat.c
- **	Purpose:     Definition of outline-features.
- **	Author:      Dan Johnson
- **	History:     11/13/90, DSJ, Created.
+ ** Filename:    outfeat.c
+ ** Purpose:     Definition of outline-features.
+ ** Author:      Dan Johnson
+ ** History:     11/13/90, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -40,7 +40,7 @@ namespace tesseract {
  * @return Outline-features for Blob.
  * @note Globals: none
  * @note Exceptions: none
- * @note History: 
+ * @note History:
  * - 11/13/90, DSJ, Created.
  * - 05/24/91, DSJ, Updated for either char or baseline normalize.
  */
@@ -115,7 +115,7 @@ void AddOutlineFeatureToSet(FPOINT *Start,
  * @return none (results are returned in FeatureSet)
  * @note Globals: none
  * @note Exceptions: none
- * @note History: 
+ * @note History:
  * - 11/13/90, DSJ, Created.
  * - 5/24/91, DSJ, Added hidden edge capability.
  */
diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp
index 74beb18f35..a4a39263cf 100644
--- a/classify/picofeat.cpp
+++ b/classify/picofeat.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    picofeat.c
- **	Purpose:     Definition of pico-features.
- **	Author:      Dan Johnson
- **	History:     9/4/90, DSJ, Created.
+ ** Filename:    picofeat.c
+ ** Purpose:     Definition of pico-features.
+ ** Author:      Dan Johnson
+ ** History:     9/4/90, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -98,7 +98,7 @@ FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
  * nearest whole number of pico-features.  The pico-features
  * are spaced evenly over the entire segment.
  * Globals:
- * - classify_pico_feature_length	length of a single pico-feature
+ * - classify_pico_feature_length length of a single pico-feature
  * @param Start starting point of pico-feature
  * @param End ending point of pico-feature
  * @param FeatureSet set to add pico-feature to
diff --git a/classify/picofeat.h b/classify/picofeat.h
index 208b7e7708..966ffc32e7 100644
--- a/classify/picofeat.h
+++ b/classify/picofeat.h
@@ -61,5 +61,5 @@ extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length");
 /**----------------------------------------------------------------------------
         Global Data Definitions and Declarations
 ----------------------------------------------------------------------------**/
-extern FLOAT32 PicoFeatureLength;
+extern TESS_API FLOAT32 PicoFeatureLength;
 #endif
diff --git a/classify/protos.h b/classify/protos.h
index bb2f56b59f..7d36447915 100644
--- a/classify/protos.h
+++ b/classify/protos.h
@@ -166,13 +166,13 @@ void CopyProto(PROTO Src, PROTO Dest);
 
 void FillABC(PROTO Proto);
 
-void FreeClass(CLASS_TYPE Class);
+void TESS_API FreeClass(CLASS_TYPE Class);
 
 void FreeClassFields(CLASS_TYPE Class);
 
 void InitPrototypes();
 
-CLASS_TYPE NewClass(int NumProtos, int NumConfigs);
+CLASS_TYPE TESS_API NewClass(int NumProtos, int NumConfigs);
 
 void PrintProtos(CLASS_TYPE Class);
 
diff --git a/classify/shapeclassifier.cpp b/classify/shapeclassifier.cpp
index a39c8a24bb..e0ee3373d8 100644
--- a/classify/shapeclassifier.cpp
+++ b/classify/shapeclassifier.cpp
@@ -176,7 +176,7 @@ void ShapeClassifier::UnicharPrintResults(
   for (int i = 0; i < results.size(); ++i) {
     tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id,
             GetUnicharset().id_to_unichar(results[i].unichar_id));
-    if (results[i].fonts.size() != 0) {
+    if (!results[i].fonts.empty()) {
       tprintf(" Font Vector:");
       for (int f = 0; f < results[i].fonts.size(); ++f) {
         tprintf(" %d", results[i].fonts[f].fontinfo_id);
diff --git a/classify/shapetable.h b/classify/shapetable.h
index d8faae8817..59c3fbc6df 100644
--- a/classify/shapetable.h
+++ b/classify/shapetable.h
@@ -261,7 +261,7 @@ class Shape {
 // that the shape represents.
 // Each UnicharAndFonts also lists the fonts of the unichar_id that were
 // mapped to the shape during training.
-class ShapeTable {
+class TESS_API ShapeTable {
  public:
   ShapeTable();
   // The UNICHARSET reference supplied here, or in set_unicharset below must
diff --git a/classify/tessclassifier.h b/classify/tessclassifier.h
index 57a04861e2..fcf1870492 100644
--- a/classify/tessclassifier.h
+++ b/classify/tessclassifier.h
@@ -23,6 +23,7 @@
 #define THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_
 
 #include "shapeclassifier.h"
+#include "platform.h"
 
 namespace tesseract {
 
@@ -33,7 +34,7 @@ class TrainingSample;
 // Due to limitations in the content of TrainingSample, this currently
 // only works for the static classifier and only works if the ShapeTable
 // in classify is not NULL.
-class TessClassifier : public ShapeClassifier {
+class TESS_API TessClassifier : public ShapeClassifier {
  public:
   TessClassifier(bool pruner_only, tesseract::Classify* classify)
     : pruner_only_(pruner_only), classify_(classify) {}
diff --git a/classify/trainingsample.cpp b/classify/trainingsample.cpp
index 7fe83b7718..ee6c9d7f83 100644
--- a/classify/trainingsample.cpp
+++ b/classify/trainingsample.cpp
@@ -209,7 +209,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
                                      int geo_type,
                                      CHAR_DESC_STRUCT* char_desc) {
   // Extract the INT features.
-  if (features_ != NULL) delete [] features_;
+  delete[] features_;
   FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
   if (char_features == NULL) {
     tprintf("Error: no features to train on of type %s\n",
@@ -230,7 +230,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
     }
   }
   // Extract the Micro features.
-  if (micro_features_ != NULL) delete [] micro_features_;
+  delete[] micro_features_;
   char_features = char_desc->FeatureSets[micro_type];
   if (char_features == NULL) {
     tprintf("Error: no features to train on of type %s\n",
diff --git a/classify/trainingsampleset.cpp b/classify/trainingsampleset.cpp
index afbf3f420e..93936fcae6 100644
--- a/classify/trainingsampleset.cpp
+++ b/classify/trainingsampleset.cpp
@@ -96,10 +96,8 @@ bool TrainingSampleSet::DeSerialize(bool swap, FILE* fp) {
   num_raw_samples_ = samples_.size();
   if (!unicharset_.load_from_file(fp)) return false;
   if (!font_id_map_.DeSerialize(swap, fp)) return false;
-  if (font_class_array_ != NULL) {
-    delete font_class_array_;
-    font_class_array_ = NULL;
-  }
+  delete font_class_array_;
+  font_class_array_ = NULL;
   inT8 not_null;
   if (fread(&not_null, sizeof(not_null), 1, fp) != 1) return false;
   if (not_null) {
diff --git a/cmake/BuildFunctions.cmake b/cmake/BuildFunctions.cmake
index eea5a396cb..39fd6d7071 100644
--- a/cmake/BuildFunctions.cmake
+++ b/cmake/BuildFunctions.cmake
@@ -1,3 +1,12 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 ################################################################################
 #
 # macros and functions
diff --git a/cmake/Configure.cmake b/cmake/Configure.cmake
index d7f4ac6ad5..bd5b80c5a7 100644
--- a/cmake/Configure.cmake
+++ b/cmake/Configure.cmake
@@ -1,3 +1,12 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 ################################################################################
 #
 # configure
diff --git a/cmake/FindICU.cmake b/cmake/FindICU.cmake
index cd6bf9265c..8381c2eb84 100644
--- a/cmake/FindICU.cmake
+++ b/cmake/FindICU.cmake
@@ -1,3 +1,12 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 # This module can find the International Components for Unicode (ICU) Library
 #
 # Requirements:
diff --git a/cmake/SourceGroups.cmake b/cmake/SourceGroups.cmake
index ca87e808d3..7e79927e6c 100644
--- a/cmake/SourceGroups.cmake
+++ b/cmake/SourceGroups.cmake
@@ -1,3 +1,12 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 #include(SourceGroups)
 
 set(SSRC ${CMAKE_SOURCE_DIR})
@@ -14,6 +23,7 @@ set(H_CPP "(${H}|${CPP})")
 source_group("Resource files" ".*\\.(rc|ico)")
 
 source_group("api"          "${SSRC}/api/${H_CPP}")
+source_group("arch"         "${SSRC}/arch/${H_CPP}")
 source_group("ccmain"       "${SSRC}/ccmain/${H_CPP}")
 source_group("ccstruct"     "${SSRC}/ccstruct/${H_CPP}")
 source_group("ccutil"       "${SSRC}/ccutil/${H_CPP}")
@@ -21,6 +31,7 @@ source_group("classify"     "${SSRC}/classify/${H_CPP}")
 source_group("cube"         "${SSRC}/cube/${H_CPP}")
 source_group("cutil"        "${SSRC}/cutil/${H_CPP}")
 source_group("dict"         "${SSRC}/dict/${H_CPP}")
+source_group("lstm"         "${SSRC}/lstm/${H_CPP}")
 source_group("neural"       "${SSRC}/neural_networks/runtime/${H_CPP}")
 source_group("opencl"       "${SSRC}/opencl/${H_CPP}")
 source_group("textord"      "${SSRC}/textord/${H_CPP}")
diff --git a/configure.ac b/configure.ac
index a775e4fc28..f02cea108e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,8 +5,8 @@
 # ----------------------------------------
 # Initialization
 # ----------------------------------------
-AC_PREREQ([2.50])
-AC_INIT([tesseract], [3.05.00dev], [https://github.com/tesseract-ocr/tesseract/issues])
+AC_PREREQ([2.59])
+AC_INIT([tesseract], [3.05.00], [https://github.com/tesseract-ocr/tesseract/issues])
 AC_PROG_CXX([g++ clang++])
 AC_LANG([C++])
 AC_LANG_COMPILER_REQUIRE
@@ -18,8 +18,8 @@ AC_PREFIX_DEFAULT([/usr/local])
 
 # Define date of package, etc. Could be useful in auto-generated
 # documentation.
-PACKAGE_YEAR=2015
-PACKAGE_DATE="07/11"
+PACKAGE_YEAR=2017
+PACKAGE_DATE="02/16"
 
 abs_top_srcdir=`AS_DIRNAME([$0])`
 gitrev="`git --git-dir=${abs_top_srcdir}/.git --work-tree=${abs_top_srcdir} describe --always --tags`"
@@ -43,7 +43,7 @@ GENERIC_LIBRARY_NAME=tesseract
 
 # Release versioning
 GENERIC_MAJOR_VERSION=3
-GENERIC_MINOR_VERSION=4
+GENERIC_MINOR_VERSION=5
 GENERIC_MICRO_VERSION=0
 
 # API version (often = GENERIC_MAJOR_VERSION.GENERIC_MINOR_VERSION)
@@ -75,6 +75,7 @@ AM_CONDITIONAL([T_WIN], false)
 AM_CONDITIONAL([MINGW], false)
 AM_CONDITIONAL([OSX], false)
 AM_CONDITIONAL([GRAPHICS_DISABLED], false)
+AC_SUBST([AM_CPPFLAGS])
 
 OPENCL_INC="/opt/AMDAPP/include"
 OPENCL_LIBS="-lOpenCL -ltiff"
@@ -117,7 +118,7 @@ esac
 includedir="${includedir}/tesseract"
 
 AC_ARG_WITH([extra-includes],
-            [AC_HELP_STRING([--with-extra-includes=DIR],
+            [AS_HELP_STRING([--with-extra-includes=DIR],
                        [Define an additional directory for include files])],
             [if test -d "$withval" ; then
                CFLAGS="$CFLAGS -I$withval"
@@ -126,7 +127,7 @@ AC_ARG_WITH([extra-includes],
              fi])
 
 AC_ARG_WITH([extra-libraries],
-            [AC_HELP_STRING([--with-extra-libraries=DIR],
+            [AS_HELP_STRING([--with-extra-libraries=DIR],
                        [Define an additional directory for library files])],
             [if test -d "$withval" ; then
               LDFLAGS="$LDFLAGS -L$withval"
@@ -136,8 +137,8 @@ AC_ARG_WITH([extra-libraries],
 
 AC_MSG_CHECKING([--enable-graphics argument])
 AC_ARG_ENABLE([graphics],
- [AC_HELP_STRING([--enable-graphics],[enable graphics (ScrollView) (default)])
-AC_HELP_STRING([--disable-graphics],[disable graphics (ScrollView)])],
+ [AS_HELP_STRING([--enable-graphics],[enable graphics (ScrollView) (default)])
+AS_HELP_STRING([--disable-graphics],[disable graphics (ScrollView)])],
     [enable_graphics=$enableval],
     [enable_graphics="yes"])
 AC_MSG_RESULT([$enable_graphics])
@@ -149,12 +150,12 @@ fi
 # Check if cube should be disabled
 AC_MSG_CHECKING([whether to disable cube])
 AC_ARG_ENABLE([cube],
-  [AC_HELP_STRING([--disable-cube], [don't build cube support (experimental)])],
+  [AS_HELP_STRING([--disable-cube], [don't build cube support (experimental)])],
     [disable_cube="yes"], [disable_cube="no"])
 AC_MSG_RESULT([$disable_cube])
 AM_CONDITIONAL([NO_CUBE_BUILD], [test "$disable_cube" = "yes"])
 if test "$disable_cube" = "yes"; then
-  AC_SUBST([AM_CPPFLAGS], [-DNO_CUBE_BUILD])
+  AM_CPPFLAGS="-DNO_CUBE_BUILD $AM_CPPFLAGS"
 fi
 
 # check whether to build embedded version
@@ -166,19 +167,9 @@ AC_ARG_ENABLE([embedded],
 AC_MSG_RESULT([$enable_embedded])
 AM_CONDITIONAL([EMBEDDED], [test "$enable_embedded" = "yes"])
 if test "$enable_embedded" = "yes"; then
-  AC_SUBST([AM_CPPFLAGS], [-DEMBEDDED])
+  AM_CPPFLAGS="-DEMBEDDED $AM_CPPFLAGS"
 fi
 
-# check whether to build OpenMP support
-AM_CONDITIONAL([OPENMP], false)
-AC_OPENMP
-AS_IF([test "x$OPENMP_CFLAGS" != "x"],
-  [AM_CONDITIONAL([OPENMP], true)
-   AC_SUBST([AM_CPPFLAGS], ["$OPENMP_CXXFLAGS"])
-   AC_DEFINE([OPENMP], [], [Defined when compiled with OpenMP support])]
-)
-
-
 # check whether to build opencl version
 AC_MSG_CHECKING([--enable-opencl argument])
 AC_ARG_ENABLE([opencl],
@@ -227,7 +218,7 @@ case "${host_os}" in
       if !($have_opencl_lib); then
         AC_MSG_ERROR([Required OpenCL library not found!])
       fi
-      AC_SUBST([AM_CPPFLAGS], [-DUSE_OPENCL])
+      AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS"
       OPENCL_CPPFLAGS=""
       OPENCL_LDFLAGS="-framework OpenCL"
     fi
@@ -246,7 +237,7 @@ case "${host_os}" in
         if !($have_tiff); then
             AC_MSG_ERROR([Required TIFF headers not found! Try to install libtiff-dev?? package.])
         fi
-        AC_SUBST([AM_CPPFLAGS], [-DUSE_OPENCL])
+        AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS"
         OPENCL_CPPFLAGS="-I${OPENCL_INC}"
         OPENCL_LDFLAGS="${OPENCL_LIBS}"
     fi
@@ -261,7 +252,7 @@ AC_SUBST([OPENCL_LDFLAGS])
 # http://groups.google.com/group/tesseract-dev/browse_thread/thread/976645ae98189127
 AC_MSG_CHECKING([--enable-visibility argument])
 AC_ARG_ENABLE([visibility],
-    [AC_HELP_STRING([--enable-visibility],[enable experimental build with fvisibility (default=no)])],
+    [AS_HELP_STRING([--enable-visibility],[enable experimental build with fvisibility (default=no)])],
     [enable_visibility=$enableval],
     [enable_visibility="no"])
 AC_MSG_RESULT([$enable_visibility])
@@ -270,7 +261,7 @@ AM_CONDITIONAL([VISIBILITY], [test "$enable_visibility" = "yes"])
 # check whether to build multiple libraries
 AC_MSG_CHECKING([--enable-multiple-libraries argument])
 AC_ARG_ENABLE([multiple-libraries],
-    [AC_HELP_STRING([--enable-multiple-libraries],[enable multiple libraries (default=no)])],
+    [AS_HELP_STRING([--enable-multiple-libraries],[enable multiple libraries (default=no)])],
     [enable_mlibs=$enableval],
     [enable_mlibs="no"])
 AC_MSG_RESULT([$enable_mlibs])
@@ -279,7 +270,7 @@ AM_CONDITIONAL([USING_MULTIPLELIBS], [test "$enable_mlibs" = "yes"])
 # Check if tessdata-prefix is disabled
 AC_MSG_CHECKING([whether to use tessdata-prefix])
 AC_ARG_ENABLE([tessdata-prefix],
-    [AC_HELP_STRING([--disable-tessdata-prefix],
+    [AS_HELP_STRING([--disable-tessdata-prefix],
             [don't set TESSDATA-PREFIX during compile])],
     [tessdata_prefix="no"], [tessdata_prefix="yes"])
 AC_MSG_RESULT([$tessdata_prefix])
@@ -288,7 +279,7 @@ AM_CONDITIONAL([NO_TESSDATA_PREFIX], [test "$tessdata_prefix" = "no"])
 # Check whether enable debuging
 AC_MSG_CHECKING([whether to enable debugging])
 AC_ARG_ENABLE([debug],
-    [AC_HELP_STRING([--enable-debug],
+    [AS_HELP_STRING([--enable-debug],
         [turn on debugging (default=no)])],
     [debug=$enableval],
     [debug="no"])
@@ -447,16 +438,16 @@ else
   AC_MSG_ERROR([leptonica not found])
 fi
 
-AC_MSG_CHECKING([leptonica headers version >= 1.71])
+AC_MSG_CHECKING([leptonica headers version >= 1.74])
 AC_PREPROC_IFELSE(
     [AC_LANG_PROGRAM([#include "allheaders.h"],
-[#if (LIBLEPT_MAJOR_VERSION >= 1) && (LIBLEPT_MINOR_VERSION >= 71)
+[#if (LIBLEPT_MAJOR_VERSION > 1) || ((LIBLEPT_MAJOR_VERSION == 1) && (LIBLEPT_MINOR_VERSION >= 74)) /* any newer major release also qualifies */
 int i = 0;
 #else
 #error You need to upgrade your leptonica library!
 #endif])],
     [AC_MSG_RESULT(yes)],
-    [AC_MSG_FAILURE([leptonica 1.71 or higher is required])])
+    [AC_MSG_FAILURE([leptonica 1.74 or higher is required])])
 
 AM_CONDITIONAL([ENABLE_TRAINING], true)
 
diff --git a/cppan.yml b/cppan.yml
index 8065da79e4..ef38d520c1 100644
--- a/cppan.yml
+++ b/cppan.yml
@@ -1,124 +1,241 @@
 local_settings:
-    cppan_dir: cppan
-
-files:
-  - api/.*\.cpp
-  - ccmain/.*\.cpp
-  - ccstruct/.*\.cpp
-  - ccutil/.*\.cpp
-  - classify/.*\.cpp
-  - cube/.*\.cpp
-  - cutil/.*\.cpp
-  - dict/.*\.cpp
-  - neural_networks/runtime/.*\.cpp
-  - opencl/.*\.cpp
-  - textord/.*\.cpp
-  - viewer/.*\.cpp
-  - wordrec/.*\.cpp
-
-  - api/.*\.h
-  - ccmain/.*\.h
-  - ccstruct/.*\.h
-  - ccutil/.*\.h
-  - classify/.*\.h
-  - cube/.*\.h
-  - cutil/.*\.h
-  - dict/.*\.h
-  - neural_networks/runtime/.*\.h
-  - opencl/.*\.h
-  - textord/.*\.h
-  - viewer/.*\.h
-  - wordrec/.*\.h
-
-  - vs2010/port/.*
-
-include_directories:
-  private:
-    - classify
-    - cube
-    - cutil
-    - dict
-    - neural_networks/runtime
-    - opencl
-    - textord
-    - vs2010/port
-    - viewer
-    - wordrec
-  public:
-    - api
-    - ccmain
-    - ccstruct
-    - ccutil
-
-check_function_exists:
-  - getline
-
-check_symbol_exists:
-  snprintf: stdio.h
-
-check_include_exists:
-  - dlfcn.h
-  - inttypes.h
-  - limits.h
-  - malloc.h
-  - memory.h
-  - stdbool.h
-  - stdint.h
-  - stdlib.h
-  - strings.h
-  - string.h
-  - sys/ipc.h
-  - sys/shm.h
-  - sys/stat.h
-  - sys/types.h
-  - sys/wait.h
-  - tiffio.h
-  - unistd.h
-  - cairo/cairo-version.h
-  - CL/cl.h
-  - OpenCL/cl.h
-  - pango-1.0/pango/pango-features.h
-  - unicode/uchar.h
-
-check_type_size:
-  - long long int
-  - off_t
-  - mbstate_t
-  - wchar_t
-  - _Bool
-
-pre_sources: |
-    # dummy config file
-    if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h)
-        file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h)
-    endif()
-
-post_sources: |
-    if (NOT WIN32)
-        list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/gettimeofday.cpp")
-        list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/gettimeofday.h")
-        list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/mathfix.h")
-        list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/strcasestr.cpp")
-        list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/strcasestr.h")
-        list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/strtok_r.cpp")
-        list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/strtok_r.h")
-        list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/vcsversion.h")
-    endif()
-
-options:
-    any:
-        definitions:
-            public:
-              - HAVE_CONFIG_H
-              - _SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1
-              - USE_STD_NAMESPACE=1
-              - WINDLLNAME="tesseract"
-    shared:
-        definitions:
-            public: TESS_EXPORTS
-
-dependencies:
-  private:
-    # tesseract uses leptonica only internally
-    # and does not expose its interface to users
-    pvt.cppan.demo.leptonica: master
+    #use_shared_libs: true
+    #generator: Visual Studio 14 2015 Win64
+    silent: false
+    #copy_import_libs: true
+    build:
+        c_flags: /W0
+        cxx_flags: /W0
+
+    dependencies:
+        pvt.cppan.demo.danbloomberg.leptonica: 1
+        pvt.cppan.demo.unicode.icu.i18n: "*"
+
+root_project: pvt.cppan.demo.google.tesseract
+
+common_settings:
+    c++: 11
+
+projects:
+    libtesseract:
+        type: lib
+        export_all_symbols: true
+        files:
+          - api/.*\.cpp
+          - ccmain/.*\.cpp
+          - ccstruct/.*\.cpp
+          - ccutil/.*\.cpp
+          - classify/.*\.cpp
+          - cube/.*\.cpp
+          - cutil/.*\.cpp
+          - dict/.*\.cpp
+          - neural_networks/runtime/.*\.cpp
+          - opencl/.*\.cpp
+          - textord/.*\.cpp
+          - viewer/.*\.cpp
+          - wordrec/.*\.cpp
+
+          - api/.*\.h
+          - ccmain/.*\.h
+          - ccstruct/.*\.h
+          - ccutil/.*\.h
+          - classify/.*\.h
+          - cube/.*\.h
+          - cutil/.*\.h
+          - dict/.*\.h
+          - neural_networks/runtime/.*\.h
+          - opencl/.*\.h
+          - textord/.*\.h
+          - viewer/.*\.h
+          - wordrec/.*\.h
+
+          - vs2010/port/.*
+
+        exclude_from_build:
+          - api/tesseractmain.cpp
+          - viewer/svpaint.cpp
+
+        include_directories:
+          public:
+          # The next ten directories were formerly listed under 'private:'; they are now exported publicly.
+            - classify
+            - cube
+            - cutil
+            - dict
+            - neural_networks/runtime
+            - opencl
+            - textord
+            - vs2010/port
+            - viewer
+            - wordrec
+          # The remaining four directories were the only 'public:' entries before the merge above.
+            - api
+            - ccmain
+            - ccstruct
+            - ccutil
+
+        check_function_exists:
+          - getline
+
+        check_symbol_exists:
+          snprintf: stdio.h
+
+        check_include_exists:
+          - dlfcn.h
+          - inttypes.h
+          - limits.h
+          - malloc.h
+          - memory.h
+          - stdbool.h
+          - stdint.h
+          - stdlib.h
+          - strings.h
+          - string.h
+          - sys/ipc.h
+          - sys/shm.h
+          - sys/stat.h
+          - sys/types.h
+          - sys/wait.h
+          - tiffio.h
+          - unistd.h
+          - cairo/cairo-version.h
+          - CL/cl.h
+          - OpenCL/cl.h
+          - pango-1.0/pango/pango-features.h
+          - unicode/uchar.h
+
+        check_type_size:
+          - long long int
+          - off_t
+          - mbstate_t
+          - wchar_t
+          - _Bool
+
+
+        post_sources: |
+            file_write_once(${BDIR}/config_auto.h "")
+            if (NOT WIN32)
+                remove_src_dir(vs2010/port/*)
+            endif()
+
+        options:
+            any:
+                definitions:
+                    public:
+                      - HAVE_CONFIG_H
+                      - _SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1
+                      - USE_STD_NAMESPACE=1
+                      - WINDLLNAME="tesseract"
+            shared:
+                definitions:
+                    private:
+                        - TESS_EXPORTS
+                    interface:
+                        - TESS_IMPORTS
+
+        dependencies:
+            pvt.cppan.demo.danbloomberg.leptonica: 1.74
+
+    tesseract:
+        files: api/tesseractmain.cpp
+        dependencies:
+            - libtesseract
+
+    tessopt:
+        type: lib
+        static_only: true
+        files: training/tessopt.*
+        include_directories: training
+        dependencies: libtesseract
+
+    common_training:
+        type: lib
+        static_only: true
+        files:
+            - training/commandlineflags.cpp
+            - training/commandlineflags.h
+            - training/commontraining.cpp
+            - training/commontraining.h
+        include_directories: training
+        dependencies:
+            - tessopt
+
+    ambiguous_words:
+        files: training/ambiguous_words.cpp
+        dependencies: libtesseract
+
+    classifier_tester:
+        files: training/classifier_tester.cpp
+        dependencies: common_training
+
+    combine_tessdata:
+        files: training/combine_tessdata.cpp
+        dependencies: libtesseract
+
+    cntraining:
+        files: training/cntraining.cpp
+        dependencies: common_training
+
+    dawg2wordlist:
+        files: training/dawg2wordlist.cpp
+        dependencies: libtesseract
+
+    mftraining:
+        files:
+            - training/mftraining.cpp
+            - training/mergenf.*
+        dependencies: common_training
+
+    shapeclustering:
+        files: training/shapeclustering.cpp
+        dependencies: common_training
+
+    unicharset_extractor:
+        files: training/unicharset_extractor.cpp
+        dependencies: tessopt
+
+    wordlist2dawg:
+        files: training/wordlist2dawg.cpp
+        dependencies: libtesseract
+
+    unicharset_training:
+        type: lib
+        static_only: true
+        files:
+            - training/fileio.*
+            - training/icuerrorcode.h
+            - training/lstmtester.*
+            - training/normstrngs.*
+            - training/unicharset_training_utils.*
+        include_directories: training
+        dependencies:
+            - common_training
+            - pvt.cppan.demo.unicode.icu.i18n
+
+    set_unicharset_properties:
+        files: training/set_unicharset_properties.cpp
+        dependencies: unicharset_training
+
+    text2image:
+        files:
+            - training/text2image.cpp
+            - training/boxchar.cpp
+            - training/boxchar.h
+            - training/degradeimage.cpp
+            - training/degradeimage.h
+            - training/ligature_table.cpp
+            - training/ligature_table.h
+            - training/normstrngs.cpp
+            - training/normstrngs.h
+            - training/pango_font_info.cpp
+            - training/pango_font_info.h
+            - training/stringrenderer.cpp
+            - training/stringrenderer.h
+            - training/tlog.cpp
+            - training/tlog.h
+            - training/util.h
+            - training/icuerrorcode.h
+
+        dependencies:
+            - unicharset_training
+            - pvt.cppan.demo.gnome.pango.pangocairo: 1
diff --git a/cube/beam_search.cpp b/cube/beam_search.cpp
index fd17a1d59f..37fc35c566 100644
--- a/cube/beam_search.cpp
+++ b/cube/beam_search.cpp
@@ -36,8 +36,7 @@ BeamSearch::BeamSearch(CubeRecoContext *cntxt, bool word_mode) {
 void BeamSearch::Cleanup() {
   if (col_ != NULL) {
     for (int col = 0; col < col_cnt_; col++) {
-      if (col_[col])
-        delete col_[col];
+      delete col_[col];
     }
     delete []col_;
   }
@@ -124,11 +123,6 @@ WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) {
 
   // alloc memory for columns
   col_ = new SearchColumn *[col_cnt_];
-  if (!col_) {
-    fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
-            "SearchColumn array\n");
-    return NULL;
-  }
   memset(col_, 0, col_cnt_ * sizeof(*col_));
 
   // for all possible segments
@@ -136,11 +130,6 @@ WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) {
     // create a search column
     col_[end_seg - 1] = new SearchColumn(end_seg - 1,
                                          cntxt_->Params()->BeamWidth());
-    if (!col_[end_seg - 1]) {
-      fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
-              "SearchColumn for column %d\n", end_seg - 1);
-      return NULL;
-    }
 
     // for all possible start segments
     int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
@@ -356,8 +345,7 @@ CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, SearchNode *srch_node,
     return NULL;
 
   if (str32) {
-    if (*str32)
-      delete [](*str32);  // clear existing value
+    delete [](*str32);  // clear existing value
     *str32 = srch_node->PathString();
     if (!*str32)
       return NULL;
@@ -404,11 +392,6 @@ CharSamp **BeamSearch::SplitByNode(SearchObject *srch_obj,
 
   // Allocate memory for CharSamp array.
   CharSamp **chars = new CharSamp *[*char_cnt];
-  if (!chars) {
-    if (char_boxes)
-      boxaDestroy(char_boxes);
-    return NULL;
-  }
 
   int ch_idx = *char_cnt - 1;
   int seg_pt_cnt = srch_obj->SegPtCnt();
diff --git a/cube/beam_search.h b/cube/beam_search.h
index cd8fc0110d..31659a7fef 100644
--- a/cube/beam_search.h
+++ b/cube/beam_search.h
@@ -80,7 +80,7 @@ class BeamSearch {
   // best-cost path before the alternates list is sorted.
   inline int BestPresortedNodeIndex() const {
     return best_presorted_node_idx_;
-  };
+  }
 
  private:
   // Maximum reasonable segmentation point count
diff --git a/cube/bmp_8.cpp b/cube/bmp_8.cpp
index f7b6e0a1b8..936d344e40 100644
--- a/cube/bmp_8.cpp
+++ b/cube/bmp_8.cpp
@@ -48,18 +48,14 @@ Bmp8::~Bmp8() {
 // free buffer
 void Bmp8::FreeBmpBuffer(unsigned char **buff) {
   if (buff != NULL) {
-    if (buff[0] != NULL) {
-      delete []buff[0];
-    }
+    delete []buff[0];
     delete []buff;
   }
 }
 
 void Bmp8::FreeBmpBuffer(unsigned int **buff) {
   if (buff != NULL) {
-    if (buff[0] != NULL) {
-      delete []buff[0];
-    }
+    delete []buff[0];
     delete []buff;
   }
 }
@@ -76,17 +72,10 @@ unsigned char **Bmp8::CreateBmpBuffer(unsigned char init_val) {
   stride_ = ((wid_ % 4) == 0) ? wid_ : (4 * (1 + (wid_ / 4)));
 
   buff = (unsigned char **) new unsigned char *[hgt_ * sizeof(*buff)];
-  if (!buff) {
-    delete []buff;
-    return NULL;
-  }
 
   // alloc and init memory for buffer and line buffer
   buff[0] = (unsigned char *)
       new unsigned char[stride_ * hgt_ * sizeof(*buff[0])];
-  if (!buff[0]) {
-    return NULL;
-  }
 
   memset(buff[0], init_val, stride_ * hgt_ * sizeof(*buff[0]));
 
@@ -104,16 +93,9 @@ unsigned int ** Bmp8::CreateBmpBuffer(int wid, int hgt,
 
   // compute stride (align on 4 byte boundries)
-  buff = (unsigned int **) new unsigned int *[hgt * sizeof(*buff)];
-  if (!buff) {
-    delete []buff;
-    return NULL;
-  }
+  buff = (unsigned int **) new unsigned int *[hgt];  // count, not bytes
 
   // alloc and init memory for buffer and line buffer
-  buff[0] = (unsigned int *) new unsigned int[wid * hgt * sizeof(*buff[0])];
-  if (!buff[0]) {
-    return NULL;
-  }
+  buff[0] = (unsigned int *) new unsigned int[wid * hgt];  // count, not bytes
 
   memset(buff[0], init_val, wid * hgt * sizeof(*buff[0]));
 
@@ -176,9 +158,6 @@ bool Bmp8::LoadFromCharDumpFile(CachedFile *fp) {
 
   // alloc memory & read the 3 channel buffer
   buff = new unsigned char[buf_size];
-  if (buff == NULL) {
-    return false;
-  }
 
   if (fp->Read(buff, buf_size) != buf_size) {
     delete []buff;
@@ -217,9 +196,6 @@ bool Bmp8::LoadFromCharDumpFile(CachedFile *fp) {
 Bmp8 * Bmp8::FromCharDumpFile(CachedFile *fp) {
   // create a Bmp8 object
   Bmp8 *bmp_obj = new Bmp8(0, 0);
-  if (bmp_obj == NULL) {
-    return NULL;
-  }
 
   if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
     delete bmp_obj;
@@ -271,9 +247,6 @@ bool Bmp8::LoadFromCharDumpFile(FILE *fp) {
 
   // alloc memory & read the 3 channel buffer
   buff = new unsigned char[buf_size];
-  if (buff == NULL) {
-    return false;
-  }
 
   if (fread(buff, 1, buf_size, fp) != buf_size) {
     delete []buff;
@@ -312,9 +285,6 @@ bool Bmp8::LoadFromCharDumpFile(FILE *fp) {
 Bmp8 * Bmp8::FromCharDumpFile(FILE *fp) {
   // create a Bmp8 object
   Bmp8 *bmp_obj = new Bmp8(0, 0);
-  if (bmp_obj == NULL) {
-    return NULL;
-  }
 
   if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
     delete bmp_obj;
@@ -549,9 +519,6 @@ bool Bmp8::SaveBmp2CharDumpFile(FILE *fp) const {
 
   // alloc memory & write the 3 channel buffer
   buff = new unsigned char[buf_size];
-  if (buff == NULL) {
-    return false;
-  }
 
   // copy the data
   for (y = 0, pix = 0; y < hgt_; y++) {
@@ -703,7 +670,7 @@ ConComp ** Bmp8::FindConComps(int *concomp_cnt, int min_size) const {
         // if there was no foreground pix, then create a new concomp
         if (master_concomp == NULL) {
           master_concomp = new ConComp();
-          if (master_concomp == NULL || master_concomp->Add(x, y) == false) {
+          if (master_concomp->Add(x, y) == false) {
             fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not "
                     "allocate or add a connected component\n");
             FreeBmpBuffer(out_bmp_array);
@@ -715,13 +682,6 @@ ConComp ** Bmp8::FindConComps(int *concomp_cnt, int min_size) const {
           if ((alloc_concomp_cnt % kConCompAllocChunk) == 0) {
             ConComp **temp_con_comp =
                 new ConComp *[alloc_concomp_cnt + kConCompAllocChunk];
-            if (temp_con_comp == NULL) {
-              fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not "
-                      "extend array of connected components\n");
-              FreeBmpBuffer(out_bmp_array);
-              delete []concomp_array;
-              return NULL;
-            }
 
             if (alloc_concomp_cnt > 0) {
               memcpy(temp_con_comp, concomp_array,
@@ -778,9 +738,6 @@ bool Bmp8::ComputeTanTable() {
   // alloc memory for tan table
   delete []tan_table_;
   tan_table_ = new float[kDeslantAngleCount];
-  if (tan_table_ == NULL) {
-    return false;
-  }
 
   for (ang_idx = 0, ang_val = kMinDeslantAngle;
        ang_idx < kDeslantAngleCount; ang_idx++) {
@@ -825,10 +782,6 @@ bool Bmp8::Deslant() {
   int **angle_hist = new int*[kDeslantAngleCount];
   for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) {
     angle_hist[ang_idx] = new int[des_wid];
-    if (angle_hist[ang_idx] == NULL) {
-      delete[] angle_hist;
-      return false;
-    }
     memset(angle_hist[ang_idx], 0, des_wid * sizeof(*angle_hist[ang_idx]));
   }
 
@@ -1010,10 +963,6 @@ bool Bmp8::HorizontalDeslant(double *deslant_angle) {
   int **angle_hist = new int*[kDeslantAngleCount];
   for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) {
     angle_hist[ang_idx] = new int[des_hgt];
-    if (angle_hist[ang_idx] == NULL) {
-      delete[] angle_hist;
-      return false;
-    }
     memset(angle_hist[ang_idx], 0, des_hgt * sizeof(*angle_hist[ang_idx]));
   }
 
@@ -1122,9 +1071,6 @@ float Bmp8::MeanHorizontalHistogramEntropy() const {
 
 int *Bmp8::HorizontalHistogram() const {
   int *hist = new int[hgt_];
-  if (hist == NULL) {
-    return NULL;
-  }
 
   // compute histograms
   for (int y = 0; y < hgt_; y++) {
diff --git a/cube/cached_file.cpp b/cube/cached_file.cpp
index a9a5b2e0b1..84f6f31d66 100644
--- a/cube/cached_file.cpp
+++ b/cube/cached_file.cpp
@@ -68,9 +68,6 @@ bool CachedFile::Open() {
   rewind(fp_);
   // alloc memory for buffer
   buff_ = new unsigned char[kCacheSize];
-  if (buff_ == NULL) {
-    return false;
-  }
   // init counters
   buff_size_ = 0;
   buff_pos_ = 0;
diff --git a/cube/char_altlist.cpp b/cube/char_altlist.cpp
index c0e7776ef2..d4d4b9c543 100644
--- a/cube/char_altlist.cpp
+++ b/cube/char_altlist.cpp
@@ -56,10 +56,6 @@ bool CharAltList::Insert(int class_id, int cost, void *tag) {
     alt_cost_ = new int[max_alt_];
     alt_tag_ = new void *[max_alt_];
 
-    if (class_id_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) {
-      return false;
-    }
-
     memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
   }
 
@@ -67,9 +63,6 @@ bool CharAltList::Insert(int class_id, int cost, void *tag) {
     int class_cnt = char_set_->ClassCount();
 
     class_id_cost_ = new int[class_cnt];
-    if (class_id_cost_ == NULL) {
-      return false;
-    }
 
     for (int ich = 0; ich < class_cnt; ich++) {
       class_id_cost_[ich] = WORST_COST;
diff --git a/cube/char_bigrams.cpp b/cube/char_bigrams.cpp
index b36b1f6cda..b005c1d2d4 100644
--- a/cube/char_bigrams.cpp
+++ b/cube/char_bigrams.cpp
@@ -61,11 +61,6 @@ CharBigrams *CharBigrams::Create(const string &data_file_path,
 
   // construct a new object
   CharBigrams *char_bigrams_obj = new CharBigrams();
-  if (char_bigrams_obj == NULL) {
-    fprintf(stderr, "Cube ERROR (CharBigrams::Create): could not create "
-            "character bigrams object.\n");
-    return NULL;
-  }
   CharBigramTable *table = &char_bigrams_obj->bigram_table_;
 
   table->total_cnt = 0;
@@ -90,11 +85,6 @@ CharBigrams *CharBigrams::Create(const string &data_file_path,
     // expand the bigram table
     if (ch1 > table->max_char) {
       CharBigram *char_bigram = new CharBigram[ch1 + 1];
-      if (char_bigram == NULL) {
-        fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating "
-                "additional memory for character bigram table.\n");
-        return NULL;
-      }
 
       if (table->char_bigram != NULL && table->max_char >= 0) {
         memcpy(char_bigram, table->char_bigram,
@@ -115,12 +105,6 @@ CharBigrams *CharBigrams::Create(const string &data_file_path,
 
     if (ch2 > table->char_bigram[ch1].max_char) {
       Bigram *bigram = new Bigram[ch2 + 1];
-      if (bigram == NULL) {
-        fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating "
-                "memory for bigram.\n");
-        delete char_bigrams_obj;
-        return NULL;
-      }
 
       if (table->char_bigram[ch1].bigram != NULL &&
           table->char_bigram[ch1].max_char >= 0) {
@@ -179,14 +163,14 @@ int CharBigrams::Cost(const char_32 *char_32_ptr, CharSet *char_set) const {
     if (lower_32 && lower_32[0] != 0) {
       int cost_lower = MeanCostWithSpaces(lower_32);
       cost = MIN(cost, cost_lower);
-      delete [] lower_32;
     }
+    delete [] lower_32;
     char_32 *upper_32 = CubeUtils::ToUpper(char_32_ptr, char_set);
     if (upper_32 && upper_32[0] != 0) {
       int cost_upper = MeanCostWithSpaces(upper_32);
       cost = MIN(cost, cost_upper);
-      delete [] upper_32;
     }
+    delete [] upper_32;
   }
   return cost;
 }
diff --git a/cube/char_samp.cpp b/cube/char_samp.cpp
index c3493fa13c..f55735be0e 100644
--- a/cube/char_samp.cpp
+++ b/cube/char_samp.cpp
@@ -106,12 +106,10 @@ CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) {
   // the label is not null terminated in the file
   if (val32 > 0 && val32 < MAX_UINT32) {
     label32 = new char_32[val32 + 1];
-    if (label32 == NULL) {
-      return NULL;
-    }
     // read label
     if (fp->Read(label32, val32 * sizeof(*label32)) !=
         (val32 * sizeof(*label32))) {
+      delete [] label32;
       return NULL;
     }
     // null terminate
@@ -121,35 +119,40 @@ CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) {
   }
   // read coordinates
   if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
+    delete [] label32;
     return NULL;
   }
   if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
+    delete [] label32;
     return NULL;
   }
   if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
+    delete [] label32;
     return NULL;
   }
   if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
+    delete [] label32;
     return NULL;
   }
   if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
+    delete [] label32;
     return NULL;
   }
   if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
+    delete [] label32;
     return NULL;
   }
   if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
+    delete [] label32;
     return NULL;
   }
   if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
       sizeof(norm_aspect_ratio)) {
+    delete [] label32;
     return NULL;
   }
   // create the object
   CharSamp *char_samp = new CharSamp();
-  if (char_samp == NULL) {
-    return NULL;
-  }
   // init
   char_samp->label32_ = label32;
   char_samp->page_ = page;
@@ -195,9 +198,6 @@ CharSamp *CharSamp::FromCharDumpFile(FILE *fp) {
   // the label is not null terminated in the file
   if (val32 > 0 && val32 < MAX_UINT32) {
     label32 = new char_32[val32 + 1];
-    if (label32 == NULL) {
-      return NULL;
-    }
     // read label
     if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
         (val32 * sizeof(*label32))) {
@@ -224,10 +224,6 @@ CharSamp *CharSamp::FromCharDumpFile(FILE *fp) {
   }
   // create the object
   CharSamp *char_samp = new CharSamp();
-  if (char_samp == NULL) {
-    delete [] label32;
-    return NULL;
-  }
   // init
   char_samp->label32_ = label32;
   char_samp->page_ = page;
@@ -250,9 +246,6 @@ CharSamp *CharSamp::FromCharDumpFile(FILE *fp) {
 // specified width and height
 CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) {
   CharSamp *scaled_samp = new CharSamp(wid, hgt);
-  if (scaled_samp == NULL) {
-    return NULL;
-  }
   if (scaled_samp->ScaleFrom(this, isotropic) == false) {
     delete scaled_samp;
     return NULL;
@@ -274,9 +267,6 @@ CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt,
                                 unsigned char *data) {
   // create the object
   CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
-  if (char_samp == NULL) {
-    return NULL;
-  }
   if (char_samp->LoadFromRawData(data) == false) {
     delete char_samp;
     return NULL;
@@ -421,14 +411,6 @@ ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left,
         if ((seg_cnt % kConCompAllocChunk) == 0) {
           ConComp **temp_segm_array =
               new ConComp *[seg_cnt + kConCompAllocChunk];
-          if (temp_segm_array == NULL) {
-            fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not "
-                    "allocate additional connected components\n");
-            delete []concomp_seg_array;
-            delete []concomp_array;
-            delete []seg_array;
-            return NULL;
-          }
           if (seg_cnt > 0) {
             memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
             delete []seg_array;
@@ -486,8 +468,6 @@ CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp,
   bool *id_exist = new bool[id_cnt];
   bool *left_most_exist = new bool[id_cnt];
   bool *right_most_exist = new bool[id_cnt];
-  if (!id_exist || !left_most_exist || !right_most_exist)
-    return NULL;
   memset(id_exist, 0, id_cnt * sizeof(*id_exist));
   memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
   memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
@@ -544,9 +524,6 @@ CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp,
   (*right_most) = (unq_right_most >= unq_ids);
   // create the char sample object
   CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
-  if (!samp) {
-    return NULL;
-  }
 
   // set the foreground pixels
   for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
@@ -594,9 +571,6 @@ CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) {
   // the label is not null terminated in the file
   if (val32 > 0 && val32 < MAX_UINT32) {
     label32 = new char_32[val32 + 1];
-    if (label32 == NULL) {
-      return NULL;
-    }
     // read label
     memcpy(label32, raw_data, val32 * sizeof(*label32));
     raw_data += (val32 * sizeof(*label32));
@@ -608,9 +582,6 @@ CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) {
 
   // create the object
   CharSamp *char_samp = new CharSamp();
-  if (char_samp == NULL) {
-    return NULL;
-  }
 
   // read coordinates
   char_samp->label32_ = label32;
diff --git a/cube/char_samp.h b/cube/char_samp.h
index a3c3063bd6..827e1c37c8 100644
--- a/cube/char_samp.h
+++ b/cube/char_samp.h
@@ -66,20 +66,14 @@ class CharSamp : public Bmp8 {
   void SetTop(unsigned short top) { top_ = top; }
   void SetPage(unsigned short page) { page_ = page; }
   void SetLabel(char_32 label) {
-    if (label32_ != NULL) {
-      delete []label32_;
-    }
+    delete []label32_;
     label32_ = new char_32[2];
-    if (label32_ != NULL) {
-      label32_[0] = label;
-      label32_[1] = 0;
-    }
+    label32_[0] = label;
+    label32_[1] = 0;
   }
   void SetLabel(const char_32 *label32) {
-    if (label32_ != NULL) {
-      delete []label32_;
-      label32_ = NULL;
-    }
+    delete []label32_;
+    label32_ = NULL;
     if (label32 != NULL) {
       // remove any byte order marks if any
       if (label32[0] == 0xfeff) {
@@ -87,10 +81,8 @@ class CharSamp : public Bmp8 {
       }
       int len = LabelLen(label32);
       label32_ = new char_32[len + 1];
-      if (label32_ != NULL) {
-        memcpy(label32_, label32, len * sizeof(*label32));
-        label32_[len] = 0;
-      }
+      memcpy(label32_, label32, len * sizeof(*label32));
+      label32_[len] = 0;
     }
   }
   void SetLabel(string str);
diff --git a/cube/char_samp_set.cpp b/cube/char_samp_set.cpp
index 2a495095ef..9af7581187 100644
--- a/cube/char_samp_set.cpp
+++ b/cube/char_samp_set.cpp
@@ -40,9 +40,7 @@ void CharSampSet::Cleanup() {
     // only free samples if owned by class
     if (own_samples_ == true) {
       for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) {
-        if (samp_buff_[samp_idx] != NULL) {
-          delete samp_buff_[samp_idx];
-        }
+        delete samp_buff_[samp_idx];
       }
     }
     delete []samp_buff_;
@@ -57,9 +55,6 @@ bool CharSampSet::Add(CharSamp *char_samp) {
       // create an extended buffer
     CharSamp **new_samp_buff =
         reinterpret_cast<CharSamp **>(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]);
-    if (new_samp_buff == NULL) {
-      return false;
-    }
     // copy old contents
     if (cnt_ > 0) {
       memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
@@ -109,10 +104,6 @@ CharSampSet * CharSampSet::FromCharDumpFile(string file_name) {
   }
   // create an object
   CharSampSet *samp_set = new CharSampSet();
-  if (samp_set == NULL) {
-    fclose(fp);
-    return NULL;
-  }
   if (samp_set->LoadCharSamples(fp) == false) {
     delete samp_set;
     samp_set = NULL;
@@ -148,9 +139,8 @@ bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
     i64_pos;
   // open the file
   fp_in = new CachedFile(file_name);
-  if (fp_in == NULL) {
-    return false;
-  }
   i64_size = fp_in->Size();
   if (i64_size < 1) {
+    // fp_in was allocated just above and is not handed off before this return
+    delete fp_in;
     return false;
diff --git a/cube/char_set.cpp b/cube/char_set.cpp
index 1414d640f4..b2ec5f544a 100644
--- a/cube/char_set.cpp
+++ b/cube/char_set.cpp
@@ -54,9 +54,6 @@ CharSet::~CharSet() {
 CharSet *CharSet::Create(TessdataManager *tessdata_manager,
                          UNICHARSET *tess_unicharset) {
   CharSet *char_set = new CharSet();
-  if (char_set == NULL) {
-    return NULL;
-  }
 
   // First look for Cube's unicharset; if not there, use tesseract's
   bool cube_unicharset_exists;
@@ -119,19 +116,9 @@ bool CharSet::LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset) {
   }
   // memory for class strings
   class_strings_ = new string_32*[class_cnt_];
-  if (class_strings_ == NULL) {
-    fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not "
-            "allocate memory for class strings.\n");
-    return false;
-  }
   // memory for unicharset map
   if (tess_unicharset) {
     unicharset_map_ = new int[class_cnt_];
-    if (unicharset_map_ == NULL) {
-      fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not "
-              "allocate memory for unicharset map.\n");
-      return false;
-    }
   }
 
   // Read in character strings and add to hash table
@@ -154,11 +141,6 @@ bool CharSet::LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset) {
     }
     CubeUtils::UTF8ToUTF32(str_line, &str32);
     class_strings_[class_id] = new string_32(str32);
-    if (class_strings_[class_id] == NULL) {
-      fprintf(stderr, "Cube ERROR (CharSet::ReadAndHashStrings): could not "
-              "allocate memory for class string with class_id=%d.\n", class_id);
-      return false;
-    }
 
     // Add to hash-table
     int hash_val = Hash(reinterpret_cast<const char_32 *>(str32.c_str()));
diff --git a/cube/classifier_base.h b/cube/classifier_base.h
index 8c2b1bbf9a..98f2f79af4 100644
--- a/cube/classifier_base.h
+++ b/cube/classifier_base.h
@@ -49,21 +49,15 @@ class CharClassifier {
   virtual ~CharClassifier() {
     if (fold_sets_  != NULL) {
       for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
-        if (fold_sets_[fold_set] != NULL) {
-          delete []fold_sets_[fold_set];
-        }
+        delete []fold_sets_[fold_set];
       }
       delete []fold_sets_;
       fold_sets_ = NULL;
     }
-    if (fold_set_len_ != NULL) {
-      delete []fold_set_len_;
-      fold_set_len_ = NULL;
-    }
-    if (feat_extract_ != NULL) {
-      delete feat_extract_;
-      feat_extract_ = NULL;
-    }
+    delete []fold_set_len_;
+    fold_set_len_ = NULL;
+    delete feat_extract_;
+    feat_extract_ = NULL;
   }
 
   // pure virtual functions that need to be implemented by any inheriting class
diff --git a/cube/classifier_factory.cpp b/cube/classifier_factory.cpp
index a22f0d4ea8..04df263911 100644
--- a/cube/classifier_factory.cpp
+++ b/cube/classifier_factory.cpp
@@ -56,12 +56,6 @@ CharClassifier *CharClassifierFactory::Create(const string &data_file_path,
       return NULL;
   }
 
-  if (feat_extract == NULL) {
-    fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): unable "
-              "to instantiate feature extraction object.\n");
-    return NULL;
-  }
-
   // create the classifier object
   CharClassifier *classifier_obj;
   switch (params->TypeClassifier()) {
@@ -79,12 +73,6 @@ CharClassifier *CharClassifierFactory::Create(const string &data_file_path,
       return NULL;
   }
 
-  if (classifier_obj == NULL) {
-    fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): error "
-            "allocating memory for character classifier object.\n");
-    return NULL;
-  }
-
   // Init the classifier
   if (!classifier_obj->Init(data_file_path, lang, lang_mod)) {
     delete classifier_obj;
diff --git a/cube/con_comp.cpp b/cube/con_comp.cpp
index 53b1a73b43..a0a926dbd8 100644
--- a/cube/con_comp.cpp
+++ b/cube/con_comp.cpp
@@ -52,9 +52,6 @@ ConComp::~ConComp() {
 // adds a pt to the conn comp and updates its boundaries
 bool ConComp::Add(int x, int y) {
   ConCompPt *pt_ptr = new ConCompPt(x, y);
-  if (pt_ptr == NULL) {
-    return false;
-  }
 
   if (head_ == NULL) {
     left_ = x;
@@ -114,9 +111,6 @@ int *ConComp::CreateHistogram(int max_hist_wnd) {
 
   // alloc memo for histogram
   int *hist_array = new int[wid];
-  if (hist_array == NULL) {
-    return NULL;
-  }
 
   memset(hist_array, 0, wid * sizeof(*hist_array));
 
@@ -148,9 +142,6 @@ int *ConComp::SegmentHistogram(int *hist_array, int *seg_pt_cnt) {
     hgt = bottom_ - top_ + 1;
 
   int *x_seg_pt = new int[wid];
-  if (x_seg_pt == NULL) {
-    return NULL;
-  }
 
   int seg_pt_wnd = static_cast<int>(hgt * SEG_PT_WND_RATIO);
 
@@ -216,18 +207,9 @@ ConComp **ConComp::Segment(int max_hist_wnd, int *concomp_cnt) {
 
   // create concomp array
   ConComp **concomp_array = new ConComp *[seg_pt_cnt + 1];
-  if (concomp_array == NULL) {
-    delete []x_seg_pt;
-    return NULL;
-  }
 
   for (int concomp = 0; concomp <= seg_pt_cnt; concomp++) {
     concomp_array[concomp] = new ConComp();
-    if (concomp_array[concomp] == NULL) {
-      delete []x_seg_pt;
-      delete []concomp_array;
-      return NULL;
-    }
 
     // split concomps inherit the ID this concomp
     concomp_array[concomp]->SetID(id_);
diff --git a/cube/conv_net_classifier.cpp b/cube/conv_net_classifier.cpp
index ac33cd33b1..e4846ac381 100644
--- a/cube/conv_net_classifier.cpp
+++ b/cube/conv_net_classifier.cpp
@@ -147,18 +147,7 @@ bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
   // allocate i/p and o/p buffers if needed
   if (net_input_ == NULL) {
     net_input_ = new float[feat_cnt];
-    if (net_input_ == NULL) {
-      fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
-            "unable to allocate memory for input nodes\n");
-      return false;
-    }
-
     net_output_ = new float[class_cnt];
-    if (net_output_ == NULL) {
-      fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
-            "unable to allocate memory for output nodes\n");
-      return false;
-    }
   }
 
   // compute input features
@@ -205,11 +194,6 @@ CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
 
   // create an altlist
   CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
-  if (alt_list == NULL) {
-    fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::Classify): "
-            "returning emtpy CharAltList\n");
-    return NULL;
-  }
 
   for (int out = 1; out < class_cnt; out++) {
     int cost = CubeUtils::Prob2Cost(net_output_[out]);
@@ -261,14 +245,7 @@ bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
   fold_set_cnt_ = str_vec.size();
 
   fold_sets_ = new int *[fold_set_cnt_];
-  if (fold_sets_ == NULL) {
-    return false;
-  }
   fold_set_len_ = new int[fold_set_cnt_];
-  if (fold_set_len_ == NULL) {
-    fold_set_cnt_ = 0;
-    return false;
-  }
 
   for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
     reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
@@ -287,12 +264,6 @@ bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
     CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
     fold_set_len_[fold_set] = str32.length();
     fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
-    if (fold_sets_[fold_set] == NULL) {
-      fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): "
-              "could not allocate folding set\n");
-      fold_set_cnt_ = fold_set;
-      return false;
-    }
     for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
       fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
     }
@@ -375,14 +346,7 @@ bool ConvNetCharClassifier::LoadNets(const string &data_file_path,
   // allocate i/p and o/p buffers if needed
   if (net_input_ == NULL) {
     net_input_ = new float[feat_cnt];
-    if (net_input_ == NULL) {
-      return false;
-    }
-
     net_output_ = new float[class_cnt];
-    if (net_output_ == NULL) {
-      return false;
-    }
   }
 
   return true;
diff --git a/cube/cube_line_object.cpp b/cube/cube_line_object.cpp
index 0325453740..72fd87ff2b 100644
--- a/cube/cube_line_object.cpp
+++ b/cube/cube_line_object.cpp
@@ -91,68 +91,62 @@ bool CubeLineObject::Process() {
   if (word_break_threshold > 0) {
     // over-allocate phrases object buffer
     phrases_ = new CubeObject *[con_comp_cnt];
-    if (phrases_ != NULL) {
-      // create a phrase if the horizontal distance between two consecutive
-      // concomps is higher than threshold
-      int start_con_idx = 0;
-      int current_phrase_limit = rtl ? con_comps[0]->Left() :
-                                       con_comps[0]->Right();
-
-      for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) {
-        bool create_new_phrase = true;
-        // if not at the end, compute the distance between two consecutive
-        // concomps
-        if (con_idx < con_comp_cnt) {
-          int dist = 0;
-          if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
-            dist = current_phrase_limit - con_comps[con_idx]->Right();
-          } else {
-            dist = con_comps[con_idx]->Left() - current_phrase_limit;
-          }
-          create_new_phrase = (dist > word_break_threshold);
+    // create a phrase if the horizontal distance between two consecutive
+    // concomps is higher than threshold
+    int start_con_idx = 0;
+    int current_phrase_limit = rtl ? con_comps[0]->Left() :
+                                     con_comps[0]->Right();
+
+    for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) {
+      bool create_new_phrase = true;
+      // if not at the end, compute the distance between two consecutive
+      // concomps
+      if (con_idx < con_comp_cnt) {
+        int dist = 0;
+        if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
+          dist = current_phrase_limit - con_comps[con_idx]->Right();
+        } else {
+          dist = con_comps[con_idx]->Left() - current_phrase_limit;
         }
+        create_new_phrase = (dist > word_break_threshold);
+      }
 
-        // create a new phrase
-        if (create_new_phrase) {
-          // create a phrase corresponding to a range on components
-          bool left_most;
-          bool right_most;
-          CharSamp *phrase_char_samp =
-              CharSamp::FromConComps(con_comps, start_con_idx,
-                                     con_idx - start_con_idx, NULL,
-                                     &left_most, &right_most,
-                                     line_pix_->h);
-          if (phrase_char_samp == NULL) {
-            break;
-          }
-          phrases_[phrase_cnt_] = new CubeObject(cntxt_, phrase_char_samp);
-          if (phrases_[phrase_cnt_] == NULL) {
-            delete phrase_char_samp;
-            break;
-          }
-          // set the ownership of the charsamp to the cube object
-          phrases_[phrase_cnt_]->SetCharSampOwnership(true);
-          phrase_cnt_++;
-          // advance the starting index to the current index
-          start_con_idx = con_idx;
-          // set the limit of the newly starting phrase (if any)
-          if (con_idx < con_comp_cnt) {
-            current_phrase_limit = rtl ? con_comps[con_idx]->Left() :
-                                         con_comps[con_idx]->Right();
-          }
+      // create a new phrase
+      if (create_new_phrase) {
+        // create a phrase corresponding to a range on components
+        bool left_most;
+        bool right_most;
+        CharSamp *phrase_char_samp =
+            CharSamp::FromConComps(con_comps, start_con_idx,
+                                   con_idx - start_con_idx, NULL,
+                                   &left_most, &right_most,
+                                   line_pix_->h);
+        if (phrase_char_samp == NULL) {
+          break;
+        }
+        phrases_[phrase_cnt_] = new CubeObject(cntxt_, phrase_char_samp);
+        // set the ownership of the charsamp to the cube object
+        phrases_[phrase_cnt_]->SetCharSampOwnership(true);
+        phrase_cnt_++;
+        // advance the starting index to the current index
+        start_con_idx = con_idx;
+        // set the limit of the newly starting phrase (if any)
+        if (con_idx < con_comp_cnt) {
+          current_phrase_limit = rtl ? con_comps[con_idx]->Left() :
+                                       con_comps[con_idx]->Right();
+        }
+      } else {
+        // update the limit of the current phrase
+        if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
+          current_phrase_limit = MIN(current_phrase_limit,
+                                     con_comps[con_idx]->Left());
         } else {
-          // update the limit of the current phrase
-          if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
-            current_phrase_limit = MIN(current_phrase_limit,
-                                       con_comps[con_idx]->Left());
-          } else {
-            current_phrase_limit = MAX(current_phrase_limit,
-                                       con_comps[con_idx]->Right());
-          }
+          current_phrase_limit = MAX(current_phrase_limit,
+                                     con_comps[con_idx]->Right());
         }
       }
-      ret_val = true;
     }
+    ret_val = true;
   }
 
   // clean-up connected comps
diff --git a/cube/cube_line_segmenter.cpp b/cube/cube_line_segmenter.cpp
index 278011f090..4b75dca2cd 100644
--- a/cube/cube_line_segmenter.cpp
+++ b/cube/cube_line_segmenter.cpp
@@ -126,9 +126,6 @@ Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
                                    Box *cracked_line_box, int line_cnt) {
   // create lines pixa array
   Pixa **lines_pixa = new Pixa*[line_cnt];
-  if (lines_pixa == NULL) {
-    return NULL;
-  }
 
   memset(lines_pixa, 0, line_cnt * sizeof(*lines_pixa));
 
@@ -620,9 +617,6 @@ bool CubeLineSegmenter::AddLines(Pixa *lines) {
 // Index the specific pixa using RTL reading order
 int *CubeLineSegmenter::IndexRTL(Pixa *pixa) {
   int *pix_index = new int[pixa->n];
-  if (pix_index == NULL) {
-    return NULL;
-  }
 
   for (int pix = 0; pix < pixa->n; pix++) {
     pix_index[pix] = pix;
diff --git a/cube/cube_object.cpp b/cube/cube_object.cpp
index c7dec4d5b8..4d95f71ba2 100644
--- a/cube/cube_object.cpp
+++ b/cube/cube_object.cpp
@@ -54,47 +54,33 @@ void CubeObject::Init() {
 
 // Cleanup function
 void CubeObject::Cleanup() {
-  if (alt_list_ != NULL) {
-    delete alt_list_;
-    alt_list_ = NULL;
-  }
+  delete alt_list_;
+  alt_list_ = NULL;
 
-  if (deslanted_alt_list_ != NULL) {
-    delete deslanted_alt_list_;
-    deslanted_alt_list_ = NULL;
-  }
+  delete deslanted_alt_list_;
+  deslanted_alt_list_ = NULL;
 }
 
 CubeObject::~CubeObject() {
-  if (char_samp_ != NULL && own_char_samp_ == true) {
+  if (own_char_samp_ == true) {
     delete char_samp_;
     char_samp_ = NULL;
   }
 
-  if (srch_obj_ != NULL) {
-    delete srch_obj_;
-    srch_obj_ = NULL;
-  }
+  delete srch_obj_;
+  srch_obj_ = NULL;
 
-  if (deslanted_srch_obj_ != NULL) {
-    delete deslanted_srch_obj_;
-    deslanted_srch_obj_ = NULL;
-  }
+  delete deslanted_srch_obj_;
+  deslanted_srch_obj_ = NULL;
 
-  if (beam_obj_ != NULL) {
-    delete beam_obj_;
-    beam_obj_ = NULL;
-  }
+  delete beam_obj_;
+  beam_obj_ = NULL;
 
-  if (deslanted_beam_obj_ != NULL) {
-    delete deslanted_beam_obj_;
-    deslanted_beam_obj_ = NULL;
-  }
+  delete deslanted_beam_obj_;
+  deslanted_beam_obj_ = NULL;
 
-  if (deslanted_char_samp_ != NULL) {
-    delete deslanted_char_samp_;
-    deslanted_char_samp_ = NULL;
-  }
+  delete deslanted_char_samp_;
+  deslanted_char_samp_ = NULL;
 
   Cleanup();
 }
@@ -129,21 +115,11 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
   // create a beam search object
   if (beam_obj_ == NULL) {
     beam_obj_ = new BeamSearch(cntxt_, word_mode);
-    if (beam_obj_ == NULL) {
-      fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct "
-              "BeamSearch\n");
-      return NULL;
-    }
   }
 
   // create a cube search object
   if (srch_obj_ == NULL) {
     srch_obj_ = new CubeSearchObject(cntxt_, char_samp_);
-    if (srch_obj_ == NULL) {
-      fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct "
-              "CubeSearchObject\n");
-      return NULL;
-    }
   }
 
   // run a beam search against the tesslang model
@@ -156,11 +132,6 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
 
     if (deslanted_beam_obj_ == NULL) {
       deslanted_beam_obj_ = new BeamSearch(cntxt_);
-      if (deslanted_beam_obj_ == NULL) {
-        fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
-                "construct deslanted BeamSearch\n");
-        return NULL;
-      }
     }
 
     if (deslanted_srch_obj_ == NULL) {
@@ -176,11 +147,6 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
       }
 
       deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_);
-      if (deslanted_srch_obj_ == NULL) {
-        fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
-                "construct deslanted CubeSearchObject\n");
-        return NULL;
-      }
     }
 
     // run a beam search against the tesslang model
@@ -219,9 +185,6 @@ WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) {
  */
 int CubeObject::WordCost(const char *str) {
   WordListLangModel *lang_mod = new WordListLangModel(cntxt_);
-  if (lang_mod == NULL) {
-    return WORST_COST;
-  }
 
   if (lang_mod->AddString(str) == false) {
     delete lang_mod;
@@ -256,9 +219,6 @@ CharAltList *CubeObject::RecognizeChar() {
 bool CubeObject::Normalize() {
   // create a cube search object
   CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_);
-  if (srch_obj == NULL) {
-    return false;
-  }
   // Perform over-segmentation
   int seg_cnt = srch_obj->SegPtCnt();
   // Only perform normalization if segment count is large enough
diff --git a/cube/cube_search_object.cpp b/cube/cube_search_object.cpp
index 61294f26b6..ad807193d5 100644
--- a/cube/cube_search_object.cpp
+++ b/cube/cube_search_object.cpp
@@ -127,36 +127,14 @@ bool CubeSearchObject::Init() {
 
   // init cache
   reco_cache_ = new CharAltList **[segment_cnt_];
-  if (reco_cache_ == NULL) {
-    fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not "
-            "allocate CharAltList array\n");
-    return false;
-  }
 
   samp_cache_ = new CharSamp **[segment_cnt_];
-  if (samp_cache_ == NULL) {
-    fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not "
-            "allocate CharSamp array\n");
-    return false;
-  }
 
   for (int seg = 0; seg < segment_cnt_; seg++) {
     reco_cache_[seg] = new CharAltList *[segment_cnt_];
-    if (reco_cache_[seg] == NULL) {
-      fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not "
-              "allocate a single segment's CharAltList array\n");
-      return false;
-    }
-
     memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg]));
 
     samp_cache_[seg] = new CharSamp *[segment_cnt_];
-    if (samp_cache_[seg] == NULL) {
-      fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not "
-              "allocate a single segment's CharSamp array\n");
-      return false;
-    }
-
     memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg]));
   }
 
@@ -305,12 +283,10 @@ CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) {
         exp(-fabs(seg_cnt - 2.0)) *
         exp(-samp->Width() / static_cast<double>(samp->Height()));
 
-    if (alt_list) {
-      for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
-        alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val));
-      }
-      reco_cache_[start_pt + 1][end_pt] = alt_list;
+    for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
+      alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val));
     }
+    reco_cache_[start_pt + 1][end_pt] = alt_list;
   }
 
   return reco_cache_[start_pt + 1][end_pt];
@@ -353,11 +329,6 @@ bool CubeSearchObject::ComputeSpaceCosts() {
   // segmentation point
   int *max_left_x = new int[segment_cnt_ - 1];
   int *min_right_x = new int[segment_cnt_ - 1];
-  if (!max_left_x || !min_right_x) {
-    delete []min_right_x;
-    delete []max_left_x;
-    return false;
-  }
   if (rtl_) {
     min_right_x[0] = segments_[0]->Left();
     max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right();
@@ -384,11 +355,6 @@ bool CubeSearchObject::ComputeSpaceCosts() {
   // trivial cases
   space_cost_ = new int[segment_cnt_ - 1];
   no_space_cost_ = new int[segment_cnt_ - 1];
-  if (!space_cost_ || !no_space_cost_) {
-    delete []min_right_x;
-    delete []max_left_x;
-    return false;
-  }
 
   // go through all segmentation points determining the horizontal gap between
   // the images on both sides of each break points. Use the gap to estimate
@@ -400,7 +366,7 @@ bool CubeSearchObject::ComputeSpaceCosts() {
     float prob = 0.0;
 
     // gap is too small => no space
-    if (gap < min_spc_gap_) {
+    if (gap < min_spc_gap_ || max_spc_gap_ == min_spc_gap_) {
       prob = 0.0;
     } else if (gap > max_spc_gap_) {
       // gap is too big => definite space
diff --git a/cube/cube_tuning_params.cpp b/cube/cube_tuning_params.cpp
index ac16c9f5cb..e4a9b0cf02 100644
--- a/cube/cube_tuning_params.cpp
+++ b/cube/cube_tuning_params.cpp
@@ -54,11 +54,6 @@ CubeTuningParams::~CubeTuningParams() {
 CubeTuningParams *CubeTuningParams::Create(const string &data_file_path,
                                            const string &lang) {
   CubeTuningParams *obj = new CubeTuningParams();
-  if (!obj) {
-    fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
-            "allocate new tuning params object\n");
-    return NULL;
-  }
 
   string tuning_params_file;
   tuning_params_file = data_file_path + lang;
diff --git a/cube/cube_utils.cpp b/cube/cube_utils.cpp
index 13c9c236da..4741659d2a 100644
--- a/cube/cube_utils.cpp
+++ b/cube/cube_utils.cpp
@@ -90,9 +90,6 @@ int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
 char_32 *CubeUtils::StrDup(const char_32 *str32) {
   int len = StrLen(str32);
   char_32 *new_str = new char_32[len + 1];
-  if (new_str == NULL) {
-    return NULL;
-  }
   memcpy(new_str, str32, len * sizeof(*str32));
   new_str[len] = 0;
   return new_str;
@@ -165,9 +162,6 @@ unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
 
   // copy the char img to a temp buffer
   unsigned char *temp_buff = new unsigned char[wid * hgt];
-  if (temp_buff == NULL) {
-    return NULL;
-  }
   l_int32 w;
   l_int32 h;
   l_int32 d;
@@ -211,10 +205,6 @@ bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
   // read the contents
   rewind(fp);
   char *buff = new char[file_size];
-  if (buff == NULL) {
-    fclose(fp);
-    return false;
-  }
   int read_bytes = fread(buff, 1, static_cast<int>(file_size), fp);
   if (read_bytes == file_size) {
     str->append(buff, file_size);
@@ -352,8 +342,6 @@ char_32 *CubeUtils::ToLower(const char_32 *str32, CharSet *char_set) {
   UNICHARSET *unicharset = char_set->InternalUnicharset();
   int len = StrLen(str32);
   char_32 *lower = new char_32[len + 1];
-  if (!lower)
-    return NULL;
   for (int i = 0; i < len; ++i) {
     char_32 ch = str32[i];
     if (ch == INVALID_UNICHAR_ID) {
@@ -385,8 +373,6 @@ char_32 *CubeUtils::ToUpper(const char_32 *str32, CharSet *char_set) {
   UNICHARSET *unicharset = char_set->InternalUnicharset();
   int len = StrLen(str32);
   char_32 *upper = new char_32[len + 1];
-  if (!upper)
-    return NULL;
   for (int i = 0; i < len; ++i) {
     char_32 ch = str32[i];
     if (ch == INVALID_UNICHAR_ID) {
diff --git a/cube/hybrid_neural_net_classifier.cpp b/cube/hybrid_neural_net_classifier.cpp
index 671a74acdf..29b50d0cbc 100644
--- a/cube/hybrid_neural_net_classifier.cpp
+++ b/cube/hybrid_neural_net_classifier.cpp
@@ -136,14 +136,7 @@ bool HybridNeuralNetCharClassifier::RunNets(CharSamp *char_samp) {
   // allocate i/p and o/p buffers if needed
   if (net_input_ == NULL) {
     net_input_ = new float[feat_cnt];
-    if (net_input_ == NULL) {
-      return false;
-    }
-
     net_output_ = new float[class_cnt];
-    if (net_output_ == NULL) {
-      return false;
-    }
   }
 
   // compute input features
@@ -196,9 +189,6 @@ CharAltList *HybridNeuralNetCharClassifier::Classify(CharSamp *char_samp) {
 
   // create an altlist
   CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
-  if (alt_list == NULL) {
-    return NULL;
-  }
 
   for (int out = 1; out < class_cnt; out++) {
     int cost = CubeUtils::Prob2Cost(net_output_[out]);
@@ -240,14 +230,7 @@ bool HybridNeuralNetCharClassifier::LoadFoldingSets(
   CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
   fold_set_cnt_ = str_vec.size();
   fold_sets_ = new int *[fold_set_cnt_];
-  if (fold_sets_ == NULL) {
-    return false;
-  }
   fold_set_len_ = new int[fold_set_cnt_];
-  if (fold_set_len_ == NULL) {
-    fold_set_cnt_ = 0;
-    return false;
-  }
 
   for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
     reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
@@ -266,12 +249,6 @@ bool HybridNeuralNetCharClassifier::LoadFoldingSets(
     CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
     fold_set_len_[fold_set] = str32.length();
     fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
-    if (fold_sets_[fold_set] == NULL) {
-      fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): "
-              "could not allocate folding set\n");
-      fold_set_cnt_ = fold_set;
-      return false;
-    }
     for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
       fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
     }
@@ -330,7 +307,7 @@ bool HybridNeuralNetCharClassifier::LoadNets(const string &data_file_path,
   // split into lines
   vector<string> str_vec;
   CubeUtils::SplitStringUsing(str, "\r\n", &str_vec);
-  if (str_vec.size() <= 0) {
+  if (str_vec.empty()) {
     return false;
   }
 
diff --git a/cube/search_column.cpp b/cube/search_column.cpp
index 9a042d016a..71f2222337 100644
--- a/cube/search_column.cpp
+++ b/cube/search_column.cpp
@@ -62,9 +62,6 @@ bool SearchColumn::Init() {
   // create hash table
   if (node_hash_table_ == NULL) {
     node_hash_table_ = new SearchNodeHashTable();
-    if (node_hash_table_ == NULL) {
-      return false;
-    }
   }
 
   init_ = true;
@@ -144,9 +141,6 @@ SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost,
   // node does not exist
   if (new_node == NULL) {
     new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_);
-    if (new_node == NULL) {
-      return NULL;
-    }
 
     // if the max node count has already been reached, check if the cost of
     // the new node exceeds the max cost. This indicates that it will be pruned
@@ -161,10 +155,6 @@ SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost,
       // alloc a new buff
       SearchNode **new_node_buff =
           new SearchNode *[node_cnt_ + kNodeAllocChunk];
-      if (new_node_buff == NULL) {
-        delete new_node;
-        return NULL;
-      }
 
       // free existing after copying contents
       if (node_array_ != NULL) {
@@ -195,9 +185,7 @@ SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost,
     }
 
     // free the edge
-    if (edge != NULL) {
-      delete edge;
-    }
+    delete edge;
   }
 
   // update Min and Max Costs
diff --git a/cube/search_node.cpp b/cube/search_node.cpp
index ff5bfbd844..cd46625023 100644
--- a/cube/search_node.cpp
+++ b/cube/search_node.cpp
@@ -147,9 +147,6 @@ char_32 *SearchNode::PathString() {
   }
 
   char_32 *char_ptr = new char_32[len + 1];
-  if (char_ptr == NULL) {
-    return NULL;
-  }
 
   int ch_idx = len;
 
diff --git a/cube/tess_lang_mod_edge.cpp b/cube/tess_lang_mod_edge.cpp
index 4d16f3ac28..911070d3e6 100644
--- a/cube/tess_lang_mod_edge.cpp
+++ b/cube/tess_lang_mod_edge.cpp
@@ -72,9 +72,6 @@ TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
 
 char *TessLangModEdge::Description() const {
   char *char_ptr = new char[256];
-  if (!char_ptr) {
-    return NULL;
-  }
 
   char dawg_str[256];
   char edge_str[32];
@@ -115,9 +112,8 @@ int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt,
   for (int i = 0; i < vec.size(); ++i) {
     const NodeChild &child = vec[i];
     if (child.unichar_id == INVALID_UNICHAR_ID) continue;
-    edge_array[edge_cnt] =
+    edge_array[edge_cnt++] =
       new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
-    if (edge_array[edge_cnt] != NULL) edge_cnt++;
   }
   return edge_cnt;
 }
diff --git a/cube/tess_lang_model.cpp b/cube/tess_lang_model.cpp
index 5113207260..3a4c7500d7 100644
--- a/cube/tess_lang_model.cpp
+++ b/cube/tess_lang_model.cpp
@@ -182,9 +182,6 @@ LangModEdge ** TessLangModel::GetEdges(CharAltList *alt_list,
     // preallocate the edge buffer
     (*edge_cnt) = dawg_cnt * max_edge_;
     edge_array = new LangModEdge *[(*edge_cnt)];
-    if (edge_array == NULL) {
-      return NULL;
-    }
 
     for (int dawg_idx = (*edge_cnt) = 0; dawg_idx < dawg_cnt; dawg_idx++) {
       const Dawg *curr_dawg = GetDawg(dawg_idx);
@@ -213,9 +210,6 @@ LangModEdge ** TessLangModel::GetEdges(CharAltList *alt_list,
     (*edge_cnt) = max_edge_;
     // allocate memory for edges
     edge_array = new LangModEdge *[(*edge_cnt)];
-    if (edge_array == NULL) {
-      return NULL;
-    }
 
     // get the FanOut edges from the root of each dawg
     (*edge_cnt) = FanOut(alt_list,
@@ -240,9 +234,6 @@ int TessLangModel::Edges(const char *strng, const Dawg *dawg,
       // create an edge object
       edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg, edge_ref,
                                                  class_id);
-      if (edge_array[edge_cnt] == NULL) {
-        return 0;
-      }
 
       reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
           SetEdgeMask(edge_mask);
@@ -264,10 +255,6 @@ int TessLangModel::OODEdges(CharAltList *alt_list, EDGE_REF edge_ref,
          alt_list->ClassCost(class_id) <= max_ood_shape_cost_)) {
       // create an edge object
       edge_array[edge_cnt] = new TessLangModEdge(cntxt_, class_id);
-      if (edge_array[edge_cnt] == NULL) {
-        return 0;
-      }
-
       edge_cnt++;
     }
   }
@@ -368,11 +355,9 @@ int TessLangModel::FanOut(CharAltList *alt_list, const Dawg *dawg,
               edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg,
                   child_edge->StartEdge(), child_edge->EndEdge(), class_id);
 
-              if (edge_array[edge_cnt] != NULL) {
-                reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
+              reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
                     SetEdgeMask(edge_mask);
-                edge_cnt++;
-              }
+              edge_cnt++;
             }
           }
         }
@@ -486,8 +471,6 @@ void TessLangModel::RemoveInvalidCharacters(string *lm_str) {
 
   int len = CubeUtils::StrLen(lm_str32.c_str());
   char_32 *clean_str32 = new char_32[len + 1];
-  if (!clean_str32)
-    return;
   int clean_len = 0;
   for (int i = 0; i < len; ++i) {
     int class_id = char_set->ClassID((char_32)lm_str32[i]);
diff --git a/cube/word_altlist.cpp b/cube/word_altlist.cpp
index d6775360ad..f91d56c996 100644
--- a/cube/word_altlist.cpp
+++ b/cube/word_altlist.cpp
@@ -45,11 +45,6 @@ bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
     word_alt_ = new char_32*[max_alt_];
     alt_cost_ = new int[max_alt_];
     alt_tag_ = new void *[max_alt_];
-
-    if (word_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) {
-      return false;
-    }
-
     memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
   } else {
     // check if alt already exists
@@ -69,9 +64,6 @@ bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
   int len = CubeUtils::StrLen(word_str);
 
   word_alt_[alt_cnt_] = new char_32[len + 1];
-  if (word_alt_[alt_cnt_] == NULL) {
-    return false;
-  }
 
   if (len > 0) {
     memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
diff --git a/cube/word_list_lang_model.cpp b/cube/word_list_lang_model.cpp
index 67a6a5a985..bb07951d25 100644
--- a/cube/word_list_lang_model.cpp
+++ b/cube/word_list_lang_model.cpp
@@ -54,9 +54,6 @@ bool WordListLangModel::Init() {
   // false for now, until Cube has a way to express its preferred debug level.
   dawg_ = new Trie(DAWG_TYPE_WORD, "", NO_PERM,
                    cntxt_->CharacterSet()->ClassCount(), false);
-  if (dawg_ == NULL) {
-    return false;
-  }
   init_ = true;
   return true;
 }
@@ -97,9 +94,6 @@ LangModEdge **WordListLangModel::GetEdges(CharAltList *alt_list,
 
   // allocate memory for edges
   LangModEdge **edge_array = new LangModEdge *[kMaxEdge];
-  if (edge_array == NULL) {
-    return NULL;
-  }
 
   // now get all the emerging edges
   (*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref,
diff --git a/cube/word_size_model.cpp b/cube/word_size_model.cpp
index 6b9a4530fc..be3ccf734d 100644
--- a/cube/word_size_model.cpp
+++ b/cube/word_size_model.cpp
@@ -43,11 +43,6 @@ WordSizeModel *WordSizeModel::Create(const string &data_file_path,
                                      CharSet *char_set,
                                      bool contextual) {
   WordSizeModel *obj = new WordSizeModel(char_set, contextual);
-  if (!obj) {
-    fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate "
-            "new word size model object\n");
-    return NULL;
-  }
 
   if (!obj->Init(data_file_path, lang)) {
     delete obj;
@@ -96,19 +91,9 @@ bool WordSizeModel::Init(const string &data_file_path, const string &lang) {
       FontPairSizeInfo fnt_info;
 
       fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt];
-      if (!fnt_info.pair_size_info) {
-        fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allcoating "
-                "memory for font pair size info\n");
-        return false;
-      }
 
       fnt_info.pair_size_info[0] =
           new PairSizeInfo[size_class_cnt * size_class_cnt];
-      if (!fnt_info.pair_size_info[0]) {
-        fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allocating "
-                "memory for font pair size info\n");
-        return false;
-      }
 
       memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *
              sizeof(PairSizeInfo));
diff --git a/cube/word_unigrams.cpp b/cube/word_unigrams.cpp
index 70cc9ee1e2..052a025c90 100644
--- a/cube/word_unigrams.cpp
+++ b/cube/word_unigrams.cpp
@@ -76,32 +76,13 @@ WordUnigrams *WordUnigrams::Create(const string &data_file_path,
 
   // allocate memory
   WordUnigrams *word_unigrams_obj = new WordUnigrams();
-  if (word_unigrams_obj == NULL) {
-    fprintf(stderr, "Cube ERROR (WordUnigrams::Create): could not create "
-            "word unigrams object.\n");
-    return NULL;
-  }
 
   int full_len = str.length();
   int word_cnt = str_vec.size() / 2;
   word_unigrams_obj->words_ = new char*[word_cnt];
   word_unigrams_obj->costs_ = new int[word_cnt];
 
-  if (word_unigrams_obj->words_ == NULL ||
-      word_unigrams_obj->costs_ == NULL) {
-    fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error allocating "
-            "word unigram fields.\n");
-    delete word_unigrams_obj;
-    return NULL;
-  }
-
   word_unigrams_obj->words_[0] = new char[full_len];
-  if (word_unigrams_obj->words_[0] == NULL) {
-    fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error allocating "
-            "word unigram fields.\n");
-    delete word_unigrams_obj;
-    return NULL;
-  }
 
   // construct sorted list of words and costs
   word_unigrams_obj->word_cnt_ = 0;
@@ -163,7 +144,7 @@ int WordUnigrams::Cost(const char_32 *key_str32,
   CubeUtils::SplitStringUsing(key_str, " \t", &words);
 
   // no words => no cost
-  if (words.size() <= 0) {
+  if (words.empty()) {
     return 0;
   }
 
diff --git a/cutil/Makefile.am b/cutil/Makefile.am
index 15b339c8f3..5b0ffc6ebb 100644
--- a/cutil/Makefile.am
+++ b/cutil/Makefile.am
@@ -7,7 +7,7 @@ endif
 
 noinst_HEADERS = \
     bitvec.h callcpp.h const.h cutil.h cutil_class.h danerror.h efio.h \
-    emalloc.h freelist.h globals.h listio.h \
+    emalloc.h freelist.h globals.h \
     oldlist.h structures.h
 
 if !USING_MULTIPLELIBS
@@ -22,7 +22,7 @@ endif
 
 libtesseract_cutil_la_SOURCES = \
     bitvec.cpp callcpp.cpp cutil.cpp cutil_class.cpp danerror.cpp efio.cpp \
-    emalloc.cpp freelist.cpp listio.cpp \
+    emalloc.cpp freelist.cpp \
     oldlist.cpp structures.cpp
 
 
diff --git a/cutil/bitvec.h b/cutil/bitvec.h
index f70d748b91..d2a364d261 100644
--- a/cutil/bitvec.h
+++ b/cutil/bitvec.h
@@ -30,29 +30,29 @@ typedef uinT32 *BIT_VECTOR;
 /*-----------------------------------------------------------------------------
           Public Function Prototypes
 -----------------------------------------------------------------------------*/
-#define zero_all_bits(array,length) \
-{\
-    int index;                        /*temporary index*/\
-\
-for (index=0;index<length;index++)\
-    array[index]=0;                                        /*zero all bits*/\
-}
+#define zero_all_bits(array, length)         \
+  {                                          \
+    int index; /*temporary index*/           \
+                                             \
+    for (index = 0; index < length; index++) \
+      array[index] = 0; /*zero all bits*/    \
+  }
 
-#define set_all_bits(array,length) \
-{\
-    int index;                        /*temporary index*/\
-\
-for (index=0;index<length;index++)\
-    array[index]= ~0;                                    /*set all bits*/\
-}
+#define set_all_bits(array, length)          \
+  {                                          \
+    int index; /*temporary index*/           \
+                                             \
+    for (index = 0; index < length; index++) \
+      array[index] = ~0; /*set all bits*/    \
+  }
 
-#define copy_all_bits(source,dest,length) \
-{\
-    int index;                        /*temporary index*/\
-\
-for (index=0;index<length;index++)\
-    dest[index]=source[index];                            /*copy all bits*/\
-}
+#define copy_all_bits(source, dest, length)          \
+  {                                                  \
+    int index; /*temporary index*/                   \
+                                                     \
+    for (index = 0; index < length; index++)         \
+      dest[index] = source[index]; /*copy all bits*/ \
+  }
 
 #define SET_BIT(array,bit) (array[bit/BITSINLONG]|=1<<(bit&(BITSINLONG-1)))
 
diff --git a/cutil/cutil.h b/cutil/cutil.h
index 38b3ff9e39..42967093ab 100644
--- a/cutil/cutil.h
+++ b/cutil/cutil.h
@@ -92,26 +92,6 @@ typedef void (*void_dest) (void *);
 #define print_string(str)  \
   printf ("%s\n", str)
 
-/**********************************************************************
- * strfree
- *
- * Free the memory which was reserved by strsave.
- **********************************************************************/
-
-#define strfree(s)  (free_string(s))
-
-/**********************************************************************
- * strsave
- *
- * Reserve a spot in memory for the string to be stored. Copy the string
- * to it and return the result.
- **********************************************************************/
-
-#define strsave(s)    \
-  ((s) != NULL ?  \
-   ((char*) strcpy (alloc_string(strlen(s)+1), s))  :  \
-   (NULL))
-
 /*----------------------------------------------------------------------
                      F u n c t i o n s
 ----------------------------------------------------------------------*/
diff --git a/cutil/danerror.cpp b/cutil/danerror.cpp
index d9216bc7e0..db48484659 100644
--- a/cutil/danerror.cpp
+++ b/cutil/danerror.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:	danerror.c
- **	Purpose:	Routines for managing error trapping
- **	Author:		Dan Johnson
- **	History:	3/17/89, DSJ, Created.
+ ** Filename: danerror.c
+ ** Purpose:  Routines for managing error trapping
+ ** Author:   Dan Johnson
+ ** History:  3/17/89, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
diff --git a/cutil/efio.cpp b/cutil/efio.cpp
index 092ecf15e3..66235010ca 100644
--- a/cutil/efio.cpp
+++ b/cutil/efio.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:	efio.c
- **	Purpose:	Utility I/O routines
- **	Author:		Dan Johnson
- **	History:	5/21/89, DSJ, Created.
+ ** Filename: efio.c
+ ** Purpose:  Utility I/O routines
+ ** Author:   Dan Johnson
+ ** History:  5/21/89, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -40,7 +40,7 @@
  * @note Exceptions: #FOPENERROR unable to open specified file
  * @note History: 5/21/89, DSJ, Created.
  */
-FILE *Efopen(const char *Name, const char *Mode) { 
+FILE *Efopen(const char *Name, const char *Mode) {
   FILE *File;
   char ErrorMessage[MAXERRORMESSAGE];
 
diff --git a/cutil/emalloc.cpp b/cutil/emalloc.cpp
index 80ced8b397..3f16010d05 100644
--- a/cutil/emalloc.cpp
+++ b/cutil/emalloc.cpp
@@ -10,7 +10,7 @@
 **       History:
               4/3/89, DSJ, Created.
 **
-**	(c) Copyright Hewlett-Packard Company, 1988.
+**  (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
diff --git a/cutil/listio.cpp b/cutil/listio.cpp
deleted file mode 100644
index 475088f5b5..0000000000
--- a/cutil/listio.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-/* -*-C-*-
-################################################################################
-#
-# File:						listio.c
-# Description:				List I/O processing procedures.
-# Author:					Mark Seaman, Software Productivity
-# Created:					Thu Jul 23 13:24:09 1987
-# Modified:     Fri May 17 17:33:30 1991 (Mark Seaman) marks@hpgrlt
-# Language:					C
-# Package:					N/A
-# Status:					Reusable Software Component
-#
-# (c) Copyright 1987, Hewlett-Packard Company.
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-** http://www.apache.org/licenses/LICENSE-2.0
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-#
-################################################################################
-
-This file contains the implementations of a set of general purpose
-list I/O routines.  For the interface definitions look in the file
-"listio.h".
----------------------------------------------------------------------------*/
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include "listio.h"
-
-/*---------------------------------------------------------------------------
-        Public Function Code
----------------------------------------------------------------------------*/
-/*************************************************************************
- *  R E A D   L I S T
- *
- *							Read a list of strings from a file.  Return the string list to the
- *							caller.
- *************************************************************************/
-LIST read_list(const char *filename) {
-  FILE *infile;
-  char s[CHARS_PER_LINE];
-  LIST list;
-
-  if ((infile = open_file (filename, "r")) == NULL)
-    return (NIL_LIST);
-
-  list = NIL_LIST;
-  while (fgets (s, CHARS_PER_LINE, infile) != NULL) {
-    s[CHARS_PER_LINE - 1] = '\0';
-    if (strlen (s) > 0) {
-      if (s[strlen (s) - 1] == '\n')
-        s[strlen (s) - 1] = '\0';
-      if (strlen (s) > 0) {
-        list = push (list, (LIST) strsave (s));
-      }
-    }
-  }
-
-  fclose(infile);
-  return (reverse_d (list));
-}
diff --git a/cutil/listio.h b/cutil/listio.h
deleted file mode 100644
index 7d9c19f777..0000000000
--- a/cutil/listio.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* -*-C-*-
-################################################################################
-#
-# File:         listio.h
-# Description:  List I/O processing procedures.
-# Author:       Mark Seaman, Software Productivity
-# Created:      Thu Jul 23 13:24:09 1987
-# Modified:     Mon Oct 16 11:38:52 1989 (Mark Seaman) marks@hpgrlt
-# Language:     C
-# Package:      N/A
-# Status:       Reusable Software Component
-#
-# (c) Copyright 1987, Hewlett-Packard Company.
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-** http://www.apache.org/licenses/LICENSE-2.0
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-#
-################################################################################
- * Revision 1.5  89/06/27  11:56:00  11:56:00  marks (Mark Seaman)
- * Fixed MAC_OR_DOS bug
- *
-
-  This file contains the interface definitions to a set of general purpose
-  list I/O routines.
-
-***********************************************************************/
-#ifndef LISTIO_H
-#define LISTIO_H
-
-#include <stdio.h>
-#include "oldlist.h"
-
-/*----------------------------------------------------------------------------
-        Public Function Prototypes
---------------------------------------------------------------------------*/
-LIST read_list(const char *filename);
-#endif
diff --git a/cutil/oldlist.cpp b/cutil/oldlist.cpp
index 52c0d8680a..9e3f6f4c06 100644
--- a/cutil/oldlist.cpp
+++ b/cutil/oldlist.cpp
@@ -206,8 +206,8 @@ void destroy_nodes(LIST list, void_dest destructor) {
     destructor = memfree;
 
   while (list != NIL_LIST) {
-    (*destructor) (first_node (list));
-    list = pop (list);
+    if (first_node(list) != NULL) (*destructor)(first_node(list));
+    list = pop(list);
   }
 }
 
@@ -401,7 +401,6 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) {
   return (push_last (var_list, variable));
 }
 
-
 /**********************************************************************
  *   s e a r c h
  *
diff --git a/dict/context.cpp b/dict/context.cpp
index a9acb137c3..368ad462b4 100644
--- a/dict/context.cpp
+++ b/dict/context.cpp
@@ -32,28 +32,22 @@ namespace tesseract {
 static const int kMinAbsoluteGarbageWordLength = 10;
 static const float kMinAbsoluteGarbageAlphanumFrac = 0.5f;
 
-const int case_state_table[6][4] = { {
-                                  /*  0. Beginning of word       */
-    /*    P   U   L   D                                          */
-                                  /* -1. Error on case           */
-      0, 1, 5, 4
-    },
-    {                            /*  1. After initial capital    */
-      0, 3, 2, 4
-    },
-    {                            /*  2. After lower case         */
-      0, -1, 2, -1
-    },
-    {                            /*  3. After upper case         */
-      0, 3, -1, 4
-    },
-    {                            /*  4. After a digit            */
-      0, -1, -1, 4
-    },
-    {                            /*  5. After initial lower case */
-      5, -1, 2, -1
-    },
-  };
+const int case_state_table[6][4] = {
+    {/*  0. Beginning of word       */
+     /*    P   U   L   D                                          */
+     /* -1. Error on case           */
+     0, 1, 5, 4},
+    {/*  1. After initial capital    */
+     0, 3, 2, 4},
+    {/*  2. After lower case         */
+     0, -1, 2, -1},
+    {/*  3. After upper case         */
+     0, 3, -1, 4},
+    {/*  4. After a digit            */
+     0, -1, -1, 4},
+    {/*  5. After initial lower case */
+     5, -1, 2, -1},
+};
 
 int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) {
   int state = 0;
diff --git a/dict/dawg.h b/dict/dawg.h
index b37e771503..bd789f3722 100644
--- a/dict/dawg.h
+++ b/dict/dawg.h
@@ -128,7 +128,7 @@ class Dawg {
   inline const STRING &lang() const { return lang_; }
   inline PermuterType permuter() const { return perm_; }
 
-  virtual ~Dawg() {};
+  virtual ~Dawg() {}
 
   /// Returns true if the given word is in the Dawg.
   bool word_in_dawg(const WERD_CHOICE &word) const;
@@ -183,13 +183,20 @@ class Dawg {
   /// of the given unichar_id.
   virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
                                       const UNICHARSET &unicharset,
-                                      GenericVector<UNICHAR_ID> *vec) const {};
+                                      GenericVector<UNICHAR_ID> *vec) const {
+    (void)unichar_id;
+    (void)unicharset;
+    (void)vec;
+  }
 
   /// Returns the given EDGE_REF if the EDGE_RECORD that it points to has
   /// a self loop and the given unichar_id matches the unichar_id stored in the
   /// EDGE_RECORD, returns NO_EDGE otherwise.
   virtual EDGE_REF pattern_loop_edge(
       EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const {
+    (void)edge_ref;
+    (void)unichar_id;
+    (void)word_end;
     return false;
   }
 
@@ -368,14 +375,6 @@ struct DawgPosition {
 
 class DawgPositionVector : public GenericVector<DawgPosition> {
  public:
-  /// Overload destructor, since clear() does not delete data_[] any more.
-  ~DawgPositionVector() {
-    if (size_reserved_ > 0) {
-      delete[] data_;
-      size_used_ = 0;
-      size_reserved_ = 0;
-    }
-  }
   /// Overload clear() in order to avoid allocating/deallocating memory
   /// when clearing the vector and re-inserting entries into it later.
   void clear() { size_used_ = 0; }
diff --git a/dict/dict.cpp b/dict/dict.cpp
index 6f8e0249a1..0dcc7d6c6e 100644
--- a/dict/dict.cpp
+++ b/dict/dict.cpp
@@ -30,13 +30,12 @@ namespace tesseract {
 
 class Image;
 
-Dict::Dict(CCUtil* ccutil)
+Dict::Dict(CCUtil *ccutil)
     : letter_is_okay_(&tesseract::Dict::def_letter_is_okay),
       probability_in_context_(&tesseract::Dict::def_probability_in_context),
       params_model_classify_(NULL),
       ccutil_(ccutil),
-      STRING_MEMBER(user_words_file, "",
-                    "A filename of user-provided words.",
+      STRING_MEMBER(user_words_file, "", "A filename of user-provided words.",
                     getCCUtil()->params()),
       STRING_INIT_MEMBER(user_words_suffix, "",
                          "A suffix of user-provided words located in tessdata.",
@@ -54,33 +53,41 @@ Dict::Dict(CCUtil* ccutil)
                        getCCUtil()->params()),
       BOOL_INIT_MEMBER(load_unambig_dawg, true, "Load unambiguous word dawg.",
                        getCCUtil()->params()),
-      BOOL_INIT_MEMBER(load_punc_dawg, true, "Load dawg with punctuation"
-                       " patterns.", getCCUtil()->params()),
-      BOOL_INIT_MEMBER(load_number_dawg, true, "Load dawg with number"
-                       " patterns.", getCCUtil()->params()),
-      BOOL_INIT_MEMBER(load_bigram_dawg, true, "Load dawg with special word "
-                       "bigrams.", getCCUtil()->params()),
+      BOOL_INIT_MEMBER(load_punc_dawg, true,
+                       "Load dawg with punctuation"
+                       " patterns.",
+                       getCCUtil()->params()),
+      BOOL_INIT_MEMBER(load_number_dawg, true,
+                       "Load dawg with number"
+                       " patterns.",
+                       getCCUtil()->params()),
+      BOOL_INIT_MEMBER(load_bigram_dawg, true,
+                       "Load dawg with special word "
+                       "bigrams.",
+                       getCCUtil()->params()),
       double_MEMBER(xheight_penalty_subscripts, 0.125,
                     "Score penalty (0.1 = 10%) added if there are subscripts "
                     "or superscripts in a word, but it is otherwise OK.",
                     getCCUtil()->params()),
       double_MEMBER(xheight_penalty_inconsistent, 0.25,
                     "Score penalty (0.1 = 10%) added if an xheight is "
-                    "inconsistent.", getCCUtil()->params()),
+                    "inconsistent.",
+                    getCCUtil()->params()),
       double_MEMBER(segment_penalty_dict_frequent_word, 1.0,
                     "Score multiplier for word matches which have good case and"
                     "are frequent in the given language (lower is better).",
                     getCCUtil()->params()),
       double_MEMBER(segment_penalty_dict_case_ok, 1.1,
                     "Score multiplier for word matches that have good case "
-                    "(lower is better).", getCCUtil()->params()),
+                    "(lower is better).",
+                    getCCUtil()->params()),
       double_MEMBER(segment_penalty_dict_case_bad, 1.3125,
                     "Default score multiplier for word matches, which may have "
                     "case issues (lower is better).",
                     getCCUtil()->params()),
       double_MEMBER(segment_penalty_ngram_best_choice, 1.24,
-                   "Multipler to for the best choice from the ngram model.",
-                   getCCUtil()->params()),
+                    "Multipler to for the best choice from the ngram model.",
+                    getCCUtil()->params()),
       double_MEMBER(segment_penalty_dict_nonword, 1.25,
                     "Score multiplier for glyph fragment segmentations which "
                     "do not match a dictionary word (lower is better).",
@@ -88,11 +95,13 @@ Dict::Dict(CCUtil* ccutil)
       double_MEMBER(segment_penalty_garbage, 1.50,
                     "Score multiplier for poorly cased strings that are not in"
                     " the dictionary and generally look like garbage (lower is"
-                    " better).", getCCUtil()->params()),
+                    " better).",
+                    getCCUtil()->params()),
       STRING_MEMBER(output_ambig_words_file, "",
                     "Output file for ambiguities found in the dictionary",
                     getCCUtil()->params()),
-      INT_MEMBER(dawg_debug_level, 0, "Set to 1 for general debug info"
+      INT_MEMBER(dawg_debug_level, 0,
+                 "Set to 1 for general debug info"
                  ", to 2 for more details, to 3 to see all the debug messages",
                  getCCUtil()->params()),
       INT_MEMBER(hyphen_debug_level, 0, "Debug level for hyphenated words.",
@@ -109,12 +118,12 @@ Dict::Dict(CCUtil* ccutil)
                     "Certainty threshold for non-dict words",
                     getCCUtil()->params()),
       double_MEMBER(stopper_phase2_certainty_rejection_offset, 1.0,
-                    "Reject certainty offset",
-                    getCCUtil()->params()),
+                    "Reject certainty offset", getCCUtil()->params()),
       INT_MEMBER(stopper_smallword_size, 2,
                  "Size of dict word to be treated as non-dict word",
                  getCCUtil()->params()),
-      double_MEMBER(stopper_certainty_per_char, -0.50, "Certainty to add"
+      double_MEMBER(stopper_certainty_per_char, -0.50,
+                    "Certainty to add"
                     " for each dict char above small word size.",
                     getCCUtil()->params()),
       double_MEMBER(stopper_allowable_character_badness, 3.0,
@@ -130,9 +139,9 @@ Dict::Dict(CCUtil* ccutil)
                   "Deprecated- backward compatibility only",
                   getCCUtil()->params()),
       INT_MEMBER(tessedit_truncate_wordchoice_log, 10,
-                 "Max words to keep in list",
-                 getCCUtil()->params()),
-      STRING_MEMBER(word_to_debug, "", "Word for which stopper debug"
+                 "Max words to keep in list", getCCUtil()->params()),
+      STRING_MEMBER(word_to_debug, "",
+                    "Word for which stopper debug"
                     " information should be printed to stdout",
                     getCCUtil()->params()),
       STRING_MEMBER(word_to_debug_lengths, "",
@@ -141,10 +150,10 @@ Dict::Dict(CCUtil* ccutil)
       INT_MEMBER(fragments_debug, 0, "Debug character fragments",
                  getCCUtil()->params()),
       BOOL_MEMBER(segment_nonalphabetic_script, false,
-                 "Don't use any alphabetic-specific tricks."
-                 "Set to true in the traineddata config file for"
-                 " scripts that are cursive or inherently fixed-pitch",
-                 getCCUtil()->params()),
+                  "Don't use any alphabetic-specific tricks."
+                  "Set to true in the traineddata config file for"
+                  " scripts that are cursive or inherently fixed-pitch",
+                  getCCUtil()->params()),
       BOOL_MEMBER(save_doc_words, 0, "Save Document Words",
                   getCCUtil()->params()),
       double_MEMBER(doc_dict_pending_threshold, 0.0,
@@ -152,8 +161,10 @@ Dict::Dict(CCUtil* ccutil)
                     getCCUtil()->params()),
       double_MEMBER(doc_dict_certainty_threshold, -2.25,
                     "Worst certainty for words that can be inserted into the"
-                    "document dictionary", getCCUtil()->params()),
-      INT_MEMBER(max_permuter_attempts, 10000, "Maximum number of different"
+                    "document dictionary",
+                    getCCUtil()->params()),
+      INT_MEMBER(max_permuter_attempts, 10000,
+                 "Maximum number of different"
                  " character choices to consider during permutation."
                  " This limit is especially useful when user patterns"
                  " are specified, since overly generic patterns can result in"
@@ -179,7 +190,8 @@ Dict::Dict(CCUtil* ccutil)
 }
 
 Dict::~Dict() {
-  if (hyphen_word_ != NULL) delete hyphen_word_;
+  End();
+  delete hyphen_word_;
   if (output_ambig_words_file_ != NULL) fclose(output_ambig_words_file_);
 }
 
@@ -190,10 +202,8 @@ DawgCache *Dict::GlobalDawgCache() {
   return &cache;
 }
 
-void Dict::Load(DawgCache *dawg_cache) {
-  STRING name;
-  STRING &lang = getCCUtil()->lang;
-
+// Prepares the dictionary for a subsequent call to Load().
+void Dict::SetupForLoad(DawgCache *dawg_cache) {
   if (dawgs_.length() != 0) this->End();
 
   apostrophe_unichar_id_ = getUnicharset().unichar_to_id(kApostropheSymbol);
@@ -208,10 +218,10 @@ void Dict::Load(DawgCache *dawg_cache) {
     dawg_cache_ = new DawgCache();
     dawg_cache_is_ours_ = true;
   }
+}
 
-  TessdataManager &tessdata_manager = getCCUtil()->tessdata_manager;
-  const char *data_file_name = tessdata_manager.GetDataFileName().string();
-
+// Loads the dawgs needed by Tesseract. Call FinishLoad() after.
+void Dict::Load(const char *data_file_name, const STRING &lang) {
   // Load dawgs_.
   if (load_punc_dawg) {
     punc_dawg_ = dawg_cache_->GetSquishedDawg(
@@ -243,6 +253,7 @@ void Dict::Load(DawgCache *dawg_cache) {
     if (unambig_dawg_) dawgs_ += unambig_dawg_;
   }
 
+  STRING name;
   if (((STRING &)user_words_suffix).length() > 0 ||
       ((STRING &)user_words_file).length() > 0) {
     Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM,
@@ -288,8 +299,13 @@ void Dict::Load(DawgCache *dawg_cache) {
   // This dawg is temporary and should not be searched by letter_is_ok.
   pending_words_ = new Trie(DAWG_TYPE_WORD, lang, NO_PERM,
                             getUnicharset().size(), dawg_debug_level);
+}
 
-  // Construct a list of corresponding successors for each dawg. Each entry i
+// Completes the loading process after Load().
+// Returns false if no dictionaries were loaded.
+bool Dict::FinishLoad() {
+  if (dawgs_.empty()) return false;
+  // Construct a list of corresponding successors for each dawg. Each entry, i,
   // in the successors_ vector is a vector of integers that represent the
   // indices into the dawgs_ vector of the successors for dawg i.
   successors_.reserve(dawgs_.length());
@@ -304,6 +320,7 @@ void Dict::Load(DawgCache *dawg_cache) {
     }
     successors_ += lst;
   }
+  return true;
 }
 
 void Dict::End() {
@@ -323,10 +340,8 @@ void Dict::End() {
   dawgs_.clear();
   successors_.clear();
   document_words_ = NULL;
-  if (pending_words_ != NULL) {
-    delete pending_words_;
-    pending_words_ = NULL;
-  }
+  delete pending_words_;
+  pending_words_ = NULL;
 }
 
 // Returns true if in light of the current state unichar_id is allowed
@@ -356,6 +371,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
   // Initialization.
   PermuterType curr_perm = NO_PERM;
   dawg_args->updated_dawgs->clear();
+  dawg_args->valid_end = false;
 
   // Go over the active_dawgs vector and insert DawgPosition records
   // with the updated ref (an edge with the corresponding unichar id) into
@@ -393,6 +409,9 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
                 dawg_debug_level > 0,
                 "Append transition from punc dawg to current dawgs: ");
             if (sdawg->permuter() > curr_perm) curr_perm = sdawg->permuter();
+            if (sdawg->end_of_word(dawg_edge) &&
+                punc_dawg->end_of_word(punc_transition_edge))
+              dawg_args->valid_end = true;
           }
         }
       }
@@ -407,6 +426,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
             dawg_debug_level > 0,
             "Extend punctuation dawg: ");
         if (PUNC_PERM > curr_perm) curr_perm = PUNC_PERM;
+        if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true;
       }
       continue;
     }
@@ -424,6 +444,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
             dawg_debug_level > 0,
             "Return to punctuation dawg: ");
         if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter();
+        if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true;
       }
     }
 
@@ -433,8 +454,8 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
     // possible edges, not only for the exact unichar_id, but also
     // for all its character classes (alpha, digit, etc).
     if (dawg->type() == DAWG_TYPE_PATTERN) {
-      ProcessPatternEdges(dawg, pos, unichar_id, word_end,
-                          dawg_args->updated_dawgs, &curr_perm);
+      ProcessPatternEdges(dawg, pos, unichar_id, word_end, dawg_args,
+                          &curr_perm);
       // There can't be any successors to dawg that is of type
       // DAWG_TYPE_PATTERN, so we are done examining this DawgPosition.
       continue;
@@ -461,6 +482,9 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
         continue;
       }
       if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter();
+      if (dawg->end_of_word(edge) &&
+          (punc_dawg == NULL || punc_dawg->end_of_word(pos.punc_ref)))
+        dawg_args->valid_end = true;
       dawg_args->updated_dawgs->add_unique(
           DawgPosition(pos.dawg_index, edge, pos.punc_index, pos.punc_ref,
                        false),
@@ -485,7 +509,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
 
 void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos,
                                UNICHAR_ID unichar_id, bool word_end,
-                               DawgPositionVector *updated_dawgs,
+                               DawgArgs *dawg_args,
                                PermuterType *curr_perm) const {
   NODE_REF node = GetStartingNode(dawg, pos.dawg_ref);
   // Try to find the edge corresponding to the exact unichar_id and to all the
@@ -508,7 +532,8 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos,
         tprintf("Letter found in pattern dawg %d\n", pos.dawg_index);
       }
       if (dawg->permuter() > *curr_perm) *curr_perm = dawg->permuter();
-      updated_dawgs->add_unique(
+      if (dawg->end_of_word(edge)) dawg_args->valid_end = true;
+      dawg_args->updated_dawgs->add_unique(
           DawgPosition(pos.dawg_index, edge, pos.punc_index, pos.punc_ref,
                        pos.back_to_punc),
           dawg_debug_level > 0,
@@ -804,5 +829,12 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) {
   return false;
 }
 
+/// Returns true if the language is space-delimited (no Han or Katakana script).
+bool Dict::IsSpaceDelimitedLang() const {
+  const UNICHARSET &u_set = getUnicharset();
+  if (u_set.han_sid() > 0) return false;
+  if (u_set.katakana_sid() > 0) return false;
+  return true;
+}
 
 }  // namespace tesseract
diff --git a/dict/dict.h b/dict/dict.h
index 326f1235d5..a3883b5124 100644
--- a/dict/dict.h
+++ b/dict/dict.h
@@ -23,7 +23,6 @@
 #include "dawg.h"
 #include "dawg_cache.h"
 #include "host.h"
-#include "oldlist.h"
 #include "ratngs.h"
 #include "stopper.h"
 #include "trie.h"
@@ -76,11 +75,13 @@ enum XHeightConsistencyEnum {XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT};
 
 struct DawgArgs {
   DawgArgs(DawgPositionVector *d, DawgPositionVector *up, PermuterType p)
-      : active_dawgs(d), updated_dawgs(up), permuter(p) {}
+      : active_dawgs(d), updated_dawgs(up), permuter(p), valid_end(false) {}
 
   DawgPositionVector *active_dawgs;
   DawgPositionVector *updated_dawgs;
   PermuterType permuter;
+  // True if the current position is a valid word end.
+  bool valid_end;
 };
 
 class Dict {
@@ -243,7 +244,7 @@ class Dict {
                              CHAR_FRAGMENT_INFO *char_frag_info);
 
   /* stopper.cpp *************************************************************/
-  bool NoDangerousAmbig(WERD_CHOICE *BestChoice,
+  bool TESS_API NoDangerousAmbig(WERD_CHOICE *BestChoice,
                         DANGERR *fixpt,
                         bool fix_replaceable,
                         MATRIX* ratings);
@@ -294,7 +295,13 @@ class Dict {
   /// Initialize Dict class - load dawgs from [lang].traineddata and
   /// user-specified wordlist and parttern list.
   static DawgCache *GlobalDawgCache();
-  void Load(DawgCache *dawg_cache);
+  // Prepares the dictionary for a subsequent call to Load().
+  void SetupForLoad(DawgCache *dawg_cache);
+  // Loads the dawgs needed by Tesseract. Call FinishLoad() after.
+  void Load(const char *data_file_name, const STRING &lang);
+  // Completes the loading process after Load().
+  // Returns false if no dictionaries were loaded.
+  bool FinishLoad();
   void End();
 
   // Resets the document dictionary analogous to ResetAdaptiveClassifier.
@@ -374,10 +381,11 @@ class Dict {
   double def_probability_in_context(
       const char* lang, const char* context, int context_bytes,
       const char* character, int character_bytes) {
-    (void) context;
-    (void) context_bytes;
-    (void) character;
-    (void) character_bytes;
+    (void)lang;
+    (void)context;
+    (void)context_bytes;
+    (void)character;
+    (void)character_bytes;
     return 0.0;
   }
   double ngram_probability_in_context(const char* lang,
@@ -397,9 +405,7 @@ class Dict {
   }
 
   inline void SetWildcardID(UNICHAR_ID id) { wildcard_unichar_id_ = id; }
-  inline UNICHAR_ID WildcardID() const {
-    return wildcard_unichar_id_;
-  }
+  inline UNICHAR_ID WildcardID() const { return wildcard_unichar_id_; }
   /// Return the number of dawgs in the dawgs_ vector.
   inline int NumDawgs() const { return dawgs_.size(); }
   /// Return i-th dawg pointer recorded in the dawgs_ vector.
@@ -436,7 +442,7 @@ class Dict {
   /// edges were found.
   void ProcessPatternEdges(const Dawg *dawg, const DawgPosition &info,
                            UNICHAR_ID unichar_id, bool word_end,
-                           DawgPositionVector *updated_dawgs,
+                           DawgArgs *dawg_args,
                            PermuterType *current_permuter) const;
 
   /// Read/Write/Access special purpose dawgs which contain words
@@ -483,6 +489,8 @@ class Dict {
   inline void SetWordsegRatingAdjustFactor(float f) {
     wordseg_rating_adjust_factor_ = f;
   }
+  /// Returns true if the language is space-delimited (no Han or Katakana script).
+  bool IsSpaceDelimitedLang() const;
 
  private:
   /** Private member variables. */
diff --git a/dict/stopper.cpp b/dict/stopper.cpp
index 660b4c8cf3..a0e3c02b2e 100644
--- a/dict/stopper.cpp
+++ b/dict/stopper.cpp
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    stopper.c
- **	Purpose:     Stopping criteria for word classifier.
- **	Author:      Dan Johnson
- **	History:     Mon Apr 29 14:56:49 1991, DSJ, Created.
+ ** Filename:    stopper.cpp
+ ** Purpose:     Stopping criteria for word classifier.
+ ** Author:      Dan Johnson
+ ** History:     Mon Apr 29 14:56:49 1991, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
@@ -41,7 +41,6 @@
 #pragma warning(disable:4800)  // int/bool warnings
 #endif
 
-using tesseract::ScriptPos;
 /*----------------------------------------------------------------------------
               Private Code
 ----------------------------------------------------------------------------*/
diff --git a/dict/stopper.h b/dict/stopper.h
index b028b0ee29..58d23734dd 100644
--- a/dict/stopper.h
+++ b/dict/stopper.h
@@ -1,10 +1,10 @@
 /******************************************************************************
- **	Filename:    stopper.h
- **	Purpose:     Stopping criteria for word classifier.
- **	Author:      Dan Johnson
- **	History:     Wed May  1 09:42:57 1991, DSJ, Created.
+ ** Filename:    stopper.h
+ ** Purpose:     Stopping criteria for word classifier.
+ ** Author:      Dan Johnson
+ ** History:     Wed May  1 09:42:57 1991, DSJ, Created.
  **
- **	(c) Copyright Hewlett-Packard Company, 1988.
+ ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
diff --git a/dict/trie.cpp b/dict/trie.cpp
index 6a7a8d1e35..ac7dd33cdc 100644
--- a/dict/trie.cpp
+++ b/dict/trie.cpp
@@ -276,7 +276,6 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word,
 
 NODE_REF Trie::new_dawg_node() {
   TRIE_NODE_RECORD *node = new TRIE_NODE_RECORD();
-  if (node == NULL) return 0;  // failed to create new node
   nodes_.push_back(node);
   return nodes_.length() - 1;
 }
diff --git a/doc/ambiguous_words.1.html b/doc/ambiguous_words.1.html
index 3fd5f7f1f6..be74b62d0d 100644
--- a/doc/ambiguous_words.1.html
+++ b/doc/ambiguous_words.1.html
@@ -1,790 +1,790 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>AMBIGUOUS_WORDS(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-AMBIGUOUS_WORDS(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>ambiguous_words -
-   generate sets of words Tesseract is likely to find ambiguous
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><strong>ambiguous_words</strong> [-l lang] <em>TESSDATADIR</em> <em>WORDLIST</em> <em>AMBIGUOUSFILE</em></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>ambiguous_words(1) runs Tesseract in a special mode, and for each word
-in word list, produces a set of words which Tesseract thinks might be
-ambiguous with it.   <em>TESSDATADIR</em> must be set to the absolute path of
-a directory containing <em>tessdata/lang.traineddata</em>.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1)</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
-Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-05-13 19:59:45 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>AMBIGUOUS_WORDS(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overriden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnodes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE return full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+AMBIGUOUS_WORDS(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>ambiguous_words -
+   generate sets of words Tesseract is likely to find ambiguous
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><strong>ambiguous_words</strong> [-l lang] <em>TESSDATADIR</em> <em>WORDLIST</em> <em>AMBIGUOUSFILE</em></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>ambiguous_words(1) runs Tesseract in a special mode, and for each word
+in word list, produces a set of words which Tesseract thinks might be
+ambiguous with it.   <em>TESSDATADIR</em> must be set to the absolute path of
+a directory containing <em>tessdata/lang.traineddata</em>.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
+Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-05-13 19:59:45 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/ambiguous_words.1.xml b/doc/ambiguous_words.1.xml
index 6293866ceb..4900c6eb93 100644
--- a/doc/ambiguous_words.1.xml
+++ b/doc/ambiguous_words.1.xml
@@ -1,43 +1,43 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>AMBIGUOUS_WORDS(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>ambiguous_words</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>ambiguous_words</refname>
-    <refpurpose>generate sets of words Tesseract is likely to find ambiguous</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara><emphasis role="strong">ambiguous_words</emphasis> [-l lang] <emphasis>TESSDATADIR</emphasis> <emphasis>WORDLIST</emphasis> <emphasis>AMBIGUOUSFILE</emphasis></simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>ambiguous_words(1) runs Tesseract in a special mode, and for each word
-in word list, produces a set of words which Tesseract thinks might be
-ambiguous with it.   <emphasis>TESSDATADIR</emphasis> must be set to the absolute path of
-a directory containing <emphasis>tessdata/lang.traineddata</emphasis>.</simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1)</simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (C) 2012 Google, Inc.
-Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>AMBIGUOUS_WORDS(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>ambiguous_words</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>ambiguous_words</refname>
+    <refpurpose>generate sets of words Tesseract is likely to find ambiguous</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara><emphasis role="strong">ambiguous_words</emphasis> [-l lang] <emphasis>TESSDATADIR</emphasis> <emphasis>WORDLIST</emphasis> <emphasis>AMBIGUOUSFILE</emphasis></simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>ambiguous_words(1) runs Tesseract in a special mode, and for each word
+in word list, produces a set of words which Tesseract thinks might be
+ambiguous with it.   <emphasis>TESSDATADIR</emphasis> must be set to the absolute path of
+a directory containing <emphasis>tessdata/lang.traineddata</emphasis>.</simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1)</simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (C) 2012 Google, Inc.
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/cntraining.1.html b/doc/cntraining.1.html
index 706d3bd0f4..7653061e1e 100644
--- a/doc/cntraining.1.html
+++ b/doc/cntraining.1.html
@@ -1,805 +1,805 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>CNTRAINING(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-CNTRAINING(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>cntraining -
-   character normalization training for Tesseract
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><strong>cntraining</strong> [-D <em>dir</em>] <em>FILE</em>&#8230;</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>cntraining takes a list of .tr files, from which it generates the
-<strong>normproto</strong> data file (the character normalization sensitivity
-prototypes).</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_options">OPTIONS</h2>
-<div class="sectionbody">
-<div class="dlist"><dl>
-<dt class="hdlist1">
--D <em>dir</em>
-</dt>
-<dd>
-<p>
-        Directory to write output files to.
-</p>
-</dd>
-</dl></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), shapeclustering(1), mftraining(1)</p></div>
-<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Copyright (c) Hewlett-Packard Company, 1988
-Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-12 23:50:30 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>CNTRAINING(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnotes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work,
+        // because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE return full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+CNTRAINING(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>cntraining -
+   character normalization training for Tesseract
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><strong>cntraining</strong> [-D <em>dir</em>] <em>FILE</em>&#8230;</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>cntraining takes a list of .tr files, from which it generates the
+<strong>normproto</strong> data file (the character normalization sensitivity
+prototypes).</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_options">OPTIONS</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+-D <em>dir</em>
+</dt>
+<dd>
+<p>
+        Directory to write output files to.
+</p>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), shapeclustering(1), mftraining(1)</p></div>
+<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (c) Hewlett-Packard Company, 1988
+Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-12 23:50:30 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/cntraining.1.xml b/doc/cntraining.1.xml
index 6795f12f2c..6efc99be1d 100644
--- a/doc/cntraining.1.xml
+++ b/doc/cntraining.1.xml
@@ -1,58 +1,58 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>CNTRAINING(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>cntraining</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>cntraining</refname>
-    <refpurpose>character normalization training for Tesseract</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara><emphasis role="strong">cntraining</emphasis> [-D <emphasis>dir</emphasis>] <emphasis>FILE</emphasis>&#8230;</simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>cntraining takes a list of .tr files, from which it generates the
-<emphasis role="strong">normproto</emphasis> data file (the character normalization sensitivity
-prototypes).</simpara>
-</refsect1>
-<refsect1 id="_options">
-<title>OPTIONS</title>
-<variablelist>
-<varlistentry>
-<term>
--D <emphasis>dir</emphasis>
-</term>
-<listitem>
-<simpara>
-        Directory to write output files to.
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), shapeclustering(1), mftraining(1)</simpara>
-<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (c) Hewlett-Packard Company, 1988
-Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>CNTRAINING(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>cntraining</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>cntraining</refname>
+    <refpurpose>character normalization training for Tesseract</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara><emphasis role="strong">cntraining</emphasis> [-D <emphasis>dir</emphasis>] <emphasis>FILE</emphasis>&#8230;</simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>cntraining takes a list of .tr files, from which it generates the
+<emphasis role="strong">normproto</emphasis> data file (the character normalization sensitivity
+prototypes).</simpara>
+</refsect1>
+<refsect1 id="_options">
+<title>OPTIONS</title>
+<variablelist>
+<varlistentry>
+<term>
+-D <emphasis>dir</emphasis>
+</term>
+<listitem>
+<simpara>
+        Directory to write output files to.
+</simpara>
+</listitem>
+</varlistentry>
+</variablelist>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), shapeclustering(1), mftraining(1)</simpara>
+<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (c) Hewlett-Packard Company, 1988
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/combine_tessdata.1.asc b/doc/combine_tessdata.1.asc
index d93de7ea0f..7b5295f227 100644
--- a/doc/combine_tessdata.1.asc
+++ b/doc/combine_tessdata.1.asc
@@ -11,7 +11,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-combine_tessdata(1) is the main program to combine/extract/overwrite 
+combine_tessdata(1) is the main program to combine/extract/overwrite
 tessdata components in [lang].traineddata files.
 
 To combine all the individual tessdata components (unicharset, DAWGs,
diff --git a/doc/combine_tessdata.1.html b/doc/combine_tessdata.1.html
index 8de474b33b..a7f699f939 100644
--- a/doc/combine_tessdata.1.html
+++ b/doc/combine_tessdata.1.html
@@ -1,1014 +1,1014 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>COMBINE_TESSDATA(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-COMBINE_TESSDATA(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>combine_tessdata -
-   combine/extract/overwrite Tesseract data
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><strong>combine_tessdata</strong> [<em>OPTION</em>] <em>FILE</em>&#8230;</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>combine_tessdata(1) is the main program to combine/extract/overwrite
-tessdata components in [lang].traineddata files.</p></div>
-<div class="paragraph"><p>To combine all the individual tessdata components (unicharset, DAWGs,
-classifier templates, ambiguities, language configs) located at, say,
-/home/$USER/temp/eng.* run:</p></div>
-<div class="literalblock">
-<div class="content">
-<pre><code>combine_tessdata /home/$USER/temp/eng.</code></pre>
-</div></div>
-<div class="paragraph"><p>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</p></div>
-<div class="paragraph"><p>Specify option -e if you would like to extract individual components
-from a combined traineddata file. For example, to extract language config
-file and the unicharset from tessdata/eng.traineddata run:</p></div>
-<div class="literalblock">
-<div class="content">
-<pre><code>combine_tessdata -e tessdata/eng.traineddata \
-  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</code></pre>
-</div></div>
-<div class="paragraph"><p>The desired config file and unicharset will be written to
-/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</p></div>
-<div class="paragraph"><p>Specify option -o to overwrite individual components of the given
-[lang].traineddata file. For example, to overwrite language config
-and unichar ambiguities files in tessdata/eng.traineddata use:</p></div>
-<div class="literalblock">
-<div class="content">
-<pre><code>combine_tessdata -o tessdata/eng.traineddata \
-  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</code></pre>
-</div></div>
-<div class="paragraph"><p>As a result, tessdata/eng.traineddata will contain the new language config
-and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</p></div>
-<div class="paragraph"><p>Note: the file names of the files to extract to and to overwrite from should
-have the appropriate file suffixes (extensions) indicating their tessdata
-component type (.unicharset for the unicharset, .unicharambigs for unichar
-ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.</p></div>
-<div class="paragraph"><p>Specify option -u to unpack all the components to the specified path:</p></div>
-<div class="literalblock">
-<div class="content">
-<pre><code>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</code></pre>
-</div></div>
-<div class="paragraph"><p>This will create  /home/$USER/temp/eng.* files with individual tessdata
-components from tessdata/eng.traineddata.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_options">OPTIONS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><strong>-e</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
-    Extracts the specified components from the .traineddata file</p></div>
-<div class="paragraph"><p><strong>-o</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
-    Overwrites the specified components of the .traineddata file
-    with those provided on the comand line.</p></div>
-<div class="paragraph"><p><strong>-u</strong> <em>.traineddata</em> <em>PATHPREFIX</em>
-    Unpacks the .traineddata using the provided prefix.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_caveats">CAVEATS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><em>Prefix</em> refers to the full file prefix, including period (.)</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_components">COMPONENTS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The components in a Tesseract lang.traineddata file as of
-Tesseract 3.02 are briefly described below; For more information on
-many of these files, see
-<a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-<div class="dlist"><dl>
-<dt class="hdlist1">
-lang.config
-</dt>
-<dd>
-<p>
-  (Optional) Language-specific overrides to default config variables.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.unicharset
-</dt>
-<dd>
-<p>
-  (Required) The list of symbols that Tesseract recognizes, with properties.
-  See unicharset(5).
-</p>
-</dd>
-<dt class="hdlist1">
-lang.unicharambigs
-</dt>
-<dd>
-<p>
-  (Optional) This file contains information on pairs of recognized symbols
-  which are often confused.  For example, <em>rn</em> and <em>m</em>.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.inttemp
-</dt>
-<dd>
-<p>
-  (Required) Character shape templates for each unichar.  Produced by
-  mftraining(1).
-</p>
-</dd>
-<dt class="hdlist1">
-lang.pffmtable
-</dt>
-<dd>
-<p>
-  (Required) The number of features expected for each unichar.
-  Produced by mftraining(1) from <strong>.tr</strong> files.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.normproto
-</dt>
-<dd>
-<p>
-  (Required) Character normalization prototypes generated by cntraining(1)
-  from <strong>.tr</strong> files.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.punc-dawg
-</dt>
-<dd>
-<p>
-  (Optional) A dawg made from punctuation patterns found around words.
-  The "word" part is replaced by a single space.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.word-dawg
-</dt>
-<dd>
-<p>
-  (Optional) A dawg made from dictionary words from the language.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.number-dawg
-</dt>
-<dd>
-<p>
-  (Optional) A dawg made from tokens which originally contained digits.
-  Each digit is replaced by a space character.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.freq-dawg
-</dt>
-<dd>
-<p>
-  (Optional) A dawg made from the most frequent words which would have
-  gone into word-dawg.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.fixed-length-dawgs
-</dt>
-<dd>
-<p>
-  (Optional) Several dawgs of different fixed lengths&#8201;&#8212;&#8201;useful for
-  languages like Chinese.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.cube-unicharset
-</dt>
-<dd>
-<p>
-  (Optional) A unicharset for cube, if cube was trained on a different set
-  of symbols.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.cube-word-dawg
-</dt>
-<dd>
-<p>
-  (Optional) A word dawg for cube&#8217;s alternate unicharset.  Not needed if Cube
-  was trained with Tesseract&#8217;s unicharset.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.shapetable
-</dt>
-<dd>
-<p>
-  (Optional) When present, a shapetable is an extra layer between the character
-  classifier and the word recognizer that allows the character classifier to
-  return a collection of unichar ids and fonts instead of a single unichar-id
-  and font.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.bigram-dawg
-</dt>
-<dd>
-<p>
-  (Optional) A dawg of word bigrams where the words are separated by a space
-  and each digit is replaced by a <em>?</em>.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.unambig-dawg
-</dt>
-<dd>
-<p>
-  (Optional) TODO: Describe.
-</p>
-</dd>
-<dt class="hdlist1">
-lang.params-training-model
-</dt>
-<dd>
-<p>
-  (Optional) TODO: Describe.
-</p>
-</dd>
-</dl></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_history">HISTORY</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>combine_tessdata(1) first appeared in version 3.00 of Tesseract</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5),
-unicharambigs(5)</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Copyright (C) 2009, Google Inc.
-Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-12 23:52:02 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>COMBINE_TESSDATA(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// Builds the table of contents inside the #toc element from the headings found under #content; toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {  // Concatenates the text nodes beneath el, recursing into child elements.
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {  // Record pairing a heading element with its text and TOC level.
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {  // Collects a TocEntry for each matching heading under el, in document order.
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');  // Matches tag names h1..h(toclevels+1); the digit is captured.
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")  // Headings without an id get a generated one so the link has a target.
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)  // No headings found: remove the #toc element itself.
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {  // Rebuilds the #footnotes list from the span.footnote elements under #content.
+  // Delete existing footnote entries in case we're reloading the footnotes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE returns the full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {  // Runs the footnote generator (and the TOC generator when toclevels is set) on a timer and once more when the document has loaded.
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);  // Re-run every 500 ms until the load handler below clears the timer.
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+COMBINE_TESSDATA(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>combine_tessdata -
+   combine/extract/overwrite Tesseract data
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><strong>combine_tessdata</strong> [<em>OPTION</em>] <em>FILE</em>&#8230;</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>combine_tessdata(1) is the main program to combine/extract/overwrite
+tessdata components in [lang].traineddata files.</p></div>
+<div class="paragraph"><p>To combine all the individual tessdata components (unicharset, DAWGs,
+classifier templates, ambiguities, language configs) located at, say,
+/home/$USER/temp/eng.* run:</p></div>
+<div class="literalblock">
+<div class="content">
+<pre><code>combine_tessdata /home/$USER/temp/eng.</code></pre>
+</div></div>
+<div class="paragraph"><p>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</p></div>
+<div class="paragraph"><p>Specify option -e if you would like to extract individual components
+from a combined traineddata file. For example, to extract language config
+file and the unicharset from tessdata/eng.traineddata run:</p></div>
+<div class="literalblock">
+<div class="content">
+<pre><code>combine_tessdata -e tessdata/eng.traineddata \
+  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</code></pre>
+</div></div>
+<div class="paragraph"><p>The desired config file and unicharset will be written to
+/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</p></div>
+<div class="paragraph"><p>Specify option -o to overwrite individual components of the given
+[lang].traineddata file. For example, to overwrite language config
+and unichar ambiguities files in tessdata/eng.traineddata use:</p></div>
+<div class="literalblock">
+<div class="content">
+<pre><code>combine_tessdata -o tessdata/eng.traineddata \
+  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</code></pre>
+</div></div>
+<div class="paragraph"><p>As a result, tessdata/eng.traineddata will contain the new language config
+and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</p></div>
+<div class="paragraph"><p>Note: the file names of the files to extract to and to overwrite from should
+have the appropriate file suffixes (extensions) indicating their tessdata
+component type (.unicharset for the unicharset, .unicharambigs for unichar
+ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.</p></div>
+<div class="paragraph"><p>Specify option -u to unpack all the components to the specified path:</p></div>
+<div class="literalblock">
+<div class="content">
+<pre><code>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</code></pre>
+</div></div>
+<div class="paragraph"><p>This will create  /home/$USER/temp/eng.* files with individual tessdata
+components from tessdata/eng.traineddata.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_options">OPTIONS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><strong>-e</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
+    Extracts the specified components from the .traineddata file</p></div>
+<div class="paragraph"><p><strong>-o</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
+    Overwrites the specified components of the .traineddata file
+    with those provided on the command line.</p></div>
+<div class="paragraph"><p><strong>-u</strong> <em>.traineddata</em> <em>PATHPREFIX</em>
+    Unpacks the .traineddata using the provided prefix.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_caveats">CAVEATS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><em>Prefix</em> refers to the full file prefix, including period (.)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_components">COMPONENTS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The components in a Tesseract lang.traineddata file as of
+Tesseract 3.02 are briefly described below. For more information on
+many of these files, see
+<a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+lang.config
+</dt>
+<dd>
+<p>
+  (Optional) Language-specific overrides to default config variables.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.unicharset
+</dt>
+<dd>
+<p>
+  (Required) The list of symbols that Tesseract recognizes, with properties.
+  See unicharset(5).
+</p>
+</dd>
+<dt class="hdlist1">
+lang.unicharambigs
+</dt>
+<dd>
+<p>
+  (Optional) This file contains information on pairs of recognized symbols
+  which are often confused.  For example, <em>rn</em> and <em>m</em>.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.inttemp
+</dt>
+<dd>
+<p>
+  (Required) Character shape templates for each unichar.  Produced by
+  mftraining(1).
+</p>
+</dd>
+<dt class="hdlist1">
+lang.pffmtable
+</dt>
+<dd>
+<p>
+  (Required) The number of features expected for each unichar.
+  Produced by mftraining(1) from <strong>.tr</strong> files.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.normproto
+</dt>
+<dd>
+<p>
+  (Required) Character normalization prototypes generated by cntraining(1)
+  from <strong>.tr</strong> files.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.punc-dawg
+</dt>
+<dd>
+<p>
+  (Optional) A dawg made from punctuation patterns found around words.
+  The "word" part is replaced by a single space.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.word-dawg
+</dt>
+<dd>
+<p>
+  (Optional) A dawg made from dictionary words from the language.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.number-dawg
+</dt>
+<dd>
+<p>
+  (Optional) A dawg made from tokens which originally contained digits.
+  Each digit is replaced by a space character.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.freq-dawg
+</dt>
+<dd>
+<p>
+  (Optional) A dawg made from the most frequent words which would have
+  gone into word-dawg.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.fixed-length-dawgs
+</dt>
+<dd>
+<p>
+  (Optional) Several dawgs of different fixed lengths&#8201;&#8212;&#8201;useful for
+  languages like Chinese.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.cube-unicharset
+</dt>
+<dd>
+<p>
+  (Optional) A unicharset for cube, if cube was trained on a different set
+  of symbols.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.cube-word-dawg
+</dt>
+<dd>
+<p>
+  (Optional) A word dawg for cube&#8217;s alternate unicharset.  Not needed if Cube
+  was trained with Tesseract&#8217;s unicharset.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.shapetable
+</dt>
+<dd>
+<p>
+  (Optional) When present, a shapetable is an extra layer between the character
+  classifier and the word recognizer that allows the character classifier to
+  return a collection of unichar ids and fonts instead of a single unichar-id
+  and font.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.bigram-dawg
+</dt>
+<dd>
+<p>
+  (Optional) A dawg of word bigrams where the words are separated by a space
+  and each digit is replaced by a <em>?</em>.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.unambig-dawg
+</dt>
+<dd>
+<p>
+  (Optional) TODO: Describe.
+</p>
+</dd>
+<dt class="hdlist1">
+lang.params-training-model
+</dt>
+<dd>
+<p>
+  (Optional) TODO: Describe.
+</p>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_history">HISTORY</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>combine_tessdata(1) first appeared in version 3.00 of Tesseract</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5),
+unicharambigs(5)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (C) 2009, Google Inc.
+Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-12 23:52:02 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/combine_tessdata.1.xml b/doc/combine_tessdata.1.xml
index 1a43995fb5..693e1343b5 100644
--- a/doc/combine_tessdata.1.xml
+++ b/doc/combine_tessdata.1.xml
@@ -1,281 +1,281 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>COMBINE_TESSDATA(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>combine_tessdata</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>combine_tessdata</refname>
-    <refpurpose>combine/extract/overwrite Tesseract data</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara><emphasis role="strong">combine_tessdata</emphasis> [<emphasis>OPTION</emphasis>] <emphasis>FILE</emphasis>&#8230;</simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>combine_tessdata(1) is the main program to combine/extract/overwrite
-tessdata components in [lang].traineddata files.</simpara>
-<simpara>To combine all the individual tessdata components (unicharset, DAWGs,
-classifier templates, ambiguities, language configs) located at, say,
-/home/$USER/temp/eng.* run:</simpara>
-<literallayout class="monospaced">combine_tessdata /home/$USER/temp/eng.</literallayout>
-<simpara>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</simpara>
-<simpara>Specify option -e if you would like to extract individual components
-from a combined traineddata file. For example, to extract language config
-file and the unicharset from tessdata/eng.traineddata run:</simpara>
-<literallayout class="monospaced">combine_tessdata -e tessdata/eng.traineddata \
-  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</literallayout>
-<simpara>The desired config file and unicharset will be written to
-/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</simpara>
-<simpara>Specify option -o to overwrite individual components of the given
-[lang].traineddata file. For example, to overwrite language config
-and unichar ambiguities files in tessdata/eng.traineddata use:</simpara>
-<literallayout class="monospaced">combine_tessdata -o tessdata/eng.traineddata \
-  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</literallayout>
-<simpara>As a result, tessdata/eng.traineddata will contain the new language config
-and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</simpara>
-<simpara>Note: the file names of the files to extract to and to overwrite from should
-have the appropriate file suffixes (extensions) indicating their tessdata
-component type (.unicharset for the unicharset, .unicharambigs for unichar
-ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.</simpara>
-<simpara>Specify option -u to unpack all the components to the specified path:</simpara>
-<literallayout class="monospaced">combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</literallayout>
-<simpara>This will create  /home/$USER/temp/eng.* files with individual tessdata
-components from tessdata/eng.traineddata.</simpara>
-</refsect1>
-<refsect1 id="_options">
-<title>OPTIONS</title>
-<simpara><emphasis role="strong">-e</emphasis> <emphasis>.traineddata</emphasis> <emphasis>FILE</emphasis>&#8230;:
-    Extracts the specified components from the .traineddata file</simpara>
-<simpara><emphasis role="strong">-o</emphasis> <emphasis>.traineddata</emphasis> <emphasis>FILE</emphasis>&#8230;:
-    Overwrites the specified components of the .traineddata file
-    with those provided on the comand line.</simpara>
-<simpara><emphasis role="strong">-u</emphasis> <emphasis>.traineddata</emphasis> <emphasis>PATHPREFIX</emphasis>
-    Unpacks the .traineddata using the provided prefix.</simpara>
-</refsect1>
-<refsect1 id="_caveats">
-<title>CAVEATS</title>
-<simpara><emphasis>Prefix</emphasis> refers to the full file prefix, including period (.)</simpara>
-</refsect1>
-<refsect1 id="_components">
-<title>COMPONENTS</title>
-<simpara>The components in a Tesseract lang.traineddata file as of
-Tesseract 3.02 are briefly described below; For more information on
-many of these files, see
-<ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-<variablelist>
-<varlistentry>
-<term>
-lang.config
-</term>
-<listitem>
-<simpara>
-  (Optional) Language-specific overrides to default config variables.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.unicharset
-</term>
-<listitem>
-<simpara>
-  (Required) The list of symbols that Tesseract recognizes, with properties.
-  See unicharset(5).
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.unicharambigs
-</term>
-<listitem>
-<simpara>
-  (Optional) This file contains information on pairs of recognized symbols
-  which are often confused.  For example, <emphasis>rn</emphasis> and <emphasis>m</emphasis>.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.inttemp
-</term>
-<listitem>
-<simpara>
-  (Required) Character shape templates for each unichar.  Produced by
-  mftraining(1).
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.pffmtable
-</term>
-<listitem>
-<simpara>
-  (Required) The number of features expected for each unichar.
-  Produced by mftraining(1) from <emphasis role="strong">.tr</emphasis> files.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.normproto
-</term>
-<listitem>
-<simpara>
-  (Required) Character normalization prototypes generated by cntraining(1)
-  from <emphasis role="strong">.tr</emphasis> files.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.punc-dawg
-</term>
-<listitem>
-<simpara>
-  (Optional) A dawg made from punctuation patterns found around words.
-  The "word" part is replaced by a single space.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.word-dawg
-</term>
-<listitem>
-<simpara>
-  (Optional) A dawg made from dictionary words from the language.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.number-dawg
-</term>
-<listitem>
-<simpara>
-  (Optional) A dawg made from tokens which originally contained digits.
-  Each digit is replaced by a space character.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.freq-dawg
-</term>
-<listitem>
-<simpara>
-  (Optional) A dawg made from the most frequent words which would have
-  gone into word-dawg.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.fixed-length-dawgs
-</term>
-<listitem>
-<simpara>
-  (Optional) Several dawgs of different fixed lengths&#8201;&#8212;&#8201;useful for
-  languages like Chinese.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.cube-unicharset
-</term>
-<listitem>
-<simpara>
-  (Optional) A unicharset for cube, if cube was trained on a different set
-  of symbols.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.cube-word-dawg
-</term>
-<listitem>
-<simpara>
-  (Optional) A word dawg for cube&#8217;s alternate unicharset.  Not needed if Cube
-  was trained with Tesseract&#8217;s unicharset.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.shapetable
-</term>
-<listitem>
-<simpara>
-  (Optional) When present, a shapetable is an extra layer between the character
-  classifier and the word recognizer that allows the character classifier to
-  return a collection of unichar ids and fonts instead of a single unichar-id
-  and font.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.bigram-dawg
-</term>
-<listitem>
-<simpara>
-  (Optional) A dawg of word bigrams where the words are separated by a space
-  and each digit is replaced by a <emphasis>?</emphasis>.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.unambig-dawg
-</term>
-<listitem>
-<simpara>
-  (Optional) TODO: Describe.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-lang.params-training-model
-</term>
-<listitem>
-<simpara>
-  (Optional) TODO: Describe.
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_history">
-<title>HISTORY</title>
-<simpara>combine_tessdata(1) first appeared in version 3.00 of Tesseract</simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5),
-unicharambigs(5)</simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (C) 2009, Google Inc.
-Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>COMBINE_TESSDATA(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>combine_tessdata</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>combine_tessdata</refname>
+    <refpurpose>combine/extract/overwrite Tesseract data</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara><emphasis role="strong">combine_tessdata</emphasis> [<emphasis>OPTION</emphasis>] <emphasis>FILE</emphasis>&#8230;</simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>combine_tessdata(1) is the main program to combine/extract/overwrite
+tessdata components in [lang].traineddata files.</simpara>
+<simpara>To combine all the individual tessdata components (unicharset, DAWGs,
+classifier templates, ambiguities, language configs) located at, say,
+/home/$USER/temp/eng.* run:</simpara>
+<literallayout class="monospaced">combine_tessdata /home/$USER/temp/eng.</literallayout>
+<simpara>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</simpara>
+<simpara>Specify option -e if you would like to extract individual components
+from a combined traineddata file. For example, to extract language config
+file and the unicharset from tessdata/eng.traineddata run:</simpara>
+<literallayout class="monospaced">combine_tessdata -e tessdata/eng.traineddata \
+  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</literallayout>
+<simpara>The desired config file and unicharset will be written to
+/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</simpara>
+<simpara>Specify option -o to overwrite individual components of the given
+[lang].traineddata file. For example, to overwrite language config
+and unichar ambiguities files in tessdata/eng.traineddata use:</simpara>
+<literallayout class="monospaced">combine_tessdata -o tessdata/eng.traineddata \
+  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</literallayout>
+<simpara>As a result, tessdata/eng.traineddata will contain the new language config
+and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</simpara>
+<simpara>Note: the file names of the files to extract to and to overwrite from should
+have the appropriate file suffixes (extensions) indicating their tessdata
+component type (.unicharset for the unicharset, .unicharambigs for unichar
+ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.</simpara>
+<simpara>Specify option -u to unpack all the components to the specified path:</simpara>
+<literallayout class="monospaced">combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</literallayout>
+<simpara>This will create  /home/$USER/temp/eng.* files with individual tessdata
+components from tessdata/eng.traineddata.</simpara>
+</refsect1>
+<refsect1 id="_options">
+<title>OPTIONS</title>
+<simpara><emphasis role="strong">-e</emphasis> <emphasis>.traineddata</emphasis> <emphasis>FILE</emphasis>&#8230;:
+    Extracts the specified components from the .traineddata file</simpara>
+<simpara><emphasis role="strong">-o</emphasis> <emphasis>.traineddata</emphasis> <emphasis>FILE</emphasis>&#8230;:
+    Overwrites the specified components of the .traineddata file
+    with those provided on the command line.</simpara>
+<simpara><emphasis role="strong">-u</emphasis> <emphasis>.traineddata</emphasis> <emphasis>PATHPREFIX</emphasis>
+    Unpacks the .traineddata using the provided prefix.</simpara>
+</refsect1>
+<refsect1 id="_caveats">
+<title>CAVEATS</title>
+<simpara><emphasis>Prefix</emphasis> refers to the full file prefix, including period (.)</simpara>
+</refsect1>
+<refsect1 id="_components">
+<title>COMPONENTS</title>
+<simpara>The components in a Tesseract lang.traineddata file as of
+Tesseract 3.02 are briefly described below. For more information on
+many of these files, see
+<ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+<variablelist>
+<varlistentry>
+<term>
+lang.config
+</term>
+<listitem>
+<simpara>
+  (Optional) Language-specific overrides to default config variables.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.unicharset
+</term>
+<listitem>
+<simpara>
+  (Required) The list of symbols that Tesseract recognizes, with properties.
+  See unicharset(5).
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.unicharambigs
+</term>
+<listitem>
+<simpara>
+  (Optional) This file contains information on pairs of recognized symbols
+  which are often confused.  For example, <emphasis>rn</emphasis> and <emphasis>m</emphasis>.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.inttemp
+</term>
+<listitem>
+<simpara>
+  (Required) Character shape templates for each unichar.  Produced by
+  mftraining(1).
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.pffmtable
+</term>
+<listitem>
+<simpara>
+  (Required) The number of features expected for each unichar.
+  Produced by mftraining(1) from <emphasis role="strong">.tr</emphasis> files.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.normproto
+</term>
+<listitem>
+<simpara>
+  (Required) Character normalization prototypes generated by cntraining(1)
+  from <emphasis role="strong">.tr</emphasis> files.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.punc-dawg
+</term>
+<listitem>
+<simpara>
+  (Optional) A dawg made from punctuation patterns found around words.
+  The "word" part is replaced by a single space.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.word-dawg
+</term>
+<listitem>
+<simpara>
+  (Optional) A dawg made from dictionary words from the language.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.number-dawg
+</term>
+<listitem>
+<simpara>
+  (Optional) A dawg made from tokens which originally contained digits.
+  Each digit is replaced by a space character.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.freq-dawg
+</term>
+<listitem>
+<simpara>
+  (Optional) A dawg made from the most frequent words which would have
+  gone into word-dawg.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.fixed-length-dawgs
+</term>
+<listitem>
+<simpara>
+  (Optional) Several dawgs of different fixed lengths&#8201;&#8212;&#8201;useful for
+  languages like Chinese.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.cube-unicharset
+</term>
+<listitem>
+<simpara>
+  (Optional) A unicharset for cube, if cube was trained on a different set
+  of symbols.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.cube-word-dawg
+</term>
+<listitem>
+<simpara>
+  (Optional) A word dawg for cube&#8217;s alternate unicharset.  Not needed if Cube
+  was trained with Tesseract&#8217;s unicharset.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.shapetable
+</term>
+<listitem>
+<simpara>
+  (Optional) When present, a shapetable is an extra layer between the character
+  classifier and the word recognizer that allows the character classifier to
+  return a collection of unichar ids and fonts instead of a single unichar-id
+  and font.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.bigram-dawg
+</term>
+<listitem>
+<simpara>
+  (Optional) A dawg of word bigrams where the words are separated by a space
+  and each digit is replaced by a <emphasis>?</emphasis>.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.unambig-dawg
+</term>
+<listitem>
+<simpara>
+  (Optional) TODO: Describe.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+lang.params-training-model
+</term>
+<listitem>
+<simpara>
+  (Optional) TODO: Describe.
+</simpara>
+</listitem>
+</varlistentry>
+</variablelist>
+</refsect1>
+<refsect1 id="_history">
+<title>HISTORY</title>
+<simpara>combine_tessdata(1) first appeared in version 3.00 of Tesseract</simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5),
+unicharambigs(5)</simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (C) 2009, Google Inc.
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/dawg2wordlist.1.html b/doc/dawg2wordlist.1.html
index b700fe186d..0b2645dfb7 100644
--- a/doc/dawg2wordlist.1.html
+++ b/doc/dawg2wordlist.1.html
@@ -1,802 +1,802 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>DAWG2WORDLIST(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-DAWG2WORDLIST(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>dawg2wordlist -
-   convert a Tesseract DAWG to a wordlist
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><strong>dawg2wordlist</strong> <em>UNICHARSET</em> <em>DAWG</em> <em>WORDLIST</em></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
-Graph (DAWG) to a list of words using a unicharset as key.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_options">OPTIONS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><em>UNICHARSET</em>
-        The unicharset of the language. This is the unicharset
-        generated by mftraining(1).</p></div>
-<div class="paragraph"><p><em>DAWG</em>
-        The input DAWG, created by wordlist2dawg(1)</p></div>
-<div class="paragraph"><p><em>WORDLIST</em>
-        Plain text (output) file in UTF-8, one word per line</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
-combine_tessdata(1)</p></div>
-<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
-Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-12 23:52:09 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>DAWG2WORDLIST(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overriden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnodes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE return full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+DAWG2WORDLIST(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>dawg2wordlist -
+   convert a Tesseract DAWG to a wordlist
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><strong>dawg2wordlist</strong> <em>UNICHARSET</em> <em>DAWG</em> <em>WORDLIST</em></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
+Graph (DAWG) to a list of words using a unicharset as key.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_options">OPTIONS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><em>UNICHARSET</em>
+        The unicharset of the language. This is the unicharset
+        generated by mftraining(1).</p></div>
+<div class="paragraph"><p><em>DAWG</em>
+        The input DAWG, created by wordlist2dawg(1)</p></div>
+<div class="paragraph"><p><em>WORDLIST</em>
+        Plain text (output) file in UTF-8, one word per line</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
+combine_tessdata(1)</p></div>
+<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
+Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-12 23:52:09 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/dawg2wordlist.1.xml b/doc/dawg2wordlist.1.xml
index c73113191c..ee960ad9fc 100644
--- a/doc/dawg2wordlist.1.xml
+++ b/doc/dawg2wordlist.1.xml
@@ -1,53 +1,53 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>DAWG2WORDLIST(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>dawg2wordlist</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>dawg2wordlist</refname>
-    <refpurpose>convert a Tesseract DAWG to a wordlist</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara><emphasis role="strong">dawg2wordlist</emphasis> <emphasis>UNICHARSET</emphasis> <emphasis>DAWG</emphasis> <emphasis>WORDLIST</emphasis></simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
-Graph (DAWG) to a list of words using a unicharset as key.</simpara>
-</refsect1>
-<refsect1 id="_options">
-<title>OPTIONS</title>
-<simpara><emphasis>UNICHARSET</emphasis>
-        The unicharset of the language. This is the unicharset
-        generated by mftraining(1).</simpara>
-<simpara><emphasis>DAWG</emphasis>
-        The input DAWG, created by wordlist2dawg(1)</simpara>
-<simpara><emphasis>WORDLIST</emphasis>
-        Plain text (output) file in UTF-8, one word per line</simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
-combine_tessdata(1)</simpara>
-<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (C) 2012 Google, Inc.
-Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>DAWG2WORDLIST(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>dawg2wordlist</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>dawg2wordlist</refname>
+    <refpurpose>convert a Tesseract DAWG to a wordlist</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara><emphasis role="strong">dawg2wordlist</emphasis> <emphasis>UNICHARSET</emphasis> <emphasis>DAWG</emphasis> <emphasis>WORDLIST</emphasis></simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
+Graph (DAWG) to a list of words using a unicharset as key.</simpara>
+</refsect1>
+<refsect1 id="_options">
+<title>OPTIONS</title>
+<simpara><emphasis>UNICHARSET</emphasis>
+        The unicharset of the language. This is the unicharset
+        generated by mftraining(1).</simpara>
+<simpara><emphasis>DAWG</emphasis>
+        The input DAWG, created by wordlist2dawg(1)</simpara>
+<simpara><emphasis>WORDLIST</emphasis>
+        Plain text (output) file in UTF-8, one word per line</simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
+combine_tessdata(1)</simpara>
+<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (C) 2012 Google, Inc.
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/mftraining.1.asc b/doc/mftraining.1.asc
index 85e1263ade..43fe533a16 100644
--- a/doc/mftraining.1.asc
+++ b/doc/mftraining.1.asc
@@ -24,12 +24,12 @@ OPTIONS
 
 -F 'font_properties_file'::
 	(Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1:
-	
+
 	*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*
 
 -X 'xheights_file'::
 	(Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
-	
+
 	*font_name* *xheight*
 
 -D 'dir'::
diff --git a/doc/mftraining.1.html b/doc/mftraining.1.html
index 4abdfd6a6c..41a3804457 100644
--- a/doc/mftraining.1.html
+++ b/doc/mftraining.1.html
@@ -1,847 +1,847 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>MFTRAINING(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-MFTRAINING(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>mftraining -
-   feature training for Tesseract
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>mftraining -U <em>unicharset</em> -O <em>lang.unicharset</em> <em>FILE</em>&#8230;</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>mftraining takes a list of .tr files, from which it generates the
-files <strong>inttemp</strong> (the shape prototypes), <strong>shapetable</strong>, and <strong>pffmtable</strong>
-(the number of expected features for each character).  (A fourth file
-called Microfeat is also written by this program, but it is not used.)</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_options">OPTIONS</h2>
-<div class="sectionbody">
-<div class="dlist"><dl>
-<dt class="hdlist1">
--U <em>FILE</em>
-</dt>
-<dd>
-<p>
-        (Input) The unicharset generated by unicharset_extractor(1)
-</p>
-</dd>
-<dt class="hdlist1">
--F <em>font_properties_file</em>
-</dt>
-<dd>
-<p>
-        (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1:
-</p>
-<div class="literalblock">
-<div class="content">
-<pre><code>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</code></pre>
-</div></div>
-</dd>
-<dt class="hdlist1">
--X <em>xheights_file</em>
-</dt>
-<dd>
-<p>
-        (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
-</p>
-<div class="literalblock">
-<div class="content">
-<pre><code>*font_name* *xheight*</code></pre>
-</div></div>
-</dd>
-<dt class="hdlist1">
--D <em>dir</em>
-</dt>
-<dd>
-<p>
-        Directory to write output files to.
-</p>
-</dd>
-<dt class="hdlist1">
--O <em>FILE</em>
-</dt>
-<dd>
-<p>
-        (Output) The output unicharset that will be given to combine_tessdata(1)
-</p>
-</dd>
-</dl></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
-shapeclustering(1), unicharset(5)</p></div>
-<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Copyright (C) Hewlett-Packard Company, 1988
-Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-12 23:52:19 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>MFTRAINING(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnotes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE returns the full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+MFTRAINING(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>mftraining -
+   feature training for Tesseract
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>mftraining -U <em>unicharset</em> -O <em>lang.unicharset</em> <em>FILE</em>&#8230;</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>mftraining takes a list of .tr files, from which it generates the
+files <strong>inttemp</strong> (the shape prototypes), <strong>shapetable</strong>, and <strong>pffmtable</strong>
+(the number of expected features for each character).  (A fourth file
+called Microfeat is also written by this program, but it is not used.)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_options">OPTIONS</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+-U <em>FILE</em>
+</dt>
+<dd>
+<p>
+        (Input) The unicharset generated by unicharset_extractor(1)
+</p>
+</dd>
+<dt class="hdlist1">
+-F <em>font_properties_file</em>
+</dt>
+<dd>
+<p>
+        (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1:
+</p>
+<div class="literalblock">
+<div class="content">
+<pre><code>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</code></pre>
+</div></div>
+</dd>
+<dt class="hdlist1">
+-X <em>xheights_file</em>
+</dt>
+<dd>
+<p>
+        (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
+</p>
+<div class="literalblock">
+<div class="content">
+<pre><code>*font_name* *xheight*</code></pre>
+</div></div>
+</dd>
+<dt class="hdlist1">
+-D <em>dir</em>
+</dt>
+<dd>
+<p>
+        Directory to write output files to.
+</p>
+</dd>
+<dt class="hdlist1">
+-O <em>FILE</em>
+</dt>
+<dd>
+<p>
+        (Output) The output unicharset that will be given to combine_tessdata(1)
+</p>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
+shapeclustering(1), unicharset(5)</p></div>
+<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (C) Hewlett-Packard Company, 1988
+Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-12 23:52:19 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/mftraining.1.xml b/doc/mftraining.1.xml
index 239178a5c1..10b3c6d2e5 100644
--- a/doc/mftraining.1.xml
+++ b/doc/mftraining.1.xml
@@ -1,102 +1,102 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>MFTRAINING(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>mftraining</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>mftraining</refname>
-    <refpurpose>feature training for Tesseract</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara>mftraining -U <emphasis>unicharset</emphasis> -O <emphasis>lang.unicharset</emphasis> <emphasis>FILE</emphasis>&#8230;</simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>mftraining takes a list of .tr files, from which it generates the
-files <emphasis role="strong">inttemp</emphasis> (the shape prototypes), <emphasis role="strong">shapetable</emphasis>, and <emphasis role="strong">pffmtable</emphasis>
-(the number of expected features for each character).  (A fourth file
-called Microfeat is also written by this program, but it is not used.)</simpara>
-</refsect1>
-<refsect1 id="_options">
-<title>OPTIONS</title>
-<variablelist>
-<varlistentry>
-<term>
--U <emphasis>FILE</emphasis>
-</term>
-<listitem>
-<simpara>
-        (Input) The unicharset generated by unicharset_extractor(1)
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
--F <emphasis>font_properties_file</emphasis>
-</term>
-<listitem>
-<simpara>
-        (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1:
-</simpara>
-<literallayout class="monospaced">*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</literallayout>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
--X <emphasis>xheights_file</emphasis>
-</term>
-<listitem>
-<simpara>
-        (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
-</simpara>
-<literallayout class="monospaced">*font_name* *xheight*</literallayout>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
--D <emphasis>dir</emphasis>
-</term>
-<listitem>
-<simpara>
-        Directory to write output files to.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
--O <emphasis>FILE</emphasis>
-</term>
-<listitem>
-<simpara>
-        (Output) The output unicharset that will be given to combine_tessdata(1)
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
-shapeclustering(1), unicharset(5)</simpara>
-<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (C) Hewlett-Packard Company, 1988
-Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>MFTRAINING(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>mftraining</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>mftraining</refname>
+    <refpurpose>feature training for Tesseract</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara>mftraining -U <emphasis>unicharset</emphasis> -O <emphasis>lang.unicharset</emphasis> <emphasis>FILE</emphasis>&#8230;</simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>mftraining takes a list of .tr files, from which it generates the
+files <emphasis role="strong">inttemp</emphasis> (the shape prototypes), <emphasis role="strong">shapetable</emphasis>, and <emphasis role="strong">pffmtable</emphasis>
+(the number of expected features for each character).  (A fourth file
+called Microfeat is also written by this program, but it is not used.)</simpara>
+</refsect1>
+<refsect1 id="_options">
+<title>OPTIONS</title>
+<variablelist>
+<varlistentry>
+<term>
+-U <emphasis>FILE</emphasis>
+</term>
+<listitem>
+<simpara>
+        (Input) The unicharset generated by unicharset_extractor(1)
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+-F <emphasis>font_properties_file</emphasis>
+</term>
+<listitem>
+<simpara>
+        (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1:
+</simpara>
+<literallayout class="monospaced">*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</literallayout>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+-X <emphasis>xheights_file</emphasis>
+</term>
+<listitem>
+<simpara>
+        (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
+</simpara>
+<literallayout class="monospaced">*font_name* *xheight*</literallayout>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+-D <emphasis>dir</emphasis>
+</term>
+<listitem>
+<simpara>
+        Directory to write output files to.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+-O <emphasis>FILE</emphasis>
+</term>
+<listitem>
+<simpara>
+        (Output) The output unicharset that will be given to combine_tessdata(1)
+</simpara>
+</listitem>
+</varlistentry>
+</variablelist>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
+shapeclustering(1), unicharset(5)</simpara>
+<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (C) Hewlett-Packard Company, 1988
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/shapeclustering.1.asc b/doc/shapeclustering.1.asc
index 81ca0dbc09..0a1bfb035b 100644
--- a/doc/shapeclustering.1.asc
+++ b/doc/shapeclustering.1.asc
@@ -35,7 +35,7 @@ OPTIONS
 
 -X 'xheights_file'::
 	(Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
-	
+
 	'font_name' 'xheight'
 
 -O 'FILE'::
diff --git a/doc/shapeclustering.1.html b/doc/shapeclustering.1.html
index 845d49a815..5fca944fc8 100644
--- a/doc/shapeclustering.1.html
+++ b/doc/shapeclustering.1.html
@@ -1,850 +1,850 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>SHAPECLUSTERING(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-SHAPECLUSTERING(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>shapeclustering -
-   shape clustering training for Tesseract
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>shapeclustering -D <em>output_dir</em>
-    -U <em>unicharset</em> -O <em>mfunicharset</em>
-    -F <em>font_props</em> -X <em>xheights</em>
-    <em>FILE</em>&#8230;</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>shapeclustering(1) takes extracted feature .tr files (generated by
-tesseract(1) run in a special mode from box files) and produces a
-file <strong>shapetable</strong> and an enhanced unicharset.  This program is still
-experimental, and is not required (yet) for training Tesseract.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_options">OPTIONS</h2>
-<div class="sectionbody">
-<div class="dlist"><dl>
-<dt class="hdlist1">
--U <em>FILE</em>
-</dt>
-<dd>
-<p>
-        The unicharset generated by unicharset_extractor(1).
-</p>
-</dd>
-<dt class="hdlist1">
--D <em>dir</em>
-</dt>
-<dd>
-<p>
-        Directory to write output files to.
-</p>
-</dd>
-<dt class="hdlist1">
--F <em>font_properties_file</em>
-</dt>
-<dd>
-<p>
-        (Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1:
-</p>
-<div class="literalblock">
-<div class="content">
-<pre><code>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</code></pre>
-</div></div>
-</dd>
-<dt class="hdlist1">
--X <em>xheights_file</em>
-</dt>
-<dd>
-<p>
-        (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
-</p>
-<div class="literalblock">
-<div class="content">
-<pre><code>'font_name' 'xheight'</code></pre>
-</div></div>
-</dd>
-<dt class="hdlist1">
--O <em>FILE</em>
-</dt>
-<dd>
-<p>
-        The output unicharset that will be given to combine_tessdata(1).
-</p>
-</dd>
-</dl></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
-unicharset(5)</p></div>
-<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Copyright (C) Google, 2011
-Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-12 23:52:24 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>SHAPECLUSTERING(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overriden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnodes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE return full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+SHAPECLUSTERING(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>shapeclustering -
+   shape clustering training for Tesseract
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>shapeclustering -D <em>output_dir</em>
+    -U <em>unicharset</em> -O <em>mfunicharset</em>
+    -F <em>font_props</em> -X <em>xheights</em>
+    <em>FILE</em>&#8230;</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>shapeclustering(1) takes extracted feature .tr files (generated by
+tesseract(1) run in a special mode from box files) and produces a
+file <strong>shapetable</strong> and an enhanced unicharset.  This program is still
+experimental, and is not required (yet) for training Tesseract.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_options">OPTIONS</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+-U <em>FILE</em>
+</dt>
+<dd>
+<p>
+        The unicharset generated by unicharset_extractor(1).
+</p>
+</dd>
+<dt class="hdlist1">
+-D <em>dir</em>
+</dt>
+<dd>
+<p>
+        Directory to write output files to.
+</p>
+</dd>
+<dt class="hdlist1">
+-F <em>font_properties_file</em>
+</dt>
+<dd>
+<p>
+        (Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1:
+</p>
+<div class="literalblock">
+<div class="content">
+<pre><code>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</code></pre>
+</div></div>
+</dd>
+<dt class="hdlist1">
+-X <em>xheights_file</em>
+</dt>
+<dd>
+<p>
+        (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
+</p>
+<div class="literalblock">
+<div class="content">
+<pre><code>'font_name' 'xheight'</code></pre>
+</div></div>
+</dd>
+<dt class="hdlist1">
+-O <em>FILE</em>
+</dt>
+<dd>
+<p>
+        The output unicharset that will be given to combine_tessdata(1).
+</p>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
+unicharset(5)</p></div>
+<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (C) Google, 2011
+Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-12 23:52:24 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/shapeclustering.1.xml b/doc/shapeclustering.1.xml
index d02bcf8db9..933789ad3c 100644
--- a/doc/shapeclustering.1.xml
+++ b/doc/shapeclustering.1.xml
@@ -1,105 +1,105 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>SHAPECLUSTERING(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>shapeclustering</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>shapeclustering</refname>
-    <refpurpose>shape clustering training for Tesseract</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara>shapeclustering -D <emphasis>output_dir</emphasis>
-    -U <emphasis>unicharset</emphasis> -O <emphasis>mfunicharset</emphasis>
-    -F <emphasis>font_props</emphasis> -X <emphasis>xheights</emphasis>
-    <emphasis>FILE</emphasis>&#8230;</simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>shapeclustering(1) takes extracted feature .tr files (generated by
-tesseract(1) run in a special mode from box files) and produces a
-file <emphasis role="strong">shapetable</emphasis> and an enhanced unicharset.  This program is still
-experimental, and is not required (yet) for training Tesseract.</simpara>
-</refsect1>
-<refsect1 id="_options">
-<title>OPTIONS</title>
-<variablelist>
-<varlistentry>
-<term>
--U <emphasis>FILE</emphasis>
-</term>
-<listitem>
-<simpara>
-        The unicharset generated by unicharset_extractor(1).
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
--D <emphasis>dir</emphasis>
-</term>
-<listitem>
-<simpara>
-        Directory to write output files to.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
--F <emphasis>font_properties_file</emphasis>
-</term>
-<listitem>
-<simpara>
-        (Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1:
-</simpara>
-<literallayout class="monospaced">'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</literallayout>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
--X <emphasis>xheights_file</emphasis>
-</term>
-<listitem>
-<simpara>
-        (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
-</simpara>
-<literallayout class="monospaced">'font_name' 'xheight'</literallayout>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
--O <emphasis>FILE</emphasis>
-</term>
-<listitem>
-<simpara>
-        The output unicharset that will be given to combine_tessdata(1).
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
-unicharset(5)</simpara>
-<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (C) Google, 2011
-Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>SHAPECLUSTERING(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>shapeclustering</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>shapeclustering</refname>
+    <refpurpose>shape clustering training for Tesseract</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara>shapeclustering -D <emphasis>output_dir</emphasis>
+    -U <emphasis>unicharset</emphasis> -O <emphasis>mfunicharset</emphasis>
+    -F <emphasis>font_props</emphasis> -X <emphasis>xheights</emphasis>
+    <emphasis>FILE</emphasis>&#8230;</simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>shapeclustering(1) takes extracted feature .tr files (generated by
+tesseract(1) run in a special mode from box files) and produces a
+file <emphasis role="strong">shapetable</emphasis> and an enhanced unicharset.  This program is still
+experimental, and is not required (yet) for training Tesseract.</simpara>
+</refsect1>
+<refsect1 id="_options">
+<title>OPTIONS</title>
+<variablelist>
+<varlistentry>
+<term>
+-U <emphasis>FILE</emphasis>
+</term>
+<listitem>
+<simpara>
+        The unicharset generated by unicharset_extractor(1).
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+-D <emphasis>dir</emphasis>
+</term>
+<listitem>
+<simpara>
+        Directory to write output files to.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+-F <emphasis>font_properties_file</emphasis>
+</term>
+<listitem>
+<simpara>
+        (Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1:
+</simpara>
+<literallayout class="monospaced">'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</literallayout>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+-X <emphasis>xheights_file</emphasis>
+</term>
+<listitem>
+<simpara>
+        (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
+</simpara>
+<literallayout class="monospaced">'font_name' 'xheight'</literallayout>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+-O <emphasis>FILE</emphasis>
+</term>
+<listitem>
+<simpara>
+        The output unicharset that will be given to combine_tessdata(1).
+</simpara>
+</listitem>
+</varlistentry>
+</variablelist>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
+unicharset(5)</simpara>
+<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (C) Google, 2011
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/tesseract.1 b/doc/tesseract.1
index 95128fec99..89107f0312 100644
--- a/doc/tesseract.1
+++ b/doc/tesseract.1
@@ -84,7 +84,7 @@ Set value for control parameter\&. Multiple \-c arguments are allowed\&.
 The language to use\&. If none is specified, English is assumed\&. Multiple languages may be specified, separated by plus characters\&. Tesseract uses 3\-character ISO 639\-2 language codes\&. (See LANGUAGES)
 .RE
 .PP
-\fI\-psm N\fR
+\fI\-\-psm N\fR
 .RS 4
 Set Tesseract to only run a subset of layout analysis and assume a certain form of image\&. The options for
 \fBN\fR
@@ -139,7 +139,7 @@ pdf \- Output in pdf instead of a text file\&.
 .RE
 .RE
 .sp
-\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-psm N\fR must occur before any \fIconfigfile\fR\&.
+\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-\-psm N\fR must occur before any \fIconfigfile\fR\&.
 .SH "SINGLE OPTIONS"
 .PP
 \fI\-v\fR
diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc
index d6f34d5060..312aae07f6 100644
--- a/doc/tesseract.1.asc
+++ b/doc/tesseract.1.asc
@@ -54,7 +54,7 @@ OPTIONS
 	Multiple languages may be specified, separated by plus characters.
 	Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
 
-'-psm N'::
+'--psm N'::
 	Set Tesseract to only run a subset of layout analysis and assume
 	a certain form of image. The options for *N* are:
 
@@ -67,7 +67,7 @@ OPTIONS
 	6 = Assume a single uniform block of text.
 	7 = Treat the image as a single text line.
 	8 = Treat the image as a single word.
-	9 = Treat the image as a single word in a circle. 
+	9 = Treat the image as a single word in a circle.
 	10 = Treat the image as a single character.
 
 'configfile'::
@@ -78,7 +78,7 @@ OPTIONS
   * hocr - Output in hOCR format instead of as a text file.
   * pdf  - Output in pdf instead of a text file.
 
-*Nota Bene:*   The options '-l lang' and '-psm N' must occur
+*Nota Bene:*   The options '-l lang' and '--psm N' must occur
 before any 'configfile'.
 
 
@@ -264,11 +264,11 @@ on read_pattern_list().
 
 HISTORY
 -------
-The engine was developed at Hewlett Packard Laboratories Bristol and at 
-Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more 
-changes made in 1996 to port to Windows, and some C\+\+izing in 1998. A 
-lot of the code was written in C, and then some more was written in C\+\+. 
-The C\++ code makes heavy use of a list system using macros. This predates
+The engine was developed at Hewlett Packard Laboratories Bristol and at
+Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more
+changes made in 1996 to port to Windows, and some C\+\+izing in 1998. A
+lot of the code was written in C, and then some more was written in C\+\+.
+The C\+\+ code makes heavy use of a list system using macros. This predates
 stl, was portable before stl, and is more efficient than stl lists, but has
 the big negative that if you do get a segmentation violation, it is hard to
 debug.
@@ -276,18 +276,18 @@ debug.
 Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
 to train Tesseract.
 
-Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy. 
+Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy.
 See <https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf>. With Tesseract 2.00,
-scripts are now included to allow anyone to reproduce some of these tests. 
-See <https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract> for more 
+scripts are now included to allow anyone to reproduce some of these tests.
+See <https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract> for more
 details.
 
-Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, 
-and Korean. It also introduces a new, single-file based system of managing 
+Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
+and Korean. It also introduces a new, single-file based system of managing
 language data.
 
-Tesseract 3.02 adds BiDirectional text support, the ability to recognize 
-multiple languages in a single image, and improved layout analysis. 
+Tesseract 3.02 adds BiDirectional text support, the ability to recognize
+multiple languages in a single image, and improved layout analysis.
 
 For further details, see the file ReleaseNotes included with the distribution.
 
diff --git a/doc/tesseract.1.html b/doc/tesseract.1.html
index 90c5dae78c..d0addae65b 100644
--- a/doc/tesseract.1.html
+++ b/doc/tesseract.1.html
@@ -1,1163 +1,1163 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>TESSERACT(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-TESSERACT(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>tesseract -
-   command-line OCR engine
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><strong>tesseract</strong> <em>imagename</em>|<em>stdin</em> <em>outputbase</em>|<em>stdout</em> [options&#8230;] [configfile&#8230;]</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1) is a commercial quality OCR engine originally developed at HP
-between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by
-UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed
-at Google since then.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_in_out_arguments">IN/OUT ARGUMENTS</h2>
-<div class="sectionbody">
-<div class="dlist"><dl>
-<dt class="hdlist1">
-<em>imagename</em>
-</dt>
-<dd>
-<p>
-        The name of the input image.  Most image file formats (anything
-        readable by Leptonica) are supported.
-</p>
-</dd>
-<dt class="hdlist1">
-<em>stdin</em>
-</dt>
-<dd>
-<p>
-        Instruction to read data from standard input
-</p>
-</dd>
-<dt class="hdlist1">
-<em>outputbase</em>
-</dt>
-<dd>
-<p>
-        The basename of the output file (to which the appropriate extension
-        will be appended).  By default the output will be named <em>outbase.txt</em>.
-</p>
-</dd>
-<dt class="hdlist1">
-<em>stdout</em>
-</dt>
-<dd>
-<p>
-        Instruction to sent output data to standard output
-</p>
-</dd>
-</dl></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_options">OPTIONS</h2>
-<div class="sectionbody">
-<div class="dlist"><dl>
-<dt class="hdlist1">
-<em>--tessdata-dir /path</em>
-</dt>
-<dd>
-<p>
-        Specify the location of tessdata path
-</p>
-</dd>
-<dt class="hdlist1">
-<em>--user-words /path/to/file</em>
-</dt>
-<dd>
-<p>
-        Specify the location of user words file
-</p>
-</dd>
-<dt class="hdlist1">
-<em>--user-patterns /path/to/file specify</em>
-</dt>
-<dd>
-<p>
-        The location of user patterns file
-</p>
-</dd>
-<dt class="hdlist1">
-<em>-c configvar=value</em>
-</dt>
-<dd>
-<p>
-        Set value for control parameter. Multiple -c arguments are allowed.
-</p>
-</dd>
-<dt class="hdlist1">
-<em>-l lang</em>
-</dt>
-<dd>
-<p>
-        The language to use. If none is specified, English is assumed.
-        Multiple languages may be specified, separated by plus characters.
-        Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
-</p>
-</dd>
-<dt class="hdlist1">
-<em>-psm N</em>
-</dt>
-<dd>
-<p>
-        Set Tesseract to only run a subset of layout analysis and assume
-        a certain form of image. The options for <strong>N</strong> are:
-</p>
-<div class="literalblock">
-<div class="content">
-<pre><code>0 = Orientation and script detection (OSD) only.
-1 = Automatic page segmentation with OSD.
-2 = Automatic page segmentation, but no OSD, or OCR.
-3 = Fully automatic page segmentation, but no OSD. (Default)
-4 = Assume a single column of text of variable sizes.
-5 = Assume a single uniform block of vertically aligned text.
-6 = Assume a single uniform block of text.
-7 = Treat the image as a single text line.
-8 = Treat the image as a single word.
-9 = Treat the image as a single word in a circle.
-10 = Treat the image as a single character.</code></pre>
-</div></div>
-</dd>
-<dt class="hdlist1">
-<em>configfile</em>
-</dt>
-<dd>
-<p>
-        The name of a config to use. A config is a plaintext file which
-        contains a list of variables and their values, one per line, with a
-        space separating variable from value.  Interesting config files
-        include:<br />
-</p>
-<div class="ulist"><ul>
-<li>
-<p>
-hocr - Output in hOCR format instead of as a text file.
-</p>
-</li>
-<li>
-<p>
-pdf  - Output in pdf instead of a text file.
-</p>
-</li>
-</ul></div>
-</dd>
-</dl></div>
-<div class="paragraph"><p><strong>Nota Bene:</strong>   The options <em>-l lang</em> and <em>-psm N</em> must occur
-before any <em>configfile</em>.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_single_options">SINGLE OPTIONS</h2>
-<div class="sectionbody">
-<div class="dlist"><dl>
-<dt class="hdlist1">
-<em>-v</em>
-</dt>
-<dd>
-<p>
-        Returns the current version of the tesseract(1) executable.
-</p>
-</dd>
-<dt class="hdlist1">
-<em>--list-langs</em>
-</dt>
-<dd>
-<p>
-        list available languages for tesseract engine. Can be used with --tessdata-dir.
-</p>
-</dd>
-<dt class="hdlist1">
-<em>--print-parameters</em>
-</dt>
-<dd>
-<p>
-        print tesseract parameters to the stdout.
-</p>
-</dd>
-</dl></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_languages">LANGUAGES</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>There are currently language packs available for the following languages
-(in <a href="https://github.com/tesseract-ocr/tessdata">https://github.com/tesseract-ocr/tessdata</a>):</p></div>
-<div class="paragraph"><p><strong>afr</strong> (Afrikaans)
-<strong>amh</strong> (Amharic)
-<strong>ara</strong> (Arabic)
-<strong>asm</strong> (Assamese)
-<strong>aze</strong> (Azerbaijani)
-<strong>aze_cyrl</strong> (Azerbaijani - Cyrilic)
-<strong>bel</strong> (Belarusian)
-<strong>ben</strong> (Bengali)
-<strong>bod</strong> (Tibetan)
-<strong>bos</strong> (Bosnian)
-<strong>bul</strong> (Bulgarian)
-<strong>cat</strong> (Catalan; Valencian)
-<strong>ceb</strong> (Cebuano)
-<strong>ces</strong> (Czech)
-<strong>chi_sim</strong> (Chinese - Simplified)
-<strong>chi_tra</strong> (Chinese - Traditional)
-<strong>chr</strong> (Cherokee)
-<strong>cym</strong> (Welsh)
-<strong>dan</strong> (Danish)
-<strong>dan_frak</strong> (Danish - Fraktur)
-<strong>deu</strong> (German)
-<strong>deu_frak</strong> (German - Fraktur)
-<strong>dzo</strong> (Dzongkha)
-<strong>ell</strong> (Greek, Modern (1453-))
-<strong>eng</strong> (English)
-<strong>enm</strong> (English, Middle (1100-1500))
-<strong>epo</strong> (Esperanto)
-<strong>equ</strong> (Math / equation detection module)
-<strong>est</strong> (Estonian)
-<strong>eus</strong> (Basque)
-<strong>fas</strong> (Persian)
-<strong>fin</strong> (Finnish)
-<strong>fra</strong> (French)
-<strong>frk</strong> (Frankish)
-<strong>frm</strong> (French, Middle (ca.1400-1600))
-<strong>gle</strong> (Irish)
-<strong>glg</strong> (Galician)
-<strong>grc</strong> (Greek, Ancient (to 1453))
-<strong>guj</strong> (Gujarati)
-<strong>hat</strong> (Haitian; Haitian Creole)
-<strong>heb</strong> (Hebrew)
-<strong>hin</strong> (Hindi)
-<strong>hrv</strong> (Croatian)
-<strong>hun</strong> (Hungarian)
-<strong>iku</strong> (Inuktitut)
-<strong>ind</strong> (Indonesian)
-<strong>isl</strong> (Icelandic)
-<strong>ita</strong> (Italian)
-<strong>ita_old</strong> (Italian - Old)
-<strong>jav</strong> (Javanese)
-<strong>jpn</strong> (Japanese)
-<strong>kan</strong> (Kannada)
-<strong>kat</strong> (Georgian)
-<strong>kat_old</strong> (Georgian - Old)
-<strong>kaz</strong> (Kazakh)
-<strong>khm</strong> (Central Khmer)
-<strong>kir</strong> (Kirghiz; Kyrgyz)
-<strong>kor</strong> (Korean)
-<strong>kur</strong> (Kurdish)
-<strong>lao</strong> (Lao)
-<strong>lat</strong> (Latin)
-<strong>lav</strong> (Latvian)
-<strong>lit</strong> (Lithuanian)
-<strong>mal</strong> (Malayalam)
-<strong>mar</strong> (Marathi)
-<strong>mkd</strong> (Macedonian)
-<strong>mlt</strong> (Maltese)
-<strong>msa</strong> (Malay)
-<strong>mya</strong> (Burmese)
-<strong>nep</strong> (Nepali)
-<strong>nld</strong> (Dutch; Flemish)
-<strong>nor</strong> (Norwegian)
-<strong>ori</strong> (Oriya)
-<strong>osd</strong> (Orientation and script detection module)
-<strong>pan</strong> (Panjabi; Punjabi)
-<strong>pol</strong> (Polish)
-<strong>por</strong> (Portuguese)
-<strong>pus</strong> (Pushto; Pashto)
-<strong>ron</strong> (Romanian; Moldavian; Moldovan)
-<strong>rus</strong> (Russian)
-<strong>san</strong> (Sanskrit)
-<strong>sin</strong> (Sinhala; Sinhalese)
-<strong>slk</strong> (Slovak)
-<strong>slk_frak</strong> (Slovak - Fraktur)
-<strong>slv</strong> (Slovenian)
-<strong>spa</strong> (Spanish; Castilian)
-<strong>spa_old</strong> (Spanish; Castilian - Old)
-<strong>sqi</strong> (Albanian)
-<strong>srp</strong> (Serbian)
-<strong>srp_latn</strong> (Serbian - Latin)
-<strong>swa</strong> (Swahili)
-<strong>swe</strong> (Swedish)
-<strong>syr</strong> (Syriac)
-<strong>tam</strong> (Tamil)
-<strong>tel</strong> (Telugu)
-<strong>tgk</strong> (Tajik)
-<strong>tgl</strong> (Tagalog)
-<strong>tha</strong> (Thai)
-<strong>tir</strong> (Tigrinya)
-<strong>tur</strong> (Turkish)
-<strong>uig</strong> (Uighur; Uyghur)
-<strong>ukr</strong> (Ukrainian)
-<strong>urd</strong> (Urdu)
-<strong>uzb</strong> (Uzbek)
-<strong>uzb_cyrl</strong> (Uzbek - Cyrilic)
-<strong>vie</strong> (Vietnamese)
-<strong>yid</strong> (Yiddish)</p></div>
-<div class="paragraph"><p>To use a non-standard language pack named <strong>foo.traineddata</strong>, set the
-<strong>TESSDATA_PREFIX</strong> environment variable so the file can be found at
-<strong>TESSDATA_PREFIX</strong>/tessdata/<strong>foo</strong>.traineddata and give Tesseract the
-argument <em>-l foo</em>.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_config_files_and_augmenting_with_user_data">CONFIG FILES AND AUGMENTING WITH USER DATA</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Tesseract config files consist of lines with variable-value pairs (space
-separated).  The variables are documented as flags in the source code like
-the following one in tesseractclass.h:</p></div>
-<div class="paragraph"><p>STRING_VAR_H(tessedit_char_blacklist, "",
-             "Blacklist of chars not to recognize");</p></div>
-<div class="paragraph"><p>These variables may enable or disable various features of the engine, and
-may cause it to load (or not load) various data.  For instance, let&#8217;s suppose
-you want to OCR in English, but suppress the normal dictionary and load an
-alternative word list and an alternative list of patterns&#8201;&#8212;&#8201;these two files
-are the most commonly used extra data files.</p></div>
-<div class="paragraph"><p>If your language pack is in /path/to/eng.traineddata  and the hocr config
-is in /path/to/configs/hocr then create three new files:</p></div>
-<div class="paragraph"><p>/path/to/eng.user-words:</p></div>
-<div class="verseblock">
-<pre class="content">the
-quick
-brown
-fox
-jumped</pre>
-<div class="attribution">
-</div></div>
-<div class="paragraph"><p>/path/to/eng.user-patterns:</p></div>
-<div class="verseblock">
-<pre class="content">1-\d\d\d-GOOG-411
-www.\n\\\*.com</pre>
-<div class="attribution">
-</div></div>
-<div class="paragraph"><p>/path/to/configs/bazaar:</p></div>
-<div class="verseblock">
-<pre class="content">load_system_dawg     F
-load_freq_dawg       F
-user_words_suffix    user-words
-user_patterns_suffix user-patterns</pre>
-<div class="attribution">
-</div></div>
-<div class="paragraph"><p>Now, if you pass the word <em>bazaar</em> as a trailing command line parameter
-to Tesseract, Tesseract will not bother loading the system dictionary nor
-the dictionary of frequent words and will load and use the eng.user-words
-and eng.user-patterns files you provided.  The former is a simple word list,
-one per line.  The format of the latter is documented in dict/trie.h
-on read_pattern_list().</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_history">HISTORY</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The engine was developed at Hewlett Packard Laboratories Bristol and at
-Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more
-changes made in 1996 to port to Windows, and some C++izing in 1998. A
-lot of the code was written in C, and then some more was written in C++.
-The C\++ code makes heavy use of a list system using macros. This predates
-stl, was portable before stl, and is more efficient than stl lists, but has
-the big negative that if you do get a segmentation violation, it is hard to
-debug.</p></div>
-<div class="paragraph"><p>Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
-to train Tesseract.</p></div>
-<div class="paragraph"><p>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy.
-See <a href="https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf">https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf</a>. With Tesseract 2.00,
-scripts are now included to allow anyone to reproduce some of these tests.
-See <a href="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</a> for more
-details.</p></div>
-<div class="paragraph"><p>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
-and Korean. It also introduces a new, single-file based system of managing
-language data.</p></div>
-<div class="paragraph"><p>Tesseract 3.02 adds BiDirectional text support, the ability to recognize
-multiple languages in a single image, and improved layout analysis.</p></div>
-<div class="paragraph"><p>For further details, see the file ReleaseNotes included with the distribution.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_resources">RESOURCES</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br />
-Information on training: <a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1),
-shape_training(1), mftraining(1), unicharambigs(5), unicharset(5),
-unicharset_extractor(1), wordlist2dawg(1)</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Tesseract development was led at Hewlett-Packard and Google by Ray Smith.
-The development team has included:</p></div>
-<div class="paragraph"><p>Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger,
-Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke,
-Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle,
-Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel
-Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh
-Lloyd, Shobhit Saxena, and Thomas Kielbus.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-28 22:23:47 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>TESSERACT(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overriden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnodes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE return full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+TESSERACT(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>tesseract -
+   command-line OCR engine
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><strong>tesseract</strong> <em>imagename</em>|<em>stdin</em> <em>outputbase</em>|<em>stdout</em> [options&#8230;] [configfile&#8230;]</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1) is a commercial quality OCR engine originally developed at HP
+between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by
+UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed
+at Google since then.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_in_out_arguments">IN/OUT ARGUMENTS</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+<em>imagename</em>
+</dt>
+<dd>
+<p>
+        The name of the input image.  Most image file formats (anything
+        readable by Leptonica) are supported.
+</p>
+</dd>
+<dt class="hdlist1">
+<em>stdin</em>
+</dt>
+<dd>
+<p>
+        Instruction to read data from standard input
+</p>
+</dd>
+<dt class="hdlist1">
+<em>outputbase</em>
+</dt>
+<dd>
+<p>
+        The basename of the output file (to which the appropriate extension
+        will be appended).  By default the output will be named <em>outbase.txt</em>.
+</p>
+</dd>
+<dt class="hdlist1">
+<em>stdout</em>
+</dt>
+<dd>
+<p>
+        Instruction to send output data to standard output
+</p>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_options">OPTIONS</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+<em>--tessdata-dir /path</em>
+</dt>
+<dd>
+<p>
+        Specify the location of tessdata path
+</p>
+</dd>
+<dt class="hdlist1">
+<em>--user-words /path/to/file</em>
+</dt>
+<dd>
+<p>
+        Specify the location of user words file
+</p>
+</dd>
+<dt class="hdlist1">
+<em>--user-patterns /path/to/file</em>
+</dt>
+<dd>
+<p>
+        Specify the location of user patterns file
+</p>
+</dd>
+<dt class="hdlist1">
+<em>-c configvar=value</em>
+</dt>
+<dd>
+<p>
+        Set value for control parameter. Multiple -c arguments are allowed.
+</p>
+</dd>
+<dt class="hdlist1">
+<em>-l lang</em>
+</dt>
+<dd>
+<p>
+        The language to use. If none is specified, English is assumed.
+        Multiple languages may be specified, separated by plus characters.
+        Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
+</p>
+</dd>
+<dt class="hdlist1">
+<em>--psm N</em>
+</dt>
+<dd>
+<p>
+        Set Tesseract to only run a subset of layout analysis and assume
+        a certain form of image. The options for <strong>N</strong> are:
+</p>
+<div class="literalblock">
+<div class="content">
+<pre><code>0 = Orientation and script detection (OSD) only.
+1 = Automatic page segmentation with OSD.
+2 = Automatic page segmentation, but no OSD, or OCR.
+3 = Fully automatic page segmentation, but no OSD. (Default)
+4 = Assume a single column of text of variable sizes.
+5 = Assume a single uniform block of vertically aligned text.
+6 = Assume a single uniform block of text.
+7 = Treat the image as a single text line.
+8 = Treat the image as a single word.
+9 = Treat the image as a single word in a circle.
+10 = Treat the image as a single character.</code></pre>
+</div></div>
+</dd>
+<dt class="hdlist1">
+<em>configfile</em>
+</dt>
+<dd>
+<p>
+        The name of a config to use. A config is a plaintext file which
+        contains a list of variables and their values, one per line, with a
+        space separating variable from value.  Interesting config files
+        include:<br />
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+hocr - Output in hOCR format instead of as a text file.
+</p>
+</li>
+<li>
+<p>
+pdf  - Output in pdf instead of a text file.
+</p>
+</li>
+</ul></div>
+</dd>
+</dl></div>
+<div class="paragraph"><p><strong>Nota Bene:</strong>   The options <em>-l lang</em> and <em>--psm N</em> must occur
+before any <em>configfile</em>.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_single_options">SINGLE OPTIONS</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+<em>-v</em>
+</dt>
+<dd>
+<p>
+        Returns the current version of the tesseract(1) executable.
+</p>
+</dd>
+<dt class="hdlist1">
+<em>--list-langs</em>
+</dt>
+<dd>
+<p>
+        List available languages for the tesseract engine. Can be used with --tessdata-dir.
+</p>
+</dd>
+<dt class="hdlist1">
+<em>--print-parameters</em>
+</dt>
+<dd>
+<p>
+        Print tesseract parameters to stdout.
+</p>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_languages">LANGUAGES</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>There are currently language packs available for the following languages
+(in <a href="https://github.com/tesseract-ocr/tessdata">https://github.com/tesseract-ocr/tessdata</a>):</p></div>
+<div class="paragraph"><p><strong>afr</strong> (Afrikaans)
+<strong>amh</strong> (Amharic)
+<strong>ara</strong> (Arabic)
+<strong>asm</strong> (Assamese)
+<strong>aze</strong> (Azerbaijani)
+<strong>aze_cyrl</strong> (Azerbaijani - Cyrillic)
+<strong>bel</strong> (Belarusian)
+<strong>ben</strong> (Bengali)
+<strong>bod</strong> (Tibetan)
+<strong>bos</strong> (Bosnian)
+<strong>bul</strong> (Bulgarian)
+<strong>cat</strong> (Catalan; Valencian)
+<strong>ceb</strong> (Cebuano)
+<strong>ces</strong> (Czech)
+<strong>chi_sim</strong> (Chinese - Simplified)
+<strong>chi_tra</strong> (Chinese - Traditional)
+<strong>chr</strong> (Cherokee)
+<strong>cym</strong> (Welsh)
+<strong>dan</strong> (Danish)
+<strong>dan_frak</strong> (Danish - Fraktur)
+<strong>deu</strong> (German)
+<strong>deu_frak</strong> (German - Fraktur)
+<strong>dzo</strong> (Dzongkha)
+<strong>ell</strong> (Greek, Modern (1453-))
+<strong>eng</strong> (English)
+<strong>enm</strong> (English, Middle (1100-1500))
+<strong>epo</strong> (Esperanto)
+<strong>equ</strong> (Math / equation detection module)
+<strong>est</strong> (Estonian)
+<strong>eus</strong> (Basque)
+<strong>fas</strong> (Persian)
+<strong>fin</strong> (Finnish)
+<strong>fra</strong> (French)
+<strong>frk</strong> (Frankish)
+<strong>frm</strong> (French, Middle (ca.1400-1600))
+<strong>gle</strong> (Irish)
+<strong>glg</strong> (Galician)
+<strong>grc</strong> (Greek, Ancient (to 1453))
+<strong>guj</strong> (Gujarati)
+<strong>hat</strong> (Haitian; Haitian Creole)
+<strong>heb</strong> (Hebrew)
+<strong>hin</strong> (Hindi)
+<strong>hrv</strong> (Croatian)
+<strong>hun</strong> (Hungarian)
+<strong>iku</strong> (Inuktitut)
+<strong>ind</strong> (Indonesian)
+<strong>isl</strong> (Icelandic)
+<strong>ita</strong> (Italian)
+<strong>ita_old</strong> (Italian - Old)
+<strong>jav</strong> (Javanese)
+<strong>jpn</strong> (Japanese)
+<strong>kan</strong> (Kannada)
+<strong>kat</strong> (Georgian)
+<strong>kat_old</strong> (Georgian - Old)
+<strong>kaz</strong> (Kazakh)
+<strong>khm</strong> (Central Khmer)
+<strong>kir</strong> (Kirghiz; Kyrgyz)
+<strong>kor</strong> (Korean)
+<strong>kur</strong> (Kurdish)
+<strong>lao</strong> (Lao)
+<strong>lat</strong> (Latin)
+<strong>lav</strong> (Latvian)
+<strong>lit</strong> (Lithuanian)
+<strong>mal</strong> (Malayalam)
+<strong>mar</strong> (Marathi)
+<strong>mkd</strong> (Macedonian)
+<strong>mlt</strong> (Maltese)
+<strong>msa</strong> (Malay)
+<strong>mya</strong> (Burmese)
+<strong>nep</strong> (Nepali)
+<strong>nld</strong> (Dutch; Flemish)
+<strong>nor</strong> (Norwegian)
+<strong>ori</strong> (Oriya)
+<strong>osd</strong> (Orientation and script detection module)
+<strong>pan</strong> (Panjabi; Punjabi)
+<strong>pol</strong> (Polish)
+<strong>por</strong> (Portuguese)
+<strong>pus</strong> (Pushto; Pashto)
+<strong>ron</strong> (Romanian; Moldavian; Moldovan)
+<strong>rus</strong> (Russian)
+<strong>san</strong> (Sanskrit)
+<strong>sin</strong> (Sinhala; Sinhalese)
+<strong>slk</strong> (Slovak)
+<strong>slk_frak</strong> (Slovak - Fraktur)
+<strong>slv</strong> (Slovenian)
+<strong>spa</strong> (Spanish; Castilian)
+<strong>spa_old</strong> (Spanish; Castilian - Old)
+<strong>sqi</strong> (Albanian)
+<strong>srp</strong> (Serbian)
+<strong>srp_latn</strong> (Serbian - Latin)
+<strong>swa</strong> (Swahili)
+<strong>swe</strong> (Swedish)
+<strong>syr</strong> (Syriac)
+<strong>tam</strong> (Tamil)
+<strong>tel</strong> (Telugu)
+<strong>tgk</strong> (Tajik)
+<strong>tgl</strong> (Tagalog)
+<strong>tha</strong> (Thai)
+<strong>tir</strong> (Tigrinya)
+<strong>tur</strong> (Turkish)
+<strong>uig</strong> (Uighur; Uyghur)
+<strong>ukr</strong> (Ukrainian)
+<strong>urd</strong> (Urdu)
+<strong>uzb</strong> (Uzbek)
+<strong>uzb_cyrl</strong> (Uzbek - Cyrillic)
+<strong>vie</strong> (Vietnamese)
+<strong>yid</strong> (Yiddish)</p></div>
+<div class="paragraph"><p>To use a non-standard language pack named <strong>foo.traineddata</strong>, set the
+<strong>TESSDATA_PREFIX</strong> environment variable so the file can be found at
+<strong>TESSDATA_PREFIX</strong>/tessdata/<strong>foo</strong>.traineddata and give Tesseract the
+argument <em>-l foo</em>.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_config_files_and_augmenting_with_user_data">CONFIG FILES AND AUGMENTING WITH USER DATA</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Tesseract config files consist of lines with variable-value pairs (space
+separated).  The variables are documented as flags in the source code like
+the following one in tesseractclass.h:</p></div>
+<div class="paragraph"><p>STRING_VAR_H(tessedit_char_blacklist, "",
+             "Blacklist of chars not to recognize");</p></div>
+<div class="paragraph"><p>These variables may enable or disable various features of the engine, and
+may cause it to load (or not load) various data.  For instance, let&#8217;s suppose
+you want to OCR in English, but suppress the normal dictionary and load an
+alternative word list and an alternative list of patterns&#8201;&#8212;&#8201;these two files
+are the most commonly used extra data files.</p></div>
+<div class="paragraph"><p>If your language pack is in /path/to/eng.traineddata  and the hocr config
+is in /path/to/configs/hocr then create three new files:</p></div>
+<div class="paragraph"><p>/path/to/eng.user-words:</p></div>
+<div class="verseblock">
+<pre class="content">the
+quick
+brown
+fox
+jumped</pre>
+<div class="attribution">
+</div></div>
+<div class="paragraph"><p>/path/to/eng.user-patterns:</p></div>
+<div class="verseblock">
+<pre class="content">1-\d\d\d-GOOG-411
+www.\n\\\*.com</pre>
+<div class="attribution">
+</div></div>
+<div class="paragraph"><p>/path/to/configs/bazaar:</p></div>
+<div class="verseblock">
+<pre class="content">load_system_dawg     F
+load_freq_dawg       F
+user_words_suffix    user-words
+user_patterns_suffix user-patterns</pre>
+<div class="attribution">
+</div></div>
+<div class="paragraph"><p>Now, if you pass the word <em>bazaar</em> as a trailing command line parameter
+to Tesseract, Tesseract will not bother loading the system dictionary nor
+the dictionary of frequent words and will load and use the eng.user-words
+and eng.user-patterns files you provided.  The former is a simple word list,
+one per line.  The format of the latter is documented in dict/trie.h
+on read_pattern_list().</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_history">HISTORY</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The engine was developed at Hewlett Packard Laboratories Bristol and at
+Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more
+changes made in 1996 to port to Windows, and some C++izing in 1998. A
+lot of the code was written in C, and then some more was written in C++.
+The C++ code makes heavy use of a list system using macros. This predates
+stl, was portable before stl, and is more efficient than stl lists, but has
+the big negative that if you do get a segmentation violation, it is hard to
+debug.</p></div>
+<div class="paragraph"><p>Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
+to train Tesseract.</p></div>
+<div class="paragraph"><p>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy.
+See <a href="https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf">https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf</a>. With Tesseract 2.00,
+scripts are now included to allow anyone to reproduce some of these tests.
+See <a href="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</a> for more
+details.</p></div>
+<div class="paragraph"><p>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
+and Korean. It also introduces a new, single-file based system of managing
+language data.</p></div>
+<div class="paragraph"><p>Tesseract 3.02 adds BiDirectional text support, the ability to recognize
+multiple languages in a single image, and improved layout analysis.</p></div>
+<div class="paragraph"><p>For further details, see the file ReleaseNotes included with the distribution.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_resources">RESOURCES</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br />
+Information on training: <a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1),
+shape_training(1), mftraining(1), unicharambigs(5), unicharset(5),
+unicharset_extractor(1), wordlist2dawg(1)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Tesseract development was led at Hewlett-Packard and Google by Ray Smith.
+The development team has included:</p></div>
+<div class="paragraph"><p>Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger,
+Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke,
+Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle,
+Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel
+Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh
+Lloyd, Shobhit Saxena, and Thomas Kielbus.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-28 22:23:47 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/tesseract.1.xml b/doc/tesseract.1.xml
index 2f971caa7b..8ddce87cd6 100644
--- a/doc/tesseract.1.xml
+++ b/doc/tesseract.1.xml
@@ -1,424 +1,424 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>TESSERACT(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>tesseract</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>tesseract</refname>
-    <refpurpose>command-line OCR engine</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara><emphasis role="strong">tesseract</emphasis> <emphasis>imagename</emphasis>|<emphasis>stdin</emphasis> <emphasis>outputbase</emphasis>|<emphasis>stdout</emphasis> [options&#8230;] [configfile&#8230;]</simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>tesseract(1) is a commercial quality OCR engine originally developed at HP
-between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by
-UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed
-at Google since then.</simpara>
-</refsect1>
-<refsect1 id="_in_out_arguments">
-<title>IN/OUT ARGUMENTS</title>
-<variablelist>
-<varlistentry>
-<term>
-<emphasis>imagename</emphasis>
-</term>
-<listitem>
-<simpara>
-        The name of the input image.  Most image file formats (anything
-        readable by Leptonica) are supported.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>stdin</emphasis>
-</term>
-<listitem>
-<simpara>
-        Instruction to read data from standard input
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>outputbase</emphasis>
-</term>
-<listitem>
-<simpara>
-        The basename of the output file (to which the appropriate extension
-        will be appended).  By default the output will be named <emphasis>outbase.txt</emphasis>.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>stdout</emphasis>
-</term>
-<listitem>
-<simpara>
-        Instruction to sent output data to standard output
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_options">
-<title>OPTIONS</title>
-<variablelist>
-<varlistentry>
-<term>
-<emphasis>--tessdata-dir /path</emphasis>
-</term>
-<listitem>
-<simpara>
-        Specify the location of tessdata path
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>--user-words /path/to/file</emphasis>
-</term>
-<listitem>
-<simpara>
-        Specify the location of user words file
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>--user-patterns /path/to/file specify</emphasis>
-</term>
-<listitem>
-<simpara>
-        The location of user patterns file
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>-c configvar=value</emphasis>
-</term>
-<listitem>
-<simpara>
-        Set value for control parameter. Multiple -c arguments are allowed.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>-l lang</emphasis>
-</term>
-<listitem>
-<simpara>
-        The language to use. If none is specified, English is assumed.
-        Multiple languages may be specified, separated by plus characters.
-        Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>-psm N</emphasis>
-</term>
-<listitem>
-<simpara>
-        Set Tesseract to only run a subset of layout analysis and assume
-        a certain form of image. The options for <emphasis role="strong">N</emphasis> are:
-</simpara>
-<literallayout class="monospaced">0 = Orientation and script detection (OSD) only.
-1 = Automatic page segmentation with OSD.
-2 = Automatic page segmentation, but no OSD, or OCR.
-3 = Fully automatic page segmentation, but no OSD. (Default)
-4 = Assume a single column of text of variable sizes.
-5 = Assume a single uniform block of vertically aligned text.
-6 = Assume a single uniform block of text.
-7 = Treat the image as a single text line.
-8 = Treat the image as a single word.
-9 = Treat the image as a single word in a circle.
-10 = Treat the image as a single character.</literallayout>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>configfile</emphasis>
-</term>
-<listitem>
-<simpara>
-        The name of a config to use. A config is a plaintext file which
-        contains a list of variables and their values, one per line, with a
-        space separating variable from value.  Interesting config files
-        include:<?asciidoc-br?>
-</simpara>
-<itemizedlist>
-<listitem>
-<simpara>
-hocr - Output in hOCR format instead of as a text file.
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-pdf  - Output in pdf instead of a text file.
-</simpara>
-</listitem>
-</itemizedlist>
-</listitem>
-</varlistentry>
-</variablelist>
-<simpara><emphasis role="strong">Nota Bene:</emphasis>   The options <emphasis>-l lang</emphasis> and <emphasis>-psm N</emphasis> must occur
-before any <emphasis>configfile</emphasis>.</simpara>
-</refsect1>
-<refsect1 id="_single_options">
-<title>SINGLE OPTIONS</title>
-<variablelist>
-<varlistentry>
-<term>
-<emphasis>-v</emphasis>
-</term>
-<listitem>
-<simpara>
-        Returns the current version of the tesseract(1) executable.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>--list-langs</emphasis>
-</term>
-<listitem>
-<simpara>
-        list available languages for tesseract engine. Can be used with --tessdata-dir.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>--print-parameters</emphasis>
-</term>
-<listitem>
-<simpara>
-        print tesseract parameters to the stdout.
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_languages">
-<title>LANGUAGES</title>
-<simpara>There are currently language packs available for the following languages
-(in <ulink url="https://github.com/tesseract-ocr/tessdata">https://github.com/tesseract-ocr/tessdata</ulink>):</simpara>
-<simpara><emphasis role="strong">afr</emphasis> (Afrikaans)
-<emphasis role="strong">amh</emphasis> (Amharic)
-<emphasis role="strong">ara</emphasis> (Arabic)
-<emphasis role="strong">asm</emphasis> (Assamese)
-<emphasis role="strong">aze</emphasis> (Azerbaijani)
-<emphasis role="strong">aze_cyrl</emphasis> (Azerbaijani - Cyrilic)
-<emphasis role="strong">bel</emphasis> (Belarusian)
-<emphasis role="strong">ben</emphasis> (Bengali)
-<emphasis role="strong">bod</emphasis> (Tibetan)
-<emphasis role="strong">bos</emphasis> (Bosnian)
-<emphasis role="strong">bul</emphasis> (Bulgarian)
-<emphasis role="strong">cat</emphasis> (Catalan; Valencian)
-<emphasis role="strong">ceb</emphasis> (Cebuano)
-<emphasis role="strong">ces</emphasis> (Czech)
-<emphasis role="strong">chi_sim</emphasis> (Chinese - Simplified)
-<emphasis role="strong">chi_tra</emphasis> (Chinese - Traditional)
-<emphasis role="strong">chr</emphasis> (Cherokee)
-<emphasis role="strong">cym</emphasis> (Welsh)
-<emphasis role="strong">dan</emphasis> (Danish)
-<emphasis role="strong">dan_frak</emphasis> (Danish - Fraktur)
-<emphasis role="strong">deu</emphasis> (German)
-<emphasis role="strong">deu_frak</emphasis> (German - Fraktur)
-<emphasis role="strong">dzo</emphasis> (Dzongkha)
-<emphasis role="strong">ell</emphasis> (Greek, Modern (1453-))
-<emphasis role="strong">eng</emphasis> (English)
-<emphasis role="strong">enm</emphasis> (English, Middle (1100-1500))
-<emphasis role="strong">epo</emphasis> (Esperanto)
-<emphasis role="strong">equ</emphasis> (Math / equation detection module)
-<emphasis role="strong">est</emphasis> (Estonian)
-<emphasis role="strong">eus</emphasis> (Basque)
-<emphasis role="strong">fas</emphasis> (Persian)
-<emphasis role="strong">fin</emphasis> (Finnish)
-<emphasis role="strong">fra</emphasis> (French)
-<emphasis role="strong">frk</emphasis> (Frankish)
-<emphasis role="strong">frm</emphasis> (French, Middle (ca.1400-1600))
-<emphasis role="strong">gle</emphasis> (Irish)
-<emphasis role="strong">glg</emphasis> (Galician)
-<emphasis role="strong">grc</emphasis> (Greek, Ancient (to 1453))
-<emphasis role="strong">guj</emphasis> (Gujarati)
-<emphasis role="strong">hat</emphasis> (Haitian; Haitian Creole)
-<emphasis role="strong">heb</emphasis> (Hebrew)
-<emphasis role="strong">hin</emphasis> (Hindi)
-<emphasis role="strong">hrv</emphasis> (Croatian)
-<emphasis role="strong">hun</emphasis> (Hungarian)
-<emphasis role="strong">iku</emphasis> (Inuktitut)
-<emphasis role="strong">ind</emphasis> (Indonesian)
-<emphasis role="strong">isl</emphasis> (Icelandic)
-<emphasis role="strong">ita</emphasis> (Italian)
-<emphasis role="strong">ita_old</emphasis> (Italian - Old)
-<emphasis role="strong">jav</emphasis> (Javanese)
-<emphasis role="strong">jpn</emphasis> (Japanese)
-<emphasis role="strong">kan</emphasis> (Kannada)
-<emphasis role="strong">kat</emphasis> (Georgian)
-<emphasis role="strong">kat_old</emphasis> (Georgian - Old)
-<emphasis role="strong">kaz</emphasis> (Kazakh)
-<emphasis role="strong">khm</emphasis> (Central Khmer)
-<emphasis role="strong">kir</emphasis> (Kirghiz; Kyrgyz)
-<emphasis role="strong">kor</emphasis> (Korean)
-<emphasis role="strong">kur</emphasis> (Kurdish)
-<emphasis role="strong">lao</emphasis> (Lao)
-<emphasis role="strong">lat</emphasis> (Latin)
-<emphasis role="strong">lav</emphasis> (Latvian)
-<emphasis role="strong">lit</emphasis> (Lithuanian)
-<emphasis role="strong">mal</emphasis> (Malayalam)
-<emphasis role="strong">mar</emphasis> (Marathi)
-<emphasis role="strong">mkd</emphasis> (Macedonian)
-<emphasis role="strong">mlt</emphasis> (Maltese)
-<emphasis role="strong">msa</emphasis> (Malay)
-<emphasis role="strong">mya</emphasis> (Burmese)
-<emphasis role="strong">nep</emphasis> (Nepali)
-<emphasis role="strong">nld</emphasis> (Dutch; Flemish)
-<emphasis role="strong">nor</emphasis> (Norwegian)
-<emphasis role="strong">ori</emphasis> (Oriya)
-<emphasis role="strong">osd</emphasis> (Orientation and script detection module)
-<emphasis role="strong">pan</emphasis> (Panjabi; Punjabi)
-<emphasis role="strong">pol</emphasis> (Polish)
-<emphasis role="strong">por</emphasis> (Portuguese)
-<emphasis role="strong">pus</emphasis> (Pushto; Pashto)
-<emphasis role="strong">ron</emphasis> (Romanian; Moldavian; Moldovan)
-<emphasis role="strong">rus</emphasis> (Russian)
-<emphasis role="strong">san</emphasis> (Sanskrit)
-<emphasis role="strong">sin</emphasis> (Sinhala; Sinhalese)
-<emphasis role="strong">slk</emphasis> (Slovak)
-<emphasis role="strong">slk_frak</emphasis> (Slovak - Fraktur)
-<emphasis role="strong">slv</emphasis> (Slovenian)
-<emphasis role="strong">spa</emphasis> (Spanish; Castilian)
-<emphasis role="strong">spa_old</emphasis> (Spanish; Castilian - Old)
-<emphasis role="strong">sqi</emphasis> (Albanian)
-<emphasis role="strong">srp</emphasis> (Serbian)
-<emphasis role="strong">srp_latn</emphasis> (Serbian - Latin)
-<emphasis role="strong">swa</emphasis> (Swahili)
-<emphasis role="strong">swe</emphasis> (Swedish)
-<emphasis role="strong">syr</emphasis> (Syriac)
-<emphasis role="strong">tam</emphasis> (Tamil)
-<emphasis role="strong">tel</emphasis> (Telugu)
-<emphasis role="strong">tgk</emphasis> (Tajik)
-<emphasis role="strong">tgl</emphasis> (Tagalog)
-<emphasis role="strong">tha</emphasis> (Thai)
-<emphasis role="strong">tir</emphasis> (Tigrinya)
-<emphasis role="strong">tur</emphasis> (Turkish)
-<emphasis role="strong">uig</emphasis> (Uighur; Uyghur)
-<emphasis role="strong">ukr</emphasis> (Ukrainian)
-<emphasis role="strong">urd</emphasis> (Urdu)
-<emphasis role="strong">uzb</emphasis> (Uzbek)
-<emphasis role="strong">uzb_cyrl</emphasis> (Uzbek - Cyrilic)
-<emphasis role="strong">vie</emphasis> (Vietnamese)
-<emphasis role="strong">yid</emphasis> (Yiddish)</simpara>
-<simpara>To use a non-standard language pack named <emphasis role="strong">foo.traineddata</emphasis>, set the
-<emphasis role="strong">TESSDATA_PREFIX</emphasis> environment variable so the file can be found at
-<emphasis role="strong">TESSDATA_PREFIX</emphasis>/tessdata/<emphasis role="strong">foo</emphasis>.traineddata and give Tesseract the
-argument <emphasis>-l foo</emphasis>.</simpara>
-</refsect1>
-<refsect1 id="_config_files_and_augmenting_with_user_data">
-<title>CONFIG FILES AND AUGMENTING WITH USER DATA</title>
-<simpara>Tesseract config files consist of lines with variable-value pairs (space
-separated).  The variables are documented as flags in the source code like
-the following one in tesseractclass.h:</simpara>
-<simpara>STRING_VAR_H(tessedit_char_blacklist, "",
-             "Blacklist of chars not to recognize");</simpara>
-<simpara>These variables may enable or disable various features of the engine, and
-may cause it to load (or not load) various data.  For instance, let&#8217;s suppose
-you want to OCR in English, but suppress the normal dictionary and load an
-alternative word list and an alternative list of patterns&#8201;&#8212;&#8201;these two files
-are the most commonly used extra data files.</simpara>
-<simpara>If your language pack is in /path/to/eng.traineddata  and the hocr config
-is in /path/to/configs/hocr then create three new files:</simpara>
-<simpara>/path/to/eng.user-words:</simpara>
-<blockquote>
-<literallayout>the
-quick
-brown
-fox
-jumped</literallayout>
-</blockquote>
-<simpara>/path/to/eng.user-patterns:</simpara>
-<blockquote>
-<literallayout>1-\d\d\d-GOOG-411
-www.\n\\\*.com</literallayout>
-</blockquote>
-<simpara>/path/to/configs/bazaar:</simpara>
-<blockquote>
-<literallayout>load_system_dawg     F
-load_freq_dawg       F
-user_words_suffix    user-words
-user_patterns_suffix user-patterns</literallayout>
-</blockquote>
-<simpara>Now, if you pass the word <emphasis>bazaar</emphasis> as a trailing command line parameter
-to Tesseract, Tesseract will not bother loading the system dictionary nor
-the dictionary of frequent words and will load and use the eng.user-words
-and eng.user-patterns files you provided.  The former is a simple word list,
-one per line.  The format of the latter is documented in dict/trie.h
-on read_pattern_list().</simpara>
-</refsect1>
-<refsect1 id="_history">
-<title>HISTORY</title>
-<simpara>The engine was developed at Hewlett Packard Laboratories Bristol and at
-Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more
-changes made in 1996 to port to Windows, and some C++izing in 1998. A
-lot of the code was written in C, and then some more was written in C++.
-The C\++ code makes heavy use of a list system using macros. This predates
-stl, was portable before stl, and is more efficient than stl lists, but has
-the big negative that if you do get a segmentation violation, it is hard to
-debug.</simpara>
-<simpara>Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
-to train Tesseract.</simpara>
-<simpara>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy.
-See <ulink url="https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf">https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf</ulink>. With Tesseract 2.00,
-scripts are now included to allow anyone to reproduce some of these tests.
-See <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</ulink> for more
-details.</simpara>
-<simpara>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
-and Korean. It also introduces a new, single-file based system of managing
-language data.</simpara>
-<simpara>Tesseract 3.02 adds BiDirectional text support, the ability to recognize
-multiple languages in a single image, and improved layout analysis.</simpara>
-<simpara>For further details, see the file ReleaseNotes included with the distribution.</simpara>
-</refsect1>
-<refsect1 id="_resources">
-<title>RESOURCES</title>
-<simpara>Main web site: <ulink url="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</ulink><?asciidoc-br?>
-Information on training: <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1),
-shape_training(1), mftraining(1), unicharambigs(5), unicharset(5),
-unicharset_extractor(1), wordlist2dawg(1)</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>Tesseract development was led at Hewlett-Packard and Google by Ray Smith.
-The development team has included:</simpara>
-<simpara>Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger,
-Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke,
-Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle,
-Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel
-Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh
-Lloyd, Shobhit Saxena, and Thomas Kielbus.</simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>TESSERACT(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>tesseract</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>tesseract</refname>
+    <refpurpose>command-line OCR engine</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara><emphasis role="strong">tesseract</emphasis> <emphasis>imagename</emphasis>|<emphasis>stdin</emphasis> <emphasis>outputbase</emphasis>|<emphasis>stdout</emphasis> [options&#8230;] [configfile&#8230;]</simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>tesseract(1) is a commercial quality OCR engine originally developed at HP
+between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by
+UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed
+at Google since then.</simpara>
+</refsect1>
+<refsect1 id="_in_out_arguments">
+<title>IN/OUT ARGUMENTS</title>
+<variablelist>
+<varlistentry>
+<term>
+<emphasis>imagename</emphasis>
+</term>
+<listitem>
+<simpara>
+        The name of the input image.  Most image file formats (anything
+        readable by Leptonica) are supported.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>stdin</emphasis>
+</term>
+<listitem>
+<simpara>
+        Instruction to read data from standard input
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>outputbase</emphasis>
+</term>
+<listitem>
+<simpara>
+        The basename of the output file (to which the appropriate extension
+        will be appended).  By default the output will be named <emphasis>outputbase.txt</emphasis>.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>stdout</emphasis>
+</term>
+<listitem>
+<simpara>
+        Instruction to send output data to standard output
+</simpara>
+</listitem>
+</varlistentry>
+</variablelist>
+</refsect1>
+<refsect1 id="_options">
+<title>OPTIONS</title>
+<variablelist>
+<varlistentry>
+<term>
+<emphasis>--tessdata-dir /path</emphasis>
+</term>
+<listitem>
+<simpara>
+        Specify the location of tessdata path
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>--user-words /path/to/file</emphasis>
+</term>
+<listitem>
+<simpara>
+        Specify the location of user words file
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>--user-patterns /path/to/file</emphasis>
+</term>
+<listitem>
+<simpara>
+        Specify the location of user patterns file
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>-c configvar=value</emphasis>
+</term>
+<listitem>
+<simpara>
+        Set value for control parameter. Multiple -c arguments are allowed.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>-l lang</emphasis>
+</term>
+<listitem>
+<simpara>
+        The language to use. If none is specified, English is assumed.
+        Multiple languages may be specified, separated by plus characters.
+        Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>--psm N</emphasis>
+</term>
+<listitem>
+<simpara>
+        Set Tesseract to only run a subset of layout analysis and assume
+        a certain form of image. The options for <emphasis role="strong">N</emphasis> are:
+</simpara>
+<literallayout class="monospaced">0 = Orientation and script detection (OSD) only.
+1 = Automatic page segmentation with OSD.
+2 = Automatic page segmentation, but no OSD, or OCR.
+3 = Fully automatic page segmentation, but no OSD. (Default)
+4 = Assume a single column of text of variable sizes.
+5 = Assume a single uniform block of vertically aligned text.
+6 = Assume a single uniform block of text.
+7 = Treat the image as a single text line.
+8 = Treat the image as a single word.
+9 = Treat the image as a single word in a circle.
+10 = Treat the image as a single character.</literallayout>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>configfile</emphasis>
+</term>
+<listitem>
+<simpara>
+        The name of a config to use. A config is a plaintext file which
+        contains a list of variables and their values, one per line, with a
+        space separating variable from value.  Interesting config files
+        include:<?asciidoc-br?>
+</simpara>
+<itemizedlist>
+<listitem>
+<simpara>
+hocr - Output in hOCR format instead of as a text file.
+</simpara>
+</listitem>
+<listitem>
+<simpara>
+pdf  - Output in pdf instead of a text file.
+</simpara>
+</listitem>
+</itemizedlist>
+</listitem>
+</varlistentry>
+</variablelist>
+<simpara><emphasis role="strong">Nota Bene:</emphasis>   The options <emphasis>-l lang</emphasis> and <emphasis>--psm N</emphasis> must occur
+before any <emphasis>configfile</emphasis>.</simpara>
+</refsect1>
+<refsect1 id="_single_options">
+<title>SINGLE OPTIONS</title>
+<variablelist>
+<varlistentry>
+<term>
+<emphasis>-v</emphasis>
+</term>
+<listitem>
+<simpara>
+        Returns the current version of the tesseract(1) executable.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>--list-langs</emphasis>
+</term>
+<listitem>
+<simpara>
+        List available languages for the tesseract engine. Can be used with --tessdata-dir.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>--print-parameters</emphasis>
+</term>
+<listitem>
+<simpara>
+        Print tesseract parameters to stdout.
+</simpara>
+</listitem>
+</varlistentry>
+</variablelist>
+</refsect1>
+<refsect1 id="_languages">
+<title>LANGUAGES</title>
+<simpara>There are currently language packs available for the following languages
+(in <ulink url="https://github.com/tesseract-ocr/tessdata">https://github.com/tesseract-ocr/tessdata</ulink>):</simpara>
+<simpara><emphasis role="strong">afr</emphasis> (Afrikaans)
+<emphasis role="strong">amh</emphasis> (Amharic)
+<emphasis role="strong">ara</emphasis> (Arabic)
+<emphasis role="strong">asm</emphasis> (Assamese)
+<emphasis role="strong">aze</emphasis> (Azerbaijani)
+<emphasis role="strong">aze_cyrl</emphasis> (Azerbaijani - Cyrillic)
+<emphasis role="strong">bel</emphasis> (Belarusian)
+<emphasis role="strong">ben</emphasis> (Bengali)
+<emphasis role="strong">bod</emphasis> (Tibetan)
+<emphasis role="strong">bos</emphasis> (Bosnian)
+<emphasis role="strong">bul</emphasis> (Bulgarian)
+<emphasis role="strong">cat</emphasis> (Catalan; Valencian)
+<emphasis role="strong">ceb</emphasis> (Cebuano)
+<emphasis role="strong">ces</emphasis> (Czech)
+<emphasis role="strong">chi_sim</emphasis> (Chinese - Simplified)
+<emphasis role="strong">chi_tra</emphasis> (Chinese - Traditional)
+<emphasis role="strong">chr</emphasis> (Cherokee)
+<emphasis role="strong">cym</emphasis> (Welsh)
+<emphasis role="strong">dan</emphasis> (Danish)
+<emphasis role="strong">dan_frak</emphasis> (Danish - Fraktur)
+<emphasis role="strong">deu</emphasis> (German)
+<emphasis role="strong">deu_frak</emphasis> (German - Fraktur)
+<emphasis role="strong">dzo</emphasis> (Dzongkha)
+<emphasis role="strong">ell</emphasis> (Greek, Modern (1453-))
+<emphasis role="strong">eng</emphasis> (English)
+<emphasis role="strong">enm</emphasis> (English, Middle (1100-1500))
+<emphasis role="strong">epo</emphasis> (Esperanto)
+<emphasis role="strong">equ</emphasis> (Math / equation detection module)
+<emphasis role="strong">est</emphasis> (Estonian)
+<emphasis role="strong">eus</emphasis> (Basque)
+<emphasis role="strong">fas</emphasis> (Persian)
+<emphasis role="strong">fin</emphasis> (Finnish)
+<emphasis role="strong">fra</emphasis> (French)
+<emphasis role="strong">frk</emphasis> (Frankish)
+<emphasis role="strong">frm</emphasis> (French, Middle (ca.1400-1600))
+<emphasis role="strong">gle</emphasis> (Irish)
+<emphasis role="strong">glg</emphasis> (Galician)
+<emphasis role="strong">grc</emphasis> (Greek, Ancient (to 1453))
+<emphasis role="strong">guj</emphasis> (Gujarati)
+<emphasis role="strong">hat</emphasis> (Haitian; Haitian Creole)
+<emphasis role="strong">heb</emphasis> (Hebrew)
+<emphasis role="strong">hin</emphasis> (Hindi)
+<emphasis role="strong">hrv</emphasis> (Croatian)
+<emphasis role="strong">hun</emphasis> (Hungarian)
+<emphasis role="strong">iku</emphasis> (Inuktitut)
+<emphasis role="strong">ind</emphasis> (Indonesian)
+<emphasis role="strong">isl</emphasis> (Icelandic)
+<emphasis role="strong">ita</emphasis> (Italian)
+<emphasis role="strong">ita_old</emphasis> (Italian - Old)
+<emphasis role="strong">jav</emphasis> (Javanese)
+<emphasis role="strong">jpn</emphasis> (Japanese)
+<emphasis role="strong">kan</emphasis> (Kannada)
+<emphasis role="strong">kat</emphasis> (Georgian)
+<emphasis role="strong">kat_old</emphasis> (Georgian - Old)
+<emphasis role="strong">kaz</emphasis> (Kazakh)
+<emphasis role="strong">khm</emphasis> (Central Khmer)
+<emphasis role="strong">kir</emphasis> (Kirghiz; Kyrgyz)
+<emphasis role="strong">kor</emphasis> (Korean)
+<emphasis role="strong">kur</emphasis> (Kurdish)
+<emphasis role="strong">lao</emphasis> (Lao)
+<emphasis role="strong">lat</emphasis> (Latin)
+<emphasis role="strong">lav</emphasis> (Latvian)
+<emphasis role="strong">lit</emphasis> (Lithuanian)
+<emphasis role="strong">mal</emphasis> (Malayalam)
+<emphasis role="strong">mar</emphasis> (Marathi)
+<emphasis role="strong">mkd</emphasis> (Macedonian)
+<emphasis role="strong">mlt</emphasis> (Maltese)
+<emphasis role="strong">msa</emphasis> (Malay)
+<emphasis role="strong">mya</emphasis> (Burmese)
+<emphasis role="strong">nep</emphasis> (Nepali)
+<emphasis role="strong">nld</emphasis> (Dutch; Flemish)
+<emphasis role="strong">nor</emphasis> (Norwegian)
+<emphasis role="strong">ori</emphasis> (Oriya)
+<emphasis role="strong">osd</emphasis> (Orientation and script detection module)
+<emphasis role="strong">pan</emphasis> (Panjabi; Punjabi)
+<emphasis role="strong">pol</emphasis> (Polish)
+<emphasis role="strong">por</emphasis> (Portuguese)
+<emphasis role="strong">pus</emphasis> (Pushto; Pashto)
+<emphasis role="strong">ron</emphasis> (Romanian; Moldavian; Moldovan)
+<emphasis role="strong">rus</emphasis> (Russian)
+<emphasis role="strong">san</emphasis> (Sanskrit)
+<emphasis role="strong">sin</emphasis> (Sinhala; Sinhalese)
+<emphasis role="strong">slk</emphasis> (Slovak)
+<emphasis role="strong">slk_frak</emphasis> (Slovak - Fraktur)
+<emphasis role="strong">slv</emphasis> (Slovenian)
+<emphasis role="strong">spa</emphasis> (Spanish; Castilian)
+<emphasis role="strong">spa_old</emphasis> (Spanish; Castilian - Old)
+<emphasis role="strong">sqi</emphasis> (Albanian)
+<emphasis role="strong">srp</emphasis> (Serbian)
+<emphasis role="strong">srp_latn</emphasis> (Serbian - Latin)
+<emphasis role="strong">swa</emphasis> (Swahili)
+<emphasis role="strong">swe</emphasis> (Swedish)
+<emphasis role="strong">syr</emphasis> (Syriac)
+<emphasis role="strong">tam</emphasis> (Tamil)
+<emphasis role="strong">tel</emphasis> (Telugu)
+<emphasis role="strong">tgk</emphasis> (Tajik)
+<emphasis role="strong">tgl</emphasis> (Tagalog)
+<emphasis role="strong">tha</emphasis> (Thai)
+<emphasis role="strong">tir</emphasis> (Tigrinya)
+<emphasis role="strong">tur</emphasis> (Turkish)
+<emphasis role="strong">uig</emphasis> (Uighur; Uyghur)
+<emphasis role="strong">ukr</emphasis> (Ukrainian)
+<emphasis role="strong">urd</emphasis> (Urdu)
+<emphasis role="strong">uzb</emphasis> (Uzbek)
+<emphasis role="strong">uzb_cyrl</emphasis> (Uzbek - Cyrillic)
+<emphasis role="strong">vie</emphasis> (Vietnamese)
+<emphasis role="strong">yid</emphasis> (Yiddish)</simpara>
+<simpara>To use a non-standard language pack named <emphasis role="strong">foo.traineddata</emphasis>, set the
+<emphasis role="strong">TESSDATA_PREFIX</emphasis> environment variable so the file can be found at
+<emphasis role="strong">TESSDATA_PREFIX</emphasis>/tessdata/<emphasis role="strong">foo</emphasis>.traineddata and give Tesseract the
+argument <emphasis>-l foo</emphasis>.</simpara>
+</refsect1>
+<refsect1 id="_config_files_and_augmenting_with_user_data">
+<title>CONFIG FILES AND AUGMENTING WITH USER DATA</title>
+<simpara>Tesseract config files consist of lines with variable-value pairs (space
+separated).  The variables are documented as flags in the source code like
+the following one in tesseractclass.h:</simpara>
+<simpara>STRING_VAR_H(tessedit_char_blacklist, "",
+             "Blacklist of chars not to recognize");</simpara>
+<simpara>These variables may enable or disable various features of the engine, and
+may cause it to load (or not load) various data.  For instance, let&#8217;s suppose
+you want to OCR in English, but suppress the normal dictionary and load an
+alternative word list and an alternative list of patterns&#8201;&#8212;&#8201;these two files
+are the most commonly used extra data files.</simpara>
+<simpara>If your language pack is in /path/to/eng.traineddata  and the hocr config
+is in /path/to/configs/hocr then create three new files:</simpara>
+<simpara>/path/to/eng.user-words:</simpara>
+<blockquote>
+<literallayout>the
+quick
+brown
+fox
+jumped</literallayout>
+</blockquote>
+<simpara>/path/to/eng.user-patterns:</simpara>
+<blockquote>
+<literallayout>1-\d\d\d-GOOG-411
+www.\n\\\*.com</literallayout>
+</blockquote>
+<simpara>/path/to/configs/bazaar:</simpara>
+<blockquote>
+<literallayout>load_system_dawg     F
+load_freq_dawg       F
+user_words_suffix    user-words
+user_patterns_suffix user-patterns</literallayout>
+</blockquote>
+<simpara>Now, if you pass the word <emphasis>bazaar</emphasis> as a trailing command line parameter
+to Tesseract, Tesseract will not bother loading the system dictionary nor
+the dictionary of frequent words and will load and use the eng.user-words
+and eng.user-patterns files you provided.  The former is a simple word list,
+one per line.  The format of the latter is documented in dict/trie.h
+on read_pattern_list().</simpara>
+</refsect1>
+<refsect1 id="_history">
+<title>HISTORY</title>
+<simpara>The engine was developed at Hewlett Packard Laboratories Bristol and at
+Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more
+changes made in 1996 to port to Windows, and some C++izing in 1998. A
+lot of the code was written in C, and then some more was written in C++.
+The C++ code makes heavy use of a list system using macros. This predates
+stl, was portable before stl, and is more efficient than stl lists, but has
+the big negative that if you do get a segmentation violation, it is hard to
+debug.</simpara>
+<simpara>Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
+to train Tesseract.</simpara>
+<simpara>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy.
+See <ulink url="https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf">https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf</ulink>. With Tesseract 2.00,
+scripts are now included to allow anyone to reproduce some of these tests.
+See <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</ulink> for more
+details.</simpara>
+<simpara>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
+and Korean. It also introduces a new, single-file based system of managing
+language data.</simpara>
+<simpara>Tesseract 3.02 adds BiDirectional text support, the ability to recognize
+multiple languages in a single image, and improved layout analysis.</simpara>
+<simpara>For further details, see the file ReleaseNotes included with the distribution.</simpara>
+</refsect1>
+<refsect1 id="_resources">
+<title>RESOURCES</title>
+<simpara>Main web site: <ulink url="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</ulink><?asciidoc-br?>
+Information on training: <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1),
+shape_training(1), mftraining(1), unicharambigs(5), unicharset(5),
+unicharset_extractor(1), wordlist2dawg(1)</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>Tesseract development was led at Hewlett-Packard and Google by Ray Smith.
+The development team has included:</simpara>
+<simpara>Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger,
+Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke,
+Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle,
+Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel
+Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh
+Lloyd, Shobhit Saxena, and Thomas Kielbus.</simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/unicharambigs.5.asc b/doc/unicharambigs.5.asc
index 7ce25e4478..079f6d53de 100644
--- a/doc/unicharambigs.5.asc
+++ b/doc/unicharambigs.5.asc
@@ -38,7 +38,7 @@ EXAMPLE
 3       i i i   1       m     0
 ...............................
 
-In this example, all instances of the '2' character sequence '''' will 
+In this example, all instances of the '2' character sequence '''' will
 *always* be replaced by the '1' character sequence '"'; a '1' character
 sequence 'm' *may* be replaced by the '2' character sequence 'rn', and
 the '3' character sequence *may* be replaced by the '1' character
diff --git a/doc/unicharambigs.5.html b/doc/unicharambigs.5.html
index c6a645e69c..bb9fb291a3 100644
--- a/doc/unicharambigs.5.html
+++ b/doc/unicharambigs.5.html
@@ -1,875 +1,875 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>UNICHARAMBIGS(5)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-UNICHARAMBIGS(5) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>unicharambigs -
-   Tesseract unicharset ambiguities
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The unicharambigs file (a component of traineddata, see combine_tessdata(1) )
-is used by Tesseract to represent possible ambiguities between characters,
-or groups of characters.</p></div>
-<div class="paragraph"><p>The file contains a number of lines, laid out as follow:</p></div>
-<div class="literalblock">
-<div class="content">
-<pre><code>[num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num]</code></pre>
-</div></div>
-<div class="hdlist"><table>
-<tr>
-<td class="hdlist1">
-Field one
-<br />
-</td>
-<td class="hdlist2">
-<p style="margin-top: 0;">
-the number of characters contained in field two
-</p>
-</td>
-</tr>
-<tr>
-<td class="hdlist1">
-Field two
-<br />
-</td>
-<td class="hdlist2">
-<p style="margin-top: 0;">
-the character sequence to be replaced
-</p>
-</td>
-</tr>
-<tr>
-<td class="hdlist1">
-Field three
-<br />
-</td>
-<td class="hdlist2">
-<p style="margin-top: 0;">
-the number of characters contained in field four
-</p>
-</td>
-</tr>
-<tr>
-<td class="hdlist1">
-Field four
-<br />
-</td>
-<td class="hdlist2">
-<p style="margin-top: 0;">
-the character sequence used to replace field two
-</p>
-</td>
-</tr>
-<tr>
-<td class="hdlist1">
-Field five
-<br />
-</td>
-<td class="hdlist2">
-<p style="margin-top: 0;">
-contains either 1 or 0. 1 denotes a mandatory
-replacement, 0 denotes an optional replacement.
-</p>
-</td>
-</tr>
-</table></div>
-<div class="paragraph"><p>Characters appearing in fields two and four should appear in
-unicharset. The numbers in fields one and three refer to the
-number of unichars (not bytes).</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_example">EXAMPLE</h2>
-<div class="sectionbody">
-<div class="literalblock">
-<div class="content">
-<pre><code>2       ' '     1       "     1
-1       m       2       r n   0
-3       i i i   1       m     0</code></pre>
-</div></div>
-<div class="paragraph"><p>In this example, all instances of the <em>2</em> character sequence <em>'</em>' will
-<strong>always</strong> be replaced by the <em>1</em> character sequence <em>"</em>; a <em>1</em> character
-sequence <em>m</em> <strong>may</strong> be replaced by the <em>2</em> character sequence <em>rn</em>, and
-the <em>3</em> character sequence <strong>may</strong> be replaced by the <em>1</em> character
-sequence <em>m</em>.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_history">HISTORY</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The unicharambigs file first appeared in Tesseract 3.00; prior to that, a
-similar format, called DangAmbigs (<em>dangerous ambiguities</em>) was used: the
-format was almost identical, except only mandatory replacements could be
-specified, and field 5 was absent.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_bugs">BUGS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>This is a documentation "bug": it&#8217;s not currently clear what should be done
-in the case of ligatures (such as <em>fi</em>) which may also appear as regular
-letters in the unicharset.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-05-13 19:59:45 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>UNICHARAMBIGS(5)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnotes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE returns the full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+UNICHARAMBIGS(5) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>unicharambigs -
+   Tesseract unicharset ambiguities
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The unicharambigs file (a component of traineddata, see combine_tessdata(1) )
+is used by Tesseract to represent possible ambiguities between characters,
+or groups of characters.</p></div>
+<div class="paragraph"><p>The file contains a number of lines, laid out as follows:</p></div>
+<div class="literalblock">
+<div class="content">
+<pre><code>[num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num]</code></pre>
+</div></div>
+<div class="hdlist"><table>
+<tr>
+<td class="hdlist1">
+Field one
+<br />
+</td>
+<td class="hdlist2">
+<p style="margin-top: 0;">
+the number of characters contained in field two
+</p>
+</td>
+</tr>
+<tr>
+<td class="hdlist1">
+Field two
+<br />
+</td>
+<td class="hdlist2">
+<p style="margin-top: 0;">
+the character sequence to be replaced
+</p>
+</td>
+</tr>
+<tr>
+<td class="hdlist1">
+Field three
+<br />
+</td>
+<td class="hdlist2">
+<p style="margin-top: 0;">
+the number of characters contained in field four
+</p>
+</td>
+</tr>
+<tr>
+<td class="hdlist1">
+Field four
+<br />
+</td>
+<td class="hdlist2">
+<p style="margin-top: 0;">
+the character sequence used to replace field two
+</p>
+</td>
+</tr>
+<tr>
+<td class="hdlist1">
+Field five
+<br />
+</td>
+<td class="hdlist2">
+<p style="margin-top: 0;">
+contains either 1 or 0. 1 denotes a mandatory
+replacement, 0 denotes an optional replacement.
+</p>
+</td>
+</tr>
+</table></div>
+<div class="paragraph"><p>Characters appearing in fields two and four should appear in
+unicharset. The numbers in fields one and three refer to the
+number of unichars (not bytes).</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_example">EXAMPLE</h2>
+<div class="sectionbody">
+<div class="literalblock">
+<div class="content">
+<pre><code>2       ' '     1       "     1
+1       m       2       r n   0
+3       i i i   1       m     0</code></pre>
+</div></div>
+<div class="paragraph"><p>In this example, all instances of the <em>2</em> character sequence <em>''</em> will
+<strong>always</strong> be replaced by the <em>1</em> character sequence <em>"</em>; a <em>1</em> character
+sequence <em>m</em> <strong>may</strong> be replaced by the <em>2</em> character sequence <em>rn</em>, and
+the <em>3</em> character sequence <em>iii</em> <strong>may</strong> be replaced by the <em>1</em> character
+sequence <em>m</em>.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_history">HISTORY</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The unicharambigs file first appeared in Tesseract 3.00; prior to that, a
+similar format, called DangAmbigs (<em>dangerous ambiguities</em>) was used: the
+format was almost identical, except only mandatory replacements could be
+specified, and field 5 was absent.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_bugs">BUGS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>This is a documentation "bug": it&#8217;s not currently clear what should be done
+in the case of ligatures (such as <em>fi</em>) which may also appear as regular
+letters in the unicharset.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-05-13 19:59:45 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/unicharambigs.5.xml b/doc/unicharambigs.5.xml
index 75b3c66431..cbc0f50e50 100644
--- a/doc/unicharambigs.5.xml
+++ b/doc/unicharambigs.5.xml
@@ -1,126 +1,126 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>UNICHARAMBIGS(5)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>unicharambigs</refentrytitle>
-<manvolnum>5</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>unicharambigs</refname>
-    <refpurpose>Tesseract unicharset ambiguities</refpurpose>
-</refnamediv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>The unicharambigs file (a component of traineddata, see combine_tessdata(1) )
-is used by Tesseract to represent possible ambiguities between characters,
-or groups of characters.</simpara>
-<simpara>The file contains a number of lines, laid out as follow:</simpara>
-<literallayout class="monospaced">[num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num]</literallayout>
-<informaltable tabstyle="horizontal" frame="none" colsep="0" rowsep="0"><tgroup cols="2"><colspec colwidth="15*"/><colspec colwidth="85*"/><tbody valign="top">
-<row>
-<entry>
-<simpara>
-Field one
-</simpara>
-</entry>
-<entry>
-<simpara>
-the number of characters contained in field two
-</simpara>
-</entry>
-</row>
-<row>
-<entry>
-<simpara>
-Field two
-</simpara>
-</entry>
-<entry>
-<simpara>
-the character sequence to be replaced
-</simpara>
-</entry>
-</row>
-<row>
-<entry>
-<simpara>
-Field three
-</simpara>
-</entry>
-<entry>
-<simpara>
-the number of characters contained in field four
-</simpara>
-</entry>
-</row>
-<row>
-<entry>
-<simpara>
-Field four
-</simpara>
-</entry>
-<entry>
-<simpara>
-the character sequence used to replace field two
-</simpara>
-</entry>
-</row>
-<row>
-<entry>
-<simpara>
-Field five
-</simpara>
-</entry>
-<entry>
-<simpara>
-contains either 1 or 0. 1 denotes a mandatory
-replacement, 0 denotes an optional replacement.
-</simpara>
-</entry>
-</row>
-</tbody></tgroup></informaltable>
-<simpara>Characters appearing in fields two and four should appear in
-unicharset. The numbers in fields one and three refer to the
-number of unichars (not bytes).</simpara>
-</refsect1>
-<refsect1 id="_example">
-<title>EXAMPLE</title>
-<literallayout class="monospaced">2       ' '     1       "     1
-1       m       2       r n   0
-3       i i i   1       m     0</literallayout>
-<simpara>In this example, all instances of the <emphasis>2</emphasis> character sequence <emphasis>'</emphasis>' will
-<emphasis role="strong">always</emphasis> be replaced by the <emphasis>1</emphasis> character sequence <emphasis>"</emphasis>; a <emphasis>1</emphasis> character
-sequence <emphasis>m</emphasis> <emphasis role="strong">may</emphasis> be replaced by the <emphasis>2</emphasis> character sequence <emphasis>rn</emphasis>, and
-the <emphasis>3</emphasis> character sequence <emphasis role="strong">may</emphasis> be replaced by the <emphasis>1</emphasis> character
-sequence <emphasis>m</emphasis>.</simpara>
-</refsect1>
-<refsect1 id="_history">
-<title>HISTORY</title>
-<simpara>The unicharambigs file first appeared in Tesseract 3.00; prior to that, a
-similar format, called DangAmbigs (<emphasis>dangerous ambiguities</emphasis>) was used: the
-format was almost identical, except only mandatory replacements could be
-specified, and field 5 was absent.</simpara>
-</refsect1>
-<refsect1 id="_bugs">
-<title>BUGS</title>
-<simpara>This is a documentation "bug": it&#8217;s not currently clear what should be done
-in the case of ligatures (such as <emphasis>fi</emphasis>) which may also appear as regular
-letters in the unicharset.</simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), unicharset(5)</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>UNICHARAMBIGS(5)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>unicharambigs</refentrytitle>
+<manvolnum>5</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>unicharambigs</refname>
+    <refpurpose>Tesseract unicharset ambiguities</refpurpose>
+</refnamediv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>The unicharambigs file (a component of traineddata, see combine_tessdata(1) )
+is used by Tesseract to represent possible ambiguities between characters,
+or groups of characters.</simpara>
+<simpara>The file contains a number of lines, laid out as follows:</simpara>
+<literallayout class="monospaced">[num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num]</literallayout>
+<informaltable tabstyle="horizontal" frame="none" colsep="0" rowsep="0"><tgroup cols="2"><colspec colwidth="15*"/><colspec colwidth="85*"/><tbody valign="top">
+<row>
+<entry>
+<simpara>
+Field one
+</simpara>
+</entry>
+<entry>
+<simpara>
+the number of characters contained in field two
+</simpara>
+</entry>
+</row>
+<row>
+<entry>
+<simpara>
+Field two
+</simpara>
+</entry>
+<entry>
+<simpara>
+the character sequence to be replaced
+</simpara>
+</entry>
+</row>
+<row>
+<entry>
+<simpara>
+Field three
+</simpara>
+</entry>
+<entry>
+<simpara>
+the number of characters contained in field four
+</simpara>
+</entry>
+</row>
+<row>
+<entry>
+<simpara>
+Field four
+</simpara>
+</entry>
+<entry>
+<simpara>
+the character sequence used to replace field two
+</simpara>
+</entry>
+</row>
+<row>
+<entry>
+<simpara>
+Field five
+</simpara>
+</entry>
+<entry>
+<simpara>
+contains either 1 or 0. 1 denotes a mandatory
+replacement, 0 denotes an optional replacement.
+</simpara>
+</entry>
+</row>
+</tbody></tgroup></informaltable>
+<simpara>Characters appearing in fields two and four should appear in
+unicharset. The numbers in fields one and three refer to the
+number of unichars (not bytes).</simpara>
+</refsect1>
+<refsect1 id="_example">
+<title>EXAMPLE</title>
+<literallayout class="monospaced">2       ' '     1       "     1
+1       m       2       r n   0
+3       i i i   1       m     0</literallayout>
+<simpara>In this example, all instances of the <emphasis>2</emphasis> character sequence <emphasis>''</emphasis> will
+<emphasis role="strong">always</emphasis> be replaced by the <emphasis>1</emphasis> character sequence <emphasis>"</emphasis>; a <emphasis>1</emphasis> character
+sequence <emphasis>m</emphasis> <emphasis role="strong">may</emphasis> be replaced by the <emphasis>2</emphasis> character sequence <emphasis>rn</emphasis>, and
+the <emphasis>3</emphasis> character sequence <emphasis>iii</emphasis> <emphasis role="strong">may</emphasis> be replaced by the <emphasis>1</emphasis> character
+sequence <emphasis>m</emphasis>.</simpara>
+</refsect1>
+<refsect1 id="_history">
+<title>HISTORY</title>
+<simpara>The unicharambigs file first appeared in Tesseract 3.00; prior to that, a
+similar format, called DangAmbigs (<emphasis>dangerous ambiguities</emphasis>) was used: the
+format was almost identical, except only mandatory replacements could be
+specified, and field 5 was absent.</simpara>
+</refsect1>
+<refsect1 id="_bugs">
+<title>BUGS</title>
+<simpara>This is a documentation "bug": it&#8217;s not currently clear what should be done
+in the case of ligatures (such as <emphasis>fi</emphasis>) which may also appear as regular
+letters in the unicharset.</simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), unicharset(5)</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/unicharset.5.html b/doc/unicharset.5.html
index 0f16c9e5e5..f3c3e7a9fc 100644
--- a/doc/unicharset.5.html
+++ b/doc/unicharset.5.html
@@ -1,965 +1,965 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>UNICHARSET(5)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-UNICHARSET(5) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>unicharset -
-   character properties file used by tesseract(1)
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Tesseract&#8217;s unicharset file contains information on each symbol
-(unichar) the Tesseract OCR engine is trained to recognize.</p></div>
-<div class="paragraph"><p>A unicharset file (i.e. <em>eng.unicharset</em>) is distributed as part of a
-Tesseract language pack (i.e. <em>eng.traineddata</em>).  For information on
-extracting the unicharset file, see combine_tessdata(1).</p></div>
-<div class="paragraph"><p>The first line of a unicharset file contains the number of unichars in
-the file.  After this line, each subsequent line provides information for
-a single unichar.  The first such line contains a placeholder reserved for
-the space character.  Each unichar is referred to within Tesseract by its
-Unichar ID, which is the line number (minus 1) within the unicharset file.
-Therefore, space gets unichar 0.</p></div>
-<div class="paragraph"><p>Each unichar line in the unicharset file (v2+) may have four space-separated fields:</p></div>
-<div class="literalblock">
-<div class="content">
-<pre><code>'character' 'properties' 'script' 'id'</code></pre>
-</div></div>
-<div class="paragraph"><p>Starting with Tesseract v3.02, more information may be given for each unichar:</p></div>
-<div class="literalblock">
-<div class="content">
-<pre><code>'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</code></pre>
-</div></div>
-<div class="paragraph"><p>Entries:</p></div>
-<div class="dlist"><dl>
-<dt class="hdlist1">
-<em>character</em>
-</dt>
-<dd>
-<p>
-The UTF-8 encoded string to be produced for this unichar.
-</p>
-</dd>
-<dt class="hdlist1">
-<em>properties</em>
-</dt>
-<dd>
-<p>
-An integer mask of character properties, one per bit.
-    From least to most significant bit, these are: isalpha, islower, isupper,
-    isdigit, ispunctuation.
-</p>
-</dd>
-<dt class="hdlist1">
-<em>glyph_metrics</em>
-</dt>
-<dd>
-<p>
-Ten comma-separated integers representing various standards
-    for where this glyph is to be found within a baseline-normalized coordinate
-    system where 128 is normalized to x-height.
-</p>
-<div class="ulist"><ul>
-<li>
-<p>
-min_bottom, max_bottom: the ranges where the bottom of the character can
-    be found.
-</p>
-</li>
-<li>
-<p>
-min_top, max_top: the ranges where the top of the character may be found.
-</p>
-</li>
-<li>
-<p>
-min_width, max_width: horizontal width of the character.
-</p>
-</li>
-<li>
-<p>
-min_bearing, max_bearing: how far from the usual start position does the
-    leftmost part of the character begin.
-</p>
-</li>
-<li>
-<p>
-min_advance, max_advance: how far from the printer&#8217;s cell left do we
-    advance to begin the next character.
-</p>
-</li>
-</ul></div>
-</dd>
-<dt class="hdlist1">
-<em>script</em>
-</dt>
-<dd>
-<p>
-Name of the script (Latin, Common, Greek, Cyrillic, Han, null).
-</p>
-</dd>
-<dt class="hdlist1">
-<em>other_case</em>
-</dt>
-<dd>
-<p>
-The Unichar ID of the other case version of this character
-    (upper or lower).
-</p>
-</dd>
-<dt class="hdlist1">
-<em>direction</em>
-</dt>
-<dd>
-<p>
-The Unicode BiDi direction of this character, as defined by
-    ICU&#8217;s enum UCharDirection. (0 = Left to Right, 1 = Right to Left,
-    2 = European Number&#8230;)
-</p>
-</dd>
-<dt class="hdlist1">
-<em>mirror</em>
-</dt>
-<dd>
-<p>
-The Unichar ID of the BiDirectional mirror of this character.
-    For example the mirror of open paren is close paren, but Latin Capital C
-    has no mirror, so it remains a Latin Capital C.
-</p>
-</dd>
-<dt class="hdlist1">
-<em>normed_form</em>
-</dt>
-<dd>
-<p>
-The UTF-8 representation of a "normalized form" of this unichar
-    for the purpose of blaming a module for errors given ground truth text.
-    For instance, a left or right single quote may normalize to an ASCII quote.
-</p>
-</dd>
-</dl></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_example_v2">EXAMPLE (v2)</h2>
-<div class="sectionbody">
-<div class="literalblock">
-<div class="content">
-<pre><code>; 10 Common 46
-b 3 Latin 59
-W 5 Latin 40
-7 8 Common 66
-= 0 Common 93</code></pre>
-</div></div>
-<div class="paragraph"><p>";" is a punctuation character. Its properties are thus represented by the
-binary number 10000 (10 in hexadecimal).</p></div>
-<div class="paragraph"><p>"b" is an alphabetic character and a lower case character. Its properties are
-thus represented by the binary number 00011 (3 in hexadecimal).</p></div>
-<div class="paragraph"><p>"W" is an alphabetic character and an upper case character. Its properties are
-thus represented by the binary number 00101 (5 in hexadecimal).</p></div>
-<div class="paragraph"><p>"7" is just a digit. Its properties are thus represented by the binary number
-01000 (8 in hexadecimal).</p></div>
-<div class="paragraph"><p>"=" is not punctuation nor a digit nor an alphabetic character. Its properties
-are thus represented by the binary number 00000 (0 in hexadecimal).</p></div>
-<div class="paragraph"><p>Japanese or Chinese alphabetic character properties are represented by the
-binary number 00001 (1 in hexadecimal): they are alphabetic, but neither
-upper nor lower case.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_example_v3_02">EXAMPLE (v3.02)</h2>
-<div class="sectionbody">
-<div class="literalblock">
-<div class="content">
-<pre><code>110
-NULL 0 NULL 0
-N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
-Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
-1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
-9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
-a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
-. . .</code></pre>
-</div></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_caveats">CAVEATS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Although the unicharset reader maintains the ability to read unicharsets
-of older formats and will assign default values to missing fields,
-the accuracy will be degraded.</p></div>
-<div class="paragraph"><p>Further, most other data files are indexed by the unicharset file,
-so changing it without re-generating the others is likely to have dire
-consequences.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_history">HISTORY</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The unicharset format first appeared with Tesseract 2.00, which was the
-first version to support languages other than English. The unicharset file
-contained only the first two fields, and the "ispunctuation" property was
-absent (punctuation was regarded as "0", as "=" is in the above example.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</p></div>
-<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-12 23:52:34 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>UNICHARSET(5)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// Rebuilds the table of contents in the "toc" element from the headings under "content"; toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {  // Concatenates the data of the text nodes below el, in document order.
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {  // Record for one heading: its element, its text and its TOC level.
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {  // Returns an array with one TocEntry per matching heading below el.
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');  // Heading digit may run from 1 to toclevels+1.
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {  // Headings whose class is "float" are left out.
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);  // TOC level is the heading digit minus one.
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {  // Nothing to do when the page has no "toc" element.
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")  // Give id-less headings a generated id so the link below has a target.
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)  // No headings found: remove the "toc" element itself.
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {  // Rebuilds the footnote list in the "footnotes" element from the span.footnote elements under "content".
+  // Delete existing footnote entries in case we're reloading the footnotes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};  // Maps "#" + span id to that span's footnote number.
+  var n = 0;  // Running footnote number.
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {  // No cached note yet: extract the bracketed text and store it in data-note.
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)  // No footnotes found: remove the "footnotes" element itself.
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE returns the full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {  // Schedules footnote generation (and TOC generation when toclevels is given) while the page loads.
+  var timerId;
+
+  function reinstall() {  // Regenerates the footnotes, and the TOC only when toclevels is truthy.
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {  // Load handler: stops the polling timer, then regenerates one more time.
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);  // Regenerate every 500 ms until the load handler clears the timer.
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;  // Fallback when addEventListener is unavailable.
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+UNICHARSET(5) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>unicharset -
+   character properties file used by tesseract(1)
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Tesseract&#8217;s unicharset file contains information on each symbol
+(unichar) the Tesseract OCR engine is trained to recognize.</p></div>
+<div class="paragraph"><p>A unicharset file (e.g. <em>eng.unicharset</em>) is distributed as part of a
+Tesseract language pack (e.g. <em>eng.traineddata</em>).  For information on
+extracting the unicharset file, see combine_tessdata(1).</p></div>
+<div class="paragraph"><p>The first line of a unicharset file contains the number of unichars in
+the file.  After this line, each subsequent line provides information for
+a single unichar.  The first such line contains a placeholder reserved for
+the space character.  Each unichar is referred to within Tesseract by its
+Unichar ID, which is the line number (minus 1) within the unicharset file.
+Therefore, space gets unichar 0.</p></div>
+<div class="paragraph"><p>Each unichar line in the unicharset file (v2+) may have four space-separated fields:</p></div>
+<div class="literalblock">
+<div class="content">
+<pre><code>'character' 'properties' 'script' 'id'</code></pre>
+</div></div>
+<div class="paragraph"><p>Starting with Tesseract v3.02, more information may be given for each unichar:</p></div>
+<div class="literalblock">
+<div class="content">
+<pre><code>'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</code></pre>
+</div></div>
+<div class="paragraph"><p>Entries:</p></div>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+<em>character</em>
+</dt>
+<dd>
+<p>
+The UTF-8 encoded string to be produced for this unichar.
+</p>
+</dd>
+<dt class="hdlist1">
+<em>properties</em>
+</dt>
+<dd>
+<p>
+An integer mask of character properties, one per bit.
+    From least to most significant bit, these are: isalpha, islower, isupper,
+    isdigit, ispunctuation.
+</p>
+</dd>
+<dt class="hdlist1">
+<em>glyph_metrics</em>
+</dt>
+<dd>
+<p>
+Ten comma-separated integers representing various standards
+    for where this glyph is to be found within a baseline-normalized coordinate
+    system where 128 is normalized to x-height.
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+min_bottom, max_bottom: the ranges where the bottom of the character can
+    be found.
+</p>
+</li>
+<li>
+<p>
+min_top, max_top: the ranges where the top of the character may be found.
+</p>
+</li>
+<li>
+<p>
+min_width, max_width: horizontal width of the character.
+</p>
+</li>
+<li>
+<p>
+min_bearing, max_bearing: how far from the usual start position the
+    leftmost part of the character begins.
+</p>
+</li>
+<li>
+<p>
+min_advance, max_advance: how far from the left of the printer&#8217;s cell we
+    advance to begin the next character.
+</p>
+</li>
+</ul></div>
+</dd>
+<dt class="hdlist1">
+<em>script</em>
+</dt>
+<dd>
+<p>
+Name of the script (Latin, Common, Greek, Cyrillic, Han, null).
+</p>
+</dd>
+<dt class="hdlist1">
+<em>other_case</em>
+</dt>
+<dd>
+<p>
+The Unichar ID of the other case version of this character
+    (upper or lower).
+</p>
+</dd>
+<dt class="hdlist1">
+<em>direction</em>
+</dt>
+<dd>
+<p>
+The Unicode BiDi direction of this character, as defined by
+    ICU&#8217;s enum UCharDirection. (0 = Left to Right, 1 = Right to Left,
+    2 = European Number&#8230;)
+</p>
+</dd>
+<dt class="hdlist1">
+<em>mirror</em>
+</dt>
+<dd>
+<p>
+The Unichar ID of the BiDirectional mirror of this character.
+    For example the mirror of open paren is close paren, but Latin Capital C
+    has no mirror, so it remains a Latin Capital C.
+</p>
+</dd>
+<dt class="hdlist1">
+<em>normed_form</em>
+</dt>
+<dd>
+<p>
+The UTF-8 representation of a "normalized form" of this unichar
+    for the purpose of blaming a module for errors given ground truth text.
+    For instance, a left or right single quote may normalize to an ASCII quote.
+</p>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_example_v2">EXAMPLE (v2)</h2>
+<div class="sectionbody">
+<div class="literalblock">
+<div class="content">
+<pre><code>; 10 Common 46
+b 3 Latin 59
+W 5 Latin 40
+7 8 Common 66
+= 0 Common 93</code></pre>
+</div></div>
+<div class="paragraph"><p>";" is a punctuation character. Its properties are thus represented by the
+binary number 10000 (10 in hexadecimal).</p></div>
+<div class="paragraph"><p>"b" is an alphabetic character and a lower case character. Its properties are
+thus represented by the binary number 00011 (3 in hexadecimal).</p></div>
+<div class="paragraph"><p>"W" is an alphabetic character and an upper case character. Its properties are
+thus represented by the binary number 00101 (5 in hexadecimal).</p></div>
+<div class="paragraph"><p>"7" is just a digit. Its properties are thus represented by the binary number
+01000 (8 in hexadecimal).</p></div>
+<div class="paragraph"><p>"=" is neither punctuation nor a digit nor an alphabetic character. Its properties
+are thus represented by the binary number 00000 (0 in hexadecimal).</p></div>
+<div class="paragraph"><p>Japanese or Chinese alphabetic character properties are represented by the
+binary number 00001 (1 in hexadecimal): they are alphabetic, but neither
+upper nor lower case.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_example_v3_02">EXAMPLE (v3.02)</h2>
+<div class="sectionbody">
+<div class="literalblock">
+<div class="content">
+<pre><code>110
+NULL 0 NULL 0
+N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
+Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
+1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
+9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
+a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
+. . .</code></pre>
+</div></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_caveats">CAVEATS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Although the unicharset reader maintains the ability to read unicharsets
+of older formats and will assign default values to missing fields,
+the accuracy will be degraded.</p></div>
+<div class="paragraph"><p>Further, most other data files are indexed by the unicharset file,
+so changing it without re-generating the others is likely to have dire
+consequences.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_history">HISTORY</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The unicharset format first appeared with Tesseract 2.00, which was the
+first version to support languages other than English. The unicharset file
+contained only the first two fields, and the "ispunctuation" property was
+absent (punctuation was regarded as "0", as "=" is in the above example).</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</p></div>
+<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-12 23:52:34 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/unicharset.5.xml b/doc/unicharset.5.xml
index 9ae6257e60..40e03c6eea 100644
--- a/doc/unicharset.5.xml
+++ b/doc/unicharset.5.xml
@@ -1,219 +1,219 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>UNICHARSET(5)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>unicharset</refentrytitle>
-<manvolnum>5</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>unicharset</refname>
-    <refpurpose>character properties file used by tesseract(1)</refpurpose>
-</refnamediv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>Tesseract&#8217;s unicharset file contains information on each symbol
-(unichar) the Tesseract OCR engine is trained to recognize.</simpara>
-<simpara>A unicharset file (i.e. <emphasis>eng.unicharset</emphasis>) is distributed as part of a
-Tesseract language pack (i.e. <emphasis>eng.traineddata</emphasis>).  For information on
-extracting the unicharset file, see combine_tessdata(1).</simpara>
-<simpara>The first line of a unicharset file contains the number of unichars in
-the file.  After this line, each subsequent line provides information for
-a single unichar.  The first such line contains a placeholder reserved for
-the space character.  Each unichar is referred to within Tesseract by its
-Unichar ID, which is the line number (minus 1) within the unicharset file.
-Therefore, space gets unichar 0.</simpara>
-<simpara>Each unichar line in the unicharset file (v2+) may have four space-separated fields:</simpara>
-<literallayout class="monospaced">'character' 'properties' 'script' 'id'</literallayout>
-<simpara>Starting with Tesseract v3.02, more information may be given for each unichar:</simpara>
-<literallayout class="monospaced">'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</literallayout>
-<simpara>Entries:</simpara>
-<variablelist>
-<varlistentry>
-<term>
-<emphasis>character</emphasis>
-</term>
-<listitem>
-<simpara>
-The UTF-8 encoded string to be produced for this unichar.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>properties</emphasis>
-</term>
-<listitem>
-<simpara>
-An integer mask of character properties, one per bit.
-    From least to most significant bit, these are: isalpha, islower, isupper,
-    isdigit, ispunctuation.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>glyph_metrics</emphasis>
-</term>
-<listitem>
-<simpara>
-Ten comma-separated integers representing various standards
-    for where this glyph is to be found within a baseline-normalized coordinate
-    system where 128 is normalized to x-height.
-</simpara>
-<itemizedlist>
-<listitem>
-<simpara>
-min_bottom, max_bottom: the ranges where the bottom of the character can
-    be found.
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-min_top, max_top: the ranges where the top of the character may be found.
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-min_width, max_width: horizontal width of the character.
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-min_bearing, max_bearing: how far from the usual start position does the
-    leftmost part of the character begin.
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-min_advance, max_advance: how far from the printer&#8217;s cell left do we
-    advance to begin the next character.
-</simpara>
-</listitem>
-</itemizedlist>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>script</emphasis>
-</term>
-<listitem>
-<simpara>
-Name of the script (Latin, Common, Greek, Cyrillic, Han, null).
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>other_case</emphasis>
-</term>
-<listitem>
-<simpara>
-The Unichar ID of the other case version of this character
-    (upper or lower).
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>direction</emphasis>
-</term>
-<listitem>
-<simpara>
-The Unicode BiDi direction of this character, as defined by
-    ICU&#8217;s enum UCharDirection. (0 = Left to Right, 1 = Right to Left,
-    2 = European Number&#8230;)
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>mirror</emphasis>
-</term>
-<listitem>
-<simpara>
-The Unichar ID of the BiDirectional mirror of this character.
-    For example the mirror of open paren is close paren, but Latin Capital C
-    has no mirror, so it remains a Latin Capital C.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis>normed_form</emphasis>
-</term>
-<listitem>
-<simpara>
-The UTF-8 representation of a "normalized form" of this unichar
-    for the purpose of blaming a module for errors given ground truth text.
-    For instance, a left or right single quote may normalize to an ASCII quote.
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_example_v2">
-<title>EXAMPLE (v2)</title>
-<literallayout class="monospaced">; 10 Common 46
-b 3 Latin 59
-W 5 Latin 40
-7 8 Common 66
-= 0 Common 93</literallayout>
-<simpara>";" is a punctuation character. Its properties are thus represented by the
-binary number 10000 (10 in hexadecimal).</simpara>
-<simpara>"b" is an alphabetic character and a lower case character. Its properties are
-thus represented by the binary number 00011 (3 in hexadecimal).</simpara>
-<simpara>"W" is an alphabetic character and an upper case character. Its properties are
-thus represented by the binary number 00101 (5 in hexadecimal).</simpara>
-<simpara>"7" is just a digit. Its properties are thus represented by the binary number
-01000 (8 in hexadecimal).</simpara>
-<simpara>"=" is not punctuation nor a digit nor an alphabetic character. Its properties
-are thus represented by the binary number 00000 (0 in hexadecimal).</simpara>
-<simpara>Japanese or Chinese alphabetic character properties are represented by the
-binary number 00001 (1 in hexadecimal): they are alphabetic, but neither
-upper nor lower case.</simpara>
-</refsect1>
-<refsect1 id="_example_v3_02">
-<title>EXAMPLE (v3.02)</title>
-<literallayout class="monospaced">110
-NULL 0 NULL 0
-N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
-Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
-1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
-9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
-a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
-. . .</literallayout>
-</refsect1>
-<refsect1 id="_caveats">
-<title>CAVEATS</title>
-<simpara>Although the unicharset reader maintains the ability to read unicharsets
-of older formats and will assign default values to missing fields,
-the accuracy will be degraded.</simpara>
-<simpara>Further, most other data files are indexed by the unicharset file,
-so changing it without re-generating the others is likely to have dire
-consequences.</simpara>
-</refsect1>
-<refsect1 id="_history">
-<title>HISTORY</title>
-<simpara>The unicharset format first appeared with Tesseract 2.00, which was the
-first version to support languages other than English. The unicharset file
-contained only the first two fields, and the "ispunctuation" property was
-absent (punctuation was regarded as "0", as "=" is in the above example.</simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</simpara>
-<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>UNICHARSET(5)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>unicharset</refentrytitle>
+<manvolnum>5</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>unicharset</refname>
+    <refpurpose>character properties file used by tesseract(1)</refpurpose>
+</refnamediv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>Tesseract&#8217;s unicharset file contains information on each symbol
+(unichar) the Tesseract OCR engine is trained to recognize.</simpara>
+<simpara>A unicharset file (e.g. <emphasis>eng.unicharset</emphasis>) is distributed as part of a
+Tesseract language pack (e.g. <emphasis>eng.traineddata</emphasis>).  For information on
+extracting the unicharset file, see combine_tessdata(1).</simpara>
+<simpara>The first line of a unicharset file contains the number of unichars in
+the file.  After this line, each subsequent line provides information for
+a single unichar.  The first such line contains a placeholder reserved for
+the space character.  Each unichar is referred to within Tesseract by its
+Unichar ID, which is the line number (minus 1) within the unicharset file.
+Therefore, space gets unichar 0.</simpara>
+<simpara>Each unichar line in the unicharset file (v2+) may have four space-separated fields:</simpara>
+<literallayout class="monospaced">'character' 'properties' 'script' 'id'</literallayout>
+<simpara>Starting with Tesseract v3.02, more information may be given for each unichar:</simpara>
+<literallayout class="monospaced">'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</literallayout>
+<simpara>Entries:</simpara>
+<variablelist>
+<varlistentry>
+<term>
+<emphasis>character</emphasis>
+</term>
+<listitem>
+<simpara>
+The UTF-8 encoded string to be produced for this unichar.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>properties</emphasis>
+</term>
+<listitem>
+<simpara>
+An integer mask of character properties, one per bit.
+    From least to most significant bit, these are: isalpha, islower, isupper,
+    isdigit, ispunctuation.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>glyph_metrics</emphasis>
+</term>
+<listitem>
+<simpara>
+Ten comma-separated integers representing various standards
+    for where this glyph is to be found within a baseline-normalized coordinate
+    system where 128 is normalized to x-height.
+</simpara>
+<itemizedlist>
+<listitem>
+<simpara>
+min_bottom, max_bottom: the ranges where the bottom of the character can
+    be found.
+</simpara>
+</listitem>
+<listitem>
+<simpara>
+min_top, max_top: the ranges where the top of the character may be found.
+</simpara>
+</listitem>
+<listitem>
+<simpara>
+min_width, max_width: horizontal width of the character.
+</simpara>
+</listitem>
+<listitem>
+<simpara>
+min_bearing, max_bearing: how far from the usual start position does the
+    leftmost part of the character begin.
+</simpara>
+</listitem>
+<listitem>
+<simpara>
+min_advance, max_advance: how far from the printer&#8217;s cell left do we
+    advance to begin the next character.
+</simpara>
+</listitem>
+</itemizedlist>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>script</emphasis>
+</term>
+<listitem>
+<simpara>
+Name of the script (Latin, Common, Greek, Cyrillic, Han, null).
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>other_case</emphasis>
+</term>
+<listitem>
+<simpara>
+The Unichar ID of the other case version of this character
+    (upper or lower).
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>direction</emphasis>
+</term>
+<listitem>
+<simpara>
+The Unicode BiDi direction of this character, as defined by
+    ICU&#8217;s enum UCharDirection. (0 = Left to Right, 1 = Right to Left,
+    2 = European Number&#8230;)
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>mirror</emphasis>
+</term>
+<listitem>
+<simpara>
+The Unichar ID of the BiDirectional mirror of this character.
+    For example the mirror of open paren is close paren, but Latin Capital C
+    has no mirror, so it remains a Latin Capital C.
+</simpara>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+<emphasis>normed_form</emphasis>
+</term>
+<listitem>
+<simpara>
+The UTF-8 representation of a "normalized form" of this unichar
+    for the purpose of blaming a module for errors given ground truth text.
+    For instance, a left or right single quote may normalize to an ASCII quote.
+</simpara>
+</listitem>
+</varlistentry>
+</variablelist>
+</refsect1>
+<refsect1 id="_example_v2">
+<title>EXAMPLE (v2)</title>
+<literallayout class="monospaced">; 10 Common 46
+b 3 Latin 59
+W 5 Latin 40
+7 8 Common 66
+= 0 Common 93</literallayout>
+<simpara>";" is a punctuation character. Its properties are thus represented by the
+binary number 10000 (10 in hexadecimal).</simpara>
+<simpara>"b" is an alphabetic character and a lower case character. Its properties are
+thus represented by the binary number 00011 (3 in hexadecimal).</simpara>
+<simpara>"W" is an alphabetic character and an upper case character. Its properties are
+thus represented by the binary number 00101 (5 in hexadecimal).</simpara>
+<simpara>"7" is just a digit. Its properties are thus represented by the binary number
+01000 (8 in hexadecimal).</simpara>
+<simpara>"=" is not punctuation nor a digit nor an alphabetic character. Its properties
+are thus represented by the binary number 00000 (0 in hexadecimal).</simpara>
+<simpara>Japanese or Chinese alphabetic character properties are represented by the
+binary number 00001 (1 in hexadecimal): they are alphabetic, but neither
+upper nor lower case.</simpara>
+</refsect1>
+<refsect1 id="_example_v3_02">
+<title>EXAMPLE (v3.02)</title>
+<literallayout class="monospaced">110
+NULL 0 NULL 0
+N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
+Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
+1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
+9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
+a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
+. . .</literallayout>
+</refsect1>
+<refsect1 id="_caveats">
+<title>CAVEATS</title>
+<simpara>Although the unicharset reader maintains the ability to read unicharsets
+of older formats and will assign default values to missing fields,
+the accuracy will be degraded.</simpara>
+<simpara>Further, most other data files are indexed by the unicharset file,
+so changing it without re-generating the others is likely to have dire
+consequences.</simpara>
+</refsect1>
+<refsect1 id="_history">
+<title>HISTORY</title>
+<simpara>The unicharset format first appeared with Tesseract 2.00, which was the
+first version to support languages other than English. The unicharset file
+contained only the first two fields, and the "ispunctuation" property was
+absent (punctuation was regarded as "0", as "=" is in the above example).</simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</simpara>
+<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/unicharset_extractor.1.asc b/doc/unicharset_extractor.1.asc
index c972783a8e..bde21ab3ba 100644
--- a/doc/unicharset_extractor.1.asc
+++ b/doc/unicharset_extractor.1.asc
@@ -11,9 +11,9 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-Tesseract needs to know the set of possible characters it can output. 
-To generate the unicharset data file, use the unicharset_extractor 
-program on the same training pages bounding box files as used for 
+Tesseract needs to know the set of possible characters it can output.
+To generate the unicharset data file, use the unicharset_extractor
+program on the same training pages bounding box files as used for
 clustering:
 
     unicharset_extractor fontfile_1.box fontfile_2.box ...
@@ -21,19 +21,19 @@ clustering:
 The unicharset will be put into the file 'dir/unicharset', or simply
 './unicharset' if no output directory is provided.
 
-Tesseract also needs to have access to character properties isalpha, 
-isdigit, isupper, islower, ispunctuation. all of this auxilury data 
+Tesseract also needs to have access to character properties isalpha,
+isdigit, isupper, islower, ispunctuation. All of this auxiliary data
 and more is encoded in this file. (See unicharset(5))
 
-If your system supports the wctype functions, these values will be set 
-automatically by unicharset_extractor and there is no need to edit the 
-unicharset file. On some older systems (eg Windows 95), the unicharset 
+If your system supports the wctype functions, these values will be set
+automatically by unicharset_extractor and there is no need to edit the
+unicharset file. On some older systems (eg Windows 95), the unicharset
 file must be edited by hand to add these property description codes.
 
-*NOTE* The unicharset file must be regenerated whenever inttemp, normproto 
-and pffmtable are generated (i.e. they must all be recreated when the box 
-file is changed) as they have to be in sync. This is made easier than in 
-previous versions by running unicharset_extractor before mftraining and 
+*NOTE* The unicharset file must be regenerated whenever inttemp, normproto
+and pffmtable are generated (i.e. they must all be recreated when the box
+file is changed) as they have to be in sync. This is made easier than in
+previous versions by running unicharset_extractor before mftraining and
 cntraining, and giving the unicharset to mftraining.
 
 SEE ALSO
diff --git a/doc/unicharset_extractor.1.html b/doc/unicharset_extractor.1.html
index a6ac9e898b..6fdeb5e953 100644
--- a/doc/unicharset_extractor.1.html
+++ b/doc/unicharset_extractor.1.html
@@ -1,815 +1,815 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>UNICHARSET_EXTRACTOR(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-UNICHARSET_EXTRACTOR(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>unicharset_extractor -
-   extract unicharset from Tesseract boxfiles
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><strong>unicharset_extractor</strong> <em>[-D dir]</em> <em>FILE</em>&#8230;</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Tesseract needs to know the set of possible characters it can output.
-To generate the unicharset data file, use the unicharset_extractor
-program on the same training pages bounding box files as used for
-clustering:</p></div>
-<div class="literalblock">
-<div class="content">
-<pre><code>unicharset_extractor fontfile_1.box fontfile_2.box ...</code></pre>
-</div></div>
-<div class="paragraph"><p>The unicharset will be put into the file <em>dir/unicharset</em>, or simply
-<em>./unicharset</em> if no output directory is provided.</p></div>
-<div class="paragraph"><p>Tesseract also needs to have access to character properties isalpha,
-isdigit, isupper, islower, ispunctuation. all of this auxilury data
-and more is encoded in this file. (See unicharset(5))</p></div>
-<div class="paragraph"><p>If your system supports the wctype functions, these values will be set
-automatically by unicharset_extractor and there is no need to edit the
-unicharset file. On some older systems (eg Windows 95), the unicharset
-file must be edited by hand to add these property description codes.</p></div>
-<div class="paragraph"><p><strong>NOTE</strong> The unicharset file must be regenerated whenever inttemp, normproto
-and pffmtable are generated (i.e. they must all be recreated when the box
-file is changed) as they have to be in sync. This is made easier than in
-previous versions by running unicharset_extractor before mftraining and
-cntraining, and giving the unicharset to mftraining.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
-<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_history">HISTORY</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>unicharset_extractor first appeared in Tesseract 2.00.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Copyright (C) 2006, Google Inc.
-Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-12 23:52:38 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>UNICHARSET_EXTRACTOR(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnotes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so multi-line matches work.
+        // Because JavaScript has no s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE return full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+UNICHARSET_EXTRACTOR(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>unicharset_extractor -
+   extract unicharset from Tesseract boxfiles
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><strong>unicharset_extractor</strong> <em>[-D dir]</em> <em>FILE</em>&#8230;</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Tesseract needs to know the set of possible characters it can output.
+To generate the unicharset data file, use the unicharset_extractor
+program on the same training pages bounding box files as used for
+clustering:</p></div>
+<div class="literalblock">
+<div class="content">
+<pre><code>unicharset_extractor fontfile_1.box fontfile_2.box ...</code></pre>
+</div></div>
+<div class="paragraph"><p>The unicharset will be put into the file <em>dir/unicharset</em>, or simply
+<em>./unicharset</em> if no output directory is provided.</p></div>
+<div class="paragraph"><p>Tesseract also needs to have access to character properties isalpha,
+isdigit, isupper, islower, ispunctuation. All of this auxiliary data
+and more is encoded in this file. (See unicharset(5))</p></div>
+<div class="paragraph"><p>If your system supports the wctype functions, these values will be set
+automatically by unicharset_extractor and there is no need to edit the
+unicharset file. On some older systems (e.g. Windows 95), the unicharset
+file must be edited by hand to add these property description codes.</p></div>
+<div class="paragraph"><p><strong>NOTE</strong> The unicharset file must be regenerated whenever inttemp, normproto
+and pffmtable are generated (i.e. they must all be recreated when the box
+file is changed) as they have to be in sync. This is made easier than in
+previous versions by running unicharset_extractor before mftraining and
+cntraining, and giving the unicharset to mftraining.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
+<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_history">HISTORY</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>unicharset_extractor first appeared in Tesseract 2.00.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (C) 2006, Google Inc.
+Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-12 23:52:38 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/unicharset_extractor.1.xml b/doc/unicharset_extractor.1.xml
index bea4d1e16e..45087a8c64 100644
--- a/doc/unicharset_extractor.1.xml
+++ b/doc/unicharset_extractor.1.xml
@@ -1,63 +1,63 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>UNICHARSET_EXTRACTOR(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>unicharset_extractor</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>unicharset_extractor</refname>
-    <refpurpose>extract unicharset from Tesseract boxfiles</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara><emphasis role="strong">unicharset_extractor</emphasis> <emphasis>[-D dir]</emphasis> <emphasis>FILE</emphasis>&#8230;</simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>Tesseract needs to know the set of possible characters it can output.
-To generate the unicharset data file, use the unicharset_extractor
-program on the same training pages bounding box files as used for
-clustering:</simpara>
-<literallayout class="monospaced">unicharset_extractor fontfile_1.box fontfile_2.box ...</literallayout>
-<simpara>The unicharset will be put into the file <emphasis>dir/unicharset</emphasis>, or simply
-<emphasis>./unicharset</emphasis> if no output directory is provided.</simpara>
-<simpara>Tesseract also needs to have access to character properties isalpha,
-isdigit, isupper, islower, ispunctuation. all of this auxilury data
-and more is encoded in this file. (See unicharset(5))</simpara>
-<simpara>If your system supports the wctype functions, these values will be set
-automatically by unicharset_extractor and there is no need to edit the
-unicharset file. On some older systems (eg Windows 95), the unicharset
-file must be edited by hand to add these property description codes.</simpara>
-<simpara><emphasis role="strong">NOTE</emphasis> The unicharset file must be regenerated whenever inttemp, normproto
-and pffmtable are generated (i.e. they must all be recreated when the box
-file is changed) as they have to be in sync. This is made easier than in
-previous versions by running unicharset_extractor before mftraining and
-cntraining, and giving the unicharset to mftraining.</simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), unicharset(5)</simpara>
-<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-</refsect1>
-<refsect1 id="_history">
-<title>HISTORY</title>
-<simpara>unicharset_extractor first appeared in Tesseract 2.00.</simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (C) 2006, Google Inc.
-Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>UNICHARSET_EXTRACTOR(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>unicharset_extractor</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>unicharset_extractor</refname>
+    <refpurpose>extract unicharset from Tesseract boxfiles</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara><emphasis role="strong">unicharset_extractor</emphasis> <emphasis>[-D dir]</emphasis> <emphasis>FILE</emphasis>&#8230;</simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>Tesseract needs to know the set of possible characters it can output.
+To generate the unicharset data file, use the unicharset_extractor
+program on the same training pages bounding box files as used for
+clustering:</simpara>
+<literallayout class="monospaced">unicharset_extractor fontfile_1.box fontfile_2.box ...</literallayout>
+<simpara>The unicharset will be put into the file <emphasis>dir/unicharset</emphasis>, or simply
+<emphasis>./unicharset</emphasis> if no output directory is provided.</simpara>
+<simpara>Tesseract also needs to have access to character properties isalpha,
+isdigit, isupper, islower, ispunctuation. All of this auxiliary data
+and more is encoded in this file. (See unicharset(5))</simpara>
+<simpara>If your system supports the wctype functions, these values will be set
+automatically by unicharset_extractor and there is no need to edit the
+unicharset file. On some older systems (e.g. Windows 95), the unicharset
+file must be edited by hand to add these property description codes.</simpara>
+<simpara><emphasis role="strong">NOTE</emphasis> The unicharset file must be regenerated whenever inttemp, normproto
+and pffmtable are generated (i.e. they must all be recreated when the box
+file is changed) as they have to be in sync. This is made easier than in
+previous versions by running unicharset_extractor before mftraining and
+cntraining, and giving the unicharset to mftraining.</simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), unicharset(5)</simpara>
+<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+</refsect1>
+<refsect1 id="_history">
+<title>HISTORY</title>
+<simpara>unicharset_extractor first appeared in Tesseract 2.00.</simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (C) 2006, Google Inc.
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/doc/wordlist2dawg.1.html b/doc/wordlist2dawg.1.html
index 58e5cab4fa..733570511a 100644
--- a/doc/wordlist2dawg.1.html
+++ b/doc/wordlist2dawg.1.html
@@ -1,820 +1,820 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-<head>
-<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
-<meta name="generator" content="AsciiDoc 8.6.9" />
-<title>WORDLIST2DAWG(1)</title>
-<style type="text/css">
-/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
-
-/* Default font. */
-body {
-  font-family: Georgia,serif;
-}
-
-/* Title font. */
-h1, h2, h3, h4, h5, h6,
-div.title, caption.title,
-thead, p.table.header,
-#toctitle,
-#author, #revnumber, #revdate, #revremark,
-#footer {
-  font-family: Arial,Helvetica,sans-serif;
-}
-
-body {
-  margin: 1em 5% 1em 5%;
-}
-
-a {
-  color: blue;
-  text-decoration: underline;
-}
-a:visited {
-  color: fuchsia;
-}
-
-em {
-  font-style: italic;
-  color: navy;
-}
-
-strong {
-  font-weight: bold;
-  color: #083194;
-}
-
-h1, h2, h3, h4, h5, h6 {
-  color: #527bbd;
-  margin-top: 1.2em;
-  margin-bottom: 0.5em;
-  line-height: 1.3;
-}
-
-h1, h2, h3 {
-  border-bottom: 2px solid silver;
-}
-h2 {
-  padding-top: 0.5em;
-}
-h3 {
-  float: left;
-}
-h3 + * {
-  clear: left;
-}
-h5 {
-  font-size: 1.0em;
-}
-
-div.sectionbody {
-  margin-left: 0;
-}
-
-hr {
-  border: 1px solid silver;
-}
-
-p {
-  margin-top: 0.5em;
-  margin-bottom: 0.5em;
-}
-
-ul, ol, li > p {
-  margin-top: 0;
-}
-ul > li     { color: #aaa; }
-ul > li > * { color: black; }
-
-.monospaced, code, pre {
-  font-family: "Courier New", Courier, monospace;
-  font-size: inherit;
-  color: navy;
-  padding: 0;
-  margin: 0;
-}
-pre {
-  white-space: pre-wrap;
-}
-
-#author {
-  color: #527bbd;
-  font-weight: bold;
-  font-size: 1.1em;
-}
-#email {
-}
-#revnumber, #revdate, #revremark {
-}
-
-#footer {
-  font-size: small;
-  border-top: 2px solid silver;
-  padding-top: 0.5em;
-  margin-top: 4.0em;
-}
-#footer-text {
-  float: left;
-  padding-bottom: 0.5em;
-}
-#footer-badges {
-  float: right;
-  padding-bottom: 0.5em;
-}
-
-#preamble {
-  margin-top: 1.5em;
-  margin-bottom: 1.5em;
-}
-div.imageblock, div.exampleblock, div.verseblock,
-div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
-div.admonitionblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.admonitionblock {
-  margin-top: 2.0em;
-  margin-bottom: 2.0em;
-  margin-right: 10%;
-  color: #606060;
-}
-
-div.content { /* Block element content. */
-  padding: 0;
-}
-
-/* Block element titles. */
-div.title, caption.title {
-  color: #527bbd;
-  font-weight: bold;
-  text-align: left;
-  margin-top: 1.0em;
-  margin-bottom: 0.5em;
-}
-div.title + * {
-  margin-top: 0;
-}
-
-td div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content div.title:first-child {
-  margin-top: 0.0em;
-}
-div.content + div.title {
-  margin-top: 0.0em;
-}
-
-div.sidebarblock > div.content {
-  background: #ffffee;
-  border: 1px solid #dddddd;
-  border-left: 4px solid #f0f0f0;
-  padding: 0.5em;
-}
-
-div.listingblock > div.content {
-  border: 1px solid #dddddd;
-  border-left: 5px solid #f0f0f0;
-  background: #f8f8f8;
-  padding: 0.5em;
-}
-
-div.quoteblock, div.verseblock {
-  padding-left: 1.0em;
-  margin-left: 1.0em;
-  margin-right: 10%;
-  border-left: 5px solid #f0f0f0;
-  color: #888;
-}
-
-div.quoteblock > div.attribution {
-  padding-top: 0.5em;
-  text-align: right;
-}
-
-div.verseblock > pre.content {
-  font-family: inherit;
-  font-size: inherit;
-}
-div.verseblock > div.attribution {
-  padding-top: 0.75em;
-  text-align: left;
-}
-/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
-div.verseblock + div.attribution {
-  text-align: left;
-}
-
-div.admonitionblock .icon {
-  vertical-align: top;
-  font-size: 1.1em;
-  font-weight: bold;
-  text-decoration: underline;
-  color: #527bbd;
-  padding-right: 0.5em;
-}
-div.admonitionblock td.content {
-  padding-left: 0.5em;
-  border-left: 3px solid #dddddd;
-}
-
-div.exampleblock > div.content {
-  border-left: 3px solid #dddddd;
-  padding-left: 0.5em;
-}
-
-div.imageblock div.content { padding-left: 0; }
-span.image img { border-style: none; vertical-align: text-bottom; }
-a.image:visited { color: white; }
-
-dl {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-dt {
-  margin-top: 0.5em;
-  margin-bottom: 0;
-  font-style: normal;
-  color: navy;
-}
-dd > *:first-child {
-  margin-top: 0.1em;
-}
-
-ul, ol {
-    list-style-position: outside;
-}
-ol.arabic {
-  list-style-type: decimal;
-}
-ol.loweralpha {
-  list-style-type: lower-alpha;
-}
-ol.upperalpha {
-  list-style-type: upper-alpha;
-}
-ol.lowerroman {
-  list-style-type: lower-roman;
-}
-ol.upperroman {
-  list-style-type: upper-roman;
-}
-
-div.compact ul, div.compact ol,
-div.compact p, div.compact p,
-div.compact div, div.compact div {
-  margin-top: 0.1em;
-  margin-bottom: 0.1em;
-}
-
-tfoot {
-  font-weight: bold;
-}
-td > div.verse {
-  white-space: pre;
-}
-
-div.hdlist {
-  margin-top: 0.8em;
-  margin-bottom: 0.8em;
-}
-div.hdlist tr {
-  padding-bottom: 15px;
-}
-dt.hdlist1.strong, td.hdlist1.strong {
-  font-weight: bold;
-}
-td.hdlist1 {
-  vertical-align: top;
-  font-style: normal;
-  padding-right: 0.8em;
-  color: navy;
-}
-td.hdlist2 {
-  vertical-align: top;
-}
-div.hdlist.compact tr {
-  margin: 0;
-  padding-bottom: 0;
-}
-
-.comment {
-  background: yellow;
-}
-
-.footnote, .footnoteref {
-  font-size: 0.8em;
-}
-
-span.footnote, span.footnoteref {
-  vertical-align: super;
-}
-
-#footnotes {
-  margin: 20px 0 20px 0;
-  padding: 7px 0 0 0;
-}
-
-#footnotes div.footnote {
-  margin: 0 0 5px 0;
-}
-
-#footnotes hr {
-  border: none;
-  border-top: 1px solid silver;
-  height: 1px;
-  text-align: left;
-  margin-left: 0;
-  width: 20%;
-  min-width: 100px;
-}
-
-div.colist td {
-  padding-right: 0.5em;
-  padding-bottom: 0.3em;
-  vertical-align: top;
-}
-div.colist td img {
-  margin-top: 0.3em;
-}
-
-@media print {
-  #footer-badges { display: none; }
-}
-
-#toc {
-  margin-bottom: 2.5em;
-}
-
-#toctitle {
-  color: #527bbd;
-  font-size: 1.1em;
-  font-weight: bold;
-  margin-top: 1.0em;
-  margin-bottom: 0.1em;
-}
-
-div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
-  margin-top: 0;
-  margin-bottom: 0;
-}
-div.toclevel2 {
-  margin-left: 2em;
-  font-size: 0.9em;
-}
-div.toclevel3 {
-  margin-left: 4em;
-  font-size: 0.9em;
-}
-div.toclevel4 {
-  margin-left: 6em;
-  font-size: 0.9em;
-}
-
-span.aqua { color: aqua; }
-span.black { color: black; }
-span.blue { color: blue; }
-span.fuchsia { color: fuchsia; }
-span.gray { color: gray; }
-span.green { color: green; }
-span.lime { color: lime; }
-span.maroon { color: maroon; }
-span.navy { color: navy; }
-span.olive { color: olive; }
-span.purple { color: purple; }
-span.red { color: red; }
-span.silver { color: silver; }
-span.teal { color: teal; }
-span.white { color: white; }
-span.yellow { color: yellow; }
-
-span.aqua-background { background: aqua; }
-span.black-background { background: black; }
-span.blue-background { background: blue; }
-span.fuchsia-background { background: fuchsia; }
-span.gray-background { background: gray; }
-span.green-background { background: green; }
-span.lime-background { background: lime; }
-span.maroon-background { background: maroon; }
-span.navy-background { background: navy; }
-span.olive-background { background: olive; }
-span.purple-background { background: purple; }
-span.red-background { background: red; }
-span.silver-background { background: silver; }
-span.teal-background { background: teal; }
-span.white-background { background: white; }
-span.yellow-background { background: yellow; }
-
-span.big { font-size: 2em; }
-span.small { font-size: 0.6em; }
-
-span.underline { text-decoration: underline; }
-span.overline { text-decoration: overline; }
-span.line-through { text-decoration: line-through; }
-
-div.unbreakable { page-break-inside: avoid; }
-
-
-/*
- * xhtml11 specific
- *
- * */
-
-div.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-div.tableblock > table {
-  border: 3px solid #527bbd;
-}
-thead, p.table.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.table {
-  margin-top: 0;
-}
-/* Because the table frame attribute is overriden by CSS in most browsers. */
-div.tableblock > table[frame="void"] {
-  border-style: none;
-}
-div.tableblock > table[frame="hsides"] {
-  border-left-style: none;
-  border-right-style: none;
-}
-div.tableblock > table[frame="vsides"] {
-  border-top-style: none;
-  border-bottom-style: none;
-}
-
-
-/*
- * html5 specific
- *
- * */
-
-table.tableblock {
-  margin-top: 1.0em;
-  margin-bottom: 1.5em;
-}
-thead, p.tableblock.header {
-  font-weight: bold;
-  color: #527bbd;
-}
-p.tableblock {
-  margin-top: 0;
-}
-table.tableblock {
-  border-width: 3px;
-  border-spacing: 0px;
-  border-style: solid;
-  border-color: #527bbd;
-  border-collapse: collapse;
-}
-th.tableblock, td.tableblock {
-  border-width: 1px;
-  padding: 4px;
-  border-style: solid;
-  border-color: #527bbd;
-}
-
-table.tableblock.frame-topbot {
-  border-left-style: hidden;
-  border-right-style: hidden;
-}
-table.tableblock.frame-sides {
-  border-top-style: hidden;
-  border-bottom-style: hidden;
-}
-table.tableblock.frame-none {
-  border-style: hidden;
-}
-
-th.tableblock.halign-left, td.tableblock.halign-left {
-  text-align: left;
-}
-th.tableblock.halign-center, td.tableblock.halign-center {
-  text-align: center;
-}
-th.tableblock.halign-right, td.tableblock.halign-right {
-  text-align: right;
-}
-
-th.tableblock.valign-top, td.tableblock.valign-top {
-  vertical-align: top;
-}
-th.tableblock.valign-middle, td.tableblock.valign-middle {
-  vertical-align: middle;
-}
-th.tableblock.valign-bottom, td.tableblock.valign-bottom {
-  vertical-align: bottom;
-}
-
-
-/*
- * manpage specific
- *
- * */
-
-body.manpage h1 {
-  padding-top: 0.5em;
-  padding-bottom: 0.5em;
-  border-top: 2px solid silver;
-  border-bottom: 2px solid silver;
-}
-body.manpage h2 {
-  border-style: none;
-}
-body.manpage div.sectionbody {
-  margin-left: 3em;
-}
-
-@media print {
-  body.manpage div#toc { display: none; }
-}
-
-
-</style>
-<script type="text/javascript">
-/*<![CDATA[*/
-var asciidoc = {  // Namespace.
-
-/////////////////////////////////////////////////////////////////////
-// Table Of Contents generator
-/////////////////////////////////////////////////////////////////////
-
-/* Author: Mihai Bazon, September 2002
- * http://students.infoiasi.ro/~mishoo
- *
- * Table Of Content generator
- * Version: 0.4
- *
- * Feel free to use this script under the terms of the GNU General Public
- * License, as long as you do not remove or alter this notice.
- */
-
- /* modified by Troy D. Hanson, September 2006. License: GPL */
- /* modified by Stuart Rackham, 2006, 2009. License: GPL */
-
-// toclevels = 1..4.
-toc: function (toclevels) {
-
-  function getText(el) {
-    var text = "";
-    for (var i = el.firstChild; i != null; i = i.nextSibling) {
-      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
-        text += i.data;
-      else if (i.firstChild != null)
-        text += getText(i);
-    }
-    return text;
-  }
-
-  function TocEntry(el, text, toclevel) {
-    this.element = el;
-    this.text = text;
-    this.toclevel = toclevel;
-  }
-
-  function tocEntries(el, toclevels) {
-    var result = new Array;
-    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
-    // Function that scans the DOM tree for header elements (the DOM2
-    // nodeIterator API would be a better technique but not supported by all
-    // browsers).
-    var iterate = function (el) {
-      for (var i = el.firstChild; i != null; i = i.nextSibling) {
-        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
-          var mo = re.exec(i.tagName);
-          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
-            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
-          }
-          iterate(i);
-        }
-      }
-    }
-    iterate(el);
-    return result;
-  }
-
-  var toc = document.getElementById("toc");
-  if (!toc) {
-    return;
-  }
-
-  // Delete existing TOC entries in case we're reloading the TOC.
-  var tocEntriesToRemove = [];
-  var i;
-  for (i = 0; i < toc.childNodes.length; i++) {
-    var entry = toc.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div'
-     && entry.getAttribute("class")
-     && entry.getAttribute("class").match(/^toclevel/))
-      tocEntriesToRemove.push(entry);
-  }
-  for (i = 0; i < tocEntriesToRemove.length; i++) {
-    toc.removeChild(tocEntriesToRemove[i]);
-  }
-
-  // Rebuild TOC entries.
-  var entries = tocEntries(document.getElementById("content"), toclevels);
-  for (var i = 0; i < entries.length; ++i) {
-    var entry = entries[i];
-    if (entry.element.id == "")
-      entry.element.id = "_toc_" + i;
-    var a = document.createElement("a");
-    a.href = "#" + entry.element.id;
-    a.appendChild(document.createTextNode(entry.text));
-    var div = document.createElement("div");
-    div.appendChild(a);
-    div.className = "toclevel" + entry.toclevel;
-    toc.appendChild(div);
-  }
-  if (entries.length == 0)
-    toc.parentNode.removeChild(toc);
-},
-
-
-/////////////////////////////////////////////////////////////////////
-// Footnotes generator
-/////////////////////////////////////////////////////////////////////
-
-/* Based on footnote generation code from:
- * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
- */
-
-footnotes: function () {
-  // Delete existing footnote entries in case we're reloading the footnodes.
-  var i;
-  var noteholder = document.getElementById("footnotes");
-  if (!noteholder) {
-    return;
-  }
-  var entriesToRemove = [];
-  for (i = 0; i < noteholder.childNodes.length; i++) {
-    var entry = noteholder.childNodes[i];
-    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
-      entriesToRemove.push(entry);
-  }
-  for (i = 0; i < entriesToRemove.length; i++) {
-    noteholder.removeChild(entriesToRemove[i]);
-  }
-
-  // Rebuild footnote entries.
-  var cont = document.getElementById("content");
-  var spans = cont.getElementsByTagName("span");
-  var refs = {};
-  var n = 0;
-  for (i=0; i<spans.length; i++) {
-    if (spans[i].className == "footnote") {
-      n++;
-      var note = spans[i].getAttribute("data-note");
-      if (!note) {
-        // Use [\s\S] in place of . so multi-line matches work.
-        // Because JavaScript has no s (dotall) regex flag.
-        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
-        spans[i].innerHTML =
-          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-        spans[i].setAttribute("data-note", note);
-      }
-      noteholder.innerHTML +=
-        "<div class='footnote' id='_footnote_" + n + "'>" +
-        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
-        n + "</a>. " + note + "</div>";
-      var id =spans[i].getAttribute("id");
-      if (id != null) refs["#"+id] = n;
-    }
-  }
-  if (n == 0)
-    noteholder.parentNode.removeChild(noteholder);
-  else {
-    // Process footnoterefs.
-    for (i=0; i<spans.length; i++) {
-      if (spans[i].className == "footnoteref") {
-        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
-        href = href.match(/#.*/)[0];  // Because IE return full URL.
-        n = refs[href];
-        spans[i].innerHTML =
-          "[<a href='#_footnote_" + n +
-          "' title='View footnote' class='footnote'>" + n + "</a>]";
-      }
-    }
-  }
-},
-
-install: function(toclevels) {
-  var timerId;
-
-  function reinstall() {
-    asciidoc.footnotes();
-    if (toclevels) {
-      asciidoc.toc(toclevels);
-    }
-  }
-
-  function reinstallAndRemoveTimer() {
-    clearInterval(timerId);
-    reinstall();
-  }
-
-  timerId = setInterval(reinstall, 500);
-  if (document.addEventListener)
-    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
-  else
-    window.onload = reinstallAndRemoveTimer;
-}
-
-}
-asciidoc.install();
-/*]]>*/
-</script>
-</head>
-<body class="manpage">
-<div id="header">
-<h1>
-WORDLIST2DAWG(1) Manual Page
-</h1>
-<h2>NAME</h2>
-<div class="sectionbody">
-<p>wordlist2dawg -
-   convert a wordlist to a DAWG for Tesseract
-</p>
-</div>
-</div>
-<div id="content">
-<div class="sect1">
-<h2 id="_synopsis">SYNOPSIS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><strong>wordlist2dawg</strong> <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
-<div class="paragraph"><p><strong>wordlist2dawg</strong> -t <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
-<div class="paragraph"><p><strong>wordlist2dawg</strong> -r 1 <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
-<div class="paragraph"><p><strong>wordlist2dawg</strong> -r 2 <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
-<div class="paragraph"><p><strong>wordlist2dawg</strong> -l &lt;short&gt; &lt;long&gt; <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_description">DESCRIPTION</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph
-(DAWG) for use with Tesseract.  A DAWG is a compressed, space and time
-efficient representation of a word list.</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_options">OPTIONS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>-t
-        Verify that a given dawg file is equivalent to a given wordlist.</p></div>
-<div class="paragraph"><p>-r 1
-        Reverse a word if it contains an RTL character.</p></div>
-<div class="paragraph"><p>-r 2
-        Reverse all words.</p></div>
-<div class="paragraph"><p>-l &lt;short&gt; &lt;long&gt;
-        Produce a file with several dawgs in it, one each for words
-        of length &lt;short&gt;, &lt;short+1&gt;,&#8230; &lt;long&gt;</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_arguments">ARGUMENTS</h2>
-<div class="sectionbody">
-<div class="paragraph"><p><em>WORDLIST</em>
-        A plain text file in UTF-8, one word per line.</p></div>
-<div class="paragraph"><p><em>DAWG</em>
-        The output DAWG to write.</p></div>
-<div class="paragraph"><p><em>lang.unicharset</em>
-        The unicharset of the language. This is the unicharset
-        generated by mftraining(1).</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_see_also">SEE ALSO</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</p></div>
-<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_copying">COPYING</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>Copyright (C) 2006 Google, Inc.
-Licensed under the Apache License, Version 2.0</p></div>
-</div>
-</div>
-<div class="sect1">
-<h2 id="_author">AUTHOR</h2>
-<div class="sectionbody">
-<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
-</div>
-</div>
-</div>
-<div id="footnotes"><hr /></div>
-<div id="footer">
-<div id="footer-text">
-Last updated 2015-06-12 23:52:50 CEST
-</div>
-</div>
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc 8.6.9" />
+<title>WORDLIST2DAWG(1)</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+  font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+  font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+  margin: 1em 5% 1em 5%;
+}
+
+a {
+  color: blue;
+  text-decoration: underline;
+}
+a:visited {
+  color: fuchsia;
+}
+
+em {
+  font-style: italic;
+  color: navy;
+}
+
+strong {
+  font-weight: bold;
+  color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  color: #527bbd;
+  margin-top: 1.2em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+
+h1, h2, h3 {
+  border-bottom: 2px solid silver;
+}
+h2 {
+  padding-top: 0.5em;
+}
+h3 {
+  float: left;
+}
+h3 + * {
+  clear: left;
+}
+h5 {
+  font-size: 1.0em;
+}
+
+div.sectionbody {
+  margin-left: 0;
+}
+
+hr {
+  border: 1px solid silver;
+}
+
+p {
+  margin-top: 0.5em;
+  margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+  margin-top: 0;
+}
+ul > li     { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+  font-family: "Courier New", Courier, monospace;
+  font-size: inherit;
+  color: navy;
+  padding: 0;
+  margin: 0;
+}
+pre {
+  white-space: pre-wrap;
+}
+
+#author {
+  color: #527bbd;
+  font-weight: bold;
+  font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+  font-size: small;
+  border-top: 2px solid silver;
+  padding-top: 0.5em;
+  margin-top: 4.0em;
+}
+#footer-text {
+  float: left;
+  padding-bottom: 0.5em;
+}
+#footer-badges {
+  float: right;
+  padding-bottom: 0.5em;
+}
+
+#preamble {
+  margin-top: 1.5em;
+  margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.admonitionblock {
+  margin-top: 2.0em;
+  margin-bottom: 2.0em;
+  margin-right: 10%;
+  color: #606060;
+}
+
+div.content { /* Block element content. */
+  padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+  color: #527bbd;
+  font-weight: bold;
+  text-align: left;
+  margin-top: 1.0em;
+  margin-bottom: 0.5em;
+}
+div.title + * {
+  margin-top: 0;
+}
+
+td div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content div.title:first-child {
+  margin-top: 0.0em;
+}
+div.content + div.title {
+  margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+  background: #ffffee;
+  border: 1px solid #dddddd;
+  border-left: 4px solid #f0f0f0;
+  padding: 0.5em;
+}
+
+div.listingblock > div.content {
+  border: 1px solid #dddddd;
+  border-left: 5px solid #f0f0f0;
+  background: #f8f8f8;
+  padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+  padding-left: 1.0em;
+  margin-left: 1.0em;
+  margin-right: 10%;
+  border-left: 5px solid #f0f0f0;
+  color: #888;
+}
+
+div.quoteblock > div.attribution {
+  padding-top: 0.5em;
+  text-align: right;
+}
+
+div.verseblock > pre.content {
+  font-family: inherit;
+  font-size: inherit;
+}
+div.verseblock > div.attribution {
+  padding-top: 0.75em;
+  text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+  text-align: left;
+}
+
+div.admonitionblock .icon {
+  vertical-align: top;
+  font-size: 1.1em;
+  font-weight: bold;
+  text-decoration: underline;
+  color: #527bbd;
+  padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+  padding-left: 0.5em;
+  border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+  border-left: 3px solid #dddddd;
+  padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+dt {
+  margin-top: 0.5em;
+  margin-bottom: 0;
+  font-style: normal;
+  color: navy;
+}
+dd > *:first-child {
+  margin-top: 0.1em;
+}
+
+ul, ol {
+    list-style-position: outside;
+}
+ol.arabic {
+  list-style-type: decimal;
+}
+ol.loweralpha {
+  list-style-type: lower-alpha;
+}
+ol.upperalpha {
+  list-style-type: upper-alpha;
+}
+ol.lowerroman {
+  list-style-type: lower-roman;
+}
+ol.upperroman {
+  list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+  margin-top: 0.1em;
+  margin-bottom: 0.1em;
+}
+
+tfoot {
+  font-weight: bold;
+}
+td > div.verse {
+  white-space: pre;
+}
+
+div.hdlist {
+  margin-top: 0.8em;
+  margin-bottom: 0.8em;
+}
+div.hdlist tr {
+  padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+  font-weight: bold;
+}
+td.hdlist1 {
+  vertical-align: top;
+  font-style: normal;
+  padding-right: 0.8em;
+  color: navy;
+}
+td.hdlist2 {
+  vertical-align: top;
+}
+div.hdlist.compact tr {
+  margin: 0;
+  padding-bottom: 0;
+}
+
+.comment {
+  background: yellow;
+}
+
+.footnote, .footnoteref {
+  font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+  vertical-align: super;
+}
+
+#footnotes {
+  margin: 20px 0 20px 0;
+  padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+  margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+  border: none;
+  border-top: 1px solid silver;
+  height: 1px;
+  text-align: left;
+  margin-left: 0;
+  width: 20%;
+  min-width: 100px;
+}
+
+div.colist td {
+  padding-right: 0.5em;
+  padding-bottom: 0.3em;
+  vertical-align: top;
+}
+div.colist td img {
+  margin-top: 0.3em;
+}
+
+@media print {
+  #footer-badges { display: none; }
+}
+
+#toc {
+  margin-bottom: 2.5em;
+}
+
+#toctitle {
+  color: #527bbd;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 1.0em;
+  margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+div.toclevel2 {
+  margin-left: 2em;
+  font-size: 0.9em;
+}
+div.toclevel3 {
+  margin-left: 4em;
+  font-size: 0.9em;
+}
+div.toclevel4 {
+  margin-left: 6em;
+  font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+div.tableblock > table {
+  border: 3px solid #527bbd;
+}
+thead, p.table.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.table {
+  margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+  border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+  border-left-style: none;
+  border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+  border-top-style: none;
+  border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+  margin-top: 1.0em;
+  margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+  font-weight: bold;
+  color: #527bbd;
+}
+p.tableblock {
+  margin-top: 0;
+}
+table.tableblock {
+  border-width: 3px;
+  border-spacing: 0px;
+  border-style: solid;
+  border-color: #527bbd;
+  border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+  border-width: 1px;
+  padding: 4px;
+  border-style: solid;
+  border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+  border-left-style: hidden;
+  border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+  border-top-style: hidden;
+  border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+  border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+  text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+  text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+  text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+  vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+  vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+  vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+  padding-top: 0.5em;
+  padding-bottom: 0.5em;
+  border-top: 2px solid silver;
+  border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+  border-style: none;
+}
+body.manpage div.sectionbody {
+  margin-left: 3em;
+}
+
+@media print {
+  body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = {  // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+  function getText(el) {
+    var text = "";
+    for (var i = el.firstChild; i != null; i = i.nextSibling) {
+      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+        text += i.data;
+      else if (i.firstChild != null)
+        text += getText(i);
+    }
+    return text;
+  }
+
+  function TocEntry(el, text, toclevel) {
+    this.element = el;
+    this.text = text;
+    this.toclevel = toclevel;
+  }
+
+  function tocEntries(el, toclevels) {
+    var result = new Array;
+    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+    // Function that scans the DOM tree for header elements (the DOM2
+    // nodeIterator API would be a better technique but not supported by all
+    // browsers).
+    var iterate = function (el) {
+      for (var i = el.firstChild; i != null; i = i.nextSibling) {
+        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+          var mo = re.exec(i.tagName);
+          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+          }
+          iterate(i);
+        }
+      }
+    }
+    iterate(el);
+    return result;
+  }
+
+  var toc = document.getElementById("toc");
+  if (!toc) {
+    return;
+  }
+
+  // Delete existing TOC entries in case we're reloading the TOC.
+  var tocEntriesToRemove = [];
+  var i;
+  for (i = 0; i < toc.childNodes.length; i++) {
+    var entry = toc.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div'
+     && entry.getAttribute("class")
+     && entry.getAttribute("class").match(/^toclevel/))
+      tocEntriesToRemove.push(entry);
+  }
+  for (i = 0; i < tocEntriesToRemove.length; i++) {
+    toc.removeChild(tocEntriesToRemove[i]);
+  }
+
+  // Rebuild TOC entries.
+  var entries = tocEntries(document.getElementById("content"), toclevels);
+  for (var i = 0; i < entries.length; ++i) {
+    var entry = entries[i];
+    if (entry.element.id == "")
+      entry.element.id = "_toc_" + i;
+    var a = document.createElement("a");
+    a.href = "#" + entry.element.id;
+    a.appendChild(document.createTextNode(entry.text));
+    var div = document.createElement("div");
+    div.appendChild(a);
+    div.className = "toclevel" + entry.toclevel;
+    toc.appendChild(div);
+  }
+  if (entries.length == 0)
+    toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+  // Delete existing footnote entries in case we're reloading the footnotes.
+  var i;
+  var noteholder = document.getElementById("footnotes");
+  if (!noteholder) {
+    return;
+  }
+  var entriesToRemove = [];
+  for (i = 0; i < noteholder.childNodes.length; i++) {
+    var entry = noteholder.childNodes[i];
+    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+      entriesToRemove.push(entry);
+  }
+  for (i = 0; i < entriesToRemove.length; i++) {
+    noteholder.removeChild(entriesToRemove[i]);
+  }
+
+  // Rebuild footnote entries.
+  var cont = document.getElementById("content");
+  var spans = cont.getElementsByTagName("span");
+  var refs = {};
+  var n = 0;
+  for (i=0; i<spans.length; i++) {
+    if (spans[i].className == "footnote") {
+      n++;
+      var note = spans[i].getAttribute("data-note");
+      if (!note) {
+        // Use [\s\S] in place of . so that multi-line matches work,
+        // because older JavaScript engines lack the s (dotall) regex flag.
+        note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+        spans[i].innerHTML =
+          "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+        spans[i].setAttribute("data-note", note);
+      }
+      noteholder.innerHTML +=
+        "<div class='footnote' id='_footnote_" + n + "'>" +
+        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+        n + "</a>. " + note + "</div>";
+      var id =spans[i].getAttribute("id");
+      if (id != null) refs["#"+id] = n;
+    }
+  }
+  if (n == 0)
+    noteholder.parentNode.removeChild(noteholder);
+  else {
+    // Process footnoterefs.
+    for (i=0; i<spans.length; i++) {
+      if (spans[i].className == "footnoteref") {
+        var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+        href = href.match(/#.*/)[0];  // Because IE return full URL.
+        n = refs[href];
+        spans[i].innerHTML =
+          "[<a href='#_footnote_" + n +
+          "' title='View footnote' class='footnote'>" + n + "</a>]";
+      }
+    }
+  }
+},
+
+install: function(toclevels) {
+  var timerId;
+
+  function reinstall() {
+    asciidoc.footnotes();
+    if (toclevels) {
+      asciidoc.toc(toclevels);
+    }
+  }
+
+  function reinstallAndRemoveTimer() {
+    clearInterval(timerId);
+    reinstall();
+  }
+
+  timerId = setInterval(reinstall, 500);
+  if (document.addEventListener)
+    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+  else
+    window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="manpage">
+<div id="header">
+<h1>
+WORDLIST2DAWG(1) Manual Page
+</h1>
+<h2>NAME</h2>
+<div class="sectionbody">
+<p>wordlist2dawg -
+   convert a wordlist to a DAWG for Tesseract
+</p>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_synopsis">SYNOPSIS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><strong>wordlist2dawg</strong> <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
+<div class="paragraph"><p><strong>wordlist2dawg</strong> -t <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
+<div class="paragraph"><p><strong>wordlist2dawg</strong> -r 1 <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
+<div class="paragraph"><p><strong>wordlist2dawg</strong> -r 2 <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
+<div class="paragraph"><p><strong>wordlist2dawg</strong> -l &lt;short&gt; &lt;long&gt; <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_description">DESCRIPTION</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph
+(DAWG) for use with Tesseract.  A DAWG is a compressed, space and time
+efficient representation of a word list.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_options">OPTIONS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>-t
+        Verify that a given dawg file is equivalent to a given wordlist.</p></div>
+<div class="paragraph"><p>-r 1
+        Reverse a word if it contains an RTL character.</p></div>
+<div class="paragraph"><p>-r 2
+        Reverse all words.</p></div>
+<div class="paragraph"><p>-l &lt;short&gt; &lt;long&gt;
+        Produce a file with several dawgs in it, one each for words
+        of length &lt;short&gt;, &lt;short+1&gt;,&#8230; &lt;long&gt;</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_arguments">ARGUMENTS</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><em>WORDLIST</em>
+        A plain text file in UTF-8, one word per line.</p></div>
+<div class="paragraph"><p><em>DAWG</em>
+        The output DAWG to write.</p></div>
+<div class="paragraph"><p><em>lang.unicharset</em>
+        The unicharset of the language. This is the unicharset
+        generated by mftraining(1).</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_see_also">SEE ALSO</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</p></div>
+<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_copying">COPYING</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (C) 2006 Google, Inc.
+Licensed under the Apache License, Version 2.0</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_author">AUTHOR</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated 2015-06-12 23:52:50 CEST
+</div>
+</div>
+</body>
+</html>
diff --git a/doc/wordlist2dawg.1.xml b/doc/wordlist2dawg.1.xml
index 907d3a574d..bad256fe70 100644
--- a/doc/wordlist2dawg.1.xml
+++ b/doc/wordlist2dawg.1.xml
@@ -1,69 +1,69 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<?asciidoc-toc?>
-<?asciidoc-numbered?>
-<refentry lang="en">
-<refentryinfo>
-    <title>WORDLIST2DAWG(1)</title>
-</refentryinfo>
-<refmeta>
-<refentrytitle>wordlist2dawg</refentrytitle>
-<manvolnum>1</manvolnum>
-<refmiscinfo class="source">&#160;</refmiscinfo>
-<refmiscinfo class="manual">&#160;</refmiscinfo>
-</refmeta>
-<refnamediv>
-    <refname>wordlist2dawg</refname>
-    <refpurpose>convert a wordlist to a DAWG for Tesseract</refpurpose>
-</refnamediv>
-<refsynopsisdiv id="_synopsis">
-<simpara><emphasis role="strong">wordlist2dawg</emphasis> <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
-<simpara><emphasis role="strong">wordlist2dawg</emphasis> -t <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
-<simpara><emphasis role="strong">wordlist2dawg</emphasis> -r 1 <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
-<simpara><emphasis role="strong">wordlist2dawg</emphasis> -r 2 <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
-<simpara><emphasis role="strong">wordlist2dawg</emphasis> -l &lt;short&gt; &lt;long&gt; <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph
-(DAWG) for use with Tesseract.  A DAWG is a compressed, space and time
-efficient representation of a word list.</simpara>
-</refsect1>
-<refsect1 id="_options">
-<title>OPTIONS</title>
-<simpara>-t
-        Verify that a given dawg file is equivalent to a given wordlist.</simpara>
-<simpara>-r 1
-        Reverse a word if it contains an RTL character.</simpara>
-<simpara>-r 2
-        Reverse all words.</simpara>
-<simpara>-l &lt;short&gt; &lt;long&gt;
-        Produce a file with several dawgs in it, one each for words
-        of length &lt;short&gt;, &lt;short+1&gt;,&#8230; &lt;long&gt;</simpara>
-</refsect1>
-<refsect1 id="_arguments">
-<title>ARGUMENTS</title>
-<simpara><emphasis>WORDLIST</emphasis>
-        A plain text file in UTF-8, one word per line.</simpara>
-<simpara><emphasis>DAWG</emphasis>
-        The output DAWG to write.</simpara>
-<simpara><emphasis>lang.unicharset</emphasis>
-        The unicharset of the language. This is the unicharset
-        generated by mftraining(1).</simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</simpara>
-<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (C) 2006 Google, Inc.
-Licensed under the Apache License, Version 2.0</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
-at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
-</refsect1>
-</refentry>
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refentryinfo>
+    <title>WORDLIST2DAWG(1)</title>
+</refentryinfo>
+<refmeta>
+<refentrytitle>wordlist2dawg</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>wordlist2dawg</refname>
+    <refpurpose>convert a wordlist to a DAWG for Tesseract</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara><emphasis role="strong">wordlist2dawg</emphasis> <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
+<simpara><emphasis role="strong">wordlist2dawg</emphasis> -t <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
+<simpara><emphasis role="strong">wordlist2dawg</emphasis> -r 1 <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
+<simpara><emphasis role="strong">wordlist2dawg</emphasis> -r 2 <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
+<simpara><emphasis role="strong">wordlist2dawg</emphasis> -l &lt;short&gt; &lt;long&gt; <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph
+(DAWG) for use with Tesseract.  A DAWG is a compressed, space and time
+efficient representation of a word list.</simpara>
+</refsect1>
+<refsect1 id="_options">
+<title>OPTIONS</title>
+<simpara>-t
+        Verify that a given dawg file is equivalent to a given wordlist.</simpara>
+<simpara>-r 1
+        Reverse a word if it contains an RTL character.</simpara>
+<simpara>-r 2
+        Reverse all words.</simpara>
+<simpara>-l &lt;short&gt; &lt;long&gt;
+        Produce a file with several dawgs in it, one each for words
+        of length &lt;short&gt;, &lt;short+1&gt;,&#8230; &lt;long&gt;</simpara>
+</refsect1>
+<refsect1 id="_arguments">
+<title>ARGUMENTS</title>
+<simpara><emphasis>WORDLIST</emphasis>
+        A plain text file in UTF-8, one word per line.</simpara>
+<simpara><emphasis>DAWG</emphasis>
+        The output DAWG to write.</simpara>
+<simpara><emphasis>lang.unicharset</emphasis>
+        The unicharset of the language. This is the unicharset
+        generated by mftraining(1).</simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</simpara>
+<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (C) 2006 Google, Inc.
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+<refsect1 id="_author">
+<title>AUTHOR</title>
+<simpara>The Tesseract OCR engine was written by Ray Smith and his research groups
+at Hewlett Packard (1985-1995) and Google (2006-present).</simpara>
+</refsect1>
+</refentry>
diff --git a/java/Makefile.am b/java/Makefile.am
index fddbc6f9ec..af3b1885c3 100644
--- a/java/Makefile.am
+++ b/java/Makefile.am
@@ -36,19 +36,20 @@ SCROLLVIEW_CLASSES = \
 	com/google/scrollview/ScrollView.class
 
 SCROLLVIEW_LIBS = \
-	$(srcdir)/piccolo2d-core-3.0.jar \
-	$(srcdir)/piccolo2d-extras-3.0.jar
+	piccolo2d-core-3.0.jar \
+	piccolo2d-extras-3.0.jar
 
-CLASSPATH = $(srcdir)/piccolo2d-core-3.0.jar:$(srcdir)/piccolo2d-extras-3.0.jar
+CLASSPATH = piccolo2d-core-3.0.jar:piccolo2d-extras-3.0.jar
 
 ScrollView.jar : $(SCROLLVIEW_CLASSES)
-	$(JAR) cfm $@ Manifest.txt com/google/scrollview/*.class \
+	$(JAR) cfm $@ $(srcdir)/Manifest.txt com/google/scrollview/*.class \
            com/google/scrollview/events/*.class com/google/scrollview/ui/*.class
 
-$(SCROLLVIEW_CLASSES) : $(SCROLLVIEW_FILES)
+$(SCROLLVIEW_CLASSES) : $(SCROLLVIEW_FILES) $(SCROLLVIEW_LIBS)
 	$(JAVAC) -encoding UTF8 -sourcepath $(srcdir) -classpath $(CLASSPATH) $(SCROLLVIEW_FILES) -d $(builddir)
 
-fetch-jars :
+.PHONY: fetch-jars
+fetch-jars $(SCROLLVIEW_LIBS):
 	curl -L http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0/piccolo2d-core-3.0.jar > piccolo2d-core-3.0.jar
 	curl -L http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0/piccolo2d-extras-3.0.jar > piccolo2d-extras-3.0.jar
 
@@ -64,7 +65,7 @@ uninstall:
 endif
 
 clean :
-	rm -f ScrollView.jar *.class $(srcdir)/*.class
+	rm -f ScrollView.jar $(SCROLLVIEW_CLASSES)
 
 # all-am does nothing, to make the java part optional.
 all all-am install :
diff --git a/neural_networks/runtime/input_file_buffer.cpp b/neural_networks/runtime/input_file_buffer.cpp
index c3ca67b604..2ab6d1b341 100644
--- a/neural_networks/runtime/input_file_buffer.cpp
+++ b/neural_networks/runtime/input_file_buffer.cpp
@@ -4,12 +4,21 @@
 //
 // input_file_buffer.h: Declarations of a class for an object that
 // represents an input file buffer.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include <string>
 #include "input_file_buffer.h"
 
 namespace tesseract {
-// default and only contsructor
+// default and only constructor
 InputFileBuffer::InputFileBuffer(const string &file_name)
   : file_name_(file_name) {
   fp_ = NULL;
diff --git a/neural_networks/runtime/input_file_buffer.h b/neural_networks/runtime/input_file_buffer.h
index 5aa7465c41..51110c4753 100644
--- a/neural_networks/runtime/input_file_buffer.h
+++ b/neural_networks/runtime/input_file_buffer.h
@@ -5,6 +5,15 @@
 // input_file_buffer.h: Declarations of a class for an object that
 // represents an input file buffer.
 //
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #ifndef INPUT_FILE_BUFFER_H
 #define INPUT_FILE_BUFFER_H
diff --git a/neural_networks/runtime/neural_net.cpp b/neural_networks/runtime/neural_net.cpp
index fd2c65af45..30fa4f5704 100644
--- a/neural_networks/runtime/neural_net.cpp
+++ b/neural_networks/runtime/neural_net.cpp
@@ -4,6 +4,15 @@
 //
 // neural_net.cpp: Declarations of a class for an object that
 // represents an arbitrary network of neurons
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 //
 #include <vector>
 #include <string>
@@ -148,9 +157,6 @@ bool NeuralNet::CreateFastNet() {
       node->fan_in_cnt = neurons_[node_idx].fan_in_cnt();
       // allocate memory for fan-in nodes
       node->inputs = new WeightedNode[node->fan_in_cnt];
-      if (node->inputs == NULL) {
-        return false;
-      }
       for (int fan_in = 0; fan_in < node->fan_in_cnt; fan_in++) {
         // identify fan-in neuron
         const int id = neurons_[node_idx].fan_in(fan_in)->id();
@@ -213,9 +219,6 @@ NeuralNet *NeuralNet::FromFile(const string file_name) {
 NeuralNet *NeuralNet::FromInputBuffer(InputFileBuffer *ib) {
       // create a new net object
   NeuralNet *net_obj = new NeuralNet();
-  if (net_obj == NULL) {
-    return NULL;
-  }
       // load the net
   if (!net_obj->ReadBinary(ib)) {
     delete net_obj;
diff --git a/neural_networks/runtime/neural_net.h b/neural_networks/runtime/neural_net.h
index 91d0d68a24..ff3c35323e 100644
--- a/neural_networks/runtime/neural_net.h
+++ b/neural_networks/runtime/neural_net.h
@@ -5,6 +5,15 @@
 // neural_net.h: Declarations of a class for an object that
 // represents an arbitrary network of neurons
 //
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #ifndef NEURAL_NET_H
 #define NEURAL_NET_H
@@ -131,9 +140,6 @@ class NeuralNet {
       }
       // set the size of the neurons vector
       neurons_ = new Neuron[neuron_cnt_];
-      if (neurons_ == NULL) {
-        return false;
-      }
       // read & validate inputs
       if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
         return false;
diff --git a/neural_networks/runtime/neuron.cpp b/neural_networks/runtime/neuron.cpp
index 36309082d9..f1ff7b2316 100644
--- a/neural_networks/runtime/neuron.cpp
+++ b/neural_networks/runtime/neuron.cpp
@@ -4,6 +4,15 @@
 //
 // neuron.cpp: The implementation of a class for an object
 // that represents a single neuron in a neural network
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "neuron.h"
 #include "input_file_buffer.h"
diff --git a/neural_networks/runtime/neuron.h b/neural_networks/runtime/neuron.h
index a13d4a2eb7..8021902f3f 100644
--- a/neural_networks/runtime/neuron.h
+++ b/neural_networks/runtime/neuron.h
@@ -5,6 +5,15 @@
 // neuron.h: Declarations of a class for an object that
 // represents a single neuron in a neural network
 //
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #ifndef NEURON_H
 #define NEURON_H
diff --git a/neural_networks/runtime/sigmoid_table.cpp b/neural_networks/runtime/sigmoid_table.cpp
index f170a10844..0be73b6f8f 100644
--- a/neural_networks/runtime/sigmoid_table.cpp
+++ b/neural_networks/runtime/sigmoid_table.cpp
@@ -3,6 +3,15 @@
 // Author: ahmadab@google.com (Ahmad Abdulkader)
 //
 // sigmoid_table.cpp: Sigmoid function lookup table
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "neuron.h"
 
diff --git a/opencl/oclkernels.h b/opencl/oclkernels.h
index b3a8316b12..5a34fb7a22 100644
--- a/opencl/oclkernels.h
+++ b/opencl/oclkernels.h
@@ -1,4 +1,12 @@
-
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef _OCL_KERNEL_H_
 #define _OCL_KERNEL_H_
 #ifndef USE_EXTERNAL_KERNEL
@@ -50,7 +58,7 @@ KERNEL(
 )
 
 KERNEL(
-\n__kernel void pixSubtract(__global int *dword, __global int *sword, 
+\n__kernel void pixSubtract(__global int *dword, __global int *sword,
                             const int wpl, const int h, __global int *outword)
 {
     const unsigned int row = get_global_id(1);
@@ -105,15 +113,15 @@ KERNEL(
     unsigned int prevword, nextword, currword,tempword;
     unsigned int destword;
     const int col = pos % wpl;
-    
+
     //Ignore the execss
     if (pos >= (wpl * h))
         return;
-    
-    
-    currword = *(sword + pos);  
+
+
+    currword = *(sword + pos);
     destword = currword;
-    
+
     //Handle boundary conditions
     if(col==0)
         prevword=0;
@@ -124,9 +132,9 @@ KERNEL(
         nextword=0;
     else
         nextword = *(sword + pos + 1);
-    
+
     //Loop unrolled
-    
+
     //1 bit to left and 1 bit to right
         //Get the max value on LHS of every pixel
         tempword = (prevword << (31)) | ((currword >> 1));
@@ -142,10 +150,10 @@ KERNEL(
         //Get max value on RHS of every pixel
         tempword = (currword << 2) | (nextword >> (30));
         destword |= tempword;
-    
-    
+
+
     *(dword + pos) = destword;
-    
+
 }\n
 )
 
@@ -200,7 +208,7 @@ KERNEL(
     unsigned int destword, tempword, lastword, currword;
     unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
     int i, j, siter, eiter;
-    
+
     //Ignore the execss
     if (pos >= (wpl*h) || (xn < 1 && xp < 1))
         return;
@@ -225,7 +233,7 @@ KERNEL(
         firstword = 0x0;
     else
         firstword = *(sword + pos - 1);
-    
+
     //Get next word
     if (col == (wpl - 1))
         secondword = 0x0;
@@ -237,7 +245,7 @@ KERNEL(
     {
         //Get the max value on LHS of every pixel
         tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32-i)) | ((currword >> i));
-        
+
         destword |= tempword;
 
         //Get max value on RHS of every pixel
@@ -266,11 +274,11 @@ KERNEL(
     else
         firstword = *(sword + row*wpl + siter);
 
-    if (eiter >= wpl)   
+    if (eiter >= wpl)
         lastword = 0x0;
     else
         lastword = *(sword + row*wpl + eiter);
-    
+
     for ( i = 1; i < nwords; i++)
     {
         //Gets LHS words
@@ -280,14 +288,14 @@ KERNEL(
             secondword = *(sword + row*wpl + siter + i);
 
         lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
-        
+
         firstword = secondword;
 
         if ((siter + i + 1) < 0)
             secondword = 0x0;
         else
             secondword = *(sword + row*wpl + siter + i + 1);
-        
+
         lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
 
         //Gets RHS words
@@ -295,7 +303,7 @@ KERNEL(
             firstword = 0x0;
         else
             firstword = *(sword + row*wpl + eiter - i);
-            
+
         rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
 
         lastword = firstword;
@@ -325,7 +333,7 @@ KERNEL(
         lastword = firstword;
         firstword = secondword;
     }
-    
+
     *(dword + pos) = destword;
 }\n
 )
@@ -342,14 +350,14 @@ KERNEL(
     unsigned int prevword, nextword, currword,tempword;
     unsigned int destword;
     int i;
-    
+
     //Ignore the execss
     if (pos >= (wpl * h))
         return;
 
-    currword = *(sword + pos);  
+    currword = *(sword + pos);
     destword = currword;
-    
+
     //Handle boundary conditions
     if(col==0)
         prevword=0;
@@ -360,7 +368,7 @@ KERNEL(
         nextword=0;
     else
         nextword = *(sword + pos + 1);
-    
+
     for (i = 1; i <= halfwidth; i++)
     {
         //Get the max value on LHS of every pixel
@@ -377,7 +385,7 @@ KERNEL(
 
         //Get max value on RHS of every pixel
         tempword = (currword << i) | (nextword >> (32 - i));
-        
+
         destword |= tempword;
     }
 
@@ -397,7 +405,7 @@ KERNEL(
     unsigned int tempword;
     unsigned int destword;
     int i, siter, eiter;
-    
+
     //Ignore the execss
     if (row >= h || col >= wpl)
         return;
@@ -427,27 +435,27 @@ KERNEL(
     unsigned int prevword, nextword, currword,tempword;
     unsigned int destword;
     const int col = pos % wpl;
-    
+
     //Ignore the execss
     if (pos >= (wpl * h))
         return;
-    
-    currword = *(sword + pos);  
+
+    currword = *(sword + pos);
     destword = currword;
-    
+
     //Handle boundary conditions
     if(col==0)
         prevword=0xffffffff;
     else
         prevword = *(sword + pos - 1);
-    
+
     if(col==(wpl - 1))
         nextword=0xffffffff;
     else
         nextword = *(sword + pos + 1);
-    
+
     //Loop unrolled
-    
+
     //1 bit to left and 1 bit to right
         //Get the min value on LHS of every pixel
         tempword = (prevword << (31)) | ((currword >> 1));
@@ -463,10 +471,10 @@ KERNEL(
         //Get min value on RHS of every pixel
         tempword = (currword << 2) | (nextword >> (30));
         destword &= tempword;
-    
-    
+
+
     *(dword + pos) = destword;
-    
+
 }\n
 )
 
@@ -491,7 +499,7 @@ KERNEL(
     if (row < 2 || row >= (h - 2))
     {
         destword = 0x0;
-    }   
+    }
     else
     {
         //2 words above
@@ -518,7 +526,7 @@ KERNEL(
         tempword = *(sword + i*wpl + col);
         destword &= tempword;
 
-        if (col == 0) 
+        if (col == 0)
         {
             destword &= fwmask;
         }
@@ -534,7 +542,7 @@ KERNEL(
 )
 
 KERNEL(
-\n__kernel void morphoErodeHor(__global int *sword,__global int *dword, const int xp, const int xn, const int wpl, 
+\n__kernel void morphoErodeHor(__global int *sword,__global int *dword, const int xp, const int xn, const int wpl,
                                 const int h, const char isAsymmetric, const int rwmask, const int lwmask)
 {
     const int col = get_global_id(0);
@@ -569,7 +577,7 @@ KERNEL(
         firstword = 0xffffffff;
     else
         firstword = *(sword + pos - 1);
-    
+
     //Get next word
     if (col == (wpl - 1))
         secondword = 0xffffffff;
@@ -585,7 +593,7 @@ KERNEL(
 
         //Get max value on RHS of every pixel
         tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
-        
+
         //tempword = (currword << i) | (secondword >> (32 - i));
         destword &= tempword;
     }
@@ -614,18 +622,18 @@ KERNEL(
         *(dword + pos) = destword;
         return;
     }
-    
+
     if (siter < 0)
         firstword = 0xffffffff;
     else
         firstword = *(sword + row*wpl + siter);
 
-    if (eiter >= wpl)   
+    if (eiter >= wpl)
         lastword = 0xffffffff;
     else
         lastword = *(sword + row*wpl + eiter);
-    
-    
+
+
     for ( i = 1; i < nwords; i++)
     {
         //Gets LHS words
@@ -635,14 +643,14 @@ KERNEL(
             secondword = *(sword + row*wpl + siter + i);
 
         lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
-        
+
         firstword = secondword;
 
         if ((siter + i + 1) < 0)
             secondword = 0xffffffff;
         else
             secondword = *(sword + row*wpl + siter + i + 1);
-        
+
         lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
 
         //Gets RHS words
@@ -650,7 +658,7 @@ KERNEL(
             firstword = 0xffffffff;
         else
             firstword = *(sword + row*wpl + eiter - i);
-            
+
         rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
 
         lastword = firstword;
@@ -680,7 +688,7 @@ KERNEL(
         lastword = firstword;
         firstword = secondword;
     }
-    
+
     if (isAsymmetric)
     {
         //Clear boundary pixels
@@ -700,8 +708,8 @@ KERNEL(
 
 KERNEL(
 \n__kernel void morphoErodeHor_32word(__global int *sword,__global int *dword,
-                            const int halfwidth, const int wpl, 
-                            const int h, const char clearBoundPixH, 
+                            const int halfwidth, const int wpl,
+                            const int h, const char clearBoundPixH,
                             const int rwmask, const int lwmask,
                             const char isEven)
 {
@@ -715,25 +723,25 @@ KERNEL(
     if (pos >= (wpl * h))
         return;
 
-    currword = *(sword + pos);  
+    currword = *(sword + pos);
     destword = currword;
-    
+
     //Handle boundary conditions
     if(col==0)
         prevword=0xffffffff;
     else
         prevword = *(sword + pos - 1);
-    
+
     if(col==(wpl - 1))
         nextword=0xffffffff;
     else
         nextword = *(sword + pos + 1);
-    
+
     for (i = 1; i <= halfwidth; i++)
     {
         //Get the min value on LHS of every pixel
         tempword = (prevword << (32-i)) | ((currword >> i));
-        
+
         destword &= tempword;
 
         //Get min value on RHS of every pixel
@@ -751,7 +759,7 @@ KERNEL(
 
     if (clearBoundPixH)
     {
-        if (col == 0) 
+        if (col == 0)
         {
             destword &= rwmask;
         }
@@ -767,7 +775,7 @@ KERNEL(
 
 KERNEL(
 \n__kernel void morphoErodeVer(__global int *sword,__global int *dword,
-                            const int yp, 
+                            const int yp,
                             const int wpl, const int h,
                             const char clearBoundPixV, const int yn)
 {
@@ -776,7 +784,7 @@ KERNEL(
     const unsigned int pos = row * wpl + col;
     unsigned int tempword, destword;
     int i, siter, eiter;
-    
+
     //Ignore the execss
     if (row >= h || col >= wpl)
         return;
@@ -796,7 +804,7 @@ KERNEL(
 
     //Clear boundary pixels
     if (clearBoundPixV && ((row < yp) || ((h - row) <= yn)))
-    {   
+    {
         destword = 0x0;
     }
 
@@ -884,23 +892,23 @@ KERNEL(
 \n      __global const uchar* data,
 \n      uint numPixels,
 \n        __global uint *histBuffer) { // each wg will write HIST_SIZE*NUM_CHANNELS into this result; cpu will accumulate across wg's
-\n  
+\n
 \n      /* declare variables */
-\n  
+\n
 \n      // work indices
 \n      size_t groupId = get_group_id(0);
 \n      size_t localId = get_local_id(0); // 0 -> 256-1
 \n      size_t globalId = get_global_id(0); // 0 -> 8*10*256-1=20480-1
 \n      uint numThreads = get_global_size(0);
-\n  
+\n
 \n      /* accumulate in global memory */
 \n      for ( uint pc = get_global_id(0); pc < numPixels; pc += get_global_size(0) ) {
 \n          uchar value = data[ pc ];
 \n          int idx = value * get_global_size(0) + get_global_id(0);
 \n           histBuffer[ idx ]++;
-\n          
+\n
 \n      }
-\n      
+\n
 \n  } // kernel_HistogramRectAllChannels_Grey
 
 )
@@ -993,35 +1001,35 @@ void kernel_HistogramRectOneChannelReduction(
 
 KERNEL(
 // unused
-  // each work group (x256) handles a histogram bin 
+  // each work group (x256) handles a histogram bin
 \n  __attribute__((reqd_work_group_size(256, 1, 1)))
 \n  __kernel
 \n  void kernel_HistogramRectAllChannelsReduction_Grey(
 \n      int n, // pixel redundancy that needs to be accumulated
 \n      __global uint *histBuffer,
 \n      __global uint* histResult) { // each wg accumulates 1 bin
-\n  
+\n
 \n      /* declare variables */
-\n  
+\n
 \n      // work indices
 \n      size_t groupId = get_group_id(0);
 \n      size_t localId = get_local_id(0); // 0 -> 256-1
 \n      size_t globalId = get_global_id(0); // 0 -> 8*10*256-1=20480-1
 \n      uint numThreads = get_global_size(0);
 \n        unsigned int hist = 0;
-\n  
+\n
 \n      /* accumulate in global memory */
 \n      for ( uint p = 0; p < n; p+=GROUP_SIZE) {
 \n            hist += histBuffer[ (get_group_id(0)*n + p)];
 \n      }
-\n  
+\n
 \n      /* reduction in local memory */
 \n      // populate local memory
 \n      __local unsigned int localHist[GROUP_SIZE];
 
 \n      localHist[localId] = hist;
 \n      barrier(CLK_LOCAL_MEM_FENCE);
-\n  
+\n
 \n      for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
 \n          if (localId < stride) {
 \n              hist = localHist[ (localId+stride)];
@@ -1032,12 +1040,11 @@ KERNEL(
 \n          }
 \n          barrier(CLK_LOCAL_MEM_FENCE);
 \n      }
-\n  
+\n
 \n      if (localId == 0)
 \n          histResult[get_group_id(0)] = localHist[0];
-\n  
+\n
 \n  } // kernel_HistogramRectAllChannelsReduction_Grey
-
 )
 
 // ThresholdRectToPix Kernel
@@ -1092,7 +1099,8 @@ void kernel_ThresholdRectToPix(
                 for ( int c = 0; c < NUM_CHANNELS; c++) {
                     unsigned char pixChan = pixels.s[p*NUM_CHANNELS + c];
                     if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
-                        word |=  (((uint)0x80000000) >> ((b*PIXELS_PER_BURST+p)&31));
+                        const uint kTopBit = 0x80000000;
+                        word |=  (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
                     }
                 }
             }
@@ -1146,10 +1154,10 @@ void kernel_ThresholdRectToPix_OneChan(
 
             // for each pixel in burst
             for ( int p = 0; p < PIXELS_PER_BURST; p++) {
-                
+
                   //int littleEndianIdx = p ^ 3;
                   //int bigEndianIdx = p;
-                  int idx = 
+                  int idx =
 \n#ifdef __ENDIAN_LITTLE__\n
                   p ^ 3;
 \n#else\n
@@ -1157,52 +1165,50 @@ void kernel_ThresholdRectToPix_OneChan(
 \n#endif\n
                 unsigned char pixChan = pixels.s[idx];
                 if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
-                    word |=  (0x80000000 >> ((b*PIXELS_PER_BURST+p)&31));
+                    const uint kTopBit = 0x80000000;
+                    word |=  (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
                 }
             }
         }
         pix[w] = word;
     }
 }
-
 )
 
-
 KERNEL(
-\n#define RED_SHIFT		24\n
-\n#define GREEN_SHIFT		16\n
-\n#define BLUE_SHIFT		8\n
+\n#define RED_SHIFT             24\n
+\n#define GREEN_SHIFT           16\n
+\n#define BLUE_SHIFT            8\n
 \n#define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))\n
 \n
 \n__attribute__((reqd_work_group_size(256, 1, 1)))\n
 \n__kernel\n
 \nvoid kernel_RGBToGray(
     __global const unsigned int *srcData,
-	__global unsigned char *dstData,
+    __global unsigned char *dstData,
     int srcWPL,
     int dstWPL,
     int height,
     int width,
-	float rwt,
-	float gwt,
-	float bwt ) {
-    
+    float rwt,
+    float gwt,
+    float bwt ) {
+
     // pixel index
     int pixelIdx = get_global_id(0);
     if (pixelIdx >= height*width) return;
 
-	unsigned int word = srcData[pixelIdx];
-	int output =	(rwt * ((word >> RED_SHIFT)	  & 0xff) +
+    unsigned int word = srcData[pixelIdx];
+    int output =    (rwt * ((word >> RED_SHIFT)   & 0xff) +
                      gwt * ((word >> GREEN_SHIFT) & 0xff) +
-                     bwt * ((word >> BLUE_SHIFT)  & 0xff) + 0.5);
+                     bwt * ((word >> BLUE_SHIFT)  & 0xff) + 0.5f);
     // SET_DATA_BYTE
     dstData[pixelIdx] = output;
 }
 )
-#endif
 
  ; // close char*
 
-#endif // USE_EXTERNAL_KERNEL
-//#endif //_OCL_KERNEL_H_
+#endif  // USE_EXTERNAL_KERNEL
+#endif  //_OCL_KERNEL_H_
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/opencl_device_selection.h b/opencl/opencl_device_selection.h
index 74272b35c0..b595be62d4 100644
--- a/opencl/opencl_device_selection.h
+++ b/opencl/opencl_device_selection.h
@@ -1,3 +1,12 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifdef USE_OPENCL
 #ifndef DEVICE_SELECTION_H
 #define DEVICE_SELECTION_H
@@ -59,12 +68,12 @@ typedef struct {
 typedef ds_status (*ds_score_release)(void* score);
 static ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) {
   ds_status status = DS_SUCCESS;
-  if (profile!=NULL) {
-    if (profile->devices!=NULL && sr!=NULL) {
+  if (profile != NULL) {
+    if (profile->devices != NULL && sr != NULL) {
       unsigned int i;
       for (i = 0; i < profile->numDevices; i++) {
-        if (profile->devices[i].oclDeviceName) free(profile->devices[i].oclDeviceName);
-        if (profile->devices[i].oclDriverVersion) free(profile->devices[i].oclDriverVersion);
+        free(profile->devices[i].oclDeviceName);
+        free(profile->devices[i].oclDriverVersion);
         status = sr(profile->devices[i].score);
         if (status != DS_SUCCESS)
           break;
@@ -81,19 +90,15 @@ static ds_status initDSProfile(ds_profile** p, const char* version) {
   int numDevices;
   cl_uint numPlatforms;
   cl_platform_id* platforms = NULL;
-  cl_device_id*   devices = NULL;
+  cl_device_id* devices = NULL;
   ds_status status = DS_SUCCESS;
-  ds_profile* profile = NULL;
   unsigned int next;
   unsigned int i;
 
-  if (p == NULL)
-    return DS_INVALID_PROFILE;
+  if (p == NULL) return DS_INVALID_PROFILE;
+  ds_profile* profile = (ds_profile*)malloc(sizeof(ds_profile));
+  if (profile == NULL) return DS_MEMORY_ERROR;
 
-  profile = (ds_profile*)malloc(sizeof(ds_profile));
-  if (profile == NULL)
-    return DS_MEMORY_ERROR;
-  
   memset(profile, 0, sizeof(ds_profile));
 
   clGetPlatformIDs(0, NULL, &numPlatforms);
@@ -123,7 +128,8 @@ static ds_status initDSProfile(ds_profile** p, const char* version) {
   }
 
   profile->numDevices = numDevices+1;     // +1 to numDevices to include the native CPU
-  profile->devices = (ds_device*)malloc(profile->numDevices*sizeof(ds_device));    
+  profile->devices =
+      (ds_device*)malloc(profile->numDevices * sizeof(ds_device));
   if (profile->devices == NULL) {
     profile->numDevices = 0;
     status = DS_MEMORY_ERROR;
@@ -143,14 +149,14 @@ static ds_status initDSProfile(ds_profile** p, const char* version) {
       profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE;
       profile->devices[next].oclDeviceID = devices[j];
 
-      clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME
-        , DS_DEVICE_NAME_LENGTH, &buffer, NULL);
+      clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME,
+                      DS_DEVICE_NAME_LENGTH, &buffer, NULL);
       length = strlen(buffer);
       profile->devices[next].oclDeviceName = (char*)malloc(length+1);
       memcpy(profile->devices[next].oclDeviceName, buffer, length+1);
 
-      clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION
-        , DS_DEVICE_NAME_LENGTH, &buffer, NULL);
+      clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION,
+                      DS_DEVICE_NAME_LENGTH, &buffer, NULL);
       length = strlen(buffer);
       profile->devices[next].oclDriverVersion = (char*)malloc(length+1);
       memcpy(profile->devices[next].oclDriverVersion, buffer, length+1);
@@ -160,15 +166,14 @@ static ds_status initDSProfile(ds_profile** p, const char* version) {
   profile->version = version;
 
 cleanup:
-  if (platforms)  free(platforms);
-  if (devices)    free(devices);
+  free(platforms);
+  free(devices);
   if (status == DS_SUCCESS) {
     *p = profile;
   }
   else {
     if (profile) {
-      if (profile->devices)
-        free(profile->devices);
+      free(profile->devices);
       free(profile);
     }
   }
@@ -206,8 +211,7 @@ static ds_status profileDevices(ds_profile* profile,
     
     switch (type) {
     case DS_EVALUATE_NEW_ONLY:
-      if (profile->devices[i].score != NULL)
-        break;
+      if (profile->devices[i].score != NULL) break;
       //  else fall through
     case DS_EVALUATE_ALL:
       evaluatorStatus = evaluator(profile->devices+i, evaluatorData);
@@ -252,14 +256,11 @@ static ds_status writeProfileToFile(ds_profile* profile,
                                     ds_score_serializer serializer,
                                     const char* file) {
   ds_status status = DS_SUCCESS;
-  FILE* profileFile = NULL;
 
+  if (profile == NULL) return DS_INVALID_PROFILE;
 
-  if (profile == NULL)
-    return DS_INVALID_PROFILE;
-
-  profileFile = fopen(file, "wb");
-  if (profileFile==NULL) {
+  FILE* profileFile = fopen(file, "wb");
+  if (profileFile == NULL) {
     status = DS_FILE_ERROR;
   }
   else {
@@ -322,7 +323,8 @@ static ds_status writeProfileToFile(ds_profile* profile,
       fwrite(DS_TAG_SCORE, sizeof(char), strlen(DS_TAG_SCORE), profileFile);
       status = serializer(profile->devices+i, &serializedScore,
                           &serializedScoreSize);
-      if (status == DS_SUCCESS && serializedScore!=NULL && serializedScoreSize > 0) {
+      if (status == DS_SUCCESS && serializedScore != NULL &&
+          serializedScoreSize > 0) {
         fwrite(serializedScore, sizeof(char), serializedScoreSize, profileFile);
         free(serializedScore);
       }
@@ -338,23 +340,21 @@ static ds_status writeProfileToFile(ds_profile* profile,
 
 static ds_status readProFile(const char* fileName, char** content,
                              size_t* contentSize) {
-  FILE * input = NULL;
   size_t size = 0;
-  char* binary = NULL;
 
   *contentSize = 0;
   *content = NULL;
 
-  input = fopen(fileName, "rb");
-  if(input == NULL) {
+  FILE* input = fopen(fileName, "rb");
+  if (input == NULL) {
     return DS_FILE_ERROR;
   }
 
   fseek(input, 0L, SEEK_END); 
   size = ftell(input);
   rewind(input);
-  binary = (char*)malloc(size);
-  if(binary == NULL) {
+  char* binary = (char*)malloc(size);
+  if (binary == NULL) {
     fclose(input);
     return DS_FILE_ERROR;
   }
@@ -401,8 +401,7 @@ static ds_status readProfileFromFile(ds_profile* profile,
   const char* contentEnd = NULL;
   size_t contentSize;
 
-  if (profile==NULL)
-    return DS_INVALID_PROFILE;
+  if (profile == NULL) return DS_INVALID_PROFILE;
 
   status = readProFile(file, &contentStart, &contentSize);
   if (status == DS_SUCCESS) {
@@ -424,7 +423,7 @@ static ds_status readProfileFromFile(ds_profile* profile,
     dataStart += strlen(DS_TAG_VERSION);
 
     dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END);
-    if (dataEnd==NULL) {
+    if (dataEnd == NULL) {
       status = DS_PROFILE_FILE_ERROR;
       goto cleanup;
     }
@@ -456,27 +455,27 @@ static ds_status readProfileFromFile(ds_profile* profile,
       const char* deviceDriverEnd;
 
       dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE);
-      if (dataStart==NULL) {
+      if (dataStart == NULL) {
         // nothing useful remain, quit...
         break;
       }
       dataStart+=strlen(DS_TAG_DEVICE);
       dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END);
-      if (dataEnd==NULL) {
+      if (dataEnd == NULL) {
         status = DS_PROFILE_FILE_ERROR;
         goto cleanup;
       }
 
       // parse the device type
       deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE);
-      if (deviceTypeStart==NULL) {
+      if (deviceTypeStart == NULL) {
         status = DS_PROFILE_FILE_ERROR;
         goto cleanup;       
       }
       deviceTypeStart+=strlen(DS_TAG_DEVICE_TYPE);
       deviceTypeEnd = findString(deviceTypeStart, contentEnd,
                                  DS_TAG_DEVICE_TYPE_END);
-      if (deviceTypeEnd==NULL) {
+      if (deviceTypeEnd == NULL) {
         status = DS_PROFILE_FILE_ERROR;
         goto cleanup;
       }
@@ -487,14 +486,14 @@ static ds_status readProfileFromFile(ds_profile* profile,
       if (deviceType == DS_DEVICE_OPENCL_DEVICE) {
 
         deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME);
-        if (deviceNameStart==NULL) {
+        if (deviceNameStart == NULL) {
           status = DS_PROFILE_FILE_ERROR;
           goto cleanup;       
         }
         deviceNameStart+=strlen(DS_TAG_DEVICE_NAME);
         deviceNameEnd = findString(deviceNameStart, contentEnd,
                                    DS_TAG_DEVICE_NAME_END);
-        if (deviceNameEnd==NULL) {
+        if (deviceNameEnd == NULL) {
           status = DS_PROFILE_FILE_ERROR;
           goto cleanup;       
         }
@@ -502,19 +501,18 @@ static ds_status readProfileFromFile(ds_profile* profile,
 
         deviceDriverStart = findString(dataStart, contentEnd,
                                        DS_TAG_DEVICE_DRIVER_VERSION);
-        if (deviceDriverStart==NULL) {
+        if (deviceDriverStart == NULL) {
           status = DS_PROFILE_FILE_ERROR;
           goto cleanup;       
         }
         deviceDriverStart+=strlen(DS_TAG_DEVICE_DRIVER_VERSION);
         deviceDriverEnd = findString(deviceDriverStart, contentEnd,
                                      DS_TAG_DEVICE_DRIVER_VERSION_END);
-        if (deviceDriverEnd ==NULL) {
+        if (deviceDriverEnd == NULL) {
           status = DS_PROFILE_FILE_ERROR;
           goto cleanup;       
         }
 
-
         // check if this device is on the system
         for (i = 0; i < profile->numDevices; i++) {
           if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE) {
@@ -530,7 +528,7 @@ static ds_status readProfileFromFile(ds_profile* profile,
                && strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart,
                           driverVersionLength)==0) {
               deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE);
-              if (deviceNameStart==NULL) {
+              if (deviceScoreStart == NULL) {
                 status = DS_PROFILE_FILE_ERROR;
                 goto cleanup;       
               }
@@ -552,7 +550,7 @@ static ds_status readProfileFromFile(ds_profile* profile,
         for (i = 0; i < profile->numDevices; i++) {
           if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU) {
             deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE);
-            if (deviceScoreStart==NULL) {
+            if (deviceScoreStart == NULL) {
               status = DS_PROFILE_FILE_ERROR;
               goto cleanup;       
             }
@@ -574,23 +572,9 @@ static ds_status readProfileFromFile(ds_profile* profile,
     }
   }
 cleanup:
-  if (contentStart!=NULL) free(contentStart);
+  free(contentStart);
   return status;
 }
 
-static ds_status getNumDeviceWithEmptyScore(ds_profile* profile,
-                                            unsigned int* num) {
-  unsigned int i;
-  if (profile == NULL || num==NULL)
-    return DS_MEMORY_ERROR;
-  *num=0;
-  for (i = 0; i < profile->numDevices; i++) {
-    if (profile->devices[i].score == NULL) {
-      *num++;
-    }
-  }
-  return DS_SUCCESS;
-}
-
 #endif
 #endif
diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp
index f35fcd439d..c3bb745127 100644
--- a/opencl/openclwrapper.cpp
+++ b/opencl/openclwrapper.cpp
@@ -1,7 +1,14 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifdef _WIN32
-#include <windows.h>
 #include <io.h>
-
 #else
 #include <sys/types.h>
 #include <unistd.h>
@@ -16,23 +23,24 @@
 #include "thresholder.h"
 
 #if ON_APPLE
-#include <stdio.h>
 #include <mach/mach_time.h>
+#include <stdio.h>
 #endif
 
 /*
     Convenience macro to test the version of Leptonica.
 */
 #if defined(LIBLEPT_MAJOR_VERSION) && defined(LIBLEPT_MINOR_VERSION)
-#   define TESSERACT_LIBLEPT_PREREQ(maj, min) \
-        ((LIBLEPT_MAJOR_VERSION) > (maj) || ((LIBLEPT_MAJOR_VERSION) == (maj) && (LIBLEPT_MINOR_VERSION) >= (min)))
+#define TESSERACT_LIBLEPT_PREREQ(maj, min) \
+  ((LIBLEPT_MAJOR_VERSION) > (maj) ||      \
+   ((LIBLEPT_MAJOR_VERSION) == (maj) && (LIBLEPT_MINOR_VERSION) >= (min)))
 #else
-#   define TESSERACT_LIBLEPT_PREREQ(maj, min) 0
+#define TESSERACT_LIBLEPT_PREREQ(maj, min) 0
 #endif
 
-#if TESSERACT_LIBLEPT_PREREQ(1,73)
-#   define CALLOC LEPT_CALLOC
-#   define FREE LEPT_FREE
+#if TESSERACT_LIBLEPT_PREREQ(1, 73)
+#define CALLOC LEPT_CALLOC
+#define FREE LEPT_FREE
 #endif
 
 #ifdef USE_OPENCL
@@ -40,36 +48,28 @@
 #include "opencl_device_selection.h"
 GPUEnv OpenclDevice::gpuEnv;
 
-
 bool OpenclDevice::deviceIsSelected = false;
 ds_device OpenclDevice::selectedDevice;
 
-
 int OpenclDevice::isInited = 0;
 
 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
 
 static const l_uint32 lmask32[] = {
-    0x80000000, 0xc0000000, 0xe0000000, 0xf0000000,
-    0xf8000000, 0xfc000000, 0xfe000000, 0xff000000,
-    0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
-    0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000,
-    0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000,
-    0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
-    0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0,
-    0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff
-};
+    0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000,
+    0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
+    0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, 0xffff8000, 0xffffc000,
+    0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
+    0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc,
+    0xfffffffe, 0xffffffff};
 
 static const l_uint32 rmask32[] = {
-    0x00000001, 0x00000003, 0x00000007, 0x0000000f,
-    0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
-    0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
-    0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
-    0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
-    0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
-    0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
-    0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff
-};
+    0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
+    0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
+    0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff,
+    0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
+    0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
+    0x7fffffff, 0xffffffff};
 
 struct tiff_transform {
     int vflip;    /* if non-zero, image needs a vertical fip */
@@ -90,7 +90,7 @@ static struct tiff_transform tiff_orientation_transforms[] = {
     {0, 0, -1}
 };
 
-static const l_int32  MAX_PAGES_IN_TIFF_FILE = 3000;
+static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000;
 
 cl_mem pixsCLBuffer, pixdCLBuffer, pixdCLIntermediate; //Morph operations buffers
 cl_mem pixThBuffer; //output from thresholdtopix calculation
@@ -100,7 +100,8 @@ KernelEnv rEnv;
 // substitute invalid characters in device name with _
 void legalizeFileName( char *fileName) {
     //printf("fileName: %s\n", fileName);
-    const char* invalidChars = "/\?:*\"><| "; // space is valid but can cause headaches
+    const char *invalidChars =
+        "/\?:*\"><| ";  // space is valid but can cause headaches
     // for each invalid char
     for (int i = 0; i < strlen(invalidChars); i++) {
         char invalidStr[4];
@@ -111,10 +112,11 @@ void legalizeFileName( char *fileName) {
         // initial ./ is valid for present directory
         //if (*pos == '.') pos++;
         //if (*pos == '/') pos++;
-        for ( char *pos = strstr(fileName, invalidStr); pos != NULL; pos = strstr(pos+1, invalidStr)) {
-            //printf("\tfound: %s, ", pos);
-            pos[0] = '_';
-            //printf("fileName: %s\n", fileName);
+        for (char *pos = strstr(fileName, invalidStr); pos != NULL;
+             pos = strstr(pos + 1, invalidStr)) {
+          // printf("\tfound: %s, ", pos);
+          pos[0] = '_';
+          // printf("fileName: %s\n", fileName);
         }
     }
 }
@@ -127,39 +129,41 @@ void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) {
     gpuInfo->mpDevID = device;
     gpuInfo->mpArryDevsID = new cl_device_id[1];
     gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID;
-    clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE       , sizeof(cl_device_type), (void *) &gpuInfo->mDevType       , &size);
+    clStatus =
+        clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE,
+                        sizeof(cl_device_type), &gpuInfo->mDevType, &size);
     CHECK_OPENCL( clStatus, "populateGPUEnv::getDeviceInfo(TYPE)");
     // platform
-    clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM   , sizeof(cl_platform_id), (void *) &gpuInfo->mpPlatformID   , &size);
+    clStatus =
+        clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM,
+                        sizeof(cl_platform_id), &gpuInfo->mpPlatformID, &size);
     CHECK_OPENCL( clStatus, "populateGPUEnv::getDeviceInfo(PLATFORM)");
     // context
     cl_context_properties props[3];
     props[0] = CL_CONTEXT_PLATFORM;
     props[1] = (cl_context_properties) gpuInfo->mpPlatformID;
     props[2] = 0;
-    gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID, NULL, NULL, &clStatus);
+    gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID, NULL,
+                                         NULL, &clStatus);
     CHECK_OPENCL( clStatus, "populateGPUEnv::createContext");
     // queue
     cl_command_queue_properties queueProperties = 0;
     gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus );
     CHECK_OPENCL( clStatus, "populateGPUEnv::createCommandQueue");
-
 }
 
 int OpenclDevice::LoadOpencl()
 {
 #ifdef WIN32
-    HINSTANCE HOpenclDll = NULL;
-  void * OpenclDll = NULL;
-    //fprintf(stderr, " LoadOpenclDllxx... \n");
-    OpenclDll = static_cast<HINSTANCE>( HOpenclDll );
-    OpenclDll = LoadLibrary( "openCL.dll" );
-    if ( !static_cast<HINSTANCE>( OpenclDll ) )
-    {
-        fprintf(stderr, "[OD] Load opencl.dll failed!\n");
-        FreeLibrary( static_cast<HINSTANCE>( OpenclDll ) );
-        return 0;
-
+  HINSTANCE HOpenclDll = NULL;
+  void *OpenclDll = NULL;
+  // fprintf(stderr, " LoadOpenclDllxx... \n");
+  OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
+  OpenclDll = LoadLibrary("openCL.dll");
+  if (!static_cast<HINSTANCE>(OpenclDll)) {
+    fprintf(stderr, "[OD] Load opencl.dll failed!\n");
+    FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
+    return 0;
     }
     fprintf(stderr, "[OD] Load opencl.dll successful!\n");
 #endif
@@ -182,60 +186,57 @@ cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer, size_t nElem
     return membuffer;
 }
 
-PIX* mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX* pixd, PIX* pixs, int elements, cl_mem_flags flags, bool memcopy = false, bool sync = true)
-{
-    PROCNAME("mapOutputCLBuffer");
-    if (!pixd)
-    {
-        if (memcopy)
-        {
-            if ((pixd = pixCreateTemplate(pixs)) == NULL)
-                (PIX *)ERROR_PTR("pixd not made", procName, NULL);
-        }
-        else
-        {
-            if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), pixGetDepth(pixs))) == NULL)
-                (PIX *)ERROR_PTR("pixd not made", procName, NULL);
-        }
-    }
-    l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0,
-                                                    elements * sizeof(l_uint32), 0, NULL, NULL, NULL );
-
-    if (memcopy)
-    {
-        memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32));
-    }
-    else
-    {
-        pixSetData(pixd, pValues);
+PIX *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX *pixd, PIX *pixs,
+                       int elements, cl_mem_flags flags, bool memcopy = false,
+                       bool sync = true) {
+  PROCNAME("mapOutputCLBuffer");
+  if (!pixd) {  // no destination supplied: create one modelled on pixs
+    if (memcopy) {
+      if ((pixd = pixCreateTemplate(pixs)) == NULL)
+        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
+    } else {
+      if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs),
+                                  pixGetDepth(pixs))) == NULL)
+        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
     }
+  }
+  l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(
+      rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0,
+      elements * sizeof(l_uint32), 0, NULL, NULL, NULL);
+
+  if (memcopy) {
+    memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32));
+  } else {
+    pixSetData(pixd, pValues);
+  }
 
-    clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,clbuffer,pValues,0,NULL,NULL);
+  clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, NULL,
+                          NULL);
 
-    if (sync)
-    {
-        clFinish( rEnv.mpkCmdQueue );
-    }
+  if (sync) {
+    clFinish(rEnv.mpkCmdQueue);
+  }
 
-    return pixd;
+  return pixd;
 }
 
  cl_mem allocateIntBuffer( KernelEnv rEnv, const l_uint32 *_pValues, size_t nElements, cl_int *pStatus , bool sync = false)
 {
-    cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
-        nElements * sizeof(l_int32), NULL, pStatus);
+   cl_mem xValues =
+       clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                      nElements * sizeof(l_int32), NULL, pStatus);
 
-    if (_pValues != NULL)
-    {
-        l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
-            nElements * sizeof(l_int32), 0, NULL, NULL, NULL );
+   if (_pValues != NULL) {
+     l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer(
+         rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
+         nElements * sizeof(l_int32), 0, NULL, NULL, NULL);
 
-        memcpy(pValues, _pValues, nElements * sizeof(l_int32));
+     memcpy(pValues, _pValues, nElements * sizeof(l_int32));
 
-        clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
+     clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, xValues, pValues, 0, NULL,
+                             NULL);
 
-        if (sync)
-            clFinish( rEnv.mpkCmdQueue );
+     if (sync) clFinish(rEnv.mpkCmdQueue);
     }
 
     return xValues;
@@ -244,27 +245,25 @@ PIX* mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX* pixd, PIX* pixs, in
 
 void OpenclDevice::releaseMorphCLBuffers()
 {
-    if (pixdCLIntermediate != NULL)
-        clReleaseMemObject(pixdCLIntermediate);
-    if (pixsCLBuffer != NULL)
-        clReleaseMemObject(pixsCLBuffer);
-    if (pixdCLBuffer != NULL)
-        clReleaseMemObject(pixdCLBuffer);
-    if (pixThBuffer != NULL)
-        clReleaseMemObject(pixThBuffer);
-	pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL;
+  if (pixdCLIntermediate != NULL) clReleaseMemObject(pixdCLIntermediate);
+  if (pixsCLBuffer != NULL) clReleaseMemObject(pixsCLBuffer);
+  if (pixdCLBuffer != NULL) clReleaseMemObject(pixdCLBuffer);
+  if (pixThBuffer != NULL) clReleaseMemObject(pixThBuffer);
+  pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL;
 }
 
 int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs)
 {
     SetKernelEnv( &rEnv );
 
-    if (pixThBuffer != NULL)
-    {
-        pixsCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
+    if (pixThBuffer != NULL) {
+      pixsCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl * h,
+                                            CL_MEM_ALLOC_HOST_PTR, &clStatus);
 
-        //Get the output from ThresholdToPix operation
-        clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0, sizeof(l_uint32) * wpl*h, 0, NULL, NULL);
+      // Get the output from ThresholdToPix operation
+      clStatus =
+          clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0,
+                              sizeof(l_uint32) * wpl * h, 0, NULL, NULL);
     }
     else
     {
@@ -275,9 +274,11 @@ int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs)
         pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl*h, CL_MEM_USE_HOST_PTR, &clStatus);
     }
 
-    pixdCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
+    pixdCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl * h,
+                                          CL_MEM_ALLOC_HOST_PTR, &clStatus);
 
-    pixdCLIntermediate = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
+    pixdCLIntermediate = allocateZeroCopyBuffer(
+        rEnv, NULL, wpl * h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
 
     return (int)clStatus;
 }
@@ -296,7 +297,6 @@ PERF_COUNT_SUB("LoadOpencl")
 #endif
     // sets up environment, compiles programs
 
-
     InitOpenclRunEnv_DeviceSelection( 0 );
 //PERF_COUNT_SUB("called InitOpenclRunEnv_DS")
 //PERF_COUNT_END
@@ -409,20 +409,20 @@ int OpenclDevice::BinaryGenerated( const char * clFileName, FILE ** fhandle )
     int status = 0;
     char *str = NULL;
     FILE *fd = NULL;
-    char fileName[256] = { 0 }, cl_name[128] = { 0 };
+    char fileName[256] = {0}, cl_name[128] = {0};
     char deviceName[1024];
-    clStatus = clGetDeviceInfo( gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL );
-    CHECK_OPENCL( clStatus, "clGetDeviceInfo" );
-    str = (char*) strstr( clFileName, (char*) ".cl" );
-    memcpy( cl_name, clFileName, str - clFileName );
+    clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,
+                               sizeof(deviceName), deviceName, NULL);
+    CHECK_OPENCL(clStatus, "clGetDeviceInfo");
+    str = (char *)strstr(clFileName, (char *)".cl");
+    memcpy(cl_name, clFileName, str - clFileName);
     cl_name[str - clFileName] = '\0';
-    sprintf( fileName, "%s-%s.bin", cl_name, deviceName );
+    sprintf(fileName, "%s-%s.bin", cl_name, deviceName);
     legalizeFileName(fileName);
-    fd = fopen( fileName, "rb" );
-    status = ( fd != NULL ) ? 1 : 0;
-    if ( fd != NULL )
-    {
-        *fhandle = fd;
+    fd = fopen(fileName, "rb");
+    status = (fd != NULL) ? 1 : 0;
+    if (fd != NULL) {
+      *fhandle = fd;
     }
     return status;
 
@@ -434,9 +434,8 @@ int OpenclDevice::CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * cl
     {
         if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
         {
-            if ( gpuEnvCached->mpArryPrograms[i] != NULL )
-            {
-                return 1;
+          if (gpuEnvCached->mpArryPrograms[i] != NULL) {
+            return 1;
             }
         }
     }
@@ -445,11 +444,10 @@ int OpenclDevice::CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * cl
 }
 int OpenclDevice::WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes )
 {
-    FILE *output = NULL;
-    output = fopen( fileName, "wb" );
-    if ( output == NULL )
-    {
-        return 0;
+  FILE *output = NULL;
+  output = fopen(fileName, "wb");
+  if (output == NULL) {
+    return 0;
     }
 
     fwrite( birary, sizeof(char), numBytes, output );
@@ -462,36 +460,37 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
 {
     unsigned int i = 0;
     cl_int clStatus;
-    size_t *binarySizes, numDevices=0;
+    size_t *binarySizes;
+    cl_uint numDevices;
     cl_device_id *mpArryDevsID;
     char **binaries, *str = NULL;
 
-    clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
-                   sizeof(numDevices), &numDevices, NULL );
+    clStatus = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
+                                sizeof(numDevices), &numDevices, NULL);
     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
 
     mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) * numDevices );
-    if ( mpArryDevsID == NULL )
-    {
-        return 0;
+    if (mpArryDevsID == NULL) {
+      return 0;
     }
     /* grab the handles to all of the devices in the program. */
-    clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
-                   sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL );
+    clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
+                                sizeof(cl_device_id) * numDevices, mpArryDevsID,
+                                NULL);
     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
 
     /* figure out the sizes of each of the binaries. */
     binarySizes = (size_t*) malloc( sizeof(size_t) * numDevices );
 
-    clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
-                   sizeof(size_t) * numDevices, binarySizes, NULL );
+    clStatus =
+        clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
+                         sizeof(size_t) * numDevices, binarySizes, NULL);
     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
 
     /* copy over all of the generated binaries. */
     binaries = (char**) malloc( sizeof(char *) * numDevices );
-    if ( binaries == NULL )
-    {
-        return 0;
+    if (binaries == NULL) {
+      return 0;
     }
 
     for ( i = 0; i < numDevices; i++ )
@@ -499,19 +498,18 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
         if ( binarySizes[i] != 0 )
         {
             binaries[i] = (char*) malloc( sizeof(char) * binarySizes[i] );
-            if ( binaries[i] == NULL )
-            {
-                return 0;
+            if (binaries[i] == NULL) {
+              return 0;
             }
         }
         else
         {
-            binaries[i] = NULL;
+          binaries[i] = NULL;
         }
     }
 
-    clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
-                   sizeof(char *) * numDevices, binaries, NULL );
+    clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
+                                sizeof(char *) * numDevices, binaries, NULL);
     CHECK_OPENCL(clStatus,"clGetProgramInfo");
 
     /* dump out each binary into its own separate file. */
@@ -523,7 +521,7 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
         {
             char deviceName[1024];
             clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
-                           sizeof(deviceName), deviceName, NULL);
+                                       sizeof(deviceName), deviceName, NULL);
             CHECK_OPENCL( clStatus, "clGetDeviceInfo" );
 
             str = (char*) strstr( clFileName, (char*) ".cl" );
@@ -543,45 +541,34 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
     // Release all resouces and memory
     for ( i = 0; i < numDevices; i++ )
     {
-        if ( binaries[i] != NULL )
-        {
-            free( binaries[i] );
-            binaries[i] = NULL;
-        }
+      free(binaries[i]);
+      binaries[i] = NULL;
     }
 
-    if ( binaries != NULL )
-    {
-        free( binaries );
-        binaries = NULL;
-    }
+    free(binaries);
+    binaries = NULL;
 
-    if ( binarySizes != NULL )
-    {
-        free( binarySizes );
-        binarySizes = NULL;
-    }
+    free(binarySizes);
+    binarySizes = NULL;
+
+    free(mpArryDevsID);
+    mpArryDevsID = NULL;
 
-    if ( mpArryDevsID != NULL )
-    {
-        free( mpArryDevsID );
-        mpArryDevsID = NULL;
-    }
     return 1;
 }
 
 void copyIntBuffer( KernelEnv rEnv, cl_mem xValues, const l_uint32 *_pValues, size_t nElements, cl_int *pStatus )
 {
-    l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
-        nElements * sizeof(l_int32), 0, NULL, NULL, NULL );
-    clFinish( rEnv.mpkCmdQueue );
-    if (_pValues != NULL)
-    {
-        for ( int i = 0; i < (int)nElements; i++ )
-            pValues[i] = (l_int32)_pValues[i];
+  l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer(
+      rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
+      nElements * sizeof(l_int32), 0, NULL, NULL, NULL);
+  clFinish(rEnv.mpkCmdQueue);
+  if (_pValues != NULL) {
+    for (int i = 0; i < (int)nElements; i++) pValues[i] = (l_int32)_pValues[i];
     }
 
-    clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
+    clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, xValues, pValues, 0, NULL,
+                            NULL);
     //clFinish( rEnv.mpkCmdQueue );
     return;
 }
@@ -595,7 +582,7 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
     const char *source;
     size_t source_size[1];
     int b_error, binary_status, binaryExisted, idx;
-    size_t numDevices;
+    cl_uint numDevices;
     cl_device_id *mpArryDevsID;
     FILE *fd, *fd1;
     const char* filename = "kernel.cl";
@@ -615,14 +602,13 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
 //PERF_COUNT_SUB("BinaryGenerated")
     if ( binaryExisted == 1 )
     {
-        clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
-                       sizeof(numDevices), &numDevices, NULL );
-        CHECK_OPENCL( clStatus, "clGetContextInfo" );
+      clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
+                                  sizeof(numDevices), &numDevices, NULL);
+      CHECK_OPENCL(clStatus, "clGetContextInfo");
 
-        mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) * numDevices );
-        if ( mpArryDevsID == NULL )
-        {
-            return 0;
+      mpArryDevsID = (cl_device_id *)malloc(sizeof(cl_device_id) * numDevices);
+      if (mpArryDevsID == NULL) {
+        return 0;
         }
 //PERF_COUNT_SUB("get numDevices")
         b_error = 0;
@@ -649,8 +635,9 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
 //PERF_COUNT_SUB("read file")
         fd = NULL;
         // grab the handles to all of the devices in the context.
-        clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
-                       sizeof( cl_device_id ) * numDevices, mpArryDevsID, NULL );
+        clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES,
+                                    sizeof(cl_device_id) * numDevices,
+                                    mpArryDevsID, NULL);
         CHECK_OPENCL( clStatus, "clGetContextInfo" );
 //PERF_COUNT_SUB("get devices")
         //fprintf(stderr, "[OD] Create kernel from binary\n");
@@ -662,7 +649,7 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
         free( binary );
         free( mpArryDevsID );
         mpArryDevsID = NULL;
-//PERF_COUNT_SUB("binaryExisted")
+        // PERF_COUNT_SUB("binaryExisted")
     }
     else
     {
@@ -674,9 +661,8 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
 //PERF_COUNT_SUB("!binaryExisted")
     }
 
-    if ( gpuInfo->mpArryPrograms[idx] == (cl_program) NULL )
-    {
-        return 0;
+    if (gpuInfo->mpArryPrograms[idx] == (cl_program) NULL) {
+      return 0;
     }
 
     //char options[512];
@@ -685,15 +671,17 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
 PERF_COUNT_START("OD::CompileKernel::clBuildProgram")
     if (!gpuInfo->mnIsUserCreated)
     {
-        clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
-                       buildOption, NULL, NULL);
-//PERF_COUNT_SUB("clBuildProgram notUserCreated")
+      clStatus =
+          clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
+                         buildOption, NULL, NULL);
+      // PERF_COUNT_SUB("clBuildProgram notUserCreated")
     }
     else
     {
-        clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
-                       buildOption, NULL, NULL);
-//PERF_COUNT_SUB("clBuildProgram isUserCreated")
+      clStatus =
+          clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
+                         buildOption, NULL, NULL);
+      // PERF_COUNT_SUB("clBuildProgram isUserCreated")
     }
 PERF_COUNT_END
     if ( clStatus != CL_SUCCESS )
@@ -701,13 +689,15 @@ PERF_COUNT_END
         printf ("BuildProgram error!\n");
         if ( !gpuInfo->mnIsUserCreated )
         {
-            clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
-                           CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+          clStatus = clGetProgramBuildInfo(
+              gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
+              CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
         }
         else
         {
-            clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
-                           CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
+          clStatus = clGetProgramBuildInfo(
+              gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
+              CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
         }
         if ( clStatus != CL_SUCCESS )
         {
@@ -715,9 +705,8 @@ PERF_COUNT_END
             return 0;
         }
         buildLog = (char*) malloc( length );
-        if ( buildLog == (char*) NULL )
-        {
-            return 0;
+        if (buildLog == (char *)NULL) {
+          return 0;
         }
         if ( !gpuInfo->mnIsUserCreated )
         {
@@ -736,10 +725,9 @@ PERF_COUNT_END
         }
 
         fd1 = fopen( "kernel-build.log", "w+" );
-        if ( fd1 != NULL )
-        {
-            fwrite( buildLog, sizeof(char), length, fd1 );
-            fclose( fd1 );
+        if (fd1 != NULL) {
+          fwrite(buildLog, sizeof(char), length, fd1);
+          fclose(fd1);
         }
 
         free( buildLog );
@@ -782,36 +770,40 @@ PERF_COUNT_START("pixReadFromTiffKernel")
 
     l_uint32 *pResult = (l_uint32 *)malloc(w*h * sizeof(l_uint32));
     rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "composeRGBPixel", &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateKernel");
+    CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel");
 
     //Allocate input and output OCL buffers
     valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
     outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
 
     //Kernel arguments
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&valuesCl );
+    clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl);
     CHECK_OPENCL( clStatus, "clSetKernelArg");
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 1, sizeof(w), (void *)&w );
+    clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(w), &w);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(h), (void *)&h );
+    clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(h), &h);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(wpl), (void *)&wpl );
+    clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
     CHECK_OPENCL( clStatus, "clSetKernelArg" );
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), (void *)&outputCl );
+    clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl);
     CHECK_OPENCL( clStatus, "clSetKernelArg");
 
     //Kernel enqueue
 PERF_COUNT_SUB("before")
-    clStatus = clEnqueueNDRangeKernel( rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
-
-     /* map results back from gpu */
-    void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ, 0, w*h * sizeof(l_uint32), 0, NULL, NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer outputCl");
-    clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, NULL, NULL);
-
-    //Sync
-    clFinish( rEnv.mpkCmdQueue );
+clStatus =
+    clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL,
+                           globalThreads, localThreads, 0, NULL, NULL);
+CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel");
+
+/* map results back from gpu */
+void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ,
+                               0, w * h * sizeof(l_uint32), 0, NULL, NULL,
+                               &clStatus);
+CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl");
+clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, NULL, NULL);
+
+// Sync
+clFinish(rEnv.mpkCmdQueue);
 PERF_COUNT_SUB("kernel & map")
 PERF_COUNT_END
     return pResult;
@@ -827,18 +819,17 @@ PIX   *pix;
     PROCNAME("pixReadTiff");
 
     if (!filename)
-        return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
+      return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
 
     if ((fp = fopenReadStream(filename)) == NULL)
-        return (PIX *)ERROR_PTR("image file not found", procName, NULL);
+      return (PIX *)ERROR_PTR("image file not found", procName, NULL);
     if ((pix = pixReadStreamTiffCl(fp, n)) == NULL) {
-        fclose(fp);
-        return (PIX *)ERROR_PTR("pix not read", procName, NULL);
+      fclose(fp);
+      return (PIX *)ERROR_PTR("pix not read", procName, NULL);
     }
     fclose(fp);
 PERF_COUNT_END
     return pix;
-
 }
 TIFF *
 OpenclDevice::fopenTiffCl(FILE        *fp,
@@ -848,13 +839,12 @@ l_int32  fd;
 
     PROCNAME("fopenTiff");
 
-    if (!fp)
-        return (TIFF *)ERROR_PTR("stream not opened", procName, NULL);
+    if (!fp) return (TIFF *)ERROR_PTR("stream not opened", procName, NULL);
     if (!modestring)
-        return (TIFF *)ERROR_PTR("modestring not defined", procName, NULL);
+      return (TIFF *)ERROR_PTR("modestring not defined", procName, NULL);
 
     if ((fd = fileno(fp)) < 0)
-        return (TIFF *)ERROR_PTR("invalid file descriptor", procName, NULL);
+      return (TIFF *)ERROR_PTR("invalid file descriptor", procName, NULL);
     lseek(fd, 0, SEEK_SET);
 
     return TIFFFdOpen(fd, "TIFFstream", modestring);
@@ -1039,8 +1029,8 @@ tiffCloseCallback(thandle_t  handle)
 		*mstream->poutdata = mstream->buffer;
 		*mstream->poutsize = mstream->hw;
 	}
-	FREE(mstream);  /* never free the buffer! */
-	return 0;
+        FREE(mstream); /* never free the buffer! */
+        return 0;
 }
 
 
@@ -1104,26 +1094,26 @@ size_t      *pdatasize)
 	PROCNAME("fopenTiffMemstream");
 
 	if (!filename)
-		return (TIFF *)ERROR_PTR("filename not defined", procName, NULL);
-	if (!operation)
-		return (TIFF *)ERROR_PTR("operation not defined", procName, NULL);
-	if (!pdata)
-		return (TIFF *)ERROR_PTR("&data not defined", procName, NULL);
-	if (!pdatasize)
-		return (TIFF *)ERROR_PTR("&datasize not defined", procName, NULL);
-	if (!strcmp(operation, "r") && !strcmp(operation, "w"))
-		return (TIFF *)ERROR_PTR("operation not 'r' or 'w'}", procName, NULL);
-
-	if (!strcmp(operation, "r"))
-		mstream = memstreamCreateForRead(*pdata, *pdatasize);
-	else
-		mstream = memstreamCreateForWrite(pdata, pdatasize);
+          return (TIFF *)ERROR_PTR("filename not defined", procName, NULL);
+        if (!operation)
+          return (TIFF *)ERROR_PTR("operation not defined", procName, NULL);
+        if (!pdata)
+          return (TIFF *)ERROR_PTR("&data not defined", procName, NULL);
+        if (!pdatasize)
+          return (TIFF *)ERROR_PTR("&datasize not defined", procName, NULL);
+        if (!strcmp(operation, "r") && !strcmp(operation, "w"))
+          return (TIFF *)ERROR_PTR("operation not 'r' or 'w'}", procName,
+                                   NULL);
+
+        if (!strcmp(operation, "r"))
+          mstream = memstreamCreateForRead(*pdata, *pdatasize);
+        else
+          mstream = memstreamCreateForWrite(pdata, pdatasize);
 
-	return TIFFClientOpen(filename, operation, mstream,
-		tiffReadCallback, tiffWriteCallback,
-		tiffSeekCallback, tiffCloseCallback,
-		tiffSizeCallback, tiffMapCallback,
-		tiffUnmapCallback);
+        return TIFFClientOpen(filename, operation, mstream, tiffReadCallback,
+                              tiffWriteCallback, tiffSeekCallback,
+                              tiffCloseCallback, tiffSizeCallback,
+                              tiffMapCallback, tiffUnmapCallback);
 }
 
 
@@ -1134,38 +1124,38 @@ OpenclDevice::pixReadMemTiffCl(const l_uint8 *data,size_t size,l_int32  n)
 	l_int32  i, pagefound;
 	PIX     *pix;
 	TIFF    *tif;
-	//L_MEMSTREAM *memStream;
-	PROCNAME("pixReadMemTiffCl");
-
-	if (!data)
-		return (PIX *)ERROR_PTR("data pointer is NULL", procName, NULL);
-
-	if ((tif = fopenTiffMemstream("", "r", (l_uint8 **)&data, &size)) == NULL)
-		return (PIX *)ERROR_PTR("tif not opened", procName, NULL);
-
-	pagefound = FALSE;
-	pix = NULL;
-	for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) {
-		if (i == n) {
-			pagefound = TRUE;
-			if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
-				TIFFCleanup(tif);
-				return (PIX *)ERROR_PTR("pix not read", procName, NULL);
-			}
-			break;
-		}
-		if (TIFFReadDirectory(tif) == 0)
-			break;
-	}
+        // L_MEMSTREAM *memStream;
+        PROCNAME("pixReadMemTiffCl");
 
-	if (pagefound == FALSE) {
-		L_WARNING("tiff page %d not found", procName);
-		TIFFCleanup(tif);
-		return NULL;
-	}
+        if (!data)
+          return (PIX *)ERROR_PTR("data pointer is NULL", procName, NULL);
+
+        if ((tif = fopenTiffMemstream("", "r", (l_uint8 **)&data, &size)) ==
+            NULL)
+          return (PIX *)ERROR_PTR("tif not opened", procName, NULL);
 
-	TIFFCleanup(tif);
-	return pix;
+        pagefound = FALSE;
+        pix = NULL;
+        for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) {
+          if (i == n) {
+            pagefound = TRUE;
+            if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
+              TIFFCleanup(tif);
+              return (PIX *)ERROR_PTR("pix not read", procName, NULL);
+            }
+            break;
+          }
+          if (TIFFReadDirectory(tif) == 0) break;
+        }
+
+        if (pagefound == FALSE) {
+          L_WARNING("tiff page %d not found\n", procName, i);
+          TIFFCleanup(tif);
+          return NULL;
+        }
+
+        TIFFCleanup(tif);
+        return pix;
 }
 
 PIX *
@@ -1178,11 +1168,10 @@ TIFF    *tif;
 
     PROCNAME("pixReadStreamTiff");
 
-    if (!fp)
-        return (PIX *)ERROR_PTR("stream not defined", procName, NULL);
+    if (!fp) return (PIX *)ERROR_PTR("stream not defined", procName, NULL);
 
     if ((tif = fopenTiffCl(fp, "rb")) == NULL)
-        return (PIX *)ERROR_PTR("tif not opened", procName, NULL);
+      return (PIX *)ERROR_PTR("tif not opened", procName, NULL);
 
     pagefound = FALSE;
     pix = NULL;
@@ -1190,8 +1179,8 @@ TIFF    *tif;
         if (i == n) {
             pagefound = TRUE;
             if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
-                TIFFCleanup(tif);
-                return (PIX *)ERROR_PTR("pix not read", procName, NULL);
+              TIFFCleanup(tif);
+              return (PIX *)ERROR_PTR("pix not read", procName, NULL);
             }
             break;
         }
@@ -1252,7 +1241,6 @@ void compare(l_uint32  *cpu, l_uint32  *gpu,int size)
         }
     }
     printf("\nit matches\n");
-
 }
 
 //OpenCL implementation of pixReadFromTiffStream.
@@ -1272,73 +1260,70 @@ PIXCMAP   *cmap;
 
     PROCNAME("pixReadFromTiffStream");
 
-    if (!tif)
-        return (PIX *)ERROR_PTR("tif not defined", procName, NULL);
-
+    if (!tif) return (PIX *)ERROR_PTR("tif not defined", procName, NULL);
 
     TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps);
     TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
     bpp = bps * spp;
     if (bpp > 32)
-        return (PIX *)ERROR_PTR("can't handle bpp > 32", procName, NULL);
+      return (PIX *)ERROR_PTR("can't handle bpp > 32", procName, NULL);
     if (spp == 1)
         d = bps;
     else if (spp == 3 || spp == 4)
         d = 32;
     else
-        return (PIX *)ERROR_PTR("spp not in set {1,3,4}", procName, NULL);
+      return (PIX *)ERROR_PTR("spp not in set {1,3,4}", procName, NULL);
 
     TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
     TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
     tiffbpl = TIFFScanlineSize(tif);
 
     if ((pix = pixCreate(w, h, d)) == NULL)
-        return (PIX *)ERROR_PTR("pix not made", procName, NULL);
+      return (PIX *)ERROR_PTR("pix not made", procName, NULL);
     data = (l_uint8 *)pixGetData(pix);
     wpl = pixGetWpl(pix);
     bpl = 4 * wpl;
 
-
     if (spp == 1) {
-        if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1, sizeof(l_uint8))) == NULL)
-            return (PIX *)ERROR_PTR("calloc fail for linebuf", procName, NULL);
-
-        for (i = 0 ; i < h ; i++) {
-            if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
-                FREE(linebuf);
-                pixDestroy(&pix);
-                return (PIX *)ERROR_PTR("line read fail", procName, NULL);
-            }
-            memcpy((char *)data, (char *)linebuf, tiffbpl);
-            data += bpl;
+      if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1, sizeof(l_uint8))) ==
+          NULL)
+        return (PIX *)ERROR_PTR("calloc fail for linebuf", procName, NULL);
+
+      for (i = 0; i < h; i++) {
+        if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
+          FREE(linebuf);
+          pixDestroy(&pix);
+          return (PIX *)ERROR_PTR("line read fail", procName, NULL);
+        }
+        memcpy((char *)data, (char *)linebuf, tiffbpl);
+        data += bpl;
         }
         if (bps <= 8)
             pixEndianByteSwap(pix);
         else
-            pixEndianTwoByteSwap(pix);
+          pixEndianTwoByteSwap(pix);
         FREE(linebuf);
-    }
-    else {
-        if ((tiffdata = (l_uint32 *)CALLOC(w * h, sizeof(l_uint32))) == NULL) {
-            pixDestroy(&pix);
-            return (PIX *)ERROR_PTR("calloc fail for tiffdata", procName, NULL);
-        }
-        if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata,
-                                       ORIENTATION_TOPLEFT, 0)) {
-            FREE(tiffdata);
-            pixDestroy(&pix);
-            return (PIX *)ERROR_PTR("failed to read tiffdata", procName, NULL);
-        }
-        line = pixGetData(pix);
+    } else {
+      if ((tiffdata = (l_uint32 *)CALLOC(w * h, sizeof(l_uint32))) == NULL) {
+        pixDestroy(&pix);
+        return (PIX *)ERROR_PTR("calloc fail for tiffdata", procName, NULL);
+      }
+      if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata,
+                                     ORIENTATION_TOPLEFT, 0)) {
+        FREE(tiffdata);
+        pixDestroy(&pix);
+        return (PIX *)ERROR_PTR("failed to read tiffdata", procName, NULL);
+      }
+      line = pixGetData(pix);
 
-        //Invoke the OpenCL kernel for pixReadFromTiff
-        l_uint32* output_gpu=pixReadFromTiffKernel(tiffdata,w,h,wpl,line);
+      // Invoke the OpenCL kernel for pixReadFromTiff
+      l_uint32 *output_gpu = pixReadFromTiffKernel(tiffdata, w, h, wpl, line);
 
-        pixSetData(pix, output_gpu);
-        // pix already has data allocated, it now points to output_gpu?
-        FREE(tiffdata);
-        FREE(line);
-        //FREE(output_gpu);
+      pixSetData(pix, output_gpu);
+      // pix already has data allocated, it now points to output_gpu?
+      FREE(tiffdata);
+      FREE(line);
+      // FREE(output_gpu);
     }
 
     if (getTiffStreamResolutionCl(tif, &xres, &yres) == 0) {
@@ -1352,32 +1337,28 @@ PIXCMAP   *cmap;
     pixSetInputFormat(pix, comptype);
 
     if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) {
-
-        if ((cmap = pixcmapCreate(bps)) == NULL) {
-            pixDestroy(&pix);
-            return (PIX *)ERROR_PTR("cmap not made", procName, NULL);
+      if ((cmap = pixcmapCreate(bps)) == NULL) {
+        pixDestroy(&pix);
+        return (PIX *)ERROR_PTR("cmap not made", procName, NULL);
         }
         ncolors = 1 << bps;
         for (i = 0; i < ncolors; i++)
             pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8,
                             bluemap[i] >> 8);
         pixSetColormap(pix, cmap);
-    }
-    else {
-        if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) {
-
-            if (tiffcomp == COMPRESSION_CCITTFAX3 ||
-                tiffcomp == COMPRESSION_CCITTFAX4 ||
-                tiffcomp == COMPRESSION_CCITTRLE ||
-                tiffcomp == COMPRESSION_CCITTRLEW) {
-                photometry = PHOTOMETRIC_MINISWHITE;
-            }
-            else
-                photometry = PHOTOMETRIC_MINISBLACK;
-        }
-        if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) ||
-            (d == 8 && photometry == PHOTOMETRIC_MINISWHITE))
-            pixInvert(pix, pix);
+    } else {
+      if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) {
+        if (tiffcomp == COMPRESSION_CCITTFAX3 ||
+            tiffcomp == COMPRESSION_CCITTFAX4 ||
+            tiffcomp == COMPRESSION_CCITTRLE ||
+            tiffcomp == COMPRESSION_CCITTRLEW) {
+          photometry = PHOTOMETRIC_MINISWHITE;
+        } else
+          photometry = PHOTOMETRIC_MINISBLACK;
+      }
+      if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) ||
+          (d == 8 && photometry == PHOTOMETRIC_MINISWHITE))
+        pixInvert(pix, pix);
     }
 
     if (TIFFGetField(tif, TIFFTAG_ORIENTATION, &orientation)) {
@@ -1415,6 +1396,7 @@ pixDilateCL_55(l_int32  wpl, l_int32  h)
     localThreads[1] = GROUPSIZE_HMORY;
 
     rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor_5x5", &status );
+    CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_5x5");
 
     status = clSetKernelArg(rEnv.mpkKernel,
         0,
@@ -1424,24 +1406,12 @@ pixDilateCL_55(l_int32  wpl, l_int32  h)
         1,
         sizeof(cl_mem),
         &pixdCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        2,
-        sizeof(wpl),
-        (const void *)&wpl);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        3,
-        sizeof(h),
-        (const void *)&h);
-
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                            rEnv.mpkKernel,
-                            2,
-                            NULL,
-                            globalThreads,
-                            localThreads,
-                            0,
-                            NULL,
-                            NULL);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
 
     //Swap source and dest buffers
     pixtemp = pixsCLBuffer;
@@ -1457,6 +1427,7 @@ pixDilateCL_55(l_int32  wpl, l_int32  h)
     localThreads[1] = GROUPSIZE_Y;
 
     rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateVer_5x5", &status );
+    CHECK_OPENCL(status, "clCreateKernel morphoDilateVer_5x5");
 
     status = clSetKernelArg(rEnv.mpkKernel,
         0,
@@ -1466,23 +1437,11 @@ pixDilateCL_55(l_int32  wpl, l_int32  h)
         1,
         sizeof(cl_mem),
         &pixdCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        2,
-        sizeof(wpl),
-        (const void *)&wpl);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        3,
-        sizeof(h),
-        (const void *)&h);
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                            rEnv.mpkKernel,
-                            2,
-                            NULL,
-                            globalThreads,
-                            localThreads,
-                            0,
-                            NULL,
-                            NULL);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
 
     return status;
 }
@@ -1509,6 +1468,7 @@ pixErodeCL_55(l_int32  wpl, l_int32  h)
     localThreads[1] = GROUPSIZE_HMORY;
 
     rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor_5x5", &status );
+    CHECK_OPENCL(status, "clCreateKernel morphoErodeHor_5x5");
 
     status = clSetKernelArg(rEnv.mpkKernel,
         0,
@@ -1518,24 +1478,12 @@ pixErodeCL_55(l_int32  wpl, l_int32  h)
         1,
         sizeof(cl_mem),
         &pixdCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        2,
-        sizeof(wpl),
-        (const void *)&wpl);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        3,
-        sizeof(h),
-        (const void *)&h);
-
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                            rEnv.mpkKernel,
-                            2,
-                            NULL,
-                            globalThreads,
-                            localThreads,
-                            0,
-                            NULL,
-                            NULL);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
 
     //Swap source and dest buffers
     pixtemp = pixsCLBuffer;
@@ -1551,6 +1499,7 @@ pixErodeCL_55(l_int32  wpl, l_int32  h)
     localThreads[1] = GROUPSIZE_Y;
 
     rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeVer_5x5", &status );
+    CHECK_OPENCL(status, "clCreateKernel morphoErodeVer_5x5");
 
     status = clSetKernelArg(rEnv.mpkKernel,
         0,
@@ -1560,31 +1509,13 @@ pixErodeCL_55(l_int32  wpl, l_int32  h)
         1,
         sizeof(cl_mem),
         &pixdCLBuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        2,
-        sizeof(wpl),
-        (const void *)&wpl);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        3,
-        sizeof(h),
-        (const void *)&h);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        4,
-        sizeof(fwmask),
-        (const void *)&fwmask);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        5,
-        sizeof(lwmask),
-        (const void *)&lwmask);
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                            rEnv.mpkKernel,
-                            2,
-                            NULL,
-                            globalThreads,
-                            localThreads,
-                            0,
-                            NULL,
-                            NULL);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(fwmask), &fwmask);
+    status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(lwmask), &lwmask);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
 
     return status;
 }
@@ -1625,101 +1556,56 @@ pixDilateCL(l_int32  hsize, l_int32  vsize, l_int32  wpl, l_int32  h)
 
     if (xp > 31 || xn > 31)
     {
-        //Generic case.
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor", &status );
-
-        status = clSetKernelArg(rEnv.mpkKernel,
-            0,
-            sizeof(cl_mem),
-            &pixsCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-            1,
-            sizeof(cl_mem),
-            &pixdCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                2,
-                sizeof(xp),
-                (const void *)&xp);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                3,
-                sizeof(xn),
-                (const void *)&xn);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                4,
-                sizeof(wpl),
-                (const void *)&wpl);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                5,
-                sizeof(h),
-                (const void *)&h);
-        status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                                rEnv.mpkKernel,
-                                2,
-                                NULL,
-                                globalThreads,
-                                localThreads,
-                                0,
-                                NULL,
-                                NULL);
-
-        if (yp > 0 || yn > 0)
-        {
-            pixtemp = pixsCLBuffer;
-            pixsCLBuffer = pixdCLBuffer;
-            pixdCLBuffer = pixtemp;
+      // Generic case.
+      rEnv.mpkKernel =
+          clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status);
+      CHECK_OPENCL(status, "clCreateKernel morphoDilateHor");
+
+      status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+      status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+      status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
+      status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn);
+      status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl);
+      status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h);
+      status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                      NULL, globalThreads, localThreads, 0,
+                                      NULL, NULL);
+
+      if (yp > 0 || yn > 0) {
+        pixtemp = pixsCLBuffer;
+        pixsCLBuffer = pixdCLBuffer;
+        pixdCLBuffer = pixtemp;
         }
     }
     else if (xp > 0 || xn > 0 )
     {
-        //Specific Horizontal pass kernel for half width < 32
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor_32word", &status );
-        isEven = (xp != xn);
-
-        status = clSetKernelArg(rEnv.mpkKernel,
-            0,
-            sizeof(cl_mem),
-            &pixsCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-            1,
-            sizeof(cl_mem),
-            &pixdCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                2,
-                sizeof(xp),
-                (const void *)&xp);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                3,
-                sizeof(wpl),
-                (const void *)&wpl);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                4,
-                sizeof(h),
-                (const void *)&h);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                5,
-                sizeof(isEven),
-                (const void *)&isEven);
-        status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                                rEnv.mpkKernel,
-                                2,
-                                NULL,
-                                globalThreads,
-                                localThreads,
-                                0,
-                                NULL,
-                                NULL);
-
-        if (yp > 0 || yn > 0)
-        {
-            pixtemp = pixsCLBuffer;
-            pixsCLBuffer = pixdCLBuffer;
-            pixdCLBuffer = pixtemp;
-        }
+      // Specific Horizontal pass kernel for half width < 32
+      rEnv.mpkKernel =
+          clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status);
+      CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_32word");
+      isEven = (xp != xn);
+
+      status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+      status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+      status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
+      status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
+      status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
+      status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), &isEven);
+      status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                      NULL, globalThreads, localThreads, 0,
+                                      NULL, NULL);
+
+      if (yp > 0 || yn > 0) {
+        pixtemp = pixsCLBuffer;
+        pixsCLBuffer = pixdCLBuffer;
+        pixdCLBuffer = pixtemp;
+      }
     }
 
     if (yp > 0 || yn > 0)
     {
         rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateVer", &status );
+        CHECK_OPENCL(status, "clCreateKernel morphoDilateVer");
 
         status = clSetKernelArg(rEnv.mpkKernel,
             0,
@@ -1729,280 +1615,161 @@ pixDilateCL(l_int32  hsize, l_int32  vsize, l_int32  wpl, l_int32  h)
             1,
             sizeof(cl_mem),
             &pixdCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                2,
-                sizeof(yp),
-                (const void *)&yp);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                3,
-                sizeof(wpl),
-                (const void *)&wpl);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                4,
-                sizeof(h),
-                (const void *)&h);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                5,
-                sizeof(yn),
-                (const void *)&yn);
-        status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                                rEnv.mpkKernel,
-                                2,
-                                NULL,
-                                globalThreads,
-                                localThreads,
-                                0,
-                                NULL,
-                                NULL);
+        status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp);
+        status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
+        status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
+        status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(yn), &yn);
+        status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                        NULL, globalThreads, localThreads, 0,
+                                        NULL, NULL);
     }
 
-
     return status;
 }
 
 //Morphology Erode operation. Invokes the relevant OpenCL kernels
-cl_int
-pixErodeCL(l_int32  hsize, l_int32  vsize, l_uint32 wpl, l_uint32 h)
-{
-
-    l_int32  xp, yp, xn, yn;
-    SEL* sel;
-    size_t globalThreads[2];
-    size_t localThreads[2];
-    cl_mem pixtemp;
-    cl_int status;
-    int gsize;
-    char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC);
-    l_uint32 rwmask, lwmask;
-    char isEven;
-
-    sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
-
-    selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
-    selDestroy(&sel);
-    OpenclDevice::SetKernelEnv( &rEnv );
-
-    if (hsize == 5 && vsize == 5 && isAsymmetric)
-    {
-        //Specific kernel for 5x5
-        status = pixErodeCL_55(wpl, h);
-        return status;
-    }
-
-    lwmask = lmask32[31 - (xn & 31)];
-    rwmask = rmask32[31 - (xp & 31)];
-
-    //global and local work dimensions for Horizontal pass
-    gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
-    globalThreads[0] = gsize;
-    gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
-    globalThreads[1] = gsize;
-    localThreads[0] = GROUPSIZE_X;
-    localThreads[1] = GROUPSIZE_Y;
-
-    //Horizontal Pass
-    if (xp > 31 || xn > 31 )
-    {
-        //Generic case.
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor", &status );
-
-        status = clSetKernelArg(rEnv.mpkKernel,
-            0,
-            sizeof(cl_mem),
-            &pixsCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-            1,
-            sizeof(cl_mem),
-            &pixdCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                2,
-                sizeof(xp),
-                (const void *)&xp);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                3,
-                sizeof(xn),
-                (const void *)&xn);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                4,
-                sizeof(wpl),
-                (const void *)&wpl);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                5,
-                sizeof(h),
-                (const void *)&h);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                6,
-                sizeof(isAsymmetric),
-                (const void *)&isAsymmetric);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                7,
-                sizeof(rwmask),
-                (const void *)&rwmask);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                8,
-                sizeof(lwmask),
-                (const void *)&lwmask);
-        status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                                rEnv.mpkKernel,
-                                2,
-                                NULL,
-                                globalThreads,
-                                localThreads,
-                                0,
-                                NULL,
-                                NULL);
-
-        if (yp > 0 || yn > 0)
-        {
-            pixtemp = pixsCLBuffer;
-            pixsCLBuffer = pixdCLBuffer;
-            pixdCLBuffer = pixtemp;
-        }
-    }
-    else if (xp > 0 || xn > 0)
-    {
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor_32word", &status );
-        isEven = (xp != xn);
-
-        status = clSetKernelArg(rEnv.mpkKernel,
-            0,
-            sizeof(cl_mem),
-            &pixsCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-            1,
-            sizeof(cl_mem),
-            &pixdCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                2,
-                sizeof(xp),
-                (const void *)&xp);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                3,
-                sizeof(wpl),
-                (const void *)&wpl);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                4,
-                sizeof(h),
-                (const void *)&h);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                5,
-                sizeof(isAsymmetric),
-                (const void *)&isAsymmetric);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                6,
-                sizeof(rwmask),
-                (const void *)&rwmask);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                7,
-                sizeof(lwmask),
-                (const void *)&lwmask);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                8,
-                sizeof(isEven),
-                (const void *)&isEven);
-        status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                                rEnv.mpkKernel,
-                                2,
-                                NULL,
-                                globalThreads,
-                                localThreads,
-                                0,
-                                NULL,
-                                NULL);
+cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) {
+  l_int32 xp, yp, xn, yn;
+  SEL *sel;
+  size_t globalThreads[2];
+  size_t localThreads[2];
+  cl_mem pixtemp;
+  cl_int status = CL_SUCCESS;  // returned unchanged when neither pass runs
+  int gsize;
+  char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC);
+  l_uint32 rwmask, lwmask;
+  char isEven;
+
+  sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
+
+  selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
+  selDestroy(&sel);
+  OpenclDevice::SetKernelEnv(&rEnv);
+
+  if (hsize == 5 && vsize == 5 && isAsymmetric) {
+    // Specific kernel for 5x5
+    status = pixErodeCL_55(wpl, h);
+    return status;
+  }
 
-        if (yp > 0 || yn > 0)
-        {
-            pixtemp = pixsCLBuffer;
-            pixsCLBuffer = pixdCLBuffer;
-            pixdCLBuffer = pixtemp;
-        }
+  lwmask = lmask32[31 - (xn & 31)];
+  rwmask = rmask32[31 - (xp & 31)];
+
+  // global and local work dimensions for Horizontal pass
+  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
+  globalThreads[0] = gsize;
+  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
+  globalThreads[1] = gsize;
+  localThreads[0] = GROUPSIZE_X;
+  localThreads[1] = GROUPSIZE_Y;
+
+  // Horizontal Pass
+  if (xp > 31 || xn > 31) {
+    // Generic case.
+    rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeHor", &status);
+    CHECK_OPENCL(status, "clCreateKernel morphoErodeHor");
+    status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h);
+    status =
+        clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), &isAsymmetric);
+    status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), &rwmask);
+    status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), &lwmask);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
+
+    if (yp > 0 || yn > 0) {  // vertical pass follows: swap src/dst buffers
+      pixtemp = pixsCLBuffer;
+      pixsCLBuffer = pixdCLBuffer;
+      pixdCLBuffer = pixtemp;
+    }
+  } else if (xp > 0 || xn > 0) {  // xp and xn <= 31: use the _32word kernel
+    rEnv.mpkKernel =
+        clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_32word", &status);
+    CHECK_OPENCL(status, "clCreateKernel morphoErodeHor_32word");
+    isEven = (xp != xn);
+    status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
+    status =
+        clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric);
+    status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), &rwmask);
+    status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), &lwmask);
+    status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), &isEven);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
+
+    if (yp > 0 || yn > 0) {  // vertical pass follows: swap src/dst buffers
+      pixtemp = pixsCLBuffer;
+      pixsCLBuffer = pixdCLBuffer;
+      pixdCLBuffer = pixtemp;
     }
+  }
 
-    //Vertical Pass
-    if (yp > 0 || yn > 0)
-    {
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeVer", &status );
-
-        status = clSetKernelArg(rEnv.mpkKernel,
-            0,
-            sizeof(cl_mem),
-            &pixsCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-            1,
-            sizeof(cl_mem),
-            &pixdCLBuffer);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                2,
-                sizeof(yp),
-                (const void *)&yp);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                3,
-                sizeof(wpl),
-                (const void *)&wpl);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                4,
-                sizeof(h),
-                (const void *)&h);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                5,
-                sizeof(isAsymmetric),
-                (const void *)&isAsymmetric);
-        status = clSetKernelArg(rEnv.mpkKernel,
-                6,
-                sizeof(yn),
-                (const void *)&yn);
-        status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                                rEnv.mpkKernel,
-                                2,
-                                NULL,
-                                globalThreads,
-                                localThreads,
-                                0,
-                                NULL,
-                                NULL);
-    }
+  // Vertical Pass
+  if (yp > 0 || yn > 0) {
+    rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeVer", &status);
+    CHECK_OPENCL(status, "clCreateKernel morphoErodeVer");
+
+    status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
+    status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
+    status =
+        clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric);
+    status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), &yn);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
+  }
 
-    return status;
+  return status;
 }
 
 // OpenCL implementation of Morphology Dilate
 //Note: Assumes the source and dest opencl buffer are initialized. No check done
-PIX*
-OpenclDevice::pixDilateBrickCL(PIX  *pixd, PIX  *pixs, l_int32  hsize, l_int32  vsize, bool reqDataCopy = false)
-{
-    l_uint32 wpl, h;
+PIX *OpenclDevice::pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize,
+                                    l_int32 vsize, bool reqDataCopy = false) {
+  l_uint32 wpl, h;
 
-    wpl = pixGetWpl(pixs);
-    h = pixGetHeight(pixs);
+  wpl = pixGetWpl(pixs);
+  h = pixGetHeight(pixs);
 
-    clStatus = pixDilateCL(hsize, vsize, wpl, h);
+  clStatus = pixDilateCL(hsize, vsize, wpl, h);
 
-    if (reqDataCopy)
-    {
-        pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ, false);
-    }
+  if (reqDataCopy) {
+    pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h,
+                             CL_MAP_READ, false);
+  }
 
-    return pixd;
+  return pixd;
 }
 
 // OpenCL implementation of Morphology Erode
 //Note: Assumes the source and dest opencl buffer are initialized. No check done
-PIX*
-OpenclDevice::pixErodeBrickCL(PIX  *pixd, PIX  *pixs, l_int32  hsize, l_int32  vsize, bool reqDataCopy = false)
-{
-    l_uint32 wpl, h;
+PIX *OpenclDevice::pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize,
+                                   l_int32 vsize, bool reqDataCopy = false) {
+  l_uint32 wpl, h;
 
-    wpl = pixGetWpl(pixs);
-    h = pixGetHeight(pixs);
+  wpl = pixGetWpl(pixs);
+  h = pixGetHeight(pixs);
 
-    clStatus = pixErodeCL(hsize, vsize, wpl, h);
+  clStatus = pixErodeCL(hsize, vsize, wpl, h);
 
-    if (reqDataCopy)
-    {
-        pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
-    }
+  if (reqDataCopy) {
+    pixd =
+        mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ);
+  }
 
-    return pixd;
+  return pixd;
 }
 
 //Morphology Open operation. Invokes the relevant OpenCL kernels
@@ -2045,50 +1812,40 @@ pixCloseCL(l_int32  hsize, l_int32  vsize, l_int32  wpl, l_int32  h)
 
 // OpenCL implementation of Morphology Close
 //Note: Assumes the source and dest opencl buffer are initialized. No check done
-PIX*
-OpenclDevice::pixCloseBrickCL(PIX  *pixd,
-                              PIX  *pixs,
-                              l_int32  hsize,
-                              l_int32  vsize,
-                              bool reqDataCopy = false)
-{
-    l_uint32 wpl, h;
+PIX *OpenclDevice::pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize,
+                                   l_int32 vsize, bool reqDataCopy = false) {
+  l_uint32 wpl, h;
 
-    wpl = pixGetWpl(pixs);
-    h = pixGetHeight(pixs);
+  wpl = pixGetWpl(pixs);
+  h = pixGetHeight(pixs);
 
-    clStatus = pixCloseCL(hsize, vsize, wpl, h);
+  clStatus = pixCloseCL(hsize, vsize, wpl, h);
 
-    if (reqDataCopy)
-    {
-        pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
-    }
+  if (reqDataCopy) {
+    pixd =
+        mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ);
+  }
 
-    return pixd;
+  return pixd;
 }
 
 // OpenCL implementation of Morphology Open
 //Note: Assumes the source and dest opencl buffer are initialized. No check done
-PIX*
-OpenclDevice::pixOpenBrickCL(PIX  *pixd,
-                              PIX  *pixs,
-                              l_int32  hsize,
-                              l_int32  vsize,
-                              bool reqDataCopy = false)
-{
-    l_uint32 wpl, h;
+PIX *OpenclDevice::pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize,
+                                  l_int32 vsize, bool reqDataCopy = false) {
+  l_uint32 wpl, h;
 
-    wpl = pixGetWpl(pixs);
-    h = pixGetHeight(pixs);
+  wpl = pixGetWpl(pixs);
+  h = pixGetHeight(pixs);
 
-    clStatus = pixOpenCL(hsize, vsize, wpl, h);
+  clStatus = pixOpenCL(hsize, vsize, wpl, h);
 
-    if (reqDataCopy)
-    {
-        pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
-    }
+  if (reqDataCopy) {
+    pixd =
+        mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ);
+  }
 
-    return pixd;
+  return pixd;
 }
 
 //pix OR operation: outbuffer = buffer1 | buffer2
@@ -2106,6 +1863,7 @@ pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem ou
     globalThreads[1] = gsize;
 
     rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixOR", &status );
+    CHECK_OPENCL(status, "clCreateKernel pixOR");
 
     status = clSetKernelArg(rEnv.mpkKernel,
         0,
@@ -2119,23 +1877,11 @@ pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem ou
         2,
         sizeof(cl_mem),
         &outbuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-            3,
-            sizeof(wpl),
-            (const void *)&wpl);
-    status = clSetKernelArg(rEnv.mpkKernel,
-            4,
-            sizeof(h),
-            (const void *)&h);
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                            rEnv.mpkKernel,
-                            2,
-                            NULL,
-                            globalThreads,
-                            localThreads,
-                            0,
-                            NULL,
-                            NULL);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
 
     return status;
 }
@@ -2155,6 +1901,7 @@ pixANDCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem o
     globalThreads[1] = gsize;
 
     rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixAND", &status );
+    CHECK_OPENCL(status, "clCreateKernel pixAND");
 
     // Enqueue a kernel run call.
     status = clSetKernelArg(rEnv.mpkKernel,
@@ -2169,102 +1916,64 @@ pixANDCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem o
         2,
         sizeof(cl_mem),
         &outbuffer);
-    status = clSetKernelArg(rEnv.mpkKernel,
-            3,
-            sizeof(wpl),
-            (const void *)&wpl);
-    status = clSetKernelArg(rEnv.mpkKernel,
-            4,
-            sizeof(h),
-            (const void *)&h);
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                            rEnv.mpkKernel,
-                            2,
-                            NULL,
-                            globalThreads,
-                            localThreads,
-                            0,
-                            NULL,
-                            NULL);
+    status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
+    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
+                                    NULL, globalThreads, localThreads, 0,
+                                    NULL, NULL);
 
     return status;
 }
 
 //output = buffer1 & ~(buffer2)
-cl_int
-pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outBuffer = NULL)
-{
-    cl_int status;
-    size_t globalThreads[2];
-    int gsize;
-    size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
-
-    gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
-    globalThreads[0] = gsize;
-    gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
-    globalThreads[1] = gsize;
-
-    if (outBuffer != NULL)
-    {
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixSubtract", &status );
-    }
-    else
-    {
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixSubtract_inplace", &status );
-    }
+cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1,
+                          cl_mem buffer2, cl_mem outBuffer = NULL) {
+  cl_int status;  // overwritten by each OpenCL call; only the last is returned
+  size_t globalThreads[2];
+  int gsize;
+  size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
+
+  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;  // round up
+  globalThreads[0] = gsize;
+  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;  // round up
+  globalThreads[1] = gsize;
+
+  if (outBuffer != NULL) {  // separate output buffer supplied
+    rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "pixSubtract", &status);
+    CHECK_OPENCL(status, "clCreateKernel pixSubtract");
+  } else {  // no output buffer: use the in-place kernel variant
+    rEnv.mpkKernel =
+        clCreateKernel(rEnv.mpkProgram, "pixSubtract_inplace", &status);
+    CHECK_OPENCL(status, "clCreateKernel pixSubtract_inplace");
+  }
 
-    // Enqueue a kernel run call.
-    status = clSetKernelArg(rEnv.mpkKernel,
-        0,
-        sizeof(cl_mem),
-        &buffer1);
-    status = clSetKernelArg(rEnv.mpkKernel,
-        1,
-        sizeof(cl_mem),
-        &buffer2);
-    status = clSetKernelArg(rEnv.mpkKernel,
-            2,
-            sizeof(wpl),
-            (const void *)&wpl);
-    status = clSetKernelArg(rEnv.mpkKernel,
-            3,
-            sizeof(h),
-            (const void *)&h);
-    if (outBuffer != NULL)
-    {
-        status = clSetKernelArg(rEnv.mpkKernel,
-            4,
-            sizeof(cl_mem),
-            (const void *)&outBuffer);
-    }
-    status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
-                            rEnv.mpkKernel,
-                            2,
-                            NULL,
-                            globalThreads,
-                            localThreads,
-                            0,
-                            NULL,
-                            NULL);
+  // Bind the kernel arguments, then enqueue the kernel run.
+  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &buffer1);
+  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &buffer2);
+  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
+  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
+  if (outBuffer != NULL) {  // only the non-in-place kernel gets argument 4
+    status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outBuffer);
+  }
+  status =
+      clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL,
+                             globalThreads, localThreads, 0, NULL, NULL);
 
-    return status;
+  return status;
 }
 
 // OpenCL implementation of Subtract pix
 //Note: Assumes the source and dest opencl buffer are initialized. No check done
-PIX*
-OpenclDevice::pixSubtractCL(PIX  *pixd, PIX  *pixs1, PIX  *pixs2, bool reqDataCopy = false)
-{
-    l_uint32 wpl, h;
+PIX *OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2,
+                                 bool reqDataCopy = false) {
+  l_uint32 wpl, h;
 
-    PROCNAME("pixSubtractCL");
+  PROCNAME("pixSubtractCL");
 
-    if (!pixs1)
-        return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd);
-    if (!pixs2)
-        return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd);
-    if (pixGetDepth(pixs1) != pixGetDepth(pixs2))
-        return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd);
+  if (!pixs1) return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd);
+  if (!pixs2) return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd);
+  if (pixGetDepth(pixs1) != pixGetDepth(pixs2))
+    return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd);
 
 #if  EQUAL_SIZE_WARNING
     if (!pixSizesEqual(pixs1, pixs2))
@@ -2287,200 +1996,206 @@ OpenclDevice::pixSubtractCL(PIX  *pixd, PIX  *pixs1, PIX  *pixs2, bool reqDataCo
 
 // OpenCL implementation of Hollow pix
 //Note: Assumes the source and dest opencl buffer are initialized. No check done
-PIX*
-OpenclDevice::pixHollowCL(PIX  *pixd,
-                        PIX  *pixs,
-                        l_int32  close_hsize,
-                        l_int32  close_vsize,
-                        l_int32  open_hsize,
-                        l_int32  open_vsize,
-                        bool reqDataCopy = false)
-{
-    l_uint32 wpl, h;
-    cl_mem pixtemp;
-
-    wpl = pixGetWpl(pixs);
-    h = pixGetHeight(pixs);
-
-    //First step : Close Morph operation: Dilate followed by Erode
-    clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
-
-    //Store the output of close operation in an intermediate buffer
-    //this will be later used for pixsubtract
-    clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL);
-
-    //Second step: Open Operation - Erode followed by Dilate
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixtemp;
-
-    clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
-
-    //Third step: Subtract : (Close - Open)
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixdCLIntermediate;
-    pixdCLIntermediate = pixtemp;
-
-    clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
-
-    if (reqDataCopy)
-    {
-        //Read back output data from OCL buffer to cpu
-        pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
-    }
-    return pixd;
+PIX *OpenclDevice::pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize,
+                               l_int32 close_vsize, l_int32 open_hsize,
+                               l_int32 open_vsize, bool reqDataCopy = false) {
+  l_uint32 wpl, h;
+  cl_mem pixtemp;
+
+  wpl = pixGetWpl(pixs);
+  h = pixGetHeight(pixs);
+
+  // First step : Close Morph operation: Dilate followed by Erode
+  clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
+
+  // Store the output of the close operation in an intermediate buffer;
+  // it is used later by the subtract step.
+  clStatus =
+      clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
+                          0, sizeof(int) * wpl * h, 0, NULL, NULL);
+
+  // Second step: Open Operation - Erode followed by Dilate
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixtemp;
+
+  clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
+
+  // Third step: Subtract : (Close - Open)
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixdCLIntermediate;
+  pixdCLIntermediate = pixtemp;
+
+  clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
+
+  if (reqDataCopy) {
+    // Read back output data from OCL buffer to cpu
+    pixd =
+        mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ);
+  }
+  return pixd;
 }
 
 // OpenCL implementation of Get Lines from pix function
 //Note: Assumes the source and dest opencl buffer are initialized. No check done
-void
-OpenclDevice::pixGetLinesCL(PIX  *pixd,
-                            PIX  *pixs,
-                            PIX** pix_vline,
-                            PIX** pix_hline,
-                            PIX** pixClosed,
-                            bool  getpixClosed,
-                            l_int32  close_hsize, l_int32  close_vsize,
-                            l_int32  open_hsize, l_int32  open_vsize,
-                            l_int32  line_hsize, l_int32  line_vsize)
-{
-    l_uint32 wpl, h;
-    cl_mem pixtemp;
-
-    wpl = pixGetWpl(pixs);
-    h = pixGetHeight(pixs);
-
-    //First step : Close Morph operation: Dilate followed by Erode
-    clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
-
-    //Copy the Close output to CPU buffer
-    if (getpixClosed)
-    {
-        *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, wpl*h, CL_MAP_READ, true, false);
-    }
+void OpenclDevice::pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline,
+                                 PIX **pix_hline, PIX **pixClosed,
+                                 bool getpixClosed, l_int32 close_hsize,
+                                 l_int32 close_vsize, l_int32 open_hsize,
+                                 l_int32 open_vsize, l_int32 line_hsize,
+                                 l_int32 line_vsize) {
+  l_uint32 wpl, h;
+  cl_mem pixtemp;
+
+  wpl = pixGetWpl(pixs);
+  h = pixGetHeight(pixs);
+
+  // First step : Close Morph operation: Dilate followed by Erode
+  clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
+
+  // Copy the Close output to CPU buffer
+  if (getpixClosed) {
+    *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs,
+                                   wpl * h, CL_MAP_READ, true, false);
+  }
 
-    //Store the output of close operation in an intermediate buffer
-    //this will be later used for pixsubtract
-    clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL);
+  // Store the output of the close operation in an intermediate buffer;
+  // it is used later by the subtract step.
+  clStatus =
+      clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
+                          0, sizeof(int) * wpl * h, 0, NULL, NULL);
 
-    //Second step: Open Operation - Erode followed by Dilate
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixtemp;
+  // Second step: Open Operation - Erode followed by Dilate
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixtemp;
 
-    clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
+  clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
 
-    //Third step: Subtract : (Close - Open)
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixdCLIntermediate;
-    pixdCLIntermediate = pixtemp;
+  // Third step: Subtract : (Close - Open)
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixdCLIntermediate;
+  pixdCLIntermediate = pixtemp;
 
-    clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
+  clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
 
-    //Store the output of Hollow operation in an intermediate buffer
-    //this will be later used
-    clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL);
+  // Store the output of the hollow operation in an intermediate buffer;
+  // it becomes the source of the fifth step (horizontal line).
+  clStatus =
+      clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
+                          0, sizeof(int) * wpl * h, 0, NULL, NULL);
 
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLBuffer;
-    pixdCLBuffer = pixtemp;
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLBuffer;
+  pixdCLBuffer = pixtemp;
 
-    //Fourth step: Get vertical line
-    //pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
-    clStatus = pixOpenCL(1, line_vsize, wpl, h);
+  // Fourth step: Get vertical line
+  // pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
+  clStatus = pixOpenCL(1, line_vsize, wpl, h);
 
-    //Copy the vertical line output to CPU buffer
-    *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl*h, CL_MAP_READ, true, false);
+  // Copy the vertical line output to CPU buffer
+  *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl * h,
+                                 CL_MAP_READ, true, false);
 
-    pixtemp = pixsCLBuffer;
-    pixsCLBuffer = pixdCLIntermediate;
-    pixdCLIntermediate = pixtemp;
+  pixtemp = pixsCLBuffer;
+  pixsCLBuffer = pixdCLIntermediate;
+  pixdCLIntermediate = pixtemp;
 
-    //Fifth step: Get horizontal line
-    //pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
-    clStatus = pixOpenCL(line_hsize, 1, wpl, h);
+  // Fifth step: Get horizontal line
+  // pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
+  clStatus = pixOpenCL(line_hsize, 1, wpl, h);
 
-    //Copy the horizontal line output to CPU buffer
-    *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl*h, CL_MAP_READ, true, true);
+  // Copy the horizontal line output to CPU buffer
+  *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl * h,
+                                 CL_MAP_READ, true, true);
 
-    return;
+  return;
 }
 
-
 /*************************************************************************
  *  HistogramRect
  *  Otsu Thresholding Operations
  *  histogramAllChannels is laid out as all channel 0, then all channel 1...
  *  only supports 1 or 4 channels (bytes_per_pixel)
  ************************************************************************/
-int OpenclDevice::HistogramRectOCL(
-    const unsigned char* imageData,
-    int bytes_per_pixel,
-    int bytes_per_line,
-    int left, // always 0
-    int top, // always 0
-    int width,
-    int height,
-    int kHistogramSize,
-    int* histogramAllChannels)
-{
-PERF_COUNT_START("HistogramRectOCL")
-    cl_int clStatus;
-    int retVal= 0;
-    KernelEnv histKern;
-    SetKernelEnv( &histKern );
-    KernelEnv histRedKern;
-    SetKernelEnv( &histRedKern );
-    /* map imagedata to device as read only */
-    // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be coherent which we don't need.
-    // faster option would be to allocate initial image buffer
-    // using a garlic bus memory type
-    cl_mem imageBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*sizeof(char), (void *)imageData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
-
-    /* setup work group size parameters */
-    int block_size = 256;
-    cl_uint numCUs;
-    clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL);
-    CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
-
-    int requestedOccupancy = 10;
-    int numWorkGroups = numCUs * requestedOccupancy;
-    int numThreads = block_size*numWorkGroups;
-    size_t local_work_size[] = {static_cast<size_t>(block_size)};
-    size_t global_work_size[] = {static_cast<size_t>(numThreads)};
-    size_t red_global_work_size[] = {static_cast<size_t>(block_size*kHistogramSize*bytes_per_pixel)};
-
-    /* map histogramAllChannels as write only */
-    int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;
-
-    cl_mem histogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, kHistogramSize*bytes_per_pixel*sizeof(int), (void *)histogramAllChannels, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer histogramBuffer");
-
-    /* intermediate histogram buffer */
-    int histRed = 256;
-    int tmpHistogramBins =  kHistogramSize*bytes_per_pixel*histRed;
-
-    cl_mem tmpHistogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE, tmpHistogramBins*sizeof(cl_uint), NULL, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer tmpHistogramBuffer");
-
-    /* atomic sync buffer */
-    int *zeroBuffer = new int[1];
-    zeroBuffer[0] = 0;
-    cl_mem atomicSyncBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int), (void *)zeroBuffer, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer atomicSyncBuffer");
-    delete[] zeroBuffer;
-    //Create kernel objects based on bytes_per_pixel
-    if (bytes_per_pixel == 1)
-    {
-        histKern.mpkKernel = clCreateKernel( histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus );
-        CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectOneChannel");
-
-        histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram, "kernel_HistogramRectOneChannelReduction", &clStatus );
-        CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectOneChannelReduction");
-    } else {
+int OpenclDevice::HistogramRectOCL(unsigned char *imageData,
+                                   int bytes_per_pixel, int bytes_per_line,
+                                   int left,  // always 0
+                                   int top,   // always 0
+                                   int width, int height, int kHistogramSize,
+                                   int *histogramAllChannels) {
+  PERF_COUNT_START("HistogramRectOCL")
+  cl_int clStatus;
+  int retVal = 0;
+  KernelEnv histKern;
+  SetKernelEnv(&histKern);
+  KernelEnv histRedKern;
+  SetKernelEnv(&histRedKern);
+  /* map imagedata to device as read only */
+  // USE_HOST_PTR uses the onion+ bus, which is the slowest option; it also
+  // happens to be coherent, which we don't need.
+  // A faster option would be to allocate the initial image buffer
+  // using a garlic bus memory type.
+  cl_mem imageBuffer = clCreateBuffer(
+      histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+      width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
+
+  /* setup work group size parameters */
+  int block_size = 256;
+  cl_uint numCUs;
+  clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
+                             sizeof(numCUs), &numCUs, NULL);
+  CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
+
+  int requestedOccupancy = 10;
+  int numWorkGroups = numCUs * requestedOccupancy;
+  int numThreads = block_size * numWorkGroups;
+  size_t local_work_size[] = {static_cast<size_t>(block_size)};
+  size_t global_work_size[] = {static_cast<size_t>(numThreads)};
+  size_t red_global_work_size[] = {
+      static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};
+
+  /* map histogramAllChannels to the device as read/write (host pointer) */
+  int numBins = kHistogramSize * bytes_per_pixel * numWorkGroups;
+
+  cl_mem histogramBuffer = clCreateBuffer(
+      histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+      kHistogramSize * bytes_per_pixel * sizeof(int), histogramAllChannels,
+      &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer histogramBuffer");
+
+  /* intermediate histogram buffer */
+  int histRed = 256;
+  int tmpHistogramBins = kHistogramSize * bytes_per_pixel * histRed;
+
+  cl_mem tmpHistogramBuffer =
+      clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE,
+                     tmpHistogramBins * sizeof(cl_uint), NULL, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer tmpHistogramBuffer");
+
+  /* atomic sync buffer */
+  int *zeroBuffer = new int[1];
+  zeroBuffer[0] = 0;
+  cl_mem atomicSyncBuffer = clCreateBuffer(
+      histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+      sizeof(cl_int), zeroBuffer, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer atomicSyncBuffer");
+  delete[] zeroBuffer;
+  // Create kernel objects based on bytes_per_pixel
+  if (bytes_per_pixel == 1) {
+    histKern.mpkKernel = clCreateKernel(
+        histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus);
+    CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectOneChannel");
+
+    histRedKern.mpkKernel =
+        clCreateKernel(histRedKern.mpkProgram,
+                       "kernel_HistogramRectOneChannelReduction", &clStatus);
+    CHECK_OPENCL(clStatus,
+                 "clCreateKernel kernel_HistogramRectOneChannelReduction");
+  } else {
     histKern.mpkKernel = clCreateKernel( histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus );
     CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectAllChannels");
 
@@ -2491,72 +2206,77 @@ PERF_COUNT_START("HistogramRectOCL")
     void *ptr;
 
     //Initialize tmpHistogramBuffer buffer
-    ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0, tmpHistogramBins*sizeof(cl_uint), 0, NULL, NULL, &clStatus);
+    ptr = clEnqueueMapBuffer(
+        histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0,
+        tmpHistogramBins * sizeof(cl_uint), 0, NULL, NULL, &clStatus);
     CHECK_OPENCL( clStatus, "clEnqueueMapBuffer tmpHistogramBuffer");
 
     memset(ptr, 0, tmpHistogramBins*sizeof(cl_uint));
-    clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, NULL, NULL);
+    clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0,
+                            NULL, NULL);
 
     /* set kernel 1 arguments */
-    clStatus = clSetKernelArg( histKern.mpkKernel, 0, sizeof(cl_mem), (void *)&imageBuffer );
+    clStatus =
+        clSetKernelArg(histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer);
     CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer");
     cl_uint numPixels = width*height;
-    clStatus = clSetKernelArg( histKern.mpkKernel, 1, sizeof(cl_uint), (void *)&numPixels );
+    clStatus =
+        clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels);
     CHECK_OPENCL( clStatus, "clSetKernelArg numPixels" );
-    clStatus = clSetKernelArg( histKern.mpkKernel, 2, sizeof(cl_mem), (void *)&tmpHistogramBuffer );
+    clStatus = clSetKernelArg(histKern.mpkKernel, 2, sizeof(cl_mem),
+                              &tmpHistogramBuffer);
     CHECK_OPENCL( clStatus, "clSetKernelArg tmpHistogramBuffer");
 
     /* set kernel 2 arguments */
     int n = numThreads/bytes_per_pixel;
-    clStatus = clSetKernelArg( histRedKern.mpkKernel, 0, sizeof(cl_int), (void *)&n );
+    clStatus = clSetKernelArg(histRedKern.mpkKernel, 0, sizeof(cl_int), &n);
     CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer");
-    clStatus = clSetKernelArg( histRedKern.mpkKernel, 1, sizeof(cl_mem), (void *)&tmpHistogramBuffer );
+    clStatus = clSetKernelArg(histRedKern.mpkKernel, 1, sizeof(cl_mem),
+                              &tmpHistogramBuffer);
     CHECK_OPENCL( clStatus, "clSetKernelArg tmpHistogramBuffer");
-    clStatus = clSetKernelArg( histRedKern.mpkKernel, 2, sizeof(cl_mem), (void *)&histogramBuffer );
+    clStatus = clSetKernelArg(histRedKern.mpkKernel, 2, sizeof(cl_mem),
+                              &histogramBuffer);
     CHECK_OPENCL( clStatus, "clSetKernelArg histogramBuffer");
 
     /* launch histogram */
 PERF_COUNT_SUB("before")
-    clStatus = clEnqueueNDRangeKernel(
-        histKern.mpkCmdQueue,
-        histKern.mpkKernel,
-        1, NULL, global_work_size, local_work_size,
-        0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels" );
-    clFinish( histKern.mpkCmdQueue );
-    if(clStatus !=0)
-    {
-		retVal = -1;
-	}
+clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1,
+                                  NULL, global_work_size, local_work_size, 0,
+                                  NULL, NULL);
+CHECK_OPENCL(clStatus,
+             "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels");
+clFinish(histKern.mpkCmdQueue);
+if (clStatus != 0) {
+  retVal = -1;
+    }
     /* launch histogram */
     clStatus = clEnqueueNDRangeKernel(
-        histRedKern.mpkCmdQueue,
-        histRedKern.mpkKernel,
-        1, NULL, red_global_work_size, local_work_size,
-        0, NULL, NULL );
+        histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, NULL,
+        red_global_work_size, local_work_size, 0, NULL, NULL);
     CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" );
     clFinish( histRedKern.mpkCmdQueue );
-	if(clStatus !=0)
-	{
-			retVal = -1;
-	}
-PERF_COUNT_SUB("redKernel")
+    if (clStatus != 0) {
+      retVal = -1;
+    }
+    PERF_COUNT_SUB("redKernel")
 
     /* map results back from gpu */
-    ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, CL_MAP_READ, 0, kHistogramSize*bytes_per_pixel*sizeof(int), 0, NULL, NULL, &clStatus);
+    ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE,
+                             CL_MAP_READ, 0,
+                             kHistogramSize * bytes_per_pixel * sizeof(int), 0,
+                             NULL, NULL, &clStatus);
     CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer");
-    if(clStatus !=0)
-    {
-				retVal = -1;
-	}
-    clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, NULL, NULL);
+    if (clStatus != 0) {
+      retVal = -1;
+    }
+    clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0,
+                            NULL, NULL);
 
     clReleaseMemObject(histogramBuffer);
     clReleaseMemObject(imageBuffer);
 PERF_COUNT_SUB("after")
 PERF_COUNT_END
-   return retVal;
-
+return retVal;
 }
 
 /*************************************************************************
@@ -2564,111 +2284,118 @@ PERF_COUNT_END
  * from the class, using thresholds/hi_values to the output IMAGE.
  * only supports 1 or 4 channels
  ************************************************************************/
-int OpenclDevice::ThresholdRectToPixOCL(
-    const unsigned char* imageData,
-    int bytes_per_pixel,
-    int bytes_per_line,
-    const int* thresholds,
-    const int* hi_values,
-    Pix** pix,
-    int height,
-    int width,
-    int top,
-    int left) {
-PERF_COUNT_START("ThresholdRectToPixOCL")
-    int retVal =0;
-    /* create pix result buffer */
-    *pix = pixCreate(width, height, 1);
-    uinT32* pixData = pixGetData(*pix);
-    int wpl = pixGetWpl(*pix);
-    int pixSize = wpl*height*sizeof(uinT32); // number of pixels
-
-    cl_int clStatus;
-    KernelEnv rEnv;
-    SetKernelEnv( &rEnv );
-
-    /* setup work group size parameters */
-    int block_size = 256;
-    cl_uint numCUs = 6;
-     clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL);
-    CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
-
-    int requestedOccupancy = 10;
-    int numWorkGroups = numCUs * requestedOccupancy;
-    int numThreads = block_size*numWorkGroups;
-    size_t local_work_size[] = {(size_t) block_size};
-    size_t global_work_size[] = {(size_t) numThreads};
-
-    /* map imagedata to device as read only */
-    // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be coherent which we don't need.
-    // faster option would be to allocate initial image buffer
-    // using a garlic bus memory type
-    cl_mem imageBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*sizeof(char), (void *)imageData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
-
-    /* map pix as write only */
-    pixThBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, pixSize, (void *)pixData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer pix");
-
-    /* map thresholds and hi_values */
-    cl_mem thresholdsBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*sizeof(int), (void *)thresholds, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer thresholdBuffer");
-    cl_mem hiValuesBuffer   = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*sizeof(int), (void *)hi_values, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer hiValuesBuffer");
-
-    /* compile kernel */
-    if (bytes_per_pixel == 4) {
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus );
-        CHECK_OPENCL( clStatus, "clCreateKernel kernel_ThresholdRectToPix");
-    } else {
-        rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", &clStatus );
-        CHECK_OPENCL( clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan");
-    }
+int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData,
+                                        int bytes_per_pixel, int bytes_per_line,
+                                        int *thresholds, int *hi_values,
+                                        Pix **pix, int height, int width,
+                                        int top, int left) {
+  PERF_COUNT_START("ThresholdRectToPixOCL")
+  int retVal = 0;
+  /* create pix result buffer */
+  *pix = pixCreate(width, height, 1);
+  uinT32 *pixData = pixGetData(*pix);
+  int wpl = pixGetWpl(*pix);
+  int pixSize = wpl * height * sizeof(uinT32);  // size of pix data in bytes
+
+  cl_int clStatus;
+  KernelEnv rEnv;
+  SetKernelEnv(&rEnv);
+
+  /* setup work group size parameters */
+  int block_size = 256;
+  cl_uint numCUs = 6;
+  clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
+                             sizeof(numCUs), &numCUs, NULL);
+  CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
+
+  int requestedOccupancy = 10;
+  int numWorkGroups = numCUs * requestedOccupancy;
+  int numThreads = block_size * numWorkGroups;
+  size_t local_work_size[] = {(size_t)block_size};
+  size_t global_work_size[] = {(size_t)numThreads};
+
+  /* map imagedata to device as read only */
+  // USE_HOST_PTR uses the onion+ bus, which is the slowest option; it also
+  // happens to be coherent, which we don't need.
+  // A faster option would be to allocate the initial image buffer
+  // using a garlic bus memory type.
+  cl_mem imageBuffer = clCreateBuffer(
+      rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+      width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
+
+  /* map pix to the device as read/write (host pointer) */
+  pixThBuffer =
+      clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+                     pixSize, pixData, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer pix");
+
+  /* map thresholds and hi_values */
+  cl_mem thresholdsBuffer =
+      clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+                     bytes_per_pixel * sizeof(int), thresholds, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer thresholdBuffer");
+  cl_mem hiValuesBuffer =
+      clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+                     bytes_per_pixel * sizeof(int), hi_values, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer hiValuesBuffer");
+
+  /* create kernel object */
+  if (bytes_per_pixel == 4) {
+    rEnv.mpkKernel =
+        clCreateKernel(rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus);
+    CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix");
+  } else {
+    rEnv.mpkKernel = clCreateKernel(
+        rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", &clStatus);
+    CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan");
+  }
 
-    /* set kernel arguments */
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&imageBuffer );
-    CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer");
-    cl_uint numPixels = width*height;
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 1, sizeof(int), (void *)&height );
-    CHECK_OPENCL( clStatus, "clSetKernelArg height" );
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(int), (void *)&width );
-    CHECK_OPENCL( clStatus, "clSetKernelArg width" );
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(int), (void *)&wpl );
-    CHECK_OPENCL( clStatus, "clSetKernelArg wpl" );
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), (void *)&thresholdsBuffer );
-    CHECK_OPENCL( clStatus, "clSetKernelArg thresholdsBuffer" );
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 5, sizeof(cl_mem), (void *)&hiValuesBuffer );
-    CHECK_OPENCL( clStatus, "clSetKernelArg hiValuesBuffer" );
-    clStatus = clSetKernelArg( rEnv.mpkKernel, 6, sizeof(cl_mem), (void *)&pixThBuffer );
-    CHECK_OPENCL( clStatus, "clSetKernelArg pixThBuffer");
-
-    /* launch kernel & wait */
-PERF_COUNT_SUB("before")
-    clStatus = clEnqueueNDRangeKernel(
-        rEnv.mpkCmdQueue,
-        rEnv.mpkKernel,
-        1, NULL, global_work_size, local_work_size,
-        0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix" );
-    clFinish( rEnv.mpkCmdQueue );
-PERF_COUNT_SUB("kernel")
-	if(clStatus !=0)
-		{
-				printf("Setting return value to -1\n");
-				retVal = -1;
-	}
-    /* map results back from gpu */
-    void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, pixSize, 0, NULL, NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer");
-    clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, NULL);
+  /* set kernel arguments */
+  clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &imageBuffer);
+  CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer");
+  cl_uint numPixels = width * height;
+  clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), &height);
+  CHECK_OPENCL(clStatus, "clSetKernelArg height");
+  clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(int), &width);
+  CHECK_OPENCL(clStatus, "clSetKernelArg width");
+  clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), &wpl);
+  CHECK_OPENCL(clStatus, "clSetKernelArg wpl");
+  clStatus =
+      clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &thresholdsBuffer);
+  CHECK_OPENCL(clStatus, "clSetKernelArg thresholdsBuffer");
+  clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), &hiValuesBuffer);
+  CHECK_OPENCL(clStatus, "clSetKernelArg hiValuesBuffer");
+  clStatus = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), &pixThBuffer);
+  CHECK_OPENCL(clStatus, "clSetKernelArg pixThBuffer");
+
+  /* launch kernel & wait */
+  PERF_COUNT_SUB("before")
+  clStatus = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1,
+                                    NULL, global_work_size, local_work_size,
+                                    0, NULL, NULL);
+  CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix");
+  clFinish(rEnv.mpkCmdQueue);
+  PERF_COUNT_SUB("kernel")
+  if (clStatus != 0) {
+    printf("Setting return value to -1\n");
+    retVal = -1;
+  }
+  /* map results back from gpu */
+  void *ptr =
+      clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0,
+                         pixSize, 0, NULL, NULL, &clStatus);
+  CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer");
+  clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL,
+                          NULL);
 
-    clReleaseMemObject(imageBuffer);
-    clReleaseMemObject(thresholdsBuffer);
-    clReleaseMemObject(hiValuesBuffer);
+  clReleaseMemObject(imageBuffer);
+  clReleaseMemObject(thresholdsBuffer);
+  clReleaseMemObject(hiValuesBuffer);
 
-PERF_COUNT_SUB("after")
-PERF_COUNT_END
-return retVal;
+  PERF_COUNT_SUB("after")
+  PERF_COUNT_END
+  return retVal;
 }
 
 
@@ -2745,7 +2472,6 @@ void populateTessScoreEvaluationInputData( TessScoreEvaluationInputData *input )
     float fractionBlack = 0.1; // how much of the image should be blackened
     int numSpots = (height*width)*fractionBlack/(maxLineWidth*maxLineWidth/2/2);
     for (int i = 0; i < numSpots; i++) {
-
         int lineWidth = rand()%maxLineWidth;
         int col = lineWidth + rand()%(width-2*lineWidth);
         int row = lineWidth + rand()%(height-2*lineWidth);
@@ -2776,15 +2502,14 @@ typedef struct _TessDeviceScore {
  *****************************************************************************/
 
 double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
-
     double time = 0;
 #if ON_WINDOWS
     LARGE_INTEGER freq, time_funct_start, time_funct_end;
     QueryPerformanceFrequency(&freq);
 #elif ON_APPLE
-	mach_timebase_info_data_t info = { 0, 0 };
+    mach_timebase_info_data_t info = {0, 0};
     mach_timebase_info(&info);
-	long long start,stop;
+    long long start, stop;
 #else
     timespec time_funct_start, time_funct_end;
 #endif
@@ -2795,21 +2520,22 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu
     if (type == DS_DEVICE_OPENCL_DEVICE) {
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_start);
-#elif  ON_APPLE
-	start = mach_absolute_time();
+#elif ON_APPLE
+      start = mach_absolute_time();
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
 #endif
 
         OpenclDevice::gpuEnv = *env;
         int wpl = pixGetWpl(input.pix);
-        OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, wpl, NULL);
+        OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height,
+                                            wpl, NULL);
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_end);
         time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
-#elif  ON_APPLE
-		stop = mach_absolute_time();
-		time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;
+#elif ON_APPLE
+        stop = mach_absolute_time();
+        time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
         time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
@@ -2818,8 +2544,8 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu
     } else {
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_start);
-#elif  ON_APPLE
-		start = mach_absolute_time();
+#elif ON_APPLE
+      start = mach_absolute_time();
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
 #endif
@@ -2832,7 +2558,6 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu
         int idx = 0;
         for (i = 0; i < input.height ; i++) {
             for (j = 0; j < input.width; j++) {
-
                 l_uint32 tiffword = tiffdata[i * input.width + j];
                 l_int32 rval = ((tiffword) & 0xff);
                 l_int32 gval = (((tiffword) >> 8) & 0xff);
@@ -2845,9 +2570,9 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_end);
         time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
-#elif  ON_APPLE
-		stop = mach_absolute_time();
-		time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;
+#elif ON_APPLE
+        stop = mach_absolute_time();
+        time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
         time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
@@ -2862,15 +2587,14 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu
 }
 
 double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
-
     double time;
 #if ON_WINDOWS
     LARGE_INTEGER freq, time_funct_start, time_funct_end;
     QueryPerformanceFrequency(&freq);
-#elif  ON_APPLE
-	mach_timebase_info_data_t info = { 0, 0 };
+#elif ON_APPLE
+    mach_timebase_info_data_t info = {0, 0};
     mach_timebase_info(&info);
-	long long start,stop;
+    long long start, stop;
 #else
     timespec time_funct_start, time_funct_end;
 #endif
@@ -2882,58 +2606,56 @@ double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input,
     int kHistogramSize = 256;
     int bytes_per_line = input.width*input.numChannels;
     int *histogramAllChannels = new int[kHistogramSize*input.numChannels];
-    int retVal= 0;
+    int retVal = 0;
     // function call
     if (type == DS_DEVICE_OPENCL_DEVICE) {
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_start);
-#elif  ON_APPLE
-		start = mach_absolute_time();
+#elif ON_APPLE
+      start = mach_absolute_time();
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
 #endif
 
         OpenclDevice::gpuEnv = *env;
         int wpl = pixGetWpl(input.pix);
-        retVal= OpenclDevice::HistogramRectOCL(input.imageData, input.numChannels, bytes_per_line, top, left, input.width, input.height, kHistogramSize, histogramAllChannels);
+        retVal = OpenclDevice::HistogramRectOCL(
+            input.imageData, input.numChannels, bytes_per_line, top, left,
+            input.width, input.height, kHistogramSize, histogramAllChannels);
 
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_end);
         time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
-#elif  ON_APPLE
-		stop = mach_absolute_time();
-		if(retVal ==0)
-		{
-			time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;
-		}
-		else
-		{
-			time= FLT_MAX;
-		}
+#elif ON_APPLE
+        stop = mach_absolute_time();
+        if (retVal == 0) {
+          time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
+        } else {
+          time = FLT_MAX;
+        }
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
         time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
 #endif
     } else {
-
         int *histogram = new int[kHistogramSize];
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_start);
-#elif  ON_APPLE
-		start = mach_absolute_time();
+#elif ON_APPLE
+        start = mach_absolute_time();
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
 #endif
         for (int ch = 0; ch < input.numChannels; ++ch) {
-            tesseract::HistogramRect(input.pix, input.numChannels,
-                  left, top, input.width, input.height, histogram);
+          tesseract::HistogramRect(input.pix, input.numChannels, left, top,
+                                   input.width, input.height, histogram);
         }
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_end);
         time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
-#elif  ON_APPLE
-		stop = mach_absolute_time();
-		time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;
+#elif ON_APPLE
+        stop = mach_absolute_time();
+        time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
         time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
@@ -2959,13 +2681,13 @@ void ThresholdRectToPix_Native(const unsigned char* imagedata,
     int height = pixGetHeight(*pix);
 
   *pix = pixCreate(width, height, 1);
-  uinT32* pixdata = pixGetData(*pix);
+  uinT32 *pixdata = pixGetData(*pix);
   int wpl = pixGetWpl(*pix);
   const unsigned char* srcdata = imagedata + top * bytes_per_line +
                                  left * bytes_per_pixel;
   for (int y = 0; y < height; ++y) {
-    const uinT8* linedata = srcdata;
-    uinT32* pixline = pixdata + y * wpl;
+    const uinT8 *linedata = srcdata;
+    uinT32 *pixline = pixdata + y * wpl;
     for (int x = 0; x < width; ++x, linedata += bytes_per_pixel) {
       bool white_result = true;
       for (int ch = 0; ch < bytes_per_pixel; ++ch) {
@@ -2985,16 +2707,15 @@ void ThresholdRectToPix_Native(const unsigned char* imagedata,
 }
 
 double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
-
     double time;
-    int retVal =0;
+    int retVal = 0;
 #if ON_WINDOWS
     LARGE_INTEGER freq, time_funct_start, time_funct_end;
     QueryPerformanceFrequency(&freq);
-#elif  ON_APPLE
-	mach_timebase_info_data_t info = { 0, 0 };
+#elif ON_APPLE
+    mach_timebase_info_data_t info = {0, 0};
     mach_timebase_info(&info);
-	long long start,stop;
+    long long start, stop;
 #else
     timespec time_funct_start, time_funct_end;
 #endif
@@ -3020,29 +2741,29 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i
     if (type == DS_DEVICE_OPENCL_DEVICE) {
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_start);
-#elif  ON_APPLE
-		start = mach_absolute_time();
+#elif ON_APPLE
+      start = mach_absolute_time();
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
 #endif
 
         OpenclDevice::gpuEnv = *env;
         int wpl = pixGetWpl(input.pix);
-        retVal= OpenclDevice::ThresholdRectToPixOCL(input.imageData, input.numChannels, bytes_per_line, thresholds, hi_values, &input.pix, input.height, input.width, top, left);
+        retVal = OpenclDevice::ThresholdRectToPixOCL(
+            input.imageData, input.numChannels, bytes_per_line, thresholds,
+            hi_values, &input.pix, input.height, input.width, top, left);
 
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_end);
         time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
-#elif  ON_APPLE
-		stop = mach_absolute_time();
-		if(retVal ==0)
-		{
-			time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;;
-		}
-		else
-		{
-			time= FLT_MAX;
-		}
+#elif ON_APPLE
+        stop = mach_absolute_time();
+        if (retVal == 0) {
+          // Mach ticks * numer / denom gives nanoseconds; report seconds.
+          time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
+        } else {
+          time = FLT_MAX;
+        }
 
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
@@ -3055,8 +2776,8 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i
         thresholder.SetImage( input.pix );
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_start);
-#elif  ON_APPLE
-		start = mach_absolute_time();
+#elif ON_APPLE
+        start = mach_absolute_time();
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
 #endif
@@ -3066,9 +2787,9 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_end);
         time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
-#elif  ON_APPLE
-		stop = mach_absolute_time();
-		time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;
+#elif ON_APPLE
+        stop = mach_absolute_time();
+        time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
         time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
@@ -3087,10 +2808,10 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in
 #if ON_WINDOWS
     LARGE_INTEGER freq, time_funct_start, time_funct_end;
     QueryPerformanceFrequency(&freq);
-#elif  ON_APPLE
-	mach_timebase_info_data_t info = { 0, 0 };
+#elif ON_APPLE
+    mach_timebase_info_data_t info = {0, 0};
     mach_timebase_info(&info);
-	long long start,stop;
+    long long start, stop;
 #else
     timespec time_funct_start, time_funct_end;
 #endif
@@ -3108,8 +2829,8 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in
     if (type == DS_DEVICE_OPENCL_DEVICE) {
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_start);
-#elif  ON_APPLE
-		start = mach_absolute_time();
+#elif ON_APPLE
+      start = mach_absolute_time();
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
 #endif
@@ -3117,16 +2838,19 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in
         OpenclDevice::gpuEnv = *env;
         OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
         Pix *pix_vline = NULL, *pix_hline = NULL, *pix_closed = NULL;
-        OpenclDevice::pixGetLinesCL(NULL, input.pix, &pix_vline, &pix_hline, &pix_closed, true, closing_brick, closing_brick, max_line_width, max_line_width, min_line_length, min_line_length);
+        OpenclDevice::pixGetLinesCL(
+            NULL, input.pix, &pix_vline, &pix_hline, &pix_closed, true,
+            closing_brick, closing_brick, max_line_width, max_line_width,
+            min_line_length, min_line_length);
 
         OpenclDevice::releaseMorphCLBuffers();
 
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_end);
         time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
-#elif  ON_APPLE
-		stop = mach_absolute_time();
-		time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;
+#elif ON_APPLE
+        stop = mach_absolute_time();
+        time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
         time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
@@ -3134,16 +2858,18 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in
     } else {
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_start);
-#elif  ON_APPLE
-		start = mach_absolute_time();
+#elif ON_APPLE
+      start = mach_absolute_time();
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
 #endif
 
         // native serial code
         Pix *src_pix = input.pix;
-        Pix *pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
-        Pix *pix_solid  = pixOpenBrick(NULL, pix_closed, max_line_width, max_line_width);
+        Pix *pix_closed =
+            pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
+        Pix *pix_solid =
+            pixOpenBrick(NULL, pix_closed, max_line_width, max_line_width);
         Pix *pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
         pixDestroy(&pix_solid);
         Pix *pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
@@ -3153,9 +2879,9 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in
 #if ON_WINDOWS
         QueryPerformanceCounter(&time_funct_end);
         time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
-#elif  ON_APPLE
-		stop = mach_absolute_time();
-		time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;
+#elif ON_APPLE
+        stop = mach_absolute_time();
+        time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
 #else
         clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
         time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
@@ -3173,11 +2899,10 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in
 
 #include "stdlib.h"
 
-
 // encode score object as byte string
 ds_status serializeScore( ds_device* device, void **serializedScore, unsigned int* serializedScoreSize ) {
     *serializedScoreSize = sizeof(TessDeviceScore);
-    *serializedScore = (void *) new unsigned char[*serializedScoreSize];
+    *serializedScore = new unsigned char[*serializedScoreSize];
     memcpy(*serializedScore, device->score, *serializedScoreSize);
     return DS_SUCCESS;
 }
@@ -3190,14 +2915,13 @@ ds_status deserializeScore( ds_device* device, const unsigned char* serializedSc
     return DS_SUCCESS;
 }
 
-ds_status releaseScore( void* score ) {
+ds_status releaseScore(void *score) {
   delete (TessDeviceScore *)score;
   return DS_SUCCESS;
 }
 
 // evaluate devices
 ds_status evaluateScoreForDevice( ds_device *device, void *inputData) {
-
     // overwrite statuc gpuEnv w/ current device
     // so native opencl calls can be used; they use static gpuEnv
     printf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native" );
@@ -3233,15 +2957,13 @@ ds_status evaluateScoreForDevice( ds_device *device, void *inputData) {
     float composeRGBPixelWeight     = 1.2f;
     float histogramRectWeight       = 2.4f;
     float thresholdRectToPixWeight  = 4.5f;
-    float getLineMasksMorphWeight   = 5.0f;
-
-    float weightedTime =
-        composeRGBPixelWeight       * composeRGBPixelTime +
-        histogramRectWeight         * histogramRectTime +
-        thresholdRectToPixWeight    * thresholdRectToPixTime +
-        getLineMasksMorphWeight     * getLineMasksMorphTime
-        ;
-    device->score = (void *)new TessDeviceScore;
+    float getLineMasksMorphWeight = 5.0f;
+
+    float weightedTime = composeRGBPixelWeight * composeRGBPixelTime +
+                         histogramRectWeight * histogramRectTime +
+                         thresholdRectToPixWeight * thresholdRectToPixTime +
+                         getLineMasksMorphWeight * getLineMasksMorphTime;
+    device->score = new TessDeviceScore;
     ((TessDeviceScore *)device->score)->time = weightedTime;
 
     printf("[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native" );
@@ -3256,83 +2978,104 @@ ds_status evaluateScoreForDevice( ds_device *device, void *inputData) {
 // initial call to select device
 ds_device OpenclDevice::getDeviceSelection( ) {
   if (!deviceIsSelected) {
-PERF_COUNT_START("getDeviceSelection")
-  // check if opencl is available at runtime
-  if( 1 == LoadOpencl() ) {
-    // opencl is available
-//PERF_COUNT_SUB("LoadOpencl")
-    // setup devices
-    ds_status status;
-    ds_profile *profile;
-    status = initDSProfile( &profile, "v0.1" );
-PERF_COUNT_SUB("initDSProfile")
-    // try reading scores from file
-    const char *fileName = "tesseract_opencl_profile_devices.dat";
-    status = readProfileFromFile( profile, deserializeScore, fileName);
-    if (status != DS_SUCCESS) {
-      // need to run evaluation
-      printf("[DS] Profile file not available (%s); performing profiling.\n", fileName);
-
-      // create input data
-      TessScoreEvaluationInputData input;
-      populateTessScoreEvaluationInputData( &input );
-//PERF_COUNT_SUB("populateTessScoreEvaluationInputData")
-      // perform evaluations
-      unsigned int numUpdates;
-      status =  profileDevices( profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (void *)&input, &numUpdates );
-PERF_COUNT_SUB("profileDevices")
-      // write scores to file
-      if ( status == DS_SUCCESS ) {
-        status = writeProfileToFile( profile, serializeScore, fileName);
-PERF_COUNT_SUB("writeProfileToFile")
-        if ( status == DS_SUCCESS ) {
-          printf("[DS] Scores written to file (%s).\n", fileName);
+    PERF_COUNT_START("getDeviceSelection")
+    // check if opencl is available at runtime
+    if (1 == LoadOpencl()) {
+      // opencl is available
+      // PERF_COUNT_SUB("LoadOpencl")
+      // setup devices
+      ds_status status;
+      ds_profile *profile;
+      status = initDSProfile(&profile, "v0.1");
+      PERF_COUNT_SUB("initDSProfile")
+      // try reading scores from file
+      const char *fileName = "tesseract_opencl_profile_devices.dat";
+      status = readProfileFromFile(profile, deserializeScore, fileName);
+      if (status != DS_SUCCESS) {
+        // need to run evaluation
+        printf("[DS] Profile file not available (%s); performing profiling.\n",
+               fileName);
+
+        // create input data
+        TessScoreEvaluationInputData input;
+        populateTessScoreEvaluationInputData(&input);
+        // PERF_COUNT_SUB("populateTessScoreEvaluationInputData")
+        // perform evaluations
+        unsigned int numUpdates;
+        status = profileDevices(profile, DS_EVALUATE_ALL,
+                                evaluateScoreForDevice, &input, &numUpdates);
+        PERF_COUNT_SUB("profileDevices")
+        // write scores to file
+        if (status == DS_SUCCESS) {
+          status = writeProfileToFile(profile, serializeScore, fileName);
+          PERF_COUNT_SUB("writeProfileToFile")
+          if (status == DS_SUCCESS) {
+            printf("[DS] Scores written to file (%s).\n", fileName);
+          } else {
+            printf(
+                "[DS] Error saving scores to file (%s); scores not written to "
+                "file.\n",
+                fileName);
+          }
         } else {
-          printf("[DS] Error saving scores to file (%s); scores not written to file.\n", fileName);
+          printf(
+              "[DS] Unable to evaluate performance; scores not written to "
+              "file.\n");
         }
       } else {
-        printf("[DS] Unable to evaluate performance; scores not written to file.\n");
+        PERF_COUNT_SUB("readProfileFromFile")
+        printf("[DS] Profile read from file (%s).\n", fileName);
       }
-    } else {
 
-PERF_COUNT_SUB("readProfileFromFile")
-                printf("[DS] Profile read from file (%s).\n", fileName);
-    }
-
-    // we now have device scores either from file or evaluation
-    // select fastest using custom Tesseract selection algorithm
-    float bestTime = FLT_MAX; // begin search with worst possible time
-    int bestDeviceIdx = -1;
-    for (int d = 0; d < profile->numDevices; d++) {
-      ds_device device = profile->devices[d];
-      TessDeviceScore score = *(TessDeviceScore *)device.score;
-
-      float time = score.time;
-              printf("[DS] Device[%i] %i:%s score is %f\n", d+1, device.type, device.oclDeviceName, time);
-      if (time < bestTime) {
-                  bestTime = time;
+      // we now have device scores either from file or evaluation
+      // select fastest using custom Tesseract selection algorithm
+      float bestTime = FLT_MAX;  // begin search with worst possible time
+      int bestDeviceIdx = -1;
+      for (int d = 0; d < profile->numDevices; d++) {
+        ds_device device = profile->devices[d];
+        TessDeviceScore score = *(TessDeviceScore *)device.score;
+
+        float time = score.time;
+        printf("[DS] Device[%i] %i:%s score is %f\n", d + 1, device.type,
+               device.oclDeviceName, time);
+        if (time < bestTime) {
+          bestTime = time;
           bestDeviceIdx = d;
+        }
       }
-    }
-    printf("[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native");
-    // cleanup
-    // TODO: call destructor for profile object?
+      printf("[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1,
+             profile->devices[bestDeviceIdx].oclDeviceName,
+             profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE
+                 ? "OpenCL"
+                 : "Native");
+      // cleanup
+      // TODO: call destructor for profile object?
 
-      bool overrided = false;
+      bool overridden = false;
       char *overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE");
       if (overrideDeviceStr != NULL) {
         int overrideDeviceIdx = atoi(overrideDeviceStr);
-        if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices ) {
-          printf("[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, %i)\n", overrideDeviceStr, overrideDeviceIdx);
+        if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) {
+          printf(
+              "[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, "
+              "%i)\n",
+              overrideDeviceStr, overrideDeviceIdx);
           bestDeviceIdx = overrideDeviceIdx - 1;
-          overrided = true;
+          overridden = true;
         } else {
-          printf("[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are valid devices).\n", overrideDeviceStr, profile->numDevices);
+          printf(
+              "[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are "
+              "valid devices).\n",
+              overrideDeviceStr, profile->numDevices);
         }
       }
 
-      if (overrided) {
-        printf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native");
+      if (overridden) {
+        printf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1,
+               profile->devices[bestDeviceIdx].oclDeviceName,
+               profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE
+                   ? "OpenCL"
+                   : "Native");
       }
       selectedDevice = profile->devices[bestDeviceIdx];
       // cleanup
@@ -3347,10 +3090,10 @@ PERF_COUNT_SUB("readProfileFromFile")
       selectedDevice.oclDriverVersion = NULL;
     }
     deviceIsSelected = true;
-PERF_COUNT_SUB("select from Profile")
-PERF_COUNT_END
+    PERF_COUNT_SUB("select from Profile")
+    PERF_COUNT_END
   }
-//PERF_COUNT_END
+  // PERF_COUNT_END
   return selectedDevice;
 }
 
@@ -3365,8 +3108,6 @@ bool OpenclDevice::selectedDeviceIsNativeCPU() {
   return (device.type == DS_DEVICE_NATIVE_CPU);
 }
 
-
-
 /*!
  *  pixConvertRGBToGray() from leptonica, converted to opencl kernel
  *
@@ -3378,111 +3119,112 @@ bool OpenclDevice::selectedDeviceIsNativeCPU() {
  *  Notes:
  *      (1) Use a weighted average of the RGB values.
  */
-#define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))
+#define SET_DATA_BYTE(pdata, n, val) \
+  (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))
 
-Pix * OpenclDevice::pixConvertRGBToGrayOCL(
-	Pix *srcPix, // 32-bit source
-	float rwt,
-	float gwt,
-	float bwt )
-{
-PERF_COUNT_START("pixConvertRGBToGrayOCL")
-	Pix *dstPix; // 8-bit destination
+Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix,  // 32-bit source
+                                          float rwt, float gwt, float bwt) {
+  PERF_COUNT_START("pixConvertRGBToGrayOCL")
+  Pix *dstPix;  // 8-bit destination
 
-	if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return NULL;
-
-	if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) {
-		// magic numbers from leptonica
-	    rwt = 0.3;
-	    gwt = 0.5;
-	    bwt = 0.2;
-	}
-	// normalize
-	float sum = rwt + gwt + bwt;
-	rwt /= sum;
-	gwt /= sum;
-	bwt /= sum;
-
-	// source pix
-	int w, h;
-	pixGetDimensions(srcPix, &w, &h, NULL);
-    //printf("Image is %i x %i\n", w, h);
-    unsigned int *srcData = pixGetData(srcPix);
-    int srcWPL = pixGetWpl(srcPix);
-	int srcSize = srcWPL * h * sizeof(unsigned int);
-
-	// destination pix
-    if ((dstPix = pixCreate(w, h, 8)) == NULL)
-        return NULL;
-    pixCopyResolution(dstPix, srcPix);
-    unsigned int *dstData = pixGetData(dstPix);
-    int dstWPL = pixGetWpl(dstPix);
-	int dstWords = dstWPL * h;
-	int dstSize = dstWords * sizeof(unsigned int);
-    //printf("dstSize = %i\n", dstSize);
-PERF_COUNT_SUB("pix setup")
-
-	// opencl objects
-	cl_int clStatus;
-    KernelEnv kEnv;
-    SetKernelEnv( &kEnv );
-
-	// source buffer
-	cl_mem srcBuffer = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, srcSize, (void *)srcData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer srcBuffer");
-
-    // destination buffer
-    cl_mem dstBuffer = clCreateBuffer( kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, dstSize, (void *)dstData, &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateBuffer dstBuffer");
-
-    // setup work group size parameters
-    int block_size = 256;
-    int numWorkGroups = ((h*w+block_size-1) / block_size );
-    int numThreads = block_size*numWorkGroups;
-    size_t local_work_size[] = {static_cast<size_t>(block_size)};
-    size_t global_work_size[] = {static_cast<size_t>(numThreads)};
-    //printf("Enqueueing %i threads for %i output pixels\n", numThreads, w*h);
-
-    /* compile kernel */
-    kEnv.mpkKernel = clCreateKernel( kEnv.mpkProgram, "kernel_RGBToGray", &clStatus );
-    CHECK_OPENCL( clStatus, "clCreateKernel kernel_RGBToGray");
-
-
-    /* set kernel arguments */
-    clStatus = clSetKernelArg( kEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&srcBuffer );
-    CHECK_OPENCL( clStatus, "clSetKernelArg srcBuffer");
-	clStatus = clSetKernelArg( kEnv.mpkKernel, 1, sizeof(cl_mem), (void *)&dstBuffer );
-    CHECK_OPENCL( clStatus, "clSetKernelArg dstBuffer");
-    clStatus = clSetKernelArg( kEnv.mpkKernel, 2, sizeof(int), (void *)&srcWPL );
-    CHECK_OPENCL( clStatus, "clSetKernelArg srcWPL" );
-    clStatus = clSetKernelArg( kEnv.mpkKernel, 3, sizeof(int), (void *)&dstWPL );
-    CHECK_OPENCL( clStatus, "clSetKernelArg dstWPL" );
-    clStatus = clSetKernelArg( kEnv.mpkKernel, 4, sizeof(int), (void *)&h );
-    CHECK_OPENCL( clStatus, "clSetKernelArg height" );
-    clStatus = clSetKernelArg( kEnv.mpkKernel, 5, sizeof(int), (void *)&w );
-    CHECK_OPENCL( clStatus, "clSetKernelArg width" );
-    clStatus = clSetKernelArg( kEnv.mpkKernel, 6, sizeof(float), (void *)&rwt );
-    CHECK_OPENCL( clStatus, "clSetKernelArg rwt" );
-    clStatus = clSetKernelArg( kEnv.mpkKernel, 7, sizeof(float), (void *)&gwt );
-    CHECK_OPENCL( clStatus, "clSetKernelArg gwt");
-    clStatus = clSetKernelArg( kEnv.mpkKernel, 8, sizeof(float), (void *)&bwt );
-    CHECK_OPENCL( clStatus, "clSetKernelArg bwt");
-
-    /* launch kernel & wait */
-PERF_COUNT_SUB("before")
-    clStatus = clEnqueueNDRangeKernel(
-        kEnv.mpkCmdQueue,
-        kEnv.mpkKernel,
-        1, NULL, global_work_size, local_work_size,
-        0, NULL, NULL );
-    CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray" );
-    clFinish( kEnv.mpkCmdQueue );
-PERF_COUNT_SUB("kernel")
+  if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return NULL;
 
-    /* map results back from gpu */
-    void *ptr = clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0, dstSize, 0, NULL, NULL, &clStatus);
-    CHECK_OPENCL( clStatus, "clEnqueueMapBuffer dstBuffer");
-    clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL, NULL);
+  if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) {
+    // magic numbers from leptonica
+    rwt = 0.3;
+    gwt = 0.5;
+    bwt = 0.2;
+  }
+  // normalize
+  float sum = rwt + gwt + bwt;
+  rwt /= sum;
+  gwt /= sum;
+  bwt /= sum;
+
+  // source pix
+  int w, h;
+  pixGetDimensions(srcPix, &w, &h, NULL);
+  // printf("Image is %i x %i\n", w, h);
+  unsigned int *srcData = pixGetData(srcPix);
+  int srcWPL = pixGetWpl(srcPix);
+  int srcSize = srcWPL * h * sizeof(unsigned int);
+
+  // destination pix
+  if ((dstPix = pixCreate(w, h, 8)) == NULL) return NULL;
+  pixCopyResolution(dstPix, srcPix);
+  unsigned int *dstData = pixGetData(dstPix);
+  int dstWPL = pixGetWpl(dstPix);
+  int dstWords = dstWPL * h;
+  int dstSize = dstWords * sizeof(unsigned int);
+  // printf("dstSize = %i\n", dstSize);
+  PERF_COUNT_SUB("pix setup")
+
+  // opencl objects
+  cl_int clStatus;
+  KernelEnv kEnv;
+  SetKernelEnv(&kEnv);
+
+  // source buffer
+  cl_mem srcBuffer =
+      clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+                     srcSize, srcData, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer srcBuffer");
+
+  // destination buffer
+  cl_mem dstBuffer =
+      clCreateBuffer(kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+                     dstSize, dstData, &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateBuffer dstBuffer");
+
+  // setup work group size parameters
+  int block_size = 256;
+  int numWorkGroups = ((h * w + block_size - 1) / block_size);
+  int numThreads = block_size * numWorkGroups;
+  size_t local_work_size[] = {static_cast<size_t>(block_size)};
+  size_t global_work_size[] = {static_cast<size_t>(numThreads)};
+  // printf("Enqueueing %i threads for %i output pixels\n", numThreads, w*h);
+
+  /* compile kernel */
+  kEnv.mpkKernel =
+      clCreateKernel(kEnv.mpkProgram, "kernel_RGBToGray", &clStatus);
+  CHECK_OPENCL(clStatus, "clCreateKernel kernel_RGBToGray");
+
+  /* set kernel arguments */
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), &srcBuffer);
+  CHECK_OPENCL(clStatus, "clSetKernelArg srcBuffer");
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), &dstBuffer);
+  CHECK_OPENCL(clStatus, "clSetKernelArg dstBuffer");
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(int), &srcWPL);
+  CHECK_OPENCL(clStatus, "clSetKernelArg srcWPL");
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(int), &dstWPL);
+  CHECK_OPENCL(clStatus, "clSetKernelArg dstWPL");
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(int), &h);
+  CHECK_OPENCL(clStatus, "clSetKernelArg height");
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 5, sizeof(int), &w);
+  CHECK_OPENCL(clStatus, "clSetKernelArg width");
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 6, sizeof(float), &rwt);
+  CHECK_OPENCL(clStatus, "clSetKernelArg rwt");
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 7, sizeof(float), &gwt);
+  CHECK_OPENCL(clStatus, "clSetKernelArg gwt");
+  clStatus = clSetKernelArg(kEnv.mpkKernel, 8, sizeof(float), &bwt);
+  CHECK_OPENCL(clStatus, "clSetKernelArg bwt");
+
+  /* launch kernel & wait */
+  PERF_COUNT_SUB("before")
+  clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
+                                    NULL, global_work_size, local_work_size,
+                                    0, NULL, NULL);
+  CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray");
+  clFinish(kEnv.mpkCmdQueue);
+  PERF_COUNT_SUB("kernel")
+
+  /* map results back from gpu */
+  void *ptr =
+      clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0,
+                         dstSize, 0, NULL, NULL, &clStatus);
+  CHECK_OPENCL(clStatus, "clEnqueueMapBuffer dstBuffer");
+  clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL,
+                          NULL);
 
 #if 0
     // validate: compute on cpu
@@ -3519,13 +3261,12 @@ PERF_COUNT_SUB("kernel")
         //printf("\n");
     }
 #endif
-    // release opencl objects
-    clReleaseMemObject(srcBuffer);
-    clReleaseMemObject(dstBuffer);
+  // release opencl objects
+  clReleaseMemObject(srcBuffer);
+  clReleaseMemObject(dstBuffer);
 
-
-PERF_COUNT_END
-	// success
-	return dstPix;
+  PERF_COUNT_END
+  // success
+  return dstPix;
 }
-#endif
+#endif
\ No newline at end of file
diff --git a/opencl/openclwrapper.h b/opencl/openclwrapper.h
index ae52a80155..e09e371dbb 100644
--- a/opencl/openclwrapper.h
+++ b/opencl/openclwrapper.h
@@ -1,3 +1,12 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include <stdio.h>
 #include "allheaders.h"
 #include "pix.h"
@@ -10,7 +19,8 @@
 // including CL/cl.h doesn't occur until USE_OPENCL defined below
 
 // platform preprocessor commands
-#if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN__ ) || defined( __MINGW32__ )
+#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \
+    defined(__CYGWIN__) || defined(__MINGW32__)
 #define ON_WINDOWS 1
 #define ON_LINUX   0
 #define ON_APPLE   0
@@ -80,21 +90,23 @@
     time_sub_start = time_funct_start; \
     time_sub_end = time_funct_start;
 
-#define PERF_COUNT_END \
-    QueryPerformanceCounter(&time_funct_end); \
-    elapsed_time_sec = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); \
-    printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
+#define PERF_COUNT_END /* multi-statement: prints "total" elapsed sec */     \
+  QueryPerformanceCounter(&time_funct_end);                                  \
+  elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \
+                     (double)(freq.QuadPart);                                \
+  printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
 #else
 #define PERF_COUNT_START(FUNCT_NAME)
 #define PERF_COUNT_END
 #endif
 
 #if PERF_COUNT_VERBOSE >= 3
-#define PERF_COUNT_SUB(SUB) \
-    QueryPerformanceCounter(&time_sub_end); \
-    elapsed_time_sec = (time_sub_end.QuadPart-time_sub_start.QuadPart)/(double)(freq.QuadPart); \
-    printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
-    time_sub_start = time_sub_end;
+#define PERF_COUNT_SUB(SUB) /* prints SUB time, restarts sub-timer */    \
+  QueryPerformanceCounter(&time_sub_end);                                \
+  elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \
+                     (double)(freq.QuadPart);                            \
+  printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec);      \
+  time_sub_start = time_sub_end;
 #else
 #define PERF_COUNT_SUB(SUB)
 #endif
@@ -112,21 +124,25 @@
     time_sub_start = time_funct_start; \
     time_sub_end = time_funct_start;
 
-#define PERF_COUNT_END \
-    clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); \
-    elapsed_time_sec = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; \
-    printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
+#define PERF_COUNT_END /* multi-statement: prints "total" elapsed sec */  \
+  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);                        \
+  elapsed_time_sec =                                                      \
+      (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +           \
+      (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \
+  printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
 #else
 #define PERF_COUNT_START(FUNCT_NAME)
 #define PERF_COUNT_END
 #endif
 
 #if PERF_COUNT_VERBOSE >= 3
-#define PERF_COUNT_SUB(SUB) \
-    clock_gettime( CLOCK_MONOTONIC, &time_sub_end ); \
-    elapsed_time_sec = (time_sub_end.tv_sec - time_sub_start.tv_sec)*1.0 + (time_sub_end.tv_nsec - time_sub_start.tv_nsec)/1000000000.0; \
-    printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
-    time_sub_start = time_sub_end;
+#define PERF_COUNT_SUB(SUB) /* prints SUB time, restarts sub-timer */ \
+  clock_gettime(CLOCK_MONOTONIC, &time_sub_end);                      \
+  elapsed_time_sec =                                                  \
+      (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 +           \
+      (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \
+  printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec);   \
+  time_sub_start = time_sub_end;
 #else
 #define PERF_COUNT_SUB(SUB)
 #endif
@@ -253,12 +269,12 @@ class OpenclDevice
     // OpenCL implementation of Morphology (Hollow = Closed - Open)
     static PIX* pixHollowCL(PIX  *pixd, PIX  *pixs, l_int32  close_hsize, l_int32  close_vsize, l_int32  open_hsize, l_int32  open_vsize, bool reqDataCopy);
 
-    static void pixGetLinesCL(PIX  *pixd, PIX  *pixs,
-                                            PIX** pix_vline, PIX** pix_hline,
-                                            PIX** pixClosed, bool  getpixClosed,
-                                            l_int32  close_hsize, l_int32  close_vsize,
-                                            l_int32  open_hsize, l_int32  open_vsize,
-                                            l_int32  line_hsize, l_int32  line_vsize);
+    static void pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline,
+                              PIX **pix_hline, PIX **pixClosed,
+                              bool getpixClosed, l_int32 close_hsize,
+                              l_int32 close_vsize, l_int32 open_hsize,
+                              l_int32 open_vsize, l_int32 line_hsize,
+                              l_int32 line_vsize);
 
     //int InitOpenclAttr( OpenCLEnv * env );
     //int ReleaseKernel( KernelEnv * env );
@@ -279,34 +295,23 @@ class OpenclDevice
     static void FreeOpenclDll();
 #endif
 
-
     inline static int AddKernelConfig( int kCount, const char *kName );
 
     /* for binarization */
-    static int HistogramRectOCL(
-        const unsigned char *imagedata,
-        int bytes_per_pixel,
-        int bytes_per_line,
-        int left,
-        int top,
-        int width,
-        int height,
-        int kHistogramSize,
-        int *histogramAllChannels);
-
-    static int ThresholdRectToPixOCL(
-        const unsigned char* imagedata,
-        int bytes_per_pixel,
-        int bytes_per_line,
-        const int* thresholds,
-        const int* hi_values,
-        Pix** pix,
-        int rect_height,
-        int rect_width,
-        int rect_top,
-        int rect_left);
-
-    static Pix * pixConvertRGBToGrayOCL( Pix *pix, float weightRed = 0.3, float weightGreen = 0.5, float weightBlue = 0.2 );
+    static int HistogramRectOCL(unsigned char *imagedata, int bytes_per_pixel,
+                                int bytes_per_line, int left, int top,
+                                int width, int height, int kHistogramSize,
+                                int *histogramAllChannels);
+
+    static int ThresholdRectToPixOCL(unsigned char *imagedata,
+                                     int bytes_per_pixel, int bytes_per_line,
+                                     int *thresholds, int *hi_values, Pix **pix,
+                                     int rect_height, int rect_width,
+                                     int rect_top, int rect_left);
+
+    static Pix *pixConvertRGBToGrayOCL(Pix *pix, float weightRed = 0.3,
+                                       float weightGreen = 0.5,
+                                       float weightBlue = 0.2);
 
     static ds_device getDeviceSelection();
     static ds_device selectedDevice;
diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml
new file mode 100644
index 0000000000..4dd68d4a44
--- /dev/null
+++ b/snap/snapcraft.yaml
@@ -0,0 +1,34 @@
+name: tesseract
+version: master
+summary: open source optical character recognition engine
+description: |
+  Tesseract has Unicode (UTF-8) support, and can recognize more than 100
+  languages "out of the box". It can be trained to recognize other languages.
+  Tesseract supports various output formats: plain text, hOCR (HTML), PDF.
+
+grade: devel # must be 'stable' to release into candidate/stable channels
+confinement: strict
+
+apps:
+  tesseract:  # runs tesseract with TESSDATA_PREFIX preset (see command)
+    command: env TESSDATA_PREFIX=$SNAP_USER_COMMON tesseract
+    plugs: [home]  # requests the "home" interface for this app
+
+parts:
+  tesseract:
+    source: .  # built from the local source tree
+    plugin: autotools
+    build-packages:  # packages required while building this part
+      - autoconf-archive
+      - pkg-config
+      - libpng12-dev
+      - libjpeg8-dev
+      - libtiff5-dev
+      - zlib1g-dev
+      - libicu-dev
+      - libpango1.0-dev
+      - libcairo2-dev
+    after: [leptonica]  # the leptonica part below is processed first
+  leptonica:
+    source: http://www.leptonica.org/source/leptonica-1.74.1.tar.gz  # NOTE(review): plain HTTP, no checksum given
+    plugin: autotools
diff --git a/tessdata/configs/box.train.stderr b/tessdata/configs/box.train.stderr
index 6fc51fdd5e..d44ff2b2c7 100644
--- a/tessdata/configs/box.train.stderr
+++ b/tessdata/configs/box.train.stderr
@@ -1,7 +1,7 @@
-file_type                   .bl
-#tessedit_use_nn				F
-textord_fast_pitch_test	T
-tessedit_single_match	0
+file_type .bl
+#tessedit_use_nn F
+textord_fast_pitch_test T
+tessedit_single_match 0
 tessedit_zero_rejection T
 tessedit_minimal_rejection F
 tessedit_write_rep_codes F
diff --git a/tessdata/pdf.ttf b/tessdata/pdf.ttf
index 578974a9e8..d1472b20ef 100644
Binary files a/tessdata/pdf.ttf and b/tessdata/pdf.ttf differ
diff --git a/testing/reorgdata.sh b/testing/reorgdata.sh
index 141de4a6f4..8cee64ff80 100755
--- a/testing/reorgdata.sh
+++ b/testing/reorgdata.sh
@@ -1,4 +1,13 @@
 #!/bin/bash
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 if [ $# -ne 1 ]
 then
diff --git a/testing/runtestset.sh b/testing/runtestset.sh
index 0c9595f9aa..5c2a7e7aa2 100755
--- a/testing/runtestset.sh
+++ b/testing/runtestset.sh
@@ -64,7 +64,7 @@ do
      srcdir="$imdir"
   fi
 #  echo "$srcdir/$page.tif"
-  $tess $srcdir/$page.tif $resdir/$page -psm 6 $config 2>&1 |grep -v "OCR Engine"
+  $tess $srcdir/$page.tif $resdir/$page --psm 6 $config 2>&1 |grep -v "OCR Engine"
   if [ -r times.txt ]
   then
     read t <times.txt
diff --git a/textord/alignedblob.cpp b/textord/alignedblob.cpp
index 007d4ad38f..0dab26bf8d 100644
--- a/textord/alignedblob.cpp
+++ b/textord/alignedblob.cpp
@@ -188,7 +188,7 @@ ScrollView* AlignedBlob::DisplayTabs(const char* window_name,
   gsearch.StartFullSearch();
   BLOBNBOX* bbox;
   while ((bbox = gsearch.NextFullSearch()) != NULL) {
-    TBOX box = bbox->bounding_box();
+    const TBOX& box = bbox->bounding_box();
     int left_x = box.left();
     int right_x = box.right();
     int top_y = box.top();
diff --git a/textord/baselinedetect.cpp b/textord/baselinedetect.cpp
index a2b0173949..9bbd999e15 100644
--- a/textord/baselinedetect.cpp
+++ b/textord/baselinedetect.cpp
@@ -850,7 +850,8 @@ void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
   Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL;
   for (int i = 0; i < blocks_.size(); ++i) {
     BaselineBlock* bl_block = blocks_[i];
-    bl_block->PrepareForSplineFitting(page_tr, remove_noise);
+    if (enable_splines)
+      bl_block->PrepareForSplineFitting(page_tr, remove_noise);
     bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
     if (pix_spline) {
       bl_block->DrawPixSpline(pix_spline);
diff --git a/textord/bbgrid.cpp b/textord/bbgrid.cpp
index 06114748f5..4cadcdcf2e 100644
--- a/textord/bbgrid.cpp
+++ b/textord/bbgrid.cpp
@@ -231,7 +231,7 @@ Pix* GridReducedPix(const TBOX& box, int gridsize,
 // Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
 Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize,
                               ICOORD bleft, int* left, int* bottom) {
-  TBOX box = outline->bounding_box();
+  const TBOX& box = outline->bounding_box();
   Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
   int wpl = pixGetWpl(pix);
   l_uint32* data = pixGetData(pix);
@@ -257,7 +257,7 @@ Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize,
 // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
 Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize,
                             ICOORD bleft, int* left, int* bottom) {
-  TBOX box = block->bounding_box();
+  const TBOX& box = block->bounding_box();
   Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
   int wpl = pixGetWpl(pix);
   l_uint32* data = pixGetData(pix);
diff --git a/textord/bbgrid.h b/textord/bbgrid.h
index d16b902ecf..066b5bae1e 100644
--- a/textord/bbgrid.h
+++ b/textord/bbgrid.h
@@ -364,7 +364,7 @@ template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {
   // An iterator over the list at (x_, y_) in the grid_.
   BBC_C_IT it_;
   // Set of unique returned elements used when unique_mode_ is true.
-  unordered_set<BBC*, PtrHash<BBC> > returns_;
+  TessHashSet<BBC*, PtrHash<BBC> > returns_;
 };
 
 // Sort function to sort a BBC by bounding_box().left().
@@ -623,7 +623,7 @@ void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::DisplayBoxes(ScrollView* tab_win) {
   gsearch.StartFullSearch();
   BBC* bbox;
   while ((bbox = gsearch.NextFullSearch()) != NULL) {
-    TBOX box = bbox->bounding_box();
+    const TBOX& box = bbox->bounding_box();
     int left_x = box.left();
     int right_x = box.right();
     int top_y = box.top();
diff --git a/textord/blkocc.h b/textord/blkocc.h
index 89462dc86f..f27bb9a5a5 100644
--- a/textord/blkocc.h
+++ b/textord/blkocc.h
@@ -52,9 +52,8 @@ class REGION_OCC:public ELIST_LINK
     float max_x;                 //Highest x in region
     inT16 region_type;           //Type of crossing
 
-    REGION_OCC() {
-    };                           //constructor used
-    //only in COPIER etc
+    // Default constructor; used only in COPIER etc.
+    REGION_OCC() {}
     REGION_OCC(  //constructor
                float min,
                float max,
@@ -72,12 +71,12 @@ ELISTIZEH (REGION_OCC)
 Adapted from the following procedure so that it can be used in the bands
 class in an include file...
 
-BOOL8						range_in_band[
+BOOL8           range_in_band[
               range within band?
-inT16						band_max,
-inT16						band_min,
-inT16						range_max,
-inT16						range_min]
+inT16           band_max,
+inT16           band_min,
+inT16           range_max,
+inT16           range_min]
 {
   if ( (range_min >= band_min) && (range_max < band_max) )
     return TRUE;
@@ -91,12 +90,12 @@ inT16						range_min]
 Adapted from the following procedure so that it can be used in the bands
 class in an include file...
 
-BOOL8						range_overlaps_band[
+BOOL8           range_overlaps_band[
               range crosses band?
-inT16						band_max,
-inT16						band_min,
-inT16						range_max,
-inT16						range_min]
+inT16           band_max,
+inT16           band_min,
+inT16           range_max,
+inT16           range_min]
 {
   if ( (range_max >= band_min) && (range_min < band_max) )
     return TRUE;
diff --git a/textord/ccnontextdetect.cpp b/textord/ccnontextdetect.cpp
index 1cb0e4c6c7..f6a7d8f41a 100644
--- a/textord/ccnontextdetect.cpp
+++ b/textord/ccnontextdetect.cpp
@@ -305,7 +305,7 @@ bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
   // Search the grid to see what intersects it.
   // Setup a Rectangle search for overlapping this blob.
   BlobGridSearch rsearch(this);
-  TBOX box = blob->bounding_box();
+  const TBOX& box = blob->bounding_box();
   rsearch.StartRectSearch(box);
   rsearch.SetUniqueMode(true);
   BLOBNBOX* neighbour;
diff --git a/textord/colpartition.cpp b/textord/colpartition.cpp
index effb5a9112..0d0b4ca39e 100644
--- a/textord/colpartition.cpp
+++ b/textord/colpartition.cpp
@@ -918,7 +918,7 @@ void ColPartition::ComputeLimits() {
     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
       bbox = it.data();
       if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
-        TBOX box = bbox->bounding_box();
+        const TBOX& box = bbox->bounding_box();
         int area = box.area();
         top_stats.add(box.top(), area);
         bottom_stats.add(box.bottom(), area);
@@ -1181,8 +1181,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
       if (best_end == NULL) {
         tprintf("No path\n");
       } else {
-        tprintf("Total cost = %d vs allowed %d\n",
-                best_end->total_cost(), blob_count);
+        tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
+                blob_count);
       }
     }
     delete [] projection;
@@ -1632,6 +1632,10 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright,
                                   ColPartition_LIST* used_parts) {
   if (block_parts->empty())
     return NULL;  // Nothing to do.
+  // If the block_parts are not in reading order, then it will make an invalid
+  // block polygon and bounding_box, so sort by bounding box now just to make
+  // sure.
+  block_parts->sort(&ColPartition::SortByBBox);
   ColPartition_IT it(block_parts);
   ColPartition* part = it.data();
   PolyBlockType type = part->type();
@@ -2121,7 +2125,7 @@ void ColPartition::RefinePartnersByOverlap(bool upper,
 // Return true if bbox belongs better in this than other.
 bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox,
                                        const ColPartition& other) {
-  TBOX box = bbox->bounding_box();
+  const TBOX& box = bbox->bounding_box();
   // Margins take priority.
   int left = box.left();
   int right = box.right();
diff --git a/textord/colpartition.h b/textord/colpartition.h
index 5c941cce15..7fcbc0004e 100644
--- a/textord/colpartition.h
+++ b/textord/colpartition.h
@@ -704,6 +704,25 @@ class ColPartition : public ELIST2_LINK {
   // doing a SideSearch when you want things in the same page column.
   bool IsInSameColumnAs(const ColPartition& part) const;
 
+  // qsort-style comparator that orders ColPartition pointers by bounding box.
+  static int SortByBBox(const void* p1, const void* p2) {
+    const ColPartition* lhs = *static_cast<const ColPartition* const*>(p1);
+    const ColPartition* rhs = *static_cast<const ColPartition* const*>(p2);
+    const int lhs_mid_y = lhs->bounding_box_.y_middle();
+    const int rhs_mid_y = rhs->bounding_box_.y_middle();
+    // True when a box's vertical middle lies in the other's [bottom, top].
+    const bool lhs_mid_in_rhs = rhs->bounding_box_.bottom() <= lhs_mid_y &&
+                                lhs_mid_y <= rhs->bounding_box_.top();
+    const bool rhs_mid_in_lhs = lhs->bounding_box_.bottom() <= rhs_mid_y &&
+                                rhs_mid_y <= lhs->bounding_box_.top();
+    if (!lhs_mid_in_rhs && !rhs_mid_in_lhs) {
+      // Neither middle falls inside the other box: order by decreasing y.
+      return rhs_mid_y - lhs_mid_y;
+    }
+    // Boxes share a vertical band: order by increasing x middle.
+    return lhs->bounding_box_.x_middle() - rhs->bounding_box_.x_middle();
+  }
+
   // Sets the column bounds. Primarily used in testing.
   void set_first_column(int column) {
     first_column_ = column;
diff --git a/textord/colpartitiongrid.cpp b/textord/colpartitiongrid.cpp
index 4d703fbe7d..86c883280e 100644
--- a/textord/colpartitiongrid.cpp
+++ b/textord/colpartitiongrid.cpp
@@ -86,7 +86,7 @@ void ColPartitionGrid::HandleClick(int x, int y) {
   ColPartition* neighbour;
   FCOORD click(x, y);
   while ((neighbour = radsearch.NextRadSearch()) != NULL) {
-    TBOX nbox = neighbour->bounding_box();
+    const TBOX& nbox = neighbour->bounding_box();
     if (nbox.contains(click)) {
       tprintf("Block box:");
       neighbour->bounding_box().print();
@@ -1037,7 +1037,7 @@ void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns,
     ColPartition* part = part_it.data();
     ColPartitionSet* columns = NULL;
     if (best_columns != NULL) {
-      TBOX part_box = part->bounding_box();
+      const TBOX& part_box = part->bounding_box();
       // Get the columns from the y grid coord.
       int grid_x, grid_y;
       GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
@@ -1376,7 +1376,7 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition* part,
     // combined box to see if anything else is inappropriately overlapped.
     if (!part_box.contains(c_box) && !c_box.contains(part_box)) {
       // Search the combined rectangle to see if anything new is overlapped.
-      // This is a preliminary test designed to quickly weed-out stupid
+      // This is a preliminary test designed to quickly weed-out poor
       // merge candidates that would create a big list of overlapped objects
       // for the squared-order overlap analysis. Eg. vertical and horizontal
       // line-like objects that overlap real text when merged:
@@ -1569,7 +1569,7 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(
     const TBOX& im_box, const FCOORD& rerotation,
     bool debug, const ColPartition& part, int* best_distance) {
   // Set up a rectangle search bounded by the part.
-  TBOX part_box = part.bounding_box();
+  const TBOX& part_box = part.bounding_box();
   TBOX search_box;
   ICOORD dist_scaling;
   ComputeSearchBoxAndScaling(direction, part_box, gridsize(),
@@ -1619,10 +1619,10 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(
         image_bias - htext_score >= kSmoothDecisionMargin &&
         image_bias - vtext_score >= kSmoothDecisionMargin) {
       *best_distance = dists[NPT_IMAGE][0];
-      if (dists[NPT_WEAK_VTEXT].size() > 0 &&
+      if (!dists[NPT_WEAK_VTEXT].empty() &&
           *best_distance > dists[NPT_WEAK_VTEXT][0])
         *best_distance = dists[NPT_WEAK_VTEXT][0];
-      if (dists[NPT_WEAK_HTEXT].size() > 0 &&
+      if (!dists[NPT_WEAK_HTEXT].empty() &&
           *best_distance > dists[NPT_WEAK_HTEXT][0])
         *best_distance = dists[NPT_WEAK_HTEXT][0];
       return BRT_POLYIMAGE;
diff --git a/textord/devanagari_processing.h b/textord/devanagari_processing.h
index 990a5dfe39..0d070decd0 100644
--- a/textord/devanagari_processing.h
+++ b/textord/devanagari_processing.h
@@ -1,5 +1,14 @@
 // Copyright 2008 Google Inc. All Rights Reserved.
 // Author: shobhitsaxena@google.com (Shobhit Saxena)
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
@@ -41,9 +50,7 @@ class PixelHistogram {
     length_ = 0;
   }
 
-  int* hist() const {
-    return hist_;
-  }
+  int* hist() const { return hist_; }
 
   int length() const {
     return length_;
diff --git a/textord/drawedg.h b/textord/drawedg.h
index 6bf062d4ee..ef5ed5e202 100644
--- a/textord/drawedg.h
+++ b/textord/drawedg.h
@@ -1,8 +1,9 @@
 /**********************************************************************
  * File:        drawedg.h  (Formerly drawedge.h)
- * Description: Collection of functions to draw things to do with edge detection.
- * Author:					Ray Smith
- * Created:					Thu Jun 06 13:29:20 BST 1991
+ * Description: Collection of functions to draw things to do with edge
+ *              detection.
+ * Author:          Ray Smith
+ * Created:         Thu Jun 06 13:29:20 BST 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/textord/fpchop.cpp b/textord/fpchop.cpp
index 4c18338b8f..699d419620 100644
--- a/textord/fpchop.cpp
+++ b/textord/fpchop.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        fpchop.cpp  (Formerly fp_chop.c)
  * Description: Code to chop fixed pitch text into character cells.
- * Author:		Ray Smith
- * Created:		Thu Sep 16 11:14:15 BST 1993
+ * Author:    Ray Smith
+ * Created:   Thu Sep 16 11:14:15 BST 1993
  *
  * (C) Copyright 1993, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -259,8 +259,8 @@ void split_to_blob(                                 //split the blob
                      pitch_error,
                      left_coutlines,
                      right_coutlines);
-  if (blob != NULL)
-    delete blob;                 //free it
+
+  delete blob;
 }
 
 /**********************************************************************
@@ -730,7 +730,6 @@ C_OUTLINE *join_chopped_fragments(                         //join pieces
   return NULL;
 }
 
-
 /**********************************************************************
  * join_segments
  *
diff --git a/textord/gap_map.cpp b/textord/gap_map.cpp
index 2f8440e601..421208784d 100644
--- a/textord/gap_map.cpp
+++ b/textord/gap_map.cpp
@@ -1,3 +1,12 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include          "statistc.h"
 #include          "gap_map.h"
 
diff --git a/textord/gap_map.h b/textord/gap_map.h
index 914e8dbdea..227db3646b 100644
--- a/textord/gap_map.h
+++ b/textord/gap_map.h
@@ -1,3 +1,12 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef           GAP_MAP_H
 #define           GAP_MAP_H
 
diff --git a/textord/imagefind.cpp b/textord/imagefind.cpp
index c119e69f95..14442d52ed 100644
--- a/textord/imagefind.cpp
+++ b/textord/imagefind.cpp
@@ -77,7 +77,7 @@ Pix* ImageFind::FindImages(Pix* pix) {
   // Leptonica will print an error message and return NULL if we call
   // pixGenHalftoneMask(pixr, NULL, ...) with too small image, so we
   // want to bypass that.
-  if (pixGetWidth(pixr) < kMinImageFindSize || 
+  if (pixGetWidth(pixr) < kMinImageFindSize ||
       pixGetHeight(pixr) < kMinImageFindSize) {
     pixDestroy(&pixr);
     return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
@@ -1115,7 +1115,7 @@ static bool TestWeakIntersectedPart(const TBOX& im_box,
                                     ColPartition* part) {
   if (part->flow() < BTFT_STRONG_CHAIN) {
     // A weak partition intersects the box.
-    TBOX part_box = part->bounding_box();
+    const TBOX& part_box = part->bounding_box();
     if (im_box.contains(part_box)) {
       int area = part_box.area();
       int intersect_area = IntersectArea(part_box, part_list);
@@ -1180,7 +1180,7 @@ static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) {
         part->flow() == BTFT_STRONG_CHAIN) {
       // Text intersects the box.
       any_text_in_padded_rect = true;
-      TBOX part_box = part->bounding_box();
+      const TBOX& part_box = part->bounding_box();
       if (box->overlap(part_box)) {
         return true;
       }
diff --git a/textord/makerow.cpp b/textord/makerow.cpp
index 1df4855b40..a5749ad680 100644
--- a/textord/makerow.cpp
+++ b/textord/makerow.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        makerow.cpp  (Formerly makerows.c)
  * Description: Code to arrange blobs into rows of text.
- * Author:		Ray Smith
- * Created:		Mon Sep 21 14:34:48 BST 1992
+ * Author:    Ray Smith
+ * Created:   Mon Sep 21 14:34:48 BST 1992
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -507,8 +507,7 @@ void vigorous_noise_removal(TO_BLOCK* block) {
             continue;  // Looks OK.
         }
         // It might be noise so get rid of it.
-        if (blob->cblob() != NULL)
-          delete blob->cblob();
+        delete blob->cblob();
         delete b_it.extract();
       } else {
         prev = blob;
@@ -671,7 +670,7 @@ BOOL8 find_best_dropout_row(                    //find neighbours
                             TO_ROW_IT *row_it,  //current position
                             BOOL8 testing_on    //correct orientation
                            ) {
-  inT32 next_index;              //of neighbouring row
+  inT32 next_index;              // of neighbouring row
   inT32 row_offset;              //from current row
   inT32 abs_dist;                //absolute distance
   inT8 row_inc;                  //increment to row_index
@@ -1786,7 +1785,7 @@ static int CountOverlaps(const TBOX& box, int min_height,
   BLOBNBOX_IT blob_it(blobs);
   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
     BLOBNBOX* blob = blob_it.data();
-    TBOX blob_box = blob->bounding_box();
+    const TBOX &blob_box = blob->bounding_box();
     if (blob_box.height() >= min_height && box.major_overlap(blob_box)) {
       ++overlaps;
     }
diff --git a/textord/oldbasel.cpp b/textord/oldbasel.cpp
index c73fe8d57b..99e55fdbb6 100644
--- a/textord/oldbasel.cpp
+++ b/textord/oldbasel.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        oldbasel.cpp  (Formerly oldbl.c)
  * Description: A re-implementation of the old baseline algorithm.
- * Author:		Ray Smith
- * Created:		Wed Oct  6 09:41:48 BST 1993
+ * Author:    Ray Smith
+ * Created:   Wed Oct  6 09:41:48 BST 1993
  *
  * (C) Copyright 1993, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -122,7 +122,7 @@ void Textord::correlate_lines(TO_BLOCK *block, float gradient) {
   TO_ROW **rows;                 //array of ptrs
   int rowcount;                  /*no of rows to do */
   int rowindex;                  /*no of row */
-                                 //iterator
+                                 // iterator
   TO_ROW_IT row_it = block->get_rows ();
 
   rowcount = row_it.length ();
@@ -1018,61 +1018,6 @@ int *partcount                   /*no of partitions */
   return bestpart;
 }
 
-
-///*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking
-//partitions and gives all the rest partid 0*/
-//
-//merge_partitions(partids,partcount,blobcount,bestpart)
-//register char              *partids;                     /*partition numbers*/
-//int                        partcount;                    /*no of partitions*/
-//int                        blobcount;                    /*no of blobs*/
-//int                        bestpart;                     /*best partition*/
-//{
-//   int                     blobindex;                    /*no along text line*/
-//   int                     runlength;                    /*run of same partition*/
-//   int                     bestrun;                      /*biggest runlength*/
-//
-//   bestrun=0;                                            /*no runs yet*/
-//   runlength=1;
-//   for (blobindex=1;blobindex<blobcount;blobindex++)
-//   {  if (partids[blobindex]!=partids[blobindex-1])
-//      {  if (runlength>bestrun)
-//            bestrun=runlength;                           /*find biggest run*/
-//         runlength=1;                                    /*new run*/
-//      }
-//      else
-//      {  runlength++;
-//      }
-//   }
-//   if (runlength>bestrun)
-//      bestrun=runlength;
-//
-//   for (blobindex=0;blobindex<blobcount;blobindex++)
-//   {  if (blobindex<1
-//      || partids[blobindex]!=partids[blobindex-1])
-//      {  if ((blobindex+1>=blobcount
-//         || partids[blobindex]!=partids[blobindex+1])
-//                                                         /*loner*/
-//         && (bestrun>2 || partids[blobindex]!=bestpart))
-//         {  partids[blobindex]=partcount;                /*discard loner*/
-//         }
-//         else if (blobindex+1<blobcount
-//         && partids[blobindex]==partids[blobindex+1]
-//                                                         /*pair*/
-//         && (blobindex+2>=blobcount
-//         || partids[blobindex]!=partids[blobindex+2])
-//         && (bestrun>3 || partids[blobindex]!=bestpart))
-//         {  partids[blobindex]=partcount;                /*discard both*/
-//            partids[blobindex+1]=partcount;
-//         }
-//      }
-//   }
-//   for (blobindex=0;blobindex<blobcount;blobindex++)
-//   {  if (partids[blobindex]<partcount)
-//         partids[blobindex]=0;                           /*all others together*/
-//   }
-//}
-
 /**********************************************************************
  * partition_coords
  *
@@ -1414,14 +1359,12 @@ int bestpart                     /*biggest partition */
   bestneg = 0.0;       /*no step yet */
   for (partition = 0; partition < partcount; partition++) {
     if (partition != bestpart) {
-
-	//by jetsoft divide by zero possible
-		if (partsizes[partition]==0)
-		partsteps[partition]=0;
-       else
-		partsteps[partition] /= partsizes[partition];
-	//
-
+      // Fix by jetsoft: guard against a possible divide by zero.
+      if (partsizes[partition] == 0)
+        partsteps[partition] = 0;
+      else
+        partsteps[partition] /= partsizes[partition];
+      //
 
       if (partsteps[partition] >= MINASCRISE
       && partsizes[partition] > poscount) {
@@ -1459,8 +1402,8 @@ int blobcount,                   /*blobs in blobcoords */
 QSPLINE * baseline,              /*established */
 float jumplimit                  /*min ascender height */
 ) {
-  int blobindex;                 /*current blob */
-                                 /*height statistics */
+  int blobindex; /*current blob */
+                 /*height statistics */
   STATS heightstat (0, MAXHEIGHT);
   int height;                    /*height of blob */
   int xcentre;                   /*centre of blob */
diff --git a/textord/pithsync.h b/textord/pithsync.h
index f9ba479e2f..386426be72 100644
--- a/textord/pithsync.h
+++ b/textord/pithsync.h
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        pithsync.h  (Formerly pitsync2.h)
  * Description: Code to find the optimum fixed pitch segmentation of some blobs.
- * Author:		Ray Smith
- * Created:		Thu Nov 19 11:48:05 GMT 1992
+ * Author:    Ray Smith
+ * Created:   Thu Nov 19 11:48:05 GMT 1992
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -66,7 +66,7 @@ class FPCUTPT
       inT16 pitch,               //proposed pitch
       inT16 pitch_error);        //allowed tolerance
 
-    inT32 position() {  //access func
+    inT32 position() {  // access func
       return xpos;
     }
     double cost_function() {
diff --git a/textord/pitsync1.h b/textord/pitsync1.h
index c2fb9bec65..5374b003dd 100644
--- a/textord/pitsync1.h
+++ b/textord/pitsync1.h
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        pitsync1.h  (Formerly pitsync.h)
  * Description: Code to find the optimum fixed pitch segmentation of some blobs.
- * Author:		Ray Smith
- * Created:		Thu Nov 19 11:48:05 GMT 1992
+ * Author:    Ray Smith
+ * Created:   Thu Nov 19 11:48:05 GMT 1992
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -46,7 +46,7 @@ class FPSEGPT:public ELIST_LINK
             FPSEGPT_LIST *prev_list);  //previous segment
     FPSEGPT(FPCUTPT *cutpt);  //build from new type
 
-    inT32 position() {  //access func
+    inT32 position() {  // access func
       return xpos;
     }
     double cost_function() {
diff --git a/textord/scanedg.cpp b/textord/scanedg.cpp
index dbb3b66298..0430843619 100644
--- a/textord/scanedg.cpp
+++ b/textord/scanedg.cpp
@@ -335,7 +335,7 @@ void join_edges(CRACKEDGE *edge1,  // edges to join
   if (edge1->pos.x() + edge1->stepx != edge2->pos.x()
   || edge1->pos.y() + edge1->stepy != edge2->pos.y()) {
     CRACKEDGE *tempedge = edge1;
-    edge1 = edge2;               // swap araound
+    edge1 = edge2;               // swap around
     edge2 = tempedge;
   }
 
diff --git a/textord/strokewidth.cpp b/textord/strokewidth.cpp
index 5d0fdc5133..059aa9b85e 100644
--- a/textord/strokewidth.cpp
+++ b/textord/strokewidth.cpp
@@ -393,7 +393,7 @@ void StrokeWidth::GradeBlobsIntoPartitions(
 }
 
 static void PrintBoxWidths(BLOBNBOX* neighbour) {
-  TBOX nbox = neighbour->bounding_box();
+  const TBOX& nbox = neighbour->bounding_box();
   tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n",
           nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
           neighbour->horz_stroke_width(), neighbour->vert_stroke_width(),
@@ -1939,7 +1939,7 @@ ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name,
   gsearch.StartFullSearch();
   BLOBNBOX* bbox;
   while ((bbox = gsearch.NextFullSearch()) != NULL) {
-    TBOX box = bbox->bounding_box();
+    const TBOX& box = bbox->bounding_box();
     int left_x = box.left();
     int right_x = box.right();
     int top_y = box.top();
diff --git a/textord/tabfind.cpp b/textord/tabfind.cpp
index dc7a072b7d..30bad8bb3c 100644
--- a/textord/tabfind.cpp
+++ b/textord/tabfind.cpp
@@ -229,7 +229,7 @@ void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
                                        bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
                                        *gutter_width, box.top(), box.bottom());
   if (gutter_bbox != NULL) {
-    TBOX gutter_box = gutter_bbox->bounding_box();
+    const TBOX& gutter_box = gutter_bbox->bounding_box();
     *gutter_width = left ? tab_x - gutter_box.right()
                         : gutter_box.left() - tab_x;
   }
@@ -261,7 +261,7 @@ void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
   int neighbour_edge = left ? RightEdgeForBox(box, true, false)
                             : LeftEdgeForBox(box, true, false);
   if (neighbour != NULL) {
-    TBOX n_box = neighbour->bounding_box();
+    const TBOX& n_box = neighbour->bounding_box();
     if (debug) {
       tprintf("Found neighbour:");
       n_box.print();
diff --git a/textord/tablefind.cpp b/textord/tablefind.cpp
index 425bdbc218..b68655a57c 100644
--- a/textord/tablefind.cpp
+++ b/textord/tablefind.cpp
@@ -550,7 +550,7 @@ void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks,
   // iterate through the source list
   for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
     ColSegment* src_seg = src_it.data();
-    TBOX src_box = src_seg->bounding_box();
+    const TBOX& src_box = src_seg->bounding_box();
     bool match_found = false;
     // iterate through the destination list to find a matching column block
     for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) {
@@ -1342,7 +1342,7 @@ void TableFinder::GetTableRegions(ColSegment_LIST* table_columns,
   // create a bool array to hold projection on y-axis
   bool* table_region = new bool[page_height];
   while ((part = gsearch.NextFullSearch()) != NULL) {
-    TBOX part_box = part->bounding_box();
+    const TBOX& part_box = part->bounding_box();
     // reset the projection array
     for (int i = 0; i < page_height; i++) {
       table_region[i] = false;
@@ -1974,7 +1974,7 @@ void TableFinder::DisplayColPartitionConnections(
 
     ColPartition* upper_part = part->nearest_neighbor_above();
     if (upper_part) {
-      TBOX upper_box = upper_part->bounding_box();
+      const TBOX& upper_box = upper_part->bounding_box();
       int mid_x = (left_x + right_x) / 2;
       int mid_y = (top_y + bottom_y) / 2;
       int other_x = (upper_box.left() + upper_box.right()) / 2;
@@ -1985,7 +1985,7 @@ void TableFinder::DisplayColPartitionConnections(
     }
     ColPartition* lower_part = part->nearest_neighbor_below();
     if (lower_part) {
-      TBOX lower_box = lower_part->bounding_box();
+      const TBOX& lower_box = lower_part->bounding_box();
       int mid_x = (left_x + right_x) / 2;
       int mid_y = (top_y + bottom_y) / 2;
       int other_x = (lower_box.left() + lower_box.right()) / 2;
@@ -2098,7 +2098,7 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid* grid,
   table_search.StartFullSearch();
   ColSegment* table;
   while ((table = table_search.NextFullSearch()) != NULL) {
-    TBOX table_box = table->bounding_box();
+    const TBOX& table_box = table->bounding_box();
     // Start a rect search on table_box
     GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
         rectsearch(grid);
diff --git a/textord/tabvector.cpp b/textord/tabvector.cpp
index c8d508f0df..f3e99fa38f 100644
--- a/textord/tabvector.cpp
+++ b/textord/tabvector.cpp
@@ -435,7 +435,7 @@ bool TabVector::SimilarTo(const ICOORD& vertical,
     vsearch.StartVerticalSearch(left, right, top_y);
     BLOBNBOX* blob;
     while ((blob = vsearch.NextVerticalSearch(true)) != NULL) {
-      TBOX box = blob->bounding_box();
+      const TBOX& box = blob->bounding_box();
       if (box.top() > bottom_y)
         return true;  // Nothing found.
       if (box.bottom() < top_y)
@@ -523,12 +523,12 @@ const char* kAlignmentNames[] = {
 
 // Print basic information about this tab vector.
 void TabVector::Print(const char* prefix) {
-  tprintf("%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
-          " partners=%d\n",
-          prefix, kAlignmentNames[alignment_],
-          startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(),
-          mean_width_, percent_score_, sort_key_,
-          boxes_.length(), partners_.length());
+  tprintf(
+      "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
+      " partners=%d\n",
+      prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(),
+      endpt_.x(), endpt_.y(), mean_width_, percent_score_, sort_key_,
+      boxes_.length(), partners_.length());
 }
 
 // Print basic information about this tab vector and every box in it.
@@ -806,7 +806,7 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) {
     // Fit a line to all the boxes in the list.
     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
       BLOBNBOX* bbox = it.data();
-      TBOX box = bbox->bounding_box();
+      const TBOX& box = bbox->bounding_box();
       int x1 = IsRightTab() ? box.right() : box.left();
       ICOORD boxpt(x1, box.bottom());
       linepoints.Add(boxpt);
@@ -831,7 +831,7 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) {
   int width_count = 0;
   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
     BLOBNBOX* bbox = it.data();
-    TBOX box = bbox->bounding_box();
+    const TBOX& box = bbox->bounding_box();
     mean_width_ += box.width();
     ++width_count;
     int x1 = IsRightTab() ? box.right() : box.left();
diff --git a/textord/textlineprojection.cpp b/textord/textlineprojection.cpp
index 6018e5fdaa..2651a19b33 100644
--- a/textord/textlineprojection.cpp
+++ b/textord/textlineprojection.cpp
@@ -760,7 +760,7 @@ void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
   pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
 }
 #ifdef _MSC_VER
-#pragma optimize( "", on )
+#pragma optimize("", on)
 #endif  // _MSC_VER
 
 // Transform tesseract image coordinates to coordinates used in the projection.
diff --git a/textord/textord.cpp b/textord/textord.cpp
index 1f7e8a8869..94ef49c783 100644
--- a/textord/textord.cpp
+++ b/textord/textord.cpp
@@ -33,7 +33,8 @@
 namespace tesseract {
 
 Textord::Textord(CCStruct* ccstruct)
-    : ccstruct_(ccstruct), use_cjk_fp_model_(false),
+    : ccstruct_(ccstruct),
+      use_cjk_fp_model_(false),
       // makerow.cpp ///////////////////////////////////////////
       BOOL_MEMBER(textord_single_height_mode, false,
                   "Script has no xheight, so use a single mode",
@@ -46,24 +47,20 @@ Textord::Textord(CCStruct* ccstruct)
                   "old_to_method.",
                   ccstruct_->params()),
       BOOL_MEMBER(tosp_only_use_prop_rows, true,
-                  "Block stats to use fixed pitch rows?",
-                  ccstruct_->params()),
+                  "Block stats to use fixed pitch rows?", ccstruct_->params()),
       BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
                   "Force word breaks on punct to break long lines in non-space "
                   "delimited langs",
                   ccstruct_->params()),
-      BOOL_MEMBER(tosp_use_pre_chopping, false,
-                  "Space stats use prechopping?",
+      BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
                   ccstruct_->params()),
       BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
                   ccstruct_->params()),
-      BOOL_MEMBER(tosp_block_use_cert_spaces, true,
-                  "Only stat OBVIOUS spaces",
+      BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
                   ccstruct_->params()),
       BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
                   ccstruct_->params()),
-      BOOL_MEMBER(tosp_narrow_blobs_not_cert, true,
-            "Only stat OBVIOUS spaces",
+      BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
                   ccstruct_->params()),
       BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
                   ccstruct_->params()),
@@ -78,30 +75,24 @@ Textord::Textord(CCStruct* ccstruct)
                   "Don't restrict kn->sp fuzzy limit to tables",
                   ccstruct_->params()),
       BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
-                  "Use within xht gap for wd breaks",
-                  ccstruct_->params()),
+                  "Use within xht gap for wd breaks", ccstruct_->params()),
       BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
                   ccstruct_->params()),
       BOOL_MEMBER(tosp_only_use_xht_gaps, false,
-                  "Only use within xht gap for wd breaks",
-                  ccstruct_->params()),
+                  "Only use within xht gap for wd breaks", ccstruct_->params()),
       BOOL_MEMBER(tosp_rule_9_test_punct, false,
-                  "Don't chng kn to space next to punct",
-                  ccstruct_->params()),
+                  "Don't chng kn to space next to punct", ccstruct_->params()),
       BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
                   ccstruct_->params()),
       BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
                   ccstruct_->params()),
       BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
                   ccstruct_->params()),
-      INT_MEMBER(tosp_debug_level, 0, "Debug data",
-                 ccstruct_->params()),
+      INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
       INT_MEMBER(tosp_enough_space_samples_for_median, 3,
-           "or should we use mean",
-                 ccstruct_->params()),
+                 "or should we use mean", ccstruct_->params()),
       INT_MEMBER(tosp_redo_kern_limit, 10,
-                 "No.samples reqd to reestimate for row",
-                 ccstruct_->params()),
+                 "No.samples reqd to reestimate for row", ccstruct_->params()),
       INT_MEMBER(tosp_few_samples, 40,
                  "No.gaps reqd with 1 large gap to treat as a table",
                  ccstruct_->params()),
@@ -114,30 +105,24 @@ Textord::Textord(CCStruct* ccstruct)
                     "Factor for defining space threshold in terms of space and "
                     "kern sizes",
                     ccstruct_->params()),
-      double_MEMBER(tosp_threshold_bias1, 0,
-                    "how far between kern and space?",
+      double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
                     ccstruct_->params()),
-      double_MEMBER(tosp_threshold_bias2, 0,
-                    "how far between kern and space?",
+      double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
                     ccstruct_->params()),
       double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
                     ccstruct_->params()),
       double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
-                    "narrow if w/h less than this",
-                    ccstruct_->params()),
+                    "narrow if w/h less than this", ccstruct_->params()),
       double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
                     ccstruct_->params()),
       double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
                     ccstruct_->params()),
       double_MEMBER(tosp_fuzzy_space_factor, 0.6,
-                    "Fract of xheight for fuzz sp",
-                    ccstruct_->params()),
+                    "Fract of xheight for fuzz sp", ccstruct_->params()),
       double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
-                    "Fract of xheight for fuzz sp",
-                    ccstruct_->params()),
+                    "Fract of xheight for fuzz sp", ccstruct_->params()),
       double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
-                    "Fract of xheight for fuzz sp",
-                    ccstruct_->params()),
+                    "Fract of xheight for fuzz sp", ccstruct_->params()),
       double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
                     ccstruct_->params()),
       double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
@@ -156,14 +141,11 @@ Textord::Textord(CCStruct* ccstruct)
                     "Fract of kerns reqd for isolated row stats",
                     ccstruct_->params()),
       double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
-                    "Min difference of kn & sp in table",
-                    ccstruct_->params()),
+                    "Min difference of kn & sp in table", ccstruct_->params()),
       double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
-                    "Expect spaces bigger than this",
-                    ccstruct_->params()),
+                    "Expect spaces bigger than this", ccstruct_->params()),
       double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
-                    "Fuzzy if less than this",
-                    ccstruct_->params()),
+                    "Fuzzy if less than this", ccstruct_->params()),
       double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
                     ccstruct_->params()),
       double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
@@ -172,20 +154,16 @@ Textord::Textord(CCStruct* ccstruct)
                     "Don't trust spaces less than this time kn",
                     ccstruct_->params()),
       double_MEMBER(tosp_init_guess_kn_mult, 2.2,
-                    "Thresh guess - mult kn by this",
-                    ccstruct_->params()),
+                    "Thresh guess - mult kn by this", ccstruct_->params()),
       double_MEMBER(tosp_init_guess_xht_mult, 0.28,
-                    "Thresh guess - mult xht by this",
-                    ccstruct_->params()),
+                    "Thresh guess - mult xht by this", ccstruct_->params()),
       double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
-                    "Multiplier on kn to limit thresh",
-                    ccstruct_->params()),
+                    "Multiplier on kn to limit thresh", ccstruct_->params()),
       double_MEMBER(tosp_flip_caution, 0.0,
                     "Don't autoflip kn to sp when large separation",
                     ccstruct_->params()),
       double_MEMBER(tosp_large_kerning, 0.19,
-                    "Limit use of xht gap with large kns",
-                    ccstruct_->params()),
+                    "Limit use of xht gap with large kns", ccstruct_->params()),
       double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
                     "Limit use of xht gap with odd small kns",
                     ccstruct_->params()),
@@ -193,11 +171,9 @@ Textord::Textord(CCStruct* ccstruct)
                     "Don't reduce box if the top left is non blank",
                     ccstruct_->params()),
       double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
-                    "Don't let sp minus kn get too small",
-                    ccstruct_->params()),
+                    "Don't let sp minus kn get too small", ccstruct_->params()),
       double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
-                    "How wide fuzzies need context",
-                    ccstruct_->params()),
+                    "How wide fuzzies need context", ccstruct_->params()),
       // tordmain.cpp ///////////////////////////////////////////
       BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
                   ccstruct_->params()),
@@ -206,34 +182,27 @@ Textord::Textord(CCStruct* ccstruct)
       BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
                   ccstruct_->params()),
       INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
-                  ccstruct_->params()),
+                 ccstruct_->params()),
       INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
-                  ccstruct_->params()),
+                 ccstruct_->params()),
       double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
                     ccstruct_->params()),
       double_MEMBER(textord_noise_area_ratio, 0.7,
-                    "Fraction of bounding box for noise",
-                    ccstruct_->params()),
+                    "Fraction of bounding box for noise", ccstruct_->params()),
       double_MEMBER(textord_blob_size_smallile, 20,
-                    "Percentile for small blobs",
-                    ccstruct_->params()),
+                    "Percentile for small blobs", ccstruct_->params()),
       double_MEMBER(textord_initialx_ile, 0.75,
-                    "Ile of sizes for xheight guess",
-                    ccstruct_->params()),
+                    "Ile of sizes for xheight guess", ccstruct_->params()),
       double_MEMBER(textord_initialasc_ile, 0.90,
-                    "Ile of sizes for xheight guess",
-                    ccstruct_->params()),
-      INT_MEMBER(textord_noise_sizefraction, 10,
-                 "Fraction of size for maxima",
+                    "Ile of sizes for xheight guess", ccstruct_->params()),
+      INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
                  ccstruct_->params()),
       double_MEMBER(textord_noise_sizelimit, 0.5,
-                    "Fraction of x for big t count",
-                    ccstruct_->params()),
+                    "Fraction of x for big t count", ccstruct_->params()),
       INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
                  ccstruct_->params()),
       double_MEMBER(textord_noise_normratio, 2.0,
-                    "Dot to norm ratio for deletion",
-                    ccstruct_->params()),
+                    "Dot to norm ratio for deletion", ccstruct_->params()),
       BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
                   ccstruct_->params()),
       BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
@@ -242,24 +211,20 @@ Textord::Textord(CCStruct* ccstruct)
                     "xh fract height error for norm blobs",
                     ccstruct_->params()),
       double_MEMBER(textord_noise_sxfract, 0.4,
-                    "xh fract width error for norm blobs",
-                    ccstruct_->params()),
-      double_MEMBER(textord_noise_hfract, 1.0/64,
+                    "xh fract width error for norm blobs", ccstruct_->params()),
+      double_MEMBER(textord_noise_hfract, 1.0 / 64,
                     "Height fraction to discard outlines as speckle noise",
                     ccstruct_->params()),
       INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
                  ccstruct_->params()),
       double_MEMBER(textord_noise_rowratio, 6.0,
-                    "Dot to norm ratio for deletion",
-                    ccstruct_->params()),
+                    "Dot to norm ratio for deletion", ccstruct_->params()),
       BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
                   ccstruct_->params()),
       double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
                     ccstruct_->params()),
       double_MEMBER(textord_blshift_xfraction, 9.99,
-                    "Min size of baseline shift",
-                    ccstruct_->params()) {
-}
+                    "Min size of baseline shift", ccstruct_->params()) {}
 
 Textord::~Textord() {
 }
@@ -324,10 +289,9 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew,
   BaselineDetect baseline_detector(textord_baseline_debug,
                                    reskew, to_blocks);
   baseline_detector.ComputeStraightBaselines(use_box_bottoms);
-  baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true,
-                                                      textord_heavy_nr,
-                                                      textord_show_final_rows,
-                                                      this);
+  baseline_detector.ComputeBaselineSplinesAndXheights(
+      page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
+      textord_show_final_rows, this);
   // Now make the words in the lines.
   if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
     // SINGLE_LINE uses the old word maker on the single line.
diff --git a/textord/topitch.cpp b/textord/topitch.cpp
index ae9999f7db..cfde683b0b 100644
--- a/textord/topitch.cpp
+++ b/textord/topitch.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        topitch.cpp  (Formerly to_pitch.c)
  * Description: Code to determine fixed pitchness and the pitch if fixed.
- * Author:		Ray Smith
- * Created:		Tue Aug 24 16:57:29 BST 1993
+ * Author:    Ray Smith
+ * Created:   Tue Aug 24 16:57:29 BST 1993
  *
  * (C) Copyright 1993, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -1084,7 +1084,7 @@ BOOL8 count_pitch_stats(                       //find lines
     return FALSE;
   prev_valid = FALSE;
   prev_centre = 0;
-  prev_right = 0;                //stop compiler warning
+  prev_right = 0;  // stop compiler warning
   joined_box = blob_it.data ()->bounding_box ();
   do {
     blob_it.forward ();
@@ -1285,8 +1285,6 @@ float tune_row_pitch2(                             //find fp cells
     return initial_pitch;
   }
   sum_proj = new STATS[textord_pitch_range * 2 + 1];
-  if (sum_proj == NULL)
-    return initial_pitch;
 
   for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
     pitch_delta++)
diff --git a/textord/tordmain.cpp b/textord/tordmain.cpp
index f09a186d4f..0eaf843ec3 100644
--- a/textord/tordmain.cpp
+++ b/textord/tordmain.cpp
@@ -251,6 +251,7 @@ void Textord::filter_blobs(ICOORD page_tr,         // top right
       &block->noise_blobs,
       &block->small_blobs,
       &block->large_blobs);
+    if (block->line_size == 0) block->line_size = 1;
     block->line_spacing = block->line_size *
         (tesseract::CCStruct::kDescenderFraction +
          tesseract::CCStruct::kXHeightFraction +
@@ -360,7 +361,7 @@ void Textord::cleanup_nontext_block(BLOCK* block) {
   // Non-text blocks must contain at least one row.
   ROW_IT row_it(block->row_list());
   if (row_it.empty()) {
-    TBOX box = block->bounding_box();
+    const TBOX& box = block->bounding_box();
     float height = box.height();
     inT32 xstarts[2] = {box.left(), box.right()};
     double coeffs[3] = {0.0, 0.0, static_cast<double>(box.bottom())};
@@ -769,6 +770,7 @@ void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
   PointerVector<WordWithBox> word_ptrs;
   for (int g = 0; g < groups.size(); ++g) {
     const BlockGroup* group = groups[g];
+    if (group->bounding_box.null_box()) continue;
     WordGrid word_grid(group->min_xheight, group->bounding_box.botleft(),
                        group->bounding_box.topright());
     for (int b = 0; b < group->blocks.size(); ++b) {
diff --git a/textord/tospace.cpp b/textord/tospace.cpp
index bec346ef52..4358436917 100644
--- a/textord/tospace.cpp
+++ b/textord/tospace.cpp
@@ -1,3 +1,12 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /**********************************************************************
  * tospace.cpp
  *
@@ -419,9 +428,8 @@ void Textord::row_spacing_stats(
     if (suspected_table &&
     (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
       if (tosp_debug_level > 5)
-        tprintf ("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n",
-          block_idx, row_idx,
-          row->kern_size, row->space_threshold, row->space_size);
+        tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx,
+                row_idx, row->kern_size, row->space_threshold, row->space_size);
       row->space_threshold =
         (inT32) (tosp_table_kn_sp_ratio * row->kern_size);
       row->space_size = MAX (row->space_threshold + 1, row->xheight);
@@ -441,10 +449,9 @@ void Textord::row_spacing_stats(
           MAX (tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5),
           row->xheight / 2);
       if (tosp_debug_level > 5)
-        tprintf
-          ("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
-          block_idx, row_idx, row->kern_size, row->space_threshold,
-          row->space_size, sane_space);
+        tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
+                block_idx, row_idx, row->kern_size, row->space_threshold,
+                row->space_size, sane_space);
       row->space_size = sane_space;
       row->space_threshold =
         inT32 (floor ((row->space_size + row->kern_size) /
@@ -455,10 +462,9 @@ void Textord::row_spacing_stats(
       MAX (row->kern_size, 2.5)));
     if (row->space_threshold > sane_threshold) {
       if (tosp_debug_level > 5)
-        tprintf ("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
-          block_idx, row_idx,
-          row->kern_size,
-          row->space_threshold, row->space_size, sane_threshold);
+        tprintf("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
+                block_idx, row_idx, row->kern_size, row->space_threshold,
+                row->space_size, sane_threshold);
       row->space_threshold = sane_threshold;
       if (row->space_size <= sane_threshold)
         row->space_size = row->space_threshold + 1.0f;
@@ -498,7 +504,7 @@ void Textord::row_spacing_stats(
       MIN (inT32 (ceil (tosp_fuzzy_space_factor * row->xheight)),
       inT32 (row->space_size));
     if (row->min_space <= row->space_threshold)
-                                 //Don't be silly
+      // Don't be silly
       row->min_space = row->space_threshold + 1;
     /*
     Lets try to guess the max certain kern gap by looking at the cluster of
@@ -559,7 +565,7 @@ void Textord::row_spacing_stats(
       row->kern_size));
   }
   if (row->max_nonspace > row->space_threshold) {
-                                 //Don't be silly
+    // Don't be silly
     row->max_nonspace = row->space_threshold;
   }
 
@@ -700,8 +706,8 @@ BOOL8 Textord::isolated_row_stats(TO_ROW *row,
     ((small_gaps_count / (float) total) < tosp_enough_small_gaps) ||
   (total - small_gaps_count < 1)) {
     if (tosp_debug_level > 5)
-      tprintf ("B:%d R:%d -- Can't do isolated row stats.\n",
-        block_idx, row_idx);
+      tprintf("B:%d R:%d -- Can't do isolated row stats.\n", block_idx,
+              row_idx);
     return FALSE;
   }
   blob_it.set_to_list (row->blob_list ());
@@ -1130,10 +1136,10 @@ ROW *Textord::make_prop_words(
       else
         blanks = 0;
       if (tosp_debug_level > 5)
-        tprintf
-          ("Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
-          word->bounding_box ().left (), word->bounding_box ().bottom (),
-          repetition_spacing, current_gap, blanks);
+        tprintf(
+            "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
+            word->bounding_box().left(), word->bounding_box().bottom(),
+            repetition_spacing, current_gap, blanks);
       word->set_blanks (blanks);
                                  //NO uncertainty
       word->set_flag (W_FUZZY_SP, FALSE);
@@ -1326,9 +1332,10 @@ BOOL8 Textord::make_a_word_break(
     we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY
     be used if the function returns TRUE - ie the word is to be broken.
     */
-    blanks = (uinT8) (current_gap / row->space_size);
-    if (blanks < 1)
-      blanks = 1;
+    int num_blanks = current_gap;
+    if (row->space_size > 1.0f)
+      num_blanks = IntCastRounded(current_gap / row->space_size);
+    blanks = static_cast<uinT8>(ClipToRange(num_blanks, 1, MAX_UINT8));
     fuzzy_sp = FALSE;
     fuzzy_non = FALSE;
     /*
@@ -1686,10 +1693,9 @@ void Textord::mark_gap(
       blob.bottom () + blob.height () / 2.0f);
  }
   if (tosp_debug_level > 5)
-    tprintf ("  (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n",
-      blob.left () - current_gap / 2, blob.bottom (), rule,
-      prev_gap, prev_blob_width, current_gap,
-      next_blob_width, next_gap);
+    tprintf("  (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n",
+            blob.left() - current_gap / 2, blob.bottom(), rule, prev_gap,
+            prev_blob_width, current_gap, next_blob_width, next_gap);
 }
 #endif
 
@@ -1727,8 +1733,7 @@ BOOL8 Textord::ignore_big_gap(TO_ROW *row,
                               inT16 right) {
   inT16 gap = right - left + 1;
 
-  if (tosp_ignore_big_gaps > 999)
-    return FALSE;                //Don't ignore
+  if (tosp_ignore_big_gaps > 999) return FALSE;  // Don't ignore
   if (tosp_ignore_big_gaps > 0)
     return (gap > tosp_ignore_big_gaps * row->xheight);
   if (gap > tosp_ignore_very_big_gaps * row->xheight)
@@ -1750,7 +1755,6 @@ BOOL8 Textord::ignore_big_gap(TO_ROW *row,
   return FALSE;
 }
 
-
 /**********************************************************************
  * reduced_box_next
  *
diff --git a/textord/tovars.cpp b/textord/tovars.cpp
index 71114358ac..6b1b833248 100644
--- a/textord/tovars.cpp
+++ b/textord/tovars.cpp
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        tovars.cpp  (Formerly to_vars.c)
  * Description: Variables used by textord.
- * Author:		Ray Smith
- * Created:		Tue Aug 24 16:55:02 BST 1993
+ * Author:    Ray Smith
+ * Created:   Tue Aug 24 16:55:02 BST 1993
  *
  * (C) Copyright 1993, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -49,8 +49,8 @@ EXTERN double_VAR (textord_words_default_minspace, 0.6,
 EXTERN double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight");
 EXTERN double_VAR (textord_words_default_nonspace, 0.2,
 "Fraction of xheight");
-EXTERN double_VAR (textord_words_initial_lower, 0.25,
-"Max initial cluster size");
+EXTERN double_VAR(textord_words_initial_lower, 0.25,
+                  "Max initial cluster size");
 EXTERN double_VAR (textord_words_initial_upper, 0.15,
 "Min initial cluster spacing");
 EXTERN double_VAR (textord_words_minlarge, 0.75,
@@ -67,7 +67,7 @@ EXTERN double_VAR (textord_pitch_rowsimilarity, 0.08,
 "Fraction of xheight for sameness");
 EXTERN BOOL_VAR (textord_pitch_scalebigwords, FALSE,
 "Scale scores on big words");
-EXTERN double_VAR (words_initial_lower, 0.5, "Max initial cluster size");
+EXTERN double_VAR(words_initial_lower, 0.5, "Max initial cluster size");
 EXTERN double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing");
 EXTERN double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight");
 EXTERN double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight");
diff --git a/textord/tovars.h b/textord/tovars.h
index 99edae1d2c..46315bb96b 100644
--- a/textord/tovars.h
+++ b/textord/tovars.h
@@ -1,8 +1,8 @@
 /**********************************************************************
  * File:        tovars.h  (Formerly to_vars.h)
  * Description: Variables used by textord.
- * Author:		Ray Smith
- * Created:		Tue Aug 24 16:55:02 BST 1993
+ * Author:    Ray Smith
+ * Created:   Tue Aug 24 16:55:02 BST 1993
  *
  * (C) Copyright 1993, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -51,8 +51,8 @@ extern double_VAR_H (textord_words_default_minspace, 0.6,
 extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight");
 extern double_VAR_H (textord_words_default_nonspace, 0.2,
 "Fraction of xheight");
-extern double_VAR_H (textord_words_initial_lower, 0.25,
-"Max initial cluster size");
+extern double_VAR_H(textord_words_initial_lower, 0.25,
+                    "Max initial cluster size");
 extern double_VAR_H (textord_words_initial_upper, 0.15,
 "Min initial cluster spacing");
 extern double_VAR_H (textord_words_minlarge, 0.75,
@@ -69,7 +69,7 @@ extern double_VAR_H (textord_pitch_rowsimilarity, 0.08,
 "Fraction of xheight for sameness");
 extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE,
 "Scale scores on big words");
-extern double_VAR_H (words_initial_lower, 0.5, "Max initial cluster size");
+extern double_VAR_H(words_initial_lower, 0.5, "Max initial cluster size");
 extern double_VAR_H (words_initial_upper, 0.15,
 "Min initial cluster spacing");
 extern double_VAR_H (words_default_prop_nonspace, 0.25,
diff --git a/training/CMakeLists.txt b/training/CMakeLists.txt
index a733e73ea0..c13ed77479 100644
--- a/training/CMakeLists.txt
+++ b/training/CMakeLists.txt
@@ -2,10 +2,16 @@
 # tesseract
 #
 
-if (STATIC OR NOT (WIN32 OR CYGWIN))
+if (NOT CPPAN_BUILD AND NOT (WIN32 OR CYGWIN))
+    return()
+endif()
+
+if (CPPAN_BUILD)
+    set(ICU_FOUND 1)
+endif()
 
 # experimental
-if (MSVC)
+if (MSVC AND NOT CPPAN_BUILD)
 
 include(CheckTypeSize)
 check_type_size("void *" SIZEOF_VOID_P)
@@ -48,10 +54,12 @@ endif()
 
 set(ICU_ROOT ${icu_dir}/icu)
 
-endif(MSVC)
+endif()
 # experimental
 
+if (NOT CPPAN_BUILD)
 find_package(ICU COMPONENTS uc i18n)
+endif()
 
 ########################################
 # LIBRARY tessopt
@@ -74,7 +82,7 @@ set(common_training_hdr
     commontraining.h
 )
 add_library                 (common_training ${common_training_src} ${common_training_hdr})
-target_link_libraries       (common_training tesseract tessopt)
+target_link_libraries       (common_training libtesseract tessopt)
 project_group               (common_training "Training Tools")
 
 
@@ -83,7 +91,7 @@ project_group               (common_training "Training Tools")
 ########################################
 
 add_executable              (ambiguous_words ambiguous_words.cpp)
-target_link_libraries       (ambiguous_words tesseract)
+target_link_libraries       (ambiguous_words libtesseract)
 project_group               (ambiguous_words "Training Tools")
 
 
@@ -101,7 +109,7 @@ project_group               (classifier_tester "Training Tools")
 ########################################
 
 add_executable              (combine_tessdata combine_tessdata.cpp)
-target_link_libraries       (combine_tessdata tesseract)
+target_link_libraries       (combine_tessdata libtesseract)
 project_group               (combine_tessdata "Training Tools")
 
 
@@ -119,7 +127,7 @@ project_group               (cntraining "Training Tools")
 ########################################
 
 add_executable              (dawg2wordlist dawg2wordlist.cpp)
-target_link_libraries       (dawg2wordlist tesseract)
+target_link_libraries       (dawg2wordlist libtesseract)
 project_group               (dawg2wordlist "Training Tools")
 
 
@@ -146,7 +154,7 @@ project_group               (shapeclustering "Training Tools")
 ########################################
 
 add_executable              (unicharset_extractor unicharset_extractor.cpp)
-target_link_libraries       (unicharset_extractor tesseract tessopt)
+target_link_libraries       (unicharset_extractor libtesseract tessopt)
 project_group               (unicharset_extractor "Training Tools")
 
 
@@ -155,7 +163,7 @@ project_group               (unicharset_extractor "Training Tools")
 ########################################
 
 add_executable              (wordlist2dawg wordlist2dawg.cpp)
-target_link_libraries       (wordlist2dawg tesseract)
+target_link_libraries       (wordlist2dawg libtesseract)
 project_group               (wordlist2dawg "Training Tools")
 
 
@@ -165,7 +173,9 @@ project_group               (wordlist2dawg "Training Tools")
 
 if (ICU_FOUND)
 
+if (NOT CPPAN_BUILD)
 include_directories(${ICU_INCLUDE_DIRS})
+endif()
 
 add_executable              (set_unicharset_properties
     set_unicharset_properties.cpp
@@ -177,7 +187,11 @@ add_executable              (set_unicharset_properties
     normstrngs.h
     icuerrorcode.h
 )
+if (NOT CPPAN_BUILD)
 target_link_libraries       (set_unicharset_properties common_training ${ICU_LIBRARIES})
+else()
+target_link_libraries       (set_unicharset_properties common_training pvt.cppan.demo.unicode.icu.i18n)
+endif()
 project_group               (set_unicharset_properties "Training Tools")
 
 
@@ -221,7 +235,7 @@ endif()
 add_executable              (text2image ${text2image_src})
 target_include_directories  (text2image BEFORE PRIVATE ${Cairo_INCLUDE_DIRS} ${Pango_INCLUDE_DIRS})
 target_compile_definitions  (text2image PRIVATE -DPANGO_ENABLE_ENGINE)
-target_link_libraries       (text2image tesseract common_training
+target_link_libraries       (text2image libtesseract common_training
     ${ICU_LIBRARIES}
     ${Pango_LIBRARIES}
     ${Cairo_LIBRARIES}
@@ -233,6 +247,5 @@ project_group               (text2image "Training Tools")
 
 endif(PKG_CONFIG_FOUND)
 endif(ICU_FOUND)
-endif(STATIC OR NOT (WIN32 OR CYGWIN))
 
 ###############################################################################
diff --git a/training/Makefile.am b/training/Makefile.am
index fe3d85bcdc..2506226654 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -10,14 +10,7 @@ AM_CPPFLAGS += \
 
 EXTRA_DIST = language-specific.sh tesstrain.sh tesstrain_utils.sh
 
-if MINGW
-# try static build
-#AM_LDFLAGS += -all-static
-#libic=-lsicuin -licudt -lsicuuc
-libicu=-licuin -licuuc
-else
 libicu=-licui18n -licuuc
-endif
 # TODO: training programs can not be linked to shared library created 
 # with -fvisibility 
 if VISIBILITY
diff --git a/training/boxchar.cpp b/training/boxchar.cpp
index b99c12a600..d3d5160895 100644
--- a/training/boxchar.cpp
+++ b/training/boxchar.cpp
@@ -49,7 +49,8 @@ void BoxChar::AddBox(int x, int y, int width, int height) {
 }
 
 /* static */
-void BoxChar::TranslateBoxes(int xshift, int yshift, vector<BoxChar*>* boxes) {
+void BoxChar::TranslateBoxes(int xshift, int yshift,
+                             std::vector<BoxChar*>* boxes) {
   for (int i = 0; i < boxes->size(); ++i) {
     BOX* box = (*boxes)[i]->box_;
     if (box != NULL) {
@@ -62,7 +63,7 @@ void BoxChar::TranslateBoxes(int xshift, int yshift, vector<BoxChar*>* boxes) {
 // Prepares for writing the boxes to a file by inserting newlines, spaces,
 // and re-ordering so the boxes are strictly left-to-right.
 /* static */
-void BoxChar::PrepareToWrite(vector<BoxChar*>* boxes) {
+void BoxChar::PrepareToWrite(std::vector<BoxChar*>* boxes) {
   bool rtl_rules = ContainsMostlyRTL(*boxes);
   bool vertical_rules = MostlyVertical(*boxes);
   InsertNewlines(rtl_rules, vertical_rules, boxes);
@@ -73,13 +74,12 @@ void BoxChar::PrepareToWrite(vector<BoxChar*>* boxes) {
   if (rtl_rules) {
     ReorderRTLText(boxes);
   }
-  tprintf("Rtl = %d ,vertical=%d\n", rtl_rules, vertical_rules);
 }
 
 // Inserts newline (tab) characters into the vector at newline positions.
 /* static */
 void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules,
-                             vector<BoxChar*>* boxes) {
+                             std::vector<BoxChar*>* boxes) {
   int prev_i = -1;
   int max_shift = 0;
   for (int i = 0; i < boxes->size(); ++i) {
@@ -142,7 +142,7 @@ void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules,
 // Converts NULL boxes to space characters, with appropriate bounding boxes.
 /* static */
 void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
-                           vector<BoxChar*>* boxes) {
+                           std::vector<BoxChar*>* boxes) {
   // After InsertNewlines, any remaining null boxes are not newlines, and are
   // singletons, so add a box to each remaining null box.
   for (int i = 1; i + 1 < boxes->size(); ++i) {
@@ -198,7 +198,7 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
 
 // Reorders text in a right-to-left script in left-to-right order.
 /* static */
-void BoxChar::ReorderRTLText(vector<BoxChar*>* boxes) {
+void BoxChar::ReorderRTLText(std::vector<BoxChar*>* boxes) {
   // After adding newlines and spaces, this task is simply a matter of sorting
   // by left each group of boxes between newlines.
   BoxCharPtrSort sorter;
@@ -212,7 +212,7 @@ void BoxChar::ReorderRTLText(vector<BoxChar*>* boxes) {
 
 // Returns true if the vector contains mostly RTL characters.
 /* static */
-bool BoxChar::ContainsMostlyRTL(const vector<BoxChar*>& boxes) {
+bool BoxChar::ContainsMostlyRTL(const std::vector<BoxChar*>& boxes) {
   int num_rtl = 0, num_ltr = 0;
   for (int i = 0; i < boxes.size(); ++i) {
     // Convert the unichar to UTF32 representation
@@ -241,7 +241,7 @@ bool BoxChar::ContainsMostlyRTL(const vector<BoxChar*>& boxes) {
 
 // Returns true if the text is mostly laid out vertically.
 /* static */
-bool BoxChar::MostlyVertical(const vector<BoxChar*>& boxes) {
+bool BoxChar::MostlyVertical(const std::vector<BoxChar*>& boxes) {
   inT64 total_dx = 0, total_dy = 0;
   for (int i = 1; i < boxes.size(); ++i) {
     if (boxes[i - 1]->box_ != NULL && boxes[i]->box_ != NULL &&
@@ -260,7 +260,7 @@ bool BoxChar::MostlyVertical(const vector<BoxChar*>& boxes) {
 
 // Returns the total length of all the strings in the boxes.
 /* static */
-int BoxChar::TotalByteLength(const vector<BoxChar*>& boxes) {
+int BoxChar::TotalByteLength(const std::vector<BoxChar*>& boxes) {
   int total_length = 0;
   for (int i = 0; i < boxes.size(); ++i) total_length += boxes[i]->ch_.size();
   return total_length;
@@ -270,7 +270,8 @@ int BoxChar::TotalByteLength(const vector<BoxChar*>& boxes) {
 // The rotation is in radians clockwise about the given center.
 /* static */
 void BoxChar::RotateBoxes(float rotation, int xcenter, int ycenter,
-                          int start_box, int end_box, vector<BoxChar*>* boxes) {
+                          int start_box, int end_box,
+                          std::vector<BoxChar*>* boxes) {
   Boxa* orig = boxaCreate(0);
   for (int i = start_box; i < end_box; ++i) {
     BOX* box = (*boxes)[i]->box_;
@@ -290,14 +291,21 @@ void BoxChar::RotateBoxes(float rotation, int xcenter, int ycenter,
 const int kMaxLineLength = 1024;
 /* static */
 void BoxChar::WriteTesseractBoxFile(const string& filename, int height,
-                                    const vector<BoxChar*>& boxes) {
+                                    const std::vector<BoxChar*>& boxes) {
+  string output = GetTesseractBoxStr(height, boxes);
+  File::WriteStringToFileOrDie(output, filename);
+}
+
+/* static */
+string BoxChar::GetTesseractBoxStr(int height,
+                                   const std::vector<BoxChar*>& boxes) {
   string output;
   char buffer[kMaxLineLength];
   for (int i = 0; i < boxes.size(); ++i) {
     const Box* box = boxes[i]->box_;
     if (box == NULL) {
       tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
-      return;
+      return "";
     }
     int nbytes =
         snprintf(buffer, kMaxLineLength, "%s %d %d %d %d %d\n",
@@ -305,6 +313,7 @@ void BoxChar::WriteTesseractBoxFile(const string& filename, int height,
                  box->x + box->w, height - box->y, boxes[i]->page_);
     output.append(buffer, nbytes);
   }
-  File::WriteStringToFileOrDie(output, filename);
+  return output;
 }
+
 }  // namespace tesseract
diff --git a/training/boxchar.h b/training/boxchar.h
index 27b568a143..3748c4abad 100644
--- a/training/boxchar.h
+++ b/training/boxchar.h
@@ -100,6 +100,9 @@ class BoxChar {
   // is needed to convert to tesseract coordinates.
   static void WriteTesseractBoxFile(const string& name, int height,
                                     const vector<BoxChar*>& boxes);
+  // Gets the tesseract box file as a string from the vector of boxes.
+  // The image height is needed to convert to tesseract coordinates.
+  static string GetTesseractBoxStr(int height, const vector<BoxChar*>& boxes);
 
  private:
   string ch_;
diff --git a/training/classifier_tester.cpp b/training/classifier_tester.cpp
index 48f3781ebb..ed7e50cd2f 100644
--- a/training/classifier_tester.cpp
+++ b/training/classifier_tester.cpp
@@ -48,9 +48,9 @@ enum ClassifierName {
 
 const char* names[] = {"pruner", "full",
 #ifndef NO_CUBE_BUILD
-  "cube", "cubetess",
+                       "cube", "cubetess",
 #endif  // NO_CUBE_BUILD
-  NULL };
+                       NULL};
 
 static tesseract::ShapeClassifier* InitializeClassifier(
     const char* classifer_name, const UNICHARSET& unicharset,
@@ -80,7 +80,7 @@ static tesseract::ShapeClassifier* InitializeClassifier(
   tesseract::Classify* classify = NULL;
   if (
 #ifndef NO_CUBE_BUILD
-    classifier == CN_CUBE || classifier == CN_CUBETESS ||
+      classifier == CN_CUBE || classifier == CN_CUBETESS ||
 #endif  // NO_CUBE_BUILD
       classifier == CN_PRUNER || classifier == CN_FULL) {
 #ifndef NO_CUBE_BUILD
diff --git a/training/cntraining.cpp b/training/cntraining.cpp
index ab19ddb93d..6f4f42aebe 100644
--- a/training/cntraining.cpp
+++ b/training/cntraining.cpp
@@ -20,7 +20,6 @@
  ** limitations under the License.
 ******************************************************************************/
 
-
 /*----------------------------------------------------------------------------
           Include Files and Type Defines
 ----------------------------------------------------------------------------*/
@@ -53,10 +52,8 @@ int main (
           Private Function Prototypes
 ----------------------------------------------------------------------------*/
 
-void WriteNormProtos (
-     const char  *Directory,
-     LIST  LabeledProtoList,
-   CLUSTERER *Clusterer);
+void WriteNormProtos(const char *Directory, LIST LabeledProtoList,
+                     const FEATURE_DESC_STRUCT *feature_desc);
 
 /*
 PARAMDESC *ConvertToPARAMDESC(
@@ -81,7 +78,6 @@ CLUSTERCONFIG  CNConfig =
   elliptical, 0.025, 0.05, 0.8, 1e-3, 0
 };
 
-
 /*----------------------------------------------------------------------------
               Public Code
 ----------------------------------------------------------------------------*/
@@ -134,8 +130,7 @@ CLUSTERCONFIG  CNConfig =
 * @note Exceptions: none
 * @note History: Fri Aug 18 08:56:17 1989, DSJ, Created.
 */
-int main(int  argc, char* argv[])
-{
+int main(int argc, char *argv[]) {
   // Set the global Config parameters before parsing the command line.
   Config = CNConfig;
 
@@ -165,13 +160,18 @@ int main(int  argc, char* argv[])
   // reduce the min samples:
   // Config.MinSamples = 0.5 / num_fonts;
   pCharList = CharList;
+  // The norm protos will count the source protos, so we keep them here in
+  // freeable_protos, so they can be freed later.
+  GenericVector<LIST> freeable_protos;
   iterate(pCharList) {
     //Cluster
-    if (Clusterer)
-       FreeClusterer(Clusterer);
     CharSample = (LABELEDLIST)first_node(pCharList);
     Clusterer =
       SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE);
+    if (Clusterer == NULL) {  // To avoid a SIGSEGV
+      fprintf(stderr, "Error: NULL clusterer!\n");
+      return 1;
+    }
     float SavedMinSamples = Config.MinSamples;
     // To disable the tendency to produce a single cluster for all fonts,
     // make MagicSamples an impossible to achieve number:
@@ -190,21 +190,21 @@ int main(int  argc, char* argv[])
     }
     Config.MinSamples = SavedMinSamples;
     AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
+    freeable_protos.push_back(ProtoList);
+    FreeClusterer(Clusterer);
   }
   FreeTrainingSamples(CharList);
-  if (Clusterer == NULL) { // To avoid a SIGSEGV
-    fprintf(stderr, "Error: NULL clusterer!\n");
-    return 1;
-  }
-  WriteNormProtos(FLAGS_D.c_str(), NormProtoList, Clusterer);
+  int desc_index = ShortNameToFeatureType(FeatureDefs, PROGRAM_FEATURE_TYPE);
+  WriteNormProtos(FLAGS_D.c_str(), NormProtoList,
+                  FeatureDefs.FeatureDesc[desc_index]);
   FreeNormProtoList(NormProtoList);
-  FreeProtoList(&ProtoList);
-  FreeClusterer(Clusterer);
+  for (int i = 0; i < freeable_protos.size(); ++i) {
+    FreeProtoList(&freeable_protos[i]);
+  }
   printf ("\n");
   return 0;
 }  // main
 
-
 /*----------------------------------------------------------------------------
               Private Code
 ----------------------------------------------------------------------------*/
@@ -216,16 +216,13 @@ int main(int  argc, char* argv[])
 * of the samples.
 * @param Directory  directory to place sample files into
 * @param LabeledProtoList List of labeled protos
-* @param Clusterer The CLUSTERER to use
+* @param feature_desc Description of the features
 * @return none
 * @note Exceptions: none
 * @note History: Fri Aug 18 16:17:06 1989, DSJ, Created.
 */
-void WriteNormProtos (
-     const char  *Directory,
-     LIST  LabeledProtoList,
-     CLUSTERER *Clusterer)
-{
+void WriteNormProtos(const char *Directory, LIST LabeledProtoList,
+                     const FEATURE_DESC_STRUCT *feature_desc) {
   FILE    *File;
   STRING Filename;
   LABELEDLIST LabeledProto;
@@ -240,8 +237,8 @@ void WriteNormProtos (
   Filename += "normproto";
   printf ("\nWriting %s ...", Filename.string());
   File = Efopen (Filename.string(), "wb");
-  fprintf(File,"%0d\n",Clusterer->SampleSize);
-  WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc);
+  fprintf(File, "%0d\n", feature_desc->NumParams);
+  WriteParamDesc(File, feature_desc->NumParams, feature_desc->ParamDesc);
   iterate(LabeledProtoList)
   {
     LabeledProto = (LABELEDLIST) first_node (LabeledProtoList);
@@ -256,7 +253,7 @@ void WriteNormProtos (
       exit(1);
     }
     fprintf(File, "\n%s %d\n", LabeledProto->Label, N);
-    WriteProtos(File, Clusterer->SampleSize, LabeledProto->List, true, false);
+    WriteProtos(File, feature_desc->NumParams, LabeledProto->List, true, false);
   }
   fclose (File);
 
diff --git a/training/commandlineflags.cpp b/training/commandlineflags.cpp
index 06bfbe6589..56dcb84221 100644
--- a/training/commandlineflags.cpp
+++ b/training/commandlineflags.cpp
@@ -1,3 +1,12 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "commandlineflags.h"
 
 #ifdef USE_STD_NAMESPACE
@@ -106,36 +115,36 @@ void PrintCommandLineFlags() {
     if (!strncmp(GlobalParams()->int_params[i]->name_str(),
                  kFlagNamePrefix, kFlagNamePrefixLen)) {
       printf("  --%s  %s  (type:int default:%d)\n",
-              GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen,
-              GlobalParams()->int_params[i]->info_str(),
-              inT32(*(GlobalParams()->int_params[i])));
+             GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen,
+             GlobalParams()->int_params[i]->info_str(),
+             inT32(*(GlobalParams()->int_params[i])));
     }
   }
   for (int i = 0; i < GlobalParams()->double_params.size(); ++i) {
     if (!strncmp(GlobalParams()->double_params[i]->name_str(),
                  kFlagNamePrefix, kFlagNamePrefixLen)) {
       printf("  --%s  %s  (type:double default:%g)\n",
-              GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen,
-              GlobalParams()->double_params[i]->info_str(),
-              static_cast<double>(*(GlobalParams()->double_params[i])));
+             GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen,
+             GlobalParams()->double_params[i]->info_str(),
+             static_cast<double>(*(GlobalParams()->double_params[i])));
     }
   }
   for (int i = 0; i < GlobalParams()->bool_params.size(); ++i) {
     if (!strncmp(GlobalParams()->bool_params[i]->name_str(),
                  kFlagNamePrefix, kFlagNamePrefixLen)) {
       printf("  --%s  %s  (type:bool default:%s)\n",
-              GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen,
-              GlobalParams()->bool_params[i]->info_str(),
-              (BOOL8(*(GlobalParams()->bool_params[i])) ? "true" : "false"));
+             GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen,
+             GlobalParams()->bool_params[i]->info_str(),
+             (BOOL8(*(GlobalParams()->bool_params[i])) ? "true" : "false"));
     }
   }
   for (int i = 0; i < GlobalParams()->string_params.size(); ++i) {
     if (!strncmp(GlobalParams()->string_params[i]->name_str(),
                  kFlagNamePrefix, kFlagNamePrefixLen)) {
       printf("  --%s  %s  (type:string default:%s)\n",
-              GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen,
-              GlobalParams()->string_params[i]->info_str(),
-              GlobalParams()->string_params[i]->string());
+             GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen,
+             GlobalParams()->string_params[i]->info_str(),
+             GlobalParams()->string_params[i]->string());
     }
   }
 }
diff --git a/training/commontraining.cpp b/training/commontraining.cpp
index 1c4cc832bc..57ecdbecef 100644
--- a/training/commontraining.cpp
+++ b/training/commontraining.cpp
@@ -39,7 +39,6 @@
 #include <math.h>
 
 using tesseract::CCUtil;
-using tesseract::FontInfo;
 using tesseract::IntFeatureSpace;
 using tesseract::ParamUtils;
 using tesseract::ShapeTable;
@@ -312,9 +311,7 @@ const char *GetNextFilename(int argc, const char* const * argv) {
     return argv[tessoptind++];
   else
     return NULL;
-}	/* GetNextFilename */
-
-
+} /* GetNextFilename */
 
 /*---------------------------------------------------------------------------*/
 /**
@@ -328,11 +325,8 @@ const char *GetNextFilename(int argc, const char* const * argv) {
  * @note Exceptions: none
  * @note History: Fri Aug 18 15:57:41 1989, DSJ, Created.
  */
-LABELEDLIST FindList (
-    LIST	List,
-    char	*Label)
-{
-  LABELEDLIST	LabeledList;
+LABELEDLIST FindList(LIST List, char* Label) {
+  LABELEDLIST LabeledList;
 
   iterate (List)
   {
@@ -342,7 +336,7 @@ LABELEDLIST FindList (
   }
   return (NULL);
 
-}	/* FindList */
+} /* FindList */
 
 /*---------------------------------------------------------------------------*/
 /**
@@ -354,10 +348,8 @@ LABELEDLIST FindList (
  * @note Exceptions: none
  * @note History: Fri Aug 18 16:08:46 1989, DSJ, Created.
  */
-LABELEDLIST NewLabeledList (
-    const char	*Label)
-{
-  LABELEDLIST	LabeledList;
+LABELEDLIST NewLabeledList(const char* Label) {
+  LABELEDLIST LabeledList;
 
   LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
   LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
@@ -367,7 +359,7 @@ LABELEDLIST NewLabeledList (
   LabeledList->font_sample_count = 0;
   return (LabeledList);
 
-}	/* NewLabeledList */
+} /* NewLabeledList */
 
 /*---------------------------------------------------------------------------*/
 // TODO(rays) This is now used only by cntraining. Convert cntraining to use
@@ -386,7 +378,7 @@ LABELEDLIST NewLabeledList (
  * @return none
  * @note Globals: none
  * @note Exceptions: none
- * @note History: 
+ * @note History:
  * - Fri Aug 18 13:11:39 1989, DSJ, Created.
  * - Tue May 17 1998 simplifications to structure, illiminated
  *   font, and feature specification levels of structure.
@@ -460,17 +452,17 @@ void FreeTrainingSamples(LIST CharList) {
   FEATURE_SET FeatureSet;
   LIST FeatureList;
 
-
-  iterate(CharList) {  /* iterate through all of the fonts */
+  LIST nodes = CharList;
+  iterate(CharList) { /* iterate through all of the fonts */
     char_sample = (LABELEDLIST) first_node(CharList);
     FeatureList = char_sample->List;
-    iterate(FeatureList) {  /* iterate through all of the classes */
+    iterate(FeatureList) { /* iterate through all of the classes */
       FeatureSet = (FEATURE_SET) first_node(FeatureList);
       FreeFeatureSet(FeatureSet);
     }
     FreeLabeledList(char_sample);
   }
-  destroy(CharList);
+  destroy(nodes);
 }  /* FreeTrainingSamples */
 
 /*---------------------------------------------------------------------------*/
@@ -532,15 +524,15 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
     }
     CharID++;
   }
-  if ( Sample != NULL ) free( Sample );
-  return( Clusterer );
+  free(Sample);
+  return Clusterer;
 
-}	/* SetUpForClustering */
+} /* SetUpForClustering */
 
 /*------------------------------------------------------------------------*/
 void MergeInsignificantProtos(LIST ProtoList, const char* label,
-                              CLUSTERER	*Clusterer, CLUSTERCONFIG *Config) {
-  PROTOTYPE	*Prototype;
+                              CLUSTERER* Clusterer, CLUSTERCONFIG* Config) {
+  PROTOTYPE* Prototype;
   bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
 
   LIST pProtoList = ProtoList;
@@ -600,7 +592,7 @@ void MergeInsignificantProtos(LIST ProtoList, const char* label,
       Prototype->Significant = true;
     }
   }
-}	/* MergeInsignificantProtos */
+} /* MergeInsignificantProtos */
 
 /*-----------------------------------------------------------------------------*/
 void CleanUpUnusedData(
@@ -695,14 +687,11 @@ LIST RemoveInsignificantProtos(
   }
   FreeProtoList(&ProtoList);
   return (NewProtoList);
-}	/* RemoveInsignificantProtos */
+} /* RemoveInsignificantProtos */
 
 /*----------------------------------------------------------------------------*/
-MERGE_CLASS FindClass (
-    LIST	List,
-    const char	*Label)
-{
-  MERGE_CLASS	MergeClass;
+MERGE_CLASS FindClass(LIST List, const char* Label) {
+  MERGE_CLASS MergeClass;
 
   iterate (List)
   {
@@ -712,13 +701,11 @@ MERGE_CLASS FindClass (
   }
   return (NULL);
 
-}	/* FindClass */
+} /* FindClass */
 
 /*---------------------------------------------------------------------------*/
-MERGE_CLASS NewLabeledClass (
-    const char	*Label)
-{
-  MERGE_CLASS	MergeClass;
+MERGE_CLASS NewLabeledClass(const char* Label) {
+  MERGE_CLASS MergeClass;
 
   MergeClass = new MERGE_CLASS_NODE;
   MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
@@ -726,7 +713,7 @@ MERGE_CLASS NewLabeledClass (
   MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
   return (MergeClass);
 
-}	/* NewLabeledClass */
+} /* NewLabeledClass */
 
 /*-----------------------------------------------------------------------------*/
 /**
@@ -738,38 +725,37 @@ MERGE_CLASS NewLabeledClass (
  * @note Exceptions: none
  * @note History: Fri Aug 18 17:44:27 1989, DSJ, Created.
  */
-void FreeLabeledClassList (
-    LIST	ClassList)
-{
-  MERGE_CLASS	MergeClass;
+void FreeLabeledClassList(LIST ClassList) {
+  MERGE_CLASS MergeClass;
 
-  iterate (ClassList) 		/* iterate through all of the fonts */
+  LIST nodes = ClassList;
+  iterate(ClassList) /* iterate through all of the fonts */
   {
     MergeClass = (MERGE_CLASS) first_node (ClassList);
     free (MergeClass->Label);
     FreeClass(MergeClass->Class);
     delete MergeClass;
   }
-  destroy (ClassList);
+  destroy(nodes);
 
-}	/* FreeLabeledClassList */
+} /* FreeLabeledClassList */
 
 /* SetUpForFloat2Int */
 CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
                                 LIST LabeledClassList) {
-  MERGE_CLASS	MergeClass;
-  CLASS_TYPE		Class;
-  int				NumProtos;
-  int				NumConfigs;
-  int				NumWords;
-  int				i, j;
-  float			Values[3];
-  PROTO			NewProto;
-  PROTO			OldProto;
-  BIT_VECTOR		NewConfig;
-  BIT_VECTOR		OldConfig;
-
-  // 	printf("Float2Int ...\n");
+  MERGE_CLASS MergeClass;
+  CLASS_TYPE Class;
+  int NumProtos;
+  int NumConfigs;
+  int NumWords;
+  int i, j;
+  float Values[3];
+  PROTO NewProto;
+  PROTO OldProto;
+  BIT_VECTOR NewConfig;
+  BIT_VECTOR OldConfig;
+
+  //  printf("Float2Int ...\n");
 
   CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()];
   iterate(LabeledClassList)
@@ -821,9 +807,9 @@ CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
 void Normalize (
     float  *Values)
 {
-  register float Slope;
-  register float Intercept;
-  register float Normalizer;
+  float Slope;
+  float Intercept;
+  float Normalizer;
 
   Slope      = tan (Values [2] * 2 * PI);
   Intercept  = Values [1] - Slope * Values [0];
@@ -835,20 +821,20 @@ void Normalize (
 } // Normalize
 
 /*-------------------------------------------------------------------------*/
-void FreeNormProtoList (
-    LIST	CharList)
+void FreeNormProtoList(LIST CharList)
 
 {
-  LABELEDLIST	char_sample;
+  LABELEDLIST char_sample;
 
-  iterate (CharList) 		/* iterate through all of the fonts */
+  LIST nodes = CharList;
+  iterate(CharList) /* iterate through all of the fonts */
   {
     char_sample = (LABELEDLIST) first_node (CharList);
     FreeLabeledList (char_sample);
   }
-  destroy (CharList);
+  destroy(nodes);
 
-}	// FreeNormProtoList
+}  // FreeNormProtoList
 
 /*---------------------------------------------------------------------------*/
 void AddToNormProtosList(
@@ -869,19 +855,16 @@ void AddToNormProtosList(
 }
 
 /*---------------------------------------------------------------------------*/
-int NumberOfProtos(
-    LIST ProtoList,
-    BOOL8	CountSigProtos,
-    BOOL8	CountInsigProtos)
-{
+int NumberOfProtos(LIST ProtoList, BOOL8 CountSigProtos,
+                   BOOL8 CountInsigProtos) {
   int N = 0;
-  PROTOTYPE	*Proto;
+  PROTOTYPE* Proto;
 
   iterate(ProtoList)
   {
     Proto = (PROTOTYPE *) first_node ( ProtoList );
-    if (( Proto->Significant && CountSigProtos )	||
-        ( ! Proto->Significant && CountInsigProtos ) )
+    if ((Proto->Significant && CountSigProtos) ||
+        (!Proto->Significant && CountInsigProtos))
       N++;
   }
   return(N);
diff --git a/training/degradeimage.cpp b/training/degradeimage.cpp
index f9c3cfb048..333f3703dc 100644
--- a/training/degradeimage.cpp
+++ b/training/degradeimage.cpp
@@ -22,10 +22,36 @@
 
 #include <stdlib.h>
 #include "allheaders.h"   // from leptonica
+#include "genericvector.h"
 #include "helpers.h"  // For TRand.
+#include "rect.h"
 
 namespace tesseract {
 
+// A randomized perspective distortion can be applied to synthetic input.
+// The perspective distortion comes from leptonica, which uses 2 sets of 4
+// corners to determine the distortion. There are random values for each of
+// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead
+// defined in terms of a single shear value. This reduces the degrees of
+// freedom enough to make the distortion more realistic than it would otherwise
+// be if all 8 coordinates could move independently.
+// One additional factor is used for the color of the pixels that don't exist
+// in the source image.
+// Name for each of the randomizing factors.
+enum FactorNames {
+  FN_INCOLOR,  // Chooses white vs black for pixels brought in from outside.
+  FN_Y0,       // y of "to" corner 0, as a fraction of the height.
+  FN_Y1,       // y of "to" corner 1, as a fraction of the height.
+  FN_Y2,       // Inset of "to" corner 2 from y = height, as a fraction.
+  FN_Y3,       // Inset of "to" corner 3 from y = height, as a fraction.
+  FN_X0,       // x of "to" corner 0, as a fraction of the width.
+  FN_X1,       // Inset of "to" corner 1 from x = width, as a fraction.
+  FN_SHEAR,    // Signed x shift of corners 2 and 3, as a fraction of width.
+  // x2 = x1 - shear
+  // x3 = x0 + shear
+  FN_NUM_FACTORS  // Count of the factors above; sizes the factors array.
+};
+
 // Rotation is +/- kRotationRange radians.
 const float kRotationRange = 0.02f;
 // Number of grey levels to shift by for each exposure step.
@@ -144,4 +170,141 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
   return input;
 }
 
+// Creates and returns a Pix distorted by various means according to the bool
+// flags. If boxes is not NULL, the boxes are resized/positioned according to
+// any spatial distortion and then scaled by 1 / box_reduction (the integer
+// reduction factor, assumed non-zero) to match what the network will output.
+// Returns NULL on error. The returned Pix must be pixDestroyed.
+Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
+                         bool white_noise, bool smooth_noise, bool blur,
+                         int box_reduction, TRand* randomizer,
+                         GenericVector<TBOX>* boxes) {
+  Pix* distorted = pixCopy(NULL, const_cast<Pix*>(pix));
+  // Things to do to synthetic training data.
+  if (invert && randomizer->SignedRand(1.0) < 0)
+    pixInvert(distorted, distorted);
+  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
+    // TODO(rays) Cook noise in a more thread-safe manner than rand().
+    // Attempt to make the sequences reproducible.
+    srand(randomizer->IntRand());
+    Pix* pixn = pixAddGaussianNoise(distorted, 8.0);
+    pixDestroy(&distorted);
+    if (smooth_noise) {
+      distorted = pixBlockconv(pixn, 1, 1);
+      pixDestroy(&pixn);
+    } else {
+      distorted = pixn;  // Unsmoothed noise: keep pixn itself as the result.
+    }
+  }
+  if (blur && randomizer->SignedRand(1.0) > 0.0) {
+    Pix* blurred = pixBlockconv(distorted, 1, 1);
+    pixDestroy(&distorted);
+    distorted = blurred;
+  }
+  if (perspective)  // 0, 0: the size is read from distorted when non-null.
+    GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
+  if (boxes != NULL) {
+    for (int b = 0; b < boxes->size(); ++b) {
+      (*boxes)[b].scale(1.0f / box_reduction);
+      if ((*boxes)[b].width() <= 0)  // Never leave a box with no width.
+        (*boxes)[b].set_right((*boxes)[b].left() + 1);
+    }
+  }
+  return distorted;
+}
+
+// Distorts anything that has a non-null pointer with the same pseudo-random
+// perspective distortion. Width and height only need to be set if there
+// is no pix. If there is a pix, then they will be taken from there.
+void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
+                                   Pix** pix, GenericVector<TBOX>* boxes) {
+  if (pix != NULL && *pix != NULL) {
+    width = pixGetWidth(*pix);
+    height = pixGetHeight(*pix);
+  }
+  float* im_coeffs = NULL;
+  float* box_coeffs = NULL;
+  l_int32 incolor =
+      ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
+  if (pix != NULL && *pix != NULL) {
+    Pix* transformed = pixProjective(*pix, im_coeffs, incolor);
+    if (transformed == NULL) {
+      tprintf("Projective transformation failed!!\n");
+      boxes = NULL;  // Leave the boxes alone, but still free the coeffs below.
+    } else {
+      pixDestroy(pix);  // Swap the source image for the transformed one.
+      *pix = transformed;
+    }
+  }
+  if (boxes != NULL) {
+    // Transform the boxes.
+    for (int b = 0; b < boxes->size(); ++b) {
+      int x1, y1, x2, y2;
+      const TBOX& box = (*boxes)[b];
+      projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,
+                               &y1);
+      projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),
+                               &x2, &y2);
+      TBOX new_box1(x1, height - y2, x2, height - y1);
+      projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),
+                               &x1, &y1);
+      projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,
+                               &y2);
+      TBOX new_box2(x1, height - y1, x2, height - y2);
+      (*boxes)[b] = new_box1.bounding_union(new_box2);
+    }
+  }
+  free(im_coeffs);
+  free(box_coeffs);
+}
+
+// Computes the coefficients of a randomized projective transformation: the
+// backward (to -> from) coefficients for the image go in *im_coeffs and the
+// forward (from -> to) coefficients for the boxes go in *box_coeffs.
+// Returns the incolor arg to pixProjective.
+int ProjectiveCoeffs(int width, int height, TRand* randomizer,
+                     float** im_coeffs, float** box_coeffs) {
+  // Setup "from" points.
+  Pta* src_pts = ptaCreate(4);
+  ptaAddPt(src_pts, 0.0f, 0.0f);
+  ptaAddPt(src_pts, width, 0.0f);
+  ptaAddPt(src_pts, width, height);
+  ptaAddPt(src_pts, 0.0f, height);
+  // Extract factors from pseudo-random sequence.
+  float factors[FN_NUM_FACTORS];
+  float shear = 0.0f;  // Shear is signed.
+  for (int i = 0; i < FN_NUM_FACTORS; ++i) {
+    // All but FN_INCOLOR are squared to make wild values rarer.
+    if (i == FN_SHEAR) {
+      // Shear is signed.
+      shear = randomizer->SignedRand(0.5 / 3.0);
+      shear = shear >= 0.0 ? shear * shear : -shear * shear;
+      // Keep the sheared points within the original rectangle.
+      if (shear < -factors[FN_X0]) shear = -factors[FN_X0];
+      if (shear > factors[FN_X1]) shear = factors[FN_X1];
+      factors[i] = shear;
+    } else {
+      factors[i] = fabs(randomizer->SignedRand(1.0));
+      // FN_INCOLOR keeps this raw magnitude so the test below is random.
+      if (i != FN_INCOLOR) {
+        factors[i] *= (i <= FN_Y3) ? 5.0 / 8.0 : 0.5;
+        factors[i] *= factors[i];
+      }
+    }
+  }
+  // Setup "to" points.
+  Pta* dest_pts = ptaCreate(4);
+  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
+  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
+  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,
+           (1 - factors[FN_Y2]) * height);
+  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,
+           (1 - factors[FN_Y3]) * height);
+  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
+  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
+  ptaDestroy(&src_pts);
+  ptaDestroy(&dest_pts);
+  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
+}
+
 }  // namespace tesseract
diff --git a/training/degradeimage.h b/training/degradeimage.h
index 2add6282f8..a7af9565ff 100644
--- a/training/degradeimage.h
+++ b/training/degradeimage.h
@@ -20,12 +20,13 @@
 #ifndef TESSERACT_TRAINING_DEGRADEIMAGE_H_
 #define TESSERACT_TRAINING_DEGRADEIMAGE_H_
 
-struct Pix;
+#include "allheaders.h"
+#include "genericvector.h"
+#include "helpers.h"  // For TRand.
+#include "rect.h"
 
 namespace tesseract {
 
-class TRand;
-
 // Degrade the pix as if by a print/copy/scan cycle with exposure > 0
 // corresponding to darkening on the copier and <0 lighter and 0 not copied.
 // If rotation is not NULL, the clockwise rotation in radians is saved there.
@@ -34,6 +35,27 @@ class TRand;
 struct Pix* DegradeImage(struct Pix* input, int exposure, TRand* randomizer,
                          float* rotation);
 
+// Creates and returns a Pix distorted by various means according to the bool
+// flags. If boxes is not NULL, the boxes are resized/positioned according to
+// any spatial distortion and also by the integer reduction factor
+// box_reduction, so they will match what the network will output.
+// Returns NULL on error. The returned Pix must be pixDestroyed.
+Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
+                         bool white_noise, bool smooth_noise, bool blur,
+                         int box_reduction, TRand* randomizer,
+                         GenericVector<TBOX>* boxes);
+// Distorts anything that has a non-null pointer with the same pseudo-random
+// perspective distortion. Width and height only need to be set if there
+// is no pix. If there is a pix, then they will be taken from there.
+void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
+                                   Pix** pix, GenericVector<TBOX>* boxes);
+// Computes the coefficients of a randomized projective transformation.
+// The image transform requires backward transformation coefficient, and the
+// box transform the forward coefficients.
+// Returns the incolor arg to pixProjective.
+int ProjectiveCoeffs(int width, int height, TRand* randomizer,
+                     float** im_coeffs, float** box_coeffs);
+
 }  // namespace tesseract
 
 #endif  // TESSERACT_TRAINING_DEGRADEIMAGE_H_
diff --git a/training/fileio.cpp b/training/fileio.cpp
index e3e43bd023..bb1f4afcef 100644
--- a/training/fileio.cpp
+++ b/training/fileio.cpp
@@ -81,8 +81,9 @@ bool File::ReadFileToString(const string& filename, string* out) {
 }
 
 string File::JoinPath(const string& prefix, const string& suffix) {
-  return (!prefix.size() || prefix[prefix.size() - 1] == '/') ?
-      prefix + suffix : prefix + "/" + suffix;
+  return (prefix.empty() || prefix[prefix.size() - 1] == '/')
+             ? prefix + suffix
+             : prefix + "/" + suffix;
 }
 
 bool File::Delete(const char* pathname) {
diff --git a/training/language-specific.sh b/training/language-specific.sh
index a62f1e3cf3..b3c38cf876 100755
--- a/training/language-specific.sh
+++ b/training/language-specific.sh
@@ -1,4 +1,14 @@
 #
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 # Set some language specific variables. Works in conjunction with
 # tesstrain.sh
 #
@@ -868,6 +878,9 @@ set_lang_specific_parameters() {
   AMBIGS_FILTER_DENOMINATOR="100000"
   LEADING="32"
   MEAN_COUNT="40"  # Default for latin script.
+  # Language to mix with the language for maximum accuracy. Defaults to eng.
+  # If no language is good, set to the base language.
+  MIX_LANG="eng"
 
   case ${lang} in
     # Latin languages.
@@ -959,11 +972,13 @@ set_lang_specific_parameters() {
           WORD_DAWG_SIZE=1000000
           test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
 
-    # Cyrillic script-based languages.
+    # Cyrillic script-based languages. It is bad to mix Latin with Cyrillic.
     rus ) test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" )
+          MIX_LANG="rus"
           NUMBER_DAWG_FACTOR=0.05
           WORD_DAWG_SIZE=1000000 ;;
     aze_cyrl | bel | bul | kaz | mkd | srp | tgk | ukr | uzb_cyrl )
+          MIX_LANG="${lang}"
           test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" ) ;;
 
     # Special code for performing Cyrillic language-id that is trained on
diff --git a/training/ligature_table.h b/training/ligature_table.h
index ecae7943dd..83e7dc3c4d 100644
--- a/training/ligature_table.h
+++ b/training/ligature_table.h
@@ -32,7 +32,7 @@ namespace tesseract {
 class PangoFontInfo;  // defined in pango_font_info.h
 
 // Map to substitute strings for ligatures.
-typedef hash_map<string, string, StringHash> LigHash;
+typedef TessHashMap<string, string, StringHash> LigHash;
 
 class LigatureTable {
  public:
diff --git a/training/mftraining.cpp b/training/mftraining.cpp
index 60314a1cdf..9e2e250927 100644
--- a/training/mftraining.cpp
+++ b/training/mftraining.cpp
@@ -64,9 +64,6 @@
 #include "tprintf.h"
 #include "unicity_table.h"
 
-using tesseract::Classify;
-using tesseract::FontInfo;
-using tesseract::FontSpacingInfo;
 using tesseract::IndexMapBiDi;
 using tesseract::MasterTrainer;
 using tesseract::Shape;
@@ -305,6 +302,9 @@ int main (int argc, char **argv) {
                                     *shape_table, float_classes,
                                     inttemp_file.string(),
                                     pffmtable_file.string());
+  for (int c = 0; c < unicharset->size(); ++c) {
+    FreeClassFields(&float_classes[c]);
+  }
   delete [] float_classes;
   FreeLabeledClassList(mf_classes);
   delete trainer;
diff --git a/training/normstrngs.cpp b/training/normstrngs.cpp
index acffeee13d..e7cac21f4b 100644
--- a/training/normstrngs.cpp
+++ b/training/normstrngs.cpp
@@ -113,12 +113,12 @@ bool is_double_quote(const char32 ch) {
   return false;
 }
 
-STRING NormalizeUTF8String(const char* str8) {
+STRING NormalizeUTF8String(bool decompose, const char* str8) {
   GenericVector<char32> str32, out_str32, norm_str;
   UTF8ToUTF32(str8, &str32);
   for (int i = 0; i < str32.length(); ++i) {
     norm_str.clear();
-    NormalizeChar32(str32[i], &norm_str);
+    NormalizeChar32(str32[i], decompose, &norm_str);
     for (int j = 0; j < norm_str.length(); ++j) {
       out_str32.push_back(norm_str[j]);
     }
@@ -128,10 +128,10 @@ STRING NormalizeUTF8String(const char* str8) {
   return out_str8;
 }
 
-void NormalizeChar32(char32 ch, GenericVector<char32>* str) {
+void NormalizeChar32(char32 ch, bool decompose, GenericVector<char32>* str) {
   IcuErrorCode error_code;
   const icu::Normalizer2* nfkc = icu::Normalizer2::getInstance(
-      NULL, "nfkc", UNORM2_COMPOSE, error_code);
+      NULL, "nfkc", decompose ? UNORM2_DECOMPOSE : UNORM2_COMPOSE, error_code);
   error_code.assertSuccess();
   error_code.reset();
 
diff --git a/training/normstrngs.h b/training/normstrngs.h
index 71e7b8da08..6fca3193ab 100644
--- a/training/normstrngs.h
+++ b/training/normstrngs.h
@@ -39,11 +39,16 @@ void UTF32ToUTF8(const GenericVector<char32>& str32, STRING* utf8_str);
 // assumption of this function is that the input is already as fully composed
 // as it can be, but may require some compatibility normalizations or just
 // OCR evaluation related normalizations.
-void NormalizeChar32(char32 ch, GenericVector<char32>* str);
+void NormalizeChar32(char32 ch, bool decompose, GenericVector<char32>* str);
 
 // Normalize a UTF8 string. Same as above, but for UTF8-encoded strings, that
 // can contain multiple UTF32 code points.
-STRING NormalizeUTF8String(const char* str8);
+STRING NormalizeUTF8String(bool decompose, const char* str8);
+// Default behavior is to compose, until it is proven that decomposed benefits
+// at least one language.
+inline STRING NormalizeUTF8String(const char* str8) {
+  return NormalizeUTF8String(false, str8);
+}
 
 // Apply just the OCR-specific normalizations and return the normalized char.
 char32 OCRNormalize(char32 ch);
diff --git a/training/pango_font_info.cpp b/training/pango_font_info.cpp
index c2b508c02d..41e352eae4 100644
--- a/training/pango_font_info.cpp
+++ b/training/pango_font_info.cpp
@@ -60,15 +60,6 @@
 
 STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp",
                   "Overrides fontconfig default temporary dir");
-BOOL_PARAM_FLAG(fontconfig_refresh_cache, false,
-                "Does a one-time deletion of cache files from the "
-                "fontconfig_tmpdir before initializing fontconfig.");
-BOOL_PARAM_FLAG(fontconfig_refresh_config_file, true,
-                "Does a one-time reset of the fontconfig config file to point"
-                " to fonts_dir before initializing fontconfig. Set to true"
-                " if fontconfig_refresh_cache is true. Set it to false to use"
-                " multiple instances in separate processes without having to"
-                " rescan the fonts_dir, using a previously setup font cache");
 
 #ifndef USE_STD_NAMESPACE
 #include "ocr/trainingdata/typesetting/legacy_fonts.h"
@@ -91,7 +82,8 @@ namespace tesseract {
 // in pixels.
 const int kDefaultResolution = 300;
 
-bool PangoFontInfo::fontconfig_initialized_ = false;
+string PangoFontInfo::fonts_dir_;
+string PangoFontInfo::cache_dir_;
 
 PangoFontInfo::PangoFontInfo() : desc_(NULL), resolution_(kDefaultResolution) {
   Clear();
@@ -119,6 +111,8 @@ void PangoFontInfo::Clear() {
   }
 }
 
+PangoFontInfo::~PangoFontInfo() { pango_font_description_free(desc_); }
+
 string PangoFontInfo::DescriptionName() const {
   if (!desc_) return "";
   char* desc_str = pango_font_description_to_string(desc_);
@@ -127,59 +121,63 @@ string PangoFontInfo::DescriptionName() const {
   return desc_name;
 }
 
-// Initializes Fontconfig for use by writing a fake fonts.conf file into the
-// FLAGS_fontconfigs_tmpdir directory, that points to the supplied
-// fonts_dir, and then overrides the FONTCONFIG_PATH environment variable
-// to point to this fonts.conf file. If force_clear, the cache is refreshed
-// even if it has already been initialized.
-void PangoFontInfo::InitFontConfig(bool force_clear, const string& fonts_dir) {
-  if ((fontconfig_initialized_ && !force_clear) || fonts_dir.empty()) {
-    fontconfig_initialized_ = true;
-    return;
-  }
-  if (FLAGS_fontconfig_refresh_cache || force_clear) {
-    File::DeleteMatchingFiles(File::JoinPath(
-        FLAGS_fontconfig_tmpdir.c_str(), "*cache-?").c_str());
-  }
-  if (FLAGS_fontconfig_refresh_config_file || FLAGS_fontconfig_refresh_cache ||
-      force_clear) {
-    const int MAX_FONTCONF_FILESIZE = 1024;
-    char fonts_conf_template[MAX_FONTCONF_FILESIZE];
-    snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
-             "<?xml version=\"1.0\"?>\n"
-             "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
-             "<fontconfig>\n"
-             "<dir>%s</dir>\n"
-             "<cachedir>%s</cachedir>\n"
-             "<config></config>\n"
-             "</fontconfig>", fonts_dir.c_str(),
-             FLAGS_fontconfig_tmpdir.c_str());
-    string fonts_conf_file = File::JoinPath(FLAGS_fontconfig_tmpdir.c_str(),
-                                            "fonts.conf");
-    File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
+// If not already initialized, initializes FontConfig by setting its
+// environment variable and creating a fonts.conf file that points to the
+// FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
+/* static */
+void PangoFontInfo::SoftInitFontConfig() {
+  if (fonts_dir_.empty()) {
+    HardInitFontConfig(FLAGS_fonts_dir.c_str(),
+                       FLAGS_fontconfig_tmpdir.c_str());
   }
+}
+
+// Re-initializes font config, whether or not already initialized.
+// If already initialized, any existing cache is deleted, just to be sure.
+/* static */
+void PangoFontInfo::HardInitFontConfig(const string& fonts_dir,
+                                       const string& cache_dir) {
+  if (!cache_dir_.empty()) {
+    File::DeleteMatchingFiles(
+        File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
+  }
+  const int MAX_FONTCONF_FILESIZE = 1024;
+  char fonts_conf_template[MAX_FONTCONF_FILESIZE];
+  cache_dir_ = cache_dir;
+  fonts_dir_ = fonts_dir;
+  snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
+           "<?xml version=\"1.0\"?>\n"
+           "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
+           "<fontconfig>\n"
+           "<dir>%s</dir>\n"
+           "<cachedir>%s</cachedir>\n"
+           "<config></config>\n"
+           "</fontconfig>",
+           fonts_dir.c_str(), cache_dir_.c_str());
+  string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf");
+  File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
 #ifdef _WIN32
   std::string env("FONTCONFIG_PATH=");
-  env.append(FLAGS_fontconfig_tmpdir.c_str());
+  env.append(cache_dir_.c_str());
   putenv(env.c_str());
   putenv("LANG=en_US.utf8");
 #else
-  setenv("FONTCONFIG_PATH", FLAGS_fontconfig_tmpdir.c_str(), true);
+  setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true);
   // Fix the locale so that the reported font names are consistent.
   setenv("LANG", "en_US.utf8", true);
 #endif  // _WIN32
-  if (!fontconfig_initialized_ || force_clear) {
-    if (FcInitReinitialize() != FcTrue) {
-      tprintf("FcInitiReinitialize failed!!\n");
-    }
+
+  if (FcInitReinitialize() != FcTrue) {
+    tprintf("FcInitiReinitialize failed!!\n");
   }
-  fontconfig_initialized_ = true;
   FontUtils::ReInit();
+  // Clear Pango's font cache too.
+  pango_cairo_font_map_set_default(NULL);
 }
 
 static void ListFontFamilies(PangoFontFamily*** families,
                              int* n_families) {
-  PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str());
+  PangoFontInfo::SoftInitFontConfig();
   PangoFontMap* font_map = pango_cairo_font_map_get_default();
   DISABLE_HEAP_LEAK_CHECK;
   pango_font_map_list_families(font_map, families, n_families);
@@ -253,7 +251,7 @@ bool PangoFontInfo::ParseFontDescriptionName(const string& name) {
 // in the font map. Note that if the font is wholly missing, this could
 // correspond to a completely different font family and face.
 PangoFont* PangoFontInfo::ToPangoFont() const {
-  InitFontConfig(false, FLAGS_fonts_dir.c_str());
+  SoftInitFontConfig();
   PangoFontMap* font_map = pango_cairo_font_map_get_default();
   PangoContext* context = pango_context_new();
   pango_cairo_context_set_resolution(context, resolution_);
@@ -437,10 +435,15 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
     PangoGlyph dotted_circle_glyph;
     PangoFont* font = run->item->analysis.font;
 
-    PangoGlyphString * glyphs = pango_glyph_string_new();
+#ifdef _WIN32  // Fixme! Leaks memory and breaks unittests.
+    PangoGlyphString* glyphs = pango_glyph_string_new();
     char s[] = "\xc2\xa7";
     pango_shape(s, sizeof(s), &(run->item->analysis), glyphs);
     dotted_circle_glyph = glyphs->glyphs[0].glyph;
+#else
+    dotted_circle_glyph = pango_fc_font_get_glyph(
+        reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
+#endif
 
     if (TLOG_IS_ON(2)) {
       PangoFontDescription* desc = pango_font_describe(font);
@@ -519,22 +522,21 @@ vector<string> FontUtils::available_fonts_;  // cache list
 bool FontUtils::IsAvailableFont(const char* input_query_desc,
                                 string* best_match) {
   string query_desc(input_query_desc);
-  if (PANGO_VERSION <= 12005) {
-    // Strip commas and any ' Medium' substring in the name.
-    query_desc.erase(std::remove(query_desc.begin(), query_desc.end(), ','),
-                     query_desc.end());
-    const string kMediumStr = " Medium";
-    std::size_t found = query_desc.find(kMediumStr);
-    if (found != std::string::npos) {
-      query_desc.erase(found, kMediumStr.length());
-    }
+#if (PANGO_VERSION <= 12005)
+  // Strip commas and any ' Medium' substring in the name.
+  query_desc.erase(std::remove(query_desc.begin(), query_desc.end(), ','),
+                   query_desc.end());
+  const string kMediumStr = " Medium";
+  std::size_t found = query_desc.find(kMediumStr);
+  if (found != std::string::npos) {
+    query_desc.erase(found, kMediumStr.length());
   }
-
+#endif
   PangoFontDescription *desc = pango_font_description_from_string(
       query_desc.c_str());
   PangoFont* selected_font = NULL;
   {
-    PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str());
+    PangoFontInfo::SoftInitFontConfig();
     PangoFontMap* font_map = pango_cairo_font_map_get_default();
     PangoContext* context = pango_context_new();
     pango_context_set_font_map(context, font_map);
@@ -589,7 +591,7 @@ static bool ShouldIgnoreFontFamilyName(const char* query) {
 // Outputs description names of available fonts.
 /* static */
 const vector<string>& FontUtils::ListAvailableFonts() {
-  if (available_fonts_.size()) {
+  if (!available_fonts_.empty()) {
     return available_fonts_;
   }
 #ifndef USE_STD_NAMESPACE
@@ -686,9 +688,8 @@ void FontUtils::GetAllRenderableCharacters(const vector<string>& fonts,
 // Utilities written to be backward compatible with StringRender
 
 /* static */
-int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,
-                         const string& fontname,
-                         int* raw_score,
+int FontUtils::FontScore(const TessHashMap<char32, inT64>& ch_map,
+                         const string& fontname, int* raw_score,
                          vector<bool>* ch_flags) {
   PangoFontInfo font_info;
   if (!font_info.ParseFontDescriptionName(fontname)) {
@@ -703,7 +704,7 @@ int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,
   }
   *raw_score = 0;
   int ok_chars = 0;
-  for (unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
+  for (TessHashMap<char32, inT64>::const_iterator it = ch_map.begin();
        it != ch_map.end(); ++it) {
     bool covered = (IsWhitespace(it->first) ||
                     (pango_coverage_get(coverage, it->first)
@@ -721,7 +722,7 @@ int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,
 
 
 /* static */
-string FontUtils::BestFonts(const unordered_map<char32, inT64>& ch_map,
+string FontUtils::BestFonts(const TessHashMap<char32, inT64>& ch_map,
                             vector<pair<const char*, vector<bool> > >* fonts) {
   const double kMinOKFraction = 0.99;
   // Weighted fraction of characters that must be renderable in a font to make
diff --git a/training/pango_font_info.h b/training/pango_font_info.h
index 421139a163..5c04bf0a9c 100644
--- a/training/pango_font_info.h
+++ b/training/pango_font_info.h
@@ -24,10 +24,16 @@
 #include <utility>
 #include <vector>
 
+#include "commandlineflags.h"
 #include "hashfn.h"
 #include "host.h"
-#include "util.h"
 #include "pango/pango-font.h"
+#include "pango/pango.h"
+#include "pango/pangocairo.h"
+#include "util.h"
+
+DECLARE_STRING_PARAM_FLAG(fonts_dir);
+DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir);
 
 typedef signed int char32;
 
@@ -44,6 +50,7 @@ class PangoFontInfo {
     DECORATIVE,
   };
   PangoFontInfo();
+  ~PangoFontInfo();
   // Initialize from parsing a font description name, defined as a string of the
   // format:
   //   "FamilyName [FaceName] [PointSize]"
@@ -73,7 +80,7 @@ class PangoFontInfo {
   // If true, returns individual graphemes. Any whitespace characters in the
   // original string are also included in the list.
   bool CanRenderString(const char* utf8_word, int len,
-                       vector<string>* graphemes) const;
+                       std::vector<string>* graphemes) const;
   bool CanRenderString(const char* utf8_word, int len) const;
 
   // Retrieves the x_bearing and x_advance for the given utf8 character in the
@@ -83,25 +90,29 @@ class PangoFontInfo {
   bool GetSpacingProperties(const string& utf8_char,
                             int* x_bearing, int* x_advance) const;
 
-  // Initializes FontConfig by setting its environment variable and creating
-  // a fonts.conf file that points to the given fonts_dir. Once initialized,
-  // it is not re-initialized unless force_clear is true.
-  static void InitFontConfig(bool force_clear, const string& fonts_dir);
+  // If not already initialized, initializes FontConfig by setting its
+  // environment variable and creating a fonts.conf file that points to the
+  // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
+  static void SoftInitFontConfig();
+  // Re-initializes font config, whether or not already initialized.
+  // If already initialized, any existing cache is deleted, just to be sure.
+  static void HardInitFontConfig(const string& fonts_dir,
+                                 const string& cache_dir);
 
   // Accessors
   string DescriptionName() const;
   // Font Family name eg. "Arial"
   const string& family_name() const    { return family_name_; }
   // Size in points (1/72"), rounded to the nearest integer.
-  int font_size() const          { return font_size_; }
-  bool is_bold() const           { return is_bold_; }
-  bool is_italic() const         { return is_italic_; }
-  bool is_smallcaps() const      { return is_smallcaps_; }
-  bool is_monospace() const      { return is_monospace_; }
-  bool is_fraktur() const        { return is_fraktur_; }
+  int font_size() const { return font_size_; }
+  bool is_bold() const { return is_bold_; }
+  bool is_italic() const { return is_italic_; }
+  bool is_smallcaps() const { return is_smallcaps_; }
+  bool is_monospace() const { return is_monospace_; }
+  bool is_fraktur() const { return is_fraktur_; }
   FontTypeEnum font_type() const { return font_type_; }
 
-  int resolution() const         { return resolution_; }
+  int resolution() const { return resolution_; }
   void set_resolution(const int resolution) {
     resolution_ = resolution;
   }
@@ -130,8 +141,14 @@ class PangoFontInfo {
   int resolution_;
   // Fontconfig operates through an environment variable, so it intrinsically
   // cannot be thread-friendly, but you can serialize multiple independent
-  // font configurations by calling InitFontConfig(true, path).
-  static bool fontconfig_initialized_;
+  // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
+  // These hold the last initialized values set by HardInitFontConfig or
+  // the first call to SoftInitFontConfig.
+  // Directory to be scanned for font files.
+  static string fonts_dir_;
+  // Directory to store the cache of font information. (Can be the same as
+  // fonts_dir_)
+  static string cache_dir_;
 
  private:
   PangoFontInfo(const PangoFontInfo&);
@@ -152,29 +169,29 @@ class FontUtils {
   // best_match is not NULL, the closest matching font is returned there.
   static bool IsAvailableFont(const char* font_desc, string* best_match);
   // Outputs description names of available fonts.
-  static const vector<string>& ListAvailableFonts();
+  static const std::vector<string>& ListAvailableFonts();
 
   // Picks font among available fonts that covers and can render the given word,
   // and returns the font description name and the decomposition of the word to
   // graphemes. Returns false if no suitable font was found.
   static bool SelectFont(const char* utf8_word, const int utf8_len,
-                         string* font_name, vector<string>* graphemes);
+                         string* font_name, std::vector<string>* graphemes);
 
   // Picks font among all_fonts that covers and can render the given word,
   // and returns the font description name and the decomposition of the word to
   // graphemes. Returns false if no suitable font was found.
   static bool SelectFont(const char* utf8_word, const int utf8_len,
-                         const vector<string>& all_fonts,
-                         string* font_name, vector<string>* graphemes);
+                         const std::vector<string>& all_fonts,
+                         string* font_name, std::vector<string>* graphemes);
 
   // Returns a bitmask where the value of true at index 'n' implies that unicode
   // value 'n' is renderable by at least one available font.
-  static void GetAllRenderableCharacters(vector<bool>* unichar_bitmap);
+  static void GetAllRenderableCharacters(std::vector<bool>* unichar_bitmap);
   // Variant of the above function that inspects only the provided font names.
-  static void GetAllRenderableCharacters(const vector<string>& font_names,
-                                         vector<bool>* unichar_bitmap);
+  static void GetAllRenderableCharacters(const std::vector<string>& font_names,
+                                         std::vector<bool>* unichar_bitmap);
   static void GetAllRenderableCharacters(const string& font_name,
-                                         vector<bool>* unichar_bitmap);
+                                         std::vector<bool>* unichar_bitmap);
 
   // NOTE: The following utilities were written to be backward compatible with
   // StringRender.
@@ -185,23 +202,24 @@ class FontUtils {
   // In the flags vector, each flag is set according to whether the
   // corresponding character (in order of iterating ch_map) can be rendered.
   // The return string is a list of the acceptable fonts that were used.
-  static string BestFonts(const unordered_map<char32, inT64>& ch_map,
-      vector<std::pair<const char*, vector<bool> > >* font_flag);
+  static string BestFonts(
+      const TessHashMap<char32, inT64>& ch_map,
+      std::vector<std::pair<const char*, std::vector<bool> > >* font_flag);
 
   // FontScore returns the weighted renderability score of the given
   // hash map character table in the given font. The unweighted score
   // is also returned in raw_score.
   // The values in the bool vector ch_flags correspond to whether the
   // corresponding character (in order of iterating ch_map) can be rendered.
-  static int FontScore(const unordered_map<char32, inT64>& ch_map,
+  static int FontScore(const TessHashMap<char32, inT64>& ch_map,
                        const string& fontname, int* raw_score,
-                       vector<bool>* ch_flags);
+                       std::vector<bool>* ch_flags);
 
   // PangoFontInfo is reinitialized, so clear the static list of fonts.
   static void ReInit();
 
  private:
-  static vector<string> available_fonts_;  // cache list
+  static std::vector<string> available_fonts_;  // cache list
 };
 }  // namespace tesseract
 
diff --git a/training/set_unicharset_properties.cpp b/training/set_unicharset_properties.cpp
index 00844ecb56..691c6dcfa6 100644
--- a/training/set_unicharset_properties.cpp
+++ b/training/set_unicharset_properties.cpp
@@ -1,3 +1,13 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 // This program reads a unicharset file, puts the result in a UNICHARSET
 // object, fills it with properties about the unichars it contains and writes
 // the result back to a file.
diff --git a/training/stringrenderer.cpp b/training/stringrenderer.cpp
index fbff55fb11..07a0e2003d 100644
--- a/training/stringrenderer.cpp
+++ b/training/stringrenderer.cpp
@@ -52,7 +52,7 @@ static const int kDefaultOutputResolution = 300;
 // Word joiner (U+2060) inserted after letters in ngram mode, as per
 // recommendation in http://unicode.org/reports/tr14/ to avoid line-breaks at
 // hyphens and other non-alpha characters.
-static const char* kWordJoinerUTF8 = "\xE2\x81\xA0"; //u8"\u2060";
+static const char* kWordJoinerUTF8 = "\xE2\x81\xA0";  // u8"\u2060";
 static const char32 kWordJoiner = 0x2060;
 
 static bool IsCombiner(int ch) {
@@ -108,6 +108,7 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width,
       underline_start_prob_(0),
       underline_continuation_prob_(0),
       underline_style_(PANGO_UNDERLINE_SINGLE),
+      features_(NULL),
       drop_uncovered_chars_(true),
       strip_unrenderable_words_(false),
       add_ligatures_(false),
@@ -120,7 +121,6 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width,
       box_padding_(0),
       total_chars_(0),
       font_index_(0),
-      features_(NULL),
       last_offset_(0) {
   pen_color_[0] = 0.0;
   pen_color_[1] = 0.0;
@@ -209,8 +209,7 @@ void StringRenderer::SetLayoutProperties() {
 #if (PANGO_VERSION_MAJOR == 1 && PANGO_VERSION_MINOR >= 38)
   if (add_ligatures_) {
     set_features("liga, clig, dlig, hlig");
-    PangoAttribute* feature_attr =
-      pango_attr_font_features_new(features_);
+    PangoAttribute* feature_attr = pango_attr_font_features_new(features_);
     pango_attr_list_change(attr_list, feature_attr);
   }
 #endif
@@ -245,7 +244,7 @@ void StringRenderer::SetWordUnderlineAttributes(const string& page_text) {
   int offset = 0;
   TRand rand;
   bool started_underline = false;
-  PangoAttribute* und_attr = nullptr;
+  PangoAttribute* und_attr = NULL;
 
   while (offset < page_text.length()) {
     offset += SpanUTF8Whitespace(text + offset);
@@ -264,7 +263,7 @@ void StringRenderer::SetWordUnderlineAttributes(const string& page_text) {
         // previous word.
         pango_attr_list_insert(attr_list, und_attr);
         started_underline = false;
-        und_attr = nullptr;
+        und_attr = NULL;
       }
     }
     if (!started_underline && RandBool(underline_start_prob_, &rand)) {
@@ -348,6 +347,11 @@ void StringRenderer::ClearBoxes() {
   boxaDestroy(&page_boxes_);
 }
 
+string StringRenderer::GetBoxesStr() {
+  BoxChar::PrepareToWrite(&boxchars_);
+  return BoxChar::GetTesseractBoxStr(page_height_, boxchars_);
+}
+
 void StringRenderer::WriteAllBoxes(const string& filename) {
   BoxChar::PrepareToWrite(&boxchars_);
   BoxChar::WriteTesseractBoxFile(filename, page_height_, boxchars_);
@@ -396,7 +400,7 @@ bool StringRenderer::GetClusterStrings(vector<string>* cluster_text) {
        it != start_byte_to_text.end(); ++it) {
     cluster_text->push_back(it->second);
   }
-  return cluster_text->size();
+  return !cluster_text->empty();
 }
 
 // Merges an array of BoxChars into words based on the identification of
@@ -496,7 +500,7 @@ void StringRenderer::ComputeClusterBoxes() {
     const int end_byte_index = cluster_start_to_end_index[start_byte_index];
     string cluster_text = string(text + start_byte_index,
                                  end_byte_index - start_byte_index);
-    if (cluster_text.size() && cluster_text[0] == '\n') {
+    if (!cluster_text.empty() && cluster_text[0] == '\n') {
       tlog(2, "Skipping newlines at start of text.\n");
       continue;
     }
@@ -596,11 +600,12 @@ void StringRenderer::ComputeClusterBoxes() {
       all_boxes = boxaCreate(0);
     boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE);
   }
-  boxaGetExtent(all_boxes, NULL, NULL, &page_box);
-  boxaDestroy(&all_boxes);
-  if (page_boxes_ == NULL)
-    page_boxes_ = boxaCreate(0);
-  boxaAddBox(page_boxes_, page_box, L_INSERT);
+  if (all_boxes != NULL) {
+    boxaGetExtent(all_boxes, NULL, NULL, &page_box);
+    boxaDestroy(&all_boxes);
+    if (page_boxes_ == NULL) page_boxes_ = boxaCreate(0);
+    boxaAddBox(page_boxes_, page_box, L_INSERT);
+  }
 }
 
 
diff --git a/training/stringrenderer.h b/training/stringrenderer.h
index 942b7fddce..b4646f71b1 100644
--- a/training/stringrenderer.h
+++ b/training/stringrenderer.h
@@ -90,7 +90,7 @@ class StringRenderer {
   void set_underline_style(const PangoUnderline style) {
     underline_style_ = style;
   }
-  void set_features(const char *features) {
+  void set_features(const char* features) {
     free(features_);
     features_ = strdup(features);
   }
@@ -130,16 +130,12 @@ class StringRenderer {
   const PangoFontInfo& font() const {
     return font_;
   }
-  int h_margin() const {
-    return h_margin_;
-  }
-  int v_margin() const {
-    return v_margin_;
-  }
+  int h_margin() const { return h_margin_; }
+  int v_margin() const { return v_margin_; }
 
   // Get the boxchars of all clusters rendered thus far (or since the last call
   // to ClearBoxes()).
-  const vector<BoxChar*>& GetBoxes() const;
+  const std::vector<BoxChar*>& GetBoxes() const;
   // Get the rendered page bounding boxes of all pages created thus far (or
   // since last call to ClearBoxes()).
   Boxa* GetPageBoxes() const;
@@ -148,6 +144,9 @@ class StringRenderer {
   void RotatePageBoxes(float rotation);
   // Delete all boxes.
   void ClearBoxes();
+  // Returns the boxes in a boxfile string.
+  string GetBoxesStr();
+  // Writes the boxes to a boxfile.
   void WriteAllBoxes(const string& filename);
   // Removes space-delimited words from the string that are not renderable by
   // the current font and returns the count of such words.
@@ -172,8 +171,8 @@ class StringRenderer {
   void SetWordUnderlineAttributes(const string& page_text);
   // Compute bounding boxes around grapheme clusters.
   void ComputeClusterBoxes();
-  void CorrectBoxPositionsToLayout(vector<BoxChar*>* boxchars);
-  bool GetClusterStrings(vector<string>* cluster_text);
+  void CorrectBoxPositionsToLayout(std::vector<BoxChar*>* boxchars);
+  bool GetClusterStrings(std::vector<string>* cluster_text);
   int FindFirstPageBreakOffset(const char* text, int text_length);
 
   PangoFontInfo font_;
@@ -189,7 +188,7 @@ class StringRenderer {
   double underline_start_prob_;
   double underline_continuation_prob_;
   PangoUnderline underline_style_;
-  char *features_;
+  char* features_;
   // Text filtering options
   bool drop_uncovered_chars_;
   bool strip_unrenderable_words_;
@@ -205,13 +204,13 @@ class StringRenderer {
   int page_;
   // Boxes and associated text for all pages rendered with RenderToImage() since
   // the last call to ClearBoxes().
-  vector<BoxChar*> boxchars_;
+  std::vector<BoxChar*> boxchars_;
   int box_padding_;
   // Bounding boxes for pages since the last call to ClearBoxes().
   Boxa* page_boxes_;
 
   // Objects cached for subsequent calls to RenderAllFontsToImage()
-  hash_map<char32, inT64> char_map_;  // Time-saving char histogram.
+  TessHashMap<char32, inT64> char_map_;  // Time-saving char histogram.
   int total_chars_;   // Number in the string to be rendered.
   int font_index_;    // Index of next font to use in font list.
   int last_offset_;   // Offset returned from last successful rendering
diff --git a/training/tesstrain_utils.sh b/training/tesstrain_utils.sh
index c45d00378d..906a20ac4f 100755
--- a/training/tesstrain_utils.sh
+++ b/training/tesstrain_utils.sh
@@ -90,8 +90,8 @@ parse_flags() {
             --)
                 break;;
             --fontlist)
-		fn=0
-		FONTS=""
+                fn=0
+                FONTS=""
                 while test $j -lt ${#ARGV[@]}; do
                     test -z "${ARGV[$j]}" && break
                     test `echo ${ARGV[$j]} | cut -c -2` = "--" && break
@@ -199,7 +199,7 @@ generate_font_image() {
 
     local common_args="--fontconfig_tmpdir=${FONT_CONFIG_CACHE}"
     common_args+=" --fonts_dir=${FONTS_DIR} --strip_unrenderable_words"
-    common_args+=" --fontconfig_refresh_config_file=false --leading=${LEADING}"
+    common_args+=" --leading=${LEADING}"
     common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}"
     common_args+=" --outputbase=${outbase}"
 
diff --git a/training/text2image.cpp b/training/text2image.cpp
index 406669dc10..946f6facda 100644
--- a/training/text2image.cpp
+++ b/training/text2image.cpp
@@ -251,6 +251,8 @@ void ExtractFontProperties(const string &utf8_text,
       // the input consists of the separated characters.  NOTE(ranjith): As per
       // behdad@ this is not currently controllable at the level of the Pango
       // API.
+      // The most frequent of all is a single character "word" made by the CJK
+      // segmenter.
       // Safeguard against these cases here by just skipping the bigram.
       if (IsWhitespaceBox(boxes[b+1])) {
         continue;
@@ -445,7 +447,7 @@ int main(int argc, char** argv) {
     string pango_name;
     if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) {
       tprintf("Could not find font named %s.\n", FLAGS_font.c_str());
-      if (!pango_name.empty()) { 
+      if (!pango_name.empty()) {
         tprintf("Pango suggested font %s.\n", pango_name.c_str());
       }
       tprintf("Please correct --font arg.\n");
@@ -523,7 +525,7 @@ int main(int argc, char** argv) {
     if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
         !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
       tprintf("Failed to load unicharset from file %s\n",
-                 FLAGS_unicharset_file.c_str());
+              FLAGS_unicharset_file.c_str());
       exit(1);
     }
 
@@ -604,7 +606,8 @@ int main(int argc, char** argv) {
           rotation = -1 * page_rotation[page_num];
         }
         if (FLAGS_degrade_image) {
-          pix = DegradeImage(pix, FLAGS_exposure, &randomizer, FLAGS_rotate_image ? &rotation : NULL);
+          pix = DegradeImage(pix, FLAGS_exposure, &randomizer,
+                             FLAGS_rotate_image ? &rotation : NULL);
         }
         render.RotatePageBoxes(rotation);
 
diff --git a/training/unicharset_training_utils.cpp b/training/unicharset_training_utils.cpp
index 10aaf0e6c3..efa3a22cd5 100644
--- a/training/unicharset_training_utils.cpp
+++ b/training/unicharset_training_utils.cpp
@@ -37,7 +37,8 @@ namespace tesseract {
 
 // Helper sets the character attribute properties and sets up the script table.
 // Does not set tops and bottoms.
-void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) {
+void SetupBasicProperties(bool report_errors, bool decompose,
+                          UNICHARSET* unicharset) {
   for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
     // Convert any custom ligatures.
     const char* unichar_str = unicharset->id_to_unichar(unichar_id);
@@ -129,7 +130,7 @@ void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) {
     }
 
     // Record normalized version of this unichar.
-    STRING normed_str = tesseract::NormalizeUTF8String(unichar_str);
+    STRING normed_str = tesseract::NormalizeUTF8String(decompose, unichar_str);
     if (unichar_id != 0 && normed_str.length() > 0) {
       unicharset->set_normed(unichar_id, normed_str.c_str());
     } else {
@@ -158,7 +159,7 @@ void SetPropertiesForInputFile(const string& script_dir,
 
   // Set unichar properties
   tprintf("Setting unichar properties\n");
-  SetupBasicProperties(true, &unicharset);
+  SetupBasicProperties(true, false, &unicharset);
   string xheights_str;
   for (int s = 0; s < unicharset.get_script_table_size(); ++s) {
     // Load the unicharset for the script if available.
diff --git a/training/unicharset_training_utils.h b/training/unicharset_training_utils.h
index ff2262875d..f03e12ace4 100644
--- a/training/unicharset_training_utils.h
+++ b/training/unicharset_training_utils.h
@@ -33,7 +33,13 @@ namespace tesseract {
 
 // Helper sets the character attribute properties and sets up the script table.
 // Does not set tops and bottoms.
-void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset);
+void SetupBasicProperties(bool report_errors, bool decompose,
+                          UNICHARSET* unicharset);
+// Default behavior is to compose, until it is proven that decomposed benefits
+// at least one language.
+inline void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) {
+  SetupBasicProperties(report_errors, false, unicharset);
+}
 
 // Helper to set the properties for an input unicharset file, writes to the
 // output file. If an appropriate script unicharset can be found in the
diff --git a/viewer/scrollview.cpp b/viewer/scrollview.cpp
index ac059d5469..d4eb6d46a5 100644
--- a/viewer/scrollview.cpp
+++ b/viewer/scrollview.cpp
@@ -37,9 +37,9 @@
 #include "scrollview.h"
 
 #ifdef _MSC_VER
-#pragma warning(disable:4786)  // Don't give stupid warnings for stl
-#pragma warning(disable:4018)  // signed/unsigned warnings
-#pragma warning(disable:4530)  // exception warnings
+#pragma warning(disable : 4786)  // Don't give irrelevant warnings for stl
+#pragma warning(disable : 4018)  // signed/unsigned warnings
+#pragma warning(disable : 4530)  // exception warnings
 #endif
 
 const int kSvPort = 8461;
diff --git a/viewer/scrollview.h b/viewer/scrollview.h
index 12fac4e5db..cf1e182efb 100644
--- a/viewer/scrollview.h
+++ b/viewer/scrollview.h
@@ -89,7 +89,7 @@ class SVEventHandler {
 
 // Gets called by the SV Window. Does nothing on default, overwrite this
 // to implement the desired behaviour
-    virtual void Notify(const SVEvent* sve) { }
+    virtual void Notify(const SVEvent* sve) { (void)sve; }
 };
 
 // The ScrollView class provides the expernal API to the scrollviewer process.
@@ -327,7 +327,7 @@ class ScrollView {
 // be unique among menubar eventIDs.
   void MenuItem(const char* parent, const char* name, int cmdEvent);
 
-// This adds a new checkbox entry, which might initially be flagged.
+  // This adds a new checkbox entry, which might initially be flagged.
   void MenuItem(const char* parent, const char* name,
                 int cmdEvent, bool flagged);
 
diff --git a/viewer/svpaint.cpp b/viewer/svpaint.cpp
index 4d2f49d951..c267257bac 100644
--- a/viewer/svpaint.cpp
+++ b/viewer/svpaint.cpp
@@ -1,6 +1,15 @@
 // Copyright 2007 Google Inc. All Rights Reserved.
 //
 // Author: Joern Wanke
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 //
 // Simple drawing program to illustrate ScrollView capabilities.
 //
diff --git a/viewer/svutil.cpp b/viewer/svutil.cpp
index 80e11019e5..34a22861eb 100644
--- a/viewer/svutil.cpp
+++ b/viewer/svutil.cpp
@@ -22,6 +22,7 @@
 
 #include <stdio.h>
 #ifdef _WIN32
+#include <windows.h>
 struct addrinfo {
   struct sockaddr* ai_addr;
   int ai_addrlen;
@@ -31,13 +32,13 @@ struct addrinfo {
 };
 #else
 #include <arpa/inet.h>
+#include <netdb.h>
 #include <netinet/in.h>
 #include <pthread.h>
 #include <semaphore.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <string.h>
-#include <netdb.h>
 #include <sys/select.h>
 #include <sys/socket.h>
 #ifdef __linux__
@@ -56,10 +57,34 @@ struct addrinfo {
 #include "config_auto.h"
 #endif
 
-#ifndef GRAPHICS_DISABLED
-
 #include "svutil.h"
 
+SVMutex::SVMutex() {
+#ifdef _WIN32
+  mutex_ = CreateMutex(0, FALSE, 0);
+#else
+  pthread_mutex_init(&mutex_, NULL);
+#endif
+}
+
+void SVMutex::Lock() {
+#ifdef _WIN32
+  WaitForSingleObject(mutex_, INFINITE);
+#else
+  pthread_mutex_lock(&mutex_);
+#endif
+}
+
+void SVMutex::Unlock() {
+#ifdef _WIN32
+  ReleaseMutex(mutex_);
+#else
+  pthread_mutex_unlock(&mutex_);
+#endif
+}
+
+#ifndef GRAPHICS_DISABLED
+
 const int kMaxMsgSize = 4096;
 
 // Signals a thread to exit.
@@ -161,29 +186,6 @@ void SVSemaphore::Wait() {
 #endif
 }
 
-SVMutex::SVMutex() {
-#ifdef _WIN32
-  mutex_ = CreateMutex(0, FALSE, 0);
-#else
-  pthread_mutex_init(&mutex_, NULL);
-#endif
-}
-
-void SVMutex::Lock() {
-#ifdef _WIN32
-  WaitForSingleObject(mutex_, INFINITE);
-#else
-  pthread_mutex_lock(&mutex_);
-#endif
-}
-
-void SVMutex::Unlock() {
-#ifdef _WIN32
-  ReleaseMutex(mutex_);
-#else
-  pthread_mutex_unlock(&mutex_);
-#endif
-}
 
 // Create new thread.
 
@@ -200,7 +202,12 @@ void SVSync::StartThread(void *(*func)(void*), void* arg) {
   &threadid);    // returns the thread identifier
 #else
   pthread_t helper;
-  pthread_create(&helper, NULL, func, arg);
+  pthread_attr_t attr;
+  pthread_attr_init(&attr);
+  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+  pthread_create(&helper, &attr, func, arg);
+  // The attributes are only read by pthread_create; release them afterwards.
+  pthread_attr_destroy(&attr);
 #endif
 }
 
@@ -214,7 +221,7 @@ void SVNetwork::Send(const char* msg) {
 // Send the whole buffer.
 void SVNetwork::Flush() {
   mutex_send_->Lock();
-  while (msg_buffer_out_.size() > 0) {
+  while (!msg_buffer_out_.empty()) {
     int i = send(stream_, msg_buffer_out_.c_str(), msg_buffer_out_.length(), 0);
     msg_buffer_out_.erase(0, i);
   }
@@ -302,7 +309,8 @@ static std::string ScrollViewCommand(std::string scrollview_path) {
   const char* cmd_template = "-Djava.library.path=%s -jar %s/ScrollView.jar";
 
 #else
-  const char* cmd_template = "-c \"trap 'kill %%1' 0 1 2 ; java "
+  const char* cmd_template =
+      "-c \"trap 'kill %%1' 0 1 2 ; java "
       "-Xms1024m -Xmx2048m -jar %s/ScrollView.jar"
       " & wait\"";
 #endif
diff --git a/viewer/svutil.h b/viewer/svutil.h
index ccfce917fe..667c052083 100644
--- a/viewer/svutil.h
+++ b/viewer/svutil.h
@@ -26,6 +26,7 @@
 
 #ifdef _WIN32
 #ifndef __GNUC__
+#include "platform.h"
 #include <windows.h>
 #if defined(_MSC_VER) && _MSC_VER < 1900
 #define snprintf _snprintf
@@ -102,6 +103,22 @@ class SVMutex {
 #endif
 };
 
+// Auto-unlocking object that locks a mutex on construction and unlocks it
+// on destruction. |mutex| must be non-NULL and must outlive this object.
+class SVAutoLock {
+ public:
+  explicit SVAutoLock(SVMutex* mutex) : mutex_(mutex) { mutex->Lock(); }
+  ~SVAutoLock() { mutex_->Unlock(); }
+
+ private:
+  // Not copyable: a copy would unlock the same mutex a second time when it
+  // is destroyed. Declared but intentionally left undefined.
+  SVAutoLock(const SVAutoLock&);
+  SVAutoLock& operator=(const SVAutoLock&);
+
+  SVMutex* mutex_;
+};
+
 /// The SVNetwork class takes care of the remote connection for ScrollView
 /// This means setting up and maintaining a remote connection, sending and
 /// receiving messages and closing the connection.
diff --git a/vs2010/include/tesseract_versionnumbers.props b/vs2010/include/tesseract_versionnumbers.props
index e16989d0e7..77bb3e03ab 100644
--- a/vs2010/include/tesseract_versionnumbers.props
+++ b/vs2010/include/tesseract_versionnumbers.props
@@ -4,9 +4,9 @@
     <Import Project="leptonica_versionnumbers.props" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros">
-    <LIBTESS_VERSION>303</LIBTESS_VERSION>
-    <LIBTESS_VERSION_R>3,3,0,0</LIBTESS_VERSION_R>
-    <LIBTESS_NUMBER>3.03</LIBTESS_NUMBER>
+    <LIBTESS_VERSION>305</LIBTESS_VERSION>
+    <LIBTESS_VERSION_R>3,5,0,0</LIBTESS_VERSION_R>
+    <LIBTESS_NUMBER>3.05</LIBTESS_NUMBER>
   </PropertyGroup>
   <PropertyGroup>
     <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
diff --git a/vs2010/libtesseract/libtesseract.rc b/vs2010/libtesseract/libtesseract.rc
index f72d17f5a2..a4c5fc4568 100644
--- a/vs2010/libtesseract/libtesseract.rc
+++ b/vs2010/libtesseract/libtesseract.rc
@@ -72,7 +72,7 @@ BEGIN
             VALUE "FileDescription", "Tesseract OCR library"
             VALUE "FileVersion", "3, 5, 0, 0"
             VALUE "InternalName", "libtesseract"
-            VALUE "LegalCopyright", "Copyright (C) 2015 Google, Inc. Licensed under the Apache License, Version 2.0"
+            VALUE "LegalCopyright", "Copyright (C) 2016 Google, Inc. Licensed under the Apache License, Version 2.0"
             VALUE "OriginalFilename", "libtesseract"
             VALUE "ProductName", "Tesseract OCR Library"
             VALUE "ProductVersion", "3, 5, 0, 0"
diff --git a/vs2010/libtesseract/libtesseract.vcxproj b/vs2010/libtesseract/libtesseract.vcxproj
index 9b734d5d52..10762fe92e 100644
--- a/vs2010/libtesseract/libtesseract.vcxproj
+++ b/vs2010/libtesseract/libtesseract.vcxproj
@@ -27,7 +27,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
-    <ProjectName>libtesseract304</ProjectName>
+    <ProjectName>libtesseract305</ProjectName>
     <ProjectGuid>{D14F28C7-0CAE-4C37-B174-40FDEFBD4FE0}</ProjectGuid>
     <RootNamespace>libtesseract</RootNamespace>
   </PropertyGroup>
@@ -448,7 +448,6 @@ copy "$(TargetDir)$(TargetName).lib" ..\..\..\lib
     <ClCompile Include="..\..\wordrec\language_model.cpp" />
     <ClCompile Include="..\..\textord\linefind.cpp" />
     <ClCompile Include="..\..\ccstruct\linlsq.cpp" />
-    <ClCompile Include="..\..\cutil\listio.cpp" />
     <ClCompile Include="..\..\ccmain\ltrresultiterator.cpp" />
     <ClCompile Include="..\..\ccutil\mainblk.cpp" />
     <ClCompile Include="..\..\textord\makerow.cpp" />
@@ -716,7 +715,6 @@ copy "$(TargetDir)$(TargetName).lib" ..\..\..\lib
     <ClInclude Include="..\..\wordrec\language_model.h" />
     <ClInclude Include="..\..\textord\linefind.h" />
     <ClInclude Include="..\..\ccstruct\linlsq.h" />
-    <ClInclude Include="..\..\cutil\listio.h" />
     <ClInclude Include="..\..\ccutil\lsterr.h" />
     <ClInclude Include="..\..\ccmain\ltrresultiterator.h" />
     <ClInclude Include="..\..\textord\makerow.h" />
@@ -867,4 +865,4 @@ copy "$(TargetDir)$(TargetName).lib" ..\..\..\lib
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/vs2010/libtesseract/libtesseract.vcxproj.filters b/vs2010/libtesseract/libtesseract.vcxproj.filters
index 911f3f7b9f..aa18a673e2 100644
--- a/vs2010/libtesseract/libtesseract.vcxproj.filters
+++ b/vs2010/libtesseract/libtesseract.vcxproj.filters
@@ -361,9 +361,6 @@
     <ClCompile Include="..\..\textord\linefind.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\cutil\listio.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="..\..\wordrec\lm_consistency.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -1170,9 +1167,6 @@
     <ClInclude Include="..\..\ccstruct\linlsq.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="..\..\cutil\listio.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
     <ClInclude Include="..\..\wordrec\lm_consistency.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -1610,4 +1604,4 @@
       <Filter>Resource Files</Filter>
     </ResourceCompile>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/vs2010/port/vcsversion.h b/vs2010/port/vcsversion.h
index 6d5bed8074..d620733040 100644
--- a/vs2010/port/vcsversion.h
+++ b/vs2010/port/vcsversion.h
@@ -1,2 +1,2 @@
-#define GIT_REV "3.04.00"
+#define GIT_REV "3.05.00dev"
 
diff --git a/vs2010/tesseract/tesseract.rc b/vs2010/tesseract/tesseract.rc
index f2fc5248dc..48ae7fc88b 100644
--- a/vs2010/tesseract/tesseract.rc
+++ b/vs2010/tesseract/tesseract.rc
@@ -72,7 +72,7 @@ BEGIN
             VALUE "FileDescription", "Tesseract command-line OCR engine"
             VALUE "FileVersion", "3,5,0,0"
             VALUE "InternalName", "tesseract"
-            VALUE "LegalCopyright", "Copyright (C) 2015 Google, Inc. Licensed under the Apache License, Version 2.0"
+            VALUE "LegalCopyright", "Copyright (C) 2016 Google, Inc. Licensed under the Apache License, Version 2.0"
             VALUE "OriginalFilename", "tesseract.exe"
             VALUE "ProductName", "Tesseract-OCR"
             VALUE "ProductVersion", "3.05.00dev"
diff --git a/wordrec/associate.h b/wordrec/associate.h
index 3d6fc44708..10b1e0b7d9 100644
--- a/wordrec/associate.h
+++ b/wordrec/associate.h
@@ -47,9 +47,7 @@ struct AssociateStats {
     gap_sum = 0;
   }
 
-  void Print() {
-    tprintf("AssociateStats: w(%g %d)\n", shape_cost, bad_shape);
-  }
+  void Print() { tprintf("AssociateStats: s(%g %d)\n", shape_cost, bad_shape); }
 
   float shape_cost;           // cost of blob shape
   bool bad_shape;             // true if the shape of the blob is unacceptable
diff --git a/wordrec/chopper.cpp b/wordrec/chopper.cpp
index 69a458bc2c..dfda3e9183 100644
--- a/wordrec/chopper.cpp
+++ b/wordrec/chopper.cpp
@@ -426,7 +426,7 @@ void Wordrec::chop_word_main(WERD_RES *word) {
 
   if (word->best_choice == NULL) {
     // SegSearch found no valid paths, so just use the leading diagonal.
-    word->FakeWordFromRatings();
+    word->FakeWordFromRatings(TOP_CHOICE_PERM);
   }
   word->RebuildBestState();
   // If we finished without a hyphen at the end of the word, let the next word
@@ -568,9 +568,7 @@ int Wordrec::select_blob_to_split(
 
   for (x = 0; x < blob_choices.size(); ++x) {
     if (blob_choices[x] == NULL) {
-      if (fragments != NULL) {
-        delete[] fragments;
-      }
+      delete[] fragments;
       return x;
     } else {
       blob_choice = blob_choices[x];
@@ -614,9 +612,7 @@ int Wordrec::select_blob_to_split(
       }
     }
   }
-  if (fragments != NULL) {
-    delete[] fragments;
-  }
+  delete[] fragments;
   // TODO(daria): maybe a threshold of badness for
   // worst_near_fragment would be useful.
   return worst_index_near_fragment != -1 ?
diff --git a/wordrec/language_model.cpp b/wordrec/language_model.cpp
index f0e3be66f2..99710478ed 100644
--- a/wordrec/language_model.cpp
+++ b/wordrec/language_model.cpp
@@ -32,7 +32,7 @@
 #include "params.h"
 #include "params_training_featdef.h"
 
-#if defined(_MSC_VER) || defined(ANDROID)
+#if (defined(_MSC_VER) && _MSC_VER < 1900) || defined(ANDROID)
 double log2(double n) {
   return log(n) / log(2.0);
 }
@@ -988,7 +988,7 @@ float LanguageModel::ComputeNgramCost(const char *unichar,
             unichar, context_ptr, CertaintyScore(certainty)/denom, prob,
             ngram_and_classifier_cost);
   }
-  if (modified_context != NULL) delete[] modified_context;
+  delete[] modified_context;
   return ngram_and_classifier_cost;
 }
 
diff --git a/wordrec/lm_state.h b/wordrec/lm_state.h
index 623bbb5e7f..6229e9b350 100644
--- a/wordrec/lm_state.h
+++ b/wordrec/lm_state.h
@@ -48,8 +48,8 @@ typedef unsigned char LanguageModelFlagsType;
 /// Each ViterbiStateEntry contains information from various components of the
 /// language model: dawgs in which the path is found, character ngram model
 /// probability of the path, script/chartype/font consistency info, state for
-/// language-specific heuristics (e.g. hyphenated and compound words, lower/upper
-/// case preferences, etc).
+/// language-specific heuristics (e.g. hyphenated and compound words,
+/// lower/upper case preferences, etc).
 ///
 /// Each ViterbiStateEntry also contains the parent pointer, so that the path
 /// that it represents (WERD_CHOICE) can be constructed by following these
@@ -165,13 +165,13 @@ struct ViterbiStateEntry : public ELIST_LINK {
 
   /// Various information about the characters on the path represented
   /// by this ViterbiStateEntry.
-  float ratings_sum;  //< sum of ratings of character on the path
-  float min_certainty;  //< minimum certainty on the path
-  int adapted;  //< number of BLOB_CHOICES from adapted templates
-  int length;  //< number of characters on the path
+  float ratings_sum;     //< sum of ratings of character on the path
+  float min_certainty;   //< minimum certainty on the path
+  int adapted;           //< number of BLOB_CHOICES from adapted templates
+  int length;            //< number of characters on the path
   float outline_length;  //< length of the outline so far
   LMConsistencyInfo consistency_info;  //< path consistency info
-  AssociateStats associate_stats;  //< character widths/gaps/seams
+  AssociateStats associate_stats;      //< character widths/gaps/seams
 
   /// Flags for marking the entry as a top choice path with
   /// the smallest rating or lower/upper case letters).
diff --git a/wordrec/measure.h b/wordrec/measure.h
index 9c73906853..894938e55a 100644
--- a/wordrec/measure.h
+++ b/wordrec/measure.h
@@ -60,10 +60,9 @@ typedef struct
  * Add one more sample to a measurement.
  **********************************************************************/
 
-#define ADD_SAMPLE(m,s)                           \
-(m.sum_of_samples += (float) (s),               \
-	m.sum_of_squares += (float) (s) * (float) (s), \
-	++m.num_samples)
+#define ADD_SAMPLE(m, s)           \
+  (m.sum_of_samples += (float)(s), \
+   m.sum_of_squares += (float)(s) * (float)(s), ++m.num_samples)
 
 /**********************************************************************
  * mean
@@ -71,10 +70,8 @@ typedef struct
  * Return the mean value of the measurement.
  **********************************************************************/
 
-#define MEAN(m)                                       \
-((m).num_samples ?                                  \
-	((float) ((m).sum_of_samples / (m).num_samples)) : \
-	0)
+#define MEAN(m) \
+  ((m).num_samples ? ((float)((m).sum_of_samples / (m).num_samples)) : 0)
 
 /**********************************************************************
  * new_measurement
@@ -83,10 +80,8 @@ typedef struct
  * samples.
  **********************************************************************/
 
-#define new_measurement(m)   \
-((m).num_samples    = 0, \
-	(m).sum_of_samples = 0, \
-	(m).sum_of_squares = 0)
+#define new_measurement(m) \
+  ((m).num_samples = 0, (m).sum_of_samples = 0, (m).sum_of_squares = 0)
 
 /**********************************************************************
  * number_of_samples
@@ -112,13 +107,12 @@ typedef struct
  * Return the variance of the measurement.
  **********************************************************************/
 
-#define VARIANCE(m)                                   \
-(((m).num_samples > 1) ?                            \
-	((float)                                           \
-	(((m).num_samples * (m).sum_of_squares -          \
-		(m).sum_of_samples * (m).sum_of_samples) /      \
-	(((m).num_samples - 1) * (m).num_samples)))   :  \
-	0)
+#define VARIANCE(m)                                           \
+  (((m).num_samples > 1)                                      \
+       ? ((float)(((m).num_samples * (m).sum_of_squares -     \
+                   (m).sum_of_samples * (m).sum_of_samples) / \
+                  (((m).num_samples - 1) * (m).num_samples))) \
+       : 0)
 
 /**********************************************************************
  * print_summary
@@ -126,10 +120,8 @@ typedef struct
  * Summarize a MEASUREMENT record.
  **********************************************************************/
 
-#define print_summary(string,measure)                       \
-cprintf ("\t%-20s \tn = %d, \tm = %4.2f, \ts = %4.2f\n ",  \
-			string,                                          \
-			number_of_samples  (measure),                    \
-			MEAN           (measure),                    \
-			standard_deviation (measure))
+#define print_summary(string, measure)                             \
+  cprintf("\t%-20s \tn = %d, \tm = %4.2f, \ts = %4.2f\n ", string, \
+          number_of_samples(measure), MEAN(measure),               \
+          standard_deviation(measure))
 #endif
diff --git a/wordrec/pieces.cpp b/wordrec/pieces.cpp
index 04e340396e..f7b406d5bc 100644
--- a/wordrec/pieces.cpp
+++ b/wordrec/pieces.cpp
@@ -267,7 +267,6 @@ void Wordrec::merge_and_put_fragment_lists(inT16 row, inT16 column,
   delete [] choice_lists_it;
 }
 
-
 /**********************************************************************
  * get_fragment_lists
  *
diff --git a/wordrec/tface.cpp b/wordrec/tface.cpp
index e21fcb8829..b1fc1779fb 100644
--- a/wordrec/tface.cpp
+++ b/wordrec/tface.cpp
@@ -49,7 +49,11 @@ void Wordrec::program_editup(const char *textbase,
   if (textbase != NULL) imagefile = textbase;
   InitFeatureDefs(&feature_defs_);
   InitAdaptiveClassifier(init_classifier);
-  if (init_dict) getDict().Load(Dict::GlobalDawgCache());
+  if (init_dict) {
+    getDict().SetupForLoad(Dict::GlobalDawgCache());
+    getDict().Load(tessdata_manager.GetDataFileName().string(), lang);
+    getDict().FinishLoad();
+  }
   pass2_ok_split = chop_ok_split;
 }