Skip to content

Commit

Permalink
Major bug fixes to pango renderer and resolved issue of hash_map vs u…
Browse files Browse the repository at this point in the history
…nordered_map
  • Loading branch information
theraysmith committed Nov 7, 2016
1 parent 2c837df commit a987e6d
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 96 deletions.
28 changes: 9 additions & 19 deletions ccutil/hashfn.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,15 @@
#ifndef HASHFN_H
#define HASHFN_H

#ifdef USE_STD_NAMESPACE
#if (__cplusplus >= 201103L) || defined(_MSC_VER) // Visual Studio
#include <unordered_map>
#include <unordered_set>
#define hash_map std::unordered_map
#if (_MSC_VER >= 1500 && _MSC_VER < 1600) // Visual Studio 2008
using namespace std::tr1;
#if defined(_MSC_VER) && (_MSC_VER >= 1500 && _MSC_VER < 1600) // VS 2008
#define TessHashMap std::tr1::unordered_map
#define TessHashSet std::tr1::unordered_set
#else // _MSC_VER
using std::unordered_map;
using std::unordered_set;
#define TessHashMap std::unordered_map
#define TessHashSet std::unordered_set
#include <memory>
#define SmartPtr std::unique_ptr
#define HAVE_UNIQUE_PTR
Expand All @@ -41,23 +40,14 @@ using std::unordered_set;
#include <ext/hash_set>
using __gnu_cxx::hash_map;
using __gnu_cxx::hash_set;
#define unordered_map hash_map
#define unordered_set hash_set
#define TessHashMap __gnu_cxx::hash_map
#define TessHashSet __gnu_cxx::hash_set
#else
#include <hash_map>
#include <hash_set>
// Fallback aliases for toolchains that only provide the pre-standard
// <hash_map>/<hash_set> headers: both Tess* names map onto the unqualified
// legacy containers. (The set alias must not carry a leading ':' -- that
// makes every use of TessHashSet a syntax error.)
#define TessHashMap hash_map
#define TessHashSet hash_set
#endif // gcc
#elif (__clang__)
#include <unordered_map>
#include <unordered_set>
#define hash_map std::unordered_map
#define unordered_set std::unordered_set
#else // USE_STD_NAMESPACE
#include <hash_map>
#include <hash_set>
#define unordered_map hash_map
#define unordered_set hash_set
#endif // USE_STD_NAMESPACE

#ifndef HAVE_UNIQUE_PTR
// Trivial smart ptr. Expand to add features of std::unique_ptr as required.
Expand Down
2 changes: 1 addition & 1 deletion textord/bbgrid.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {
// An iterator over the list at (x_, y_) in the grid_.
BBC_C_IT it_;
// Set of unique returned elements used when unique_mode_ is true.
unordered_set<BBC*, PtrHash<BBC> > returns_;
TessHashSet<BBC*, PtrHash<BBC> > returns_;
};

// Sort function to sort a BBC by bounding_box().left().
Expand Down
2 changes: 1 addition & 1 deletion training/ligature_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace tesseract {
class PangoFontInfo; // defined in pango_font_info.h

// Map to substitute strings for ligatures.
typedef hash_map<string, string, StringHash> LigHash;
typedef TessHashMap<string, string, StringHash> LigHash;

class LigatureTable {
public:
Expand Down
106 changes: 51 additions & 55 deletions training/pango_font_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,6 @@

STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp",
"Overrides fontconfig default temporary dir");
BOOL_PARAM_FLAG(fontconfig_refresh_cache, false,
"Does a one-time deletion of cache files from the "
"fontconfig_tmpdir before initializing fontconfig.");
BOOL_PARAM_FLAG(fontconfig_refresh_config_file, true,
"Does a one-time reset of the fontconfig config file to point"
" to fonts_dir before initializing fontconfig. Set to true"
" if fontconfig_refresh_cache is true. Set it to false to use"
" multiple instances in separate processes without having to"
" rescan the fonts_dir, using a previously setup font cache");

#ifndef USE_STD_NAMESPACE
#include "ocr/trainingdata/typesetting/legacy_fonts.h"
Expand All @@ -91,7 +82,8 @@ namespace tesseract {
// in pixels.
const int kDefaultResolution = 300;

bool PangoFontInfo::fontconfig_initialized_ = false;
string PangoFontInfo::fonts_dir_;
string PangoFontInfo::cache_dir_;

PangoFontInfo::PangoFontInfo() : desc_(NULL), resolution_(kDefaultResolution) {
Clear();
Expand Down Expand Up @@ -119,6 +111,8 @@ void PangoFontInfo::Clear() {
}
}

// Destructor: hands desc_ to pango_font_description_free().
PangoFontInfo::~PangoFontInfo() {
  pango_font_description_free(desc_);
}

string PangoFontInfo::DescriptionName() const {
if (!desc_) return "";
char* desc_str = pango_font_description_to_string(desc_);
Expand All @@ -127,59 +121,62 @@ string PangoFontInfo::DescriptionName() const {
return desc_name;
}

// Initializes Fontconfig for use by writing a fake fonts.conf file into the
// FLAGS_fontconfig_tmpdir directory, that points to the supplied
// fonts_dir, and then overrides the FONTCONFIG_PATH environment variable
// to point to this fonts.conf file. If force_clear, the cache is refreshed
// even if it has already been initialized.
void PangoFontInfo::InitFontConfig(bool force_clear, const string& fonts_dir) {
if ((fontconfig_initialized_ && !force_clear) || fonts_dir.empty()) {
fontconfig_initialized_ = true;
return;
}
if (FLAGS_fontconfig_refresh_cache || force_clear) {
File::DeleteMatchingFiles(File::JoinPath(
FLAGS_fontconfig_tmpdir.c_str(), "*cache-?").c_str());
}
if (FLAGS_fontconfig_refresh_config_file || FLAGS_fontconfig_refresh_cache ||
force_clear) {
const int MAX_FONTCONF_FILESIZE = 1024;
char fonts_conf_template[MAX_FONTCONF_FILESIZE];
snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
"<?xml version=\"1.0\"?>\n"
"<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
"<fontconfig>\n"
"<dir>%s</dir>\n"
"<cachedir>%s</cachedir>\n"
"<config></config>\n"
"</fontconfig>", fonts_dir.c_str(),
FLAGS_fontconfig_tmpdir.c_str());
string fonts_conf_file = File::JoinPath(FLAGS_fontconfig_tmpdir.c_str(),
"fonts.conf");
File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
// Lazy FontConfig setup. When no fonts directory has been recorded yet
// (fonts_dir_ is empty), runs HardInitFontConfig with FLAGS_fonts_dir as the
// fonts directory and FLAGS_fontconfig_tmpdir as the cache directory.
// Otherwise does nothing.
/* static */
void PangoFontInfo::SoftInitFontConfig() {
  const bool already_initialized = !fonts_dir_.empty();
  if (already_initialized) return;
  HardInitFontConfig(FLAGS_fonts_dir, FLAGS_fontconfig_tmpdir);
}

// Re-initializes font config, whether or not already initialized.
// If already initialized, any existing cache is deleted, just to be sure.
// Steps, in order: delete files matching "*cache-?" in the previously
// recorded cache_dir_ (if any), record cache_dir/fonts_dir in the static
// members, write a fonts.conf into cache_dir that names both directories,
// point the FONTCONFIG_PATH environment variable at cache_dir, force LANG to
// en_US.utf8, and call FcInitReinitialize().
//   fonts_dir: written into the <dir> element of fonts.conf.
//   cache_dir: written into the <cachedir> element; also the directory in
//              which fonts.conf itself is written.
/* static */
void PangoFontInfo::HardInitFontConfig(const string& fonts_dir,
const string& cache_dir) {
// cache_dir_ still holds the value from the previous initialization here, so
// it is the old cache directory that gets cleaned, not the new cache_dir.
if (!cache_dir_.empty()) {
File::DeleteMatchingFiles(
File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
}
// NOTE(review): the generated config is formatted into a fixed 1024-byte
// buffer; snprintf truncates silently if the two paths make it longer.
const int MAX_FONTCONF_FILESIZE = 1024;
char fonts_conf_template[MAX_FONTCONF_FILESIZE];
cache_dir_ = cache_dir;
fonts_dir_ = fonts_dir;
snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
"<?xml version=\"1.0\"?>\n"
"<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
"<fontconfig>\n"
"<dir>%s</dir>\n"
"<cachedir>%s</cachedir>\n"
"<config></config>\n"
"</fontconfig>",
fonts_dir.c_str(), cache_dir_.c_str());
string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf");
File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
#ifdef _WIN32
std::string env("FONTCONFIG_PATH=");
env.append(FLAGS_fontconfig_tmpdir.c_str());
env.append(cache_dir_.c_str());
putenv(env.c_str());
putenv("LANG=en_US.utf8");
#else
setenv("FONTCONFIG_PATH", FLAGS_fontconfig_tmpdir.c_str(), true);
setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true);
// Fix the locale so that the reported font names are consistent.
setenv("LANG", "en_US.utf8", true);
#endif // _WIN32
if (!fontconfig_initialized_ || force_clear) {
if (FcInitReinitialize() != FcTrue) {
tprintf("FcInitiReinitialize failed!!\n");
}

// A failed re-initialization is only logged; execution continues.
// NOTE(review): the log message below spells the function name
// "FcInitiReinitialize"; the function called is FcInitReinitialize.
if (FcInitReinitialize() != FcTrue) {
tprintf("FcInitiReinitialize failed!!\n");
}
fontconfig_initialized_ = true;
FontUtils::ReInit();
// Clear Pango's font cache too.
pango_cairo_font_map_set_default(NULL);
}

static void ListFontFamilies(PangoFontFamily*** families,
int* n_families) {
PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str());
PangoFontInfo::SoftInitFontConfig();
PangoFontMap* font_map = pango_cairo_font_map_get_default();
DISABLE_HEAP_LEAK_CHECK;
pango_font_map_list_families(font_map, families, n_families);
Expand Down Expand Up @@ -253,7 +250,7 @@ bool PangoFontInfo::ParseFontDescriptionName(const string& name) {
// in the font map. Note that if the font is wholly missing, this could
// correspond to a completely different font family and face.
PangoFont* PangoFontInfo::ToPangoFont() const {
InitFontConfig(false, FLAGS_fonts_dir.c_str());
SoftInitFontConfig();
PangoFontMap* font_map = pango_cairo_font_map_get_default();
PangoContext* context = pango_context_new();
pango_cairo_context_set_resolution(context, resolution_);
Expand Down Expand Up @@ -538,7 +535,7 @@ bool FontUtils::IsAvailableFont(const char* input_query_desc,
query_desc.c_str());
PangoFont* selected_font = NULL;
{
PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str());
PangoFontInfo::SoftInitFontConfig();
PangoFontMap* font_map = pango_cairo_font_map_get_default();
PangoContext* context = pango_context_new();
pango_context_set_font_map(context, font_map);
Expand Down Expand Up @@ -690,9 +687,8 @@ void FontUtils::GetAllRenderableCharacters(const vector<string>& fonts,
// Utilities written to be backward compatible with StringRender

/* static */
int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,
const string& fontname,
int* raw_score,
int FontUtils::FontScore(const TessHashMap<char32, inT64>& ch_map,
const string& fontname, int* raw_score,
vector<bool>* ch_flags) {
PangoFontInfo font_info;
if (!font_info.ParseFontDescriptionName(fontname)) {
Expand All @@ -707,7 +703,7 @@ int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,
}
*raw_score = 0;
int ok_chars = 0;
for (unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
for (TessHashMap<char32, inT64>::const_iterator it = ch_map.begin();
it != ch_map.end(); ++it) {
bool covered = (IsWhitespace(it->first) ||
(pango_coverage_get(coverage, it->first)
Expand All @@ -725,7 +721,7 @@ int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,


/* static */
string FontUtils::BestFonts(const unordered_map<char32, inT64>& ch_map,
string FontUtils::BestFonts(const TessHashMap<char32, inT64>& ch_map,
vector<pair<const char*, vector<bool> > >* fonts) {
const double kMinOKFraction = 0.99;
// Weighted fraction of characters that must be renderable in a font to make
Expand Down
36 changes: 27 additions & 9 deletions training/pango_font_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@
#include <utility>
#include <vector>

#include "commandlineflags.h"
#include "hashfn.h"
#include "host.h"
#include "util.h"
#include "pango/pango-font.h"
#include "pango/pango.h"
#include "pango/pangocairo.h"
#include "util.h"

DECLARE_STRING_PARAM_FLAG(fonts_dir);
DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir);

typedef signed int char32;

Expand All @@ -44,6 +50,7 @@ class PangoFontInfo {
DECORATIVE,
};
PangoFontInfo();
~PangoFontInfo();
// Initialize from parsing a font description name, defined as a string of the
// format:
// "FamilyName [FaceName] [PointSize]"
Expand Down Expand Up @@ -83,10 +90,14 @@ class PangoFontInfo {
bool GetSpacingProperties(const string& utf8_char,
int* x_bearing, int* x_advance) const;

// Initializes FontConfig by setting its environment variable and creating
// a fonts.conf file that points to the given fonts_dir. Once initialized,
// it is not re-initialized unless force_clear is true.
static void InitFontConfig(bool force_clear, const string& fonts_dir);
// If not already initialized, initializes FontConfig by setting its
// environment variable and creating a fonts.conf file that points to the
// FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
static void SoftInitFontConfig();
// Re-initializes font config, whether or not already initialized.
// If already initialized, any existing cache is deleted, just to be sure.
static void HardInitFontConfig(const string& fonts_dir,
const string& cache_dir);

// Accessors
string DescriptionName() const;
Expand Down Expand Up @@ -130,8 +141,14 @@ class PangoFontInfo {
int resolution_;
// Fontconfig operates through an environment variable, so it intrinsically
// cannot be thread-friendly, but you can serialize multiple independent
// font configurations by calling InitFontConfig(true, path).
static bool fontconfig_initialized_;
// font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
// These hold the last initialized values set by HardInitFontConfig or
// the first call to SoftInitFontConfig.
// Directory to be scanned for font files.
static string fonts_dir_;
// Directory to store the cache of font information. (Can be the same as
// fonts_dir_)
static string cache_dir_;

private:
PangoFontInfo(const PangoFontInfo&);
Expand Down Expand Up @@ -185,15 +202,16 @@ class FontUtils {
// In the flags vector, each flag is set according to whether the
// corresponding character (in order of iterating ch_map) can be rendered.
// The return string is a list of the acceptable fonts that were used.
static string BestFonts(const unordered_map<char32, inT64>& ch_map,
static string BestFonts(
const TessHashMap<char32, inT64>& ch_map,
vector<std::pair<const char*, vector<bool> > >* font_flag);

// FontScore returns the weighted renderability score of the given
// hash map character table in the given font. The unweighted score
// is also returned in raw_score.
// The values in the bool vector ch_flags correspond to whether the
// corresponding character (in order of iterating ch_map) can be rendered.
static int FontScore(const unordered_map<char32, inT64>& ch_map,
static int FontScore(const TessHashMap<char32, inT64>& ch_map,
const string& fontname, int* raw_score,
vector<bool>* ch_flags);

Expand Down
7 changes: 6 additions & 1 deletion training/stringrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width,
underline_start_prob_(0),
underline_continuation_prob_(0),
underline_style_(PANGO_UNDERLINE_SINGLE),
features_(NULL),
drop_uncovered_chars_(true),
strip_unrenderable_words_(false),
add_ligatures_(false),
Expand All @@ -120,7 +121,6 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width,
box_padding_(0),
total_chars_(0),
font_index_(0),
features_(NULL),
last_offset_(0) {
pen_color_[0] = 0.0;
pen_color_[1] = 0.0;
Expand Down Expand Up @@ -347,6 +347,11 @@ void StringRenderer::ClearBoxes() {
boxaDestroy(&page_boxes_);
}

// Runs BoxChar::PrepareToWrite on boxchars_, then returns the string that
// BoxChar::GetTesseractBoxStr builds from page_height_ and boxchars_.
string StringRenderer::GetBoxesStr() {
  BoxChar::PrepareToWrite(&boxchars_);
  string box_text = BoxChar::GetTesseractBoxStr(page_height_, boxchars_);
  return box_text;
}

void StringRenderer::WriteAllBoxes(const string& filename) {
BoxChar::PrepareToWrite(&boxchars_);
BoxChar::WriteTesseractBoxFile(filename, page_height_, boxchars_);
Expand Down
Loading

0 comments on commit a987e6d

Please sign in to comment.