Skip to content

Commit

Permalink
Format code
Browse files Browse the repository at this point in the history
It was formatted using
clang-format-7 -i -style=Google $(find -name "*.h" -o -name "*.cpp")

Signed-off-by: Stefan Weil <[email protected]>
  • Loading branch information
stweil committed May 24, 2018
1 parent 5a56d0c commit 99ef870
Show file tree
Hide file tree
Showing 506 changed files with 34,744 additions and 37,076 deletions.
739 changes: 323 additions & 416 deletions src/api/baseapi.cpp

Large diffs are not rendered by default.

199 changes: 91 additions & 108 deletions src/api/baseapi.h

Large diffs are not rendered by default.

1,018 changes: 500 additions & 518 deletions src/api/capi.cpp

Large diffs are not rendered by default.

668 changes: 411 additions & 257 deletions src/api/capi.h

Large diffs are not rendered by default.

161 changes: 76 additions & 85 deletions src/api/pdfrenderer.cpp

Large diffs are not rendered by default.

42 changes: 17 additions & 25 deletions src/api/renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ namespace tesseract {
/**********************************************************************
* Base Renderer interface implementation
**********************************************************************/
TessResultRenderer::TessResultRenderer(const char *outputbase,
TessResultRenderer::TessResultRenderer(const char* outputbase,
const char* extension)
: file_extension_(extension),
title_(""), imagenum_(-1),
title_(""),
imagenum_(-1),
fout_(stdout),
next_(nullptr),
happy_(true) {
Expand Down Expand Up @@ -108,21 +109,15 @@ void TessResultRenderer::AppendData(const char* s, int len) {
if (n != len) happy_ = false;
}

bool TessResultRenderer::BeginDocumentHandler() {
return happy_;
}

bool TessResultRenderer::EndDocumentHandler() {
return happy_;
}
bool TessResultRenderer::BeginDocumentHandler() { return happy_; }

bool TessResultRenderer::EndDocumentHandler() { return happy_; }

/**********************************************************************
* UTF8 Text Renderer interface implementation
**********************************************************************/
TessTextRenderer::TessTextRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "txt") {
}
TessTextRenderer::TessTextRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "txt") {}

bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
Expand All @@ -143,14 +138,14 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
/**********************************************************************
* HOcr Text Renderer interface implementation
**********************************************************************/
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
TessHOcrRenderer::TessHOcrRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "hocr") {
font_info_ = false;
font_info_ = false;
}

TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
TessHOcrRenderer::TessHOcrRenderer(const char* outputbase, bool font_info)
: TessResultRenderer(outputbase, "hocr") {
font_info_ = font_info;
font_info_ = font_info;
}

bool TessHOcrRenderer::BeginDocumentHandler() {
Expand All @@ -166,12 +161,11 @@ bool TessHOcrRenderer::BeginDocumentHandler() {
"<meta http-equiv=\"Content-Type\" content=\"text/html;"
"charset=utf-8\" />\n"
" <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
"' />\n"
"' />\n"
" <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
" ocr_line ocrx_word");
if (font_info_)
AppendString(
" ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
AppendString(" ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
AppendString(
"'/>\n"
"</head>\n<body>\n");
Expand Down Expand Up @@ -229,9 +223,8 @@ bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
/**********************************************************************
* UNLV Text Renderer interface implementation
**********************************************************************/
TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "unlv") {
}
TessUnlvRenderer::TessUnlvRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "unlv") {}

bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
Expand All @@ -245,9 +238,8 @@ bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
/**********************************************************************
* BoxText Renderer interface implementation
**********************************************************************/
TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "box") {
}
TessBoxTextRenderer::TessBoxTextRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "box") {}

bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
Expand Down
203 changes: 101 additions & 102 deletions src/api/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,104 +43,103 @@ class TessBaseAPI;
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
virtual ~TessResultRenderer();

// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer* next);

// Returns the next renderer or nullptr.
TessResultRenderer* next() { return next_; }

/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char* title);

/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI* api);

/**
* Finishes the document and finalizes the output data
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();

const char* file_extension() const { return file_extension_; }
const char* title() const { return title_.c_str(); }

/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const { return imagenum_; }

protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase,
const char* extension);

// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();

// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI* api) = 0;

// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();

// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
void AppendString(const char* s);

// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char* s, int len);

private:
const char* file_extension_; // standard extension for generated output
STRING title_; // title of document being renderered
int imagenum_; // index of last image added

FILE* fout_; // output file pointer
TessResultRenderer* next_; // Can link multiple renderers together
bool happy_; // I get grumpy when the disk fills up, etc.
public:
virtual ~TessResultRenderer();

// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer* next);

// Returns the next renderer or nullptr.
TessResultRenderer* next() { return next_; }

/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char* title);

/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI* api);

/**
* Finishes the document and finalizes the output data
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();

const char* file_extension() const { return file_extension_; }
const char* title() const { return title_.c_str(); }

/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const { return imagenum_; }

protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char* outputbase, const char* extension);

// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();

// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI* api) = 0;

// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();

// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
void AppendString(const char* s);

// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char* s, int len);

private:
const char* file_extension_; // standard extension for generated output
STRING title_; // title of document being renderered
int imagenum_; // index of last image added

FILE* fout_; // output file pointer
TessResultRenderer* next_; // Can link multiple renderers together
bool happy_; // I get grumpy when the disk fills up, etc.
};

/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
explicit TessTextRenderer(const char *outputbase);
explicit TessTextRenderer(const char* outputbase);

protected:
virtual bool AddImageHandler(TessBaseAPI* api);
Expand All @@ -151,8 +150,8 @@ class TESS_API TessTextRenderer : public TessResultRenderer {
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
explicit TessHOcrRenderer(const char *outputbase);
explicit TessHOcrRenderer(const char* outputbase, bool font_info);
explicit TessHOcrRenderer(const char* outputbase);

protected:
virtual bool BeginDocumentHandler();
Expand All @@ -177,7 +176,7 @@ class TESS_API TessTsvRenderer : public TessResultRenderer {
virtual bool EndDocumentHandler();

private:
bool font_info_; // whether to print font information
bool font_info_; // whether to print font information
};

/**
Expand All @@ -187,7 +186,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false);
TessPDFRenderer(const char* outputbase, const char* datadir,
bool textonly = false);

protected:
virtual bool BeginDocumentHandler();
Expand All @@ -203,26 +203,25 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
long int obj_; // counter for PDF objects
GenericVector<long int> offsets_; // offset of every PDF object in bytes
GenericVector<long int> pages_; // object number for every /Page object
const char *datadir_; // where to find the custom font
const char* datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
void AppendPDFObject(const char* data);
// Create the /Contents object for an entire page.
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size);
static bool imageToPDFObj(Pix* pix, char* filename, long int objnum,
char** pdf_object, long int* pdf_object_size);
};


/**
* Renders tesseract output into UNLV-format text (from GetUNLVText())
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
explicit TessUnlvRenderer(const char *outputbase);
explicit TessUnlvRenderer(const char* outputbase);

protected:
virtual bool AddImageHandler(TessBaseAPI* api);
Expand All @@ -233,7 +232,7 @@ class TESS_API TessUnlvRenderer : public TessResultRenderer {
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
explicit TessBoxTextRenderer(const char *outputbase);
explicit TessBoxTextRenderer(const char* outputbase);

protected:
virtual bool AddImageHandler(TessBaseAPI* api);
Expand Down
Loading

0 comments on commit 99ef870

Please sign in to comment.