Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use clang-format for code formatting #1598

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
742 changes: 325 additions & 417 deletions src/api/baseapi.cpp

Large diffs are not rendered by default.

202 changes: 93 additions & 109 deletions src/api/baseapi.h

Large diffs are not rendered by default.

1,018 changes: 500 additions & 518 deletions src/api/capi.cpp

Large diffs are not rendered by default.

668 changes: 411 additions & 257 deletions src/api/capi.h

Large diffs are not rendered by default.

211 changes: 101 additions & 110 deletions src/api/pdfrenderer.cpp

Large diffs are not rendered by default.

42 changes: 17 additions & 25 deletions src/api/renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ namespace tesseract {
/**********************************************************************
* Base Renderer interface implementation
**********************************************************************/
TessResultRenderer::TessResultRenderer(const char *outputbase,
TessResultRenderer::TessResultRenderer(const char* outputbase,
const char* extension)
: file_extension_(extension),
title_(""), imagenum_(-1),
title_(""),
imagenum_(-1),
fout_(stdout),
next_(nullptr),
happy_(true) {
Expand Down Expand Up @@ -108,21 +109,15 @@ void TessResultRenderer::AppendData(const char* s, int len) {
if (n != len) happy_ = false;
}

bool TessResultRenderer::BeginDocumentHandler() {
return happy_;
}

bool TessResultRenderer::EndDocumentHandler() {
return happy_;
}
bool TessResultRenderer::BeginDocumentHandler() { return happy_; }

bool TessResultRenderer::EndDocumentHandler() { return happy_; }

/**********************************************************************
* UTF8 Text Renderer interface implementation
**********************************************************************/
TessTextRenderer::TessTextRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "txt") {
}
TessTextRenderer::TessTextRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "txt") {}

bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
Expand All @@ -143,14 +138,14 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
/**********************************************************************
* HOcr Text Renderer interface implementation
**********************************************************************/
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
TessHOcrRenderer::TessHOcrRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "hocr") {
font_info_ = false;
font_info_ = false;
}

TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
TessHOcrRenderer::TessHOcrRenderer(const char* outputbase, bool font_info)
: TessResultRenderer(outputbase, "hocr") {
font_info_ = font_info;
font_info_ = font_info;
}

bool TessHOcrRenderer::BeginDocumentHandler() {
Expand All @@ -166,12 +161,11 @@ bool TessHOcrRenderer::BeginDocumentHandler() {
"<meta http-equiv=\"Content-Type\" content=\"text/html;"
"charset=utf-8\" />\n"
" <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
"' />\n"
"' />\n"
" <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
" ocr_line ocrx_word");
if (font_info_)
AppendString(
" ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
AppendString(" ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
AppendString(
"'/>\n"
"</head>\n<body>\n");
Expand Down Expand Up @@ -229,9 +223,8 @@ bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
/**********************************************************************
* UNLV Text Renderer interface implementation
**********************************************************************/
TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "unlv") {
}
TessUnlvRenderer::TessUnlvRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "unlv") {}

bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
Expand All @@ -245,9 +238,8 @@ bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
/**********************************************************************
* BoxText Renderer interface implementation
**********************************************************************/
TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "box") {
}
TessBoxTextRenderer::TessBoxTextRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "box") {}

bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
Expand Down
203 changes: 101 additions & 102 deletions src/api/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,104 +43,103 @@ class TessBaseAPI;
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
virtual ~TessResultRenderer();

// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer* next);

// Returns the next renderer or nullptr.
TessResultRenderer* next() { return next_; }

/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char* title);

/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI* api);

/**
* Finishes the document and finalizes the output data
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();

const char* file_extension() const { return file_extension_; }
const char* title() const { return title_.c_str(); }

/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const { return imagenum_; }

protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase,
const char* extension);

// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();

// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI* api) = 0;

// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();

// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
void AppendString(const char* s);

// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char* s, int len);

private:
const char* file_extension_; // standard extension for generated output
STRING title_; // title of document being rendered
int imagenum_; // index of last image added

FILE* fout_; // output file pointer
TessResultRenderer* next_; // Can link multiple renderers together
bool happy_; // I get grumpy when the disk fills up, etc.
public:
virtual ~TessResultRenderer();

// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer* next);

// Returns the next renderer or nullptr.
TessResultRenderer* next() { return next_; }

/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char* title);

/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI* api);

/**
* Finishes the document and finalizes the output data
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();

const char* file_extension() const { return file_extension_; }
const char* title() const { return title_.c_str(); }

/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const { return imagenum_; }

protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char* outputbase, const char* extension);

// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();

// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI* api) = 0;

// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();

// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
void AppendString(const char* s);

// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char* s, int len);

private:
const char* file_extension_; // standard extension for generated output
STRING title_; // title of document being rendered
int imagenum_; // index of last image added

FILE* fout_; // output file pointer
TessResultRenderer* next_; // Can link multiple renderers together
bool happy_; // I get grumpy when the disk fills up, etc.
};

/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
explicit TessTextRenderer(const char *outputbase);
explicit TessTextRenderer(const char* outputbase);

protected:
virtual bool AddImageHandler(TessBaseAPI* api);
Expand All @@ -151,8 +150,8 @@ class TESS_API TessTextRenderer : public TessResultRenderer {
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
explicit TessHOcrRenderer(const char *outputbase);
explicit TessHOcrRenderer(const char* outputbase, bool font_info);
explicit TessHOcrRenderer(const char* outputbase);

protected:
virtual bool BeginDocumentHandler();
Expand All @@ -177,7 +176,7 @@ class TESS_API TessTsvRenderer : public TessResultRenderer {
virtual bool EndDocumentHandler();

private:
bool font_info_; // whether to print font information
bool font_info_; // whether to print font information
};

/**
Expand All @@ -187,7 +186,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false);
TessPDFRenderer(const char* outputbase, const char* datadir,
bool textonly = false);

protected:
virtual bool BeginDocumentHandler();
Expand All @@ -203,26 +203,25 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
long int obj_; // counter for PDF objects
GenericVector<long int> offsets_; // offset of every PDF object in bytes
GenericVector<long int> pages_; // object number for every /Page object
const char *datadir_; // where to find the custom font
const char* datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
void AppendPDFObject(const char* data);
// Create the /Contents object for an entire page.
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size);
static bool imageToPDFObj(Pix* pix, char* filename, long int objnum,
char** pdf_object, long int* pdf_object_size);
};


/**
* Renders tesseract output into UNLV-format text (as returned by GetUNLVText)
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
explicit TessUnlvRenderer(const char *outputbase);
explicit TessUnlvRenderer(const char* outputbase);

protected:
virtual bool AddImageHandler(TessBaseAPI* api);
Expand All @@ -233,7 +232,7 @@ class TESS_API TessUnlvRenderer : public TessResultRenderer {
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
explicit TessBoxTextRenderer(const char *outputbase);
explicit TessBoxTextRenderer(const char* outputbase);

protected:
virtual bool AddImageHandler(TessBaseAPI* api);
Expand Down
Loading