Skip to content

Commit

Permalink
RAII: Result of ResultIterator::GetUTF8Text() was leaked inside TessBaseAPI::GetUTF8Text()
Browse files Browse the repository at this point in the history
  • Loading branch information
rfschtkt committed May 9, 2017
1 parent b86b4fa commit ec165b3
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 21 deletions.
11 changes: 4 additions & 7 deletions api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1267,9 +1267,7 @@ char* TessBaseAPI::GetUTF8Text() {
ResultIterator *it = GetIterator();
do {
if (it->Empty(RIL_PARA)) continue;
char *para_text = it->GetUTF8Text(RIL_PARA);
text += para_text;
delete []para_text;
text += it->GetUTF8Text(RIL_PARA).get();
} while (it->Next(RIL_PARA));
char* result = new char[text.length() + 1];
strncpy(result, text.string(), text.length() + 1);
Expand Down Expand Up @@ -1539,11 +1537,10 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (bold) hocr_str += "<strong>";
if (italic) hocr_str += "<em>";
do {
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
const std::unique_ptr<const char[]> grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
if (grapheme && grapheme[0] != 0) {
hocr_str += HOcrEscape(grapheme);
hocr_str += HOcrEscape(grapheme.get());
}
delete []grapheme;
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (italic) hocr_str += "</em>";
Expand Down Expand Up @@ -1661,7 +1658,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;

do {
tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
tsv_str += res_it->GetUTF8Text(RIL_SYMBOL).get();
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
tsv_str += "\n"; // end of row
Expand Down
2 changes: 1 addition & 1 deletion api/capi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -733,7 +733,7 @@ TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, Tess

TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level)
{
return handle->GetUTF8Text(level);
return handle->GetUTF8Text(level).release();
}

TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level)
Expand Down
5 changes: 2 additions & 3 deletions api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,10 +460,10 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
STRING pdf_word("");
int pdf_word_len = 0;
do {
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
const std::unique_ptr<const char[]> grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
if (grapheme && grapheme[0] != '\0') {
GenericVector<int> unicodes;
UNICHAR::UTF8ToUnicode(grapheme, &unicodes);
UNICHAR::UTF8ToUnicode(grapheme.get(), &unicodes);
char utf16[kMaxBytesPerCodepoint];
for (int i = 0; i < unicodes.length(); i++) {
int code = unicodes[i];
Expand All @@ -473,7 +473,6 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
}
}
}
delete []grapheme;
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (word_length > 0 && pdf_word_len > 0 && fontsize > 0) {
Expand Down
5 changes: 2 additions & 3 deletions ccmain/paragraphs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2446,8 +2446,8 @@ void InitializeRowInfo(bool after_recognition,
return;
}
info->text = "";
char *text = it.GetUTF8Text(RIL_TEXTLINE);
int trailing_ws_idx = strlen(text); // strip trailing space
const std::unique_ptr<const char[]> text = it.GetUTF8Text(RIL_TEXTLINE);
int trailing_ws_idx = strlen(text.get()); // strip trailing space
while (trailing_ws_idx > 0 &&
// isspace() only takes ASCII
((text[trailing_ws_idx - 1] & 0x80) == 0) &&
Expand All @@ -2460,7 +2460,6 @@ void InitializeRowInfo(bool after_recognition,
for (int i = 0; i < trailing_ws_idx; i++)
info->text += text[i];
}
delete []text;

if (info->text.size() == 0) {
return;
Expand Down
10 changes: 5 additions & 5 deletions ccmain/resultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,10 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level,

/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
* object at the given level.
*/
char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
if (it_->word() == NULL) return NULL; // Already at the end!
std::unique_ptr<char[]> ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
if (it_->word() == NULL) return nullptr; // Already at the end!
STRING text;
switch (level) {
case RIL_BLOCK:
Expand Down Expand Up @@ -591,8 +591,8 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
break;
}
int length = text.length() + 1;
char* result = new char[length];
strncpy(result, text.string(), length);
std::unique_ptr<char[]> result(new char[length]);
strncpy(result.get(), text.string(), length);
return result;
}

Expand Down
5 changes: 3 additions & 2 deletions ccmain/resultiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_

#include <memory> // std::unique_ptr
#include "platform.h"
#include "ltrresultiterator.h"

Expand Down Expand Up @@ -86,9 +87,9 @@ class TESS_API ResultIterator : public LTRResultIterator {

/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
* object at the given level.
*/
virtual char* GetUTF8Text(PageIteratorLevel level) const;
virtual std::unique_ptr<char[]> GetUTF8Text(PageIteratorLevel level) const;

/**
* Return whether the current paragraph's dominant reading direction
Expand Down

0 comments on commit ec165b3

Please sign in to comment.