From ec165b31de8e7d4cdfa9f96a90f6debbc09ec620 Mon Sep 17 00:00:00 2001 From: Raf Schietekat Date: Tue, 9 May 2017 14:21:42 +0200 Subject: [PATCH] RAII: Result of ResultIterator::GetUTF8Text() was leaked inside TessBaseAPI::GetUTF8Text() --- api/baseapi.cpp | 11 ++++------- api/capi.cpp | 2 +- api/pdfrenderer.cpp | 5 ++--- ccmain/paragraphs.cpp | 5 ++--- ccmain/resultiterator.cpp | 10 +++++----- ccmain/resultiterator.h | 5 +++-- 6 files changed, 17 insertions(+), 21 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 457c7ab61f..4ee930370d 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1267,9 +1267,7 @@ char* TessBaseAPI::GetUTF8Text() { ResultIterator *it = GetIterator(); do { if (it->Empty(RIL_PARA)) continue; - char *para_text = it->GetUTF8Text(RIL_PARA); - text += para_text; - delete []para_text; + text += it->GetUTF8Text(RIL_PARA).get(); } while (it->Next(RIL_PARA)); char* result = new char[text.length() + 1]; strncpy(result, text.string(), text.length() + 1); @@ -1539,11 +1537,10 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { if (bold) hocr_str += "<strong>"; if (italic) hocr_str += "<em>"; do { - const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL); + const std::unique_ptr<const char[]> grapheme = res_it->GetUTF8Text(RIL_SYMBOL); if (grapheme && grapheme[0] != 0) { - hocr_str += HOcrEscape(grapheme); + hocr_str += HOcrEscape(grapheme.get()); } - delete []grapheme; res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); if (italic) hocr_str += "</em>"; @@ -1661,7 +1658,7 @@ char* TessBaseAPI::GetTSVText(int page_number) { if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; do { - tsv_str += res_it->GetUTF8Text(RIL_SYMBOL); + tsv_str += res_it->GetUTF8Text(RIL_SYMBOL).get(); res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); tsv_str += "\n"; // end of row diff --git a/api/capi.cpp b/api/capi.cpp index 4f69731400..63aca1c205 100644 --- 
a/api/capi.cpp +++ b/api/capi.cpp @@ -733,7 +733,7 @@ TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, Tess TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level) { - return handle->GetUTF8Text(level); + return handle->GetUTF8Text(level).release(); } TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level) diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp index 93b336691f..a6ff8d5b25 100644 --- a/api/pdfrenderer.cpp +++ b/api/pdfrenderer.cpp @@ -460,10 +460,10 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, STRING pdf_word(""); int pdf_word_len = 0; do { - const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL); + const std::unique_ptr<const char[]> grapheme = res_it->GetUTF8Text(RIL_SYMBOL); if (grapheme && grapheme[0] != '\0') { GenericVector<int> unicodes; - UNICHAR::UTF8ToUnicode(grapheme, &unicodes); + UNICHAR::UTF8ToUnicode(grapheme.get(), &unicodes); char utf16[kMaxBytesPerCodepoint]; for (int i = 0; i < unicodes.length(); i++) { int code = unicodes[i]; @@ -473,7 +473,6 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, } } } - delete []grapheme; res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); if (word_length > 0 && pdf_word_len > 0 && fontsize > 0) { diff --git a/ccmain/paragraphs.cpp b/ccmain/paragraphs.cpp index c7d21a9192..42c34a3733 100644 --- a/ccmain/paragraphs.cpp +++ b/ccmain/paragraphs.cpp @@ -2446,8 +2446,8 @@ void InitializeRowInfo(bool after_recognition, return; } info->text = ""; - char *text = it.GetUTF8Text(RIL_TEXTLINE); - int trailing_ws_idx = strlen(text); // strip trailing space + const std::unique_ptr<const char[]> text = it.GetUTF8Text(RIL_TEXTLINE); + int trailing_ws_idx = strlen(text.get()); // strip trailing space while (trailing_ws_idx > 0 && // isspace() only takes ASCII ((text[trailing_ws_idx - 1] & 0x80) == 0) && @@ -2460,7 +2460,6 @@ 
void InitializeRowInfo(bool after_recognition, for (int i = 0; i < trailing_ws_idx; i++) info->text += text[i]; } - delete []text; if (info->text.size() == 0) { return; diff --git a/ccmain/resultiterator.cpp b/ccmain/resultiterator.cpp index 77514a6a59..24d0b3fbb7 100644 --- a/ccmain/resultiterator.cpp +++ b/ccmain/resultiterator.cpp @@ -551,10 +551,10 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, /** * Returns the null terminated UTF-8 encoded text string for the current - * object at the given level. Use delete [] to free after use. + * object at the given level. */ -char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const { - if (it_->word() == NULL) return NULL; // Already at the end! +std::unique_ptr<char[]> ResultIterator::GetUTF8Text(PageIteratorLevel level) const { + if (it_->word() == NULL) return nullptr; // Already at the end! STRING text; switch (level) { case RIL_BLOCK: @@ -591,8 +591,8 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const { break; } int length = text.length() + 1; - char* result = new char[length]; - strncpy(result, text.string(), length); + std::unique_ptr<char[]> result(new char[length]); + strncpy(result.get(), text.string(), length); return result; } diff --git a/ccmain/resultiterator.h b/ccmain/resultiterator.h index e5516836d5..ab33372113 100644 --- a/ccmain/resultiterator.h +++ b/ccmain/resultiterator.h @@ -22,6 +22,7 @@ #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_ #define TESSERACT_CCMAIN_RESULT_ITERATOR_H_ +#include <memory> // std::unique_ptr #include "platform.h" #include "ltrresultiterator.h" @@ -86,9 +87,9 @@ class TESS_API ResultIterator : public LTRResultIterator { /** * Returns the null terminated UTF-8 encoded text string for the current - * object at the given level. Use delete [] to free after use. + * object at the given level. 
*/ - virtual char* GetUTF8Text(PageIteratorLevel level) const; + virtual std::unique_ptr<char[]> GetUTF8Text(PageIteratorLevel level) const; /** * Return whether the current paragraph's dominant reading direction