diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 457c7ab61f..e1dec94c3e 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -46,6 +46,7 @@ #include #include #include +#include // std::unique_ptr #include "allheaders.h" @@ -1267,9 +1268,8 @@ char* TessBaseAPI::GetUTF8Text() { ResultIterator *it = GetIterator(); do { if (it->Empty(RIL_PARA)) continue; - char *para_text = it->GetUTF8Text(RIL_PARA); - text += para_text; - delete []para_text; + const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); + text += para_text.get(); } while (it->Next(RIL_PARA)); char* result = new char[text.length() + 1]; strncpy(result, text.string(), text.length() + 1); @@ -1393,6 +1393,7 @@ static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, * Image name/input_file_ can be set by SetInputName before calling * GetHOCRText * STL removed from original patch submission and refactored by rays. + * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetHOCRText(int page_number) { return GetHOCRText(NULL, page_number); @@ -1405,6 +1406,7 @@ char* TessBaseAPI::GetHOCRText(int page_number) { * Image name/input_file_ can be set by SetInputName before calling * GetHOCRText * STL removed from original patch submission and refactored by rays. + * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0)) @@ -1539,11 +1541,10 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { if (bold) hocr_str += ""; if (italic) hocr_str += ""; do { - const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL); + const std::unique_ptr grapheme(res_it->GetUTF8Text(RIL_SYMBOL)); if (grapheme && grapheme[0] != 0) { - hocr_str += HOcrEscape(grapheme); + hocr_str += HOcrEscape(grapheme.get()); } - delete []grapheme; res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); if (italic) hocr_str += ""; @@ -1576,6 +1577,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { /** * Make a TSV-formatted string from the internal data structures. * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetTSVText(int page_number) { if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0)) @@ -1661,7 +1663,7 @@ char* TessBaseAPI::GetTSVText(int page_number) { if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; do { - tsv_str += res_it->GetUTF8Text(RIL_SYMBOL); + tsv_str += std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); tsv_str += "\n"; // end of row @@ -1700,8 +1702,9 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + /** * The recognized text is returned as a char* which is coded - * as a UTF8 box file and must be freed with the delete [] operator. + * as a UTF8 box file. * page_number is a 0-base page index that will appear in the box file. + * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetBoxText(int page_number) { if (tesseract_ == NULL || @@ -1718,7 +1721,7 @@ char* TessBaseAPI::GetBoxText(int page_number) { do { int left, top, right, bottom; if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { - char* text = it->GetUTF8Text(RIL_SYMBOL); + const std::unique_ptr text(it->GetUTF8Text(RIL_SYMBOL)); // Tesseract uses space for recognition failure. Fix to a reject // character, kTesseractReject so we don't create illegal box files. for (int i = 0; text[i] != '\0'; ++i) { @@ -1727,10 +1730,9 @@ char* TessBaseAPI::GetBoxText(int page_number) { } snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n", - text, left, image_height_ - bottom, + text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number); output_length += strlen(result + output_length); - delete [] text; // Just in case... if (output_length + kMaxBytesPerLine > total_length) break; @@ -1755,8 +1757,8 @@ const int kLatinChs[] = { /** * The recognized text is returned as a char* which is coded - * as UNLV format Latin-1 with specific reject and suspect codes - * and must be freed with the delete [] operator. + * as UNLV format Latin-1 with specific reject and suspect codes. + * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetUNLVText() { if (tesseract_ == NULL || @@ -1981,9 +1983,9 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { PageSegMode current_psm = GetPageSegMode(); SetPageSegMode(mode); SetVariable("classify_enable_learning", "0"); - char* text = GetUTF8Text(); + const std::unique_ptr text(GetUTF8Text()); if (debug) { - tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr); + tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); } if (text != NULL) { PAGE_RES_IT it(page_res_); @@ -2023,7 +2025,6 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { tesseract_->EnableLearning = true; tesseract_->LearnWord(NULL, word_res); } - delete [] text; } else { success = false; } diff --git a/api/baseapi.h b/api/baseapi.h index 5263b2d8d7..1efa76ba8e 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -591,6 +591,7 @@ class TESS_API TessBaseAPI { * monitor can be used to * cancel the recognition * receive progress callbacks + * Returned string must be freed with the delete [] operator. */ char* GetHOCRText(ETEXT_DESC* monitor, int page_number); @@ -598,28 +599,30 @@ class TESS_API TessBaseAPI { * Make a HTML-formatted string with hOCR markup from the internal * data structures. * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. */ char* GetHOCRText(int page_number); /** * Make a TSV-formatted string from the internal data structures. * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. */ char* GetTSVText(int page_number); /** * The recognized text is returned as a char* which is coded in the same - * format as a box file used in training. Returned string must be freed with - * the delete [] operator. + * format as a box file used in training. * Constructs coordinates in the original image - not just the rectangle. * page_number is a 0-based page index that will appear in the box file. + * Returned string must be freed with the delete [] operator. */ char* GetBoxText(int page_number); /** * The recognized text is returned as a char* which is coded - * as UNLV format Latin-1 with specific reject and suspect codes - * and must be freed with the delete [] operator. + * as UNLV format Latin-1 with specific reject and suspect codes. + * Returned string must be freed with the delete [] operator. */ char* GetUNLVText(); diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp index 93b336691f..c388bb5af6 100644 --- a/api/pdfrenderer.cpp +++ b/api/pdfrenderer.cpp @@ -20,6 +20,7 @@ #include "config_auto.h" #endif +#include // std::unique_ptr #include "allheaders.h" #include "baseapi.h" #include "math.h" @@ -460,10 +461,10 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, STRING pdf_word(""); int pdf_word_len = 0; do { - const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL); + const std::unique_ptr grapheme(res_it->GetUTF8Text(RIL_SYMBOL)); if (grapheme && grapheme[0] != '\0') { GenericVector unicodes; - UNICHAR::UTF8ToUnicode(grapheme, &unicodes); + UNICHAR::UTF8ToUnicode(grapheme.get(), &unicodes); char utf16[kMaxBytesPerCodepoint]; for (int i = 0; i < unicodes.length(); i++) { int code = unicodes[i]; @@ -473,7 +474,6 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, } } } - delete []grapheme; res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); if (word_length > 0 && pdf_word_len > 0 && fontsize > 0) { @@ -570,14 +570,13 @@ bool TessPDFRenderer::BeginDocumentHandler() { // CIDTOGIDMAP const int kCIDToGIDMapSize = 2 * (1 << 16); - unsigned char *cidtogidmap = new unsigned char[kCIDToGIDMapSize]; + const std::unique_ptr cidtogidmap(new unsigned char[kCIDToGIDMapSize]); for (int i = 0; i < kCIDToGIDMapSize; i++) { cidtogidmap[i] = (i % 2) ? 1 : 0; } size_t len; unsigned char *comp = - zlibCompress(cidtogidmap, kCIDToGIDMapSize, &len); - delete[] cidtogidmap; + zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len); n = snprintf(buf, sizeof(buf), "5 0 obj\n" "<<\n" @@ -670,10 +669,9 @@ bool TessPDFRenderer::BeginDocumentHandler() { fseek(fp, 0, SEEK_END); long int size = ftell(fp); fseek(fp, 0, SEEK_SET); - char *buffer = new char[size]; - if (fread(buffer, 1, size, fp) != size) { + const std::unique_ptr buffer(new char[size]); + if (fread(buffer.get(), 1, size, fp) != size) { fclose(fp); - delete[] buffer; return false; } fclose(fp); @@ -686,13 +684,11 @@ bool TessPDFRenderer::BeginDocumentHandler() { ">>\n" "stream\n", size, size); if (n >= sizeof(buf)) { - delete[] buffer; return false; } AppendString(buf); objsize = strlen(buf); - AppendData(buffer, size); - delete[] buffer; + AppendData(buffer.get(), size); objsize += size; AppendString(endstream_endobj); objsize += strlen(endstream_endobj); @@ -887,12 +883,11 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { AppendPDFObject(buf); // CONTENTS - char* pdftext = GetPDFTextObjects(api, width, height); - long pdftext_len = strlen(pdftext); - unsigned char *pdftext_casted = reinterpret_cast(pdftext); + const std::unique_ptr pdftext(GetPDFTextObjects(api, width, height)); + const long pdftext_len = strlen(pdftext.get()); size_t len; unsigned char *comp_pdftext = - zlibCompress(pdftext_casted, pdftext_len, &len); + zlibCompress(reinterpret_cast(pdftext.get()), pdftext_len, &len); long comp_pdftext_len = len; n = snprintf(buf, sizeof(buf), "%ld 0 obj\n" @@ -901,7 +896,6 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { ">>\n" "stream\n", obj_, comp_pdftext_len); if (n >= sizeof(buf)) { - delete[] pdftext; lept_free(comp_pdftext); return false; } @@ -910,7 +904,6 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { AppendData(reinterpret_cast(comp_pdftext), comp_pdftext_len); objsize += comp_pdftext_len; lept_free(comp_pdftext); - delete[] pdftext; const char *b2 = "endstream\n" "endobj\n"; diff --git a/api/renderer.cpp b/api/renderer.cpp index e683149381..822e5244e7 100644 --- a/api/renderer.cpp +++ b/api/renderer.cpp @@ -19,6 +19,7 @@ #include "config_auto.h" #endif +#include // std::unique_ptr #include #include "baseapi.h" #include "genericvector.h" @@ -122,13 +123,12 @@ TessTextRenderer::TessTextRenderer(const char *outputbase) } bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { - char* utf8 = api->GetUTF8Text(); + const std::unique_ptr utf8(api->GetUTF8Text()); if (utf8 == NULL) { return false; } - AppendString(utf8); - delete[] utf8; + AppendString(utf8.get()); bool pageBreak = false; api->GetBoolVariable("include_page_breaks", &pageBreak); @@ -186,11 +186,10 @@ bool TessHOcrRenderer::EndDocumentHandler() { } bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { - char* hocr = api->GetHOCRText(imagenum()); + const std::unique_ptr hocr(api->GetHOCRText(imagenum())); if (hocr == NULL) return false; - AppendString(hocr); - delete[] hocr; + AppendString(hocr.get()); return true; } @@ -219,11 +218,10 @@ bool TessTsvRenderer::BeginDocumentHandler() { bool TessTsvRenderer::EndDocumentHandler() { return true; } bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { - char* tsv = api->GetTSVText(imagenum()); + const std::unique_ptr tsv(api->GetTSVText(imagenum())); if (tsv == NULL) return false; - AppendString(tsv); - delete[] tsv; + AppendString(tsv.get()); return true; } @@ -236,11 +234,10 @@ TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) } bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) { - char* unlv = api->GetUNLVText(); + const std::unique_ptr unlv(api->GetUNLVText()); if (unlv == NULL) return false; - AppendString(unlv); - delete[] unlv; + AppendString(unlv.get()); return true; } @@ -253,11 +250,10 @@ TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) } bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { - char* text = api->GetBoxText(imagenum()); + const std::unique_ptr text(api->GetBoxText(imagenum())); if (text == NULL) return false; - AppendString(text); - delete[] text; + AppendString(text.get()); return true; } diff --git a/ccmain/paragraphs.cpp b/ccmain/paragraphs.cpp index c7d21a9192..a8ef87be2c 100644 --- a/ccmain/paragraphs.cpp +++ b/ccmain/paragraphs.cpp @@ -21,6 +21,7 @@ #endif #include +#include // std::unique_ptr #include "genericvector.h" #include "helpers.h" @@ -2446,8 +2447,8 @@ void InitializeRowInfo(bool after_recognition, return; } info->text = ""; - char *text = it.GetUTF8Text(RIL_TEXTLINE); - int trailing_ws_idx = strlen(text); // strip trailing space + const std::unique_ptr text(it.GetUTF8Text(RIL_TEXTLINE)); + int trailing_ws_idx = strlen(text.get()); // strip trailing space while (trailing_ws_idx > 0 && // isspace() only takes ASCII ((text[trailing_ws_idx - 1] & 0x80) == 0) && @@ -2460,7 +2461,6 @@ void InitializeRowInfo(bool after_recognition, for (int i = 0; i < trailing_ws_idx; i++) info->text += text[i]; } - delete []text; if (info->text.size() == 0) { return; diff --git a/ccstruct/ocrblock.cpp b/ccstruct/ocrblock.cpp index ad7893b05a..19f2aecbfd 100644 --- a/ccstruct/ocrblock.cpp +++ b/ccstruct/ocrblock.cpp @@ -18,6 +18,7 @@ **********************************************************************/ #include +#include // std::unique_ptr #include "blckerr.h" #include "ocrblock.h" #include "stepblob.h" @@ -380,9 +381,8 @@ void BLOCK::compute_row_margins() { TBOX row_box = row->bounding_box(); int left_y = row->base_line(row_box.left()) + row->x_height(); int left_margin; - ICOORDELT_LIST *segments = lines.get_line(left_y); - LeftMargin(segments, row_box.left(), &left_margin); - delete segments; + const std::unique_ptr segments_left(lines.get_line(left_y)); + LeftMargin(segments_left.get(), row_box.left(), &left_margin); if (row_box.top() >= drop_cap_bottom) { int drop_cap_distance = row_box.left() - row->space() - drop_cap_right; @@ -394,9 +394,8 @@ void BLOCK::compute_row_margins() { int right_y = row->base_line(row_box.right()) + row->x_height(); int right_margin; - segments = lines.get_line(right_y); - RightMargin(segments, row_box.right(), &right_margin); - delete segments; + const std::unique_ptr segments_right(lines.get_line(right_y)); + RightMargin(segments_right.get(), row_box.right(), &right_margin); row->set_lmargin(left_margin); row->set_rmargin(right_margin); } diff --git a/ccstruct/pdblock.cpp b/ccstruct/pdblock.cpp index cf3289f2e7..4dde3988d0 100644 --- a/ccstruct/pdblock.cpp +++ b/ccstruct/pdblock.cpp @@ -18,6 +18,7 @@ **********************************************************************/ #include +#include // std::unique_ptr #include "allheaders.h" #include "blckerr.h" #include "pdblock.h" @@ -140,9 +141,9 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) { // rasterized interior. (Runs of interior pixels on a line.) PB_LINE_IT *lines = new PB_LINE_IT(&image_block); for (int y = box.bottom(); y < box.top(); ++y) { - ICOORDELT_LIST* segments = lines->get_line(y); + const std::unique_ptr segments(lines->get_line(y)); if (!segments->empty()) { - ICOORDELT_IT s_it(segments); + ICOORDELT_IT s_it(segments.get()); // Each element of segments is a start x and x size of the // run of interior pixels. for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) { @@ -154,7 +155,6 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) { xext, 1, PIX_SET, NULL, 0, 0); } } - delete segments; } delete lines; } else { diff --git a/ccstruct/polyblk.cpp b/ccstruct/polyblk.cpp index b5ca2e1212..bae881b1ff 100644 --- a/ccstruct/polyblk.cpp +++ b/ccstruct/polyblk.cpp @@ -20,6 +20,7 @@ #include #include #include +#include // std::unique_ptr #include "elst.h" #include "polyblk.h" @@ -273,7 +274,6 @@ void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) { inT16 y; inT16 width; PB_LINE_IT *lines; - ICOORDELT_LIST *segments; ICOORDELT_IT s_it; lines = new PB_LINE_IT (this); @@ -281,9 +281,9 @@ void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) { for (y = this->bounding_box ()->bottom (); y <= this->bounding_box ()->top (); y++) { - segments = lines->get_line (y); + const std::unique_ptr segments(lines->get_line (y)); if (!segments->empty ()) { - s_it.set_to_list (segments); + s_it.set_to_list (segments.get()); for (s_it.mark_cycle_pt (); !s_it.cycled_list (); s_it.forward ()) { // Note different use of ICOORDELT, x coord is x coord of pixel // at the start of line segment, y coord is length of line segment diff --git a/textord/scanedg.cpp b/textord/scanedg.cpp index 0430843619..89b7c31a7f 100644 --- a/textord/scanedg.cpp +++ b/textord/scanedg.cpp @@ -19,6 +19,8 @@ #include "scanedg.h" +#include // std::unique_ptr + #include "allheaders.h" #include "edgloop.h" @@ -93,7 +95,6 @@ void make_margins( //get a line inT16 y //line coord ) { PB_LINE_IT *lines; - ICOORDELT_LIST *segments; //bits of a line ICOORDELT_IT seg_it; inT32 start; //of segment inT16 xext; //of segment @@ -101,9 +102,9 @@ void make_margins( //get a line if (block->poly_block () != NULL) { lines = new PB_LINE_IT (block->poly_block ()); - segments = lines->get_line (y); + const std::unique_ptr segments(lines->get_line (y)); if (!segments->empty ()) { - seg_it.set_to_list (segments); + seg_it.set_to_list (segments.get()); seg_it.mark_cycle_pt (); start = seg_it.data ()->x (); xext = seg_it.data ()->y (); @@ -122,7 +123,6 @@ void make_margins( //get a line for (xindex = left; xindex < right; xindex++) pixels[xindex - left] = margin; } - delete segments; delete lines; } else {