From fb863c97a9c8e9c05874af6f6fbd77923cc6f6e9 Mon Sep 17 00:00:00 2001
From: Stefan Weil
Date: Fri, 19 May 2017 20:56:55 +0200
Subject: [PATCH] UNICHARSET: Add missing initialization

The member variable default_sid_ was used without being initialized.

Valgrind report for `tesseract --oem 1 hello.png hello`:

Conditional jump or move depends on uninitialised value(s)
   at 0x14352E: BITS16::set_bit(unsigned char, unsigned char) (bits16.h:50)
   by 0x143E27: WERD::set_flag(WERD_FLAGS, unsigned char) (werd.h:129)
   by 0x27D053: WERD_RES::SetupWordScript(UNICHARSET const&) (pageres.cpp:381)
   by 0x27CAFD: WERD_RES::SetupForRecognition(UNICHARSET const&, tesseract::Tesseract*, Pix*, int, TBOX const*, bool, bool, bool, ROW*, BLOCK const*) (pageres.cpp:316)
   by 0x145903: tesseract::Tesseract::SetupWordPassN(int, tesseract::WordData*) (control.cpp:182)
   by 0x145780: tesseract::Tesseract::SetupAllWordsPassN(int, TBOX const*, char const*, PAGE_RES*, GenericVector*) (control.cpp:168)
   by 0x146293: tesseract::Tesseract::recog_all_words(PAGE_RES*, ETEXT_DESC*, TBOX const*, char const*, int) (control.cpp:336)
   by 0x12F356: tesseract::TessBaseAPI::Recognize(ETEXT_DESC*) (baseapi.cpp:878)
   by 0x13036D: tesseract::TessBaseAPI::ProcessPage(Pix*, int, char const*, char const*, int, tesseract::TessResultRenderer*) (baseapi.cpp:1184)
   by 0x13014A: tesseract::TessBaseAPI::ProcessPagesInternal(char const*, char const*, int, tesseract::TessResultRenderer*) (baseapi.cpp:1140)
   by 0x12FBCE: tesseract::TessBaseAPI::ProcessPages(char const*, char const*, int, tesseract::TessResultRenderer*) (baseapi.cpp:1040)
   by 0x12C3DF: main (tesseractmain.cpp:515)
 Uninitialised value was created by a heap allocation
   at 0x4C2C21F: operator new(unsigned long) (vg_replace_malloc.c:334)
   by 0x12D88B: tesseract::TessBaseAPI::Init(char const*, int, char const*, tesseract::OcrEngineMode, char**, int, GenericVector const*, GenericVector const*, bool, bool (*)(STRING const&, GenericVector*)) (baseapi.cpp:320)
   by 0x12D6DA: tesseract::TessBaseAPI::Init(char const*, char const*, tesseract::OcrEngineMode, char**, int, GenericVector const*, GenericVector const*, bool) (baseapi.cpp:284)
   by 0x12C088: main (tesseractmain.cpp:440)

Signed-off-by: Stefan Weil
---
 ccutil/unicharset.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ccutil/unicharset.h b/ccutil/unicharset.h
index 716147eebd..a2e4e3b76d 100644
--- a/ccutil/unicharset.h
+++ b/ccutil/unicharset.h
@@ -292,6 +292,7 @@ class UNICHARSET {
     katakana_sid_ = 0;
     thai_sid_ = 0;
     hangul_sid_ = 0;
+    default_sid_ = 0;
   }
 
   // Return the size of the set (the number of different UNICHAR it holds).