From 36ed6da3499c93c2d04de29ee2f02f6d9975a1fe Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 16 May 2019 07:09:27 +0200 Subject: [PATCH] Fix baseapi_test with locale de_DE.UTF-8 The unittest failed with LANG=de_DE.UTF-8: $ unittest/baseapi_test Running main() from ../../../../unittest/../googletest/googletest/src/gtest_main.cc [==========] Running 12 tests from 2 test suites. [----------] Global test environment set-up. [----------] 10 tests from TesseractTest [ RUN ] TesseractTest.ArraySizeTest [ OK ] TesseractTest.ArraySizeTest (0 ms) [ RUN ] TesseractTest.BasicTesseractTest [ OK ] TesseractTest.BasicTesseractTest (1251 ms) [ RUN ] TesseractTest.IteratesParagraphsEvenIfNotDetected [ OK ] TesseractTest.IteratesParagraphsEvenIfNotDetected (347 ms) [ RUN ] TesseractTest.HOCRWorksWithoutSetInputName [ OK ] TesseractTest.HOCRWorksWithoutSetInputName (403 ms) [ RUN ] TesseractTest.HOCRContainsBaseline [ OK ] TesseractTest.HOCRContainsBaseline (389 ms) [ RUN ] TesseractTest.RickSnyderNotFuckSnyder [ OK ] TesseractTest.RickSnyderNotFuckSnyder (346 ms) [ RUN ] TesseractTest.AdaptToWordStrTest Trying to adapt "136 " to "1 3 6" Trying to adapt "256 " to "2 5 6" Trying to adapt "410 " to "4 1 0" Trying to adapt "432 " to "4 3 2" Trying to adapt "540 " to "5 4 0" Trying to adapt "692 " to "6 9 2" Trying to adapt "779 " to "7 7 9" Trying to adapt "793 " to "7 9 3" Trying to adapt "808 " to "8 0 8" Trying to adapt "815 " to "8 1 5" Trying to adapt "12 " to "1 2" Trying to adapt "12 " to "1 2" [ OK ] TesseractTest.AdaptToWordStrTest (788 ms) [ RUN ] TesseractTest.BasicLSTMTest [ OK ] TesseractTest.BasicLSTMTest (4525 ms) [ RUN ] TesseractTest.LSTMGeometryTest [ OK ] TesseractTest.LSTMGeometryTest (615 ms) [ RUN ] TesseractTest.InitConfigOnlyTest Error: unichar ? in normproto file is not in unichar set. Error: unichar 0.232621 in normproto file is not in unichar set. Error: unichar 0.000400 in normproto file is not in unichar set. Error: unichar 0.231864 in normproto file is not in unichar set. [...] Error: unichar ? in normproto file is not in unichar set. Error: unichar 0.233915 in normproto file is not in unichar set. Error: unichar 0.000400 in normproto file is not in unichar set. Error: unichar 0.221755 in normproto file is not in unichar set. Error: unichar 0.000400 in normproto file is not in unichar set. Error: unichar ? in normproto file is not in unichar set. baseapi_test(21845,0x1134c45c0) malloc: *** error for object 0x927f96c28005e0: pointer being freed was not allocated baseapi_test(21845,0x1134c45c0) malloc: *** set a breakpoint in malloc_error_break to debug [INFO] Lang eng took 327ms in regular init [INFO] Lang chi_tra took 1422ms in regular init Abort trap: 6 TesseractTest.InitConfigOnlyTest is fixed by using std::istringstream instead of sscanf. Signed-off-by: Stefan Weil --- src/classify/normmatch.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/classify/normmatch.cpp b/src/classify/normmatch.cpp index 4efaf5cb7b..2c14c50f3c 100644 --- a/src/classify/normmatch.cpp +++ b/src/classify/normmatch.cpp @@ -21,6 +21,7 @@ #include #include +#include // for std::istringstream #include "classify.h" #include "clusttool.h" @@ -113,7 +114,7 @@ float Classify::ComputeNormMatch(CLASS_ID ClassId, feature.Params[CharNormRx] * 8000.0 + feature.Params[CharNormRy] * feature.Params[CharNormRy] * 8000.0); - return (1.0 - NormEvidenceOf (Match)); + return (1.0 - NormEvidenceOf(Match)); } BestMatch = FLT_MAX; @@ -209,7 +210,11 @@ NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) { const int kMaxLineSize = 100; char line[kMaxLineSize]; while (fp->FGets(line, kMaxLineSize) != nullptr) { - if (sscanf(line, "%s %d", unichar, &NumProtos) != 2) continue; + std::istringstream stream(line); + stream >> unichar >> NumProtos; + if (stream.fail()) { + continue; + } if (unicharset.contains_unichar(unichar)) { unichar_id = unicharset.unichar_to_id(unichar); Protos = NormProtos->Protos[unichar_id];