From bc95798e011a39acf9778b95c8d8c5847774cc47 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Wed, 7 Dec 2016 13:21:05 -0800 Subject: [PATCH] Implement a new orientation and script detection API for C and C++ See issue #424. The existing C API for TessBaseAPIDetectOS requires a C caller to successfully allocate struct OSResults which is actually a C++ class. Generally it won't be possible for a regular C compiler to do this properly. It's also assumed that most API level users of Tesseract are only interested in Tesseract's best guess as to script and orientation, not the individual values for all possible scripts. This introduces a new API with a better name that is more closely aligned with the output of 'tesseract -psm 0'. Both tesseract -psm 0 and this API now share the same code in baseapi.cpp. --- api/baseapi.cpp | 37 +++++++++++++++++++++++++------------ api/baseapi.h | 10 ++++++++++ api/capi.cpp | 31 ++++++++++++++++++++++++++++++- api/capi.h | 7 +++++++ 4 files changed, 72 insertions(+), 13 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index b24742435f..129406a22a 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1891,31 +1891,44 @@ char* TessBaseAPI::GetUNLVText() { return result; } -/** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * page_number is a 0-based page index that will appear in the osd file. - */ -char* TessBaseAPI::GetOsdText(int page_number) { +bool TessBaseAPI::DetectOrientationScript(int& orient_deg, float& orient_conf, std::string& script, float& script_conf) { OSResults osr; bool osd = DetectOS(&osr); if (!osd) { - return NULL; + return false; } int orient_id = osr.best_result.orientation_id; int script_id = osr.get_best_script(orient_id); - float orient_conf = osr.best_result.oconfidence; - float script_conf = osr.best_result.sconfidence; + orient_conf = osr.best_result.oconfidence; + script_conf = osr.best_result.sconfidence; const char* script_name = osr.unicharset->get_script_from_script_id(script_id); // clockwise orientation of the input image, in degrees - int orient_deg = orient_id * 90; + orient_deg = orient_id * 90; + + script = script_name; + return true; +} + +/** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * page_number is a 0-based page index that will appear in the osd file. + */ +char* TessBaseAPI::GetOsdText(int page_number) { + int orient_deg; + float orient_conf; + std::string script_name; + float script_conf; + + if (!DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf)) + return NULL; // clockwise rotation needed to make the page upright - int rotate = OrientationIdToValue(orient_id); + int rotate = OrientationIdToValue(orient_deg / 90); const int kOsdBufsize = 255; char* osd_buf = new char[kOsdBufsize]; @@ -1926,7 +1939,7 @@ char* TessBaseAPI::GetOsdText(int page_number) { "Orientation confidence: %.2f\n" "Script: %s\n" "Script confidence: %.2f\n", - page_number, orient_deg, rotate, orient_conf, script_name, + page_number, orient_deg, rotate, orient_conf, script_name.c_str(), script_conf); return osd_buf; diff --git a/api/baseapi.h b/api/baseapi.h index 2e1bb8f30c..14161409c5 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -26,6 +26,7 @@ (patch)) #include +#include // To avoid collision with other typenames include the ABSOLUTE MINIMUM // complexity of includes here. Use forward declarations wherever possible // and hide includes of complex types in baseapi.cpp. @@ -618,6 +619,15 @@ class TESS_API TessBaseAPI { */ char* GetUNLVText(); + /** + * Detect the orientation of the input image and apparent script (alphabet). + * orient_deg is the detected clockwise rotation of the input image + * orient_conf is the confidence (15.0 is reasonable) + * script is an ASCII string, the name of the script, e.g. "Latin" + * script_conf is confidence level in the script + */ + bool DetectOrientationScript(int& orient_deg, float& orient_conf, std::string& script, float& script_conf); + /** * The recognized text is returned as a char* which is coded * as UTF8 and must be freed with the delete [] operator. diff --git a/api/capi.cpp b/api/capi.cpp index 849d296104..7e1fe3665a 100644 --- a/api/capi.cpp +++ b/api/capi.cpp @@ -538,9 +538,38 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results) { - return handle->DetectOS(results) ? TRUE : FALSE; + return FALSE; // Unsafe ABI, return FALSE always } +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name, + int* best_orientation_deg, float* script_confidence, + float* orientation_confidence) +{ + int orient_deg; + float orient_conf; + std::string script_name; + float script_conf; + BOOL success; + + success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf); + if (!success) + return FALSE; + if (best_script_name) { + *best_script_name = new char [script_name.length() + 1]; + strcpy(*best_script_name, script_name.c_str()); + } + + if (best_orientation_deg) + *best_orientation_deg = orient_deg; + if (script_confidence) + *script_confidence = script_conf; + if (orientation_confidence) + *orientation_confidence = orient_conf; + + return TRUE; +} + + TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, int* num_features, int* FeatureOutlineIndex) { diff --git a/api/capi.h b/api/capi.h index a0c54a20e4..be51454f78 100644 --- a/api/capi.h +++ b/api/capi.h @@ -285,8 +285,15 @@ TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle); TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f); TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f); + +// Deprecated, no longer working TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results); +// Call TessDeleteText(*best_script_name) to free memory allocated by this function +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name, + int* best_orientation_deg, float* script_confidence, + float* orientation_confidence); + TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, int* num_features, int* FeatureOutlineIndex);