diff --git a/tess-two/jni/com_googlecode_tesseract_android/pageiterator.cpp b/tess-two/jni/com_googlecode_tesseract_android/pageiterator.cpp
index 402dc015c..45063d140 100644
--- a/tess-two/jni/com_googlecode_tesseract_android/pageiterator.cpp
+++ b/tess-two/jni/com_googlecode_tesseract_android/pageiterator.cpp
@@ -41,6 +41,32 @@ jboolean Java_com_googlecode_tesseract_android_PageIterator_nativeNext(JNIEnv *e
   return pageIterator->Next(enumLevel) ? JNI_TRUE : JNI_FALSE;
 }
 
+/**
+ * Asks the native page iterator for the bounding box at the given level and
+ * returns it to Java as an int[4] laid out as {x1, y1, x2, y2}.
+ */
+jintArray Java_com_googlecode_tesseract_android_PageIterator_nativeBoundingBox(JNIEnv *env, jclass clazz,
+    jint nativePageIterator, jint level) {
+  const int size = 4;
+  jintArray result = env->NewIntArray(size);
+
+  LOG_ASSERT((result != NULL), "Could not create Java bounding box array!");
+
+  PageIterator *pageIterator = (PageIterator *) nativePageIterator;
+  PageIteratorLevel enumLevel = (PageIteratorLevel) level;
+
+  // Zero-initialize the outputs so the Java array always receives defined
+  // values, even if BoundingBox() does not write to them.
+  int x1 = 0, y1 = 0, x2 = 0, y2 = 0;
+  pageIterator->BoundingBox(enumLevel, &x1, &y1, &x2, &y2);
+
+  // Stage the coordinates in a jint buffer sized to match the Java array.
+  jint fill[size] = { x1, y1, x2, y2 };
+
+  env->SetIntArrayRegion(result, 0, size, fill);
+  return result;
+}
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
diff --git a/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp b/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp
index 242cf6e81..a53216d92 100644
--- a/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp
+++ b/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp
@@ -422,6 +422,60 @@ jint Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetResultIterator(J
   return (jint) nat->api.GetIterator();
 }
 
+// Returns the text produced by the native API's GetHOCRText() for the given
+// page as a Java string.
+jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetHOCRText(JNIEnv *env,
+    jobject thiz, jint page) {
+  native_data_t *nat = get_native_data(env, thiz);
+  char *text = nat->api.GetHOCRText(page);
+  if (text == NULL) return NULL;  // never hand a NULL buffer to NewStringUTF
+
+  jstring result = env->NewStringUTF(text);
+  // Tesseract allocates this buffer with new[]; releasing it with free() is a mismatch.
+  delete[] text;
+  return result;
+}
+
+// Returns the text produced by the native API's GetBoxText() for the given page.
+jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetBoxText(JNIEnv *env,
+    jobject thiz, jint page) {
+  native_data_t *nat = get_native_data(env, thiz);
+  char *text = nat->api.GetBoxText(page);
+  if (text == NULL) return NULL;  // never hand a NULL buffer to NewStringUTF
+  jstring result = env->NewStringUTF(text);
+  delete[] text;  // Tesseract allocates this buffer with new[]; free() is a mismatch
+  return result;
+}
+
+// Passes the Java string to the native API's SetInputName() as a C string.
+void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetInputName(JNIEnv *env,
+    jobject thiz, jstring name) {
+  native_data_t *nat = get_native_data(env, thiz);
+  const char *c_name = env->GetStringUTFChars(name, NULL);
+  if (c_name == NULL) return;  // GetStringUTFChars failed; nothing to pass or release
+  nat->api.SetInputName(c_name);
+  env->ReleaseStringUTFChars(name, c_name);
+}
+
+// Passes the Java string to the native API's SetOutputName() as a C string.
+void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetOutputName(JNIEnv *env,
+    jobject thiz, jstring name) {
+  native_data_t *nat = get_native_data(env, thiz);
+  const char *c_name = env->GetStringUTFChars(name, NULL);
+  if (c_name == NULL) return;  // GetStringUTFChars failed; nothing to pass or release
+  nat->api.SetOutputName(c_name);
+  env->ReleaseStringUTFChars(name, c_name);
+}
+
+// Passes the Java string to the native API's ReadConfigFile() as a C string.
+void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeReadConfigFile(JNIEnv *env,
+    jobject thiz, jstring fileName) {
+  native_data_t *nat = get_native_data(env, thiz);
+  const char *c_file_name = env->GetStringUTFChars(fileName, NULL);
+  if (c_file_name == NULL) return;  // GetStringUTFChars failed; nothing to pass or release
+  nat->api.ReadConfigFile(c_file_name);
+  env->ReleaseStringUTFChars(fileName, c_file_name);
+}
 #ifdef __cplusplus
 }
 #endif
diff --git a/tess-two/src/com/googlecode/tesseract/android/PageIterator.java b/tess-two/src/com/googlecode/tesseract/android/PageIterator.java
index 5e6a63b2d..1095ea805 100644
--- a/tess-two/src/com/googlecode/tesseract/android/PageIterator.java
+++ b/tess-two/src/com/googlecode/tesseract/android/PageIterator.java
@@ -60,6 +60,19 @@ public boolean next(int level) {
         return nativeNext(mNativePageIterator, level);
     }
 
+    /**
+     * Returns the bounding box that the native iterator reports for the
+     * given level.
+     *
+     * @param level page iterator level, forwarded to the native layer
+     * @return four-element array in the order the native code fills it:
+     *         x1, y1, x2, y2 (corner coordinates, not x, y, width, height)
+     */
+    public int[] getBoundingBox(int level) {
+        return nativeBoundingBox(mNativePageIterator, level);
+    }
+
     private static native void nativeBegin(int nativeIterator);
     private static native boolean nativeNext(int nativeIterator, int level);
+    private static native int[] nativeBoundingBox(int nativeIterator, int level);
 }
diff --git a/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java b/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java
index dd90394ca..a4e337fa8 100644
--- a/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java
+++ b/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java
@@ -486,6 +486,60 @@ public ResultIterator getResultIterator() {
         return new ResultIterator(nativeResultIterator);
     }
 
+    /**
+     * Makes an HTML-formatted string with hOCR markup from the internal data
+     * structures.
+     * <p>
+     * Make sure to call {@link #setInputName} before invoking this method.
+     *
+     * @param page is 0-based but will appear in the output as 1-based.
+     * @return HTML-formatted string with hOCR markup
+     */
+    public String getHOCRText(int page) {
+        return nativeGetHOCRText(page);
+    }
+
+    /**
+     * Sets the name of the input file. Needed only for training and
+     * loading a UNLV zone file.
+     *
+     * @param name input file name
+     */
+    public void setInputName(String name) {
+        nativeSetInputName(name);
+    }
+
+    /**
+     * Sets the name of the output files. Needed only for debugging.
+     *
+     * @param name output file name
+     */
+    public void setOutputName(String name) {
+        nativeSetOutputName(name);
+    }
+
+    /**
+     * Reads a "config" file containing a set of parameter name/value pairs.
+     * Searches the standard places: tessdata/configs, tessdata/tessconfigs
+     * and also accepts a relative or absolute path name.
+     *
+     * @param filename the configuration file name; a name without a path
+     *        should be placed in tessdata/configs or tessdata/tessconfigs
+     */
+    public void ReadConfigFile(String filename) {
+        nativeReadConfigFile(filename);
+    }
+
+    /**
+     * Returns the recognized text coded as a UTF-8 box file.
+     *
+     * @param page is a 0-based page index that will appear in the box file.
+     * @return the box file contents as a string
+     */
+    public String getBoxText(int page) {
+        return nativeGetBoxText(page);
+    }
+
     // ******************
     // * Native methods *
     // ******************
@@ -543,4 +597,14 @@ private native void nativeSetImageBytes(
     private native int nativeGetWords();
 
     private native int nativeGetResultIterator();
+
+    private native String nativeGetBoxText(int page_number);
+
+    private native String nativeGetHOCRText(int page_number);
+
+    private native void nativeSetInputName(String name);
+
+    private native void nativeSetOutputName(String name);
+
+    private native void nativeReadConfigFile(String fileName);
 }