diff --git a/README.md b/README.md
index c7936739a..a4e426717 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,9 @@ native code:
 -keep class com.googlecode.tesseract.android.PageIterator {
     private long mNativePageIterator;
 }
+-keep class com.googlecode.tesseract.android.TessPdfRenderer {
+    private long mNativePdfRenderer;
+}
 -keep class com.googlecode.tesseract.android.ResultIterator {
     private long mNativeResultIterator;
 }
diff --git a/runtests.sh b/runtests.sh
index 304949733..b0137e927 100755
--- a/runtests.sh
+++ b/runtests.sh
@@ -13,5 +13,6 @@ adb push -p tesseract-ocr/tessdata/eng.cube.params /mnt/sdcard/tesseract/tessdat
 adb push -p tesseract-ocr/tessdata/eng.cube.size /mnt/sdcard/tesseract/tessdata
 adb push -p tesseract-ocr/tessdata/eng.cube.word-freq /mnt/sdcard/tesseract/tessdata
 adb push -p tesseract-ocr/tessdata/eng.tesseract_cube.nn /mnt/sdcard/tesseract/tessdata
+adb push -p tess-two/jni/com_googlecode_tesseract_android/src/tessdata/pdf.ttf /mnt/sdcard/tesseract/tessdata
 
 adb shell am instrument -w com.googlecode.tesseract.android.test/android.test.InstrumentationTestRunner
diff --git a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java
index b8f1ac5c2..676f0881b 100644
--- a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java
+++ b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java
@@ -43,10 +43,10 @@ import com.googlecode.tesseract.android.TessBaseAPI.ProgressValues;
 
 public class TessBaseAPITest extends TestCase {
 
-    private static final String TESSBASE_PATH = "/mnt/sdcard/tesseract/";
-    private static final String DEFAULT_LANGUAGE = "eng";
-    private static final String TESSDATA_PATH = TESSBASE_PATH + "tessdata/";
-    private static final String[] EXPECTED_CUBE_DATA_FILES_ENG = {
+    static final String TESSBASE_PATH = "/mnt/sdcard/tesseract/";
+    static final String DEFAULT_LANGUAGE = "eng";
+    static final String TESSDATA_PATH = TESSBASE_PATH + "tessdata/";
+    static final String[] EXPECTED_CUBE_DATA_FILES_ENG = {
             "eng.cube.bigrams",
             "eng.cube.fold",
             "eng.cube.lm",
diff --git a/tess-two-test/src/com/googlecode/tesseract/android/test/TessPdfRendererTest.java b/tess-two-test/src/com/googlecode/tesseract/android/test/TessPdfRendererTest.java
new file mode 100644
index 000000000..1ea56fcdd
--- /dev/null
+++ b/tess-two-test/src/com/googlecode/tesseract/android/test/TessPdfRendererTest.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2015 Robert Theis
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.googlecode.tesseract.android.test;
+
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import android.graphics.Bitmap;
+import android.graphics.Canvas;
+import android.graphics.Color;
+import android.graphics.Paint;
+import android.graphics.Paint.Align;
+import android.graphics.Paint.Style;
+import android.test.suitebuilder.annotation.SmallTest;
+
+import com.googlecode.leptonica.android.Pix;
+import com.googlecode.leptonica.android.ReadFile;
+import com.googlecode.leptonica.android.WriteFile;
+import com.googlecode.tesseract.android.TessBaseAPI;
+import com.googlecode.tesseract.android.TessPdfRenderer;
+
+public class TessPdfRendererTest extends TestCase {
+
+    private static final String OUTPUT_PATH = "/mnt/sdcard/";
+
+    @SmallTest
+    public void testCreate() {
+        // Attempt to initialize the API.
+        final TessBaseAPI baseApi = new TessBaseAPI();
+        baseApi.init(TessBaseAPITest.TESSBASE_PATH,
+                TessBaseAPITest.DEFAULT_LANGUAGE);
+
+        String pdfBasename = "testCreate";
+
+        // Attempt to create a TessPdfRenderer instance.
+        TessPdfRenderer pdfRenderer = new TessPdfRenderer(baseApi, OUTPUT_PATH
+                + pdfBasename);
+
+        pdfRenderer.recycle();
+        baseApi.end();
+    }
+
+    @SmallTest
+    public void testAddPageToDocument() throws IOException {
+        // Attempt to initialize the API.
+        final TessBaseAPI baseApi = new TessBaseAPI();
+        baseApi.init(TessBaseAPITest.TESSBASE_PATH,
+                TessBaseAPITest.DEFAULT_LANGUAGE);
+
+        String pdfBasename = "testAddPageToDocument";
+
+        // Attempt to create a TessPdfRenderer instance.
+        TessPdfRenderer pdfRenderer = new TessPdfRenderer(baseApi, OUTPUT_PATH
+                + pdfBasename);
+
+        // Start the PDF writing process.
+        boolean beginSuccess = baseApi.beginDocument(pdfRenderer);
+        // assertTrue(beginSuccess);
+
+        // Add a page to the PDF.
+        final Pix pixOne = getTextImage("page one", 640, 480);
+        final File fileOne = File.createTempFile("testPageOne", ".png");
+        WriteFile.writeImpliedFormat(pixOne, fileOne);
+        boolean addedPageOne = baseApi.addPageToDocument(pixOne,
+                fileOne.getAbsolutePath(), pdfRenderer);
+        assertTrue(addedPageOne);
+
+        // Add a second page.
+        final Pix pixTwo = getTextImage("page two", 640, 480);
+        final File fileTwo = File.createTempFile("testPageTwo", ".png");
+        WriteFile.writeImpliedFormat(pixTwo, fileTwo);
+        boolean addedPageTwo = baseApi.addPageToDocument(pixTwo,
+                fileTwo.getAbsolutePath(), pdfRenderer);
+        assertTrue(addedPageTwo);
+
+        // Finish writing to the PDF document.
+        boolean endSuccess = baseApi.endDocument(pdfRenderer);
+        assertTrue(endSuccess);
+
+        // Ensure that a PDF file was created.
+        File pdf = new File(OUTPUT_PATH + pdfBasename + ".pdf");
+        assertTrue(pdf.isFile());
+        assertTrue(pdf.length() > 0);
+
+        pdfRenderer.recycle();
+        baseApi.end();
+        pixOne.recycle();
+        pixTwo.recycle();
+    }
+
+    private static Pix getTextImage(String text, int width, int height) {
+        final Bitmap bmp = Bitmap.createBitmap(width, height,
+                Bitmap.Config.ARGB_8888);
+        final Paint paint = new Paint();
+        final Canvas canvas = new Canvas(bmp);
+
+        canvas.drawColor(Color.WHITE);
+
+        paint.setColor(Color.BLACK);
+        paint.setStyle(Style.FILL);
+        paint.setAntiAlias(true);
+        paint.setTextAlign(Align.CENTER);
+        paint.setTextSize(24.0f);
+        canvas.drawText(text, width / 2, height / 2, paint);
+
+        return ReadFile.readBitmap(bmp);
+    }
+
+}
\ No newline at end of file
diff --git a/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp b/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp
index 8e10feb8d..8a165cf4e 100644
--- a/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp
+++ b/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp
@@ -22,6 +22,7 @@
 #include "baseapi.h"
 #include "ocrclass.h"
 #include "allheaders.h"
+#include "renderer.h"
 
 static jfieldID field_mNativeData;
 static jmethodID method_onProgressValues;
@@ -587,6 +588,71 @@ void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeReadConfigFile(JNIE
   nat->api.ReadConfigFile(c_file_name);
   env->ReleaseStringUTFChars(fileName, c_file_name);
 }
+
+// Creates a native TessPDFRenderer for the given output path, using the data
+// path of the supplied TessBaseAPI instance. Returns the pointer as a jlong.
+jlong Java_com_googlecode_tesseract_android_TessPdfRenderer_nativeCreate(JNIEnv *env,
+    jclass clazz,
+    jobject jTessBaseApi,
+    jstring outputPath) {
+  native_data_t *nat = get_native_data(env, jTessBaseApi);
+  const char *c_output_path = env->GetStringUTFChars(outputPath, NULL);
+
+  tesseract::TessPDFRenderer* result = new tesseract::TessPDFRenderer(c_output_path, nat->api.GetDatapath());
+
+  env->ReleaseStringUTFChars(outputPath, c_output_path);
+
+  return (jlong) result;
+}
+
+// Deletes the native renderer whose pointer was returned by nativeCreate.
+void Java_com_googlecode_tesseract_android_TessPdfRenderer_nativeRecycle(JNIEnv *env,
+    jclass clazz,
+    jlong jPointer) {
+  tesseract::TessPDFRenderer* renderer = (tesseract::TessPDFRenderer*) jPointer;
+  delete renderer;
+}
+
+// Begins a new document on the given renderer, passing an empty title.
+jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeBeginDocument(JNIEnv *env,
+    jobject thiz,
+    jlong jRenderer) {
+
+  tesseract::TessPDFRenderer* pdfRenderer = (tesseract::TessPDFRenderer*) jRenderer;
+  return pdfRenderer->BeginDocument("");
+}
+
+// Ends the current document on the given renderer.
+jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeEndDocument(JNIEnv *env,
+    jobject thiz,
+    jlong jRenderer) {
+
+  tesseract::TessPDFRenderer* pdfRenderer = (tesseract::TessPDFRenderer*) jRenderer;
+  return pdfRenderer->EndDocument();
+}
+
+// Runs ProcessPage on the given Pix with the renderer attached and reports
+// whether it succeeded.
+jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeAddPageToDocument(JNIEnv *env,
+    jobject thiz,
+    jlong jPix,
+    jstring jPath,
+    jlong jRenderer) {
+
+  tesseract::TessPDFRenderer* pdfRenderer = (tesseract::TessPDFRenderer*) jRenderer;
+
+  native_data_t *nat = get_native_data(env, thiz);
+  PIX* pix = (PIX*) jPix;
+  const char *inputImage = env->GetStringUTFChars(jPath, NULL);
+
+  // Propagate ProcessPage's result instead of unconditionally reporting success.
+  bool success = nat->api.ProcessPage(pix, 0, inputImage, NULL, 0, pdfRenderer);
+
+  env->ReleaseStringUTFChars(jPath, inputImage);
+
+  return success ? JNI_TRUE : JNI_FALSE;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java b/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java
index 9d427838f..f6275cda9 100644
--- a/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java
+++ b/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java
@@ -853,6 +853,43 @@ protected void onProgressValues(final int percent, final int left,
         }
     }
 
+    /**
+     * Starts a new document. This clears the contents of the output data.
+     *
+     * @param tessPdfRenderer the renderer on which to begin the document
+     * @return {@code true} on success. {@code false} on failure
+     */
+    public boolean beginDocument(TessPdfRenderer tessPdfRenderer) {
+        return nativeBeginDocument(tessPdfRenderer.getNativePdfRenderer());
+    }
+
+    /**
+     * Finishes the document and finalizes the output data.
+     * Invalid if beginDocument not yet called.
+     *
+     * @param tessPdfRenderer the renderer on which to end the document
+     * @return {@code true} on success. {@code false} on failure
+     */
+    public boolean endDocument(TessPdfRenderer tessPdfRenderer) {
+        return nativeEndDocument(tessPdfRenderer.getNativePdfRenderer());
+    }
+
+    /**
+     * Adds the given data to the opened document (if any).
+     *
+     * @param imageToProcess image to be used for OCR
+     * @param imageToWrite path to image to be written into resulting document
+     * @param tessPdfRenderer the renderer that receives the page
+     *
+     * @return {@code true} on success. {@code false} on failure
+     */
+    public boolean addPageToDocument(Pix imageToProcess, String imageToWrite,
+            TessPdfRenderer tessPdfRenderer) {
+        return nativeAddPageToDocument(imageToProcess.getNativePix(),
+                imageToWrite, tessPdfRenderer.getNativePdfRenderer());
+    }
+
+
     // ******************
     // * Native methods *
     // ******************
@@ -927,4 +964,10 @@ private native void nativeSetImageBytes(
     private native void nativeReadConfigFile(String fileName);
 
     private native int nativeStop();
+
+    private native boolean nativeBeginDocument(long rendererPointer);
+
+    private native boolean nativeEndDocument(long rendererPointer);
+
+    private native boolean nativeAddPageToDocument(long nativePix, String imagePath, long rendererPointer);
 }
diff --git a/tess-two/src/com/googlecode/tesseract/android/TessPdfRenderer.java b/tess-two/src/com/googlecode/tesseract/android/TessPdfRenderer.java
new file mode 100644
index 000000000..a60fe0058
--- /dev/null
+++ b/tess-two/src/com/googlecode/tesseract/android/TessPdfRenderer.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2015 Robert Theis
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.googlecode.tesseract.android;
+
+/**
+ * Java representation of a native Tesseract PDF renderer.
+ */
+public class TessPdfRenderer {
+
+    /**
+     * Used by the native implementation of the class.
+     */
+    private final long mNativePdfRenderer;
+
+    static {
+        System.loadLibrary("tess");
+    }
+
+    private boolean mRecycled;
+
+    /**
+     * Constructs an instance of a Tesseract PDF renderer.
+     *
+     * When the instance of TessPdfRenderer is no longer needed, its
+     * {@link #recycle} method must be invoked to dispose of it.
+     *
+     * @param baseApi API instance to use for performing OCR
+     * @param outputPath Full path to write the resulting PDF to, not
+     *         including the ".pdf" extension
+     */
+    public TessPdfRenderer(TessBaseAPI baseApi, String outputPath) {
+        this.mNativePdfRenderer = nativeCreate(baseApi, outputPath);
+        mRecycled = false;
+    }
+
+    /**
+     * @return A pointer to the native TessPdfRenderer object.
+     * @throws IllegalStateException if {@link #recycle} has already been called
+     */
+    public long getNativePdfRenderer() {
+        if (mRecycled) {
+            throw new IllegalStateException("TessPdfRenderer has been recycled");
+        }
+
+        return mNativePdfRenderer;
+    }
+
+    /**
+     * Releases resources and frees any memory associated with this
+     * TessPdfRenderer object. Must be called on object destruction.
+     * Calling it again after the first call has no effect.
+     */
+    public void recycle() {
+        // Guard against freeing the native renderer twice.
+        if (mRecycled) {
+            return;
+        }
+
+        nativeRecycle(mNativePdfRenderer);
+        mRecycled = true;
+    }
+
+    private static native long nativeCreate(TessBaseAPI tessBaseAPI, String outputPath);
+
+    private static native void nativeRecycle(long nativePointer);
+
+}
\ No newline at end of file