From 68f6321e3d7e2dda1ab60d3b4eba01d285de1908 Mon Sep 17 00:00:00 2001 From: Xyresic Date: Tue, 6 Dec 2016 03:13:08 -0800 Subject: [PATCH 1/4] Issue 185 --- .../android/test/TessBaseAPITest.java | 128 ++++++++++++------ .../tessbaseapi.cpp | 6 - .../tesseract/android/TessBaseAPI.java | 7 +- 3 files changed, 92 insertions(+), 49 deletions(-) diff --git a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java index d158bf044..b7898568c 100644 --- a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java +++ b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java @@ -16,13 +16,6 @@ package com.googlecode.tesseract.android.test; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.List; - -import junit.framework.TestCase; - import android.annotation.SuppressLint; import android.graphics.Bitmap; import android.graphics.Bitmap.CompressFormat; @@ -32,8 +25,10 @@ import android.graphics.Paint.Align; import android.graphics.Paint.Style; import android.graphics.Rect; +import android.os.AsyncTask; import android.test.suitebuilder.annotation.SmallTest; import android.text.Html; +import android.util.Log; import android.util.Pair; import com.googlecode.leptonica.android.Pix; @@ -44,6 +39,13 @@ import com.googlecode.tesseract.android.TessBaseAPI.ProgressNotifier; import com.googlecode.tesseract.android.TessBaseAPI.ProgressValues; +import junit.framework.TestCase; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.List; + public class TessBaseAPITest extends TestCase { @SuppressLint("SdCardPath") static final String TESSBASE_PATH = "/sdcard/tesseract/"; @@ -131,19 +133,47 @@ public void testChoiceIterator() { private static Bitmap getTextImage(String text, int width, int height) { final Bitmap bmp = Bitmap.createBitmap(width, height, 
Bitmap.Config.ARGB_8888); - final Paint paint = new Paint(); - final Canvas canvas = new Canvas(bmp); + final Canvas canvas = new Canvas(bmp); canvas.drawColor(Color.WHITE); + drawTextNewLines(text, canvas); + return bmp; + } + + /** + * Draws text (with newlines) centered onto the canvas. If the text does not fit horizontally, + * it will be cut off. If the text does not fit vertically, the start of the text will be at + * the top of the image and whatever not fitting onto the image being cut off. If the text + * fits vertically it will be centered vertically. + * + * @param text String to draw onto the canvas + * @param canvas Canvas to draw text onto + */ + private static void drawTextNewLines(String text, Canvas canvas){ + final Paint paint = new Paint(); paint.setColor(Color.BLACK); paint.setStyle(Style.FILL); paint.setAntiAlias(true); paint.setTextAlign(Align.CENTER); paint.setTextSize(24.0f); - canvas.drawText(text, width / 2, height / 2, paint); - return bmp; + String[] textArray = text.split("\n"); + int width = canvas.getWidth(); + int height = canvas.getHeight(); + int count = textArray.length; + int lineSize = (int) (paint.descent() - paint.ascent()); + int maxLinesToPushUp = height / lineSize; + maxLinesToPushUp = count < maxLinesToPushUp ? 
count : maxLinesToPushUp; + int pixelsToPushUp = (maxLinesToPushUp - 1) / 2 * lineSize; + + int x = width / 2; + int y = (height / 2) - pixelsToPushUp; + + for (String line : textArray){ + canvas.drawText(line, x, y, paint); + y += lineSize; + } } @SmallTest @@ -687,36 +717,52 @@ public void testSetVariable() { bmp.recycle(); } - // @SmallTest - // public void testStop() throws InterruptedException { - // final TessBaseAPI baseApi = new TessBaseAPI(); - // final String inputText = "The quick brown fox jumps over the lazy dog."; - // final Bitmap bmp = getTextImage(inputText, 640, 480); - // - // boolean success = baseApi.init(TESSBASE_PATH, DEFAULT_LANGUAGE); - // assertTrue(success); - // - // baseApi.setImage(bmp); - // - // class LoopingRecognitionTask extends AsyncTask { - // - // @Override - // protected Void doInBackground(Void... params) { - // while (true) - // baseApi.getUTF8Text(); - // } - // } - // - // LoopingRecognitionTask task = new LoopingRecognitionTask(); - // task.execute(); - // - // Thread.sleep(200); - // - // baseApi.stop(); - // - // baseApi.end(); - // bmp.recycle(); - // } + @SmallTest + public void testStop() throws InterruptedException { + + StringBuilder inputTextBuilder = new StringBuilder(); + for (int i = 0; i < 200; i++){ + inputTextBuilder.append("The quick brown fox jumps over the lazy dog.\n"); + } + final Bitmap bmp = getTextImage(inputTextBuilder.toString(), 640, 4000); + + final Object progressLock = new Object(); + + final TessBaseAPI baseApi = new TessBaseAPI(new ProgressNotifier() { + @Override + public void onProgressValues(ProgressValues progressValues) { + Log.d("TEST", "Progress: " + progressValues.getPercent()); + if (progressValues.getPercent() > 1){ + synchronized (progressLock){ + progressLock.notify(); + } + } + } + }); + + class LongRecognitionTask extends AsyncTask { + @Override + protected Void doInBackground(Void... 
params) { + baseApi.getHOCRText(0); + return null; + } + } + + boolean success = baseApi.init(TESSBASE_PATH, DEFAULT_LANGUAGE); + assertTrue(success); + baseApi.setImage(bmp); + + LongRecognitionTask task = new LongRecognitionTask(); + task.execute(); + + synchronized (progressLock){ + progressLock.wait(); + } + + baseApi.stop(); + baseApi.end(); + bmp.recycle(); + } @SmallTest public void testWordConfidences() { diff --git a/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp b/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp index 33103a985..b1c25c4d0 100644 --- a/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp +++ b/tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp @@ -296,12 +296,6 @@ jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetUTF8Text(JNIE native_data_t *nat = (native_data_t*) mNativeData; nat->initStateVariables(env, &thiz); - ETEXT_DESC monitor; - monitor.progress_callback = progressJavaCallback; - monitor.cancel = cancelFunc; - monitor.cancel_this = nat; - monitor.progress_this = nat; - char *text = nat->api.GetUTF8Text(); jstring result = env->NewStringUTF(text); diff --git a/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java b/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java index 5ee1eb983..b3cf14596 100644 --- a/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java +++ b/tess-two/src/com/googlecode/tesseract/android/TessBaseAPI.java @@ -561,6 +561,9 @@ public void setImage(byte[] imagedata, int width, int height, int bpp, int bpl) /** * The recognized text is returned as a String which is coded as UTF8. + * This is a blocking operation that will not work with {@link #stop()}. 
+ * Call {@link #getHOCRText(int)} before calling this function to + * interrupt a recognition task with {@link #stop()} * * @return the recognized text */ @@ -718,7 +721,7 @@ public ResultIterator getResultIterator() { /** * Make a HTML-formatted string with hOCR markup from the internal data - * structures. + * structures. Interruptible by {@link #stop()}. * * @param page is 0-based but will appear in the output as 1-based. * @return HTML-formatted string with hOCR markup @@ -796,7 +799,7 @@ public String getVersion() { } /** - * Cancel any recognition in progress. + * Cancel recognition started by {@link #getHOCRText(int)}. */ public void stop() { if (mRecycled) From c61cb6457d42bc2228b77f20fac46d3af54c80e1 Mon Sep 17 00:00:00 2001 From: Xyresic Date: Tue, 6 Dec 2016 03:27:13 -0800 Subject: [PATCH 2/4] Fix getHOCRText() crashing I believe I know why calling stop() and end() right after each other was crashing the program. It's because getHOCRText() will try to return the recognition done up to that point - but if we end() before that finishes, we will hit problems since end() cleans up TessBaseAPI. --- .../tesseract/android/test/TessBaseAPITest.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java index b7898568c..e60132890 100644 --- a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java +++ b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java @@ -28,7 +28,6 @@ import android.os.AsyncTask; import android.test.suitebuilder.annotation.SmallTest; import android.text.Html; -import android.util.Log; import android.util.Pair; import com.googlecode.leptonica.android.Pix; @@ -727,11 +726,9 @@ public void testStop() throws InterruptedException { final Bitmap bmp = getTextImage(inputTextBuilder.toString(), 640, 4000); final Object progressLock 
= new Object(); - final TessBaseAPI baseApi = new TessBaseAPI(new ProgressNotifier() { @Override public void onProgressValues(ProgressValues progressValues) { - Log.d("TEST", "Progress: " + progressValues.getPercent()); if (progressValues.getPercent() > 1){ synchronized (progressLock){ progressLock.notify(); @@ -744,6 +741,9 @@ class LongRecognitionTask extends AsyncTask { @Override protected Void doInBackground(Void... params) { baseApi.getHOCRText(0); + synchronized (progressLock){ + progressLock.notify(); + } return null; } } @@ -755,11 +755,20 @@ protected Void doInBackground(Void... params) { LongRecognitionTask task = new LongRecognitionTask(); task.execute(); + // Wait for recognition to start synchronized (progressLock){ progressLock.wait(); } baseApi.stop(); + + // Wait for getHOCRText() to complete (it will return recognition done up to that point), + // otherwise we may end() and recycle baseApi before getHOCRText() finishes returning the + // data and cause an exception + synchronized (progressLock){ + progressLock.wait(); + } + baseApi.end(); bmp.recycle(); } From c947584ef960053de0600e22d9bd246241d938cf Mon Sep 17 00:00:00 2001 From: Xyresic Date: Tue, 6 Dec 2016 03:41:23 -0800 Subject: [PATCH 3/4] Fail if OCR happens too fast --- .../tesseract/android/test/TessBaseAPITest.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java index e60132890..55f117ae6 100644 --- a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java +++ b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java @@ -729,6 +729,9 @@ public void testStop() throws InterruptedException { final TessBaseAPI baseApi = new TessBaseAPI(new ProgressNotifier() { @Override public void onProgressValues(ProgressValues progressValues) { + if (progressValues.getPercent() > 50){ + 
fail("OCR recognition was too fast, try to increase the image size and amount of text?"); + } if (progressValues.getPercent() > 1){ synchronized (progressLock){ progressLock.notify(); @@ -762,9 +765,8 @@ protected Void doInBackground(Void... params) { baseApi.stop(); - // Wait for getHOCRText() to complete (it will return recognition done up to that point), - // otherwise we may end() and recycle baseApi before getHOCRText() finishes returning the - // data and cause an exception + // Wait for getHOCRText() to complete, otherwise we may end() and recycle baseApi before + // getHOCRText() finishes execution on the AsyncTask thread and cause an exception synchronized (progressLock){ progressLock.wait(); } From 33175da51af00580da3b0462c0a59ebf4ef5f975 Mon Sep 17 00:00:00 2001 From: Xyresic Date: Tue, 6 Dec 2016 05:11:33 -0800 Subject: [PATCH 4/4] Should use a semaphore instead of locks. Subtle bug in that there is no guarantee that wait() will be called before notify(). --- .../android/test/TessBaseAPITest.java | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java index 55f117ae6..24b42526f 100644 --- a/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java +++ b/tess-two-test/src/com/googlecode/tesseract/android/test/TessBaseAPITest.java @@ -44,6 +44,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.util.List; +import java.util.concurrent.Semaphore; public class TessBaseAPITest extends TestCase { @SuppressLint("SdCardPath") @@ -725,7 +726,7 @@ public void testStop() throws InterruptedException { } final Bitmap bmp = getTextImage(inputTextBuilder.toString(), 640, 4000); - final Object progressLock = new Object(); + final Semaphore progressSem = new Semaphore(0); final TessBaseAPI baseApi = new TessBaseAPI(new ProgressNotifier() { 
@Override public void onProgressValues(ProgressValues progressValues) { @@ -733,9 +734,7 @@ public void onProgressValues(ProgressValues progressValues) { fail("OCR recognition was too fast, try to increase the image size and amount of text?"); } if (progressValues.getPercent() > 1){ - synchronized (progressLock){ - progressLock.notify(); - } + progressSem.release(); } } }); @@ -744,9 +743,7 @@ class LongRecognitionTask extends AsyncTask { @Override protected Void doInBackground(Void... params) { baseApi.getHOCRText(0); - synchronized (progressLock){ - progressLock.notify(); - } + progressSem.release(); return null; } } @@ -759,17 +756,13 @@ protected Void doInBackground(Void... params) { task.execute(); // Wait for recognition to start - synchronized (progressLock){ - progressLock.wait(); - } + progressSem.acquire(); baseApi.stop(); // Wait for getHOCRText() to complete, otherwise we may end() and recycle baseApi before // getHOCRText() finishes execution on the AsyncTask thread and cause an exception - synchronized (progressLock){ - progressLock.wait(); - } + progressSem.acquire(); baseApi.end(); bmp.recycle();