From efa152cc9811ac275beefc3dc3f3a4a48f2c95b1 Mon Sep 17 00:00:00 2001
From: Jonathan Palma
Date: Thu, 2 Jul 2020 00:58:33 -0600
Subject: [PATCH] feat(android): add tesseract support [UNSAFE]

---
 android/build.gradle                          |   1 +
 .../com/reactlibrary/TesseractOcrModule.java  | 208 +++++++++++++++++-
 index.js                                      |  27 ++-
 3 files changed, 232 insertions(+), 4 deletions(-)

diff --git a/android/build.gradle b/android/build.gradle
index 5dd382b..885c6e5 100644
--- a/android/build.gradle
+++ b/android/build.gradle
@@ -73,6 +73,7 @@ repositories {
 dependencies {
     //noinspection GradleDynamicVersion
     implementation 'com.facebook.react:react-native:+'  // From node_modules
+    implementation 'com.rmtheis:tess-two:9.1.0'
 }
 
 def configureReactNativePom(def pom) {
diff --git a/android/src/main/java/com/reactlibrary/TesseractOcrModule.java b/android/src/main/java/com/reactlibrary/TesseractOcrModule.java
--- a/android/src/main/java/com/reactlibrary/TesseractOcrModule.java
+++ b/android/src/main/java/com/reactlibrary/TesseractOcrModule.java
@@ -1,5 +1,24 @@
 package com.reactlibrary;
 
+import android.graphics.Bitmap;
+import android.graphics.BitmapFactory;
+import android.os.Environment;
+import android.support.annotation.Nullable;
+import android.util.Log;
+
+import com.facebook.react.bridge.Arguments;
+import com.facebook.react.bridge.Promise;
+import com.facebook.react.bridge.ReadableMap;
+import com.facebook.react.bridge.WritableMap;
+import com.facebook.react.modules.core.DeviceEventManagerModule;
+import com.googlecode.tesseract.android.TessBaseAPI;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
 import com.facebook.react.bridge.ReactApplicationContext;
 import com.facebook.react.bridge.ReactContextBaseJavaModule;
 import com.facebook.react.bridge.ReactMethod;
@@ -8,10 +27,25 @@
 public class TesseractOcrModule extends ReactContextBaseJavaModule {
 
     private final ReactApplicationContext reactContext;
+    // Directory handed to TessBaseAPI.init(); the "tessdata" folder is created inside it.
+    private final String dataPath;
+    // Guards the engine so stop() never touches one the worker thread has already ended.
+    private final Object engineLock = new Object();
+    private TessBaseAPI tesseract;
+
+    // Option keys reserved for the JavaScript options map; nothing reads them yet.
+    private static final String KEY_ALLOW_LIST = "allowlist";
+    private static final String KEY_DENY_LIST = "denylist";
+    private static final String DATA_FILES_DIRECTORY = "tessdata";
+    // Device event carrying {percent} while a recognition is running.
+    private static final String PROGRESS_EVENT = "tesseractOcrProgress";
 
     public TesseractOcrModule(ReactApplicationContext reactContext) {
         super(reactContext);
         this.reactContext = reactContext;
+        // Built per instance: a static final field cannot be extended with the package name.
+        this.dataPath = Environment.getExternalStorageDirectory().toString() + File.separator
+                + reactContext.getPackageName() + File.separator;
     }
 
     @Override
@@ -20,8 +54,176 @@ public String getName() {
     }
 
+    /**
+     * Asks the recognition that is currently running to stop.
+     *
+     * @param promise resolved once the engine was told to stop; rejected when no
+     *                recognition is running or the request fails
+     */
     @ReactMethod
-    public void sampleMethod(String stringArgument, int numberArgument, Callback callback) {
-        // TODO: Implement some actually useful functionality
-        callback.invoke("Received numberArgument: " + numberArgument + " stringArgument: " + stringArgument);
+    public void stop(Promise promise) {
+        Log.d(getName(), "stop");
+        try {
+            synchronized (engineLock) {
+                if (tesseract == null) {
+                    promise.reject("An error occurred when trying to stop recognition", "No recognition is in progress");
+                    return;
+                }
+                // Only request the stop: the worker thread still owns the engine and ends it.
+                tesseract.stop();
+            }
+            promise.resolve("Recognition was canceled");
+        } catch (Exception e) {
+            Log.e(getName(), e.toString());
+            promise.reject("An error occurred when trying to stop recognition", e.toString());
+        }
+    }
+
+    /**
+     * Runs OCR on an image file on a background thread.
+     *
+     * Progress is sent to JavaScript as PROGRESS_EVENT device events because a bridge
+     * Callback may be invoked only once.
+     *
+     * @param path    file-system path of the image to decode
+     * @param lang    language handed to TessBaseAPI.init()
+     * @param options recognition options; currently unused
+     * @param promise resolved with the recognized text, rejected when recognition fails
+     */
+    @ReactMethod
+    public void recognize(final String path, final String lang, @Nullable ReadableMap options, final Promise promise) {
+        Log.d(getName(), "recognize");
+
+        new Thread(new Runnable() {
+            @Override
+            public void run() {
+                TessBaseAPI engine = null;
+                try {
+                    prepareTesseract();
+                    Bitmap bitmap = BitmapFactory.decodeFile(path);
+                    if (bitmap == null) {
+                        promise.reject("Could not recognize text", "Could not decode image " + path);
+                        return;
+                    }
+
+                    engine = new TessBaseAPI(new TessBaseAPI.ProgressNotifier() {
+                        @Override
+                        public void onProgressValues(TessBaseAPI.ProgressValues progressValues) {
+                            emitProgress(progressValues.getPercent());
+                        }
+                    });
+                    synchronized (engineLock) {
+                        tesseract = engine;
+                    }
+                    if (!engine.init(dataPath, lang)) {
+                        promise.reject("Could not recognize text", "Could not initialize Tesseract for language " + lang);
+                        return;
+                    }
+                    engine.setImage(bitmap);
+                    // Progress notifications are produced by the HOCR pass.
+                    engine.getHOCRText(0);
+
+                    promise.resolve(engine.getUTF8Text());
+                } catch (Exception e) {
+                    Log.e(getName(), "Could not recognize text", e);
+                    promise.reject("Could not recognize text", e.toString());
+                } finally {
+                    releaseEngine(engine);
+                }
+            }
+        }).start();
+    }
+
+    /** Sends the current recognition percentage to JavaScript. */
+    private void emitProgress(int percent) {
+        Log.d(getName(), "progress " + percent);
+        WritableMap payload = Arguments.createMap();
+        payload.putInt("percent", percent);
+        reactContext
+                .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class)
+                .emit(PROGRESS_EVENT, payload);
+    }
+
+    /** Ends the engine and forgets it if it is still the one stop() would reach. */
+    private void releaseEngine(TessBaseAPI engine) {
+        if (engine == null) {
+            return;
+        }
+        synchronized (engineLock) {
+            engine.end();
+            if (tesseract == engine) {
+                tesseract = null;
+            }
+        }
+    }
+
+    /** Creates the directory at path, including missing parents, unless it already exists. */
+    private void prepareDirectory(String path) {
+        Log.d(getName(), "prepare directory");
+
+        File dir = new File(path);
+        if (dir.exists()) {
+            return;
+        }
+        if (dir.mkdirs()) {
+            Log.d(getName(), "Created directory " + path);
+        } else {
+            Log.e(getName(), "Could not create directory (" + path + "), please make sure the app is allowed to write to external storage.");
+        }
+    }
+
+    /** Makes sure the tessdata directory exists and holds the bundled data files. */
+    private void prepareTesseract() {
+        Log.d(getName(), "prepare tesseract environment");
+
+        try {
+            prepareDirectory(dataPath + DATA_FILES_DIRECTORY);
+        } catch (Exception e) {
+            Log.e(getName(), "Could not prepare " + DATA_FILES_DIRECTORY + " directory", e);
+        }
+
+        copyTessDataFiles(DATA_FILES_DIRECTORY);
+    }
+
+    /** Copies one bundled asset to targetPath, closing both streams even on failure. */
+    private void copyAsset(String assetPath, String targetPath) throws IOException {
+        InputStream in = reactContext.getAssets().open(assetPath);
+        try {
+            OutputStream out = new FileOutputStream(targetPath);
+            try {
+                byte[] buf = new byte[1024];
+                int len;
+                while ((len = in.read(buf)) > 0) {
+                    out.write(buf, 0, len);
+                }
+            } finally {
+                out.close();
+            }
+        } finally {
+            in.close();
+        }
+    }
+
+    /** Copies every asset under path that is not yet present below dataPath. */
+    private void copyTessDataFiles(String path) {
+        Log.d(getName(), "copy tesseract data files");
+        try {
+            String[] fileList = reactContext.getAssets().list(path);
+            if (fileList == null) {
+                return;
+            }
+
+            for (String fileName : fileList) {
+                String oldPath = path + File.separator + fileName;
+                String newPath = dataPath + oldPath;
+
+                if (!(new File(newPath)).exists()) {
+                    copyAsset(oldPath, newPath);
+                    Log.d(getName(), "Copied " + oldPath + " to " + newPath);
+                }
+            }
+        } catch (IOException e) {
+            // Pass the exception itself so the stack trace is logged.
+            Log.e(getName(), "Unable to copy files to tessdata", e);
+        }
     }
 }
diff --git a/index.js b/index.js
--- a/index.js
+++ b/index.js
@@ -1,5 +1,30 @@
-import { NativeModules } from 'react-native';
+import { DeviceEventEmitter, NativeModules } from "react-native";
 
 const { TesseractOcr } = NativeModules;
 
+// Keep the native method reachable: the wrapper below replaces it on the module object.
+const nativeRecognize = TesseractOcr.recognize;
+
+// Progress arrives as device events because a native callback can only be invoked once.
+TesseractOcr.recognize = (path, lang, { onProgress, ...rest } = {}) => {
+  const subscription =
+    typeof onProgress === "function"
+      ? DeviceEventEmitter.addListener("tesseractOcrProgress", ({ percent }) =>
+          onProgress(percent)
+        )
+      : null;
+  const unsubscribe = () => subscription && subscription.remove();
+
+  return nativeRecognize(path, lang, rest).then(
+    (text) => {
+      unsubscribe();
+      return text;
+    },
+    (error) => {
+      unsubscribe();
+      throw error;
+    }
+  );
+};
+
 export default TesseractOcr;