-
Notifications
You must be signed in to change notification settings - Fork 950
Using Tesseract OCR iOS
Adrian edited this page Oct 16, 2017
·
8 revisions
MyViewController.h
#import <TesseractOCR/TesseractOCR.h>
/// View controller used by the synchronous recognition example. It adopts
/// G8TesseractDelegate so that it can be assigned as the G8Tesseract
/// object's delegate.
@interface MyViewController : UIViewController <G8TesseractDelegate>
@end
MyViewController.m
/// Demonstrates synchronous recognition with G8Tesseract: configures the
/// engine, runs recognition on a sample image and reads back the results.
- (void)viewDidLoad
{
    [super viewDidLoad];

    // Languages are used for recognition (e.g. eng, ita, etc.). Tesseract engine
    // will search for the .traineddata language file in the tessdata directory.
    // For example, specifying "eng+ita" will search for "eng.traineddata" and
    // "ita.traineddata". Cube engine will search for "eng.cube.*" files.
    // See https://github.com/tesseract-ocr/tessdata.

    // Create your G8Tesseract object using the initWithLanguage method:
    G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:@"eng+ita"];

    // Optional: specify the engine to recognize with.
    // G8OCREngineModeTesseractOnly is the default. It provides more features and
    // is faster than the Cube engine. See G8Constants.h for more information.
    //tesseract.engineMode = G8OCREngineModeTesseractOnly;

    // Set up the delegate to receive Tesseract's callbacks.
    // self should conform to G8TesseractDelegate and implement a
    // "- (BOOL)shouldCancelImageRecognitionForTesseract:(G8Tesseract *)tesseract"
    // method to receive a callback to decide whether or not to interrupt
    // Tesseract before it finishes a recognition.
    tesseract.delegate = self;

    // Optional: Limit the character set Tesseract should try to recognize from
    tesseract.charWhitelist = @"0123456789";
    // This is a wrapper for the common Tesseract variable kG8ParamTesseditCharWhitelist:
    // [tesseract setVariableValue:@"0123456789" forKey:kG8ParamTesseditCharWhitelist];
    // See G8TesseractParameters.h for a complete list of Tesseract variables

    // Optional: Limit the character set Tesseract should not try to recognize from
    //tesseract.charBlacklist = @"OoZzBbSs";

    // Specify the image Tesseract should recognize on
    tesseract.image = [[UIImage imageNamed:@"image_sample.jpg"] g8_blackAndWhite];

    // Optional: Limit the area of the image Tesseract should recognize on to a rectangle
    tesseract.rect = CGRectMake(20, 20, 100, 100);

    // Optional: Limit recognition time to a few seconds
    tesseract.maximumRecognitionTime = 2.0;

    // Start the recognition. The results are read immediately afterwards, so
    // this call runs on the calling thread. Do not silently drop its result:
    // report a failure, then continue so that any available output is still shown.
    BOOL recognitionSucceeded = [tesseract recognize];
    if (!recognitionSucceeded) {
        NSLog(@"Tesseract recognition did not complete successfully");
    }

    // Retrieve the recognized text
    NSLog(@"%@", [tesseract recognizedText]);

    // You can retrieve more information about the recognized text with these methods:
    NSArray *characterBoxes = [tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelSymbol];
    NSArray *paragraphs = [tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelParagraph];
    NSArray *characterChoices = tesseract.characterChoices;
    UIImage *imageWithBlocks = [tesseract imageWithBlocks:characterBoxes drawText:YES thresholded:NO];
}
/// Delegate callback that logs the current value of the given Tesseract
/// object's `progress` property.
/// @param tesseract The G8Tesseract instance reporting progress.
- (void)progressImageRecognitionForTesseract:(G8Tesseract *)tesseract
{
    // Cast to a fixed-width type so the %lu specifier is valid on every architecture.
    unsigned long currentProgress = (unsigned long)[tesseract progress];
    NSLog(@"progress: %lu", currentProgress);
}
/// Delegate callback asking whether the running recognition should be cancelled.
/// @param tesseract The G8Tesseract instance performing the recognition.
/// @return Always NO in this example, so recognition is never interrupted.
- (BOOL)shouldCancelImageRecognitionForTesseract:(G8Tesseract *)tesseract
{
    // Set this to YES if you need to interrupt tesseract before it finishes.
    BOOL shouldCancel = NO;
    return shouldCancel;
}
MyViewController.h
#import <TesseractOCR/TesseractOCR.h>
/// View controller used by the G8RecognitionOperation example. It does not
/// adopt G8TesseractDelegate; the recognized text is delivered through the
/// operation's recognitionCompleteBlock instead.
@interface MyViewController : UIViewController
@end
MyViewController.m
/// Demonstrates recognition through G8RecognitionOperation: the operation is
/// configured, given a completion block and submitted to an NSOperationQueue.
- (void)viewDidLoad
{
    // Lifecycle overrides must forward to the superclass implementation.
    [super viewDidLoad];

    // Create RecognitionOperation
    G8RecognitionOperation *operation = [[G8RecognitionOperation alloc] initWithLanguage:@"eng+ita"];

    // Configure inner G8Tesseract object as described before
    operation.tesseract.charWhitelist = @"01234567890";
    operation.tesseract.image = [[UIImage imageNamed:@"image_sample.jpg"] g8_blackAndWhite];

    // Setup the recognitionCompleteBlock to receive the Tesseract object
    // after text recognition. It will hold the recognized text.
    operation.recognitionCompleteBlock = ^(G8Tesseract *recognizedTesseract) {
        // Retrieve the recognized text upon completion
        NSLog(@"%@", [recognizedTesseract recognizedText]);
    };

    // Add operation to queue. The queue is held only in a local variable here;
    // keep it in a property if you need to cancel or reuse it later.
    NSOperationQueue *queue = [[NSOperationQueue alloc] init];
    [queue addOperation:operation];
}
Make sure that you have used an Objective-C bridging header to include the library. Instructions on configuring a bridging header file can be found in the Apple Developer Library.
ViewController.swift
import UIKit
import TesseractOCR
/// View controller demonstrating synchronous recognition with G8Tesseract
/// from Swift. It acts as the G8Tesseract delegate.
class ViewController: UIViewController, G8TesseractDelegate {

    override func viewDidLoad() {
        super.viewDidLoad()

        // `let` instead of `var`: the reference is never reassigned, only the
        // properties of the object it points to are configured.
        let tesseract:G8Tesseract = G8Tesseract(language:"eng+ita")
        //tesseract.language = "eng+ita"
        tesseract.delegate = self
        // Limit the character set Tesseract should try to recognize from
        tesseract.charWhitelist = "01234567890"
        tesseract.image = UIImage(named: "image_sample.jpg")
        // Run the recognition, then print the recognized text
        tesseract.recognize()
        print(tesseract.recognizedText)
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
    }

    /// Delegate callback asking whether the running recognition should be cancelled.
    /// Always returns false in this example.
    func shouldCancelImageRecognitionForTesseract(tesseract: G8Tesseract!) -> Bool {
        return false // return true if you need to interrupt tesseract before it finishes
    }
}