Skip to content

Commit

Permalink
Vision beta samples (#1154)
Browse files Browse the repository at this point in the history
* Add Vision beta (Localized objects & Handwriting OCR)

* Updates after review

* Updates after review

* Update to released client library

* Update client library

* Update client library

* Update to Inc.
  • Loading branch information
nirupa-kumar authored and nnegrey committed Jul 23, 2018
1 parent 857163d commit fa1cf66
Show file tree
Hide file tree
Showing 6 changed files with 355 additions and 45 deletions.
10 changes: 10 additions & 0 deletions vision/beta/cloud-client/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,13 @@ mvn exec:java -DDetect -Dexec.args="crop ./resources/landmark.jpg"
mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \
gs://<BUCKET_ID>/"
```

#### Localized Objects
```
mvn exec:java -DDetect -Dexec.args="object-localization ./resources/puppies.jpg"
```

#### Handwritten OCR
```
mvn exec:java -DDetect -Dexec.args="handwritten-ocr ./resources/handwritten.jpg"
```
4 changes: 2 additions & 2 deletions vision/beta/cloud-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-vision</artifactId>
<version>1.34.0</version>
<version>1.37.1</version>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-storage</artifactId>
<version>1.34.0</version>
<version>1.37.1</version>
</dependency>
<!-- [END dependencies] -->

Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vision/beta/cloud-client/resources/puppies.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
332 changes: 291 additions & 41 deletions vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2017 Google Inc.
* Copyright 2018 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,45 +22,46 @@
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Storage.BlobListOption;
import com.google.cloud.storage.StorageOptions;
import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse;
import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder;
import com.google.cloud.vision.v1p2beta1.AnnotateImageRequest;
import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse;
import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest;
import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse;
import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse;
import com.google.cloud.vision.v1p2beta1.BatchAnnotateImagesResponse;
import com.google.cloud.vision.v1p2beta1.Block;
import com.google.cloud.vision.v1p2beta1.ColorInfo;
import com.google.cloud.vision.v1p2beta1.CropHint;
import com.google.cloud.vision.v1p2beta1.CropHintsAnnotation;
import com.google.cloud.vision.v1p2beta1.DominantColorsAnnotation;
import com.google.cloud.vision.v1p2beta1.EntityAnnotation;
import com.google.cloud.vision.v1p2beta1.FaceAnnotation;
import com.google.cloud.vision.v1p2beta1.Feature;
import com.google.cloud.vision.v1p2beta1.Feature.Type;
import com.google.cloud.vision.v1p2beta1.GcsDestination;
import com.google.cloud.vision.v1p2beta1.GcsSource;
import com.google.cloud.vision.v1p2beta1.Image;
import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient;
import com.google.cloud.vision.v1p2beta1.ImageContext;
import com.google.cloud.vision.v1p2beta1.ImageSource;
import com.google.cloud.vision.v1p2beta1.InputConfig;
import com.google.cloud.vision.v1p2beta1.LocationInfo;
import com.google.cloud.vision.v1p2beta1.OperationMetadata;
import com.google.cloud.vision.v1p2beta1.OutputConfig;
import com.google.cloud.vision.v1p2beta1.Page;
import com.google.cloud.vision.v1p2beta1.Paragraph;
import com.google.cloud.vision.v1p2beta1.SafeSearchAnnotation;
import com.google.cloud.vision.v1p2beta1.Symbol;
import com.google.cloud.vision.v1p2beta1.TextAnnotation;
import com.google.cloud.vision.v1p2beta1.WebDetection;
import com.google.cloud.vision.v1p2beta1.WebDetection.WebEntity;
import com.google.cloud.vision.v1p2beta1.WebDetection.WebImage;
import com.google.cloud.vision.v1p2beta1.WebDetection.WebLabel;
import com.google.cloud.vision.v1p2beta1.WebDetection.WebPage;
import com.google.cloud.vision.v1p2beta1.WebDetectionParams;
import com.google.cloud.vision.v1p2beta1.Word;
import com.google.cloud.vision.v1p3beta1.AnnotateFileResponse;
import com.google.cloud.vision.v1p3beta1.AnnotateFileResponse.Builder;
import com.google.cloud.vision.v1p3beta1.AnnotateImageRequest;
import com.google.cloud.vision.v1p3beta1.AnnotateImageResponse;
import com.google.cloud.vision.v1p3beta1.AsyncAnnotateFileRequest;
import com.google.cloud.vision.v1p3beta1.AsyncAnnotateFileResponse;
import com.google.cloud.vision.v1p3beta1.AsyncBatchAnnotateFilesResponse;
import com.google.cloud.vision.v1p3beta1.BatchAnnotateImagesResponse;
import com.google.cloud.vision.v1p3beta1.Block;
import com.google.cloud.vision.v1p3beta1.ColorInfo;
import com.google.cloud.vision.v1p3beta1.CropHint;
import com.google.cloud.vision.v1p3beta1.CropHintsAnnotation;
import com.google.cloud.vision.v1p3beta1.DominantColorsAnnotation;
import com.google.cloud.vision.v1p3beta1.EntityAnnotation;
import com.google.cloud.vision.v1p3beta1.FaceAnnotation;
import com.google.cloud.vision.v1p3beta1.Feature;
import com.google.cloud.vision.v1p3beta1.Feature.Type;
import com.google.cloud.vision.v1p3beta1.GcsDestination;
import com.google.cloud.vision.v1p3beta1.GcsSource;
import com.google.cloud.vision.v1p3beta1.Image;
import com.google.cloud.vision.v1p3beta1.ImageAnnotatorClient;
import com.google.cloud.vision.v1p3beta1.ImageContext;
import com.google.cloud.vision.v1p3beta1.ImageSource;
import com.google.cloud.vision.v1p3beta1.InputConfig;
import com.google.cloud.vision.v1p3beta1.LocalizedObjectAnnotation;
import com.google.cloud.vision.v1p3beta1.LocationInfo;
import com.google.cloud.vision.v1p3beta1.OperationMetadata;
import com.google.cloud.vision.v1p3beta1.OutputConfig;
import com.google.cloud.vision.v1p3beta1.Page;
import com.google.cloud.vision.v1p3beta1.Paragraph;
import com.google.cloud.vision.v1p3beta1.SafeSearchAnnotation;
import com.google.cloud.vision.v1p3beta1.Symbol;
import com.google.cloud.vision.v1p3beta1.TextAnnotation;
import com.google.cloud.vision.v1p3beta1.WebDetection;
import com.google.cloud.vision.v1p3beta1.WebDetection.WebEntity;
import com.google.cloud.vision.v1p3beta1.WebDetection.WebImage;
import com.google.cloud.vision.v1p3beta1.WebDetection.WebLabel;
import com.google.cloud.vision.v1p3beta1.WebDetection.WebPage;
import com.google.cloud.vision.v1p3beta1.WebDetectionParams;
import com.google.cloud.vision.v1p3beta1.Word;
import com.google.protobuf.ByteString;
import com.google.protobuf.util.JsonFormat;

Expand Down Expand Up @@ -101,7 +102,8 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
+ "\n"
+ "Commands:\n"
+ "\tfaces | labels | landmarks | logos | text | safe-search | properties"
+ "| web | web-entities | web-entities-include-geo | crop | ocr \n"
+ "| web | web-entities | web-entities-include-geo | crop | ocr\n"
+ "| object-localization| handwritten-ocr\n"
+ "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage "
+ "resource (gs://...)\n"
+ "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n"
Expand Down Expand Up @@ -189,6 +191,18 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
} else if (command.equals("ocr")) {
String destPath = args.length > 2 ? args[2] : "";
detectDocumentsGcs(path, destPath);
} else if (command.equals("object-localization")) {
if (path.startsWith("gs://")) {
detectLocalizedObjectsGcs(path, out);
} else {
detectLocalizedObjects(path, out);
}
} else if (command.equals("handwritten-ocr")) {
if (path.startsWith("gs://")) {
detectHandwrittenOcrGcs(path, out);
} else {
detectHandwrittenOcr(path, out);
}
}
}

Expand Down Expand Up @@ -1423,4 +1437,240 @@ public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinatio
}
}
// [END vision_async_detect_document_ocr]

// [START vision_localize_objects]
/**
 * Detects localized objects in the specified local image.
 *
 * @param filePath The path to the file to perform localized object detection on.
 * @param out A {@link PrintStream} to write detected objects to.
 * @throws Exception on errors while closing the client.
 * @throws IOException on Input/Output errors.
 */
public static void detectLocalizedObjects(String filePath, PrintStream out)
    throws Exception, IOException {
  List<AnnotateImageRequest> requests = new ArrayList<>();

  // Read the local image into memory. The stream must be closed explicitly:
  // ByteString.readFrom does not close it, so the original code leaked a file handle.
  ByteString imgBytes;
  try (FileInputStream inputStream = new FileInputStream(filePath)) {
    imgBytes = ByteString.readFrom(inputStream);
  }

  Image img = Image.newBuilder().setContent(imgBytes).build();
  AnnotateImageRequest request =
      AnnotateImageRequest.newBuilder()
          .addFeatures(Feature.newBuilder().setType(Type.OBJECT_LOCALIZATION))
          .setImage(img)
          .build();
  requests.add(request);

  // Perform the request; try-with-resources closes the client automatically.
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
    List<AnnotateImageResponse> responses = response.getResponsesList();

    // Display each detected object with its confidence score and the
    // normalized (0..1) vertices of its bounding polygon.
    for (AnnotateImageResponse res : responses) {
      for (LocalizedObjectAnnotation entity : res.getLocalizedObjectAnnotationsList()) {
        out.format("Object name: %s\n", entity.getName());
        out.format("Confidence: %s\n", entity.getScore());
        out.format("Normalized Vertices:\n");
        entity
            .getBoundingPoly()
            .getNormalizedVerticesList()
            .forEach(vertex -> out.format("- (%s, %s)\n", vertex.getX(), vertex.getY()));
      }
    }
  }
}
// [END vision_localize_objects]

// [START vision_localize_objects_uri]
/**
 * Detects localized objects in a remote image on Google Cloud Storage.
 *
 * @param gcsPath The path to the remote file on Google Cloud Storage to detect localized objects
 *     on.
 * @param out A {@link PrintStream} to write detected objects to.
 * @throws Exception on errors while closing the client.
 * @throws IOException on Input/Output errors.
 */
public static void detectLocalizedObjectsGcs(String gcsPath, PrintStream out)
    throws Exception, IOException {
  List<AnnotateImageRequest> requests = new ArrayList<>();

  // Reference the image by its GCS URI; no bytes are read locally.
  ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
  Image img = Image.newBuilder().setSource(imgSource).build();

  AnnotateImageRequest request =
      AnnotateImageRequest.newBuilder()
          .addFeatures(Feature.newBuilder().setType(Type.OBJECT_LOCALIZATION))
          .setImage(img)
          .build();
  requests.add(request);

  // Perform the request. The client is closed by try-with-resources;
  // the explicit client.close() in the original was a redundant double close.
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
    List<AnnotateImageResponse> responses = response.getResponsesList();

    // Display each detected object with its confidence score and the
    // normalized (0..1) vertices of its bounding polygon.
    for (AnnotateImageResponse res : responses) {
      for (LocalizedObjectAnnotation entity : res.getLocalizedObjectAnnotationsList()) {
        out.format("Object name: %s\n", entity.getName());
        out.format("Confidence: %s\n", entity.getScore());
        out.format("Normalized Vertices:\n");
        entity
            .getBoundingPoly()
            .getNormalizedVerticesList()
            .forEach(vertex -> out.format("- (%s, %s)\n", vertex.getX(), vertex.getY()));
      }
    }
  }
}
// [END vision_localize_objects_uri]

// [START vision_handwritten_ocr]
/**
 * Performs handwritten text detection on a local image file.
 *
 * @param filePath The path to the local file to detect handwritten text on.
 * @param out A {@link PrintStream} to write the results to.
 * @throws Exception on errors while closing the client.
 * @throws IOException on Input/Output errors.
 */
public static void detectHandwrittenOcr(String filePath, PrintStream out) throws Exception {
  List<AnnotateImageRequest> requests = new ArrayList<>();

  // Read the local image into memory. The stream must be closed explicitly:
  // ByteString.readFrom does not close it, so the original code leaked a file handle.
  ByteString imgBytes;
  try (FileInputStream inputStream = new FileInputStream(filePath)) {
    imgBytes = ByteString.readFrom(inputStream);
  }

  Image img = Image.newBuilder().setContent(imgBytes).build();
  Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
  // Set the Language Hint codes for handwritten OCR
  ImageContext imageContext =
      ImageContext.newBuilder().addLanguageHints("en-t-i0-handwrit").build();

  AnnotateImageRequest request =
      AnnotateImageRequest.newBuilder()
          .addFeatures(feat)
          .setImage(img)
          .setImageContext(imageContext)
          .build();
  requests.add(request);

  // Perform the request. The client is closed by try-with-resources;
  // the explicit client.close() in the original was a redundant double close.
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
    List<AnnotateImageResponse> responses = response.getResponsesList();

    for (AnnotateImageResponse res : responses) {
      if (res.hasError()) {
        out.printf("Error: %s\n", res.getError().getMessage());
        return;
      }

      // For full list of available annotations, see http://g.co/cloud/vision/docs
      // Walk the annotation hierarchy: page -> block -> paragraph -> word -> symbol,
      // printing each symbol/word with its confidence and rebuilding the text per level.
      TextAnnotation annotation = res.getFullTextAnnotation();
      for (Page page : annotation.getPagesList()) {
        String pageText = "";
        for (Block block : page.getBlocksList()) {
          String blockText = "";
          for (Paragraph para : block.getParagraphsList()) {
            String paraText = "";
            for (Word word : para.getWordsList()) {
              String wordText = "";
              for (Symbol symbol : word.getSymbolsList()) {
                wordText = wordText + symbol.getText();
                out.format(
                    "Symbol text: %s (confidence: %f)\n",
                    symbol.getText(), symbol.getConfidence());
              }
              out.format("Word text: %s (confidence: %f)\n\n", wordText, word.getConfidence());
              paraText = String.format("%s %s", paraText, wordText);
            }
            // Output Example using Paragraph:
            out.println("\nParagraph: \n" + paraText);
            out.format("Paragraph Confidence: %f\n", para.getConfidence());
            blockText = blockText + paraText;
          }
          pageText = pageText + blockText;
        }
      }
      out.println("\nComplete annotation:");
      out.println(annotation.getText());
    }
  }
}
// [END vision_handwritten_ocr]

// [START vision_handwritten_ocr_uri]
/**
 * Performs handwritten text detection on a remote image on Google Cloud Storage.
 *
 * @param gcsPath The path to the remote file on Google Cloud Storage to detect handwritten text
 *     on.
 * @param out A {@link PrintStream} to write the results to.
 * @throws Exception on errors while closing the client.
 * @throws IOException on Input/Output errors.
 */
public static void detectHandwrittenOcrGcs(String gcsPath, PrintStream out) throws Exception {
  List<AnnotateImageRequest> requests = new ArrayList<>();

  // Reference the image by its GCS URI; no bytes are read locally.
  ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
  Image img = Image.newBuilder().setSource(imgSource).build();

  Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
  // Set the Language Hint codes for handwritten OCR
  ImageContext imageContext =
      ImageContext.newBuilder().addLanguageHints("en-t-i0-handwrit").build();

  AnnotateImageRequest request =
      AnnotateImageRequest.newBuilder()
          .addFeatures(feat)
          .setImage(img)
          .setImageContext(imageContext)
          .build();
  requests.add(request);

  // Perform the request. The client is closed by try-with-resources;
  // the explicit client.close() in the original was a redundant double close.
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
    List<AnnotateImageResponse> responses = response.getResponsesList();

    for (AnnotateImageResponse res : responses) {
      if (res.hasError()) {
        out.printf("Error: %s\n", res.getError().getMessage());
        return;
      }

      // For full list of available annotations, see http://g.co/cloud/vision/docs
      // Walk the annotation hierarchy: page -> block -> paragraph -> word -> symbol,
      // printing each symbol/word with its confidence and rebuilding the text per level.
      TextAnnotation annotation = res.getFullTextAnnotation();
      for (Page page : annotation.getPagesList()) {
        String pageText = "";
        for (Block block : page.getBlocksList()) {
          String blockText = "";
          for (Paragraph para : block.getParagraphsList()) {
            String paraText = "";
            for (Word word : para.getWordsList()) {
              String wordText = "";
              for (Symbol symbol : word.getSymbolsList()) {
                wordText = wordText + symbol.getText();
                out.format(
                    "Symbol text: %s (confidence: %f)\n",
                    symbol.getText(), symbol.getConfidence());
              }
              out.format("Word text: %s (confidence: %f)\n\n", wordText, word.getConfidence());
              paraText = String.format("%s %s", paraText, wordText);
            }
            // Output Example using Paragraph:
            out.println("\nParagraph: \n" + paraText);
            out.format("Paragraph Confidence: %f\n", para.getConfidence());
            blockText = blockText + paraText;
          }
          pageText = pageText + blockText;
        }
      }
      out.println("\nComplete annotation:");
      out.println(annotation.getText());
    }
  }
}
// [END vision_handwritten_ocr_uri]
}
Loading

0 comments on commit fa1cf66

Please sign in to comment.