From 077d1f64e1901b12e52fefc2bb3ca7f41bf52295 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Thu, 4 Aug 2022 10:12:07 -0500 Subject: [PATCH] feat: Added Human Review Request Sample - Illustrates how to call `review_document()` to send a processed document to Human in the Loop (HITL) --- samples/snippets/review_document_sample.py | 95 +++++++++++++++++++ .../snippets/review_document_sample_test.py | 39 ++++++++ 2 files changed, 134 insertions(+) create mode 100644 samples/snippets/review_document_sample.py create mode 100644 samples/snippets/review_document_sample_test.py diff --git a/samples/snippets/review_document_sample.py b/samples/snippets/review_document_sample.py new file mode 100644 index 00000000..6e96a1ea --- /dev/null +++ b/samples/snippets/review_document_sample.py @@ -0,0 +1,95 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_review_document] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai_v1 as documentai + +# TODO(developer): Uncomment these variables before running the sample. 
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console
# file_path = '/path/to/local/pdf'
# mime_type = 'application/pdf' # https://cloud.google.com/document-ai/docs/file-types


def review_document_sample(
    project_id: str, location: str, processor_id: str, file_path: str, mime_type: str
) -> None:
    """Process a local file and send the result to Human in the Loop review.

    The file is first run through the processor via ``process_document()``.
    The resulting document is then submitted with ``review_document()`` and
    the name of the returned operation is printed.

    Args:
        project_id: Google Cloud project ID.
        location: Processor location, e.g. 'us' or 'eu'.
        processor_id: ID of a processor created in the Cloud Console.
        file_path: Path to the local file to process.
        mime_type: MIME type of the file, e.g. 'application/pdf'.
    """
    # You must set the api_endpoint if you use a location other than 'us', e.g.:
    opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")

    # Create a client
    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    # Make Processing Request (reuse the client instead of building a second one)
    inline_document = process_document(
        project_id, location, processor_id, file_path, mime_type, client=client
    )

    # Get the full resource name of the human review config, e.g.:
    # projects/project_id/locations/location/processors/processor_id/humanReviewConfig
    human_review_config = client.human_review_config_path(
        project_id, location, processor_id
    )

    # Options are DEFAULT, URGENT
    priority = documentai.ReviewDocumentRequest.Priority.DEFAULT

    # Configure the human review request
    request = documentai.ReviewDocumentRequest(
        inline_document=inline_document,
        human_review_config=human_review_config,
        enable_schema_validation=False,
        priority=priority,
    )

    # Make a request for human review of the processed document
    operation = client.review_document(request=request)

    # Print operation name, can be used to check status of the request
    print(operation.operation.name)


def process_document(
    project_id: str,
    location: str,
    processor_id: str,
    file_path: str,
    mime_type: str,
    *,
    client=None,
) -> documentai.Document:
    """Run a local file through a Document AI processor.

    Args:
        project_id: Google Cloud project ID.
        location: Processor location, e.g. 'us' or 'eu'.
        processor_id: ID of a processor created in the Cloud Console.
        file_path: Path to the local file to process.
        mime_type: MIME type of the file, e.g. 'application/pdf'.
        client: Optional ``DocumentProcessorServiceClient`` to reuse. When
            omitted, a new client is created for ``location``.

    Returns:
        The ``document`` field of the process response.
    """
    if client is None:
        # You must set the api_endpoint if you use a location other than 'us', e.g.:
        opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
        client = documentai.DocumentProcessorServiceClient(client_options=opts)

    # The full resource name of the processor, e.g.:
    # projects/project_id/locations/location/processors/processor_id
    # You must create new processors in the Cloud Console first
    name = client.processor_path(project_id, location, processor_id)

    # Read the file into memory
    with open(file_path, "rb") as document_file:
        file_content = document_file.read()

    # Load Binary Data into Document AI RawDocument Object
    raw_document = documentai.RawDocument(content=file_content, mime_type=mime_type)

    # Configure the process request
    request = documentai.ProcessRequest(name=name, raw_document=raw_document)

    result = client.process_document(request=request)

    return result.document


# [END documentai_review_document]

# Patch metadata for the next file in this patch, kept here as comments:
# diff --git a/samples/snippets/review_document_sample_test.py b/samples/snippets/review_document_sample_test.py
# new file mode 100644
# index 00000000..2ce0f478
# --- /dev/null
# +++ b/samples/snippets/review_document_sample_test.py
# @@ -0,0 +1,39 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

from samples.snippets import review_document_sample

# Fixture values for the sample under test.
LOCATION = "us"
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
PROCESSOR_ID = "b7054d67d76c39f1"
FILE_PATH = "resources/invoice.pdf"
MIME_TYPE = "application/pdf"


def test_process_documents(capsys):
    """Run the review sample and check the printed operation name.

    The sample prints the operation name to stdout; the captured output must
    contain the ``projects/``, ``locations/`` and ``operations/`` segments.
    """
    sample_kwargs = {
        "project_id": PROJECT_ID,
        "location": LOCATION,
        "processor_id": PROCESSOR_ID,
        "file_path": FILE_PATH,
        "mime_type": MIME_TYPE,
    }
    review_document_sample.review_document_sample(**sample_kwargs)

    stdout = capsys.readouterr().out

    for fragment in ("projects/", "locations/", "operations/"):
        assert fragment in stdout