From 8c5c1638fd1fb0923d3431801e4c89e3e8f2008b Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 20 Sep 2022 17:18:05 -0500 Subject: [PATCH 1/7] docs(samples): Added Processor Version Samples To Be Published in documentation: https://cloud.google.com/document-ai/docs/manage-processor - `get_processor_version` - `list_processor_versions` - `set_default_processor_version` --- .../snippets/get_processor_version_sample.py | 51 +++++++++++++++ .../get_processor_version_sample_test.py | 37 +++++++++++ .../list_processor_versions_sample.py | 52 ++++++++++++++++ .../list_processor_versions_sample_test.py | 34 ++++++++++ .../set_default_processor_version_sample.py | 62 +++++++++++++++++++ ...t_default_processor_version_sample_test.py | 35 +++++++++++ 6 files changed, 271 insertions(+) create mode 100644 samples/snippets/get_processor_version_sample.py create mode 100644 samples/snippets/get_processor_version_sample_test.py create mode 100644 samples/snippets/list_processor_versions_sample.py create mode 100644 samples/snippets/list_processor_versions_sample_test.py create mode 100644 samples/snippets/set_default_processor_version_sample.py create mode 100644 samples/snippets/set_default_processor_version_sample_test.py diff --git a/samples/snippets/get_processor_version_sample.py b/samples/snippets/get_processor_version_sample.py new file mode 100644 index 00000000..c9d11b80 --- /dev/null +++ b/samples/snippets/get_processor_version_sample.py @@ -0,0 +1,51 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_get_processor_version] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def get_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Make GetProcessorVersion request + processor_version = client.get_processor_version(name=name) + + # Print the processor version information + print(f"Processor Version: {processor_version_id}") + print(f"Display Name: {processor_version.display_name}") + print(processor_version.state) + + +# [END documentai_get_processor_version] diff --git a/samples/snippets/get_processor_version_sample_test.py b/samples/snippets/get_processor_version_sample_test.py new file mode 100644 index 00000000..c077e69a --- /dev/null +++ b/samples/snippets/get_processor_version_sample_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from samples.snippets import get_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "91e072f8626a76b7" +processor_version_id = "pretrained-ocr-v1.0-2020-09-23" + + +def test_get_processor_version(capsys): + get_processor_version_sample.get_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + ) + out, _ = capsys.readouterr() + + assert "Processor Version: pretrained-ocr" in out + assert "Display Name: Google Stable" in out + assert "DEPLOYED" in out diff --git a/samples/snippets/list_processor_versions_sample.py b/samples/snippets/list_processor_versions_sample.py new file mode 100644 index 00000000..6679e57a --- /dev/null +++ b/samples/snippets/list_processor_versions_sample.py @@ -0,0 +1,52 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# [START documentai_list_processor_versions] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' + + +def list_processor_versions_sample(project_id: str, location: str, processor_id: str): + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor + # e.g.: projects/project_id/locations/location/processors/processor_id + parent = client.processor_path(project_id, location, processor_id) + + # Make ListProcessorVersions request + processor_versions = client.list_processor_versions(parent=parent) + + # Print the processor version information + for processor_version in processor_versions: + processor_version_id = client.parse_processor_version_path( + processor_version.name + )["processor_version"] + + print(f"Processor Version: {processor_version_id}") + print(f"Display Name: {processor_version.display_name}") + print(processor_version.state) + print("") + + +# [END documentai_list_processor_versions] diff --git a/samples/snippets/list_processor_versions_sample_test.py b/samples/snippets/list_processor_versions_sample_test.py new file mode 100644 index 00000000..5bba2728 --- /dev/null +++ b/samples/snippets/list_processor_versions_sample_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from samples.snippets import list_processor_versions_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "91e072f8626a76b7" + + +def test_list_processor_versions(capsys): + list_processor_versions_sample.list_processor_versions_sample( + project_id=project_id, location=location, processor_id=processor_id + ) + out, _ = capsys.readouterr() + + assert "Processor Version: pretrained-ocr" in out + assert "Display Name: Google Stable" in out + assert "Display Name: Google Release Candidate" in out + assert "DEPLOYED" in out diff --git a/samples/snippets/set_default_processor_version_sample.py b/samples/snippets/set_default_processor_version_sample.py new file mode 100644 index 00000000..fa5be769 --- /dev/null +++ b/samples/snippets/set_default_processor_version_sample.py @@ -0,0 +1,62 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# [START documentai_set_default_processor_version] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import NotFound +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def set_default_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor + # e.g.: projects/project_id/locations/location/processors/processor_id + processor = client.processor_path(project_id, location, processor_id) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + processor_version = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + request = documentai.SetDefaultProcessorVersionRequest( + processor=processor, default_processor_version=processor_version + ) + + # Make SetDefaultProcessorVersion request + try: + operation = client.set_default_processor_version(request) + # Print operation details + print(operation.operation.name) + # Wait for operation to complete + operation.result() + except NotFound as e: + print(e.message) + + +# [END documentai_set_default_processor_version] diff --git a/samples/snippets/set_default_processor_version_sample_test.py b/samples/snippets/set_default_processor_version_sample_test.py new file mode 100644 index 00000000..dd8377fd --- /dev/null +++ 
b/samples/snippets/set_default_processor_version_sample_test.py @@ -0,0 +1,35 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from samples.snippets import set_default_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "91e072f8626a76b7" +processor_version_id = "pretrained-ocr-v1.1-2022-09-12" + + +def test_set_default_processor_version(capsys): + set_default_processor_version_sample.set_default_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + ) + out, _ = capsys.readouterr() + + assert "operation" in out From 3c271145875cfd7b846e151a2a932d9ca3ed663c Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 21 Sep 2022 09:52:04 -0500 Subject: [PATCH 2/7] docs(samples): Adjusted Bad Batch Input test to expect "Failed" in the exception message --- .../snippets/batch_process_documents_sample_bad_input_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/batch_process_documents_sample_bad_input_test.py b/samples/snippets/batch_process_documents_sample_bad_input_test.py index 7d2ffd75..a130f63d 100644 --- a/samples/snippets/batch_process_documents_sample_bad_input_test.py +++ b/samples/snippets/batch_process_documents_sample_bad_input_test.py @@ -44,4 +44,4 @@ def test_batch_process_documents_with_bad_input(capsys): out, _ = capsys.readouterr() assert "Failed" in out except
Exception as e: - assert "Internal error" in e.message + assert "Failed" in e.message From 31077d9dc1943a679f049533978d02000708fafe Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 21 Sep 2022 10:49:16 -0500 Subject: [PATCH 3/7] docs(samples): Added Deploy/Undeploy Samples --- .../deploy_processor_version_sample.py | 56 ++++++++++++++++++ .../deploy_processor_version_sample_test.py | 35 ++++++++++++ .../undeploy_processor_version_sample.py | 57 +++++++++++++++++++ .../undeploy_processor_version_sample_test.py | 35 ++++++++++++ 4 files changed, 183 insertions(+) create mode 100644 samples/snippets/deploy_processor_version_sample.py create mode 100644 samples/snippets/deploy_processor_version_sample_test.py create mode 100644 samples/snippets/undeploy_processor_version_sample.py create mode 100644 samples/snippets/undeploy_processor_version_sample_test.py diff --git a/samples/snippets/deploy_processor_version_sample.py b/samples/snippets/deploy_processor_version_sample.py new file mode 100644 index 00000000..217a23b3 --- /dev/null +++ b/samples/snippets/deploy_processor_version_sample.py @@ -0,0 +1,56 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_deploy_processor_version] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import FailedPrecondition +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def deploy_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Make DeployProcessorVersion request + try: + operation = client.deploy_processor_version(name=name) + # Print operation details + print(operation.operation.name) + # Wait for operation to complete + operation.result() + # Deploy request will fail if the + # processor version is already deployed + except FailedPrecondition as e: + print(e.message) + + +# [END documentai_deploy_processor_version] diff --git a/samples/snippets/deploy_processor_version_sample_test.py b/samples/snippets/deploy_processor_version_sample_test.py new file mode 100644 index 00000000..3d3e1420 --- /dev/null +++ b/samples/snippets/deploy_processor_version_sample_test.py @@ -0,0 +1,35 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from samples.snippets import deploy_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "91e072f8626a76b7" +processor_version_id = "pretrained-ocr-v1.0-2020-09-23" + + +def test_deploy_processor_version(capsys): + deploy_processor_version_sample.deploy_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + ) + out, _ = capsys.readouterr() + + assert "DEPLOYED" in out or "operation" in out diff --git a/samples/snippets/undeploy_processor_version_sample.py b/samples/snippets/undeploy_processor_version_sample.py new file mode 100644 index 00000000..52cc2dda --- /dev/null +++ b/samples/snippets/undeploy_processor_version_sample.py @@ -0,0 +1,57 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_undeploy_processor_version] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import FailedPrecondition, InvalidArgument +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def undeploy_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Make UndeployProcessorVersion request + try: + operation = client.undeploy_processor_version(name=name) + # Print operation details + print(operation.operation.name) + # Wait for operation to complete + operation.result() + # Uneploy request will fail if the + # processor version is already undeployed + # or if a request is made on a pretrained processor + except (FailedPrecondition, InvalidArgument) as e: + print(e.message) + + +# [END documentai_undeploy_processor_version] diff --git a/samples/snippets/undeploy_processor_version_sample_test.py b/samples/snippets/undeploy_processor_version_sample_test.py new file mode 100644 index 00000000..27ded4fa --- /dev/null +++ b/samples/snippets/undeploy_processor_version_sample_test.py @@ -0,0 +1,35 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from samples.snippets import undeploy_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "91e072f8626a76b7" +processor_version_id = "pretrained-ocr-v1.0-2020-09-23" + + +def test_undeploy_processor_version(capsys): + undeploy_processor_version_sample.undeploy_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + ) + out, _ = capsys.readouterr() + + assert "UNDEPLOY" in out or "pretrained" in out or "operation" in out From 4cb4903ac1b7126ef48fb7da2861702395de1f38 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 21 Sep 2022 11:51:43 -0500 Subject: [PATCH 4/7] docs(samples): Added process & batchProcess examples for processorVersions - Removed Processor Version from basic process and batchProcess examples - Removed the note that processors must first be created in the Cloud Console - Added note that processor must be created before running sample where missing --- ...cess_documents_processor_version_sample.py | 153 ++++++++++++++++++ ...documents_processor_version_sample_test.py | 64 ++++++++ .../batch_process_documents_sample.py | 10 -- .../snippets/get_processor_version_sample.py | 2 +- .../list_processor_versions_sample.py | 2 +- .../snippets/process_document_form_sample.py | 1 - ...ocess_document_processor_version_sample.py | 74 +++++++++ ..._document_processor_version_sample_test.py | 40 +++++ .../process_document_quality_sample.py | 1 - samples/snippets/process_document_sample.py | 12 +-
.../process_document_specialized_sample.py | 1 - .../process_document_splitter_sample.py | 1 - samples/snippets/quickstart_sample.py | 1 - samples/snippets/review_document_sample.py | 1 - .../set_default_processor_version_sample.py | 2 +- .../undeploy_processor_version_sample.py | 2 +- 16 files changed, 336 insertions(+), 31 deletions(-) create mode 100644 samples/snippets/batch_process_documents_processor_version_sample.py create mode 100644 samples/snippets/batch_process_documents_processor_version_sample_test.py create mode 100644 samples/snippets/process_document_processor_version_sample.py create mode 100644 samples/snippets/process_document_processor_version_sample_test.py diff --git a/samples/snippets/batch_process_documents_processor_version_sample.py b/samples/snippets/batch_process_documents_processor_version_sample.py new file mode 100644 index 00000000..5dcd881b --- /dev/null +++ b/samples/snippets/batch_process_documents_processor_version_sample.py @@ -0,0 +1,153 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START documentai_batch_process_documents_processor_version] +import re + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai, storage + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# processor_version_id = "YOUR_PROCESSOR_VERSION_ID" # Processor version to use +# gcs_input_uri = "YOUR_INPUT_URI" # Format: gs://bucket/directory/file.pdf +# input_mime_type = "application/pdf" +# gcs_output_bucket = "YOUR_OUTPUT_BUCKET_NAME" # Format: gs://bucket +# gcs_output_uri_prefix = "YOUR_OUTPUT_URI_PREFIX" # Format: directory/subdirectory/ + + +def batch_process_documents_processor_version( + project_id: str, + location: str, + processor_id: str, + processor_version_id: str, + gcs_input_uri: str, + input_mime_type: str, + gcs_output_bucket: str, + gcs_output_uri_prefix: str, + timeout: int = 300, +): + + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + gcs_document = documentai.GcsDocument( + gcs_uri=gcs_input_uri, mime_type=input_mime_type + ) + + # Load GCS Input URI into a List of document files + gcs_documents = documentai.GcsDocuments(documents=[gcs_document]) + input_config = documentai.BatchDocumentsInputConfig(gcs_documents=gcs_documents) + + # NOTE: Alternatively, specify a GCS URI Prefix to process an entire directory + # + # gcs_input_uri = "gs://bucket/directory/" + # gcs_prefix = documentai.GcsPrefix(gcs_uri_prefix=gcs_input_uri) + # input_config = documentai.BatchDocumentsInputConfig(gcs_prefix=gcs_prefix) + # + + # Cloud Storage URI for the Output Directory + destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}/" + + gcs_output_config = documentai.DocumentOutputConfig.GcsOutputConfig( + gcs_uri=destination_uri + ) + + # Where to write results + output_config = documentai.DocumentOutputConfig(gcs_output_config=gcs_output_config) + + # The full resource name of the 
processor version + # e.g. projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id} + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + request = documentai.BatchProcessRequest( + name=name, + input_documents=input_config, + document_output_config=output_config, + ) + + # BatchProcess returns a Long Running Operation (LRO) + operation = client.batch_process_documents(request) + + # Continually polls the operation until it is complete. + # This could take some time for larger files + # Format: projects/PROJECT_NUMBER/locations/LOCATION/operations/OPERATION_ID + print(f"Waiting for operation {operation.operation.name} to complete...") + operation.result(timeout=timeout) + + # NOTE: Can also use callbacks for asynchronous processing + # + # def my_callback(future): + # result = future.result() + # + # operation.add_done_callback(my_callback) + + # Once the operation is complete, + # get output document information from operation metadata + metadata = documentai.BatchProcessMetadata(operation.metadata) + + if metadata.state != documentai.BatchProcessMetadata.State.SUCCEEDED: + raise ValueError(f"Batch Process Failed: {metadata.state_message}") + + storage_client = storage.Client() + + print("Output files:") + # One process per Input Document + for process in metadata.individual_process_statuses: + # output_gcs_destination format: gs://BUCKET/PREFIX/OPERATION_NUMBER/INPUT_FILE_NUMBER/ + # The Cloud Storage API requires the bucket name and URI prefix separately + matches = re.match(r"gs://(.*?)/(.*)", process.output_gcs_destination) + if not matches: + print( + "Could not parse output GCS destination:", + process.output_gcs_destination, + ) + continue + + output_bucket, output_prefix = matches.groups() + + # Get List of Document Objects from the Output Bucket + output_blobs = storage_client.list_blobs(output_bucket, prefix=output_prefix) + + # Document AI may 
output multiple JSON files per source file + for blob in output_blobs: + # Document AI should only output JSON files to GCS + if ".json" not in blob.name: + print( + f"Skipping non-supported file: {blob.name} - Mimetype: {blob.content_type}" + ) + continue + + # Download JSON File as bytes object and convert to Document Object + print(f"Fetching {blob.name}") + document = documentai.Document.from_json( + blob.download_as_bytes(), ignore_unknown_fields=True + ) + + # For a full list of Document object attributes, please reference this page: + # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document + + # Read the text recognition output from the processor + print("The document contains the following text:") + print(document.text) + + +# [END documentai_batch_process_documents_processor_version] diff --git a/samples/snippets/batch_process_documents_processor_version_sample_test.py b/samples/snippets/batch_process_documents_processor_version_sample_test.py new file mode 100644 index 00000000..b39c8ab2 --- /dev/null +++ b/samples/snippets/batch_process_documents_processor_version_sample_test.py @@ -0,0 +1,64 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +from uuid import uuid4 + +from google.cloud import storage +from google.cloud.exceptions import NotFound +import pytest +from samples.snippets import batch_process_documents_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +processor_version_id = "pretrained-form-parser-v1.0-2020-09-23" +gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf" +input_mime_type = "application/pdf" +gcs_output_uri_prefix = uuid4() +BUCKET_NAME = f"document-ai-python-{uuid4()}" + + +@pytest.fixture(scope="module") +def test_bucket(): + storage_client = storage.Client() + bucket = storage_client.create_bucket(BUCKET_NAME) + yield bucket.name + + try: + blobs = list(bucket.list_blobs()) + for blob in blobs: + blob.delete() + bucket.delete() + except NotFound: + print("Bucket already deleted.") + + +def test_batch_process_documents_processor_version(capsys, test_bucket): + batch_process_documents_processor_version_sample.batch_process_documents_processor_version( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + gcs_input_uri=gcs_input_uri, + input_mime_type=input_mime_type, + gcs_output_bucket=f"gs://{test_bucket}", + gcs_output_uri_prefix=gcs_output_uri_prefix, + ) + out, _ = capsys.readouterr() + + assert "operation" in out + assert "Fetching" in out + assert "text:" in out diff --git a/samples/snippets/batch_process_documents_sample.py b/samples/snippets/batch_process_documents_sample.py index f3e01936..f01d8787 100644 --- a/samples/snippets/batch_process_documents_sample.py +++ b/samples/snippets/batch_process_documents_sample.py @@ -23,7 +23,6 @@ # project_id = 'YOUR_PROJECT_ID' # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' # processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample -# processor_version = "pretrained" # Optional. 
Processor version to use # gcs_input_uri = "YOUR_INPUT_URI" # Format: gs://bucket/directory/file.pdf # input_mime_type = "application/pdf" # gcs_output_bucket = "YOUR_OUTPUT_BUCKET_NAME" # Format: gs://bucket @@ -73,17 +72,8 @@ def batch_process_documents( # The full resource name of the processor, e.g.: # projects/project_id/locations/location/processor/processor_id - # You must create new processors in the Cloud Console first name = client.processor_path(project_id, location, processor_id) - # NOTE: Alternatively, specify the processor_version to specify a particular version of the processor to use - # projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processorVersion} - # - # name = client.processor_version_path( - # project_id, location, processor_id, processor_version - # ) - # - request = documentai.BatchProcessRequest( name=name, input_documents=input_config, diff --git a/samples/snippets/get_processor_version_sample.py b/samples/snippets/get_processor_version_sample.py index c9d11b80..5dc0babd 100644 --- a/samples/snippets/get_processor_version_sample.py +++ b/samples/snippets/get_processor_version_sample.py @@ -21,7 +21,7 @@ # TODO(developer): Uncomment these variables before running the sample. # project_id = 'YOUR_PROJECT_ID' # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' -# processor_id = 'YOUR_PROCESSOR_ID' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample # processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' diff --git a/samples/snippets/list_processor_versions_sample.py b/samples/snippets/list_processor_versions_sample.py index 6679e57a..f615c1bb 100644 --- a/samples/snippets/list_processor_versions_sample.py +++ b/samples/snippets/list_processor_versions_sample.py @@ -21,7 +21,7 @@ # TODO(developer): Uncomment these variables before running the sample. 
# project_id = 'YOUR_PROJECT_ID' # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' -# processor_id = 'YOUR_PROCESSOR_ID' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample def list_processor_versions_sample(project_id: str, location: str, processor_id: str): diff --git a/samples/snippets/process_document_form_sample.py b/samples/snippets/process_document_form_sample.py index 7922490f..c113d6a8 100644 --- a/samples/snippets/process_document_form_sample.py +++ b/samples/snippets/process_document_form_sample.py @@ -81,7 +81,6 @@ def process_document( # The full resource name of the processor, e.g.: # projects/project_id/locations/location/processor/processor_id - # You must create new processors in the Cloud Console first name = client.processor_path(project_id, location, processor_id) # Read the file into memory diff --git a/samples/snippets/process_document_processor_version_sample.py b/samples/snippets/process_document_processor_version_sample.py new file mode 100644 index 00000000..39db461e --- /dev/null +++ b/samples/snippets/process_document_processor_version_sample.py @@ -0,0 +1,74 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_document_processor_version] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' # Processor version to use +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types +# field_mask = "text,entities,pages.pageNumber" # Optional. The fields to return in the Document object. + + +def process_document_processor_version_sample( + project_id: str, + location: str, + processor_id: str, + processor_version_id: str, + file_path: str, + mime_type: str, + field_mask: str = None, +): + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g. 
projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id} + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest( + name=name, raw_document=raw_document, field_mask=field_mask + ) + + result = client.process_document(request=request) + + # For a full list of Document object attributes, please reference this page: + # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document + document = result.document + + # Read the text recognition output from the processor + print("The document contains the following text:") + print(document.text) + + +# [END documentai_process_document_processor_version] diff --git a/samples/snippets/process_document_processor_version_sample_test.py b/samples/snippets/process_document_processor_version_sample_test.py new file mode 100644 index 00000000..e943503c --- /dev/null +++ b/samples/snippets/process_document_processor_version_sample_test.py @@ -0,0 +1,40 @@ +# # Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from samples.snippets import process_document_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +processor_version_id = "pretrained-form-parser-v1.0-2020-09-23" +file_path = "resources/invoice.pdf" +mime_type = "application/pdf" + + +def test_process_document_processor_versions(capsys): + process_document_processor_version_sample.process_document_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + file_path=file_path, + mime_type=mime_type, + ) + out, _ = capsys.readouterr() + + assert "text:" in out + assert "Invoice" in out diff --git a/samples/snippets/process_document_quality_sample.py b/samples/snippets/process_document_quality_sample.py index 0bbca75d..79e88977 100644 --- a/samples/snippets/process_document_quality_sample.py +++ b/samples/snippets/process_document_quality_sample.py @@ -60,7 +60,6 @@ def process_document( # The full resource name of the processor, e.g.: # projects/project_id/locations/location/processor/processor_id - # You must create new processors in the Cloud Console first name = client.processor_path(project_id, location, processor_id) # Read the file into memory diff --git a/samples/snippets/process_document_sample.py b/samples/snippets/process_document_sample.py index 13d7850e..2cd93538 100644 --- a/samples/snippets/process_document_sample.py +++ b/samples/snippets/process_document_sample.py @@ -21,8 +21,7 @@ # TODO(developer): Uncomment these variables before running the sample. # project_id = 'YOUR_PROJECT_ID' # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' -# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample -# processor_version = "pretrained" # Optional. 
Processor version to use +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample # file_path = '/path/to/local/pdf' # mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types # field_mask = "text,entities,pages.pageNumber" # Optional. The fields to return in the Document object. @@ -43,17 +42,8 @@ def process_document_sample( # The full resource name of the processor, e.g.: # projects/{project_id}/locations/{location}/processors/{processor_id} - # You must create new processors in the Cloud Console first name = client.processor_path(project_id, location, processor_id) - # NOTE: Alternatively, specify the processor_version to specify a particular version of the processor to use - # projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processorVersion} - # - # name = client.processor_version_path( - # project_id, location, processor_id, processor_version - # ) - # - # Read the file into memory with open(file_path, "rb") as image: image_content = image.read() diff --git a/samples/snippets/process_document_specialized_sample.py b/samples/snippets/process_document_specialized_sample.py index c78ca704..272d8cdc 100644 --- a/samples/snippets/process_document_specialized_sample.py +++ b/samples/snippets/process_document_specialized_sample.py @@ -78,7 +78,6 @@ def process_document( # The full resource name of the processor, e.g.: # projects/project_id/locations/location/processor/processor_id - # You must create new processors in the Cloud Console first name = client.processor_path(project_id, location, processor_id) # Read the file into memory diff --git a/samples/snippets/process_document_splitter_sample.py b/samples/snippets/process_document_splitter_sample.py index b78c557c..e49e8ff1 100644 --- a/samples/snippets/process_document_splitter_sample.py +++ b/samples/snippets/process_document_splitter_sample.py @@ -67,7 +67,6 @@ def process_document( # The 
full resource name of the processor, e.g.: # projects/project_id/locations/location/processor/processor_id - # You must create new processors in the Cloud Console first name = client.processor_path(project_id, location, processor_id) # Read the file into memory diff --git a/samples/snippets/quickstart_sample.py b/samples/snippets/quickstart_sample.py index 23dcf084..e830acdf 100644 --- a/samples/snippets/quickstart_sample.py +++ b/samples/snippets/quickstart_sample.py @@ -38,7 +38,6 @@ def quickstart( # The full resource name of the processor, e.g.: # projects/project_id/locations/location/processor/processor_id - # You must create new processors in the Cloud Console first name = client.processor_path(project_id, location, processor_id) # Read the file into memory diff --git a/samples/snippets/review_document_sample.py b/samples/snippets/review_document_sample.py index 2bfa4dcd..969cf59a 100644 --- a/samples/snippets/review_document_sample.py +++ b/samples/snippets/review_document_sample.py @@ -74,7 +74,6 @@ def process_document( # The full resource name of the processor, e.g.: # projects/project_id/locations/location/processor/processor_id - # You must create new processors in the Cloud Console first name = client.processor_path(project_id, location, processor_id) # Read the file into memory diff --git a/samples/snippets/set_default_processor_version_sample.py b/samples/snippets/set_default_processor_version_sample.py index fa5be769..2c7c3797 100644 --- a/samples/snippets/set_default_processor_version_sample.py +++ b/samples/snippets/set_default_processor_version_sample.py @@ -22,7 +22,7 @@ # TODO(developer): Uncomment these variables before running the sample. 
# project_id = 'YOUR_PROJECT_ID' # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' -# processor_id = 'YOUR_PROCESSOR_ID' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample # processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' diff --git a/samples/snippets/undeploy_processor_version_sample.py b/samples/snippets/undeploy_processor_version_sample.py index 52cc2dda..da1ece03 100644 --- a/samples/snippets/undeploy_processor_version_sample.py +++ b/samples/snippets/undeploy_processor_version_sample.py @@ -22,7 +22,7 @@ # TODO(developer): Uncomment these variables before running the sample. # project_id = 'YOUR_PROJECT_ID' # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' -# processor_id = 'YOUR_PROCESSOR_ID' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample # processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' From d9afe7f9d382db736cbe59978b8d1718da624a51 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 21 Sep 2022 12:07:34 -0500 Subject: [PATCH 5/7] docs(samples): Adjusted Enable/Disable Processor Test to avoid Race Conditions --- samples/snippets/disable_processor_sample_test.py | 4 +--- samples/snippets/enable_processor_sample_test.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/samples/snippets/disable_processor_sample_test.py b/samples/snippets/disable_processor_sample_test.py index 0420ee48..0a816723 100644 --- a/samples/snippets/disable_processor_sample_test.py +++ b/samples/snippets/disable_processor_sample_test.py @@ -28,9 +28,7 @@ def test_disable_processor(capsys): ) out, _ = capsys.readouterr() - assert "projects" in out - assert "locations" in out - assert "operations" in out + assert "projects" in out or "DISABLED" in out # Re-Enable Processor enable_processor_sample.enable_processor_sample( diff --git a/samples/snippets/enable_processor_sample_test.py b/samples/snippets/enable_processor_sample_test.py index 0a238cd0..a4ccc370 100644 --- 
a/samples/snippets/enable_processor_sample_test.py +++ b/samples/snippets/enable_processor_sample_test.py @@ -28,9 +28,7 @@ def test_enable_processor(capsys): ) out, _ = capsys.readouterr() - assert "projects" in out - assert "locations" in out - assert "operations" in out + assert "projects" in out or "ENABLED" in out # Re-Disable Processor disable_processor_sample.disable_processor_sample( From ef457f2cfffecf5a7b421dccbbcd8cde34d10608 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 21 Sep 2022 16:21:13 -0500 Subject: [PATCH 6/7] docs(samples): Added Delete Processor Version Sample - Also Fixed Spelling error in Undeploy Comments --- .../delete_processor_version_sample.py | 58 +++++++++++++++++++ .../delete_processor_version_sample_test.py | 35 +++++++++++ .../undeploy_processor_version_sample.py | 4 +- 3 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 samples/snippets/delete_processor_version_sample.py create mode 100644 samples/snippets/delete_processor_version_sample_test.py diff --git a/samples/snippets/delete_processor_version_sample.py b/samples/snippets/delete_processor_version_sample.py new file mode 100644 index 00000000..8ff8703d --- /dev/null +++ b/samples/snippets/delete_processor_version_sample.py @@ -0,0 +1,58 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# [START documentai_delete_processor_version] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import FailedPrecondition, InvalidArgument +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def delete_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Make DeleteProcessorVersion request + try: + operation = client.delete_processor_version(name=name) + # Print operation details + print(operation.operation.name) + # Wait for operation to complete + operation.result() + # Delete request will fail if the + # processor version doesn't exist + # or if a request is made on a pretrained processor version + # or the default processor version + except (FailedPrecondition, InvalidArgument) as e: + print(e.message) + + +# [END documentai_delete_processor_version] diff --git a/samples/snippets/delete_processor_version_sample_test.py b/samples/snippets/delete_processor_version_sample_test.py new file mode 100644 index 00000000..6a0fb8ee --- /dev/null +++ b/samples/snippets/delete_processor_version_sample_test.py @@ -0,0 +1,35 @@ +# Copyright 2022 Google LLC +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from samples.snippets import delete_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "91e072f8626a76b7" +processor_version_id = "pretrained-ocr-v1.0-2020-09-23" + + +def test_delete_processor_version(capsys): + delete_processor_version_sample.delete_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + ) + out, _ = capsys.readouterr() + + assert "DELETE" in out or "pretrained" in out or "operation" in out diff --git a/samples/snippets/undeploy_processor_version_sample.py b/samples/snippets/undeploy_processor_version_sample.py index da1ece03..0ad19276 100644 --- a/samples/snippets/undeploy_processor_version_sample.py +++ b/samples/snippets/undeploy_processor_version_sample.py @@ -47,9 +47,9 @@ def undeploy_processor_version_sample( print(operation.operation.name) # Wait for operation to complete operation.result() - # Uneploy request will fail if the + # Undeploy request will fail if the # processor version is already undeployed - # or if a request is made on a pretrained processor + # or if a request is made on a pretrained processor version except (FailedPrecondition, InvalidArgument) as e: print(e.message) From 70ece5174f354b3ead9a7d39656fb32de010d054 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Fri, 23 Sep 2022 13:00:39 -0500 Subject: [PATCH 7/7] docs(samples): Updated 
non-idempotent unit tests to use mocks - Also replaced test ocr processor id after making a breaking change to the project - Added `field_mask` to process_documents tests --- ...cess_documents_processor_version_sample.py | 4 ++-- .../delete_processor_version_sample_test.py | 20 ++++++++++++++---- .../deploy_processor_version_sample_test.py | 21 +++++++++++++++---- samples/snippets/get_processor_sample_test.py | 2 +- .../get_processor_version_sample_test.py | 2 +- .../list_processor_versions_sample_test.py | 2 +- .../process_document_ocr_sample_test.py | 2 +- ..._document_processor_version_sample_test.py | 4 +++- .../snippets/process_document_sample_test.py | 2 ++ ...t_default_processor_version_sample_test.py | 15 ++++++++++--- .../undeploy_processor_version_sample_test.py | 21 +++++++++++++++---- 11 files changed, 73 insertions(+), 22 deletions(-) diff --git a/samples/snippets/batch_process_documents_processor_version_sample.py b/samples/snippets/batch_process_documents_processor_version_sample.py index 5dcd881b..7050c3ec 100644 --- a/samples/snippets/batch_process_documents_processor_version_sample.py +++ b/samples/snippets/batch_process_documents_processor_version_sample.py @@ -22,8 +22,8 @@ # TODO(developer): Uncomment these variables before running the sample. 
# project_id = 'YOUR_PROJECT_ID' # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' -# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample -# processor_version_id = "YOUR_PROCESSOR_VERSION_ID" # Processor version to use +# processor_id = 'YOUR_PROCESSOR_ID' # Example: aeb8cea219b7c272 +# processor_version_id = "YOUR_PROCESSOR_VERSION_ID" # Example: pretrained-ocr-v1.0-2020-09-23 # gcs_input_uri = "YOUR_INPUT_URI" # Format: gs://bucket/directory/file.pdf # input_mime_type = "application/pdf" # gcs_output_bucket = "YOUR_OUTPUT_BUCKET_NAME" # Format: gs://bucket diff --git a/samples/snippets/delete_processor_version_sample_test.py b/samples/snippets/delete_processor_version_sample_test.py index 6a0fb8ee..4104cd07 100644 --- a/samples/snippets/delete_processor_version_sample_test.py +++ b/samples/snippets/delete_processor_version_sample_test.py @@ -15,21 +15,33 @@ import os +import mock from samples.snippets import delete_processor_version_sample location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -processor_id = "91e072f8626a76b7" -processor_version_id = "pretrained-ocr-v1.0-2020-09-23" +processor_id = "aaaaaaaaa" +processor_version_id = "xxxxxxxxxx" -def test_delete_processor_version(capsys): +@mock.patch( + "google.cloud.documentai.DocumentProcessorServiceClient.delete_processor_version" +) +@mock.patch("google.api_core.operation.Operation") +def test_delete_processor_version( + operation_mock, delete_processor_version_mock, capsys +): + delete_processor_version_mock.return_value = operation_mock + delete_processor_version_sample.delete_processor_version_sample( project_id=project_id, location=location, processor_id=processor_id, processor_version_id=processor_version_id, ) + + delete_processor_version_mock.assert_called_once() + out, _ = capsys.readouterr() - assert "DELETE" in out or "pretrained" in out or "operation" in out + assert "operation" in out diff --git 
a/samples/snippets/deploy_processor_version_sample_test.py b/samples/snippets/deploy_processor_version_sample_test.py index 3d3e1420..31f96925 100644 --- a/samples/snippets/deploy_processor_version_sample_test.py +++ b/samples/snippets/deploy_processor_version_sample_test.py @@ -15,21 +15,34 @@ import os +import mock from samples.snippets import deploy_processor_version_sample location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -processor_id = "91e072f8626a76b7" -processor_version_id = "pretrained-ocr-v1.0-2020-09-23" +processor_id = "aaaaaaaaa" +processor_version_id = "xxxxxxxxxx" -def test_deploy_processor_version(capsys): +# TODO: Switch to Real Endpoint when Deployable Versions are Available +@mock.patch( + "google.cloud.documentai.DocumentProcessorServiceClient.deploy_processor_version" +) +@mock.patch("google.api_core.operation.Operation") +def test_deploy_processor_version( + operation_mock, deploy_processor_version_mock, capsys +): + deploy_processor_version_mock.return_value = operation_mock + deploy_processor_version_sample.deploy_processor_version_sample( project_id=project_id, location=location, processor_id=processor_id, processor_version_id=processor_version_id, ) + + deploy_processor_version_mock.assert_called_once() + out, _ = capsys.readouterr() - assert "DEPLOYED" in out or "operation" in out + assert "operation" in out diff --git a/samples/snippets/get_processor_sample_test.py b/samples/snippets/get_processor_sample_test.py index b1848422..f8a37bea 100644 --- a/samples/snippets/get_processor_sample_test.py +++ b/samples/snippets/get_processor_sample_test.py @@ -19,7 +19,7 @@ location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -processor_id = "91e072f8626a76b7" +processor_id = "52a38e080c1a7296" def test_get_processor(capsys): diff --git a/samples/snippets/get_processor_version_sample_test.py b/samples/snippets/get_processor_version_sample_test.py index c077e69a..071d740a 100644 --- 
a/samples/snippets/get_processor_version_sample_test.py +++ b/samples/snippets/get_processor_version_sample_test.py @@ -19,7 +19,7 @@ location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -processor_id = "91e072f8626a76b7" +processor_id = "52a38e080c1a7296" processor_version_id = "pretrained-ocr-v1.0-2020-09-23" diff --git a/samples/snippets/list_processor_versions_sample_test.py b/samples/snippets/list_processor_versions_sample_test.py index 5bba2728..16219795 100644 --- a/samples/snippets/list_processor_versions_sample_test.py +++ b/samples/snippets/list_processor_versions_sample_test.py @@ -19,7 +19,7 @@ location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -processor_id = "91e072f8626a76b7" +processor_id = "52a38e080c1a7296" def test_list_processor_versions(capsys): diff --git a/samples/snippets/process_document_ocr_sample_test.py b/samples/snippets/process_document_ocr_sample_test.py index ba46ca17..d6cceb46 100644 --- a/samples/snippets/process_document_ocr_sample_test.py +++ b/samples/snippets/process_document_ocr_sample_test.py @@ -19,7 +19,7 @@ location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -processor_id = "91e072f8626a76b7" +processor_id = "52a38e080c1a7296" file_path = "resources/handwritten_form.pdf" mime_type = "application/pdf" diff --git a/samples/snippets/process_document_processor_version_sample_test.py b/samples/snippets/process_document_processor_version_sample_test.py index e943503c..50d8bc63 100644 --- a/samples/snippets/process_document_processor_version_sample_test.py +++ b/samples/snippets/process_document_processor_version_sample_test.py @@ -20,9 +20,10 @@ location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] processor_id = "90484cfdedb024f6" -processor_version_id = "pretrained-form-parser-v1.0-2020-09-23" +processor_version_id = "stable" file_path = "resources/invoice.pdf" mime_type = "application/pdf" +field_mask = "text,pages.pageNumber" def test_process_document_processor_versions(capsys): 
@@ -33,6 +34,7 @@ def test_process_document_processor_versions(capsys): processor_version_id=processor_version_id, file_path=file_path, mime_type=mime_type, + field_mask=field_mask, ) out, _ = capsys.readouterr() diff --git a/samples/snippets/process_document_sample_test.py b/samples/snippets/process_document_sample_test.py index bf7bcd63..6da4ae36 100644 --- a/samples/snippets/process_document_sample_test.py +++ b/samples/snippets/process_document_sample_test.py @@ -22,6 +22,7 @@ processor_id = "90484cfdedb024f6" file_path = "resources/invoice.pdf" mime_type = "application/pdf" +field_mask = "text,pages.pageNumber" def test_process_documents(capsys): @@ -31,6 +32,7 @@ def test_process_documents(capsys): processor_id=processor_id, file_path=file_path, mime_type=mime_type, + field_mask=field_mask, ) out, _ = capsys.readouterr() diff --git a/samples/snippets/set_default_processor_version_sample_test.py b/samples/snippets/set_default_processor_version_sample_test.py index dd8377fd..5750344d 100644 --- a/samples/snippets/set_default_processor_version_sample_test.py +++ b/samples/snippets/set_default_processor_version_sample_test.py @@ -19,8 +19,9 @@ location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -processor_id = "91e072f8626a76b7" -processor_version_id = "pretrained-ocr-v1.1-2022-09-12" +processor_id = "aeb8cea219b7c272" +current_default_processor_version = "pretrained-ocr-v1.0-2020-09-23" +new_default_processor_version = "pretrained-ocr-v1.1-2022-09-12" def test_set_default_processor_version(capsys): @@ -28,8 +29,16 @@ def test_set_default_processor_version(capsys): project_id=project_id, location=location, processor_id=processor_id, - processor_version_id=processor_version_id, + processor_version_id=new_default_processor_version, ) out, _ = capsys.readouterr() assert "operation" in out + + # Set back to previous default + set_default_processor_version_sample.set_default_processor_version_sample( + project_id=project_id, + location=location, + 
processor_id=processor_id, + processor_version_id=current_default_processor_version, + ) diff --git a/samples/snippets/undeploy_processor_version_sample_test.py b/samples/snippets/undeploy_processor_version_sample_test.py index 27ded4fa..36f27438 100644 --- a/samples/snippets/undeploy_processor_version_sample_test.py +++ b/samples/snippets/undeploy_processor_version_sample_test.py @@ -15,21 +15,34 @@ import os +import mock from samples.snippets import undeploy_processor_version_sample location = "us" project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -processor_id = "91e072f8626a76b7" -processor_version_id = "pretrained-ocr-v1.0-2020-09-23" +processor_id = "aaaaaaaaa" +processor_version_id = "xxxxxxxxxx" -def test_undeploy_processor_version(capsys): +# TODO: Switch to Real Endpoint when Deployable Versions are Available +@mock.patch( + "google.cloud.documentai.DocumentProcessorServiceClient.undeploy_processor_version" +) +@mock.patch("google.api_core.operation.Operation") +def test_undeploy_processor_version( + operation_mock, undeploy_processor_version_mock, capsys +): + undeploy_processor_version_mock.return_value = operation_mock + undeploy_processor_version_sample.undeploy_processor_version_sample( project_id=project_id, location=location, processor_id=processor_id, processor_version_id=processor_version_id, ) + + undeploy_processor_version_mock.assert_called_once() + out, _ = capsys.readouterr() - assert "UNDEPLOY" in out or "pretrained" in out or "operation" in out + assert "operation" in out