From b1b0f92c54f1f04541c135ea6b6eca5f66d9bf6a Mon Sep 17 00:00:00 2001
From: Mike <45373284+munkhuushmgl@users.noreply.github.com>
Date: Fri, 11 Dec 2020 11:27:00 -0800
Subject: [PATCH] samples: added test that covers the wrong file type case (#69)

* samples: added test that covers the wrong file type case
---
 ...documents_sample_bad_input_v1beta3_test.py | 44 +++++++++++++++++++
 batch_process_documents_sample_v1beta3.py     |  4 +-
 2 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 batch_process_documents_sample_bad_input_v1beta3_test.py

diff --git a/batch_process_documents_sample_bad_input_v1beta3_test.py b/batch_process_documents_sample_bad_input_v1beta3_test.py
new file mode 100644
index 000000000000..e0a7e4689dde
--- /dev/null
+++ b/batch_process_documents_sample_bad_input_v1beta3_test.py
@@ -0,0 +1,44 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+from uuid import uuid4
+
+from samples.snippets import batch_process_documents_sample_v1beta3
+
+location = "us"
+project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
+processor_id = "90484cfdedb024f6"
+gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf"
+# The following bucket contains a .csv file, which will cause the sample to fail.
+gcs_output_full_uri_with_wrong_type = "gs://documentai-beta-samples"
+BUCKET_NAME = f"document-ai-python-{uuid4()}"
+
+
+def test_batch_process_documents_with_bad_input(capsys):
+    try:
+        batch_process_documents_sample_v1beta3.batch_process_documents(
+            project_id=project_id,
+            location=location,
+            processor_id=processor_id,
+            gcs_input_uri=gcs_input_uri,
+            gcs_output_uri=gcs_output_full_uri_with_wrong_type,
+            gcs_output_uri_prefix="test",
+            timeout=450,
+        )
+    except Exception as e:  # not every exception type has a .message attribute
+        assert "Failed to process" in str(e)
+    else:  # no exception raised: the failure must have been printed instead
+        assert "Failed to process" in capsys.readouterr().out
diff --git a/batch_process_documents_sample_v1beta3.py b/batch_process_documents_sample_v1beta3.py
index dcedbbf5b5a9..dae938b2ca8a 100644
--- a/batch_process_documents_sample_v1beta3.py
+++ b/batch_process_documents_sample_v1beta3.py
@@ -35,6 +35,7 @@ def batch_process_documents(
     gcs_input_uri,
     gcs_output_uri,
     gcs_output_uri_prefix,
+    timeout: int = 300,
 ):
 
     client = documentai.DocumentProcessorServiceClient()
@@ -63,7 +64,7 @@ def batch_process_documents(
     operation = client.batch_process_documents(request)
 
     # Wait for the operation to finish
-    operation.result()
+    operation.result(timeout=timeout)
 
     # Results are written to GCS. Use a regex to find
     # output files
@@ -79,6 +80,7 @@ def batch_process_documents(
     for i, blob in enumerate(blob_list):
         # Download the contents of this blob as a bytes object.
         if ".json" not in blob.name:
+            print(f"skipping non-supported file type {blob.name}")
             return
         # Only parses JSON files
         blob_as_bytes = blob.download_as_bytes()