Skip to content
This repository has been archived by the owner on Sep 20, 2023. It is now read-only.

Commit

Permalink
fix(samples): Fix Typos in Batch process & get processor Samples (#420)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtskinner authored Nov 27, 2022
1 parent 2176536 commit 7bdedd1
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import re

from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import RetryError
from google.api_core.exceptions import InternalServerError, RetryError
from google.cloud import documentai, storage

# TODO(developer): Uncomment these variables before running the sample.
Expand Down Expand Up @@ -66,7 +66,8 @@ def batch_process_documents_processor_version(
#

# Cloud Storage URI for the Output Directory
destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}/"
# The URI must end with a trailing forward slash `/`; it is no longer appended
# here, so `gcs_output_uri_prefix` itself has to end with `/`
destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}"

gcs_output_config = documentai.DocumentOutputConfig.GcsOutputConfig(
gcs_uri=destination_uri, field_mask=field_mask
Expand Down Expand Up @@ -97,7 +98,7 @@ def batch_process_documents_processor_version(
print(f"Waiting for operation {operation.operation.name} to complete...")
operation.result(timeout=timeout)
# Catch exception when operation doesn't finish before timeout
except (RetryError) as e:
except (RetryError, InternalServerError) as e:
print(e.message)

# NOTE: Can also use callbacks for asynchronous processing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
processor_version_id = "pretrained-form-parser-v1.0-2020-09-23"
gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf"
input_mime_type = "application/pdf"
gcs_output_uri_prefix = uuid4()
gcs_output_uri_prefix = f"{uuid4()}/"
field_mask = "text,pages.pageNumber"
BUCKET_NAME = f"document-ai-python-{uuid4()}"

Expand Down
7 changes: 4 additions & 3 deletions samples/snippets/batch_process_documents_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import re

from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import RetryError
from google.api_core.exceptions import InternalServerError, RetryError
from google.cloud import documentai, storage

# TODO(developer): Uncomment these variables before running the sample.
Expand Down Expand Up @@ -64,7 +64,8 @@ def batch_process_documents(
#

# Cloud Storage URI for the Output Directory
destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}/"
# The URI must end with a trailing forward slash `/`; it is no longer appended
# here, so `gcs_output_uri_prefix` itself has to end with `/`
destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}"

gcs_output_config = documentai.DocumentOutputConfig.GcsOutputConfig(
gcs_uri=destination_uri, field_mask=field_mask
Expand Down Expand Up @@ -93,7 +94,7 @@ def batch_process_documents(
print(f"Waiting for operation {operation.operation.name} to complete...")
operation.result(timeout=timeout)
# Catch exception when operation doesn't finish before timeout
except (RetryError) as e:
except (RetryError, InternalServerError) as e:
print(e.message)

# NOTE: Can also use callbacks for asynchronous processing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import os
from uuid import uuid4

from google.api_core.exceptions import InternalServerError, RetryError
from samples.snippets import batch_process_documents_sample

location = "us"
Expand All @@ -25,7 +26,7 @@
input_mime_type = "application/pdf"
# The following bucket contains a .csv file, which will cause the sample to fail.
gcs_output_full_uri_with_wrong_type = "gs://documentai-beta-samples"
gcs_output_uri_prefix = "test"
gcs_output_uri_prefix = "test/"
BUCKET_NAME = f"document-ai-python-{uuid4()}"


Expand All @@ -41,7 +42,8 @@ def test_batch_process_documents_with_bad_input(capsys):
gcs_output_uri_prefix=gcs_output_uri_prefix,
timeout=450,
)
except ValueError:
out, _ = capsys.readouterr()
assert "Failed" in out
except Exception as e:
assert "Failed" in e.message
assert "Failed" in out or "error" in out
except (InternalServerError, RetryError) as e:
assert "error" in e.message
2 changes: 1 addition & 1 deletion samples/snippets/batch_process_documents_sample_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
processor_id = "90484cfdedb024f6"
gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf"
input_mime_type = "application/pdf"
gcs_output_uri_prefix = uuid4()
gcs_output_uri_prefix = f"{uuid4()}/"
field_mask = "text,pages.pageNumber"
BUCKET_NAME = f"document-ai-python-{uuid4()}"

Expand Down
2 changes: 1 addition & 1 deletion samples/snippets/get_processor_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def get_processor_sample(project_id: str, location: str, processor_id: str):
client = documentai.DocumentProcessorServiceClient(client_options=opts)

# The full resource name of the processor, e.g.:
# projects/project_id/locations/location/processor/processor_id
# projects/{project_id}/locations/{location}/processors/{processor_id}
name = client.processor_path(project_id, location, processor_id)

# Make GetProcessor request
Expand Down

0 comments on commit 7bdedd1

Please sign in to comment.