docs(samples): Added Image Quality Output to Document OCR Processor

googleapis · Nov 9, 2022 · f7b2bfd · f7b2bfd
1 parent e7c6904
commit f7b2bfd
Show file tree

Hide file tree

Showing 2 changed files with 44 additions and 4 deletions.
diff --git a/samples/snippets/process_document_ocr_sample.py b/samples/snippets/process_document_ocr_sample.py
@@ -24,16 +24,22 @@
 # project_id = 'YOUR_PROJECT_ID'
 # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
 # processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample
+# processor_version = 'rc' # Refer to https://cloud.google.com/document-ai/docs/manage-processor-versions for more information
 # file_path = '/path/to/local/pdf'
 # mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
 
 
 def process_document_ocr_sample(
-    project_id: str, location: str, processor_id: str, file_path: str, mime_type: str
+    project_id: str,
+    location: str,
+    processor_id: str,
+    processor_version: str,
+    file_path: str,
+    mime_type: str,
 ) -> None:
     # Online processing request to Document AI
     document = process_document(
-        project_id, location, processor_id, file_path, mime_type
+        project_id, location, processor_id, processor_version, file_path, mime_type
     )
 
     # For a full list of Document object attributes, please reference this page:
@@ -52,9 +58,18 @@ def process_document_ocr_sample(
         print_lines(page.lines, text)
         print_tokens(page.tokens, text)
 
+        # Image quality scores are currently supported in version pretrained-ocr-v1.1-2022-09-12
+        if page.image_quality_scores:
+            print_image_quality_scores(page.image_quality_scores)
+
 
 def process_document(
-    project_id: str, location: str, processor_id: str, file_path: str, mime_type: str
+    project_id: str,
+    location: str,
+    processor_id: str,
+    processor_version: str,
+    file_path: str,
+    mime_type: str,
 ) -> documentai.Document:
     # You must set the api_endpoint if you use a location other than 'us', e.g.:
     opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
@@ -64,7 +79,9 @@ def process_document(
     # The full resource name of the processor, e.g.:
     # projects/project_id/locations/location/processor/processor_id
     # You must create processors before running sample code.
-    name = client.processor_path(project_id, location, processor_id)
+    name = client.processor_version_path(
+        project_id, location, processor_id, processor_version
+    )
 
     # Read the file into memory
     with open(file_path, "rb") as image:
@@ -133,6 +150,16 @@ def print_tokens(tokens: Sequence[documentai.Document.Page.Token], text: str) ->
     print(f"        Last token break type: {repr(last_token_break_type)}")
 
 
+def print_image_quality_scores(
+    image_quality_scores: documentai.Document.Page.ImageQualityScores,
+) -> None:
+    print(f"    Quality score: {image_quality_scores.quality_score:.1%}")
+    print("    Detected defects:")
+
+    for detected_defect in image_quality_scores.detected_defects:
+        print(f"        {detected_defect.type_}: {detected_defect.confidence:.1%}")
+
+
 def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str:
     """
     Document AI identifies text in different parts of the document by their
@@ -150,3 +177,14 @@ def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str:
 
 
 # [END documentai_process_ocr_document]
+
+project_id = "document-ai-test-337818"
+location = "us"  # Format is 'us' or 'eu'
+processor_id = "a77291b5110b387"
+processor_version = "rc"
+file_path = "resources/handwritten_form.pdf"
+mime_type = "application/pdf"
+
+process_document_ocr_sample(
+    project_id, location, processor_id, processor_version, file_path, mime_type
+)
diff --git a/samples/snippets/process_document_ocr_sample_test.py b/samples/snippets/process_document_ocr_sample_test.py
@@ -20,6 +20,7 @@
 location = "us"
 project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
 processor_id = "52a38e080c1a7296"
+processor_version = "rc"
 file_path = "resources/handwritten_form.pdf"
 mime_type = "application/pdf"
 
@@ -29,6 +30,7 @@ def test_process_documents(capsys):
         project_id=project_id,
         location=location,
         processor_id=processor_id,
+        processor_version=processor_version,
         file_path=file_path,
         mime_type=mime_type,
     )