chore(deps): bump google.golang.org/protobuf from 1.30.0 to 1.33.0 in /cx-content-moderation (#766)

Bumps google.golang.org/protobuf from 1.30.0 to 1.33.0.
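(google.golang.org/protobuf v1.33.0 is the release that fixes CVE-2024-24786, an infinite-loop bug in `protojson.Unmarshal` on certain malformed JSON input, which is presumably what triggered this Dependabot security update.)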


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=google.golang.org/protobuf&package-manager=go_modules&previous-version=1.30.0&new-version=1.33.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/GoogleCloudPlatform/document-ai-samples/network/alerts).

</details>
ghchinoy authored Mar 14, 2024
2 parents 2bd2d76 + b4a2452 commit c4bc200
Showing 7 changed files with 456 additions and 294 deletions.
2 changes: 1 addition & 1 deletion cx-content-moderation/go.mod
@@ -32,5 +32,5 @@ require (
google.golang.org/genproto/googleapis/api v0.0.0-20230530153820-e85fd2cbaebc // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230530153820-e85fd2cbaebc // indirect
google.golang.org/grpc v1.56.3 // indirect
-google.golang.org/protobuf v1.30.0 // indirect
+google.golang.org/protobuf v1.33.0 // indirect
)
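The equivalent manual update would typically be `go get google.golang.org/protobuf@v1.33.0` followed by `go mod tidy` in the cx-content-moderation module; the go.sum changes below record the matching checksums.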
4 changes: 2 additions & 2 deletions cx-content-moderation/go.sum
@@ -194,8 +194,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
-google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
+google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
+google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
34 changes: 7 additions & 27 deletions cx-content-moderation/main.go
@@ -20,7 +20,6 @@ import (
"log"
"os"

"cloud.google.com/go/compute/metadata"
documentai "cloud.google.com/go/documentai/apiv1beta3"
"cloud.google.com/go/documentai/apiv1beta3/documentaipb"

@@ -30,10 +29,8 @@ import (
)

var (
-projectID string = getProjectID()
-contentModerationProcessorName string = envCheck("CONTENT_MODERATION_NAME", "")
-port string = envCheck("PORT", "8080")
-logname string = envCheck("LOGNAME", "cx-content-moderation")
+contentModerationProcessorName = envCheck("CONTENT_MODERATION_NAME", "")
+port = envCheck("PORT", "8080")
)

const location = "us"
@@ -56,7 +53,7 @@ func analyzeCommentHandler(res *ezcx.WebhookResponse, req *ezcx.WebhookRequest)
params := req.GetSessionParameters()
text := req.GetText()
if text == "" {
-return fmt.Errorf("No text provided.")
+return fmt.Errorf("no text provided")
}

// perform content moderation on text
@@ -73,7 +70,7 @@ func analyzeCommentHandler(res *ezcx.WebhookResponse, req *ezcx.WebhookRequest)
attributes[attribute.GetType()] = attribute.GetConfidence()
}
if params == nil {
-params = make(map[string]any)
+params = make(map[string]interface{})
}
params["content-moderation"] = attributes

@@ -123,26 +120,9 @@ func apiEndpoint() string {

// envCheck checks for an environment variable, otherwise returns default
func envCheck(environmentVariable, defaultVar string) string {
-if envar, ok := os.LookupEnv(environmentVariable); !ok {
+envar, ok := os.LookupEnv(environmentVariable)
+if envar == "" || !ok {
return defaultVar
-} else if envar == "" {
-return defaultVar
-} else {
-return envar
}
-}
-
-// getProjectID checks for a local environment variable and then GCP metadata to
-func getProjectID() string {
-projectID := envCheck("PROJECT_ID", "")
-if projectID == "" { // local
-projectID = envCheck("GOOGLE_CLOUD_PROJECT", "") // appengine
-if projectID == "" { // gcp metadata
-var err error
-if projectID, err = metadata.ProjectID(); err != nil {
-log.Fatal("Unable to get Google Cloud Project ID", err)
-}
-}
-}
-return projectID
+return envar
}
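For reference, here is a minimal standalone sketch of the simplified helper as it reads after this change; the `main` function and the environment values in it are illustrative only, not part of the sample:

```go
package main

import (
	"fmt"
	"os"
)

// envCheck returns the value of the named environment variable, or
// defaultVar when the variable is unset or set to the empty string.
func envCheck(environmentVariable, defaultVar string) string {
	envar, ok := os.LookupEnv(environmentVariable)
	if envar == "" || !ok {
		return defaultVar
	}
	return envar
}

func main() {
	// Illustrative values only.
	os.Setenv("PORT", "9090")
	fmt.Println(envCheck("PORT", "8080"))                     // "9090"
	fmt.Println(envCheck("CONTENT_MODERATION_NAME", "unset")) // "unset" when not defined
}
```

Dropping the unused `getProjectID` helper is also what removes the `cloud.google.com/go/compute/metadata` import at the top of this diff.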
@@ -6,9 +6,14 @@

import concurrent.futures
from typing import List

+from google.cloud import documentai
+from google.cloud import firestore
+from google.cloud import storage
import pandas as pd
-from google.cloud import documentai, firestore, storage
-from utilities import batch_process_documents_sample, copy_blob, list_blobs
+from utilities import batch_process_documents_sample
+from utilities import copy_blob
+from utilities import list_blobs

INPUT_BUCKET_NAME = "your_test_bucket_name"
GCS_OUTPUT_URI_PREFIX = "your_output_folder_prefix"
@@ -200,8 +205,11 @@ def metadata_reader(metadata: documentai.BatchProcessMetadata) -> List:
"operation_id": i.output_gcs_destination.split("/")[-2],
"file_output_gcs_destination": i.output_gcs_destination,
"file_human_review_status": i.human_review_status.state.name,
"file_human_review_operation_id":
i.human_review_status.human_review_operation.split("/")[-1],
"file_human_review_operation_id": i.human_review_status.human_review_operation.split( # pylint: line-too-long
"/"
)[
-1
],
}
)
return info_array
@@ -242,9 +250,7 @@ def file_copy(array_having_file_names: List, bucket_name_with_folder: str) -> None:
)


-def concurrent_processing(
-daira_output_test: str, batch_array: List
-) -> None:
+def concurrent_processing(daira_output_test: str, batch_array: List) -> None:
"""
To create a concurrent process for batch processing the files .
@@ -16,23 +16,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""This module contains helper functions for Advance Table Parsing Tool"""
+from collections import defaultdict
+from io import BytesIO
import math
import re
import time
-from collections import defaultdict
-from io import BytesIO
-from typing import Dict, List, MutableSequence, Tuple, Union, Any
+from typing import Any, Dict, List, MutableSequence, Tuple, Union

-import numpy as np
-import pandas as pd
-import PyPDF2
from google.api_core.client_options import ClientOptions
-from google.api_core.exceptions import InternalServerError, RetryError
-from google.cloud import documentai, storage
+from google.api_core.exceptions import InternalServerError
+from google.api_core.exceptions import RetryError
+from google.cloud import documentai
+from google.cloud import storage
from google.longrunning import operations_pb2
from google.longrunning.operations_pb2 import GetOperationRequest
+import numpy as np
+import pandas as pd
from PIL import Image as PilImage
from PIL import ImageDraw
+import PyPDF2


def batch_process_documents(
@@ -382,9 +384,7 @@ def poll_hitl_operations(
]
if not operations:
break
-print(
-f"Still waiting for {len(operations)} HITL operations to complete"
-)
+print(f"Still waiting for {len(operations)} HITL operations to complete")
time.sleep(100)
print(f"Finished waiting for all {num_operations} HITL operations.")

@@ -449,7 +449,7 @@ def parse_document_tables(output_bucket, output_prefix, output_csv_prefix):
output_bucket=output_bucket, output_prefix=output_prefix
)
for file_key, document in doc_obj_dict.items():
-for _ , page in enumerate(document.pages):
+for _, page in enumerate(document.pages):
header_row_values: List[List[str]] = []
body_row_values: List[List[str]] = []
for index, table in enumerate(page.tables):
@@ -820,7 +820,7 @@ def process_taxonomy_disclosure(st: str) -> str:
ea = re.search(r"^[A-Z]\.\s[a-zA-Z\s-]+", st)
if ea:
span = ea.span()
-interstr = st[span[0]:span[1]].split("\n")[0]
+interstr = st[span[0] : span[1]].split("\n")[0]
return interstr


@@ -839,7 +839,7 @@ def process_taxonomy_disclosure_complex(st: str) -> Tuple[str, str]:
ea = re.search(r"^[A-Z]\.[1-9](.|)[a-zA-Z()\s-]+", st)
if ea:
span = ea.span()
-interstr = st[span[0]:span[1]].split("\n")[0:-1]
+interstr = st[span[0] : span[1]].split("\n")[0:-1]
ans = " ".join(interstr)
st = st.replace(st[span[0] : span[1]], "")
return st, ans
@@ -859,7 +859,9 @@ def process_taxonomy_disclosure_multiple(row: pd.Series) -> None:
st = row["taxonomy_disclosure"]
row_ea = re.findall(r"\d.\d+ [a-zA-Z\s]+", st)
if len(row_ea) > 1:
-row["taxonomy_disclosure"] = "\n".join([ea.replace("\n", " ").strip() for ea in row_ea])
+row["taxonomy_disclosure"] = "\n".join(
+[ea.replace("\n", " ").strip() for ea in row_ea]
+)


def collect_multiple_values(row: pd.Series, col: str) -> List:
Expand All @@ -884,8 +886,9 @@ def collect_multiple_values(row: pd.Series, col: str) -> List:
return split_row


-def collect_and_extend_values(final_df_: pd.DataFrame, final_data_: dict,
-row: pd.Series, col: str) -> None:
+def collect_and_extend_values(
+final_df_: pd.DataFrame, final_data_: dict, row: pd.Series, col: str
+) -> None:
"""
Collect and extend values from a specific column in a row to the final data structure.
@@ -914,8 +917,9 @@
final_data_ = update_data(final_df_, final_data_, ea_)


-def extend_column_data(final_data_: dict, row: pd.Series,
-column: str, split_row: List[str]) -> None:
+def extend_column_data(
+final_data_: dict, row: pd.Series, column: str, split_row: List[str]
+) -> None:
"""
Extend column data in the final data structure.
@@ -985,13 +989,18 @@ def post_process(
)
# Post-processing code matches expected values and rearranges them into the final dataframe
final_data_: Dict[Any, Any] = defaultdict(list)
-for _ , row in dest_df.iterrows():
+for _, row in dest_df.iterrows():
if row["taxonomy_disclosure"] is np.nan:
continue
st = row["taxonomy_disclosure"]
-st = st.replace(process_taxonomy_disclosure(row["taxonomy_disclosure"]) + "\n", "").strip()
-final_data_ = update_data(final_df_, final_data_, process_taxonomy_disclosure(
-row["taxonomy_disclosure"]))
+st = st.replace(
+process_taxonomy_disclosure(row["taxonomy_disclosure"]) + "\n", ""
+).strip()
+final_data_ = update_data(
+final_df_,
+final_data_,
+process_taxonomy_disclosure(row["taxonomy_disclosure"]),
+)
row["taxonomy_disclosure"] = st

st = row["taxonomy_disclosure"]
@@ -1053,10 +1062,10 @@ def run_table_extractor_pipeline(
)
final_data_2_processed = final_data_new2.copy()
nrows = 0 # num of rows
-for _ , v in final_data_new2.items():
+for _, v in final_data_new2.items():
nrows = max(len(v), nrows)

-for _ , v in final_data_2_processed.items():
+for _, v in final_data_2_processed.items():
length = len(v)
if length != nrows:
v.extend([np.nan] * (nrows - length))
@@ -1103,7 +1112,7 @@ def walk_the_ocr(
)
cde_document = cde_jsons[file[:-4]]
print("NO HITL")
-_ , y_coord, row_map_cde, _ = get_coordinates_map(cde_document)
+_, y_coord, row_map_cde, _ = get_coordinates_map(cde_document)
fp_document_path = fp_input_output_map[file]
fp_document = read_json_output(
output_bucket=gcs_output_bucket, output_prefix=fp_document_path
@@ -1159,8 +1168,7 @@ def draw_vertical(
)
if (
n + 1 < len(x_coordinates[idx])
-and (x_coordinates[idx][n + 1][1] + voffset // 2)
-- (cor[1] + voffset // 2)
+and (x_coordinates[idx][n + 1][1] + voffset // 2) - (cor[1] + voffset // 2)
> 50
):
draw.line(
@@ -1275,7 +1283,7 @@ def enhance_and_save_pdfs(
try:
images_for_pdf = []
for idx, page in enumerate(document.pages):
-x_coordinates, _ , _ , max_ycd = get_coordinates_map(document)
+x_coordinates, _, _, max_ycd = get_coordinates_map(document)
image_content = page.image.content
image = PilImage.open(BytesIO(image_content))
draw = ImageDraw.Draw(image)
@@ -1284,8 +1292,18 @@
hoffset_ = factor * voffset
# Draw horizontal
if idx in max_ycd:
-draw_horizontal(idx, max_ycd, hoffset, hoffset_, min_x,
-min_height, max_x, line_colour, line_width, draw)
+draw_horizontal(
+idx,
+max_ycd,
+hoffset,
+hoffset_,
+min_x,
+min_height,
+max_x,
+line_colour,
+line_width,
+draw,
+)
# for n, y in enumerate(max_ycd[idx]):
# if n == 0: # column header min y coord
# draw.line(
@@ -1311,8 +1329,17 @@
# )
# Drawing vertical lines
if idx in x_coordinates:
-draw_vertical(idx, x_coordinates, hoffset_, min_height,
-max_height, line_colour, line_width, voffset, draw)
+draw_vertical(
+idx,
+x_coordinates,
+hoffset_,
+min_height,
+max_height,
+line_colour,
+line_width,
+voffset,
+draw,
+)
# for n, cor in enumerate(x_coordinates[idx]):
# if n == 0:
# draw.line(