feat: DataScans service (#3797)

feat: DataScans service feat: added StorageFormat.iceberg chore: formatting changes PiperOrigin-RevId: 496586743 Source-Link: googleapis/googleapis@58f5c43 Source-Link: googleapis/googleapis-gen@193c125 Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWRhdGFwbGV4Ly5Pd2xCb3QueWFtbCIsImgiOiIxOTNjMTI1ZDA0ZjI2NDllZGI4MjFkNDViOGU2YmE2YTkxYzliMDkwIn0= See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Benjamin E. Coe <[email protected]>
googleapis · Dec 20, 2022 · 374205f · 374205f
1 parent a4ccc4f
commit 374205f
Show file tree

Hide file tree

Showing 38 changed files with 33,371 additions and 2,440 deletions.
diff --git a/packages/google-cloud-dataplex/README.md b/packages/google-cloud-dataplex/README.md
@@ -96,6 +96,14 @@ Samples are in the [`samples/`](https://github.com/googleapis/google-cloud-node/
 | Content_service.set_iam_policy | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/content_service.set_iam_policy.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/content_service.set_iam_policy.js,samples/README.md) |
 | Content_service.test_iam_permissions | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/content_service.test_iam_permissions.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/content_service.test_iam_permissions.js,samples/README.md) |
 | Content_service.update_content | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/content_service.update_content.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/content_service.update_content.js,samples/README.md) |
+| Data_scan_service.create_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.create_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.create_data_scan.js,samples/README.md) |
+| Data_scan_service.delete_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.delete_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.delete_data_scan.js,samples/README.md) |
+| Data_scan_service.get_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.get_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.get_data_scan.js,samples/README.md) |
+| Data_scan_service.get_data_scan_job | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.get_data_scan_job.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.get_data_scan_job.js,samples/README.md) |
+| Data_scan_service.list_data_scan_jobs | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.list_data_scan_jobs.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.list_data_scan_jobs.js,samples/README.md) |
+| Data_scan_service.list_data_scans | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.list_data_scans.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.list_data_scans.js,samples/README.md) |
+| Data_scan_service.run_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.run_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.run_data_scan.js,samples/README.md) |
+| Data_scan_service.update_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.update_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.update_data_scan.js,samples/README.md) |
 | Dataplex_service.cancel_job | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.cancel_job.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.cancel_job.js,samples/README.md) |
 | Dataplex_service.create_asset | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.create_asset.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.create_asset.js,samples/README.md) |
 | Dataplex_service.create_environment | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.create_environment.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.create_environment.js,samples/README.md) |

diff --git a/packages/google-cloud-dataplex/protos/google/cloud/dataplex/v1/data_profile.proto b/packages/google-cloud-dataplex/protos/google/cloud/dataplex/v1/data_profile.proto
@@ -0,0 +1,207 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.dataplex.v1;
+
+import "google/cloud/dataplex/v1/processing.proto";
+
+option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
+option java_multiple_files = true;
+option java_outer_classname = "DataProfileProto";
+option java_package = "com.google.cloud.dataplex.v1";
+
+// DataProfileScan related setting.
+message DataProfileSpec {}
+
+// DataProfileResult defines the output of DataProfileScan.
+// Each field of the table will have field type specific profile result.
+message DataProfileResult {
+  // Profile information describing the structure and layout of the data
+  // and contains the profile info.
+  message Profile {
+    // Represents a column field within a table schema.
+    message Field {
+      // ProfileInfo defines the profile information for each schema field type.
+      message ProfileInfo {
+        // StringFieldInfo defines output info for any string type field.
+        message StringFieldInfo {
+          // The minimum length of the string field in the sampled data.
+          // Optional if zero non-null rows.
+          int64 min_length = 1;
+
+          // The maximum length of a string field in the sampled data.
+          // Optional if zero non-null rows.
+          int64 max_length = 2;
+
+          // The average length of a string field in the sampled data.
+          // Optional if zero non-null rows.
+          double average_length = 3;
+        }
+
+        // IntegerFieldInfo defines output for any integer type field.
+        message IntegerFieldInfo {
+          // The average of non-null values of integer field in the sampled
+          // data. Return NaN, if the field has a NaN. Optional if zero non-null
+          // rows.
+          double average = 1;
+
+          // The standard deviation of non-null of integer field in the sampled
+          // data. Return NaN, if the field has a NaN. Optional if zero non-null
+          // rows.
+          double standard_deviation = 3;
+
+          // The minimum value of an integer field in the sampled data.
+          // Return NaN, if the field has a NaN. Optional if zero non-null
+          // rows.
+          int64 min = 4;
+
+          // A quartile divide the number of data points into four parts, or
+          // quarters, of more-or-less equal size. Three main quartiles used
+          // are: The first quartile (Q1) splits off the lowest 25% of data from
+          // the highest 75%. It is also known as the lower or 25th empirical
+          // quartile, as 25% of the data is below this point. The second
+          // quartile (Q2) is the median of a data set. So, 50% of the data lies
+          // below this point. The third quartile (Q3) splits off the highest
+          // 25% of data from the lowest 75%. It is known as the upper or 75th
+          // empirical quartile, as 75% of the data lies below this point. So,
+          // here the quartiles is provided as an ordered list of quartile
+          // values, occurring in order Q1, median, Q3.
+          repeated int64 quartiles = 6;
+
+          // The maximum value of an integer field in the sampled data.
+          // Return NaN, if the field has a NaN. Optional if zero non-null
+          // rows.
+          int64 max = 5;
+        }
+
+        // DoubleFieldInfo defines output for any double type field.
+        message DoubleFieldInfo {
+          // The average of non-null values of double field in the sampled data.
+          // Return NaN, if the field has a NaN. Optional if zero non-null rows.
+          double average = 1;
+
+          // The standard deviation of non-null of double field in the sampled
+          // data. Return NaN, if the field has a NaN. Optional if zero non-null
+          // rows.
+          double standard_deviation = 3;
+
+          // The minimum value of a double field in the sampled data.
+          // Return NaN, if the field has a NaN. Optional if zero non-null
+          // rows.
+          double min = 4;
+
+          // A quartile divide the numebr of data points into four parts, or
+          // quarters, of more-or-less equal size. Three main quartiles used
+          // are: The first quartile (Q1) splits off the lowest 25% of data from
+          // the highest 75%. It is also known as the lower or 25th empirical
+          // quartile, as 25% of the data is below this point. The second
+          // quartile (Q2) is the median of a data set. So, 50% of the data lies
+          // below this point. The third quartile (Q3) splits off the highest
+          // 25% of data from the lowest 75%. It is known as the upper or 75th
+          // empirical quartile, as 75% of the data lies below this point. So,
+          // here the quartiles is provided as an ordered list of quartile
+          // values, occurring in order Q1, median, Q3.
+          repeated double quartiles = 6;
+
+          // The maximum value of a double field in the sampled data.
+          // Return NaN, if the field has a NaN. Optional if zero non-null
+          // rows.
+          double max = 5;
+        }
+
+        // The TopNValue defines the structure of output of top N values of a
+        // field.
+        message TopNValue {
+          // The value is the string value of the actual value from the field.
+          string value = 1;
+
+          // The frequency count of the corresponding value in the field.
+          int64 count = 2;
+        }
+
+        // The ratio of null rows against the rows in the sampled data.
+        double null_ratio = 2;
+
+        // The ratio of rows that are distinct against the rows in the sampled
+        // data.
+        double distinct_ratio = 3;
+
+        // The array of top N values of the field in the sampled data.
+        // Currently N is set as 10 or equal to distinct values in the field,
+        // whichever is smaller. This will be optional for complex non-groupable
+        // data-types such as JSON, ARRAY, JSON, STRUCT.
+        repeated TopNValue top_n_values = 4;
+
+        // The corresponding profile for specific field type.
+        // Each field will have only one field type specific profile output.
+        oneof field_info {
+          // The corresponding string field profile.
+          StringFieldInfo string_profile = 101;
+
+          // The corresponding integer field profile.
+          IntegerFieldInfo integer_profile = 102;
+
+          // The corresponding double field profile.
+          DoubleFieldInfo double_profile = 103;
+        }
+      }
+
+      // The name of the field.
+      string name = 1;
+
+      // The field data type. Possible values include:
+      //
+      // * STRING
+      // * BYTE
+      // * INT64
+      // * INT32
+      // * INT16
+      // * DOUBLE
+      // * FLOAT
+      // * DECIMAL
+      // * BOOLEAN
+      // * BINARY
+      // * TIMESTAMP
+      // * DATE
+      // * TIME
+      // * NULL
+      // * RECORD
+      string type = 2;
+
+      // The mode of the field. Its value will be:
+      // REQUIRED, if it is a required field.
+      // NULLABLE, if it is an optional field.
+      // REPEATED, if it is a repeated field.
+      string mode = 3;
+
+      // The profile information for the corresponding field.
+      ProfileInfo profile = 4;
+    }
+
+    // The sequence of fields describing data in table entities.
+    repeated Field fields = 2;
+  }
+
+  // The count of all rows in the sampled data.
+  // Return 0, if zero rows.
+  int64 row_count = 3;
+
+  // This represents the profile information per field.
+  Profile profile = 4;
+
+  // The data scanned for this profile.
+  ScannedData scanned_data = 5;
+}