Skip to content

Commit

Permalink
feat: DataScans service (#3797)
Browse files Browse the repository at this point in the history
feat: DataScans service
feat: added StorageFormat.iceberg
chore: formatting changes
PiperOrigin-RevId: 496586743
Source-Link: googleapis/googleapis@58f5c43
Source-Link: googleapis/googleapis-gen@193c125
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWRhdGFwbGV4Ly5Pd2xCb3QueWFtbCIsImgiOiIxOTNjMTI1ZDA0ZjI2NDllZGI4MjFkNDViOGU2YmE2YTkxYzliMDkwIn0=
See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Benjamin E. Coe <[email protected]>
  • Loading branch information
3 people authored Dec 20, 2022
1 parent a4ccc4f commit 374205f
Show file tree
Hide file tree
Showing 38 changed files with 33,371 additions and 2,440 deletions.
8 changes: 8 additions & 0 deletions packages/google-cloud-dataplex/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ Samples are in the [`samples/`](https://github.com/googleapis/google-cloud-node/
| Content_service.set_iam_policy | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/content_service.set_iam_policy.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/content_service.set_iam_policy.js,samples/README.md) |
| Content_service.test_iam_permissions | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/content_service.test_iam_permissions.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/content_service.test_iam_permissions.js,samples/README.md) |
| Content_service.update_content | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/content_service.update_content.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/content_service.update_content.js,samples/README.md) |
| Data_scan_service.create_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.create_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.create_data_scan.js,samples/README.md) |
| Data_scan_service.delete_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.delete_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.delete_data_scan.js,samples/README.md) |
| Data_scan_service.get_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.get_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.get_data_scan.js,samples/README.md) |
| Data_scan_service.get_data_scan_job | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.get_data_scan_job.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.get_data_scan_job.js,samples/README.md) |
| Data_scan_service.list_data_scan_jobs | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.list_data_scan_jobs.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.list_data_scan_jobs.js,samples/README.md) |
| Data_scan_service.list_data_scans | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.list_data_scans.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.list_data_scans.js,samples/README.md) |
| Data_scan_service.run_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.run_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.run_data_scan.js,samples/README.md) |
| Data_scan_service.update_data_scan | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.update_data_scan.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/data_scan_service.update_data_scan.js,samples/README.md) |
| Dataplex_service.cancel_job | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.cancel_job.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.cancel_job.js,samples/README.md) |
| Dataplex_service.create_asset | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.create_asset.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.create_asset.js,samples/README.md) |
| Dataplex_service.create_environment | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.create_environment.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-dataplex/samples/generated/v1/dataplex_service.create_environment.js,samples/README.md) |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/cloud/dataplex/v1/processing.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "DataProfileProto";
option java_package = "com.google.cloud.dataplex.v1";

// Settings for a DataProfileScan.
// The message currently declares no fields.
message DataProfileSpec {}

// DataProfileResult defines the output of a DataProfileScan.
// Each field of the table has a profile result specific to its field type.
message DataProfileResult {
// Profile information describing the structure and layout of the data;
// it holds the per-field profile info.
message Profile {
// Represents a column field within a table schema.
message Field {
// ProfileInfo defines the profile information for each schema field type.
message ProfileInfo {
// StringFieldInfo defines the output info for any string type field.
message StringFieldInfo {
// The minimum length of the string field in the sampled data.
// Optional if zero non-null rows.
int64 min_length = 1;

// The maximum length of the string field in the sampled data.
// Optional if zero non-null rows.
int64 max_length = 2;

// The average length of the string field in the sampled data.
// Optional if zero non-null rows.
double average_length = 3;
}

// IntegerFieldInfo defines the output for any integer type field.
message IntegerFieldInfo {
// The average of the non-null values of the integer field in the sampled
// data. Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
double average = 1;

// The standard deviation of the non-null values of the integer field in
// the sampled data. Return NaN, if the field has a NaN. Optional if zero
// non-null rows.
double standard_deviation = 3;

// The minimum value of the integer field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// NOTE(review): this field is int64, which cannot represent NaN —
// confirm what is actually returned in that case.
int64 min = 4;

// A quartile divides the number of data points into four parts, or
// quarters, of more-or-less equal size. The three main quartiles are:
//
// * The first quartile (Q1) splits off the lowest 25% of data from the
//   highest 75%. It is also known as the lower or 25th empirical
//   quartile, as 25% of the data is below this point.
// * The second quartile (Q2) is the median of a data set, so 50% of the
//   data lies below this point.
// * The third quartile (Q3) splits off the highest 25% of data from the
//   lowest 75%. It is known as the upper or 75th empirical quartile, as
//   75% of the data lies below this point.
//
// Here the quartiles are provided as an ordered list of quartile
// values, occurring in the order Q1, median, Q3.
repeated int64 quartiles = 6;

// The maximum value of the integer field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// NOTE(review): this field is int64, which cannot represent NaN —
// confirm what is actually returned in that case.
int64 max = 5;
}

// DoubleFieldInfo defines the output for any double type field.
message DoubleFieldInfo {
// The average of the non-null values of the double field in the sampled
// data. Return NaN, if the field has a NaN. Optional if zero non-null rows.
double average = 1;

// The standard deviation of the non-null values of the double field in
// the sampled data. Return NaN, if the field has a NaN. Optional if zero
// non-null rows.
double standard_deviation = 3;

// The minimum value of the double field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
double min = 4;

// A quartile divides the number of data points into four parts, or
// quarters, of more-or-less equal size. The three main quartiles are:
//
// * The first quartile (Q1) splits off the lowest 25% of data from the
//   highest 75%. It is also known as the lower or 25th empirical
//   quartile, as 25% of the data is below this point.
// * The second quartile (Q2) is the median of a data set, so 50% of the
//   data lies below this point.
// * The third quartile (Q3) splits off the highest 25% of data from the
//   lowest 75%. It is known as the upper or 75th empirical quartile, as
//   75% of the data lies below this point.
//
// Here the quartiles are provided as an ordered list of quartile
// values, occurring in the order Q1, median, Q3.
repeated double quartiles = 6;

// The maximum value of the double field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
double max = 5;
}

// TopNValue defines the structure of one entry in the top N values of a
// field.
message TopNValue {
// The string representation of the actual value from the field.
string value = 1;

// The frequency count of the corresponding value in the field.
int64 count = 2;
}

// The ratio of null rows against the rows in the sampled data.
double null_ratio = 2;

// The ratio of rows that are distinct against the rows in the sampled
// data.
double distinct_ratio = 3;

// The array of top N values of the field in the sampled data.
// Currently N is set as 10 or equal to distinct values in the field,
// whichever is smaller. This will be optional for complex non-groupable
// data-types such as JSON, ARRAY, STRUCT.
repeated TopNValue top_n_values = 4;

// The profile specific to the field's type.
// Each field has exactly one field-type-specific profile output, so at
// most one member of this oneof is set.
oneof field_info {
// The corresponding string field profile.
StringFieldInfo string_profile = 101;

// The corresponding integer field profile.
IntegerFieldInfo integer_profile = 102;

// The corresponding double field profile.
DoubleFieldInfo double_profile = 103;
}
}

// The name of the field.
string name = 1;

// The field data type. Possible values include:
//
// * STRING
// * BYTE
// * INT64
// * INT32
// * INT16
// * DOUBLE
// * FLOAT
// * DECIMAL
// * BOOLEAN
// * BINARY
// * TIMESTAMP
// * DATE
// * TIME
// * NULL
// * RECORD
string type = 2;

// The mode of the field. Its value will be:
//
// * REQUIRED, if it is a required field.
// * NULLABLE, if it is an optional field.
// * REPEATED, if it is a repeated field.
string mode = 3;

// The profile information for the corresponding field.
ProfileInfo profile = 4;
}

// The sequence of fields describing data in table entities.
repeated Field fields = 2;
}

// The count of all rows in the sampled data.
// Return 0, if zero rows.
int64 row_count = 3;

// The profile information, broken down per field.
Profile profile = 4;

// The data scanned for this profile.
// NOTE(review): ScannedData is not declared in this file; presumably it
// comes from the imported google/cloud/dataplex/v1/processing.proto —
// confirm.
ScannedData scanned_data = 5;
}
Loading

0 comments on commit 374205f

Please sign in to comment.