-
Notifications
You must be signed in to change notification settings - Fork 600
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: DataScans service feat: added StorageFormat.iceberg chore: formatting changes PiperOrigin-RevId: 496586743 Source-Link: googleapis/googleapis@58f5c43 Source-Link: googleapis/googleapis-gen@193c125 Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWRhdGFwbGV4Ly5Pd2xCb3QueWFtbCIsImgiOiIxOTNjMTI1ZDA0ZjI2NDllZGI4MjFkNDViOGU2YmE2YTkxYzliMDkwIn0= See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Benjamin E. Coe <[email protected]>
- Loading branch information
1 parent
a4ccc4f
commit 374205f
Showing
38 changed files
with
33,371 additions
and
2,440 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
207 changes: 207 additions & 0 deletions
207
packages/google-cloud-dataplex/protos/google/cloud/dataplex/v1/data_profile.proto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,207 @@ | ||
// Copyright 2022 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
syntax = "proto3"; | ||
|
||
package google.cloud.dataplex.v1; | ||
|
||
import "google/cloud/dataplex/v1/processing.proto"; | ||
|
||
option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex"; | ||
option java_multiple_files = true; | ||
option java_outer_classname = "DataProfileProto"; | ||
option java_package = "com.google.cloud.dataplex.v1"; | ||
|
||
// DataProfileScan related setting. | ||
message DataProfileSpec {} | ||
|
||
// DataProfileResult defines the output of DataProfileScan. | ||
// Each field of the table will have field type specific profile result. | ||
message DataProfileResult { | ||
// Profile information describing the structure and layout of the data | ||
// and contains the profile info. | ||
message Profile { | ||
// Represents a column field within a table schema. | ||
message Field { | ||
// ProfileInfo defines the profile information for each schema field type. | ||
message ProfileInfo { | ||
// StringFieldInfo defines output info for any string type field. | ||
message StringFieldInfo { | ||
// The minimum length of the string field in the sampled data. | ||
// Optional if zero non-null rows. | ||
int64 min_length = 1; | ||
|
||
// The maximum length of a string field in the sampled data. | ||
// Optional if zero non-null rows. | ||
int64 max_length = 2; | ||
|
||
// The average length of a string field in the sampled data. | ||
// Optional if zero non-null rows. | ||
double average_length = 3; | ||
} | ||
|
||
// IntegerFieldInfo defines output for any integer type field. | ||
message IntegerFieldInfo { | ||
// The average of non-null values of integer field in the sampled | ||
// data. Return NaN, if the field has a NaN. Optional if zero non-null | ||
// rows. | ||
double average = 1; | ||
|
||
// The standard deviation of non-null of integer field in the sampled | ||
// data. Return NaN, if the field has a NaN. Optional if zero non-null | ||
// rows. | ||
double standard_deviation = 3; | ||
|
||
// The minimum value of an integer field in the sampled data. | ||
// Return NaN, if the field has a NaN. Optional if zero non-null | ||
// rows. | ||
int64 min = 4; | ||
|
||
// A quartile divide the number of data points into four parts, or | ||
// quarters, of more-or-less equal size. Three main quartiles used | ||
// are: The first quartile (Q1) splits off the lowest 25% of data from | ||
// the highest 75%. It is also known as the lower or 25th empirical | ||
// quartile, as 25% of the data is below this point. The second | ||
// quartile (Q2) is the median of a data set. So, 50% of the data lies | ||
// below this point. The third quartile (Q3) splits off the highest | ||
// 25% of data from the lowest 75%. It is known as the upper or 75th | ||
// empirical quartile, as 75% of the data lies below this point. So, | ||
// here the quartiles is provided as an ordered list of quartile | ||
// values, occurring in order Q1, median, Q3. | ||
repeated int64 quartiles = 6; | ||
|
||
// The maximum value of an integer field in the sampled data. | ||
// Return NaN, if the field has a NaN. Optional if zero non-null | ||
// rows. | ||
int64 max = 5; | ||
} | ||
|
||
// DoubleFieldInfo defines output for any double type field. | ||
message DoubleFieldInfo { | ||
// The average of non-null values of double field in the sampled data. | ||
// Return NaN, if the field has a NaN. Optional if zero non-null rows. | ||
double average = 1; | ||
|
||
// The standard deviation of non-null of double field in the sampled | ||
// data. Return NaN, if the field has a NaN. Optional if zero non-null | ||
// rows. | ||
double standard_deviation = 3; | ||
|
||
// The minimum value of a double field in the sampled data. | ||
// Return NaN, if the field has a NaN. Optional if zero non-null | ||
// rows. | ||
double min = 4; | ||
|
||
// A quartile divide the numebr of data points into four parts, or | ||
// quarters, of more-or-less equal size. Three main quartiles used | ||
// are: The first quartile (Q1) splits off the lowest 25% of data from | ||
// the highest 75%. It is also known as the lower or 25th empirical | ||
// quartile, as 25% of the data is below this point. The second | ||
// quartile (Q2) is the median of a data set. So, 50% of the data lies | ||
// below this point. The third quartile (Q3) splits off the highest | ||
// 25% of data from the lowest 75%. It is known as the upper or 75th | ||
// empirical quartile, as 75% of the data lies below this point. So, | ||
// here the quartiles is provided as an ordered list of quartile | ||
// values, occurring in order Q1, median, Q3. | ||
repeated double quartiles = 6; | ||
|
||
// The maximum value of a double field in the sampled data. | ||
// Return NaN, if the field has a NaN. Optional if zero non-null | ||
// rows. | ||
double max = 5; | ||
} | ||
|
||
// The TopNValue defines the structure of output of top N values of a | ||
// field. | ||
message TopNValue { | ||
// The value is the string value of the actual value from the field. | ||
string value = 1; | ||
|
||
// The frequency count of the corresponding value in the field. | ||
int64 count = 2; | ||
} | ||
|
||
// The ratio of null rows against the rows in the sampled data. | ||
double null_ratio = 2; | ||
|
||
// The ratio of rows that are distinct against the rows in the sampled | ||
// data. | ||
double distinct_ratio = 3; | ||
|
||
// The array of top N values of the field in the sampled data. | ||
// Currently N is set as 10 or equal to distinct values in the field, | ||
// whichever is smaller. This will be optional for complex non-groupable | ||
// data-types such as JSON, ARRAY, JSON, STRUCT. | ||
repeated TopNValue top_n_values = 4; | ||
|
||
// The corresponding profile for specific field type. | ||
// Each field will have only one field type specific profile output. | ||
oneof field_info { | ||
// The corresponding string field profile. | ||
StringFieldInfo string_profile = 101; | ||
|
||
// The corresponding integer field profile. | ||
IntegerFieldInfo integer_profile = 102; | ||
|
||
// The corresponding double field profile. | ||
DoubleFieldInfo double_profile = 103; | ||
} | ||
} | ||
|
||
// The name of the field. | ||
string name = 1; | ||
|
||
// The field data type. Possible values include: | ||
// | ||
// * STRING | ||
// * BYTE | ||
// * INT64 | ||
// * INT32 | ||
// * INT16 | ||
// * DOUBLE | ||
// * FLOAT | ||
// * DECIMAL | ||
// * BOOLEAN | ||
// * BINARY | ||
// * TIMESTAMP | ||
// * DATE | ||
// * TIME | ||
// * NULL | ||
// * RECORD | ||
string type = 2; | ||
|
||
// The mode of the field. Its value will be: | ||
// REQUIRED, if it is a required field. | ||
// NULLABLE, if it is an optional field. | ||
// REPEATED, if it is a repeated field. | ||
string mode = 3; | ||
|
||
// The profile information for the corresponding field. | ||
ProfileInfo profile = 4; | ||
} | ||
|
||
// The sequence of fields describing data in table entities. | ||
repeated Field fields = 2; | ||
} | ||
|
||
// The count of all rows in the sampled data. | ||
// Return 0, if zero rows. | ||
int64 row_count = 3; | ||
|
||
// This represents the profile information per field. | ||
Profile profile = 4; | ||
|
||
// The data scanned for this profile. | ||
ScannedData scanned_data = 5; | ||
} |
Oops, something went wrong.