Skip to content

Commit

Permalink
feat: [dlp] Introduce Discovery API protos and methods (#4761)
Browse files Browse the repository at this point in the history
* feat: Introduce Discovery API protos and methods
    Add DeidentifyDataSource result summary protos
    Add protos for nullness and uniqueness, and column data profiles
    Add SensitivityScore proto to InfoType
docs: Update comments for many messages.

PiperOrigin-RevId: 576598642

Source-Link: googleapis/googleapis@24813ac

Source-Link: googleapis/googleapis-gen@8528bf8
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLXByaXZhY3ktZGxwLy5Pd2xCb3QueWFtbCIsImgiOiI4NTI4YmY4ZDc2MGM5MjUxYzIxMjI4MWJlMjFiZDAwZjE0NGM5YTY1In0=

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Denis DelGrosso <[email protected]>
  • Loading branch information
3 people authored Oct 26, 2023
1 parent ea49e4e commit 692e10f
Show file tree
Hide file tree
Showing 33 changed files with 26,316 additions and 9,509 deletions.
5 changes: 5 additions & 0 deletions packages/google-privacy-dlp/README.md

Large diffs are not rendered by default.

1,350 changes: 1,088 additions & 262 deletions packages/google-privacy-dlp/protos/google/privacy/dlp/v2/dlp.proto

Large diffs are not rendered by default.

124 changes: 85 additions & 39 deletions packages/google-privacy-dlp/protos/google/privacy/dlp/v2/storage.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,51 +38,73 @@ message InfoType {

// Optional version name for this InfoType.
string version = 2;

// Optional custom sensitivity for this InfoType.
// This only applies to data profiling.
SensitivityScore sensitivity_score = 3;
}

// Score is a summary of all elements in the data profile.
// A higher number means more sensitive.
// Score is calculated from all elements in the data profile.
// A higher level means the data is more sensitive.
message SensitivityScore {
// Various score levels for resources.
// Various sensitivity score levels for resources.
enum SensitivityScoreLevel {
// Unused.
SENSITIVITY_SCORE_UNSPECIFIED = 0;

// No sensitive information detected. Limited access.
// No sensitive information detected. The resource isn't publicly
// accessible.
SENSITIVITY_LOW = 10;

// Medium risk - PII, potentially sensitive data, or fields with free-text
// data that are at higher risk of having intermittent sensitive data.
// Consider limiting access.
// Medium risk. Contains personally identifiable information (PII),
// potentially sensitive data, or fields with free-text data that are at a
// higher risk of having intermittent sensitive data. Consider limiting
// access.
SENSITIVITY_MODERATE = 20;

// High risk – SPII may be present. Exfiltration of data may lead to user
// data loss. Re-identification of users may be possible. Consider limiting
// usage and or removing SPII.
// High risk. Sensitive personally identifiable information (SPII) can be
// present. Exfiltration of data can lead to user data loss.
// Re-identification of users might be possible. Consider limiting usage
// and/or removing SPII.
SENSITIVITY_HIGH = 30;
}

// The score applied to the resource.
// The sensitivity score applied to the resource.
SensitivityScoreLevel score = 1;
}

// Categorization of results based on how likely they are to represent a match,
// based on the number of elements they contain which imply a match.
// Coarse-grained confidence level of how well a particular finding
// satisfies the criteria to match a particular infoType.
//
// Likelihood is calculated based on the number of signals a
// finding has that imply that the finding matches the infoType. For
// example, a string that has an '@' and a '.com' is more likely to be a
// match for an email address than a string that only has an '@'.
//
// In general, the highest likelihood level has the strongest signals that
// indicate a match. That is, a finding with a high likelihood has a low chance
// of being a false positive.
//
// For more information about each likelihood level
// and how likelihood works, see [Match
// likelihood](https://cloud.google.com/dlp/docs/likelihood).
enum Likelihood {
// Default value; same as POSSIBLE.
LIKELIHOOD_UNSPECIFIED = 0;

// Few matching elements.
// Highest chance of a false positive.
VERY_UNLIKELY = 1;

// High chance of a false positive.
UNLIKELY = 2;

// Some matching elements.
// Some matching signals. The default value.
POSSIBLE = 3;

// Low chance of a false positive.
LIKELY = 4;

// Many matching elements.
// Confidence level is high. Lowest chance of a false positive.
VERY_LIKELY = 5;
}

Expand Down Expand Up @@ -163,9 +185,7 @@ message CustomInfoType {
// output. This should be used in conjunction with a field on the
// transformation such as `surrogate_info_type`. This CustomInfoType does
// not support the use of `detection_rules`.
message SurrogateType {

}
message SurrogateType {}

// Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
// `CustomInfoType` to alter behavior under certain circumstances, depending
Expand Down Expand Up @@ -282,6 +302,13 @@ message CustomInfoType {
// If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
// to be returned. It still can be used for rules matching.
ExclusionType exclusion_type = 8;

// Sensitivity for this CustomInfoType. If this CustomInfoType extends an
// existing InfoType, the sensitivity here will take precedence over that of
// the original InfoType. If unset for a CustomInfoType, it will default to
// HIGH.
// This only applies to data profiling.
SensitivityScore sensitivity_score = 9;
}

// General identifier of a data field in a storage service.
Expand Down Expand Up @@ -330,7 +357,7 @@ enum FileType {
// scanning attempts to convert the content of the file to utf_8 to scan
// the file.
// If you wish to avoid this fall back, specify one or more of the other
// FileType's in your storage scan.
// file types in your storage scan.
BINARY_FILE = 1;

// Included file extensions:
Expand All @@ -343,19 +370,24 @@ enum FileType {
TEXT_FILE = 2;

// Included file extensions:
// bmp, gif, jpg, jpeg, jpe, png.
// bytes_limit_per_file has no effect on image files.
// Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
// bmp, gif, jpg, jpeg, jpe, png. Setting
// [bytes_limit_per_file][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file]
// or
// [bytes_limit_per_file_percent][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file_percent]
// has no effect on image files. Image inspection is restricted to the
// `global`, `us`, `asia`, and `europe` regions.
IMAGE = 3;

// Word files >30 MB will be scanned as binary files.
// Microsoft Word files larger than 30 MB will be scanned as binary files.
// Included file extensions:
// docx, dotx, docm, dotm
// docx, dotx, docm, dotm. Setting `bytes_limit_per_file` or
// `bytes_limit_per_file_percent` has no effect on Word files.
WORD = 5;

// PDF files >30 MB will be scanned as binary files.
// PDF files larger than 30 MB will be scanned as binary files.
// Included file extensions:
// pdf
// pdf. Setting `bytes_limit_per_file` or `bytes_limit_per_file_percent`
// has no effect on PDF files.
PDF = 6;

// Included file extensions:
Expand All @@ -370,14 +402,16 @@ enum FileType {
// tsv
TSV = 9;

// Powerpoint files >30 MB will be scanned as binary files.
// Included file extensions:
// pptx, pptm, potx, potm, pot
// Microsoft PowerPoint files larger than 30 MB will be scanned as binary
// files. Included file extensions:
// pptx, pptm, potx, potm, pot. Setting `bytes_limit_per_file` or
// `bytes_limit_per_file_percent` has no effect on PowerPoint files.
POWERPOINT = 11;

// Excel files >30 MB will be scanned as binary files.
// Microsoft Excel files larger than 30 MB will be scanned as binary files.
// Included file extensions:
// xlsx, xlsm, xltx, xltm
// xlsx, xlsm, xltx, xltm. Setting `bytes_limit_per_file` or
// `bytes_limit_per_file_percent` has no effect on Excel files.
EXCEL = 12;
}

Expand Down Expand Up @@ -478,16 +512,22 @@ message CloudStorageOptions {
FileSet file_set = 1;

// Max number of bytes to scan from a file. If a scanned file's size is bigger
// than this value then the rest of the bytes are omitted. Only one
// of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
// Cannot be set if de-identification is requested.
// than this value then the rest of the bytes are omitted. Only one of
// `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be specified.
// This field can't be set if de-identification is requested. For certain file
// types, setting this field has no effect. For more information, see [Limits
// on bytes scanned per
// file](https://cloud.google.com/dlp/docs/supported-file-types#max-byte-size-per-file).
int64 bytes_limit_per_file = 4;

// Max percentage of bytes to scan from a file. The rest are omitted. The
// number of bytes scanned is rounded down. Must be between 0 and 100,
// inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
// of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
// Cannot be set if de-identification is requested.
// inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one of
// `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be specified.
// This field can't be set if de-identification is requested. For certain file
// types, setting this field has no effect. For more information, see [Limits
// on bytes scanned per
// file](https://cloud.google.com/dlp/docs/supported-file-types#max-byte-size-per-file).
int32 bytes_limit_per_file_percent = 8;

// List of file type groups to include in the scan.
Expand Down Expand Up @@ -565,9 +605,15 @@ message BigQueryOptions {

// References to fields excluded from scanning. This allows you to skip
// inspection of entire columns which you know have no findings.
// When inspecting a table, we recommend that you inspect all columns.
// Otherwise, findings might be affected because hints from excluded columns
// will not be used.
repeated FieldId excluded_fields = 5;

// Limit scanning only to these fields.
// When inspecting a table, we recommend that you inspect all columns.
// Otherwise, findings might be affected because hints from excluded columns
// will not be used.
repeated FieldId included_fields = 7;
}

Expand Down
Loading

0 comments on commit 692e10f

Please sign in to comment.