Skip to content

Commit

Permalink
feat: add sorting and ordering of results (#147)
Browse files Browse the repository at this point in the history
  • Loading branch information
dpebot authored and JustinBeckwith committed Sep 25, 2018
1 parent 30042b6 commit 187f935
Show file tree
Hide file tree
Showing 5 changed files with 470 additions and 24 deletions.
142 changes: 139 additions & 3 deletions packages/google-privacy-dlp/protos/google/privacy/dlp/v2/dlp.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2018 Google LLC
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -11,6 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

Expand Down Expand Up @@ -400,6 +401,59 @@ service DlpService {
}
}

// Set of infoTypes whose findings cause other findings to be excluded.
message ExcludeInfoTypes {
  // When used in an ExclusionRule, a finding is dropped if it overlaps with,
  // or is contained within, a finding of any infoType in this list. For
  // example, if `InspectionRuleSet.info_types` contains "PHONE_NUMBER" and
  // `exclusion_rule` contains `exclude_info_types.info_types` with
  // "EMAIL_ADDRESS", phone number findings are dropped when they overlap
  // with an EMAIL_ADDRESS finding. As a result, "555-222-2222@example.org"
  // generates only a single finding, namely the email address.
  repeated InfoType info_types = 1;
}

// Describes the conditions under which findings of the infoTypes listed in
// `InspectionRuleSet` are removed from the results.
message ExclusionRule {
  // The criterion this rule is based on; at most one of these may be set.
  oneof type {
    // Rule defined by a dictionary.
    CustomInfoType.Dictionary dictionary = 1;

    // Rule defined by a regular expression.
    CustomInfoType.Regex regex = 2;

    // Rule defined by the findings of the infoTypes in this set.
    ExcludeInfoTypes exclude_info_types = 3;
  }

  // Determines how the rule is applied; see the MatchingType documentation
  // for details.
  MatchingType matching_type = 4;
}

// One inspection rule that is applied to the infoTypes listed in
// `InspectionRuleSet`.
message InspectionRule {
  // The kind of rule; at most one of these may be set.
  oneof type {
    // Detection rule based on hotwords.
    CustomInfoType.DetectionRule.HotwordRule hotword_rule = 1;

    // Rule that excludes findings.
    ExclusionRule exclusion_rule = 2;
  }
}

// A group of rules that modifies a set of infoTypes, altering their behavior
// under certain circumstances according to the details of the rules in the
// group.
message InspectionRuleSet {
  // The infoTypes that this rule set applies to.
  repeated InfoType info_types = 1;

  // The rules to apply to the infoTypes. Rules are applied in order.
  repeated InspectionRule rules = 2;
}

// Configuration description of the scanning process.
// When used with redactContent only info_types and min_likelihood are currently
// used.
Expand Down Expand Up @@ -468,6 +522,11 @@ message InspectConfig {
// List of options defining data content to scan.
// If empty, text, images, and other content will be included.
repeated ContentOption content_options = 8;

// Set of rules to apply to the findings for this InspectConfig.
// Exclusion rules contained in the set are executed last; other
// rules are executed in the order they are specified for each info type.
repeated InspectionRuleSet rule_set = 10;
}

// Container for bytes to inspect or redact.
Expand Down Expand Up @@ -2335,6 +2394,21 @@ message ListInspectTemplatesRequest {
// Optional size of the page, can be limited by server. If zero server returns
// a page of max size 100.
int32 page_size = 3;

// Optional comma separated list of fields to order by,
// followed by `asc` or `desc` postfix. This list is case-insensitive,
// default sorting order is ascending, redundant space characters are
// insignificant.
//
// Example: `name asc,update_time, create_time desc`
//
// Supported fields are:
//
// - `create_time`: corresponds to time the template was created.
// - `update_time`: corresponds to time the template was last updated.
// - `name`: corresponds to template's name.
// - `display_name`: corresponds to template's display name.
string order_by = 4;
}

// Response message for ListInspectTemplates.
Expand Down Expand Up @@ -2433,9 +2507,11 @@ message ListJobTriggersRequest {
//
// Supported fields are:
//
// - `create_time`: corresponds to time the triggeredJob was created.
// - `update_time`: corresponds to time the triggeredJob was last updated.
// - `create_time`: corresponds to time the JobTrigger was created.
// - `update_time`: corresponds to time the JobTrigger was last updated.
// - `name`: corresponds to JobTrigger's name.
// - `display_name`: corresponds to JobTrigger's display name.
// - `status`: corresponds to JobTrigger's status.
string order_by = 4;
}

Expand Down Expand Up @@ -2646,6 +2722,21 @@ message ListDeidentifyTemplatesRequest {
// Optional size of the page, can be limited by server. If zero server returns
// a page of max size 100.
int32 page_size = 3;

// Optional comma separated list of fields to order by,
// followed by `asc` or `desc` postfix. This list is case-insensitive,
// default sorting order is ascending, redundant space characters are
// insignificant.
//
// Example: `name asc,update_time, create_time desc`
//
// Supported fields are:
//
// - `create_time`: corresponds to time the template was created.
// - `update_time`: corresponds to time the template was last updated.
// - `name`: corresponds to template's name.
// - `display_name`: corresponds to template's display name.
string order_by = 4;
}

// Response message for ListDeidentifyTemplates.
Expand Down Expand Up @@ -2798,6 +2889,22 @@ message ListStoredInfoTypesRequest {
// Optional size of the page, can be limited by server. If zero server returns
// a page of max size 100.
int32 page_size = 3;

// Optional comma separated list of fields to order by,
// followed by `asc` or `desc` postfix. This list is case-insensitive,
// default sorting order is ascending, redundant space characters are
// insignificant.
//
// Example: `name asc, display_name, create_time desc`
//
// Supported fields are:
//
// - `create_time`: corresponds to time the most recent version of the
// resource was created.
// - `state`: corresponds to the state of the resource.
// - `name`: corresponds to resource name.
// - `display_name`: corresponds to info type's display name.
string order_by = 4;
}

// Response message for ListStoredInfoTypes.
Expand Down Expand Up @@ -2830,6 +2937,35 @@ enum ContentOption {
CONTENT_IMAGE = 2;
}

// Kind of match to perform. It applies to the different ways of matching:
// dictionaries, regular expressions, and intersection with findings of
// another info type.
enum MatchingType {
  // Invalid.
  MATCHING_TYPE_UNSPECIFIED = 0;

  // Full match.
  //
  // - Dictionary: the joined dictionary results match the complete finding
  //   quote
  // - Regex: all regex matches fill the finding quote from start to end
  // - Exclude info type: the finding is completely inside the affecting info
  //   types' findings
  MATCHING_TYPE_FULL_MATCH = 1;

  // Partial match.
  //
  // - Dictionary: at least one of the tokens in the finding matches
  // - Regex: a substring of the finding matches
  // - Exclude info type: the finding intersects with the affecting info
  //   types' findings
  MATCHING_TYPE_PARTIAL_MATCH = 2;

  // Inverse match.
  //
  // - Dictionary: no tokens in the finding match the dictionary
  // - Regex: the finding doesn't match the regex
  // - Exclude info type: the finding has no intersection with the affecting
  //   info types' findings
  MATCHING_TYPE_INVERSE_MATCH = 3;
}

// Parts of the APIs which use certain infoTypes.
enum InfoTypeSupportedBy {
ENUM_TYPE_UNSPECIFIED = 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2018 Google LLC
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -11,6 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

Expand Down Expand Up @@ -95,7 +96,9 @@ message CustomInfoType {

// Message defining a custom regular expression.
message Regex {
// Pattern defining the regular expression.
// Pattern defining the regular expression. Its syntax
// (https://github.com/google/re2/wiki/Syntax) can be found under the
// google/re2 repository on GitHub.
string pattern = 1;
}

Expand Down Expand Up @@ -170,8 +173,21 @@ message CustomInfoType {
}
}

// All CustomInfoTypes must have a name
// that does not conflict with built-in InfoTypes or other CustomInfoTypes.
enum ExclusionType {
  // Controls whether findings of a custom info type appear in the results.

  // Findings of this custom info type are not excluded from the results.
  EXCLUSION_TYPE_UNSPECIFIED = 0;

  // Findings of this custom info type are excluded from the final results,
  // but can still affect rule execution.
  EXCLUSION_TYPE_EXCLUDE = 1;
}

// CustomInfoType can either be a new infoType or an extension of a built-in
// infoType, when the name matches one of the existing infoTypes and that
// infoType is specified in the `InspectContent.info_types` field. Specifying
// the latter adds findings to the ones detected by the system. If the
// built-in info type is not specified in the `InspectContent.info_types`
// list, then the name is treated as a custom info type.
InfoType info_type = 1;

// Likelihood to return for this CustomInfoType. This base value can be
Expand Down Expand Up @@ -199,6 +215,10 @@ message CustomInfoType {
// Rules are applied in order that they are specified. Not supported for the
// `surrogate_type` CustomInfoType.
repeated DetectionRule detection_rules = 7;

// If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
// to be returned. It still can be used for rules matching.
ExclusionType exclusion_type = 8;
}

// General identifier of a data field in a storage service.
Expand Down Expand Up @@ -237,13 +257,13 @@ message DatastoreOptions {
KindExpression kind = 2;
}

// Options defining a file or a set of files (path ending with *) within
// a Google Cloud Storage bucket.
// Options defining a file or a set of files within a Google Cloud Storage
// bucket.
message CloudStorageOptions {
// Set of files to scan.
message FileSet {
// The url, in the format `gs://<bucket>/<path>`. Trailing wildcard in the
// path is allowed.
// The Cloud Storage url of the file(s) to scan, in the format
// `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
string url = 1;
}

Expand All @@ -261,6 +281,7 @@ message CloudStorageOptions {
RANDOM_START = 2;
}

// The set of one or more files to scan.
FileSet file_set = 1;

// Max number of bytes to scan from a file. If a scanned file's size is bigger
Expand Down
Loading

0 comments on commit 187f935

Please sign in to comment.