From 7f531ae1a16cdfa2c9c6f561b92ec627cd37ea40 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Mon, 4 Sep 2017 23:01:00 -0700 Subject: [PATCH 01/11] UNTESTED: Add DLP BigQuery sample --- .../main/java/com/example/dlp/Inspect.java | 95 ++++++++++++++++++- .../test/java/com/example/dlp/InspectIT.java | 9 +- 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 03e3b616b09..7a5177d379b 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -332,6 +332,78 @@ private static void inspectDatastore( // [END dlp_inspect_datastore] } + private static void inspectBigquery( + String projectId, + String datasetId, + String tableId, + Likelihood minLikelihood, + List infoTypes) { + // [START dlp_inspect_datastore] + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // (Optional) The project ID to run the API call under + // projectId = my-project-id + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // datasetId = "my_dataset"; + + // The ID of the table to inspect, e.g. 'my_table' + // tableId = "my_table"; + + // The minimum likelihood required before returning a match + // minLikelihood = LIKELIHOOD_UNSPECIFIED; + + // The infoTypes of information to match + // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; + + // Reference to the Datastore namespace + TableReference tableReference = + TableReference.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build(); + + // Reference to the Datastore kind + BigqueryOptions bigqueryOptions = + BigqueryOptions.newBuilder().setKind(kindExpression).setPartitionId(partitionId).build(); + + // Construct Datastore configuration to be inspected + StorageConfig storageConfig = + StorageConfig.newBuilder().setTableReference(tableReference).build(); + + InspectConfig inspectConfig = + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .build(); + + // optionally provide an output configuration to store results, default : none + OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); + + // asynchronously submit an inspect operation + OperationFuture responseFuture = + dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); + + // ... + // block on response, returning job id of the operation + InspectOperationResult inspectOperationResult = responseFuture.get(); + ResultName resultName = inspectOperationResult.getNameAsResultName(); + InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); + + if (inspectResult.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : inspectResult.getFindingsList()) { + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); + } + } else { + System.out.println("No findings."); + } + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Error in inspectBigguery: " + e.getMessage()); + } + // [END dlp_inspect_datastore] + } + /** * Command line application to inspect data using the Data Loss Prevention API. * Supported data formats : string, file, text files on GCS and Datastore entities @@ -352,6 +424,9 @@ public static void main(String[] args) throws Exception { Option datastoreOption = new Option("ds", "Google Datastore", false, "inspect Datastore kind"); optionsGroup.addOption(datastoreOption); + Option bigqueryOption = new Option("bq", "Google BigQuery", false, "inspect BigQuery table"); + optionsGroup.addOption(bigqueryOption); + Options commandLineOptions = new Options(); commandLineOptions.addOptionGroup(optionsGroup); @@ -377,9 +452,15 @@ public static void main(String[] args) throws Exception { Option gcsFileNameOption = Option.builder("fileName").hasArg(true).required(false).build(); commandLineOptions.addOption(gcsFileNameOption); - Option datastoreProjectIdOption = + Option datasetIdOption = Option.builder("datasetId").hasArg(true).required(false).build(); + commandLineOptions.addOption(datasetIdOption); + + Option tableIdOption = Option.builder("tableId").hasArg(true).required(false).build(); + commandLineOptions.addOption(tableIdOption); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); - commandLineOptions.addOption(datastoreProjectIdOption); + commandLineOptions.addOption(projectIdOption); Option datastoreNamespaceOption = Option.builder("namespace").hasArg(true).required(false).build(); @@ -436,8 +517,16 @@ public static void main(String[] args) throws Exception { // use default project id when project id is not specified String projectId = cmd.getOptionValue( - datastoreProjectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); inspectDatastore(projectId, namespaceId, kind, minLikelihood, infoTypesList); + } else if (cmd.hasOption("bq")) { + String datasetId = cmd.getOptionValue(datasetIdOption.getOpt()); + String tableId = cmd.getOptionValue(tableIdOption.getOpt()); + // use default project id when project id is not specified + String projectId = + cmd.getOptionValue( + projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + inspectBigquery(projectId, datasetId, tableId, minLikelihood, infoTypesList); } } } diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 788236a72fb..e4c2bfcf3b9 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -87,7 +87,7 @@ public void testGcsFileInspectionReturnsInfoTypes() throws Exception { assertTrue(output.contains("EMAIL_ADDRESS")); } - // Requires a Datastore kind containing an entity + // Requires a Datastore kind containing an entity // with phone number and email address properties. @Test public void testDatastoreInspectionReturnsInfoTypes() throws Exception { @@ -97,6 +97,13 @@ public void testDatastoreInspectionReturnsInfoTypes() throws Exception { assertTrue(output.contains("EMAIL_ADDRESS")); } + @Test + public void testBigqueryInspectionReturnsInfoTypes() throws Exception { + Inspect.main(new String[] {"-bq", "-dataset", "integration_tests_dlp", "-table", "harmful"}); + String output = bout.toString(); + assertTrue(output.contains("CREDIT_CARD_NUMBER")); + } + @After public void tearDown() { System.setOut(null); From eca2c8b361c07f8f9b20c060b7cca7dd80c3af5c Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Tue, 17 Oct 2017 23:18:32 -0700 Subject: [PATCH 02/11] Update DLP dependency --- dlp/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlp/pom.xml b/dlp/pom.xml index 5c5a86c0e71..5d9f75c3cd0 100644 --- a/dlp/pom.xml +++ b/dlp/pom.xml @@ -42,7 +42,7 @@ com.google.cloud google-cloud-dlp - 0.21.1-alpha + 0.26.0-alpha From 3c8a224944c10825fe9112cbba396cb8fb6fba9f Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Tue, 17 Oct 2017 23:24:59 -0700 Subject: [PATCH 03/11] Fix inspectBQ tests --- .../main/java/com/example/dlp/Inspect.java | 40 +++++-------------- .../test/java/com/example/dlp/InspectIT.java | 4 +- 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 7a5177d379b..235127cff71 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -20,24 +20,8 @@ import com.google.cloud.ServiceOptions; import com.google.cloud.dlp.v2beta1.DlpServiceClient; import com.google.longrunning.Operation; -import com.google.privacy.dlp.v2beta1.CloudStorageOptions; +import com.google.privacy.dlp.v2beta1.*; import com.google.privacy.dlp.v2beta1.CloudStorageOptions.FileSet; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.DatastoreOptions; -import com.google.privacy.dlp.v2beta1.Finding; -import com.google.privacy.dlp.v2beta1.InfoType; -import com.google.privacy.dlp.v2beta1.InspectConfig; -import com.google.privacy.dlp.v2beta1.InspectContentRequest; -import com.google.privacy.dlp.v2beta1.InspectContentResponse; -import com.google.privacy.dlp.v2beta1.InspectOperationMetadata; -import com.google.privacy.dlp.v2beta1.InspectOperationResult; -import com.google.privacy.dlp.v2beta1.InspectResult; -import com.google.privacy.dlp.v2beta1.KindExpression; -import com.google.privacy.dlp.v2beta1.Likelihood; -import com.google.privacy.dlp.v2beta1.OutputStorageConfig; -import com.google.privacy.dlp.v2beta1.PartitionId; -import com.google.privacy.dlp.v2beta1.ResultName; -import com.google.privacy.dlp.v2beta1.StorageConfig; import com.google.protobuf.ByteString; import java.net.URLConnection; import java.nio.file.Files; @@ -338,7 +322,7 @@ private static void inspectBigquery( String tableId, Likelihood minLikelihood, List infoTypes) { - // [START dlp_inspect_datastore] + // [START dlp_inspect_bigquery] // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -357,17 +341,15 @@ private static void inspectBigquery( // The infoTypes of information to match // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - // Reference to the Datastore namespace - TableReference tableReference = - TableReference.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build(); - - // Reference to the Datastore kind - BigqueryOptions bigqueryOptions = - BigqueryOptions.newBuilder().setKind(kindExpression).setPartitionId(partitionId).build(); + // Reference to the BigQuery table + BigQueryTable tableReference = + BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build(); + BigQueryOptions bigQueryOptions = + BigQueryOptions.newBuilder().setTableReference(tableReference).build(); - // Construct Datastore configuration to be inspected + // Construct BigQuery configuration to be inspected StorageConfig storageConfig = - StorageConfig.newBuilder().setTableReference(tableReference).build(); + StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() @@ -401,12 +383,12 @@ private static void inspectBigquery( e.printStackTrace(); System.out.println("Error in inspectBigguery: " + e.getMessage()); } - // [END dlp_inspect_datastore] + // [END dlp_inspect_bigquery] } /** * Command line application to inspect data using the Data Loss Prevention API. - * Supported data formats : string, file, text files on GCS and Datastore entities + * Supported data formats: strings, files, text files on GCS, BigQuery tables, and Datastore entities */ public static void main(String[] args) throws Exception { diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index e4c2bfcf3b9..2039839e5ce 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -99,9 +99,9 @@ public void testDatastoreInspectionReturnsInfoTypes() throws Exception { @Test public void testBigqueryInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] {"-bq", "-dataset", "integration_tests_dlp", "-table", "harmful"}); + Inspect.main(new String[] {"-bq", "-datasetId", "integration_tests_dlp", "-tableId", "harmful"}); String output = bout.toString(); - assertTrue(output.contains("CREDIT_CARD_NUMBER")); + assertTrue(output.contains("PHONE_NUMBER")); } @After From 2a6148bd4e45a077e5ff7b8824434881fb215ad5 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Tue, 17 Oct 2017 23:26:20 -0700 Subject: [PATCH 04/11] Add DeID + RiskAnalysis samples --- dlp/src/main/java/com/example/dlp/DeId.java | 237 ++++++++++ .../java/com/example/dlp/RiskAnalysis.java | 441 ++++++++++++++++++ dlp/src/test/java/com/example/dlp/DeIdIT.java | 77 +++ .../java/com/example/dlp/RiskAnalysisIT.java | 105 +++++ 4 files changed, 860 insertions(+) create mode 100644 dlp/src/main/java/com/example/dlp/DeId.java create mode 100644 dlp/src/main/java/com/example/dlp/RiskAnalysis.java create mode 100644 dlp/src/test/java/com/example/dlp/DeIdIT.java create mode 100644 dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java diff --git a/dlp/src/main/java/com/example/dlp/DeId.java b/dlp/src/main/java/com/example/dlp/DeId.java new file mode 100644 index 00000000000..31b16499d1b --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/DeId.java @@ -0,0 +1,237 @@ +/** + * Copyright 2017 Google Inc. + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import com.google.cloud.dlp.v2beta1.DlpServiceClient; +import com.google.common.io.BaseEncoding; +import com.google.privacy.dlp.v2beta1.*; +import com.google.privacy.dlp.v2beta1.InfoTypeTransformations.InfoTypeTransformation; +import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; +import com.google.protobuf.ByteString; +import org.apache.commons.cli.*; + +public class DeId { + + private static void deidentifyWithMask(String string, Character maskingCharacter, int numberToMask) { + // [START dlp_deidentify_mask] + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // The string to deidentify + // string = "My SSN is 372819127"; + + // (Optional) The maximum number of sensitive characters to mask in a match + // If omitted from the request or set to 0, the API will mask any matching characters + // numberToMask = 5; + + // (Optional) The character to mask matching sensitive data with + // maskingCharacter = 'x'; + + ContentItem contentItem = + ContentItem.newBuilder() + .setType("text/plain") + .setValue(string) + .build(); + + CharacterMaskConfig characterMaskConfig = + CharacterMaskConfig.newBuilder() + .setMaskingCharacter(maskingCharacter.toString()) + .setNumberToMask(numberToMask) + .build(); + + PrimitiveTransformation primitiveTransformation = + PrimitiveTransformation.newBuilder() + .setCharacterMaskConfig(characterMaskConfig) + .build(); + + InfoTypeTransformation infoTypeTransformationObject = + InfoTypeTransformation.newBuilder() + .setPrimitiveTransformation(primitiveTransformation) + .build(); + + InfoTypeTransformations infoTypeTransformationArray = + InfoTypeTransformations.newBuilder() + .addTransformations(infoTypeTransformationObject) + .build(); + + DeidentifyConfig deidentifyConfig = + DeidentifyConfig.newBuilder() + .setInfoTypeTransformations(infoTypeTransformationArray) + .build(); + + DeidentifyContentRequest request = + DeidentifyContentRequest.newBuilder() + .setDeidentifyConfig(deidentifyConfig) + .addItems(contentItem) + .build(); + + DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); + + for (ContentItem item : response.getItemsList()) { + System.out.println(item.getValue()); + } + } catch (Exception e) { + System.out.println("Error in deidentifyWithMask: " + e.getMessage()); + } + // [END dlp_deidentify_mask] + } + + private static void deidentifyWithFpe( + String string, CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey) { + // [START dlp_deidentify_fpe] + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // The string to deidentify + // string = "My SSN is 372819127"; + + // The set of characters to replace sensitive ones with + // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify#FfxCommonNativeAlphabet + // alphabet = FfxCommonNativeAlphabet.ALPHA_NUMERIC; + + // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key + // keyName = "projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME"; + + // The encrypted ('wrapped') AES-256 key to use + // This key should be encrypted using the Cloud KMS key specified above + // const wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY" + + ContentItem contentItem = + ContentItem.newBuilder() + .setType("text/plain") + .setValue(string) + .build(); + + KmsWrappedCryptoKey kmsWrappedCryptoKey = + KmsWrappedCryptoKey.newBuilder() + .setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))) + .setCryptoKeyName(keyName) + .build(); + + CryptoKey cryptoKey = + CryptoKey.newBuilder() + .setKmsWrapped(kmsWrappedCryptoKey) + .build(); + + CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig = + CryptoReplaceFfxFpeConfig.newBuilder() + .setCryptoKey(cryptoKey) + .setCommonAlphabet(alphabet) + .build(); + + PrimitiveTransformation primitiveTransformation = + PrimitiveTransformation.newBuilder() + .setCryptoReplaceFfxFpeConfig(cryptoReplaceFfxFpeConfig) + .build(); + + InfoTypeTransformation infoTypeTransformationObject = + InfoTypeTransformation.newBuilder() + .setPrimitiveTransformation(primitiveTransformation) + .build(); + + InfoTypeTransformations infoTypeTransformationArray = + InfoTypeTransformations.newBuilder() + .addTransformations(infoTypeTransformationObject) + .build(); + + DeidentifyConfig deidentifyConfig = + DeidentifyConfig.newBuilder() + .setInfoTypeTransformations(infoTypeTransformationArray) + .build(); + + DeidentifyContentRequest request = + DeidentifyContentRequest.newBuilder() + .setDeidentifyConfig(deidentifyConfig) + .addItems(contentItem) + .build(); + + DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); + + for (ContentItem item : response.getItemsList()) { + System.out.println(item.getValue()); + } + } catch (Exception e) { + System.out.println("Error in deidentifyWithFpe: " + e.getMessage()); + } + // [END dlp_deidentify_fpe] + } + + /** + * Command line application to de-identify data using the Data Loss Prevention API. + * Supported data format: strings + */ + public static void main(String[] args) throws Exception { + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + + Option deidentifyMaskingOption = new Option("m", "mask", true, "deid with character masking"); + optionsGroup.addOption(deidentifyMaskingOption); + + Option deidentifyFpeOption = new Option("f", "fpe", true, "deid with FFX FPE"); + optionsGroup.addOption(deidentifyFpeOption); + + Options commandLineOptions = new Options(); + commandLineOptions.addOptionGroup(optionsGroup); + + Option maskingCharacterOption = Option.builder("maskingCharacter").hasArg(true).required(false).build(); + commandLineOptions.addOption(maskingCharacterOption); + + Option numberToMaskOption = Option.builder("numberToMask").hasArg(true).required(false).build(); + commandLineOptions.addOption(numberToMaskOption); + + Option alphabetOption = Option.builder("commonAlphabet").hasArg(true).required(false).build(); + commandLineOptions.addOption(alphabetOption); + + Option wrappedKeyOption = Option.builder("wrappedKey").hasArg(true).required(false).build(); + commandLineOptions.addOption(wrappedKeyOption); + + Option keyNameOption = Option.builder("keyName").hasArg(true).required(false).build(); + commandLineOptions.addOption(keyNameOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(DeId.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + if (cmd.hasOption("m")) { + // deid with character masking + int numberToMask = Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0")); + char maskingCharacter = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*").charAt(0); + String val = cmd.getOptionValue(deidentifyMaskingOption.getOpt()); + deidentifyWithMask(val, maskingCharacter, numberToMask); + } else if (cmd.hasOption("f")) { + // deid with FPE + String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); + String keyName = cmd.getOptionValue(keyNameOption.getOpt()); + String val = cmd.getOptionValue(deidentifyFpeOption.getOpt()); + FfxCommonNativeAlphabet alphabet = + FfxCommonNativeAlphabet.valueOf( + cmd.getOptionValue( + alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); + deidentifyWithFpe(val, alphabet, keyName, wrappedKey); + } + } +} diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java new file mode 100644 index 00000000000..d0009980a88 --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -0,0 +1,441 @@ +/** + * Copyright 2017 Google Inc. + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import com.google.api.gax.rpc.OperationFuture; +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2beta1.DlpServiceClient; +import com.google.longrunning.Operation; +import com.google.privacy.dlp.v2beta1.*; +import com.google.privacy.dlp.v2beta1.PrivacyMetric.NumericalStatsConfig; +import com.google.privacy.dlp.v2beta1.PrivacyMetric.CategoricalStatsConfig; +import com.google.privacy.dlp.v2beta1.PrivacyMetric.KAnonymityConfig; +import com.google.privacy.dlp.v2beta1.PrivacyMetric.LDiversityConfig; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.NumericalStatsResult; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityHistogramBucket; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityEquivalenceClass; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityHistogramBucket; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityEquivalenceClass; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.CategoricalStatsResult.CategoricalStatsHistogramBucket; +import org.apache.commons.cli.*; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +public class RiskAnalysis { + + private static void numericalStatsAnalysis( + String projectId, String datasetId, String tableId, String columnName) + throws Exception { + // [START dlp_numerical_stats_analysis] + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // (Optional) The project ID to run the API call under + // projectId = process.env.GCLOUD_PROJECT; + + // The ID of the dataset to inspect, e.g. "my_dataset" + // datasetId = "my_dataset"; + + // The ID of the table to inspect, e.g. "my_table" + // tableId = "my_table"; + + // The name of the column to compute risk metrics for, e.g. 'firstName' + // columnName = "firstName"; + + FieldId fieldId = + FieldId.newBuilder() + .setColumnName(columnName) + .build(); + + NumericalStatsConfig numericalStatsConfig = + NumericalStatsConfig.newBuilder() + .setField(fieldId) + .build(); + + BigQueryTable bigQueryTable = + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); + + PrivacyMetric privacyMetric = + PrivacyMetric.newBuilder() + .setNumericalStatsConfig(numericalStatsConfig) + .build(); + + AnalyzeDataSourceRiskRequest request = + AnalyzeDataSourceRiskRequest.newBuilder() + .setPrivacyMetric(privacyMetric) + .setSourceTable(bigQueryTable) + .build(); + + // asynchronously submit a risk analysis operation + OperationFuture responseFuture = + dlpServiceClient.analyzeDataSourceRiskAsync(request); + + // ... + // block on response + RiskAnalysisOperationResult response = responseFuture.get(); + NumericalStatsResult results = + response.getNumericalStatsResult(); + + System.out.println("Value range: [" + results.getMaxValue() + ", " + results.getMinValue() + "]"); + + // Print out unique quantiles + String previousValue = ""; + for (int i = 0; i < results.getQuantileValuesCount(); i++) { + Value valueObj = results.getQuantileValues(i); + String value = valueObj.toString(); + + if (!previousValue.equals(value)) { + System.out.println("Value at " + i + "% quantile: " + value.toString()); + previousValue = value; + } + } + } catch (Exception e) { + System.out.println("Error in numericalStatsAnalysis: " + e.getMessage()); + } + // [END dlp_numerical_stats_analysis] + } + + private static void categoricalStatsAnalysis( + String projectId, String datasetId, String tableId, String columnName) + throws Exception { + // [START dlp_categorical_stats_analysis] + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // (Optional) The project ID to run the API call under + // projectId = process.env.GCLOUD_PROJECT; + + // The ID of the dataset to inspect, e.g. "my_dataset" + // datasetId = "my_dataset"; + + // The ID of the table to inspect, e.g. "my_table" + // tableId = "my_table"; + + // The name of the column to compute risk metrics for, e.g. 'firstName' + // columnName = "firstName"; + + FieldId fieldId = + FieldId.newBuilder() + .setColumnName(columnName) + .build(); + + CategoricalStatsConfig categoricalStatsConfig = + CategoricalStatsConfig.newBuilder() + .setField(fieldId) + .build(); + + BigQueryTable bigQueryTable = + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); + + PrivacyMetric privacyMetric = + PrivacyMetric.newBuilder() + .setCategoricalStatsConfig(categoricalStatsConfig) + .build(); + + AnalyzeDataSourceRiskRequest request = + AnalyzeDataSourceRiskRequest.newBuilder() + .setPrivacyMetric(privacyMetric) + .setSourceTable(bigQueryTable) + .build(); + + // asynchronously submit a risk analysis operation + OperationFuture responseFuture = + dlpServiceClient.analyzeDataSourceRiskAsync(request); + + // ... + // block on response + RiskAnalysisOperationResult response = responseFuture.get(); + CategoricalStatsHistogramBucket results = + response.getCategoricalStatsResult().getValueFrequencyHistogramBuckets(0); + + System.out.println("Most common value occurs " + results.getValueFrequencyUpperBound() + " time(s)"); + System.out.println("Least common value occurs " + results.getValueFrequencyLowerBound() + " time(s)"); + + for (ValueFrequency valueFrequency : results.getBucketValuesList()) { + System.out.println("Value " + + valueFrequency.getValue().toString() + + " occurs " + + valueFrequency.getCount() + + " time(s)."); + } + + } catch (Exception e) { + System.out.println("Error in categoricalStatsAnalysis: " + e.getMessage()); + } + // [END dlp_categorical_stats_analysis] + } + + private static void kAnonymityAnalysis( + String projectId, String datasetId, String tableId, List quasiIds) + throws Exception { + // [START dlp_k_anonymity] + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // (Optional) The project ID to run the API call under + // projectId = process.env.GCLOUD_PROJECT; + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // datasetId = 'my_dataset'; + + // The ID of the table to inspect, e.g. 'my_table' + // tableId = 'my_table'; + + // A set of columns that form a composite key ('quasi-identifiers') + // quasiIds = [{ columnName: 'age' }, { columnName: 'city' }]; + + List quasiIdFields = + quasiIds + .stream() + .map(columnName -> FieldId.newBuilder().setColumnName(columnName).build()) + .collect(Collectors.toList()); + + KAnonymityConfig kAnonymityConfig = + KAnonymityConfig.newBuilder() + .addAllQuasiIds(quasiIdFields) + .build(); + + BigQueryTable bigQueryTable = + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); + + PrivacyMetric privacyMetric = + PrivacyMetric.newBuilder() + .setKAnonymityConfig(kAnonymityConfig) + .build(); + + AnalyzeDataSourceRiskRequest request = + AnalyzeDataSourceRiskRequest.newBuilder() + .setPrivacyMetric(privacyMetric) + .setSourceTable(bigQueryTable) + .build(); + + // asynchronously submit a risk analysis operation + OperationFuture responseFuture = + dlpServiceClient.analyzeDataSourceRiskAsync(request); + + // ... + // block on response + RiskAnalysisOperationResult response = responseFuture.get(); + KAnonymityHistogramBucket results = + response.getKAnonymityResult().getEquivalenceClassHistogramBuckets(0); + + System.out.println("Bucket size range: [" + + results.getEquivalenceClassSizeLowerBound() + + ", " + + results.getEquivalenceClassSizeUpperBound() + + "]" + ); + + for (KAnonymityEquivalenceClass bucket : results.getBucketValuesList()) { + List quasiIdValues = bucket.getQuasiIdsValuesList() + .stream() + .map(v -> v.toString()) + .collect(Collectors.toList()); + + System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); + System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); + } + } catch (Exception e) { + System.out.println("Error in kAnonymityAnalysis: " + e.getMessage()); + } + // [END dlp_k_anonymity] + } + + private static void lDiversityAnalysis( + String projectId, String datasetId, String tableId, String sensitiveAttribute, List quasiIds) + throws Exception { + // [START dlp_l_diversity] + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // (Optional) The project ID to run the API call under + // projectId = process.env.GCLOUD_PROJECT; + + // The ID of the dataset to inspect, e.g. "my_dataset" + // datasetId = "my_dataset"; + + // The ID of the table to inspect, e.g. "my_table" + // tableId = "my_table"; + + // The column to measure l-diversity relative to, e.g. "firstName" + // sensitiveAttribute = "name"; + + // A set of columns that form a composite key ('quasi-identifiers') + // quasiIds = [{ columnName: "age" }, { columnName: "city" }]; + + FieldId sensitiveAttributeField = FieldId.newBuilder().setColumnName(sensitiveAttribute).build(); + + List quasiIdFields = + quasiIds + .stream() + .map(columnName -> FieldId.newBuilder().setColumnName(columnName).build()) + .collect(Collectors.toList()); + + LDiversityConfig lDiversityConfig = + LDiversityConfig.newBuilder() + .addAllQuasiIds(quasiIdFields) + .setSensitiveAttribute(sensitiveAttributeField) + .build(); + + BigQueryTable bigQueryTable = + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); + + PrivacyMetric privacyMetric = + PrivacyMetric.newBuilder() + .setLDiversityConfig(lDiversityConfig) + .build(); + + AnalyzeDataSourceRiskRequest request = + AnalyzeDataSourceRiskRequest.newBuilder() + .setPrivacyMetric(privacyMetric) + .setSourceTable(bigQueryTable) + .build(); + + // asynchronously submit a risk analysis operation + OperationFuture responseFuture = + dlpServiceClient.analyzeDataSourceRiskAsync(request); + + // ... + // block on response + RiskAnalysisOperationResult response = responseFuture.get(); + LDiversityHistogramBucket results = + response.getLDiversityResult().getSensitiveValueFrequencyHistogramBuckets(0); + + for (LDiversityEquivalenceClass bucket : results.getBucketValuesList()) { + List quasiIdValues = bucket.getQuasiIdsValuesList() + .stream() + .map(v -> v.toString()) + .collect(Collectors.toList()); + + System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); + System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); + + for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) { + System.out.println("\t\tSensitive value " + + valueFrequency.getValue().toString() + + " occurs " + + valueFrequency.getCount() + + " time(s)."); + } + } + } catch (Exception e) { + System.out.println("Error in lDiversityAnalysis: " + e.getMessage()); + } + // [END dlp_l_diversity] + } + + + /** + * Command line application to perform risk analysis using the Data Loss Prevention API. + * Supported data format: BigQuery tables + */ + public static void main(String[] args) throws Exception { + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + + Option numericalAnalysisOption = new Option("n", "numerical"); + optionsGroup.addOption(numericalAnalysisOption); + + Option categoricalAnalysisOption = new Option("c", "categorical"); + optionsGroup.addOption(categoricalAnalysisOption); + + Option kAnonymityOption = new Option("k", "kAnonymity"); + optionsGroup.addOption(kAnonymityOption); + + Option lDiversityOption = new Option("l", "lDiversity"); + optionsGroup.addOption(lDiversityOption); + + Options commandLineOptions = new Options(); + commandLineOptions.addOptionGroup(optionsGroup); + + Option datasetIdOption = Option.builder("datasetId").hasArg(true).required(false).build(); + commandLineOptions.addOption(datasetIdOption); + + Option tableIdOption = Option.builder("tableId").hasArg(true).required(false).build(); + commandLineOptions.addOption(tableIdOption); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + + Option columnNameOption = Option.builder("columnName").hasArg(true).required(false).build(); + commandLineOptions.addOption(columnNameOption); + + Option sensitiveAttributeOption = Option.builder("sensitiveAttribute").hasArg(true).required(false).build(); + commandLineOptions.addOption(sensitiveAttributeOption); + + Option quasiIdColumnNamesOption = Option.builder("quasiIdColumnNames").hasArg(true).required(false).build(); + commandLineOptions.addOption(quasiIdColumnNamesOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(RiskAnalysis.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + String datasetId = cmd.getOptionValue(datasetIdOption.getOpt()); + String tableId = cmd.getOptionValue(tableIdOption.getOpt()); + // use default project id when project id is not specified + String projectId = + cmd.getOptionValue( + projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + + if (cmd.hasOption("n")) { + // numerical stats analysis + String columnName = cmd.getOptionValue(columnNameOption.getOpt()); + numericalStatsAnalysis(projectId, datasetId, tableId, columnName); + } else if (cmd.hasOption("c")) { + // categorical stats analysis + String columnName = cmd.getOptionValue(columnNameOption.getOpt()); + categoricalStatsAnalysis(projectId, datasetId, tableId, columnName); + } else if (cmd.hasOption("k")) { + // k-anonymity analysis + List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); + kAnonymityAnalysis(projectId, datasetId, tableId, quasiIdColumnNames); + } else if (cmd.hasOption("l")) { + // l-diversity analysis + String sensitiveAttribute = cmd.getOptionValue(sensitiveAttributeOption.getOpt()); + List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); + lDiversityAnalysis(projectId, datasetId, tableId, sensitiveAttribute, quasiIdColumnNames); + } + } +} diff --git a/dlp/src/test/java/com/example/dlp/DeIdIT.java b/dlp/src/test/java/com/example/dlp/DeIdIT.java new file mode 100644 index 00000000000..18f792e9f08 --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/DeIdIT.java @@ -0,0 +1,77 @@ +/** + * Copyright 2017 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import jdk.nashorn.internal.runtime.regexp.RegExpMatcher; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.PrintStream; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.junit.Assert.*; + +@RunWith(JUnit4.class) +public class DeIdIT { + private ByteArrayOutputStream bout; + private PrintStream out; + + // Update to wrapped local encryption key + private String wrappedKey = System.getenv("DLP_DEID_WRAPPED_KEY"); + + // Update to name of KMS key used to wrap local encryption key + private String keyName = System.getenv("DLP_DEID_KEY_NAME"); + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + assertNotNull(System.getenv("DLP_DEID_WRAPPED_KEY")); + assertNotNull(System.getenv("DLP_DEID_KEY_NAME")); + } + + @Test + public void testDeidStringMasksCharacters() throws Exception { + String text = "\"My SSN is 372819127\""; + DeId.main(new String[] {"-m", text, "-maskingCharacter", "x", "-numberToMask", "5"}); + String output = bout.toString(); + assertEquals(output, "My SSN is xxxxx9127\n"); + } + + @Test + public void testDeidStringPerformsFpe() throws Exception { + String text = "\"My SSN is 372819127\""; + DeId.main(new String[] {"-f", text, "-wrappedKey", wrappedKey, "-keyName", keyName}); + String output = bout.toString(); + assertFalse(output.contains(text)); + assertTrue(Pattern.compile("My SSN is \\w+").matcher(output).find()); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } +} diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java new file mode 100644 index 00000000000..498dab2a556 --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -0,0 +1,105 @@ +/** + * Copyright 2017 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.regex.Pattern; + +import static org.junit.Assert.*; + +@RunWith(JUnit4.class) +public class RiskAnalysisIT { + private ByteArrayOutputStream bout; + private PrintStream out; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + assertNotNull(System.getenv("DLP_DEID_WRAPPED_KEY")); + assertNotNull(System.getenv("DLP_DEID_KEY_NAME")); + } + + @Test + public void testNumericalStats() throws Exception { + RiskAnalysis.main(new String[] { + "-n", + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-columnName", "Age" + }); + String output = bout.toString(); + assertTrue(Pattern.compile("Value at 0% quantile: integer_value: \\d{2}").matcher(output).find()); + assertTrue(Pattern.compile("Value at \\d{2}% quantile: integer_value: \\d{2}").matcher(output).find()); + } + + @Test + public void testCategoricalStats() throws Exception { + RiskAnalysis.main(new String[] { + "-c", + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-columnName", "Mystery" + }); + String output = bout.toString(); + assertTrue(Pattern.compile("Most common value occurs \\d time\\(s\\)").matcher(output).find()); + } + + @Test + public void testKAnonymity() throws Exception { + RiskAnalysis.main(new String[] { + "-k", + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-quasiIdColumnNames", "Age", "Mystery" + }); + String output = bout.toString(); + assertTrue(Pattern.compile("Bucket size range: \\[\\d, \\d\\]").matcher(output).find()); + assertTrue(output.contains("Quasi-ID values: integer_value: 19")); + assertTrue(output.contains("Class size: 1")); + } + + @Test + public void testLDiversity() throws Exception { + RiskAnalysis.main(new String[] { + "-l", + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-sensitiveAttribute", "Name", + "-quasiIdColumnNames", "Age", "Mystery" + }); + String output = bout.toString(); + assertTrue(output.contains("Quasi-ID values: integer_value: 19")); + assertTrue(output.contains("Class size: 1")); + assertTrue(output.contains("Sensitive value string_value: \"James\"")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } +} From 7e439bca890aa57d13ce4615e1ac4cc3de7f051a Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Wed, 18 Oct 2017 11:38:58 -0700 Subject: [PATCH 05/11] Address comments: clean up imports, document DeId requests, other misc fixes --- .../dlp/{DeId.java => DeIdentification.java} | 49 ++++++++++++++----- .../main/java/com/example/dlp/Inspect.java | 20 +++++++- .../java/com/example/dlp/RiskAnalysis.java | 18 ++++++- .../{DeIdIT.java => DeIdentificationIT.java} | 14 +++--- .../java/com/example/dlp/RiskAnalysisIT.java | 3 +- 5 files changed, 80 insertions(+), 24 deletions(-) rename dlp/src/main/java/com/example/dlp/{DeId.java => DeIdentification.java} (82%) rename dlp/src/test/java/com/example/dlp/{DeIdIT.java => DeIdentificationIT.java} (83%) diff --git a/dlp/src/main/java/com/example/dlp/DeId.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java similarity index 82% rename from dlp/src/main/java/com/example/dlp/DeId.java rename to dlp/src/main/java/com/example/dlp/DeIdentification.java index 31b16499d1b..99f08d22535 100644 --- a/dlp/src/main/java/com/example/dlp/DeId.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -18,15 +18,31 @@ import com.google.cloud.dlp.v2beta1.DlpServiceClient; import com.google.common.io.BaseEncoding; -import com.google.privacy.dlp.v2beta1.*; +import com.google.privacy.dlp.v2beta1.CharacterMaskConfig; +import com.google.privacy.dlp.v2beta1.ContentItem; +import com.google.privacy.dlp.v2beta1.CryptoKey; +import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig; +import com.google.privacy.dlp.v2beta1.DeidentifyConfig; +import com.google.privacy.dlp.v2beta1.DeidentifyContentRequest; +import com.google.privacy.dlp.v2beta1.DeidentifyContentResponse; +import com.google.privacy.dlp.v2beta1.InfoTypeTransformations; import com.google.privacy.dlp.v2beta1.InfoTypeTransformations.InfoTypeTransformation; import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; +import com.google.privacy.dlp.v2beta1.KmsWrappedCryptoKey; +import com.google.privacy.dlp.v2beta1.PrimitiveTransformation; import com.google.protobuf.ByteString; -import org.apache.commons.cli.*; - -public class DeId { - - private static void deidentifyWithMask(String string, Character maskingCharacter, int numberToMask) { +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class DeIdentification { + + private static void deIdentifyWithMask(String string, Character maskingCharacter, int numberToMask) { // [START dlp_deidentify_mask] // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -53,6 +69,7 @@ private static void deidentifyWithMask(String string, Character maskingCharacter .setNumberToMask(numberToMask) .build(); + // Create the deidentification transformation configuration PrimitiveTransformation primitiveTransformation = PrimitiveTransformation.newBuilder() .setCharacterMaskConfig(characterMaskConfig) @@ -68,6 +85,7 @@ private static void deidentifyWithMask(String string, Character maskingCharacter .addTransformations(infoTypeTransformationObject) .build(); + // Create the deidentification request object DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder() .setInfoTypeTransformations(infoTypeTransformationArray) @@ -79,6 +97,7 @@ private static void deidentifyWithMask(String string, Character maskingCharacter .addItems(contentItem) .build(); + // Execute the deidentification request DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); for (ContentItem item : response.getItemsList()) { @@ -90,8 +109,8 @@ private static void deidentifyWithMask(String string, Character maskingCharacter // [END dlp_deidentify_mask] } - private static void deidentifyWithFpe( - String string, CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey) { + private static void deIdentifyWithFpe( + String string, FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey) { // [START dlp_deidentify_fpe] // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -116,6 +135,7 @@ private static void deidentifyWithFpe( .setValue(string) .build(); + // Create the format-preserving encryption (FPE) configuration KmsWrappedCryptoKey kmsWrappedCryptoKey = KmsWrappedCryptoKey.newBuilder() .setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))) @@ -133,6 +153,7 @@ private static void deidentifyWithFpe( .setCommonAlphabet(alphabet) .build(); + // Create the deidentification transformation configuration PrimitiveTransformation primitiveTransformation = PrimitiveTransformation.newBuilder() .setCryptoReplaceFfxFpeConfig(cryptoReplaceFfxFpeConfig) @@ -148,6 +169,7 @@ private static void deidentifyWithFpe( .addTransformations(infoTypeTransformationObject) .build(); + // Create the deidentification request object DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder() .setInfoTypeTransformations(infoTypeTransformationArray) @@ -159,6 +181,7 @@ private static void deidentifyWithFpe( .addItems(contentItem) .build(); + // Execute the deidentification request DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); for (ContentItem item : response.getItemsList()) { @@ -211,19 +234,19 @@ public static void main(String[] args) throws Exception { cmd = parser.parse(commandLineOptions, args); } catch (ParseException e) { System.out.println(e.getMessage()); - formatter.printHelp(DeId.class.getName(), commandLineOptions); + formatter.printHelp(DeIdentification.class.getName(), commandLineOptions); System.exit(1); return; } if (cmd.hasOption("m")) { - // deid with character masking + // deidentification with character masking int numberToMask = Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0")); char maskingCharacter = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*").charAt(0); String val = cmd.getOptionValue(deidentifyMaskingOption.getOpt()); - deidentifyWithMask(val, maskingCharacter, numberToMask); + deIdentifyWithMask(val, maskingCharacter, numberToMask); } else if (cmd.hasOption("f")) { - // deid with FPE + // deidentification with FPE String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); String keyName = cmd.getOptionValue(keyNameOption.getOpt()); String val = cmd.getOptionValue(deidentifyFpeOption.getOpt()); @@ -231,7 +254,7 @@ public static void main(String[] args) throws Exception { FfxCommonNativeAlphabet.valueOf( cmd.getOptionValue( alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); - deidentifyWithFpe(val, alphabet, keyName, wrappedKey); + deIdentifyWithFpe(val, alphabet, keyName, wrappedKey); } } } diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 235127cff71..1d3a321f853 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -20,8 +20,26 @@ import com.google.cloud.ServiceOptions; import com.google.cloud.dlp.v2beta1.DlpServiceClient; import com.google.longrunning.Operation; -import com.google.privacy.dlp.v2beta1.*; +import com.google.privacy.dlp.v2beta1.BigQueryOptions; +import com.google.privacy.dlp.v2beta1.BigQueryTable; +import com.google.privacy.dlp.v2beta1.CloudStorageOptions; import com.google.privacy.dlp.v2beta1.CloudStorageOptions.FileSet; +import com.google.privacy.dlp.v2beta1.ContentItem; +import com.google.privacy.dlp.v2beta1.DatastoreOptions; +import com.google.privacy.dlp.v2beta1.Finding; +import com.google.privacy.dlp.v2beta1.InfoType; +import com.google.privacy.dlp.v2beta1.InspectConfig; +import com.google.privacy.dlp.v2beta1.InspectContentRequest; +import com.google.privacy.dlp.v2beta1.InspectContentResponse; +import com.google.privacy.dlp.v2beta1.InspectOperationMetadata; +import com.google.privacy.dlp.v2beta1.InspectOperationResult; +import com.google.privacy.dlp.v2beta1.InspectResult; +import com.google.privacy.dlp.v2beta1.KindExpression; +import com.google.privacy.dlp.v2beta1.Likelihood; +import com.google.privacy.dlp.v2beta1.OutputStorageConfig; +import com.google.privacy.dlp.v2beta1.PartitionId; +import com.google.privacy.dlp.v2beta1.ResultName; +import com.google.privacy.dlp.v2beta1.StorageConfig; import com.google.protobuf.ByteString; import java.net.URLConnection; import java.nio.file.Files; diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index d0009980a88..293be8158ce 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -20,18 +20,32 @@ import com.google.cloud.ServiceOptions; import com.google.cloud.dlp.v2beta1.DlpServiceClient; import com.google.longrunning.Operation; -import com.google.privacy.dlp.v2beta1.*; +import com.google.privacy.dlp.v2beta1.AnalyzeDataSourceRiskRequest; +import com.google.privacy.dlp.v2beta1.BigQueryTable; +import com.google.privacy.dlp.v2beta1.FieldId; +import com.google.privacy.dlp.v2beta1.PrivacyMetric; import com.google.privacy.dlp.v2beta1.PrivacyMetric.NumericalStatsConfig; import com.google.privacy.dlp.v2beta1.PrivacyMetric.CategoricalStatsConfig; import com.google.privacy.dlp.v2beta1.PrivacyMetric.KAnonymityConfig; import com.google.privacy.dlp.v2beta1.PrivacyMetric.LDiversityConfig; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationMetadata; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.NumericalStatsResult; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityHistogramBucket; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityEquivalenceClass; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityHistogramBucket; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityEquivalenceClass; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.CategoricalStatsResult.CategoricalStatsHistogramBucket; -import org.apache.commons.cli.*; +import com.google.privacy.dlp.v2beta1.Value; +import com.google.privacy.dlp.v2beta1.ValueFrequency; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; import java.util.Arrays; import java.util.List; diff --git a/dlp/src/test/java/com/example/dlp/DeIdIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java similarity index 83% rename from dlp/src/test/java/com/example/dlp/DeIdIT.java rename to dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index 18f792e9f08..9d8813504fc 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -16,7 +16,6 @@ package com.example.dlp; -import jdk.nashorn.internal.runtime.regexp.RegExpMatcher; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -24,15 +23,16 @@ import org.junit.runners.JUnit4; import java.io.ByteArrayOutputStream; -import java.io.File; import java.io.PrintStream; -import java.util.regex.Matcher; import java.util.regex.Pattern; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; @RunWith(JUnit4.class) -public class DeIdIT { +public class DeIdentificationIT { private ByteArrayOutputStream bout; private PrintStream out; @@ -55,7 +55,7 @@ public void setUp() { @Test public void testDeidStringMasksCharacters() throws Exception { String text = "\"My SSN is 372819127\""; - DeId.main(new String[] {"-m", text, "-maskingCharacter", "x", "-numberToMask", "5"}); + DeIdentification.main(new String[] {"-m", text, "-maskingCharacter", "x", "-numberToMask", "5"}); String output = bout.toString(); assertEquals(output, "My SSN is xxxxx9127\n"); } @@ -63,7 +63,7 @@ public void testDeidStringMasksCharacters() throws Exception { @Test public void testDeidStringPerformsFpe() throws Exception { String text = "\"My SSN is 372819127\""; - DeId.main(new String[] {"-f", text, "-wrappedKey", wrappedKey, "-keyName", keyName}); + DeIdentification.main(new String[] {"-f", text, "-wrappedKey", wrappedKey, "-keyName", keyName}); String output = bout.toString(); assertFalse(output.contains(text)); assertTrue(Pattern.compile("My SSN is \\w+").matcher(output).find()); diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index 498dab2a556..69b152669da 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -26,7 +26,8 @@ import java.io.PrintStream; import java.util.regex.Pattern; -import static org.junit.Assert.*; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; @RunWith(JUnit4.class) public class RiskAnalysisIT { From 18038cb00c8b992523d660d5bd5739c361ea3ec7 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Wed, 18 Oct 2017 11:45:15 -0700 Subject: [PATCH 06/11] Add missing comment --- dlp/src/main/java/com/example/dlp/DeIdentification.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 99f08d22535..c9336866723 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -100,6 +100,8 @@ private static void deIdentifyWithMask(String string, Character maskingCharacter // Execute the deidentification request DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); + // Print the character-masked input value + // e.g. "My SSN is 123456789" --> "My SSN is *********" for (ContentItem item : response.getItemsList()) { System.out.println(item.getValue()); } @@ -184,6 +186,8 @@ private static void deIdentifyWithFpe( // Execute the deidentification request DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); + // Print the deidentified input value + // e.g. "My SSN is 123456789" --> "My SSN is 7261298621" for (ContentItem item : response.getItemsList()) { System.out.println(item.getValue()); } From f7ba95ee82eb83b3d25d6bf4b2809fe9f5abdea1 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Wed, 18 Oct 2017 14:39:16 -0700 Subject: [PATCH 07/11] Add env vars to README.md --- dlp/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dlp/README.md b/dlp/README.md index 2c976b5a73f..37180819cdc 100644 --- a/dlp/README.md +++ b/dlp/README.md @@ -6,8 +6,10 @@ a powerful detection engine for personally identifiable information and other pr ## Setup - A Google Cloud project with billing enabled - [Enable](https://console.cloud.google.com/launcher/details/google/dlp.googleapis.com) the DLP API. -- (Local testing)[Create a service account](https://cloud.google.com/docs/authentication/getting-started) +- (Local testing) [Create a service account](https://cloud.google.com/docs/authentication/getting-started) and set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable pointing to the downloaded credentials file. +- (Local testing) Set the `DLP_DEID_WRAPPED_KEY` environment variable to an AES-256 key encrypted ('wrapped') [with a Cloud Key Management Service (KMS) key](https://cloud.google.com/kms/docs/encrypt-decrypt). +- (Local testing) Set the `DLP_DEID_KEY_NAME` environment variable to the path-name of the Cloud KMS key you wrapped `DLP_DEID_WRAPPED_KEY` with. ## Build This project uses the [Assembly Plugin](https://maven.apache.org/plugins/maven-assembly-plugin/usage.html) to build an uber jar. From bd6eb957ef7437a7d2abd0a362b191e25976f4a3 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Wed, 18 Oct 2017 14:41:30 -0700 Subject: [PATCH 08/11] Switch to Javadoc-style comments --- .../com/example/dlp/DeIdentification.java | 34 +++++----- .../java/com/example/dlp/RiskAnalysis.java | 64 ++++++++++--------- 2 files changed, 52 insertions(+), 46 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index c9336866723..b5715a1c994 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -44,17 +44,19 @@ public class DeIdentification { private static void deIdentifyWithMask(String string, Character maskingCharacter, int numberToMask) { // [START dlp_deidentify_mask] + /** + * Deidentify a string by masking sensitive information with a character using the DLP API. + * @param string The string to deidentify. + * @param maskingCharacter (Optional) The character to mask sensitive data with. + * @param numberToMask (Optional) The number of characters' worth of sensitive data to mask. + * Omitting this value or setting it to 0 will mask all sensitive characters. + */ + // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // The string to deidentify // string = "My SSN is 372819127"; - - // (Optional) The maximum number of sensitive characters to mask in a match - // If omitted from the request or set to 0, the API will mask any matching characters // numberToMask = 5; - - // (Optional) The character to mask matching sensitive data with // maskingCharacter = 'x'; ContentItem contentItem = @@ -114,22 +116,22 @@ private static void deIdentifyWithMask(String string, Character maskingCharacter private static void deIdentifyWithFpe( String string, FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey) { // [START dlp_deidentify_fpe] + /** + * Deidentify a string by encrypting sensitive information while preserving format using the DLP API. + * @param string The string to deidentify. + * @param alphabet The set of characters to use when encrypting the input. For more information, see + * cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify#FfxCommonNativeAlphabet + * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. + * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. + */ + // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // The string to deidentify // string = "My SSN is 372819127"; - - // The set of characters to replace sensitive ones with - // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify#FfxCommonNativeAlphabet // alphabet = FfxCommonNativeAlphabet.ALPHA_NUMERIC; - - // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key // keyName = "projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME"; - - // The encrypted ('wrapped') AES-256 key to use - // This key should be encrypted using the Cloud KMS key specified above - // const wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY" + // wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY" ContentItem contentItem = ContentItem.newBuilder() diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index 293be8158ce..f050ffcae92 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -57,19 +57,21 @@ private static void numericalStatsAnalysis( String projectId, String datasetId, String tableId, String columnName) throws Exception { // [START dlp_numerical_stats_analysis] + + /** + * Calculate numerical statistics for a column in a BigQuery table using the DLP API. + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param columnName The name of the column to analyze. This column must contain only numerical data. + */ + // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // (Optional) The project ID to run the API call under // projectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. "my_dataset" // datasetId = "my_dataset"; - - // The ID of the table to inspect, e.g. "my_table" // tableId = "my_table"; - - // The name of the column to compute risk metrics for, e.g. 'firstName' // columnName = "firstName"; FieldId fieldId = @@ -133,19 +135,20 @@ private static void categoricalStatsAnalysis( String projectId, String datasetId, String tableId, String columnName) throws Exception { // [START dlp_categorical_stats_analysis] + /** + * Calculate categorical statistics for a column in a BigQuery table using the DLP API. + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param columnName The name of the column to analyze. This column can contain numerical or non-numerical data. + */ + // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // (Optional) The project ID to run the API call under // projectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. "my_dataset" // datasetId = "my_dataset"; - - // The ID of the table to inspect, e.g. "my_table" // tableId = "my_table"; - - // The name of the column to compute risk metrics for, e.g. 'firstName' // columnName = "firstName"; FieldId fieldId = @@ -207,19 +210,20 @@ private static void kAnonymityAnalysis( String projectId, String datasetId, String tableId, List quasiIds) throws Exception { // [START dlp_k_anonymity] + /** + * Calculate k-anonymity for quasi-identifiers in a BigQuery table using the DLP API. + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param quasiIds The names of a set of columns that form a composite key ('quasi-identifiers'). + */ + // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // (Optional) The project ID to run the API call under // projectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. 'my_dataset' // datasetId = 'my_dataset'; - - // The ID of the table to inspect, e.g. 'my_table' // tableId = 'my_table'; - - // A set of columns that form a composite key ('quasi-identifiers') // quasiIds = [{ columnName: 'age' }, { columnName: 'city' }]; List quasiIdFields = @@ -287,22 +291,22 @@ private static void lDiversityAnalysis( String projectId, String datasetId, String tableId, String sensitiveAttribute, List quasiIds) throws Exception { // [START dlp_l_diversity] + /** + * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table using the DLP API. + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against + * @param quasiIds The names of a set of columns that form a composite key ('quasi-identifiers'). + */ + // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // (Optional) The project ID to run the API call under // projectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. "my_dataset" // datasetId = "my_dataset"; - - // The ID of the table to inspect, e.g. "my_table" // tableId = "my_table"; - - // The column to measure l-diversity relative to, e.g. "firstName" // sensitiveAttribute = "name"; - - // A set of columns that form a composite key ('quasi-identifiers') // quasiIds = [{ columnName: "age" }, { columnName: "city" }]; FieldId sensitiveAttributeField = FieldId.newBuilder().setColumnName(sensitiveAttribute).build(); From b5ebdabb7ca7127ff875f4680e85881f83e092db Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Wed, 18 Oct 2017 14:42:32 -0700 Subject: [PATCH 09/11] (Temporarily) point BQ tests at nodejs-docs-samples GCP project --- dlp/src/test/java/com/example/dlp/InspectIT.java | 7 ++++++- dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 2039839e5ce..9081a9df6be 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -99,7 +99,12 @@ public void testDatastoreInspectionReturnsInfoTypes() throws Exception { @Test public void testBigqueryInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] {"-bq", "-datasetId", "integration_tests_dlp", "-tableId", "harmful"}); + Inspect.main(new String[] { + "-bq", + "-projectId", "nodejs-docs-samples", + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful" + }); String output = bout.toString(); assertTrue(output.contains("PHONE_NUMBER")); } diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index 69b152669da..c3188adde67 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -48,6 +48,7 @@ public void setUp() { public void testNumericalStats() throws Exception { RiskAnalysis.main(new String[] { "-n", + "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-columnName", "Age" @@ -61,6 +62,7 @@ public void testNumericalStats() throws Exception { public void testCategoricalStats() throws Exception { RiskAnalysis.main(new String[] { "-c", + "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-columnName", "Mystery" @@ -73,6 +75,7 @@ public void testCategoricalStats() throws Exception { public void testKAnonymity() throws Exception { RiskAnalysis.main(new String[] { "-k", + "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-quasiIdColumnNames", "Age", "Mystery" @@ -87,6 +90,7 @@ public void testKAnonymity() throws Exception { public void testLDiversity() throws Exception { RiskAnalysis.main(new String[] { "-l", + "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-sensitiveAttribute", "Name", From 2c4bc7da4625277a6dc3b864efa48ce7554631fe Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Wed, 18 Oct 2017 16:32:56 -0700 Subject: [PATCH 10/11] Remove nodejs-docs-samples project ID for BigQuery tests --- dlp/src/test/java/com/example/dlp/InspectIT.java | 1 - dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java | 4 ---- 2 files changed, 5 deletions(-) diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 9081a9df6be..618c96d0a5c 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -101,7 +101,6 @@ public void testDatastoreInspectionReturnsInfoTypes() throws Exception { public void testBigqueryInspectionReturnsInfoTypes() throws Exception { Inspect.main(new String[] { "-bq", - "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful" }); diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index c3188adde67..69b152669da 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -48,7 +48,6 @@ public void setUp() { public void testNumericalStats() throws Exception { RiskAnalysis.main(new String[] { "-n", - "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-columnName", "Age" @@ -62,7 +61,6 @@ public void testNumericalStats() throws Exception { public void testCategoricalStats() throws Exception { RiskAnalysis.main(new String[] { "-c", - "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-columnName", "Mystery" @@ -75,7 +73,6 @@ public void testCategoricalStats() throws Exception { public void testKAnonymity() throws Exception { RiskAnalysis.main(new String[] { "-k", - "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-quasiIdColumnNames", "Age", "Mystery" @@ -90,7 +87,6 @@ public void testKAnonymity() throws Exception { public void testLDiversity() throws Exception { RiskAnalysis.main(new String[] { "-l", - "-projectId", "nodejs-docs-samples", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-sensitiveAttribute", "Name", From f34ddd4d8b8f1ed6f54c44aea444f7c123b070dd Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Wed, 18 Oct 2017 17:17:52 -0700 Subject: [PATCH 11/11] Address lint comments --- .../com/example/dlp/DeIdentification.java | 32 ++-- .../main/java/com/example/dlp/Inspect.java | 19 ++- .../java/com/example/dlp/RiskAnalysis.java | 137 ++++++++++-------- .../com/example/dlp/DeIdentificationIT.java | 29 ++-- .../java/com/example/dlp/RiskAnalysisIT.java | 22 +-- 5 files changed, 140 insertions(+), 99 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index b5715a1c994..296fc582f78 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -22,12 +22,12 @@ import com.google.privacy.dlp.v2beta1.ContentItem; import com.google.privacy.dlp.v2beta1.CryptoKey; import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig; +import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; import com.google.privacy.dlp.v2beta1.DeidentifyConfig; import com.google.privacy.dlp.v2beta1.DeidentifyContentRequest; import com.google.privacy.dlp.v2beta1.DeidentifyContentResponse; import com.google.privacy.dlp.v2beta1.InfoTypeTransformations; import com.google.privacy.dlp.v2beta1.InfoTypeTransformations.InfoTypeTransformation; -import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; import com.google.privacy.dlp.v2beta1.KmsWrappedCryptoKey; import com.google.privacy.dlp.v2beta1.PrimitiveTransformation; import com.google.protobuf.ByteString; @@ -42,14 +42,17 @@ public class DeIdentification { - private static void deIdentifyWithMask(String string, Character maskingCharacter, int numberToMask) { + private static void deIdentifyWithMask( + String string, + Character maskingCharacter, + int numberToMask) { // [START dlp_deidentify_mask] /** * Deidentify a string by masking sensitive information with a character using the DLP API. * @param string The string to deidentify. * @param maskingCharacter (Optional) The character to mask sensitive data with. * @param numberToMask (Optional) The number of characters' worth of sensitive data to mask. - * Omitting this value or setting it to 0 will mask all sensitive characters. + * Omitting this value or setting it to 0 masks all sensitive chars. */ // instantiate a client @@ -117,10 +120,10 @@ private static void deIdentifyWithFpe( String string, FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey) { // [START dlp_deidentify_fpe] /** - * Deidentify a string by encrypting sensitive information while preserving format using the DLP API. + * Deidentify a string by encrypting sensitive information while preserving format. * @param string The string to deidentify. - * @param alphabet The set of characters to use when encrypting the input. For more information, see - * cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify#FfxCommonNativeAlphabet + * @param alphabet The set of characters to use when encrypting the input. For more information, + * see cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. */ @@ -130,7 +133,7 @@ private static void deIdentifyWithFpe( // string = "My SSN is 372819127"; // alphabet = FfxCommonNativeAlphabet.ALPHA_NUMERIC; - // keyName = "projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME"; + // keyName = "projects/GCP_PROJECT/locations/REGION/keyRings/KEYRING_ID/cryptoKeys/KEY_NAME"; // wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY" ContentItem contentItem = @@ -217,19 +220,24 @@ public static void main(String[] args) throws Exception { Options commandLineOptions = new Options(); commandLineOptions.addOptionGroup(optionsGroup); - Option maskingCharacterOption = Option.builder("maskingCharacter").hasArg(true).required(false).build(); + Option maskingCharacterOption = + Option.builder("maskingCharacter").hasArg(true).required(false).build(); commandLineOptions.addOption(maskingCharacterOption); - Option numberToMaskOption = Option.builder("numberToMask").hasArg(true).required(false).build(); + Option numberToMaskOption = + Option.builder("numberToMask").hasArg(true).required(false).build(); commandLineOptions.addOption(numberToMaskOption); - Option alphabetOption = Option.builder("commonAlphabet").hasArg(true).required(false).build(); + Option alphabetOption = + Option.builder("commonAlphabet").hasArg(true).required(false).build(); commandLineOptions.addOption(alphabetOption); - Option wrappedKeyOption = Option.builder("wrappedKey").hasArg(true).required(false).build(); + Option wrappedKeyOption = + Option.builder("wrappedKey").hasArg(true).required(false).build(); commandLineOptions.addOption(wrappedKeyOption); - Option keyNameOption = Option.builder("keyName").hasArg(true).required(false).build(); + Option keyNameOption = + Option.builder("keyName").hasArg(true).required(false).build(); commandLineOptions.addOption(keyNameOption); CommandLineParser parser = new DefaultParser(); diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 1d3a321f853..b39be30adc7 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -361,13 +361,21 @@ private static void inspectBigquery( // Reference to the BigQuery table BigQueryTable tableReference = - BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build(); + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); BigQueryOptions bigQueryOptions = - BigQueryOptions.newBuilder().setTableReference(tableReference).build(); + BigQueryOptions.newBuilder() + .setTableReference(tableReference) + .build(); // Construct BigQuery configuration to be inspected StorageConfig storageConfig = - StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build(); + StorageConfig.newBuilder() + .setBigQueryOptions(bigQueryOptions) + .build(); InspectConfig inspectConfig = InspectConfig.newBuilder() @@ -380,7 +388,8 @@ private static void inspectBigquery( // asynchronously submit an inspect operation OperationFuture responseFuture = - dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); + dlpServiceClient.createInspectOperationAsync( + inspectConfig, storageConfig, outputConfig); // ... // block on response, returning job id of the operation @@ -406,7 +415,7 @@ private static void inspectBigquery( /** * Command line application to inspect data using the Data Loss Prevention API. - * Supported data formats: strings, files, text files on GCS, BigQuery tables, and Datastore entities + * Supported data formats: string, file, text file on GCS, BigQuery table, and Datastore entity */ public static void main(String[] args) throws Exception { diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index f050ffcae92..5fc95bdec86 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -24,20 +24,23 @@ import com.google.privacy.dlp.v2beta1.BigQueryTable; import com.google.privacy.dlp.v2beta1.FieldId; import com.google.privacy.dlp.v2beta1.PrivacyMetric; -import com.google.privacy.dlp.v2beta1.PrivacyMetric.NumericalStatsConfig; import com.google.privacy.dlp.v2beta1.PrivacyMetric.CategoricalStatsConfig; import com.google.privacy.dlp.v2beta1.PrivacyMetric.KAnonymityConfig; import com.google.privacy.dlp.v2beta1.PrivacyMetric.LDiversityConfig; +import com.google.privacy.dlp.v2beta1.PrivacyMetric.NumericalStatsConfig; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationMetadata; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.NumericalStatsResult; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityHistogramBucket; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityEquivalenceClass; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityHistogramBucket; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityEquivalenceClass; import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.CategoricalStatsResult.CategoricalStatsHistogramBucket; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityEquivalenceClass; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityHistogramBucket; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityEquivalenceClass; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityHistogramBucket; +import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.NumericalStatsResult; import com.google.privacy.dlp.v2beta1.Value; import com.google.privacy.dlp.v2beta1.ValueFrequency; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; @@ -47,13 +50,9 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - public class RiskAnalysis { - private static void numericalStatsAnalysis( + private static void calculateNumericalStats( String projectId, String datasetId, String tableId, String columnName) throws Exception { // [START dlp_numerical_stats_analysis] @@ -63,7 +62,7 @@ private static void numericalStatsAnalysis( * @param projectId The Google Cloud Platform project ID to run the API call under. * @param datasetId The BigQuery dataset to analyze. * @param tableId The BigQuery table to analyze. - * @param columnName The name of the column to analyze. This column must contain only numerical data. + * @param columnName The name of the column to analyze, which must contain only numerical data. */ // instantiate a client @@ -103,8 +102,8 @@ private static void numericalStatsAnalysis( .build(); // asynchronously submit a risk analysis operation - OperationFuture responseFuture = - dlpServiceClient.analyzeDataSourceRiskAsync(request); + OperationFuture + responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); // ... // block on response @@ -112,7 +111,8 @@ private static void numericalStatsAnalysis( NumericalStatsResult results = response.getNumericalStatsResult(); - System.out.println("Value range: [" + results.getMaxValue() + ", " + results.getMinValue() + "]"); + System.out.println( + "Value range: [" + results.getMaxValue() + ", " + results.getMinValue() + "]"); // Print out unique quantiles String previousValue = ""; @@ -131,7 +131,7 @@ private static void numericalStatsAnalysis( // [END dlp_numerical_stats_analysis] } - private static void categoricalStatsAnalysis( + private static void calculateCategoricalStats( String projectId, String datasetId, String tableId, String columnName) throws Exception { // [START dlp_categorical_stats_analysis] @@ -140,7 +140,7 @@ private static void categoricalStatsAnalysis( * @param projectId The Google Cloud Platform project ID to run the API call under. * @param datasetId The BigQuery dataset to analyze. * @param tableId The BigQuery table to analyze. - * @param columnName The name of the column to analyze. This column can contain numerical or non-numerical data. + * @param columnName The name of the column to analyze, which need not contain numerical data. */ // instantiate a client @@ -180,8 +180,8 @@ private static void categoricalStatsAnalysis( .build(); // asynchronously submit a risk analysis operation - OperationFuture responseFuture = - dlpServiceClient.analyzeDataSourceRiskAsync(request); + OperationFuture + responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); // ... // block on response @@ -189,15 +189,18 @@ private static void categoricalStatsAnalysis( CategoricalStatsHistogramBucket results = response.getCategoricalStatsResult().getValueFrequencyHistogramBuckets(0); - System.out.println("Most common value occurs " + results.getValueFrequencyUpperBound() + " time(s)"); - System.out.println("Least common value occurs " + results.getValueFrequencyLowerBound() + " time(s)"); + System.out.println( + "Most common value occurs " + results.getValueFrequencyUpperBound() + " time(s)"); + System.out.println( + "Least common value occurs " + results.getValueFrequencyLowerBound() + " time(s)"); for (ValueFrequency valueFrequency : results.getBucketValuesList()) { - System.out.println("Value " + - valueFrequency.getValue().toString() + - " occurs " + - valueFrequency.getCount() + - " time(s)."); + System.out.println("Value " + + valueFrequency.getValue().toString() + + " occurs " + + valueFrequency.getCount() + + " time(s)." + ); } } catch (Exception e) { @@ -206,7 +209,7 @@ private static void categoricalStatsAnalysis( // [END dlp_categorical_stats_analysis] } - private static void kAnonymityAnalysis( + private static void calculateKAnonymity( String projectId, String datasetId, String tableId, List quasiIds) throws Exception { // [START dlp_k_anonymity] @@ -215,7 +218,7 @@ private static void kAnonymityAnalysis( * @param projectId The Google Cloud Platform project ID to run the API call under. * @param datasetId The BigQuery dataset to analyze. * @param tableId The BigQuery table to analyze. - * @param quasiIds The names of a set of columns that form a composite key ('quasi-identifiers'). + * @param quasiIds The names of columns that form a composite key ('quasi-identifiers'). */ // instantiate a client @@ -232,7 +235,7 @@ private static void kAnonymityAnalysis( .map(columnName -> FieldId.newBuilder().setColumnName(columnName).build()) .collect(Collectors.toList()); - KAnonymityConfig kAnonymityConfig = + KAnonymityConfig kanonymityConfig = KAnonymityConfig.newBuilder() .addAllQuasiIds(quasiIdFields) .build(); @@ -246,7 +249,7 @@ private static void kAnonymityAnalysis( PrivacyMetric privacyMetric = PrivacyMetric.newBuilder() - .setKAnonymityConfig(kAnonymityConfig) + .setKAnonymityConfig(kanonymityConfig) .build(); AnalyzeDataSourceRiskRequest request = @@ -256,8 +259,8 @@ private static void kAnonymityAnalysis( .build(); // asynchronously submit a risk analysis operation - OperationFuture responseFuture = - dlpServiceClient.analyzeDataSourceRiskAsync(request); + OperationFuture + responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); // ... // block on response @@ -270,7 +273,7 @@ private static void kAnonymityAnalysis( + ", " + results.getEquivalenceClassSizeUpperBound() + "]" - ); + ); for (KAnonymityEquivalenceClass bucket : results.getBucketValuesList()) { List quasiIdValues = bucket.getQuasiIdsValuesList() @@ -287,17 +290,21 @@ private static void kAnonymityAnalysis( // [END dlp_k_anonymity] } - private static void lDiversityAnalysis( - String projectId, String datasetId, String tableId, String sensitiveAttribute, List quasiIds) - throws Exception { + private static void calculateLDiversity( + String projectId, + String datasetId, + String tableId, + String sensitiveAttribute, + List quasiIds + ) throws Exception { // [START dlp_l_diversity] /** - * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table using the DLP API. + * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table. * @param projectId The Google Cloud Platform project ID to run the API call under. * @param datasetId The BigQuery dataset to analyze. * @param tableId The BigQuery table to analyze. * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against - * @param quasiIds The names of a set of columns that form a composite key ('quasi-identifiers'). + * @param quasiIds A set of column names that form a composite key ('quasi-identifiers'). */ // instantiate a client @@ -309,7 +316,10 @@ private static void lDiversityAnalysis( // sensitiveAttribute = "name"; // quasiIds = [{ columnName: "age" }, { columnName: "city" }]; - FieldId sensitiveAttributeField = FieldId.newBuilder().setColumnName(sensitiveAttribute).build(); + FieldId sensitiveAttributeField = + FieldId.newBuilder() + .setColumnName(sensitiveAttribute) + .build(); List quasiIdFields = quasiIds @@ -317,7 +327,7 @@ private static void lDiversityAnalysis( .map(columnName -> FieldId.newBuilder().setColumnName(columnName).build()) .collect(Collectors.toList()); - LDiversityConfig lDiversityConfig = + LDiversityConfig ldiversityConfig = LDiversityConfig.newBuilder() .addAllQuasiIds(quasiIdFields) .setSensitiveAttribute(sensitiveAttributeField) @@ -332,7 +342,7 @@ private static void lDiversityAnalysis( PrivacyMetric privacyMetric = PrivacyMetric.newBuilder() - .setLDiversityConfig(lDiversityConfig) + .setLDiversityConfig(ldiversityConfig) .build(); AnalyzeDataSourceRiskRequest request = @@ -342,8 +352,8 @@ private static void lDiversityAnalysis( .build(); // asynchronously submit a risk analysis operation - OperationFuture responseFuture = - dlpServiceClient.analyzeDataSourceRiskAsync(request); + OperationFuture + responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); // ... // block on response @@ -361,11 +371,11 @@ private static void lDiversityAnalysis( System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) { - System.out.println("\t\tSensitive value " + - valueFrequency.getValue().toString() + - " occurs " + - valueFrequency.getCount() + - " time(s)."); + System.out.println("\t\tSensitive value " + + valueFrequency.getValue().toString() + + " occurs " + + valueFrequency.getCount() + + " time(s)."); } } } catch (Exception e) { @@ -390,11 +400,11 @@ public static void main(String[] args) throws Exception { Option categoricalAnalysisOption = new Option("c", "categorical"); optionsGroup.addOption(categoricalAnalysisOption); - Option kAnonymityOption = new Option("k", "kAnonymity"); - optionsGroup.addOption(kAnonymityOption); + Option kanonymityOption = new Option("k", "kAnonymity"); + optionsGroup.addOption(kanonymityOption); - Option lDiversityOption = new Option("l", "lDiversity"); - optionsGroup.addOption(lDiversityOption); + Option ldiversityOption = new Option("l", "lDiversity"); + optionsGroup.addOption(ldiversityOption); Options commandLineOptions = new Options(); commandLineOptions.addOptionGroup(optionsGroup); @@ -408,13 +418,16 @@ public static void main(String[] args) throws Exception { Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); commandLineOptions.addOption(projectIdOption); - Option columnNameOption = Option.builder("columnName").hasArg(true).required(false).build(); + Option columnNameOption = + Option.builder("columnName").hasArg(true).required(false).build(); commandLineOptions.addOption(columnNameOption); - Option sensitiveAttributeOption = Option.builder("sensitiveAttribute").hasArg(true).required(false).build(); + Option sensitiveAttributeOption = + Option.builder("sensitiveAttribute").hasArg(true).required(false).build(); commandLineOptions.addOption(sensitiveAttributeOption); - Option quasiIdColumnNamesOption = Option.builder("quasiIdColumnNames").hasArg(true).required(false).build(); + Option quasiIdColumnNamesOption = + Option.builder("quasiIdColumnNames").hasArg(true).required(false).build(); commandLineOptions.addOption(quasiIdColumnNamesOption); CommandLineParser parser = new DefaultParser(); @@ -440,20 +453,22 @@ public static void main(String[] args) throws Exception { if (cmd.hasOption("n")) { // numerical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); - numericalStatsAnalysis(projectId, datasetId, tableId, columnName); + calculateNumericalStats(projectId, datasetId, tableId, columnName); } else if (cmd.hasOption("c")) { // categorical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); - categoricalStatsAnalysis(projectId, datasetId, tableId, columnName); + calculateCategoricalStats(projectId, datasetId, tableId, columnName); } else if (cmd.hasOption("k")) { // k-anonymity analysis - List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); - kAnonymityAnalysis(projectId, datasetId, tableId, quasiIdColumnNames); + List quasiIdColumnNames = + Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); + calculateKAnonymity(projectId, datasetId, tableId, quasiIdColumnNames); } else if (cmd.hasOption("l")) { // l-diversity analysis String sensitiveAttribute = cmd.getOptionValue(sensitiveAttributeOption.getOpt()); - List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); - lDiversityAnalysis(projectId, datasetId, tableId, sensitiveAttribute, quasiIdColumnNames); + List quasiIdColumnNames = + Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); + calculateLDiversity(projectId, datasetId, tableId, sensitiveAttribute, quasiIdColumnNames); } } } diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index 9d8813504fc..0097cee5af1 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -16,21 +16,20 @@ package com.example.dlp; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.regex.Pattern; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import java.io.ByteArrayOutputStream; -import java.io.PrintStream; -import java.util.regex.Pattern; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - @RunWith(JUnit4.class) public class DeIdentificationIT { private ByteArrayOutputStream bout; @@ -55,7 +54,11 @@ public void setUp() { @Test public void testDeidStringMasksCharacters() throws Exception { String text = "\"My SSN is 372819127\""; - DeIdentification.main(new String[] {"-m", text, "-maskingCharacter", "x", "-numberToMask", "5"}); + DeIdentification.main(new String[] { + "-m", text, + "-maskingCharacter", "x", + "-numberToMask", "5" + }); String output = bout.toString(); assertEquals(output, "My SSN is xxxxx9127\n"); } @@ -63,7 +66,11 @@ public void testDeidStringMasksCharacters() throws Exception { @Test public void testDeidStringPerformsFpe() throws Exception { String text = "\"My SSN is 372819127\""; - DeIdentification.main(new String[] {"-f", text, "-wrappedKey", wrappedKey, "-keyName", keyName}); + DeIdentification.main(new String[] { + "-f", text, + "-wrappedKey", wrappedKey, + "-keyName", keyName + }); String output = bout.toString(); assertFalse(output.contains(text)); assertTrue(Pattern.compile("My SSN is \\w+").matcher(output).find()); diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index 69b152669da..de5fa22c722 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -16,19 +16,18 @@ package com.example.dlp; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.regex.Pattern; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import java.io.ByteArrayOutputStream; -import java.io.PrintStream; -import java.util.regex.Pattern; - -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - @RunWith(JUnit4.class) public class RiskAnalysisIT { private ByteArrayOutputStream bout; @@ -53,8 +52,10 @@ public void testNumericalStats() throws Exception { "-columnName", "Age" }); String output = bout.toString(); - assertTrue(Pattern.compile("Value at 0% quantile: integer_value: \\d{2}").matcher(output).find()); - assertTrue(Pattern.compile("Value at \\d{2}% quantile: integer_value: \\d{2}").matcher(output).find()); + assertTrue(Pattern.compile( + "Value at 0% quantile: integer_value: \\d{2}").matcher(output).find()); + assertTrue(Pattern.compile( + "Value at \\d{2}% quantile: integer_value: \\d{2}").matcher(output).find()); } @Test @@ -66,7 +67,8 @@ public void testCategoricalStats() throws Exception { "-columnName", "Mystery" }); String output = bout.toString(); - assertTrue(Pattern.compile("Most common value occurs \\d time\\(s\\)").matcher(output).find()); + assertTrue(Pattern.compile( + "Most common value occurs \\d time\\(s\\)").matcher(output).find()); } @Test