From c7cb633b395a8941cab9fdf852db02569062a45b Mon Sep 17 00:00:00 2001 From: Archit Jugran Date: Tue, 22 Nov 2022 06:52:39 +0530 Subject: [PATCH] GoogleCloudSpannerReceiver: Mask lock stats PII (#16343) * Hide lock stats PII changelog . Code review changes * chlog * correct merge conflicts * remove extra space * code review changes * Added another test case --- ...udspannerreceiver-hide-lock-stats-pii.yaml | 5 ++ receiver/googlecloudspannerreceiver/README.md | 2 + receiver/googlecloudspannerreceiver/config.go | 9 ++-- .../googlecloudspannerreceiver/config_test.go | 7 +-- .../googlecloudspannerreceiver/factory.go | 12 +++-- .../internal/metadata/labelvalue.go | 4 ++ .../internal/metadata/labelvalue_test.go | 3 ++ .../internal/metadata/metricsdatapoint.go | 47 +++++++++++++++++ .../metadata/metricsdatapoint_test.go | 52 +++++++++++++++++++ .../statsreader/intervalstatsreader.go | 17 ++++-- .../statsreader/intervalstatsreader_test.go | 11 ++-- .../internal/statsreader/reader.go | 5 +- .../googlecloudspannerreceiver/receiver.go | 5 +- .../testdata/config.yaml | 1 + 14 files changed, 156 insertions(+), 24 deletions(-) create mode 100755 .chloggen/googlecloudspannerreceiver-hide-lock-stats-pii.yaml diff --git a/.chloggen/googlecloudspannerreceiver-hide-lock-stats-pii.yaml b/.chloggen/googlecloudspannerreceiver-hide-lock-stats-pii.yaml new file mode 100755 index 000000000000..6a1034b7bb21 --- /dev/null +++ b/.chloggen/googlecloudspannerreceiver-hide-lock-stats-pii.yaml @@ -0,0 +1,5 @@ +change_type: enhancement +component: googlecloudspannerreceiver +note: Configurably mask the PII in lock stats metrics. +issues: [16343] +subtext: diff --git a/receiver/googlecloudspannerreceiver/README.md b/receiver/googlecloudspannerreceiver/README.md index 4646ebff3475..47c0813f76eb 100644 --- a/receiver/googlecloudspannerreceiver/README.md +++ b/receiver/googlecloudspannerreceiver/README.md @@ -32,6 +32,7 @@ receivers: top_metrics_query_max_rows: 100 backfill_enabled: true cardinality_total_limit: 200000 + hide_topn_lockstats_rowrangestartkey: false projects: - project_id: "spanner project 1" service_account_key: "path to spanner project 1 service account json key" @@ -63,6 +64,7 @@ Brief description of configuration properties: - **top_metrics_query_max_rows** - max number of rows to fetch from Top N built-in table(100 by default) - **backfill_enabled** - turn on/off 1-hour data backfill(by default it is turned off) - **cardinality_total_limit** - limit of active series per 24 hours period. If specified, turns on cardinality filtering and handling. If zero or not specified, cardinality is not handled. You can read [this document](cardinality.md) for more information about cardinality handling and filtering. +- **hide_topn_lockstats_rowrangestartkey** - if true, masks PII (key values) in row_range_start_key label for the "top minute lock stats" metric - **projects** - list of GCP projects - **project_id** - identifier of GCP project - **service_account_key** - path to service account JSON key It is highly recommended to set this property to the correct value. In case it is empty, the [Application Default Credentials](https://google.aip.dev/auth/4110) will be used for the database connection. diff --git a/receiver/googlecloudspannerreceiver/config.go b/receiver/googlecloudspannerreceiver/config.go index e6485b01ee82..61877f8570db 100644 --- a/receiver/googlecloudspannerreceiver/config.go +++ b/receiver/googlecloudspannerreceiver/config.go @@ -29,10 +29,11 @@ const ( type Config struct { scraperhelper.ScraperControllerSettings `mapstructure:",squash"` - TopMetricsQueryMaxRows int `mapstructure:"top_metrics_query_max_rows"` - BackfillEnabled bool `mapstructure:"backfill_enabled"` - CardinalityTotalLimit int `mapstructure:"cardinality_total_limit"` - Projects []Project `mapstructure:"projects"` + TopMetricsQueryMaxRows int `mapstructure:"top_metrics_query_max_rows"` + BackfillEnabled bool `mapstructure:"backfill_enabled"` + CardinalityTotalLimit int `mapstructure:"cardinality_total_limit"` + Projects []Project `mapstructure:"projects"` + HideTopnLockstatsRowrangestartkey bool `mapstructure:"hide_topn_lockstats_rowrangestartkey"` } type Project struct { diff --git a/receiver/googlecloudspannerreceiver/config_test.go b/receiver/googlecloudspannerreceiver/config_test.go index 88348f0e5444..77616a99cbe2 100644 --- a/receiver/googlecloudspannerreceiver/config_test.go +++ b/receiver/googlecloudspannerreceiver/config_test.go @@ -47,9 +47,10 @@ func TestLoadConfig(t *testing.T) { ReceiverSettings: config.NewReceiverSettings(component.NewID(typeStr)), CollectionInterval: 120 * time.Second, }, - TopMetricsQueryMaxRows: 10, - BackfillEnabled: true, - CardinalityTotalLimit: 200000, + TopMetricsQueryMaxRows: 10, + BackfillEnabled: true, + CardinalityTotalLimit: 200000, + HideTopnLockstatsRowrangestartkey: true, Projects: []Project{ { ID: "spanner project 1", diff --git a/receiver/googlecloudspannerreceiver/factory.go b/receiver/googlecloudspannerreceiver/factory.go index c58db764531e..7b8b877fd3df 100644 --- a/receiver/googlecloudspannerreceiver/factory.go +++ b/receiver/googlecloudspannerreceiver/factory.go @@ -28,9 +28,10 @@ const ( typeStr = "googlecloudspanner" stability = component.StabilityLevelBeta - defaultCollectionInterval = 60 * time.Second - defaultTopMetricsQueryMaxRows = 100 - defaultBackfillEnabled = false + defaultCollectionInterval = 60 * time.Second + defaultTopMetricsQueryMaxRows = 100 + defaultBackfillEnabled = false + defaultHideTopnLockstatsRowrangestartkey = false ) func NewFactory() component.ReceiverFactory { @@ -46,8 +47,9 @@ func createDefaultConfig() component.ReceiverConfig { ReceiverSettings: config.NewReceiverSettings(component.NewID(typeStr)), CollectionInterval: defaultCollectionInterval, }, - TopMetricsQueryMaxRows: defaultTopMetricsQueryMaxRows, - BackfillEnabled: defaultBackfillEnabled, + TopMetricsQueryMaxRows: defaultTopMetricsQueryMaxRows, + BackfillEnabled: defaultBackfillEnabled, + HideTopnLockstatsRowrangestartkey: defaultHideTopnLockstatsRowrangestartkey, } } diff --git a/receiver/googlecloudspannerreceiver/internal/metadata/labelvalue.go b/receiver/googlecloudspannerreceiver/internal/metadata/labelvalue.go index be5d92fafcd7..efb114ca56b8 100644 --- a/receiver/googlecloudspannerreceiver/internal/metadata/labelvalue.go +++ b/receiver/googlecloudspannerreceiver/internal/metadata/labelvalue.go @@ -188,6 +188,10 @@ func (v byteSliceLabelValue) SetValueTo(attributes pcommon.Map) { attributes.PutStr(v.metadata.Name(), v.value) } +func (v *byteSliceLabelValue) ModifyValue(s string) { + v.value = s +} + func newByteSliceLabelValue(metadata LabelValueMetadata, valueHolder interface{}) LabelValue { return byteSliceLabelValue{ metadata: metadata, diff --git a/receiver/googlecloudspannerreceiver/internal/metadata/labelvalue_test.go b/receiver/googlecloudspannerreceiver/internal/metadata/labelvalue_test.go index 0731d162d232..5fa689b46214 100644 --- a/receiver/googlecloudspannerreceiver/internal/metadata/labelvalue_test.go +++ b/receiver/googlecloudspannerreceiver/internal/metadata/labelvalue_test.go @@ -199,6 +199,9 @@ func TestByteSliceLabelValue(t *testing.T) { assert.True(t, exists) assert.Equal(t, stringValue, attributeValue.Str()) + + labelValue.ModifyValue(labelName) + assert.Equal(t, labelName, labelValue.Value()) } func TestLockRequestSliceLabelValue(t *testing.T) { diff --git a/receiver/googlecloudspannerreceiver/internal/metadata/metricsdatapoint.go b/receiver/googlecloudspannerreceiver/internal/metadata/metricsdatapoint.go index e62ae0e61f3d..4fa4ece19bac 100644 --- a/receiver/googlecloudspannerreceiver/internal/metadata/metricsdatapoint.go +++ b/receiver/googlecloudspannerreceiver/internal/metadata/metricsdatapoint.go @@ -16,6 +16,8 @@ package metadata // import "github.com/open-telemetry/opentelemetry-collector-co import ( "fmt" + "hash/fnv" + "strings" "time" "github.com/mitchellh/hashstructure" @@ -114,6 +116,51 @@ func (mdp *MetricsDataPoint) toDataForHashing() dataForHashing { } } +// Convert row_range_start_key label of top-lock-stats metric from format "sample(key1, key2)" to "sample(hash1, hash2)" +func parseAndHashRowrangestartkey(key string) string { + builderHashedKey := strings.Builder{} + startIndexKeys := strings.Index(key, "(") + if startIndexKeys == -1 || startIndexKeys == len(key)-1 { // if "(" does not exist or is the last character of the string, then label is of incorrect format + return "" + } + substring := key[startIndexKeys+1 : len(key)-1] + builderHashedKey.WriteString(key[:startIndexKeys+1]) + plusPresent := false + if substring[len(substring)-1] == '+' { + substring = substring[:len(substring)-1] + plusPresent = true + } + keySlice := strings.Split(substring, ",") + hashFunction := fnv.New32a() + for cnt, subKey := range keySlice { + hashFunction.Reset() + hashFunction.Write([]byte(subKey)) + if cnt < len(keySlice)-1 { + builderHashedKey.WriteString(fmt.Sprint(hashFunction.Sum32()) + ",") + } else { + builderHashedKey.WriteString(fmt.Sprint(hashFunction.Sum32())) + } + } + if plusPresent { + builderHashedKey.WriteString("+") + } + builderHashedKey.WriteString(")") + return builderHashedKey.String() +} + +func (mdp *MetricsDataPoint) HideLockStatsRowrangestartkeyPII() { + for index, labelValue := range mdp.labelValues { + if labelValue.Metadata().Name() == "row_range_start_key" { + key := labelValue.Value().(string) + hashedKey := parseAndHashRowrangestartkey(key) + v := mdp.labelValues[index].(byteSliceLabelValue) + p := &v + p.ModifyValue(hashedKey) + mdp.labelValues[index] = v + } + } +} + func (mdp *MetricsDataPoint) hash() (string, error) { hashedData, err := hashstructure.Hash(mdp.toDataForHashing(), nil) if err != nil { diff --git a/receiver/googlecloudspannerreceiver/internal/metadata/metricsdatapoint_test.go b/receiver/googlecloudspannerreceiver/internal/metadata/metricsdatapoint_test.go index af08fc644079..198f89fae5d2 100644 --- a/receiver/googlecloudspannerreceiver/internal/metadata/metricsdatapoint_test.go +++ b/receiver/googlecloudspannerreceiver/internal/metadata/metricsdatapoint_test.go @@ -15,6 +15,8 @@ package metadata import ( + "fmt" + "hash/fnv" "testing" "time" @@ -111,6 +113,56 @@ func TestMetricsDataPoint_CopyTo(t *testing.T) { } } +func TestMetricsDataPoint_HideLockStatsRowrangestartkeyPII(t *testing.T) { + btSliceLabelValueMetadata, _ := NewLabelValueMetadata("row_range_start_key", "byteSliceLabelColumnName", StringValueType) + labelValue1 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table1.s(23,hello,23+)"} + labelValue2 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table2(23,hello)"} + metricValues := allPossibleMetricValues(metricDataType) + labelValues := []LabelValue{labelValue1, labelValue2} + timestamp := time.Now().UTC() + metricsDataPoint := &MetricsDataPoint{ + metricName: metricName, + timestamp: timestamp, + databaseID: databaseID(), + labelValues: labelValues, + metricValue: metricValues[0], + } + hashFunction := fnv.New32a() + hashFunction.Reset() + hashFunction.Write([]byte("23")) + hashOf23 := fmt.Sprint(hashFunction.Sum32()) + hashFunction.Reset() + hashFunction.Write([]byte("hello")) + hashOfHello := fmt.Sprint(hashFunction.Sum32()) + + metricsDataPoint.HideLockStatsRowrangestartkeyPII() + + assert.Equal(t, len(metricsDataPoint.labelValues), 2) + assert.Equal(t, metricsDataPoint.labelValues[0].Value(), "table1.s("+hashOf23+","+hashOfHello+","+hashOf23+"+)") + assert.Equal(t, metricsDataPoint.labelValues[1].Value(), "table2("+hashOf23+","+hashOfHello+")") +} + +func TestMetricsDataPoint_HideLockStatsRowrangestartkeyPIIWithInvalidLabelValue(t *testing.T) { + // We are checking that function HideLockStatsRowrangestartkeyPII() does not panic for invalid label values. + btSliceLabelValueMetadata, _ := NewLabelValueMetadata("row_range_start_key", "byteSliceLabelColumnName", StringValueType) + labelValue1 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: ""} + labelValue2 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table22(hello"} + labelValue3 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table22,hello"} + labelValue4 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "("} + metricValues := allPossibleMetricValues(metricDataType) + labelValues := []LabelValue{labelValue1, labelValue2, labelValue3, labelValue4} + timestamp := time.Now().UTC() + metricsDataPoint := &MetricsDataPoint{ + metricName: metricName, + timestamp: timestamp, + databaseID: databaseID(), + labelValues: labelValues, + metricValue: metricValues[0], + } + metricsDataPoint.HideLockStatsRowrangestartkeyPII() + assert.Equal(t, len(metricsDataPoint.labelValues), 4) +} + func allPossibleLabelValues() []LabelValue { strLabelValueMetadata, _ := NewLabelValueMetadata("stringLabelName", "stringLabelColumnName", StringValueType) strLabelValue := stringLabelValue{ diff --git a/receiver/googlecloudspannerreceiver/internal/statsreader/intervalstatsreader.go b/receiver/googlecloudspannerreceiver/internal/statsreader/intervalstatsreader.go index 38902dce1fb3..1c6bd0945467 100644 --- a/receiver/googlecloudspannerreceiver/internal/statsreader/intervalstatsreader.go +++ b/receiver/googlecloudspannerreceiver/internal/statsreader/intervalstatsreader.go @@ -30,12 +30,14 @@ const ( // Since, the initial intent was to work mainly with Prometheus backend, // this constant was set to 1 hour - max allowed interval by Prometheus. backfillIntervalDuration = time.Hour + topLockStatsMetricName = "top minute lock stats" ) type intervalStatsReader struct { currentStatsReader - timestampsGenerator *timestampsGenerator - lastPullTimestamp time.Time + timestampsGenerator *timestampsGenerator + lastPullTimestamp time.Time + hideTopnLockstatsRowrangestartkey bool } func newIntervalStatsReader( @@ -57,8 +59,9 @@ func newIntervalStatsReader( } return &intervalStatsReader{ - currentStatsReader: reader, - timestampsGenerator: tsGenerator, + currentStatsReader: reader, + timestampsGenerator: tsGenerator, + hideTopnLockstatsRowrangestartkey: config.HideTopnLockstatsRowrangestartkey, } } @@ -82,6 +85,12 @@ func (reader *intervalStatsReader) Read(ctx context.Context) ([]*metadata.Metric if err != nil { return nil, err } + metricMetadata := reader.currentStatsReader.metricsMetadata + if reader.hideTopnLockstatsRowrangestartkey && metricMetadata != nil && metricMetadata.Name == topLockStatsMetricName { + for _, dataPoint := range dataPoints { + dataPoint.HideLockStatsRowrangestartkeyPII() + } + } collectedDataPoints = append(collectedDataPoints, dataPoints...) } diff --git a/receiver/googlecloudspannerreceiver/internal/statsreader/intervalstatsreader_test.go b/receiver/googlecloudspannerreceiver/internal/statsreader/intervalstatsreader_test.go index 2307210310ca..2be3a4f8cedc 100644 --- a/receiver/googlecloudspannerreceiver/internal/statsreader/intervalstatsreader_test.go +++ b/receiver/googlecloudspannerreceiver/internal/statsreader/intervalstatsreader_test.go @@ -57,8 +57,9 @@ func TestNewIntervalStatsReader(t *testing.T) { } logger := zaptest.NewLogger(t) config := ReaderConfig{ - TopMetricsQueryMaxRows: topMetricsQueryMaxRows, - BackfillEnabled: true, + TopMetricsQueryMaxRows: topMetricsQueryMaxRows, + BackfillEnabled: true, + HideTopnLockstatsRowrangestartkey: true, } reader := newIntervalStatsReader(logger, database, metricsMetadata, config) @@ -69,6 +70,7 @@ func TestNewIntervalStatsReader(t *testing.T) { assert.Equal(t, topMetricsQueryMaxRows, reader.topMetricsQueryMaxRows) assert.NotNil(t, reader.timestampsGenerator) assert.True(t, reader.timestampsGenerator.backfillEnabled) + assert.True(t, reader.hideTopnLockstatsRowrangestartkey) } func TestIntervalStatsReader_NewPullStatement(t *testing.T) { @@ -76,8 +78,9 @@ func TestIntervalStatsReader_NewPullStatement(t *testing.T) { timestamp := time.Now().UTC() logger := zaptest.NewLogger(t) config := ReaderConfig{ - TopMetricsQueryMaxRows: topMetricsQueryMaxRows, - BackfillEnabled: false, + TopMetricsQueryMaxRows: topMetricsQueryMaxRows, + BackfillEnabled: false, + HideTopnLockstatsRowrangestartkey: true, } ctx := context.Background() client, _ := spanner.NewClient(ctx, "") diff --git a/receiver/googlecloudspannerreceiver/internal/statsreader/reader.go b/receiver/googlecloudspannerreceiver/internal/statsreader/reader.go index d40024a1ea6c..59be07369e2a 100644 --- a/receiver/googlecloudspannerreceiver/internal/statsreader/reader.go +++ b/receiver/googlecloudspannerreceiver/internal/statsreader/reader.go @@ -21,8 +21,9 @@ import ( ) type ReaderConfig struct { - TopMetricsQueryMaxRows int - BackfillEnabled bool + TopMetricsQueryMaxRows int + BackfillEnabled bool + HideTopnLockstatsRowrangestartkey bool } type Reader interface { diff --git a/receiver/googlecloudspannerreceiver/receiver.go b/receiver/googlecloudspannerreceiver/receiver.go index d2e0333a5cbb..a58488ab6f30 100644 --- a/receiver/googlecloudspannerreceiver/receiver.go +++ b/receiver/googlecloudspannerreceiver/receiver.go @@ -108,8 +108,9 @@ func (r *googleCloudSpannerReceiver) initializeProjectReaders(ctx context.Contex parsedMetadata []*metadata.MetricsMetadata) error { readerConfig := statsreader.ReaderConfig{ - BackfillEnabled: r.config.BackfillEnabled, - TopMetricsQueryMaxRows: r.config.TopMetricsQueryMaxRows, + BackfillEnabled: r.config.BackfillEnabled, + TopMetricsQueryMaxRows: r.config.TopMetricsQueryMaxRows, + HideTopnLockstatsRowrangestartkey: r.config.HideTopnLockstatsRowrangestartkey, } for _, project := range r.config.Projects { diff --git a/receiver/googlecloudspannerreceiver/testdata/config.yaml b/receiver/googlecloudspannerreceiver/testdata/config.yaml index fc4515d2ac82..86d7a770bdab 100644 --- a/receiver/googlecloudspannerreceiver/testdata/config.yaml +++ b/receiver/googlecloudspannerreceiver/testdata/config.yaml @@ -3,6 +3,7 @@ googlecloudspanner: top_metrics_query_max_rows: 10 backfill_enabled: true cardinality_total_limit: 200000 + hide_topn_lockstats_rowrangestartkey: true projects: - project_id: "spanner project 1" service_account_key: "path to spanner project 1 service account json key"