GoogleCloudSpannerReceiver: Mask lock stats PII (#16343)
* Hide lock stats PII

changelog

.

Code review changes

* chlog

* correct merge conflicts

* remove extra space

* code review changes

* Added another test case
architjugran authored Nov 22, 2022
1 parent 350d257 commit c7cb633
Showing 14 changed files with 156 additions and 24 deletions.
5 changes: 5 additions & 0 deletions .chloggen/googlecloudspannerreceiver-hide-lock-stats-pii.yaml
@@ -0,0 +1,5 @@
change_type: enhancement
component: googlecloudspannerreceiver
note: Configurably mask the PII in lock stats metrics.
issues: [16343]
subtext:
2 changes: 2 additions & 0 deletions receiver/googlecloudspannerreceiver/README.md
@@ -32,6 +32,7 @@ receivers:
top_metrics_query_max_rows: 100
backfill_enabled: true
cardinality_total_limit: 200000
hide_topn_lockstats_rowrangestartkey: false
projects:
- project_id: "spanner project 1"
service_account_key: "path to spanner project 1 service account json key"
@@ -63,6 +64,7 @@ Brief description of configuration properties:
- **top_metrics_query_max_rows** - max number of rows to fetch from Top N built-in table (100 by default)
- **backfill_enabled** - turn on/off 1-hour data backfill (by default it is turned off)
- **cardinality_total_limit** - limit of active series per 24 hours period. If specified, turns on cardinality filtering and handling. If zero or not specified, cardinality is not handled. You can read [this document](cardinality.md) for more information about cardinality handling and filtering.
- **hide_topn_lockstats_rowrangestartkey** - if true, masks PII (key values) in row_range_start_key label for the "top minute lock stats" metric
- **projects** - list of GCP projects
- **project_id** - identifier of GCP project
- **service_account_key** - path to service account JSON key. It is highly recommended to set this property to the correct value. In case it is empty, the [Application Default Credentials](https://google.aip.dev/auth/4110) will be used for the database connection.
9 changes: 5 additions & 4 deletions receiver/googlecloudspannerreceiver/config.go
@@ -29,10 +29,11 @@ const (
type Config struct {
scraperhelper.ScraperControllerSettings `mapstructure:",squash"`

TopMetricsQueryMaxRows int `mapstructure:"top_metrics_query_max_rows"`
BackfillEnabled bool `mapstructure:"backfill_enabled"`
CardinalityTotalLimit int `mapstructure:"cardinality_total_limit"`
Projects []Project `mapstructure:"projects"`
TopMetricsQueryMaxRows int `mapstructure:"top_metrics_query_max_rows"`
BackfillEnabled bool `mapstructure:"backfill_enabled"`
CardinalityTotalLimit int `mapstructure:"cardinality_total_limit"`
Projects []Project `mapstructure:"projects"`
HideTopnLockstatsRowrangestartkey bool `mapstructure:"hide_topn_lockstats_rowrangestartkey"`
}

type Project struct {
7 changes: 4 additions & 3 deletions receiver/googlecloudspannerreceiver/config_test.go
@@ -47,9 +47,10 @@ func TestLoadConfig(t *testing.T) {
ReceiverSettings: config.NewReceiverSettings(component.NewID(typeStr)),
CollectionInterval: 120 * time.Second,
},
TopMetricsQueryMaxRows: 10,
BackfillEnabled: true,
CardinalityTotalLimit: 200000,
TopMetricsQueryMaxRows: 10,
BackfillEnabled: true,
CardinalityTotalLimit: 200000,
HideTopnLockstatsRowrangestartkey: true,
Projects: []Project{
{
ID: "spanner project 1",
12 changes: 7 additions & 5 deletions receiver/googlecloudspannerreceiver/factory.go
@@ -28,9 +28,10 @@ const (
typeStr = "googlecloudspanner"
stability = component.StabilityLevelBeta

defaultCollectionInterval = 60 * time.Second
defaultTopMetricsQueryMaxRows = 100
defaultBackfillEnabled = false
defaultCollectionInterval = 60 * time.Second
defaultTopMetricsQueryMaxRows = 100
defaultBackfillEnabled = false
defaultHideTopnLockstatsRowrangestartkey = false
)

func NewFactory() component.ReceiverFactory {
@@ -46,8 +47,9 @@ func createDefaultConfig() component.ReceiverConfig {
ReceiverSettings: config.NewReceiverSettings(component.NewID(typeStr)),
CollectionInterval: defaultCollectionInterval,
},
TopMetricsQueryMaxRows: defaultTopMetricsQueryMaxRows,
BackfillEnabled: defaultBackfillEnabled,
TopMetricsQueryMaxRows: defaultTopMetricsQueryMaxRows,
BackfillEnabled: defaultBackfillEnabled,
HideTopnLockstatsRowrangestartkey: defaultHideTopnLockstatsRowrangestartkey,
}
}

@@ -188,6 +188,10 @@ func (v byteSliceLabelValue) SetValueTo(attributes pcommon.Map) {
attributes.PutStr(v.metadata.Name(), v.value)
}

func (v *byteSliceLabelValue) ModifyValue(s string) {
v.value = s
}

func newByteSliceLabelValue(metadata LabelValueMetadata, valueHolder interface{}) LabelValue {
return byteSliceLabelValue{
metadata: metadata,
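The new ModifyValue method has a pointer receiver, while byteSliceLabelValue is stored by value behind the LabelValue interface. That is why the masking code further down copies the value out of the slice, mutates it through a pointer, and stores it back. A minimal, self-contained sketch of that pattern, using hypothetical stand-in types rather than the receiver's own:

```go
package main

import "fmt"

type labelValue interface{ Value() string }

type byteSliceValue struct{ v string }

func (b byteSliceValue) Value() string          { return b.v }
func (b *byteSliceValue) ModifyValue(s string) { b.v = s }

func main() {
	values := []labelValue{byteSliceValue{v: "albums(1,mysong)"}}

	// values[0].(byteSliceValue).ModifyValue("masked") would not compile:
	// the result of a type assertion is not addressable, so pointer-receiver
	// methods cannot be called on it directly.
	v := values[0].(byteSliceValue) // copy the concrete value out
	(&v).ModifyValue("masked")      // mutate it through a pointer
	values[0] = v                   // store the modified copy back

	fmt.Println(values[0].Value()) // masked
}
```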
@@ -199,6 +199,9 @@ func TestByteSliceLabelValue(t *testing.T) {

assert.True(t, exists)
assert.Equal(t, stringValue, attributeValue.Str())

labelValue.ModifyValue(labelName)
assert.Equal(t, labelName, labelValue.Value())
}

func TestLockRequestSliceLabelValue(t *testing.T) {
@@ -16,6 +16,8 @@ package metadata // import "github.com/open-telemetry/opentelemetry-collector-co

import (
"fmt"
"hash/fnv"
"strings"
"time"

"github.com/mitchellh/hashstructure"
@@ -114,6 +116,51 @@ func (mdp *MetricsDataPoint) toDataForHashing() dataForHashing {
}
}

// Convert row_range_start_key label of top-lock-stats metric from format "sample(key1, key2)" to "sample(hash1, hash2)"
func parseAndHashRowrangestartkey(key string) string {
builderHashedKey := strings.Builder{}
startIndexKeys := strings.Index(key, "(")
if startIndexKeys == -1 || startIndexKeys == len(key)-1 { // if "(" does not exist or is the last character of the string, then label is of incorrect format
return ""
}
substring := key[startIndexKeys+1 : len(key)-1]
builderHashedKey.WriteString(key[:startIndexKeys+1])
plusPresent := false
if substring[len(substring)-1] == '+' {
substring = substring[:len(substring)-1]
plusPresent = true
}
keySlice := strings.Split(substring, ",")
hashFunction := fnv.New32a()
for cnt, subKey := range keySlice {
hashFunction.Reset()
hashFunction.Write([]byte(subKey))
if cnt < len(keySlice)-1 {
builderHashedKey.WriteString(fmt.Sprint(hashFunction.Sum32()) + ",")
} else {
builderHashedKey.WriteString(fmt.Sprint(hashFunction.Sum32()))
}
}
if plusPresent {
builderHashedKey.WriteString("+")
}
builderHashedKey.WriteString(")")
return builderHashedKey.String()
}

func (mdp *MetricsDataPoint) HideLockStatsRowrangestartkeyPII() {
for index, labelValue := range mdp.labelValues {
if labelValue.Metadata().Name() == "row_range_start_key" {
key := labelValue.Value().(string)
hashedKey := parseAndHashRowrangestartkey(key)
v := mdp.labelValues[index].(byteSliceLabelValue)
p := &v
p.ModifyValue(hashedKey)
mdp.labelValues[index] = v
}
}
}

func (mdp *MetricsDataPoint) hash() (string, error) {
hashedData, err := hashstructure.Hash(mdp.toDataForHashing(), nil)
if err != nil {
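For a concrete sense of what parseAndHashRowrangestartkey produces, here is a small standalone sketch (the key value is made up for illustration) that applies the same FNV-1a 32-bit hash to each comma-separated key part:

```go
package main

import (
	"fmt"
	"hash/fnv"
)

func main() {
	// Hypothetical row_range_start_key: "albums(1,mysong,3+)".
	// The table/index prefix up to "(" is kept, each key part is replaced
	// by its FNV-1a 32-bit hash, and a trailing "+" is preserved.
	parts := []string{"1", "mysong", "3"}
	hashed := make([]string, len(parts))
	for i, p := range parts {
		h := fnv.New32a()
		h.Write([]byte(p))
		hashed[i] = fmt.Sprint(h.Sum32())
	}
	fmt.Printf("albums(%s,%s,%s+)\n", hashed[0], hashed[1], hashed[2])
}
```

Since FNV-1a is deterministic, identical key values still map to identical hashed labels, so series identity is preserved while the raw key content is hidden.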
@@ -15,6 +15,8 @@
package metadata

import (
"fmt"
"hash/fnv"
"testing"
"time"

@@ -111,6 +113,56 @@ func TestMetricsDataPoint_CopyTo(t *testing.T) {
}
}

func TestMetricsDataPoint_HideLockStatsRowrangestartkeyPII(t *testing.T) {
btSliceLabelValueMetadata, _ := NewLabelValueMetadata("row_range_start_key", "byteSliceLabelColumnName", StringValueType)
labelValue1 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table1.s(23,hello,23+)"}
labelValue2 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table2(23,hello)"}
metricValues := allPossibleMetricValues(metricDataType)
labelValues := []LabelValue{labelValue1, labelValue2}
timestamp := time.Now().UTC()
metricsDataPoint := &MetricsDataPoint{
metricName: metricName,
timestamp: timestamp,
databaseID: databaseID(),
labelValues: labelValues,
metricValue: metricValues[0],
}
hashFunction := fnv.New32a()
hashFunction.Reset()
hashFunction.Write([]byte("23"))
hashOf23 := fmt.Sprint(hashFunction.Sum32())
hashFunction.Reset()
hashFunction.Write([]byte("hello"))
hashOfHello := fmt.Sprint(hashFunction.Sum32())

metricsDataPoint.HideLockStatsRowrangestartkeyPII()

assert.Equal(t, len(metricsDataPoint.labelValues), 2)
assert.Equal(t, metricsDataPoint.labelValues[0].Value(), "table1.s("+hashOf23+","+hashOfHello+","+hashOf23+"+)")
assert.Equal(t, metricsDataPoint.labelValues[1].Value(), "table2("+hashOf23+","+hashOfHello+")")
}

func TestMetricsDataPoint_HideLockStatsRowrangestartkeyPIIWithInvalidLabelValue(t *testing.T) {
// We are checking that function HideLockStatsRowrangestartkeyPII() does not panic for invalid label values.
btSliceLabelValueMetadata, _ := NewLabelValueMetadata("row_range_start_key", "byteSliceLabelColumnName", StringValueType)
labelValue1 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: ""}
labelValue2 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table22(hello"}
labelValue3 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table22,hello"}
labelValue4 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "("}
metricValues := allPossibleMetricValues(metricDataType)
labelValues := []LabelValue{labelValue1, labelValue2, labelValue3, labelValue4}
timestamp := time.Now().UTC()
metricsDataPoint := &MetricsDataPoint{
metricName: metricName,
timestamp: timestamp,
databaseID: databaseID(),
labelValues: labelValues,
metricValue: metricValues[0],
}
metricsDataPoint.HideLockStatsRowrangestartkeyPII()
assert.Equal(t, len(metricsDataPoint.labelValues), 4)
}

func allPossibleLabelValues() []LabelValue {
strLabelValueMetadata, _ := NewLabelValueMetadata("stringLabelName", "stringLabelColumnName", StringValueType)
strLabelValue := stringLabelValue{
@@ -30,12 +30,14 @@ const (
// Since, the initial intent was to work mainly with Prometheus backend,
// this constant was set to 1 hour - max allowed interval by Prometheus.
backfillIntervalDuration = time.Hour
topLockStatsMetricName = "top minute lock stats"
)

type intervalStatsReader struct {
currentStatsReader
timestampsGenerator *timestampsGenerator
lastPullTimestamp time.Time
timestampsGenerator *timestampsGenerator
lastPullTimestamp time.Time
hideTopnLockstatsRowrangestartkey bool
}

func newIntervalStatsReader(
@@ -57,8 +59,9 @@ func newIntervalStatsReader(
}

return &intervalStatsReader{
currentStatsReader: reader,
timestampsGenerator: tsGenerator,
currentStatsReader: reader,
timestampsGenerator: tsGenerator,
hideTopnLockstatsRowrangestartkey: config.HideTopnLockstatsRowrangestartkey,
}
}

@@ -82,6 +85,12 @@ func (reader *intervalStatsReader) Read(ctx context.Context) ([]*metadata.Metric
if err != nil {
return nil, err
}
metricMetadata := reader.currentStatsReader.metricsMetadata
if reader.hideTopnLockstatsRowrangestartkey && metricMetadata != nil && metricMetadata.Name == topLockStatsMetricName {
for _, dataPoint := range dataPoints {
dataPoint.HideLockStatsRowrangestartkeyPII()
}
}

collectedDataPoints = append(collectedDataPoints, dataPoints...)
}
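The Read path applies the masking only when the option is enabled and the metric being read is the top-lock-stats one; a condensed sketch of that gating, with hypothetical stand-ins for the reader's types:

```go
package main

import "fmt"

const topLockStatsMetricName = "top minute lock stats"

// dataPoint and hidePII are simplified stand-ins for
// metadata.MetricsDataPoint and HideLockStatsRowrangestartkeyPII.
type dataPoint struct{ rowRangeStartKey string }

func (d *dataPoint) hidePII() { d.rowRangeStartKey = "albums(<hashed>)" }

func maskIfNeeded(hideFlag bool, metricName string, points []*dataPoint) {
	if hideFlag && metricName == topLockStatsMetricName {
		for _, p := range points {
			p.hidePII()
		}
	}
}

func main() {
	points := []*dataPoint{{rowRangeStartKey: "albums(1,mysong)"}}
	maskIfNeeded(true, "top minute lock stats", points)
	fmt.Println(points[0].rowRangeStartKey) // masked; other metrics pass through untouched
}
```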
@@ -57,8 +57,9 @@ func TestNewIntervalStatsReader(t *testing.T) {
}
logger := zaptest.NewLogger(t)
config := ReaderConfig{
TopMetricsQueryMaxRows: topMetricsQueryMaxRows,
BackfillEnabled: true,
TopMetricsQueryMaxRows: topMetricsQueryMaxRows,
BackfillEnabled: true,
HideTopnLockstatsRowrangestartkey: true,
}

reader := newIntervalStatsReader(logger, database, metricsMetadata, config)
@@ -69,15 +70,17 @@
assert.Equal(t, topMetricsQueryMaxRows, reader.topMetricsQueryMaxRows)
assert.NotNil(t, reader.timestampsGenerator)
assert.True(t, reader.timestampsGenerator.backfillEnabled)
assert.True(t, reader.hideTopnLockstatsRowrangestartkey)
}

func TestIntervalStatsReader_NewPullStatement(t *testing.T) {
databaseID := datasource.NewDatabaseID(projectID, instanceID, databaseName)
timestamp := time.Now().UTC()
logger := zaptest.NewLogger(t)
config := ReaderConfig{
TopMetricsQueryMaxRows: topMetricsQueryMaxRows,
BackfillEnabled: false,
TopMetricsQueryMaxRows: topMetricsQueryMaxRows,
BackfillEnabled: false,
HideTopnLockstatsRowrangestartkey: true,
}
ctx := context.Background()
client, _ := spanner.NewClient(ctx, "")
@@ -21,8 +21,9 @@ import (
)

type ReaderConfig struct {
TopMetricsQueryMaxRows int
BackfillEnabled bool
TopMetricsQueryMaxRows int
BackfillEnabled bool
HideTopnLockstatsRowrangestartkey bool
}

type Reader interface {
5 changes: 3 additions & 2 deletions receiver/googlecloudspannerreceiver/receiver.go
@@ -108,8 +108,9 @@ func (r *googleCloudSpannerReceiver) initializeProjectReaders(ctx context.Contex
parsedMetadata []*metadata.MetricsMetadata) error {

readerConfig := statsreader.ReaderConfig{
BackfillEnabled: r.config.BackfillEnabled,
TopMetricsQueryMaxRows: r.config.TopMetricsQueryMaxRows,
BackfillEnabled: r.config.BackfillEnabled,
TopMetricsQueryMaxRows: r.config.TopMetricsQueryMaxRows,
HideTopnLockstatsRowrangestartkey: r.config.HideTopnLockstatsRowrangestartkey,
}

for _, project := range r.config.Projects {
1 change: 1 addition & 0 deletions receiver/googlecloudspannerreceiver/testdata/config.yaml
@@ -3,6 +3,7 @@ googlecloudspanner:
top_metrics_query_max_rows: 10
backfill_enabled: true
cardinality_total_limit: 200000
hide_topn_lockstats_rowrangestartkey: true
projects:
- project_id: "spanner project 1"
service_account_key: "path to spanner project 1 service account json key"