Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GoogleCloudSpannerReceiver: Mask lock stats PII #16343

Merged
merged 6 commits into from
Nov 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
change_type: enhancement
component: googlecloudspannerreceiver
note: Configurably mask the PII in lock stats metrics.
issues: [16343]
subtext:
2 changes: 2 additions & 0 deletions receiver/googlecloudspannerreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ receivers:
top_metrics_query_max_rows: 100
backfill_enabled: true
cardinality_total_limit: 200000
hide_topn_lockstats_rowrangestartkey: false
projects:
- project_id: "spanner project 1"
service_account_key: "path to spanner project 1 service account json key"
Expand Down Expand Up @@ -63,6 +64,7 @@ Brief description of configuration properties:
- **top_metrics_query_max_rows** - max number of rows to fetch from Top N built-in table (100 by default)
- **backfill_enabled** - turn on/off 1-hour data backfill (by default it is turned off)
- **cardinality_total_limit** - limit of active series per 24-hour period. If specified, turns on cardinality filtering and handling. If zero or not specified, cardinality is not handled. You can read [this document](cardinality.md) for more information about cardinality handling and filtering.
- **hide_topn_lockstats_rowrangestartkey** - if true, masks PII (key values) in row_range_start_key label for the "top minute lock stats" metric
- **projects** - list of GCP projects
- **project_id** - identifier of GCP project
- **service_account_key** - path to service account JSON key. It is highly recommended to set this property to the correct value. In case it is empty, the [Application Default Credentials](https://google.aip.dev/auth/4110) will be used for the database connection.
Expand Down
9 changes: 5 additions & 4 deletions receiver/googlecloudspannerreceiver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ const (
// Config holds the receiver settings; each field is populated from the
// configuration key named in its mapstructure tag.
type Config struct {
scraperhelper.ScraperControllerSettings `mapstructure:",squash"`

TopMetricsQueryMaxRows int `mapstructure:"top_metrics_query_max_rows"`
BackfillEnabled bool `mapstructure:"backfill_enabled"`
CardinalityTotalLimit int `mapstructure:"cardinality_total_limit"`
Projects []Project `mapstructure:"projects"`
TopMetricsQueryMaxRows int `mapstructure:"top_metrics_query_max_rows"`
BackfillEnabled bool `mapstructure:"backfill_enabled"`
CardinalityTotalLimit int `mapstructure:"cardinality_total_limit"`
Projects []Project `mapstructure:"projects"`
// HideTopnLockstatsRowrangestartkey, when true, requests masking of the key
// values in the row_range_start_key label of the "top minute lock stats" metric.
HideTopnLockstatsRowrangestartkey bool `mapstructure:"hide_topn_lockstats_rowrangestartkey"`
}

type Project struct {
Expand Down
7 changes: 4 additions & 3 deletions receiver/googlecloudspannerreceiver/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ func TestLoadConfig(t *testing.T) {
ReceiverSettings: config.NewReceiverSettings(component.NewID(typeStr)),
CollectionInterval: 120 * time.Second,
},
TopMetricsQueryMaxRows: 10,
BackfillEnabled: true,
CardinalityTotalLimit: 200000,
TopMetricsQueryMaxRows: 10,
BackfillEnabled: true,
CardinalityTotalLimit: 200000,
HideTopnLockstatsRowrangestartkey: true,
Projects: []Project{
{
ID: "spanner project 1",
Expand Down
12 changes: 7 additions & 5 deletions receiver/googlecloudspannerreceiver/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ const (
typeStr = "googlecloudspanner"
stability = component.StabilityLevelBeta

defaultCollectionInterval = 60 * time.Second
defaultTopMetricsQueryMaxRows = 100
defaultBackfillEnabled = false
defaultCollectionInterval = 60 * time.Second
defaultTopMetricsQueryMaxRows = 100
defaultBackfillEnabled = false
defaultHideTopnLockstatsRowrangestartkey = false
)

func NewFactory() component.ReceiverFactory {
Expand All @@ -46,8 +47,9 @@ func createDefaultConfig() component.ReceiverConfig {
ReceiverSettings: config.NewReceiverSettings(component.NewID(typeStr)),
CollectionInterval: defaultCollectionInterval,
},
TopMetricsQueryMaxRows: defaultTopMetricsQueryMaxRows,
BackfillEnabled: defaultBackfillEnabled,
TopMetricsQueryMaxRows: defaultTopMetricsQueryMaxRows,
BackfillEnabled: defaultBackfillEnabled,
HideTopnLockstatsRowrangestartkey: defaultHideTopnLockstatsRowrangestartkey,
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@ func (v byteSliceLabelValue) SetValueTo(attributes pcommon.Map) {
attributes.PutStr(v.metadata.Name(), v.value)
}

// ModifyValue overwrites the stored label value with s. It uses a pointer
// receiver so that the assignment is applied to the receiver it is called on
// rather than to a copy.
func (v *byteSliceLabelValue) ModifyValue(s string) {
v.value = s
}

func newByteSliceLabelValue(metadata LabelValueMetadata, valueHolder interface{}) LabelValue {
return byteSliceLabelValue{
metadata: metadata,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ func TestByteSliceLabelValue(t *testing.T) {

assert.True(t, exists)
assert.Equal(t, stringValue, attributeValue.Str())

labelValue.ModifyValue(labelName)
assert.Equal(t, labelName, labelValue.Value())
}

func TestLockRequestSliceLabelValue(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ package metadata // import "github.com/open-telemetry/opentelemetry-collector-co

import (
"fmt"
"hash/fnv"
"strings"
"time"

"github.com/mitchellh/hashstructure"
Expand Down Expand Up @@ -114,6 +116,51 @@ func (mdp *MetricsDataPoint) toDataForHashing() dataForHashing {
}
}

// parseAndHashRowrangestartkey masks the key values inside a row_range_start_key
// label of the top-lock-stats metric, converting "sample(key1,key2)" into
// "sample(hash1,hash2)". Each hash is the decimal 32-bit FNV-1a hash of the
// corresponding comma-separated key part. A trailing "+" just before the
// closing parenthesis (e.g. "sample(key1,key2+)") is kept verbatim.
//
// The prefix up to and including the first "(" is copied unchanged.
//
// An empty string is returned when the label is not of the form "name(...)",
// i.e. when it contains no "(" or does not end with ")", so that no part of a
// malformed value is ever emitted unmasked. Empty key lists such as "t()" are
// handled without panicking.
func parseAndHashRowrangestartkey(key string) string {
	openIdx := strings.Index(key, "(")
	if openIdx == -1 || !strings.HasSuffix(key, ")") {
		return ""
	}

	// Text between the first "(" and the closing ")"; may legitimately be empty.
	keys := key[openIdx+1 : len(key)-1]

	// A single trailing "+" marks a key range; strip it before hashing and
	// re-append it afterwards so that it is not folded into the last hash.
	plusPresent := strings.HasSuffix(keys, "+")
	if plusPresent {
		keys = strings.TrimSuffix(keys, "+")
	}

	var hashedKey strings.Builder
	hashedKey.WriteString(key[:openIdx+1])

	hasher := fnv.New32a()
	for i, subKey := range strings.Split(keys, ",") {
		if i > 0 {
			hashedKey.WriteByte(',')
		}
		hasher.Reset()
		// hash.Hash documents that Write never returns an error.
		_, _ = hasher.Write([]byte(subKey))
		fmt.Fprint(&hashedKey, hasher.Sum32())
	}

	if plusPresent {
		hashedKey.WriteString("+")
	}
	hashedKey.WriteString(")")
	return hashedKey.String()
}

// HideLockStatsRowrangestartkeyPII replaces the value of every label named
// "row_range_start_key" on this data point with the masked form produced by
// parseAndHashRowrangestartkey. Labels with any other name are left untouched.
func (mdp *MetricsDataPoint) HideLockStatsRowrangestartkeyPII() {
	for i := range mdp.labelValues {
		current := mdp.labelValues[i]
		if current.Metadata().Name() != "row_range_start_key" {
			continue
		}

		masked := parseAndHashRowrangestartkey(current.Value().(string))

		// The slice holds the label by value, so modify a local copy and
		// store it back into the same position.
		updated := current.(byteSliceLabelValue)
		updated.ModifyValue(masked)
		mdp.labelValues[i] = updated
	}
}

func (mdp *MetricsDataPoint) hash() (string, error) {
hashedData, err := hashstructure.Hash(mdp.toDataForHashing(), nil)
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package metadata

import (
"fmt"
"hash/fnv"
"testing"
"time"

Expand Down Expand Up @@ -111,6 +113,56 @@ func TestMetricsDataPoint_CopyTo(t *testing.T) {
}
}

func TestMetricsDataPoint_HideLockStatsRowrangestartkeyPII(t *testing.T) {
btSliceLabelValueMetadata, _ := NewLabelValueMetadata("row_range_start_key", "byteSliceLabelColumnName", StringValueType)
labelValue1 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table1.s(23,hello,23+)"}
labelValue2 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table2(23,hello)"}
dashpole marked this conversation as resolved.
Show resolved Hide resolved
metricValues := allPossibleMetricValues(metricDataType)
labelValues := []LabelValue{labelValue1, labelValue2}
timestamp := time.Now().UTC()
metricsDataPoint := &MetricsDataPoint{
metricName: metricName,
timestamp: timestamp,
databaseID: databaseID(),
labelValues: labelValues,
metricValue: metricValues[0],
}
hashFunction := fnv.New32a()
hashFunction.Reset()
hashFunction.Write([]byte("23"))
hashOf23 := fmt.Sprint(hashFunction.Sum32())
hashFunction.Reset()
hashFunction.Write([]byte("hello"))
hashOfHello := fmt.Sprint(hashFunction.Sum32())

metricsDataPoint.HideLockStatsRowrangestartkeyPII()

assert.Equal(t, len(metricsDataPoint.labelValues), 2)
assert.Equal(t, metricsDataPoint.labelValues[0].Value(), "table1.s("+hashOf23+","+hashOfHello+","+hashOf23+"+)")
assert.Equal(t, metricsDataPoint.labelValues[1].Value(), "table2("+hashOf23+","+hashOfHello+")")
}

// TestMetricsDataPoint_HideLockStatsRowrangestartkeyPIIWithInvalidLabelValue
// checks that HideLockStatsRowrangestartkeyPII does not panic on label values
// that do not follow the "name(key1,key2)" format and that no label is dropped.
func TestMetricsDataPoint_HideLockStatsRowrangestartkeyPIIWithInvalidLabelValue(t *testing.T) {
	btSliceLabelValueMetadata, _ := NewLabelValueMetadata("row_range_start_key", "byteSliceLabelColumnName", StringValueType)
	labelValue1 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: ""}
	labelValue2 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table22(hello"}
	labelValue3 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "table22,hello"}
	labelValue4 := byteSliceLabelValue{metadata: btSliceLabelValueMetadata, value: "("}
	metricValues := allPossibleMetricValues(metricDataType)
	labelValues := []LabelValue{labelValue1, labelValue2, labelValue3, labelValue4}
	timestamp := time.Now().UTC()
	metricsDataPoint := &MetricsDataPoint{
		metricName:  metricName,
		timestamp:   timestamp,
		databaseID:  databaseID(),
		labelValues: labelValues,
		metricValue: metricValues[0],
	}

	// Assert the no-panic contract explicitly so a regression is reported as a
	// test failure with context instead of aborting the test binary.
	assert.NotPanics(t, func() {
		metricsDataPoint.HideLockStatsRowrangestartkeyPII()
	})
	assert.Len(t, metricsDataPoint.labelValues, 4)
}

func allPossibleLabelValues() []LabelValue {
strLabelValueMetadata, _ := NewLabelValueMetadata("stringLabelName", "stringLabelColumnName", StringValueType)
strLabelValue := stringLabelValue{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ const (
// Since, the initial intent was to work mainly with Prometheus backend,
// this constant was set to 1 hour - max allowed interval by Prometheus.
backfillIntervalDuration = time.Hour
topLockStatsMetricName = "top minute lock stats"
)

// intervalStatsReader embeds currentStatsReader and adds the state used when
// reading interval statistics.
type intervalStatsReader struct {
currentStatsReader
timestampsGenerator *timestampsGenerator
lastPullTimestamp time.Time
timestampsGenerator *timestampsGenerator
lastPullTimestamp time.Time
// hideTopnLockstatsRowrangestartkey is taken from
// ReaderConfig.HideTopnLockstatsRowrangestartkey; when true, Read masks the
// row_range_start_key label of data points of the top lock stats metric.
hideTopnLockstatsRowrangestartkey bool
}

func newIntervalStatsReader(
Expand All @@ -57,8 +59,9 @@ func newIntervalStatsReader(
}

return &intervalStatsReader{
currentStatsReader: reader,
timestampsGenerator: tsGenerator,
currentStatsReader: reader,
timestampsGenerator: tsGenerator,
hideTopnLockstatsRowrangestartkey: config.HideTopnLockstatsRowrangestartkey,
}
}

Expand All @@ -82,6 +85,12 @@ func (reader *intervalStatsReader) Read(ctx context.Context) ([]*metadata.Metric
if err != nil {
return nil, err
}
metricMetadata := reader.currentStatsReader.metricsMetadata
if reader.hideTopnLockstatsRowrangestartkey && metricMetadata != nil && metricMetadata.Name == topLockStatsMetricName {
for _, dataPoint := range dataPoints {
dataPoint.HideLockStatsRowrangestartkeyPII()
}
}

collectedDataPoints = append(collectedDataPoints, dataPoints...)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ func TestNewIntervalStatsReader(t *testing.T) {
}
logger := zaptest.NewLogger(t)
config := ReaderConfig{
TopMetricsQueryMaxRows: topMetricsQueryMaxRows,
BackfillEnabled: true,
TopMetricsQueryMaxRows: topMetricsQueryMaxRows,
BackfillEnabled: true,
HideTopnLockstatsRowrangestartkey: true,
}

reader := newIntervalStatsReader(logger, database, metricsMetadata, config)
Expand All @@ -69,15 +70,17 @@ func TestNewIntervalStatsReader(t *testing.T) {
assert.Equal(t, topMetricsQueryMaxRows, reader.topMetricsQueryMaxRows)
assert.NotNil(t, reader.timestampsGenerator)
assert.True(t, reader.timestampsGenerator.backfillEnabled)
assert.True(t, reader.hideTopnLockstatsRowrangestartkey)
}

func TestIntervalStatsReader_NewPullStatement(t *testing.T) {
databaseID := datasource.NewDatabaseID(projectID, instanceID, databaseName)
timestamp := time.Now().UTC()
logger := zaptest.NewLogger(t)
config := ReaderConfig{
TopMetricsQueryMaxRows: topMetricsQueryMaxRows,
BackfillEnabled: false,
TopMetricsQueryMaxRows: topMetricsQueryMaxRows,
BackfillEnabled: false,
HideTopnLockstatsRowrangestartkey: true,
}
ctx := context.Background()
client, _ := spanner.NewClient(ctx, "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ import (
)

// ReaderConfig carries the receiver options that are handed to the stats readers.
type ReaderConfig struct {
TopMetricsQueryMaxRows int
BackfillEnabled bool
TopMetricsQueryMaxRows int
BackfillEnabled bool
// HideTopnLockstatsRowrangestartkey is copied into the interval stats reader,
// which uses it to decide whether lock stats key values are masked.
HideTopnLockstatsRowrangestartkey bool
}

type Reader interface {
Expand Down
5 changes: 3 additions & 2 deletions receiver/googlecloudspannerreceiver/receiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,9 @@ func (r *googleCloudSpannerReceiver) initializeProjectReaders(ctx context.Contex
parsedMetadata []*metadata.MetricsMetadata) error {

readerConfig := statsreader.ReaderConfig{
BackfillEnabled: r.config.BackfillEnabled,
TopMetricsQueryMaxRows: r.config.TopMetricsQueryMaxRows,
BackfillEnabled: r.config.BackfillEnabled,
TopMetricsQueryMaxRows: r.config.TopMetricsQueryMaxRows,
HideTopnLockstatsRowrangestartkey: r.config.HideTopnLockstatsRowrangestartkey,
}

for _, project := range r.config.Projects {
Expand Down
1 change: 1 addition & 0 deletions receiver/googlecloudspannerreceiver/testdata/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ googlecloudspanner:
top_metrics_query_max_rows: 10
backfill_enabled: true
cardinality_total_limit: 200000
hide_topn_lockstats_rowrangestartkey: true
projects:
- project_id: "spanner project 1"
service_account_key: "path to spanner project 1 service account json key"
Expand Down