From cf47477c96071093ed9777ba605e9b6a65768866 Mon Sep 17 00:00:00 2001 From: miagilepner Date: Fri, 1 Mar 2024 09:42:39 +0000 Subject: [PATCH] backport of commit ac1347bdf7f7589c81a14277a6e2adefbef0bad4 --- changelog/25713.txt | 3 + helper/metricsutil/wrapped_metrics.go | 1 + vault/activity_log.go | 33 ++++++-- vault/activity_log_test.go | 113 ++++++++++++++++++++++++++ 4 files changed, 144 insertions(+), 6 deletions(-) create mode 100644 changelog/25713.txt diff --git a/changelog/25713.txt b/changelog/25713.txt new file mode 100644 index 000000000000..250045059ec5 --- /dev/null +++ b/changelog/25713.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core/metrics: add metrics for secret sync client count +``` diff --git a/helper/metricsutil/wrapped_metrics.go b/helper/metricsutil/wrapped_metrics.go index 0cf453123304..8b33c8802003 100644 --- a/helper/metricsutil/wrapped_metrics.go +++ b/helper/metricsutil/wrapped_metrics.go @@ -44,6 +44,7 @@ type TelemetryConstConfig struct { } type Metrics interface { + SetGauge(key []string, val float32) SetGaugeWithLabels(key []string, val float32, labels []Label) IncrCounterWithLabels(key []string, val float32, labels []Label) AddSampleWithLabels(key []string, val float32, labels []Label) diff --git a/vault/activity_log.go b/vault/activity_log.go index 55a38d738729..1ec03e4d5bd6 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -2247,7 +2247,6 @@ func (a *ActivityLog) writePrecomputedQuery(ctx context.Context, segmentTime tim // the byNamespace map also needs to be transformed into a protobuf pq.Namespaces = a.transformALNamespaceBreakdowns(opts.byNamespace) - err := a.queryStore.Put(ctx, pq) if err != nil { a.logger.Warn("failed to store precomputed query", "error", err) @@ -2316,11 +2315,27 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime a.breakdownTokenSegment(token, opts.byNamespace) } - // write metrics + // handle metrics reporting + a.reportPrecomputedQueryMetrics(ctx, segmentTime, opts) + + // convert the maps to the proper format and write them as precomputed queries + return a.writePrecomputedQuery(ctx, segmentTime, opts) +} + +func (a *ActivityLog) reportPrecomputedQueryMetrics(ctx context.Context, segmentTime time.Time, opts pqOptions) { + if segmentTime != opts.activePeriodEnd && segmentTime != opts.activePeriodStart { + return + } + // we don't want to introduce any new namespaced metrics. For secret sync + // (and all newer client types) we'll instead keep maps of the total + summedMetricsMonthly := make(map[string]int) + summedMetricsReporting := make(map[string]int) + for nsID, entry := range opts.byNamespace { // If this is the most recent month, or the start of the reporting period, output // a metric for each namespace. - if segmentTime == opts.activePeriodEnd { + switch segmentTime { + case opts.activePeriodEnd: a.metrics.SetGaugeWithLabels( []string{"identity", "entity", "active", "monthly"}, float32(entry.Counts.countByType(entityActivityType)), @@ -2335,7 +2350,8 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, }, ) - } else if segmentTime == opts.activePeriodStart { + summedMetricsMonthly[secretSyncActivityType] += entry.Counts.countByType(secretSyncActivityType) + case opts.activePeriodStart: a.metrics.SetGaugeWithLabels( []string{"identity", "entity", "active", "reporting_period"}, float32(entry.Counts.countByType(entityActivityType)), @@ -2350,11 +2366,16 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, }, ) + summedMetricsReporting[secretSyncActivityType] += entry.Counts.countByType(secretSyncActivityType) } } - // convert the maps to the proper format and write them as precomputed queries - return a.writePrecomputedQuery(ctx, segmentTime, opts) + for clientType, count := range summedMetricsMonthly { + a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", "_"), "active", "monthly"}, float32(count)) + } + for clientType, count := range summedMetricsReporting { + a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", "_"), "active", "reporting_period"}, float32(count)) + } } // goroutine to process the request in the intent log, creating precomputed queries. diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index dc02fed1c8ee..e1eb91a85523 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "github.com/armon/go-metrics" "github.com/axiomhq/hyperloglog" "github.com/go-test/deep" "github.com/golang/protobuf/proto" @@ -4841,3 +4842,115 @@ func TestAddActivityToFragment(t *testing.T) { }) } } + +// TestActivityLog_reportPrecomputedQueryMetrics creates 3 clients per type and +// calls reportPrecomputedQueryMetrics. The test verifies that the metric sink +// gets metrics reported correctly, based on the segment time matching the +// active period start or end +func TestActivityLog_reportPrecomputedQueryMetrics(t *testing.T) { + core, _, _, metricsSink := TestCoreUnsealedWithMetrics(t) + a := core.activityLog + byMonth := make(summaryByMonth) + byNS := make(summaryByNamespace) + segmentTime := time.Now() + + // for each client type, make 3 clients in their own namespaces + for i := 0; i < 3; i++ { + for _, clientType := range []string{secretSyncActivityType, nonEntityTokenActivityType, entityActivityType} { + client := &activity.EntityRecord{ + ClientID: fmt.Sprintf("%s-%d", clientType, i), + NamespaceID: fmt.Sprintf("ns-%d", i), + MountAccessor: fmt.Sprintf("mnt-%d", i), + ClientType: clientType, + NonEntity: clientType == nonEntityTokenActivityType, + } + processClientRecord(client, byNS, byMonth, segmentTime) + } + } + endTime := timeutil.EndOfMonth(segmentTime) + opts := pqOptions{ + byNamespace: byNS, + byMonth: byMonth, + endTime: endTime, + } + + otherTime := segmentTime.Add(time.Hour) + + hasNoMetric := func(t *testing.T, intervals []*metrics.IntervalMetrics, name string) { + t.Helper() + gauges := intervals[len(intervals)-1].Gauges + for _, metric := range gauges { + if metric.Name == name { + require.Fail(t, "metric found", name) + } + } + } + hasMetric := func(t *testing.T, intervals []*metrics.IntervalMetrics, name string, value float32, namespaceLabel *string) { + t.Helper() + fullMetric := fmt.Sprintf("%s;cluster=test-cluster", name) + if namespaceLabel != nil { + fullMetric = fmt.Sprintf("%s;namespace=%s;cluster=test-cluster", name, *namespaceLabel) + } + gauges := intervals[len(intervals)-1].Gauges + require.Contains(t, gauges, fullMetric) + metric := gauges[fullMetric] + require.Equal(t, value, metric.Value) + } + + t.Run("no metrics", func(t *testing.T) { + // neither option is equal to the segment time, so no metrics should be + // reported + opts.activePeriodStart = otherTime + opts.activePeriodEnd = otherTime + a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts) + + data := metricsSink.Data() + hasNoMetric(t, data, "identity.entity.active.monthly") + hasNoMetric(t, data, "identity.nonentity.active.monthly") + hasNoMetric(t, data, "identity.secret_sync.active.monthly") + hasNoMetric(t, data, "identity.entity.active.reporting_period") + hasNoMetric(t, data, "identity.entity.active.reporting_period") + hasNoMetric(t, data, "identity.secret_sync.active.reporting_period") + }) + t.Run("monthly metric", func(t *testing.T) { + // activePeriodEnd is equal to the segment time, indicating that monthly + // metrics should be reported + opts.activePeriodEnd = segmentTime + opts.activePeriodStart = otherTime + a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts) + + data := metricsSink.Data() + // expect the metrics ending with "monthly" + // the namespace was never registered in core, so it'll be + // reported with a "deleted-" prefix + for i := 0; i < 3; i++ { + ns := fmt.Sprintf("deleted-ns-%d", i) + hasMetric(t, data, "identity.entity.active.monthly", 1, &ns) + hasMetric(t, data, "identity.nonentity.active.monthly", 1, &ns) + } + // secret sync metrics should be the sum of clients across all + // namespaces + hasMetric(t, data, "identity.secret_sync.active.monthly", 3, nil) + }) + t.Run("reporting period metric", func(t *testing.T) { + // activePeriodEnd is not equal to the segment time but activePeriodStart + // is, which indicates that metrics for the reporting period should be + // reported + opts.activePeriodEnd = otherTime + opts.activePeriodStart = segmentTime + a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts) + + data := metricsSink.Data() + // expect the metrics ending with "reporting_period" + // the namespace was never registered in core, so it'll be + // reported with a "deleted-" prefix + for i := 0; i < 3; i++ { + ns := fmt.Sprintf("deleted-ns-%d", i) + hasMetric(t, data, "identity.entity.active.reporting_period", 1, &ns) + hasMetric(t, data, "identity.nonentity.active.reporting_period", 1, &ns) + } + // secret sync metrics should be the sum of clients across all + // namespaces + hasMetric(t, data, "identity.secret_sync.active.reporting_period", 3, nil) + }) +}