From 8eb44fd991be76e17272daf21a32c1c50c4da629 Mon Sep 17 00:00:00 2001 From: Gabriel Pop Date: Thu, 21 Sep 2023 00:38:45 +0300 Subject: [PATCH] add grouping --- .../module/azure/app_insights/data.go | 168 ++++- .../module/azure/app_insights/data_test.go | 577 ++++++++++++++++++ 2 files changed, 732 insertions(+), 13 deletions(-) diff --git a/x-pack/metricbeat/module/azure/app_insights/data.go b/x-pack/metricbeat/module/azure/app_insights/data.go index f45f5eaed23..5157b9440c1 100644 --- a/x-pack/metricbeat/module/azure/app_insights/data.go +++ b/x-pack/metricbeat/module/azure/app_insights/data.go @@ -7,7 +7,9 @@ package app_insights import ( "fmt" "regexp" + "sort" "strings" + "time" "github.com/Azure/azure-sdk-for-go/services/preview/appinsights/v1/insights" "github.com/Azure/go-autorest/autorest/date" @@ -116,6 +118,21 @@ func isSegment(metric string) bool { return false } +type metricTimeKey struct { + Start time.Time + End time.Time +} + +func newMetricTimeKey( + start time.Time, + end time.Time, +) metricTimeKey { + return metricTimeKey{ + Start: start, + End: end, + } +} + func EventsMapping(metricValues insights.ListMetricsResultsItem, applicationId string, namespace string) []mb.Event { var events []mb.Event if metricValues.Value == nil { @@ -139,26 +156,97 @@ func EventsMapping(metricValues insights.ListMetricsResultsItem, applicationId s events = append(events, event) } } - for _, val := range segValues { - for _, seg := range val.Segments { - lastSeg := getValue(seg) - for _, ls := range lastSeg { - events = append(events, createSegEvent(val, ls, applicationId, namespace)) - } + + groupedByDimensions := groupMetricsByDimension(mValues) + + for _, group := range groupedByDimensions { + groupedByTime := groupMetricsByTime(group) + + for ts, group := range groupedByTime { + events = append(events, createGroupEvent(group, ts, applicationId, namespace)) } } return events } -func getValue(metric MetricValue) []MetricValue { - var values []MetricValue - if metric.Segments == nil { - return []MetricValue{metric} +// groupMetricsByTime groups metrics by their start and end times truncated to the second. +func groupMetricsByTime(metrics []MetricValue) map[metricTimeKey][]MetricValue { + result := make(map[metricTimeKey][]MetricValue) + + for _, metric := range metrics { + // The start and end times are truncated to the nearest second. + // This is done to ensure that metrics that fall within the same second are grouped together, even if their actual times are slightly different. + timeKey := newMetricTimeKey(metric.Start.Time.Truncate(time.Second), metric.End.Time.Truncate(time.Second)) + result[timeKey] = append(result[timeKey], metric) } - for _, met := range metric.Segments { - values = append(values, getValue(met)...) + + return result +} + +// groupMetricsByDimension groups the given metrics by their dimension keys. 
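// The key for each metric is produced by getSortedKeys, which concatenates every
// dimension name with its value and sorts the resulting pairs alphabetically, so a
// segment whose SegmentName is {"request_url_host": "localhost"} is grouped under
// the key "request_url_hostlocalhost". The function walks nested segments
// recursively; segments that carry no Start/End of their own inherit the most
// recently seen non-zero Start/End from an enclosing metric value.
//
// Illustrative sketch (values are made up for documentation purposes only):
//
//	metrics := []MetricValue{{
//		SegmentName: map[string]string{"request_url_host": "localhost"},
//		Value:       map[string]interface{}{"sessions_count.unique": 44},
//	}}
//	groups := groupMetricsByDimension(metrics)
//	// groups["request_url_hostlocalhost"] now holds the single metric above.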
+func groupMetricsByDimension(metrics []MetricValue) map[string][]MetricValue { + keys := make(map[string][]MetricValue) + var firstStart, firstEnd *date.Time + + var helper func(metrics []MetricValue) + helper = func(metrics []MetricValue) { + for _, metric := range metrics { + dimensionKey := getSortedKeys(metric.SegmentName) + + if metric.Start != nil && !metric.Start.IsZero() { + firstStart = metric.Start + } + + if metric.End != nil && !metric.End.IsZero() { + firstEnd = metric.End + } + + if len(metric.Segments) > 0 { + for _, segment := range metric.Segments { + segmentKey := getSortedKeys(segment.SegmentName) + if segmentKey != "" { + combinedKey := dimensionKey + segmentKey + + newMetric := MetricValue{ + SegmentName: segment.SegmentName, + Value: segment.Value, + Segments: segment.Segments, + Interval: segment.Interval, + Start: firstStart, + End: firstEnd, + } + + keys[combinedKey] = append(keys[combinedKey], newMetric) + } + } + + for _, segment := range metric.Segments { + helper(segment.Segments) + } + } else if dimensionKey != "" { + m := metric + m.Start = firstStart + m.End = firstEnd + + keys[dimensionKey] = append(keys[dimensionKey], m) + } + } + } + + helper(metrics) + + return keys +} + +// getSortedKeys returns a string of sorted keys. +// The keys are sorted in alphabetical order. +func getSortedKeys(m map[string]string) string { + keys := make([]string, 0, len(m)) + for k, v := range m { + keys = append(keys, fmt.Sprintf("%s%s", k, v)) } - return values + sort.Strings(keys) + return strings.Join(keys, "") } func createSegEvent(parentMetricValue MetricValue, metricValue MetricValue, applicationId string, namespace string) mb.Event { @@ -179,6 +267,60 @@ func createSegEvent(parentMetricValue MetricValue, metricValue MetricValue, appl return event } +func createGroupEvent(metricValue []MetricValue, metricTime metricTimeKey, applicationId string, namespace string) mb.Event { + metricList := mapstr.M{} + + if metricTime.Start.IsZero() || metricTime.End.IsZero() { + return mb.Event{} + } + + for _, v := range metricValue { + for key, metric := range v.Value { + _, _ = metricList.Put(key, metric) + } + } + + if len(metricList) == 0 { + return mb.Event{} + } + + event := mb.Event{ + ModuleFields: mapstr.M{ + "application_id": applicationId, + }, + MetricSetFields: mapstr.M{ + "start_date": metricTime.Start, + "end_date": metricTime.End, + }, + Timestamp: metricTime.End, + } + + event.RootFields = mapstr.M{} + _, _ = event.RootFields.Put("cloud.provider", "azure") + + segments := make(map[string]string) + + for _, v := range metricValue { + for sn, sv := range v.SegmentName { + segments[sn] = sv + } + } + + if len(segments) > 0 { + _, _ = event.ModuleFields.Put("dimensions", segments) + } + + if namespace == "" { + _, _ = event.ModuleFields.Put("metrics", metricList) + } else { + for key, metric := range metricList { + _, _ = event.MetricSetFields.Put(key, metric) + } + } + + return event +} + func createEvent(start *date.Time, end *date.Time, applicationId string, namespace string, metricList mapstr.M) mb.Event { event := mb.Event{ ModuleFields: mapstr.M{ diff --git a/x-pack/metricbeat/module/azure/app_insights/data_test.go b/x-pack/metricbeat/module/azure/app_insights/data_test.go index 56e723f078e..b4b797ecf00 100644 --- a/x-pack/metricbeat/module/azure/app_insights/data_test.go +++ b/x-pack/metricbeat/module/azure/app_insights/data_test.go @@ -6,6 +6,9 @@ package app_insights import ( "testing" + "time" + + "github.com/elastic/beats/v7/metricbeat/mb" 
"github.com/Azure/azure-sdk-for-go/services/preview/appinsights/v1/insights" "github.com/Azure/go-autorest/autorest/date" @@ -14,6 +17,313 @@ import ( "github.com/elastic/elastic-agent-libs/mapstr" ) +func newMetricsTest( + timestamp1 *date.Time, + timestamp2 *date.Time, + timestamp3 *date.Time, +) []MetricValue { + return []MetricValue{ + { + SegmentName: map[string]string{}, + Value: map[string]interface{}{}, + Segments: []MetricValue{ + { + SegmentName: map[string]string{}, + Value: map[string]interface{}{}, + Segments: []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "", + }, + Value: map[string]interface{}{ + "users_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: nil, + End: nil, + }, + }, + Interval: "", + Start: nil, + End: nil, + }, + }, + Interval: "P5M", + Start: timestamp1, + End: timestamp1, + }, + { + SegmentName: map[string]string{}, + Value: map[string]interface{}{}, + Segments: []MetricValue{ + { + SegmentName: map[string]string{}, + Value: map[string]interface{}{}, + Segments: []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "", + }, + Value: map[string]interface{}{ + "sessions_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: nil, + End: nil, + }, + }, + Interval: "", + Start: nil, + End: nil, + }, + }, + Interval: "P5M", + Start: timestamp2, + End: timestamp2, + }, + { + SegmentName: map[string]string{}, + Value: map[string]interface{}{}, + Segments: []MetricValue{ + { + SegmentName: map[string]string{}, + Value: map[string]interface{}{}, + Segments: []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "localhost", + }, + Value: map[string]interface{}{ + "sessions_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: nil, + End: nil, + }, + }, + Interval: "", + Start: nil, + End: nil, + }, + }, + Interval: "P5M", + Start: timestamp3, + End: timestamp3, + }, + } +} + +func TestGroupMetrics(t *testing.T) { + t.Run("two dimensions groups with same timestamps", func(t *testing.T) { + timestamp1 := &date.Time{Time: time.Now()} + timestamp2 := &date.Time{Time: time.Now()} + timestamp3 := &date.Time{Time: time.Now()} + + metrics := newMetricsTest(timestamp1, timestamp2, timestamp3) + + expectedGroup1 := []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "", + }, + Value: map[string]interface{}{ + "users_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp1, + End: timestamp1, + }, + { + SegmentName: map[string]string{ + "request_url_host": "", + }, + Value: map[string]interface{}{ + "sessions_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp2, + End: timestamp2, + }, + } + + expectedGroup2 := []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "localhost", + }, + Value: map[string]interface{}{ + "sessions_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp3, + End: timestamp3, + }, + } + + groupedByDimensions := groupMetricsByDimension(metrics) + assert.Len(t, groupedByDimensions, 2) + + dimensionsGroup1, ok := groupedByDimensions["request_url_host"] + assert.True(t, ok) + assert.Len(t, dimensionsGroup1, 2) // 2 metrics + assert.ElementsMatch(t, dimensionsGroup1, expectedGroup1) + + dimensionsGroup2, ok := groupedByDimensions["request_url_hostlocalhost"] + assert.True(t, ok) + assert.Len(t, dimensionsGroup2, 1) // 1 metric + assert.ElementsMatch(t, dimensionsGroup2, expectedGroup2) + + groupedByTime1 := 
groupMetricsByTime(dimensionsGroup1) + assert.Len(t, groupedByTime1, 1) // same timestamps, 1 group + timeGroup1, ok := groupedByTime1[newMetricTimeKey(timestamp1.Time.Truncate(time.Second), timestamp1.Time.Truncate(time.Second))] + assert.True(t, ok) + assert.Len(t, timeGroup1, 2) // 2 metrics + assert.ElementsMatch(t, timeGroup1, expectedGroup1) + + groupedByTime2 := groupMetricsByTime(dimensionsGroup2) + assert.Len(t, groupedByTime2, 1) // same timestamps, 1 group + timeGroup1, ok = groupedByTime2[newMetricTimeKey(timestamp1.Time.Truncate(time.Second), timestamp1.Time.Truncate(time.Second))] + assert.True(t, ok) + assert.Len(t, timeGroup1, 1) // 1 metric + assert.ElementsMatch(t, timeGroup1, expectedGroup2) + }) + + t.Run("two dimensions groups with different timestamps", func(t *testing.T) { + timestamp1 := &date.Time{Time: time.Now()} + timestamp2 := &date.Time{Time: time.Now().Add(time.Minute)} + timestamp3 := &date.Time{Time: time.Now().Add(2 * time.Minute)} + + metrics := newMetricsTest(timestamp1, timestamp2, timestamp3) + + expectedDimensionsGroup1 := []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "", + }, + Value: map[string]interface{}{ + "users_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp1, + End: timestamp1, + }, + { + SegmentName: map[string]string{ + "request_url_host": "", + }, + Value: map[string]interface{}{ + "sessions_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp2, + End: timestamp2, + }, + } + + expectedDimensionsGroup2 := []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "localhost", + }, + Value: map[string]interface{}{ + "sessions_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp3, + End: timestamp3, + }, + } + + expectedTimeGroup1 := []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "", + }, + Value: map[string]interface{}{ + "users_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp1, + End: timestamp1, + }, + } + + expectedTimeGroup2 := []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "", + }, + Value: map[string]interface{}{ + "sessions_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp2, + End: timestamp2, + }, + } + + expectedTimeGroup3 := []MetricValue{ + { + SegmentName: map[string]string{ + "request_url_host": "localhost", + }, + Value: map[string]interface{}{ + "sessions_count.unique": 44, + }, + Segments: nil, + Interval: "", + Start: timestamp3, + End: timestamp3, + }, + } + + groupedByDimensions := groupMetricsByDimension(metrics) + assert.Len(t, groupedByDimensions, 2) + + dimensionsGroup1, ok := groupedByDimensions["request_url_host"] + assert.True(t, ok) + assert.Len(t, dimensionsGroup1, 2) // 2 metrics + assert.ElementsMatch(t, dimensionsGroup1, expectedDimensionsGroup1) + + dimensionsGroup2, ok := groupedByDimensions["request_url_hostlocalhost"] + assert.True(t, ok) + assert.Len(t, dimensionsGroup2, 1) // 1 metric + assert.ElementsMatch(t, dimensionsGroup2, expectedDimensionsGroup2) + + groupedByTime1 := groupMetricsByTime(dimensionsGroup1) + assert.Len(t, groupedByTime1, 2) // different timestamps, 2 group + + timeGroup1, ok := groupedByTime1[newMetricTimeKey(timestamp1.Time.Truncate(time.Second), timestamp1.Time.Truncate(time.Second))] + assert.True(t, ok) + assert.Len(t, timeGroup1, 1) // 1 metric + assert.ElementsMatch(t, timeGroup1, expectedTimeGroup1) + + timeGroup2, ok := 
groupedByTime1[newMetricTimeKey(timestamp2.Time.Truncate(time.Second), timestamp2.Time.Truncate(time.Second))] + assert.True(t, ok) + assert.Len(t, timeGroup1, 1) // 1 metric + assert.ElementsMatch(t, timeGroup2, expectedTimeGroup2) + + groupedByTime2 := groupMetricsByTime(dimensionsGroup2) + assert.Len(t, groupedByTime2, 1) // different timestamps, 2 group + + timeGroup1, ok = groupedByTime2[newMetricTimeKey(timestamp3.Time.Truncate(time.Second), timestamp3.Time.Truncate(time.Second))] + assert.True(t, ok) + assert.Len(t, timeGroup1, 1) // 1 metric + assert.ElementsMatch(t, timeGroup1, expectedTimeGroup3) + }) +} + func TestEventMapping(t *testing.T) { startDate := date.Time{} id := "123" @@ -57,6 +367,273 @@ func TestEventMapping(t *testing.T) { } +func TestEventMappingGrouping(t *testing.T) { + start, err := time.Parse("2006-01-02T15:04:05Z", "2023-09-20T18:08:31Z") + assert.NoError(t, err) + + end, err := time.Parse("2006-01-02T15:04:05Z", "2023-09-20T18:09:31Z") + assert.NoError(t, err) + + interval := "P152D" + results := []insights.MetricsResultsItem{ + { + Body: &insights.MetricsResult{ + Value: &insights.MetricsResultInfo{ + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Interval: &interval, + Segments: &[]insights.MetricsSegmentInfo{ + { + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "request/urlHost": "", + "users/count": map[string]interface{}{"unique": 1.0}, + }, + }, + }, + }, + }, + }, + }, + }, + { + Body: &insights.MetricsResult{ + Value: &insights.MetricsResultInfo{ + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Interval: &interval, + Segments: &[]insights.MetricsSegmentInfo{ + { + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "sessions/count": map[string]interface{}{"unique": 1.0}, + "request/urlHost": "", + }, + }, + }, + }, + }, + }, + }, + }, + { + Body: &insights.MetricsResult{ + Value: &insights.MetricsResultInfo{ + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Interval: &interval, + Segments: &[]insights.MetricsSegmentInfo{ + { + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTiming/urlHost": "localhost", + }, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTiming/urlPath": "/test", + "browserTimings/networkDuration": map[string]interface{}{"avg": 1.5}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + Body: &insights.MetricsResult{ + Value: &insights.MetricsResultInfo{ + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Interval: &interval, + Segments: &[]insights.MetricsSegmentInfo{ + { + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTiming/urlHost": "localhost", + }, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTimings/sendDuration": map[string]interface{}{"avg": 1.25}, + "browserTiming/urlPath": "/test", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + Body: &insights.MetricsResult{ + Value: &insights.MetricsResultInfo{ + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Interval: 
&interval, + Segments: &[]insights.MetricsSegmentInfo{ + { + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTiming/urlHost": "localhost", + }, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTimings/receiveDuration": map[string]interface{}{"avg": 0.0}, + "browserTiming/urlPath": "/test", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + Body: &insights.MetricsResult{ + Value: &insights.MetricsResultInfo{ + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Interval: &interval, + Segments: &[]insights.MetricsSegmentInfo{ + { + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTiming/urlHost": "localhost", + }, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTimings/processingDuration": map[string]interface{}{"avg": 18.25}, + "browserTiming/urlPath": "/test", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + Body: &insights.MetricsResult{ + Value: &insights.MetricsResultInfo{ + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Interval: &interval, + Segments: &[]insights.MetricsSegmentInfo{ + { + Start: &date.Time{Time: start}, + End: &date.Time{Time: end}, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTiming/urlHost": "localhost", + }, + Segments: &[]insights.MetricsSegmentInfo{ + { + AdditionalProperties: map[string]interface{}{ + "browserTimings/totalDuration": map[string]interface{}{"avg": 22}, + "browserTiming/urlPath": "/test", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + + result := insights.ListMetricsResultsItem{ + Value: &results, + } + + expectedEvents := []mb.Event{ + { + RootFields: mapstr.M{ + "cloud": mapstr.M{ + "provider": "azure", + }, + }, + ModuleFields: mapstr.M{ + "application_id": "2c944c0d-5231-43bb-a59a-dba54894c8d9", + "dimensions": map[string]string{ + "browser_timing_url_path": "/test", + "browser_timing_url_host": "localhost", + }, + "metrics": mapstr.M{ + "browser_timings_network_duration": mapstr.M{"avg": 1.5}, + "browser_timings_send_duration": mapstr.M{"avg": 1.25}, + "browser_timings_receive_duration": mapstr.M{"avg": 0.0}, + "browser_timings_processing_duration": mapstr.M{"avg": 18.25}, + "browser_timings_total_duration": mapstr.M{"avg": 22}, + }, + }, + MetricSetFields: mapstr.M{ + "start_date": start, + "end_date": end, + }, + Timestamp: end, + }, + { + RootFields: mapstr.M{ + "cloud": mapstr.M{ + "provider": "azure", + }, + }, + ModuleFields: mapstr.M{ + "application_id": "2c944c0d-5231-43bb-a59a-dba54894c8d9", + "dimensions": map[string]string{ + "request_url_host": "", + }, + "metrics": mapstr.M{ + "users_count": mapstr.M{"unique": 1.0}, + "sessions_count": mapstr.M{"unique": 1.0}, + }, + }, + MetricSetFields: mapstr.M{ + "start_date": start, + "end_date": end, + }, + Timestamp: end, + }, + } + + events := EventsMapping(result, "2c944c0d-5231-43bb-a59a-dba54894c8d9", "") + assert.Equal(t, len(events), 2) + assert.ElementsMatch(t, expectedEvents, events) +} + func TestCleanMetricNames(t *testing.T) { ex := "customDimensions/ExecutingAssemblyFileVersion" result := cleanMetricNames(ex)