Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[azure] [app_insights] Group metrics by dimensions (segments) and timestamp #36634

Merged
merged 10 commits into from
Dec 4, 2023
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ Setting environmental variable ELASTIC_NETINFO:false in Elastic Agent pod will d
- Add GCP CloudSQL metadata {pull}33066[33066]
- Add GCP Carbon Footprint metricbeat data {pull}34820[34820]
- Add event loop utilization metric to Kibana module {pull}35020[35020]
- Add metrics grouping by dimensions and time to Azure app insights {pull}36634[36634]
- Align on the algorithm used to transform Prometheus histograms into Elasticsearch histograms {pull}36647[36647]
- Enhance GCP billing with detailed tables identification, additional fields, and optimized data handling. {pull}36902[36902]
- Add a `/inputs/` route to the HTTP monitoring endpoint that exposes metrics for each metricset instance. {pull}36971[36971]
Expand Down
221 changes: 158 additions & 63 deletions x-pack/metricbeat/module/azure/app_insights/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ package app_insights
import (
"fmt"
"regexp"
"sort"
"strings"
"time"

"github.com/Azure/azure-sdk-for-go/services/preview/appinsights/v1/insights"
"github.com/Azure/go-autorest/autorest/date"
Expand Down Expand Up @@ -116,112 +118,205 @@ func isSegment(metric string) bool {
return false
}

// metricTimeKey holds the start and end times that a group of metrics is
// reported for.
type metricTimeKey struct {
// Start is the beginning of the reported time range.
Start time.Time
// End is the end of the reported time range.
End time.Time
}

// newMetricTimeKey returns a metricTimeKey whose Start and End fields are set
// to the given start and end times.
func newMetricTimeKey(start, end time.Time) metricTimeKey {
	var key metricTimeKey
	key.Start, key.End = start, end
	return key
}

// EventsMapping converts the metric values in metricValues into metricbeat
// events for the given application ID and namespace.
//
// It returns a nil slice when metricValues.Value is nil.
func EventsMapping(metricValues insights.ListMetricsResultsItem, applicationId string, namespace string) []mb.Event {
var events []mb.Event
if metricValues.Value == nil {
return events
}
groupedAddProp := make(map[string][]MetricValue)

mValues := mapMetricValues(metricValues)

var segValues []MetricValue
for _, mv := range mValues {
if len(mv.Segments) == 0 {
groupedAddProp[mv.Interval] = append(groupedAddProp[mv.Interval], mv)
} else {
segValues = append(segValues, mv)
}
}
// Group the mapped values so that each group can be turned into one event.
groupedByDimensions := groupMetricsByDimension(mValues)

for _, val := range groupedAddProp {
event := createNoSegEvent(val, applicationId, namespace)
// groupedByDimensions is a map, so the groups (and therefore the resulting
// events) are visited in no particular order.
for _, group := range groupedByDimensions {
// NOTE(review): group[0].Start and group[0].End are pointers that are
// dereferenced here without a nil check — confirm that
// groupMetricsByDimension never stores a metric with a nil Start or End.
event := createGroupEvent(group, newMetricTimeKey(group[0].Start.Time, group[0].End.Time), applicationId, namespace)

// Only add events that have metric values.
if len(event.MetricSetFields) > 0 {
events = append(events, event)
}
}
for _, val := range segValues {
for _, seg := range val.Segments {
lastSeg := getValue(seg)
for _, ls := range lastSeg {
events = append(events, createSegEvent(val, ls, applicationId, namespace))
return events
}

// groupMetricsByDimension groups the given metrics by their dimension keys.
//
// The returned map is keyed by the string produced by createDimensionKey from
// the start time, the end time and the sorted segment name/value pairs of a
// metric; for child segments the child's own sorted segment string is appended
// to that key. Each map value is the list of metrics that share the key.
//
// Metrics are processed iteratively with an explicit stack instead of
// recursion.
//
// NOTE(review): metrics[0] is read unconditionally below, so an empty or nil
// metrics slice panics — confirm that callers never pass one.
func groupMetricsByDimension(metrics []MetricValue) map[string][]MetricValue {
keys := make(map[string][]MetricValue)

var stack []MetricValue
stack = append(stack, metrics...)

// Initialize default start and end times using the first metric's times
// The reason we need to use first metric's start and end times is because
// the start and end times of the child segments are not always set.
firstStart := metrics[0].Start
firstEnd := metrics[0].End

// Iterate until all metrics are processed
for len(stack) > 0 {
// Retrieve and remove the last metric from the stack
metric := stack[len(stack)-1]
stack = stack[:len(stack)-1]

// Update default times if the current metric has valid start and end times
// The updated values stay in effect for every metric popped afterwards,
// until another metric with valid times is seen.
if metric.End != nil && !metric.End.IsZero() {
firstEnd = metric.End
}
if metric.Start != nil && !metric.Start.IsZero() {
firstStart = metric.Start
}

// Generate a sorted key from the segment names to ensure consistent dimension keys
sortedSegmentsKey := getSortedKeys(metric.SegmentName)

// Construct a dimension key using the default times and sorted segment names
// NOTE(review): firstStart and firstEnd are pointers; if metrics[0] has a nil
// Start or End and no metric processed so far carried a valid time, the
// Unix() calls below dereference nil — confirm this cannot happen.
dimensionKey := createDimensionKey(firstStart.Unix(), firstEnd.Unix(), sortedSegmentsKey)

// If the metric has child segments, process them
// This is usually the case for segments that don't have actual metric values
if len(metric.Segments) > 0 {
for _, segment := range metric.Segments {
// Generate a sorted key from the segment names
segmentKey := getSortedKeys(segment.SegmentName)
if segmentKey != "" {
// Combine the dimension key with the segment key
combinedKey := dimensionKey + segmentKey

// Create a new metric with the combined key and add it to the map
// The copy takes its Start and End from the current default times rather
// than from the segment itself.
newMetric := MetricValue{
SegmentName: segment.SegmentName,
Value: segment.Value,
Segments: segment.Segments,
Interval: segment.Interval,
Start: firstStart,
End: firstEnd,
}

keys[combinedKey] = append(keys[combinedKey], newMetric)
}
// Add the child segments to the stack for processing
// (these are the children of segment, i.e. one level below the segments
// iterated by this loop).
stack = append(stack, segment.Segments...)
}
} else {
// If the metric has no child segments, add it to the map using the dimension key
// This is usually the case for segments that have actual metric values
// NOTE(review): createDimensionKey always emits both timestamps and the
// separators, so this check appears to always pass — confirm.
if dimensionKey != "" {
metric.Start, metric.End = firstStart, firstEnd
keys[dimensionKey] = append(keys[dimensionKey], metric)
}
}
}
return events

return keys
}

func getValue(metric MetricValue) []MetricValue {
var values []MetricValue
if metric.Segments == nil {
return []MetricValue{metric}
}
for _, met := range metric.Segments {
values = append(values, getValue(met)...)
// getSortedKeys returns a deterministic string built from the entries of m.
//
// Each entry contributes its key immediately followed by its value (k+v).
// The resulting strings are sorted in ascending order and concatenated
// without a separator.
//
// Sorting ensures that we always get the same string for the same map,
// regardless of the order in which the entries were added or iterated.
//
// For example, consider the following two maps:
// map1: map[string]string{"request_url_host": "", "request_url_path": "/home"}
// map2: map[string]string{"request_url_path": "/home", "request_url_host": ""}
// They represent the same data, and both yield
// "request_url_hostrequest_url_path/home".
//
// An empty or nil map yields the empty string.
//
// NOTE(review): because keys, values and entries are joined without any
// separator, different maps can produce the same string, e.g. {"a": "bc"} and
// {"ab": "c"} both yield "abc" — confirm this is acceptable for grouping.
func getSortedKeys(m map[string]string) string {
keys := make([]string, 0, len(m))
for k, v := range m {
keys = append(keys, k+v)
}
return values
sort.Strings(keys)

return strings.Join(keys, "")
}

func createSegEvent(parentMetricValue MetricValue, metricValue MetricValue, applicationId string, namespace string) mb.Event {
// createDimensionKey builds the key under which metrics that share a time
// range and a set of dimensions are grouped.
//
// The key has the form "<startTime>_<endTime>_<sortedSegments>":
//
//   - startTime and endTime are the Unix timestamps of the metric's start and
//     end, written as base-10 integers.
//   - sortedSegments is the already sorted segment string; it is appended
//     verbatim.
//
// For example: 1617225600_1617232800_request_url_hostlocalhost
func createDimensionKey(startTime, endTime int64, sortedSegments string) string {
	timeRange := fmt.Sprintf("%d_%d", startTime, endTime)
	return timeRange + "_" + sortedSegments
}

func createGroupEvent(metricValue []MetricValue, metricTime metricTimeKey, applicationId, namespace string) mb.Event {
// If the metric time is zero then we don't have a valid event.
// This should never happen, it's a safety check.
if metricTime.Start.IsZero() || metricTime.End.IsZero() {
return mb.Event{}
}
Comment on lines +262 to +264
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Under which circumstances can this happen?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just a safety check. Normally, the child segments don't have their own start or end times; they rely on their parent segments for that info. This check just makes sure the time info is there - it should never fail in practice.

Example:

MetricsResult (Parent)
│
│   ├── Start: 2023-01-01 08:00
│   └── End: 2023-01-01 10:00
│
└─── Segments: MetricsSegmentInfo (First-level Child)
     │
     │   ├── Start: 2023-01-01 08:00
     │   └── End: 2023-01-01 10:00
     │
     └─── Segments: []MetricsSegmentInfo (Second-level Children) 
          │
          ├── Segment 1:
          │    │
          │    ├── AdditionalProperties: {"browserTiming/urlHost": "localhost"}
          │    │   (No specific Start/End time here)
          │    │
          │    └─── Segments: []MetricsSegmentInfo (Third-level Children)
          │         │
          │         └─── Child Segment:
          │              │
          │              └── AdditionalProperties: {"browserTiming/urlPath": "/test", "browserTimings/networkDuration": {"avg": 1.5}}
          │                  (No specific Start/End time here)
          │
           

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be a great comment to explain what the code is trying to accomplish!


metricList := mapstr.M{}
for key, metric := range metricValue.Value {
metricList.Put(key, metric)

for _, v := range metricValue {
for key, metric := range v.Value {
_, _ = metricList.Put(key, metric)
}
}

// If we don't have any metrics then we don't have a valid event.
if len(metricList) == 0 {
return mb.Event{}
}
event := createEvent(parentMetricValue.Start, parentMetricValue.End, applicationId, namespace, metricList)
if len(parentMetricValue.SegmentName) > 0 {
event.ModuleFields.Put("dimensions", parentMetricValue.SegmentName)
}
if len(metricValue.SegmentName) > 0 {
event.ModuleFields.Put("dimensions", metricValue.SegmentName)
}
return event
}

func createEvent(start *date.Time, end *date.Time, applicationId string, namespace string, metricList mapstr.M) mb.Event {
event := mb.Event{
ModuleFields: mapstr.M{
"application_id": applicationId,
},
ModuleFields: mapstr.M{"application_id": applicationId},
MetricSetFields: mapstr.M{
"start_date": start,
"end_date": end,
"start_date": metricTime.Start,
"end_date": metricTime.End,
},
Timestamp: end.Time,
Timestamp: metricTime.End,
}

event.RootFields = mapstr.M{}
event.RootFields.Put("cloud.provider", "azure")
if namespace == "" {
event.ModuleFields.Put("metrics", metricList)
} else {
for key, metric := range metricList {
event.MetricSetFields.Put(key, metric)
_, _ = event.RootFields.Put("cloud.provider", "azure")

segments := make(map[string]string)

for _, v := range metricValue {
for sn, sv := range v.SegmentName {
segments[sn] = sv
}
}
return event
}

func createNoSegEvent(values []MetricValue, applicationId string, namespace string) mb.Event {
metricList := mapstr.M{}
for _, value := range values {
for key, metric := range value.Value {
metricList.Put(key, metric)
}
if len(segments) > 0 {
_, _ = event.ModuleFields.Put("dimensions", segments)
}
if len(metricList) == 0 {
return mb.Event{}

if namespace == "" {
_, _ = event.ModuleFields.Put("metrics", metricList)
} else {
for key, metric := range metricList {
_, _ = event.MetricSetFields.Put(key, metric)
}
}
return createEvent(values[0].Start, values[0].End, applicationId, namespace, metricList)

return event
}

func getAdditionalPropMetric(addProp map[string]interface{}) map[string]interface{} {
metricNames := make(map[string]interface{})
for key, val := range addProp {
switch val.(type) {
switch v := val.(type) {
case map[string]interface{}:
for subKey, subVal := range val.(map[string]interface{}) {
for subKey, subVal := range v {
if subVal != nil {
metricNames[cleanMetricNames(fmt.Sprintf("%s.%s", key, subKey))] = subVal
}
Expand Down
Loading