Fix bug: too many CloudWatch metrics

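CloudWatch metrics were being selected incorrectly. The most obvious symptom was that too many metrics were being added: for each configured metric name, every metric in the namespace with matching dimensions was added, whether or not its own name matched. Checking the metric's name as well as its dimensions proved to be a sufficient fix. In order to test the fix, the metric selection logic was factored out of Gather into a separate SelectMetrics function.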
influxdata · Oct 11, 2016 · d838221 · d838221
1 parent e96f7a9
commit d838221
Show file tree

Hide file tree

Showing 3 changed files with 108 additions and 13 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -39,6 +39,7 @@ continue sending logs to /var/log/telegraf/telegraf.log.
 
 ### Bugfixes
 
+- [#1885](https://github.com/influxdata/telegraf/pull/1885): Fix over-querying of cloudwatch metrics
 - [#1746](https://github.com/influxdata/telegraf/issues/1746): Fix handling of non-string values for JSON keys listed in tag_keys.
 - [#1628](https://github.com/influxdata/telegraf/issues/1628): Fix mongodb input panic on version 2.2.
 - [#1733](https://github.com/influxdata/telegraf/issues/1733): Fix statsd scientific notation parsing

diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go
@@ -119,11 +119,7 @@ func (c *CloudWatch) Description() string {
 	return "Pull Metric Statistics from Amazon CloudWatch"
 }
 
-func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
-	if c.client == nil {
-		c.initializeCloudWatch()
-	}
-
+func SelectMetrics (c *CloudWatch) ([]*cloudwatch.Metric, error) {
 	var metrics []*cloudwatch.Metric
 
 	// check for provided metric filter
@@ -149,11 +145,11 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
 			} else {
 				allMetrics, err := c.fetchNamespaceMetrics()
 				if err != nil {
-					return err
+					return nil, err
 				}
 				for _, name := range m.MetricNames {
 					for _, metric := range allMetrics {
-						if isSelected(metric, m.Dimensions) {
+						if name == *metric.MetricName && isSelected(metric, m.Dimensions) {
 							metrics = append(metrics, &cloudwatch.Metric{
 								Namespace:  aws.String(c.Namespace),
 								MetricName: aws.String(name),
@@ -163,16 +159,26 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
 					}
 				}
 			}
-
 		}
 	} else {
 		var err error
 		metrics, err = c.fetchNamespaceMetrics()
 		if err != nil {
-			return err
+			return nil, err
 		}
 	}
+	return metrics, nil
+}
 
+func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
+	if c.client == nil {
+		c.initializeCloudWatch()
+	}
+
+	metrics, err := SelectMetrics(c)
+	if err != nil {
+		return err
+	}
 	metricCount := len(metrics)
 	errChan := errchan.New(metricCount)
 

diff --git a/plugins/inputs/cloudwatch/cloudwatch_test.go b/plugins/inputs/cloudwatch/cloudwatch_test.go
@@ -11,9 +11,9 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
-type mockCloudWatchClient struct{}
+type mockGatherCloudWatchClient struct{}
 
-func (m *mockCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) {
+func (m *mockGatherCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) {
 	metric := &cloudwatch.Metric{
 		Namespace:  params.Namespace,
 		MetricName: aws.String("Latency"),
@@ -31,7 +31,7 @@ func (m *mockCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput)
 	return result, nil
 }
 
-func (m *mockCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
+func (m *mockGatherCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
 	dataPoint := &cloudwatch.Datapoint{
 		Timestamp:   params.EndTime,
 		Minimum:     aws.Float64(0.1),
@@ -62,7 +62,7 @@ func TestGather(t *testing.T) {
 	}
 
 	var acc testutil.Accumulator
-	c.client = &mockCloudWatchClient{}
+	c.client = &mockGatherCloudWatchClient{}
 
 	c.Gather(&acc)
 
@@ -83,6 +83,94 @@ func TestGather(t *testing.T) {
 
 }
 
+type mockSelectMetricsCloudWatchClient struct{}
+
+func (m *mockSelectMetricsCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) {
+	metrics := []*cloudwatch.Metric{}
+	// 4 metrics are available
+	metricNames := []string { "Latency", "RequestCount", "HealthyHostCount", "UnHealthyHostCount" }
+	// for 3 ELBs
+	loadBalancers := []string { "lb-1", "lb-2", "lb-3" }
+	// in 2 AZs
+	availabilityZones := []string { "us-east-1a", "us-east-1b" }
+	for _, m := range metricNames {
+		for _, lb := range loadBalancers {
+			// For each metric/ELB pair, we get an aggregate value across all AZs.
+			metrics = append(metrics, &cloudwatch.Metric {
+				Namespace: aws.String("AWS/ELB"),
+				MetricName: aws.String(m),
+				Dimensions: []*cloudwatch.Dimension {
+					&cloudwatch.Dimension {
+						Name: aws.String("LoadBalancerName"),
+						Value: aws.String(lb),
+					},
+				},
+			})
+			for _, az := range availabilityZones {
+				// We get a metric for each metric/ELB/AZ triplet.
+				metrics = append(metrics, &cloudwatch.Metric {
+					Namespace: aws.String("AWS/ELB"),
+					MetricName: aws.String(m),
+					Dimensions: []*cloudwatch.Dimension {
+						&cloudwatch.Dimension {
+							Name: aws.String("LoadBalancerName"),
+							Value: aws.String(lb),
+						},
+						&cloudwatch.Dimension {
+							Name: aws.String("AvailabilityZone"),
+							Value: aws.String(az),
+						},
+					},
+				})
+			}
+		}
+	}
+
+	result := &cloudwatch.ListMetricsOutput{
+		Metrics: metrics,
+	}
+	return result, nil
+}
+
+func (m *mockSelectMetricsCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
+	return nil, nil
+}
+
+func TestSelectMetrics(t *testing.T) {
+	duration, _ := time.ParseDuration("1m")
+	internalDuration := internal.Duration{
+		Duration: duration,
+	}
+	c := &CloudWatch{
+		Region:    "us-east-1",
+		Namespace: "AWS/ELB",
+		Delay:     internalDuration,
+		Period:    internalDuration,
+		RateLimit: 10,
+		Metrics: []*Metric{
+			&Metric {
+				MetricNames: []string { "Latency", "RequestCount" },
+				Dimensions: []*Dimension {
+					&Dimension {
+						Name: "LoadBalancerName",
+						Value: "*",
+					},
+					&Dimension {
+						Name: "AvailabilityZone",
+						Value: "*",
+					},
+				},
+			},
+		},
+	}
+	c.client = &mockSelectMetricsCloudWatchClient{}
+	metrics, err := SelectMetrics(c)
+	// We've asked for 2 (out of 4) metrics, over all 3 load balancers in all 2
+	// AZs. We should get 12 metrics.
+	assert.Equal(t, 12, len(metrics))
+	assert.Nil(t, err)
+}
+
 func TestGenerateStatisticsInputParams(t *testing.T) {
 	d := &cloudwatch.Dimension{
 		Name:  aws.String("LoadBalancerName"),