Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test CPU metric values greater than zero #609

Merged
merged 6 commits into from
Oct 13, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions integration/generator/test_case_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ var osToTestDirMap = map[string][]string{
"./integration/test/ca_bundle",
"./integration/test/cloudwatchlogs",
"./integration/test/metrics_number_dimension",
"./integration/test/metric_value_benchmark",
},
"ec2_performance":{
"ec2_performance": {
"./integration/test/performancetest",
},
// @TODO add real tests
Expand All @@ -48,14 +49,14 @@ func main() {

func genMatrix(targetOS string, testDirList []string) []map[string]string {
openTestMatrix, err := os.Open(fmt.Sprintf("integration/generator/resources/%v_test_matrix.json", targetOS))

if err != nil {
log.Panicf("can't read file %v_test_matrix.json err %v", targetOS, err)
}

byteValueTestMatrix, _ := ioutil.ReadAll(openTestMatrix)
_ = openTestMatrix.Close()

var testMatrix []map[string]string
err = json.Unmarshal(byteValueTestMatrix, &testMatrix)
if err != nil {
Expand Down
68 changes: 68 additions & 0 deletions integration/test/metric/cpu.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

//go:build linux && integration
// +build linux,integration

package metric

import (
"log"

"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
"github.com/aws/aws-sdk-go/aws"
)

// CPUMetricValueFetcher fetches values for CPU metrics. It embeds
// baseMetricValueFetcher, whose fetch method performs the actual query.
type CPUMetricValueFetcher struct {
baseMetricValueFetcher
}

func (f *CPUMetricValueFetcher) Fetch(namespace string, metricName string, stat Statistics) ([]float64, error) {
dimensions := f.getMetricSpecificDimensions()
values, err := f.fetch(namespace, dimensions, metricName, stat)
if err != nil {
log.Printf("Error while fetching metric value for %v: %v", metricName, err.Error())
}
SaxyPandaBear marked this conversation as resolved.
Show resolved Hide resolved
return values, err
}

// cpuSupportedMetricValues is the set of CPU metric names that
// CPUMetricValueFetcher accepts. Each name is the "cpu_" prefix followed by a
// measurement from the agent's cpu plugin configuration (time_* and usage_*).
var cpuSupportedMetricValues = map[string]struct{}{
	"cpu_time_active":      {},
	"cpu_time_guest":       {},
	"cpu_time_guest_nice":  {},
	"cpu_time_idle":        {},
	"cpu_time_iowait":      {},
	"cpu_time_irq":         {},
	"cpu_time_nice":        {},
	"cpu_time_softirq":     {},
	"cpu_time_steal":       {},
	"cpu_time_system":      {},
	"cpu_time_user":        {},
	"cpu_usage_active":     {},
	"cpu_usage_guest":      {},
	"cpu_usage_guest_nice": {},
	"cpu_usage_idle":       {},
	"cpu_usage_iowait":     {},
	"cpu_usage_irq":        {},
	"cpu_usage_nice":       {},
	"cpu_usage_softirq":    {},
	"cpu_usage_steal":      {},
	"cpu_usage_system":     {},
	"cpu_usage_user":       {},
}

// isApplicable reports whether metricName is a key of
// cpuSupportedMetricValues, i.e. whether this fetcher handles that metric.
func (f *CPUMetricValueFetcher) isApplicable(metricName string) bool {
	if _, supported := cpuSupportedMetricValues[metricName]; supported {
		return true
	}
	return false
}

func (f *CPUMetricValueFetcher) getMetricSpecificDimensions() []types.Dimension {
cpuDimension := types.Dimension{
Name: aws.String("cpu"),
Value: aws.String("cpu-total"),
}
dimensions := make([]types.Dimension, 1)
dimensions[0] = cpuDimension
SaxyPandaBear marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can still be simplified. I resolved the other comment because I think I derailed it, but this could be made into a top-level var, or at least just

return []types.Dimension{
    {
        Name: aws.String("cpu"),
        Value: aws.String("cpu-total"),
    },
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed. I think I missed that part


return dimensions
}
97 changes: 97 additions & 0 deletions integration/test/metric/metric_value_query.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

//go:build linux && integration
// +build linux,integration

package metric

import (
"fmt"
"github.com/aws/amazon-cloudwatch-agent/integration/test"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
"github.com/aws/aws-sdk-go/aws"
"log"
"time"
)

// metricValueFetchers is the list of fetchers that GetMetricFetcher scans, in
// order, returning the first one whose isApplicable accepts the metric name.
var metricValueFetchers = []MetricValueFetcher{
&CPUMetricValueFetcher{},
}

func GetMetricFetcher(metricName string) (MetricValueFetcher, error) {
for _, fetcher := range metricValueFetchers {
if fetcher.isApplicable(metricName) {
return fetcher, nil
}
}
err := fmt.Errorf("No metric fetcher for metricName %v", metricName)
log.Printf("%s", err)
SaxyPandaBear marked this conversation as resolved.
Show resolved Hide resolved
return nil, err
}

// MetricValueFetcher retrieves the values of a metric. Because it includes
// unexported methods, it can only be implemented inside this package.
type MetricValueFetcher interface {
// Fetch returns the values of metricName in namespace for the given statistic.
Fetch(namespace string, metricName string, stat Statistics) ([]float64, error)
// fetch is the shared query routine; metricSpecificDimensions are the
// dimensions supplied by the concrete fetcher.
fetch(namespace string, metricSpecificDimensions []types.Dimension, metricName string, stat Statistics) ([]float64, error)
// isApplicable reports whether this fetcher handles metricName.
isApplicable(metricName string) bool
// getMetricSpecificDimensions returns the dimensions specific to this
// fetcher's metrics.
getMetricSpecificDimensions() []types.Dimension
}

// baseMetricValueFetcher carries no state; it exists to provide the shared
// fetch method to fetchers that embed it.
type baseMetricValueFetcher struct{}

func (f *baseMetricValueFetcher) fetch(namespace string, metricSpecificDimensions []types.Dimension, metricName string, stat Statistics) ([]float64, error) {
SaxyPandaBear marked this conversation as resolved.
Show resolved Hide resolved
ec2InstanceId := test.GetInstanceId()
instanceIdDimension := types.Dimension{
Name: aws.String("InstanceId"),
Value: aws.String(ec2InstanceId),
}
dimensions := append(metricSpecificDimensions, instanceIdDimension)
metricToFetch := types.Metric{
Namespace: aws.String(namespace),
MetricName: aws.String(metricName),
Dimensions: dimensions,
}

metricQueryPeriod := int32(60)
metricQuery := types.MetricDataQuery{
MetricStat: &types.MetricStat{
Metric: &metricToFetch,
Period: &metricQueryPeriod,
Stat: aws.String(string(stat)),
},
Id: aws.String(metricName),
}
metricDataQueries := make([]types.MetricDataQuery, 1)
metricDataQueries[0] = metricQuery
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, can simplify this so we don't make an array and then set a value in it, we can just define the slice literal, like the above example for the CPU dimensions comment.


endTime := time.Now()
startTime := subtractMinutes(endTime, 10)
getMetricDataInput := cloudwatch.GetMetricDataInput{
StartTime: &startTime,
EndTime: &endTime,
MetricDataQueries: metricDataQueries,
}

log.Printf("Metric data input is : %s", fmt.Sprint(getMetricDataInput))

cwmClient, clientContext, err := test.GetCloudWatchMetricsClient()
if err != nil {
return nil, fmt.Errorf("Error occurred while creating CloudWatch client: %v", err.Error())
}

output, err := cwmClient.GetMetricData(*clientContext, &getMetricDataInput)
if err != nil {
return nil, fmt.Errorf("Error getting metric data %v", err)
}

result := output.MetricDataResults[0].Values
log.Printf("Metric Value is : %s", fmt.Sprint(result))

return result, nil
}

// subtractMinutes returns the instant that lies the given number of minutes
// before fromTime. A negative minutes value yields a later instant.
func subtractMinutes(fromTime time.Time, minutes int) time.Time {
	offset := -time.Duration(minutes) * time.Minute
	return fromTime.Add(offset)
}
23 changes: 23 additions & 0 deletions integration/test/metric/query-json.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[
SaxyPandaBear marked this conversation as resolved.
Show resolved Hide resolved
{
"Id": "m1",
"MetricStat": {
"Metric": {
"Namespace": "MetricValueBenchmarkTest",
"MetricName": "cpu_usage_active",
"Dimensions": [
{
"Name": "InstanceId",
"Value": "i-095d623fa10a192e3"
},
{
"Name": "cpu",
"Value": "cpu-total"
}
]
},
"Period": 60,
"Stat": "Average"
}
}
]
7 changes: 7 additions & 0 deletions integration/test/metric/stat.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package metric

// Statistics names a statistic to request for a metric; its string value is
// used as the Stat of the metric data query.
type Statistics string

const (
// AVERAGE requests the "Average" statistic.
AVERAGE Statistics = "Average"
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"agent": {
"metrics_collection_interval": 60,
"run_as_user": "root",
"debug": true,
"logfile": ""
},
"metrics": {
"namespace": "MetricValueBenchmarkTest",
"append_dimensions": {
"InstanceId": "${aws:InstanceId}"
},
"metrics_collected": {
"cpu": {
"measurement": [
"time_active", "time_guest", "time_guest_nice", "time_idle", "time_iowait", "time_irq",
"time_nice", "time_softirq", "time_steal", "time_system", "time_user",
"usage_active", "usage_guest", "usage_guest_nice", "usage_idle", "usage_iowait", "usage_irq",
"usage_nice", "usage_softirq", "usage_steal", "usage_system", "usage_user"
]
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

//go:build linux && integration
// +build linux,integration

package metric_value_benchmark

import (
"fmt"
"log"
"testing"
"text/tabwriter"
"time"

"github.com/aws/amazon-cloudwatch-agent/integration/test"
"github.com/aws/amazon-cloudwatch-agent/integration/test/metric"
"github.com/aws/amazon-cloudwatch-agent/integration/test/status"
)

// configOutputPath is where the test copies the agent configuration before
// starting the agent with it.
const configOutputPath = "/opt/aws/amazon-cloudwatch-agent/bin/config.json"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style:

I am sort of surprised go formatter didn't automatically do this:

const (
    configOutputPath = "/opt/aws/amazon-cloudwatch-agent/bin/config.json"
    configJSON = "/base_config.json"
    namespace = "MetricValueBenchmarkTest"
    instanceId = "InstanceId"
    minimumAgentRuntime = 3 * time.Minute
)

// configJSON is the agent configuration file name, appended to the resource
// directory path when the configuration is copied into place.
const configJSON = "/base_config.json"

// namespace is the CloudWatch namespace passed when fetching metric values.
const namespace = "MetricValueBenchmarkTest"
// instanceId is the name of the instance-id dimension.
// NOTE(review): not referenced in the visible part of this file — confirm it is still needed.
const instanceId = "InstanceId"

// minimumAgentRuntime is how long the test sleeps after starting the agent
// before stopping it and validating the metrics.
const minimumAgentRuntime = 3 * time.Minute
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why 3 minutes? The explanation could go in a code comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the variable name should be sufficient? It was minimum I had to run it for

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

( I don't like comments if I can avoid)


func TestCPUValue(t *testing.T) {
log.Printf("testing cpu value...")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: Use Println() instead of Printf() and save the planet.


resourcePath := "agent_configs"

log.Printf("resource file location %s", resourcePath)

t.Run(fmt.Sprintf("resource file location %s ", resourcePath), func(t *testing.T) {
test.CopyFile(resourcePath+configJSON, configOutputPath)
err := test.StartAgent(configOutputPath, false)

if err != nil {
t.Fatalf("Agent could not start")
}

time.Sleep(minimumAgentRuntime)
Copy link
Contributor

@adam-mateen adam-mateen Oct 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems you are sleeping a fixed amount of time and then asserting you can fetch metrics.
When there are only a few test cases and we don't care about test time, this is fine.

But as the number of test cases grows, it becomes more beneficial to poll instead of waiting a fixed amount of time, e.g.

loop for 3 minutes:
    sleep 1 second
    fetch metrics
    if all metrics successfully fetched:
        return
end loop
assert.False("time out waiting for metrics")

I've definitely written tests like this, so no need to fix it now.
Just something to consider in the future.

log.Printf("Agent has been running for : %s", minimumAgentRuntime.String())
test.StopAgent()

testResult := validateCpuMetrics()
testSuiteStatus := getTestSuiteStatus(testResult)
printTestResult(testSuiteStatus, testResult)

if testSuiteStatus == status.FAILED {
t.Fatalf("Cpu test failed to validate that every metric value is greater than zero")
}
})

// TODO: Get CPU value > 0
// TODO: Range test with >0 and <100
// TODO: Range test: which metric to get? api reference check. should I get average or test every single datapoint for 10 minutes? (and if 90%> of them are in range, we are good)
}

// metricsToFetch lists the CPU metrics whose values the test validates. Each
// name is "cpu_" plus a measurement name as it appears in the agent
// configuration (for example "usage_guest" becomes "cpu_usage_guest").
var metricsToFetch = []string{
	"cpu_time_active", "cpu_time_guest", "cpu_time_guest_nice", "cpu_time_idle", "cpu_time_iowait", "cpu_time_irq",
	"cpu_time_nice", "cpu_time_softirq", "cpu_time_steal", "cpu_time_system", "cpu_time_user",
	"cpu_usage_active", "cpu_usage_guest", "cpu_usage_guest_nice", "cpu_usage_idle", "cpu_usage_iowait",
	"cpu_usage_irq", "cpu_usage_nice", "cpu_usage_softirq", "cpu_usage_steal", "cpu_usage_system", "cpu_usage_user",
}

// validateCpuMetrics checks every metric in metricsToFetch and returns a
// per-metric status. A metric is SUCCESSFUL only when a fetcher exists for it,
// the fetch returns no error, and all fetched values are greater than zero;
// otherwise it stays FAILED.
func validateCpuMetrics() map[string]status.TestStatus {
	results := map[string]status.TestStatus{}
	for _, name := range metricsToFetch {
		// Assume failure until every check below has passed.
		results[name] = status.FAILED

		fetcher, err := metric.GetMetricFetcher(name)
		if err != nil {
			continue
		}

		values, err := fetcher.Fetch(namespace, name, metric.AVERAGE)
		if err == nil && isAllValuesGreaterThanZero(name, values) {
			results[name] = status.SUCCESSFUL
		}
	}
	return results
}

// isAllValuesGreaterThanZero reports whether values is non-empty and contains
// no element that is less than or equal to zero. The outcome is logged along
// with metricName.
func isAllValuesGreaterThanZero(metricName string, values []float64) bool {
	count := len(values)
	if count == 0 {
		log.Printf("No values found %v", metricName)
		return false
	}

	allPositive := true
	for i := 0; i < count; i++ {
		if values[i] <= 0 {
			allPositive = false
			break
		}
	}

	if !allPositive {
		log.Printf("Values are not all greater than zero for %v", metricName)
		return false
	}
	log.Printf("Values are all greater than zero for %v", metricName)
	return true
}

func printTestResult(testSuiteStatus status.TestStatus, testSummary map[string]status.TestStatus) {
testSuite := "CPU Test"

log.Printf("Finished %v", testSuite)
log.Printf("==============%v==============", testSuite)
log.Printf("==============%v==============", string(testSuiteStatus))
w := tabwriter.NewWriter(log.Writer(), 1, 1, 1, ' ', 0)
SaxyPandaBear marked this conversation as resolved.
Show resolved Hide resolved
for metricName, status := range testSummary {
fmt.Fprintln(w, metricName, "\t", status, "\t")
}
w.Flush()
SaxyPandaBear marked this conversation as resolved.
Show resolved Hide resolved
log.Printf("==============================")
}

func getTestSuiteStatus(testSummary map[string]status.TestStatus) status.TestStatus {
isAllSuccessful := status.SUCCESSFUL
for _, value := range testSummary {
if value == status.FAILED {
isAllSuccessful = status.FAILED
break
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, we could just return status.FAILED here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that is... true.. what did I do 🙈

}
}
return isAllSuccessful
}
Loading