From 28c42139687b9e0f37434d45b9ee1a17f3c4fd9e Mon Sep 17 00:00:00 2001 From: Piotr Zaniewski Date: Thu, 11 May 2023 15:56:40 +0200 Subject: [PATCH] Gather Prometheus metrics for one or multiple provider pods --- cmd/perf/internal/quantify.go | 73 +++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/cmd/perf/internal/quantify.go b/cmd/perf/internal/quantify.go index 3f87a17..0ea6815 100644 --- a/cmd/perf/internal/quantify.go +++ b/cmd/perf/internal/quantify.go @@ -18,6 +18,7 @@ package internal import ( "context" "fmt" + "strings" "time" "github.com/pkg/errors" @@ -34,7 +35,7 @@ import ( // QuantifyOptions represents the options of quantify command type QuantifyOptions struct { - providerPod string + providerPods []string providerNamespace string mrPaths map[string]int cmd *cobra.Command @@ -56,12 +57,12 @@ func NewCmdQuantify() *cobra.Command { "reports them. When you execute this tool an end-to-end experiment will run.", Example: "provider-scale --mrs ./internal/providerScale/manifests/virtualnetwork.yaml=2 " + "--mrs ./internal/providerScale/manifests/loadbalancer.yaml=2" + - "--provider-pod crossplane-provider-jet-azure " + + "--provider-pods crossplane-provider-jet-azure " + "--provider-namespace crossplane-system", RunE: o.Run, } - o.cmd.Flags().StringVar(&o.providerPod, "provider-pod", "", "Pod name of provider") + o.cmd.Flags().StringSliceVarP(&o.providerPods, "provider-pods", "p", []string{}, "Names of the provider pods. Multiple names can be specified, separated by commas (spaces are ignored).") o.cmd.Flags().StringVar(&o.providerNamespace, "provider-namespace", "crossplane-system", "Namespace name of provider") o.cmd.Flags().StringToIntVar(&o.mrPaths, "mrs", nil, "Managed resource templates that will be deployed") @@ -71,7 +72,7 @@ func NewCmdQuantify() *cobra.Command { o.cmd.Flags().StringVar(&o.nodeIP, "node", "", "Node IP") o.cmd.Flags().DurationVar(&o.timeout, "timeout", 120*time.Minute, "Timeout for the experiment") - if err := o.cmd.MarkFlagRequired("provider-pod"); err != nil { + if err := o.cmd.MarkFlagRequired("provider-pods"); err != nil { panic(err) } if err := o.cmd.MarkFlagRequired("mrs"); err != nil { @@ -114,28 +115,52 @@ func (o *QuantifyOptions) Run(_ *cobra.Command, _ []string) error { log.Infof("Results\n------------------------------------------------------------\n") log.Infof("Experiment Duration: %f seconds\n", o.endTime.Sub(o.startTime).Seconds()) time.Sleep(60 * time.Second) - queryResultMemory, err := o.CollectData(fmt.Sprintf(`sum(node_namespace_pod_container:container_memory_working_set_bytes{pod="%s", namespace="%s"})`, - o.providerPod, o.providerNamespace)) - if err != nil { - return errors.Wrap(err, "cannot collect memory data") - } - memoryResult, err := common.ConstructResult(queryResultMemory, "Memory", "Bytes") - if err != nil { - return errors.Wrap(err, "cannot construct memory results") - } - qureyResultCPURate, err := o.CollectData(fmt.Sprintf(`instance:node_cpu_utilisation:rate5m{instance="%s"} * 100`, o.nodeIP)) - if err != nil { - return errors.Wrap(err, "cannot collect cpu data") - } - cpuRateResult, err := common.ConstructResult(qureyResultCPURate, "CPU", "Rate") - if err != nil { - return errors.Wrap(err, "cannot construct cpu results") + // Initialize aggregated results + var aggregatedMemoryResult, aggregatedCPURateResult common.Result + + for _, providerPod := range o.providerPods { + providerPod = strings.TrimSpace(providerPod) + queryResultMemory, err := o.CollectData(fmt.Sprintf(`sum(node_namespace_pod_container:container_memory_working_set_bytes{pod="%s", namespace="%s"})`, + providerPod, o.providerNamespace)) + if err != nil { + return errors.Wrap(err, "cannot collect memory data") + } + memoryResult, err := common.ConstructResult(queryResultMemory, "Memory", "Bytes") + if err != nil { + return errors.Wrap(err, "cannot construct memory results") + } + // Update aggregated memory result + aggregatedMemoryResult.Average += memoryResult.Average + if memoryResult.Peak > aggregatedMemoryResult.Peak { + aggregatedMemoryResult.Peak = memoryResult.Peak + } + + queryResultCPURate, err := o.CollectData(fmt.Sprintf(`instance:node_cpu_utilisation:rate5m{instance="%s"} * 100`, o.nodeIP)) + if err != nil { + return errors.Wrap(err, "cannot collect cpu data") + } + cpuRateResult, err := common.ConstructResult(queryResultCPURate, "CPU", "Rate") + if err != nil { + return errors.Wrap(err, "cannot construct cpu results") + } + // Update aggregated CPU rate result + aggregatedCPURateResult.Average += cpuRateResult.Average + if cpuRateResult.Peak > aggregatedCPURateResult.Peak { + aggregatedCPURateResult.Peak = cpuRateResult.Peak + } + + for _, timeToReadinessResult := range timeToReadinessResults { + timeToReadinessResult.Print() + } + memoryResult.Print() + cpuRateResult.Print() } - for _, timeToReadinessResult := range timeToReadinessResults { - timeToReadinessResult.Print() + + if len(o.providerPods) > 1 { + log.Infof("\nAggregated Results\n------------------------------------------------------------\n") + aggregatedMemoryResult.Print() + aggregatedCPURateResult.Print() } - memoryResult.Print() - cpuRateResult.Print() return nil }