Skip to content

Commit

Permalink
connectivity: Add latency measurement
Browse files Browse the repository at this point in the history
Add netperf-based latency tests that can be triggered by running `cilium
connectivity test --perf --perf-latency --perf-samples 5`

Signed-off-by: Dario Mader <[email protected]>
  • Loading branch information
darox authored and tklauser committed Nov 17, 2023
1 parent fe21f47 commit e3d1ec7
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 40 deletions.
1 change: 1 addition & 0 deletions connectivity/check/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type Parameters struct {
PerfCRR bool
PerfHostNet bool
PerfSamples int
PerfLatency bool
CurlImage string
PerformanceImage string
JSONMockImage string
Expand Down
38 changes: 29 additions & 9 deletions connectivity/check/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ type PerfResult struct {
Samples int
Values []float64
Avg float64
Latency map[string][]float64
}

func netIPToCIDRs(netIPs []netip.Addr) (netCIDRs []netip.Prefix) {
Expand Down Expand Up @@ -592,16 +593,35 @@ func (ct *ConnectivityTest) report() error {
}

if ct.params.Perf {
// Report Performance results
ct.Headerf("🔥 Performance Test Summary: ")
ct.Logf("%s", strings.Repeat("-", 145))
ct.Logf("📋 %-15s | %-50s | %-15s | %-15s | %-15s | %-15s", "Scenario", "Pod", "Test", "Num Samples", "Duration", "Avg value")
ct.Logf("%s", strings.Repeat("-", 145))
for p, d := range ct.PerfResults {
ct.Logf("📋 %-15s | %-50s | %-15s | %-15d | %-15s | %.2f (%s)", d.Scenario, p.Pod, p.Test, d.Samples, d.Duration, d.Avg, d.Metric)
ct.Debugf("Individual Values from run : %s", d.Values)
if ct.params.PerfLatency {
// Report Performance results for latency
ct.Header("🔥 Latency Test Summary:")
ct.Logf("%s", strings.Repeat("-", 233))
ct.Logf("📋 %-15s | %-50s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s", "Scenario", "Pod", "Test", "Num Samples", "Duration", "Min", "Mean", "Max", "P50", "P90", "P99")
ct.Logf("%s", strings.Repeat("-", 233))
for p, d := range ct.PerfResults {
ct.Logf("📋 %-15s | %-50s | %-15s | %-15d | %-15s | %-12.2f %s | %-12.2f %s | %-12.2f %s | %-12.2f %s | %-12.2f %s | %-12.2f %s",
d.Scenario, p.Pod, p.Test, d.Samples, d.Duration,
d.Latency["min"][0], d.Metric,
d.Latency["mean"][0], d.Metric,
d.Latency["max"][0], d.Metric,
d.Latency["p50"][0], d.Metric,
d.Latency["p90"][0], d.Metric,
d.Latency["p99"][0], d.Metric)
}
ct.Logf("%s", strings.Repeat("-", 233))
} else {
// Report Performance results for throughput
ct.Header("🔥 Performance Test Summary:")
ct.Logf("%s", strings.Repeat("-", 145))
ct.Logf("📋 %-15s | %-50s | %-15s | %-15s | %-15s | %-15s", "Scenario", "Pod", "Test", "Num Samples", "Duration", "Avg value")
ct.Logf("%s", strings.Repeat("-", 145))
for p, d := range ct.PerfResults {
ct.Logf("📋 %-15s | %-50s | %-15s | %-15d | %-15s | %.2f (%s)", d.Scenario, p.Pod, p.Test, d.Samples, d.Duration, d.Avg, d.Metric)
ct.Debugf("Individual Values from run : %s", d.Values)
}
ct.Logf("%s", strings.Repeat("-", 145))
}
ct.Logf("%s", strings.Repeat("-", 145))
}

ct.Headerf("✅ All %d tests (%d actions) successful, %d tests skipped, %d scenarios skipped.", nt-nst, na, nst, nss)
Expand Down
116 changes: 85 additions & 31 deletions connectivity/tests/perfpod.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type netPerfPodtoPod struct {
}

var (
	// netPerfRegex parses a netperf throughput/RR result row; columns are
	// recv socket size, send socket size, msg size|okmsg, duration and the
	// measured value (the value lands in one of the last two groups).
	netPerfRegex = regexp.MustCompile(`\s+\d+\s+\d+\s+(\d+|\S+)\s+(\S+|\d+)\s+(\S+)+\s+(\S+)?`)

	// netPerfRegexLatency captures the six comma-separated latency columns
	// (min, mean, max, P50, P90, P99) emitted by netperf's "-o" selectors.
	netPerfRegexLatency = regexp.MustCompile(`(\d+(?:\.\d+)?),(\d+(?:\.\d+)?),(\d+(?:\.\d+)?),(\d+(?:\.\d+)?),(\d+(?:\.\d+)?),(\d+(?:\.\d+)?)`)
)

func (s *netPerfPodtoPod) Name() string {
tn := "perf-pod-to-pod"
Expand All @@ -44,6 +45,7 @@ func (s *netPerfPodtoPod) Run(ctx context.Context, t *check.Test) {
samples := t.Context().Params().PerfSamples
duration := t.Context().Params().PerfDuration
crr := t.Context().Params().PerfCRR
latency := t.Context().Params().PerfLatency
for _, c := range t.Context().PerfClientPods() {
c := c
for _, server := range t.Context().PerfServerPod() {
Expand All @@ -57,7 +59,9 @@ func (s *netPerfPodtoPod) Run(ctx context.Context, t *check.Test) {
action.CollectFlows = false
action.Run(func(a *check.Action) {
if crr {
netperf(ctx, server.Pod.Status.PodIP, c.Pod.Name, "TCP_CRR", a, t.Context().PerfResults, 1, 30, scenarioName)
netperf(ctx, server.Pod.Status.PodIP, c.Pod.Name, "TCP_CRR", a, t.Context().PerfResults, samples, duration, scenarioName)
} else if latency {
netperf(ctx, server.Pod.Status.PodIP, c.Pod.Name, "TCP_RR_LATENCY", a, t.Context().PerfResults, samples, duration, scenarioName)
} else {
netperf(ctx, server.Pod.Status.PodIP, c.Pod.Name, "TCP_RR", a, t.Context().PerfResults, samples, duration, scenarioName)
netperf(ctx, server.Pod.Status.PodIP, c.Pod.Name, "TCP_STREAM", a, t.Context().PerfResults, samples, duration, scenarioName)
Expand All @@ -69,47 +73,97 @@ func (s *netPerfPodtoPod) Run(ctx context.Context, t *check.Test) {
}
}

func buildExecCommand(test string, sip string, duration time.Duration, args []string) []string {
exec := []string{"/usr/local/bin/netperf", "-H", sip, "-l", duration.String(), "-t", test, "--", "-R", "1", "-m", fmt.Sprintf("%d", messageSize)}
exec = append(exec, args...)

return exec
}

func netperf(ctx context.Context, sip string, podname string, test string, a *check.Action, result map[check.PerfTests]check.PerfResult, samples int, duration time.Duration, scenarioName string) {
// Define test about to be executed and from which pod
k := check.PerfTests{
Pod: podname,
Test: test,
}
metric := string("OP/s")
if strings.Contains(test, "STREAM") {
metric = "Mb/s"

res := check.PerfResult{
Duration: duration,
Samples: samples,
Scenario: scenarioName,
}

exec := []string{"/usr/local/bin/netperf", "-H", sip, "-l", duration.String(), "-t", test, "--", "-R", "1", "-m", fmt.Sprintf("%d", messageSize)}
// recv socketsize send socketsize msg size|okmsg duration value
values := []float64{}
// Result data
for i := 0; i < samples; i++ {
a.ExecInPod(ctx, exec)
d := netPerfRegex.FindStringSubmatch(a.CmdOutput())
if len(d) < 5 {
a.Fatal("Unable to process netperf result")
if strings.Contains(test, "LATENCY") {
test = strings.ReplaceAll(test, "_LATENCY", "")
k.Test = test
metric := string("μs")
latency := make(map[string][]float64)

args := []string{"-o", "min_latency,mean_latency,max_latency,P50_LATENCY,P90_LATENCY,P99_LATENCY"}
exec := buildExecCommand(test, sip, duration, args)

latencyMetricNames := []string{
"min", "mean", "max", "p50", "p90", "p99",
}

var latencyMetricValue float64
var err error
for i := 0; i < samples; i++ {
a.ExecInPod(ctx, exec)
d := netPerfRegexLatency.FindStringSubmatch(a.CmdOutput())

if len(d) != 7 {
a.Fatal("Unable to process netperf result")
}

for m, metric := range latencyMetricNames {
latencyMetricValue, err = strconv.ParseFloat(d[m+1], 64)
if err != nil {
a.Fatal(fmt.Sprintf("Unable to parse netperf result %s", metric))
}
latency[metric] = append(latency[metric], latencyMetricValue)
}
}
nv := ""
if len(d[len(d)-1]) > 0 {
nv = d[len(d)-1]
} else {
nv = d[len(d)-2]

for _, metric := range latencyMetricNames {
latency[metric] = []float64{listAvg(latency[metric])}
}
f, err := strconv.ParseFloat(nv, 64)
if err == nil {
values = append(values, f)
} else {
a.Fatal("Unable to parse netperf result")

res.Metric = metric
res.Latency = latency
} else {
metric := string("OP/s")
if strings.Contains(test, "STREAM") {
metric = "Mb/s"
}
}
res := check.PerfResult{
Scenario: scenarioName,
Metric: metric,
Duration: duration,
Values: values,
Samples: samples,
Avg: listAvg(values),

exec := buildExecCommand(test, sip, duration, []string{})
// recv socketsize send socketsize msg size|okmsg duration value
// Result data
values := []float64{}
for i := 0; i < samples; i++ {
a.ExecInPod(ctx, exec)
d := netPerfRegex.FindStringSubmatch(a.CmdOutput())
if len(d) < 5 {
a.Fatal("Unable to process netperf result")
}
nv := ""
if len(d[len(d)-1]) > 0 {
nv = d[len(d)-1]
} else {
nv = d[len(d)-2]
}
f, err := strconv.ParseFloat(nv, 64)
if err == nil {
values = append(values, f)
} else {
a.Fatal("Unable to parse netperf result")
}
}

res.Metric = metric
res.Values = values
res.Avg = listAvg(values)
}
result[k] = res
}
Expand Down
1 change: 1 addition & 0 deletions internal/cli/cmd/connectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ func newCmdConnectivityTest(hooks Hooks) *cobra.Command {
cmd.Flags().IntVar(&params.PerfSamples, "perf-samples", 1, "Number of Performance samples to capture (how many times to run each test)")
cmd.Flags().BoolVar(&params.PerfCRR, "perf-crr", false, "Run Netperf CRR Test. --perf-samples and --perf-duration ignored")
cmd.Flags().BoolVar(&params.PerfHostNet, "host-net", false, "Use host networking during network performance tests")
cmd.Flags().BoolVar(&params.PerfLatency, "perf-latency", false, "Run network latency tests")

cmd.Flags().StringVar(&params.CurlImage, "curl-image", defaults.ConnectivityCheckAlpineCurlImage, "Image path to use for curl")
cmd.Flags().StringVar(&params.PerformanceImage, "performance-image", defaults.ConnectivityPerformanceImage, "Image path to use for performance")
Expand Down

0 comments on commit e3d1ec7

Please sign in to comment.