connectivity: Retry on inconclusive results

When running the connectivity tests in AKS, we sometimes get interrupted commands that don't produce any output [1]. Unfortunately, those commands then exit without any error and are therefore considered successful. We think this is caused by connectivity blips between Kubernetes components. This commit adds a check for those inconclusive results. If we see a seemingly successful command with no output, we retry it, up to three attempts in total, until we get something conclusive; if the result is still inconclusive after the last attempt, the action is marked as failed. This works because all our test commands (curl, ping, nslookup) dump something to stdout. [1] cilium/cilium#22162 Signed-off-by: Paul Chaignon <[email protected]>
cilium · Apr 28, 2023 · 927d788 · 927d788
1 parent 7154a5d
commit 927d788
Showing 1 changed file with 30 additions and 5 deletions.
diff --git a/connectivity/check/action.go b/connectivity/check/action.go
@@ -4,6 +4,7 @@
 package check
 
 import (
+	"bytes"
 	"context"
 	"encoding/base64"
 	"errors"
@@ -28,6 +29,10 @@ import (
 	"github.com/cilium/cilium-cli/defaults"
 )
 
+const (
+	testCommandRetries = 3 // Upper bound on total executions of a test command (first attempt included) while its result stays inconclusive.
+)
+
 // Action represents an individual action (e.g. a curl call) in a Scenario
 // between a source and a destination peer.
 type Action struct {
@@ -236,13 +241,33 @@ func (a *Action) ExecInPod(ctx context.Context, cmd []string) {
 	pod := a.src
 
 	a.Debug("Executing command", cmd)
-
-	output, err := pod.K8sClient.ExecInPod(ctx,
-		pod.Pod.Namespace, pod.Pod.Name, pod.Pod.Labels["name"], cmd)
-
 	cmdName := cmd[0]
 	cmdStr := strings.Join(cmd, " ")
-	a.cmdOutput = output.String()
+
+	var output bytes.Buffer
+	var err error
+	// We retry the command in case of inconclusive results. The result is
+	// deemed inconclusive when the command succeeded, but we don't have any
+	// output. We've seen this happen when there are connectivity blips on the
+	// k8s side.
+	// This check currently only works because all our test commands expect an
+	// output.
+	for i := 1; i <= testCommandRetries; i++ {
+		output, err = pod.K8sClient.ExecInPod(ctx,
+			pod.Pod.Namespace, pod.Pod.Name, pod.Pod.Labels["name"], cmd)
+		a.cmdOutput = output.String()
+		// Check for inconclusive results.
+		if err == nil && strings.TrimSpace(a.cmdOutput) == "" {
+			a.Debugf("retrying command %s due to inconclusive results", cmdStr)
+			continue
+		}
+		break
+	}
+	// Check for inconclusive results.
+	if err == nil && strings.TrimSpace(a.cmdOutput) == "" {
+		a.Failf("inconclusive results: command %q was successful but without output", cmdStr)
+	}
+
 	showOutput := false
 	expectedExitCode := a.expectedExitCode()
 	if err != nil {