From 6a4bdd4b8f9255e0b5c82e97ad120b39b1b87e9a Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 27 Apr 2023 00:01:47 +0200 Subject: [PATCH] connectivity: Retry on inconclusive results When running the connectivity tests in AKS, we sometimes get interrupted commands that don't have any output [1]. Unfortunately, those commands then exit without any error and are therefore considered successful. We think this is caused by connectivity blips between Kubernetes components. This commit adds a check for those inconclusive results. If we see a seemingly successful command with no output, we retry it until we get something conclusive. This works because all our test commands (curl, ping, nslookup) dump something to stdout. 1 - https://github.com/cilium/cilium/issues/22162 Signed-off-by: Paul Chaignon --- connectivity/check/action.go | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/connectivity/check/action.go b/connectivity/check/action.go index 27bd7b1eec..a7135dbc47 100644 --- a/connectivity/check/action.go +++ b/connectivity/check/action.go @@ -4,6 +4,7 @@ package check import ( + "bytes" "context" "encoding/base64" "errors" @@ -28,6 +29,10 @@ import ( "github.com/cilium/cilium-cli/defaults" ) +const ( + testCommandRetries = 3 +) + // Action represents an individual action (e.g. a curl call) in a Scenario // between a source and a destination peer. type Action struct { @@ -236,13 +241,33 @@ func (a *Action) ExecInPod(ctx context.Context, cmd []string) { pod := a.src a.Debug("Executing command", cmd) - - output, err := pod.K8sClient.ExecInPod(ctx, - pod.Pod.Namespace, pod.Pod.Name, pod.Pod.Labels["name"], cmd) - cmdName := cmd[0] cmdStr := strings.Join(cmd, " ") - a.cmdOutput = output.String() + + var output bytes.Buffer + var err error + // We retry the command in case of inconclusive results. The result is + // deemed inconclusive when the command succeeded, but we don't have any + // output. We've seen this happen when there are connectivity blips on the + // k8s side. + // This check currently only works because all our test commands expect an + // output. + for i := 1; i <= testCommandRetries; i++ { + output, err = pod.K8sClient.ExecInPod(ctx, + pod.Pod.Namespace, pod.Pod.Name, pod.Pod.Labels["name"], cmd) + a.cmdOutput = output.String() + // Check for inconclusive results. + if err == nil && strings.TrimSpace(a.cmdOutput) == "" { + a.Debugf("retrying command %s due to inconclusive results", cmdStr) + continue + } + break + } + // Check for inconclusive results. + if err == nil && strings.TrimSpace(a.cmdOutput) == "" { + a.Failf("inconclusive results: command %q was successful but without output", cmdStr) + } + showOutput := false expectedExitCode := a.expectedExitCode() if err != nil {