diff --git a/connectivity/check/check.go b/connectivity/check/check.go index e62a5b888a..76affa3832 100644 --- a/connectivity/check/check.go +++ b/connectivity/check/check.go @@ -64,7 +64,10 @@ type Parameters struct { JunitFile string JunitProperties map[string]string - FlushCT bool + IncludeUpgradeTest bool + UpgradeTestSetup bool + UpgradeTestResultPath string + FlushCT bool K8sVersion string HelmChartDirectory string diff --git a/connectivity/check/deployment.go b/connectivity/check/deployment.go index 39a84a37e6..a25424cd8f 100644 --- a/connectivity/check/deployment.go +++ b/connectivity/check/deployment.go @@ -49,6 +49,11 @@ const ( hostNetNSDeploymentNameNonCilium = "host-netns-non-cilium" // runs on non-Cilium test nodes kindHostNetNS = "host-netns" + migrateSvcClientDeploymentName = "migrate-svc-client" + migrateSvcServerDeploymentName = "migrate-svc-server" + migrateSvcServiceName = "migrate-svc" + KindMigrateSvc = "migrate-svc" + EchoServerHostPort = 40000 IngressServiceName = "ingress-service" @@ -584,6 +589,67 @@ func (ct *ConnectivityTest) deploy(ctx context.Context) error { return nil } + // Deploy migrate-svc-client, migrate-svc-server and migrate-svc + if ct.params.UpgradeTestSetup { + _, err = ct.clients.src.GetDeployment(ctx, ct.params.TestNamespace, migrateSvcServerDeploymentName, metav1.GetOptions{}) + if err != nil { + ct.Logf("✨ [%s] Deploying %s deployment...", ct.clients.src.ClusterName(), migrateSvcServerDeploymentName) + migrateSvcServerDeployment := newDeployment(deploymentParameters{ + Name: migrateSvcServerDeploymentName, + Kind: KindMigrateSvc, + Image: "docker.io/cilium/migrate-svc-test:v0.0.2", + Replicas: 3, + Labels: map[string]string{"app": "migrate-svc-server"}, + Command: []string{"/server", "8000"}, + Port: 8000, + }) + _, err = ct.clients.src.CreateServiceAccount(ctx, ct.params.TestNamespace, k8s.NewServiceAccount(migrateSvcServerDeploymentName), metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("unable to create service account %s: %w", migrateSvcServerDeploymentName, err) + } + _, err = ct.clients.src.CreateDeployment(ctx, ct.params.TestNamespace, migrateSvcServerDeployment, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("unable to create deployment %s: %w", migrateSvcServerDeployment, err) + } + } + + _, err = ct.clients.src.GetService(ctx, ct.params.TestNamespace, migrateSvcServiceName, metav1.GetOptions{}) + if err != nil { + ct.Logf("✨ [%s] Deploying %s service...", ct.clients.src.ClusterName(), migrateSvcServiceName) + svc := newService(migrateSvcServiceName, map[string]string{"app": "migrate-svc-server"}, nil, "http", 8000) + _, err = ct.clients.src.CreateService(ctx, ct.params.TestNamespace, svc, metav1.CreateOptions{}) + if err != nil { + return err + } + } + + _, err = ct.clients.src.GetDeployment(ctx, ct.params.TestNamespace, migrateSvcClientDeploymentName, metav1.GetOptions{}) + if err != nil { + ct.Logf("✨ [%s] Deploying %s deployment...", ct.clients.src.ClusterName(), migrateSvcClientDeploymentName) + migrateSvcClientDeployment := newDeployment(deploymentParameters{ + Name: migrateSvcClientDeploymentName, + Kind: KindMigrateSvc, + Image: "docker.io/cilium/migrate-svc-test:v0.0.2", + Replicas: 5, + Labels: map[string]string{"app": "migrate-svc-client"}, + Port: 8000, + Command: []string{ + "/client", + fmt.Sprintf("migrate-svc.%s.svc.cluster.local.:8000", ct.params.TestNamespace), + }, + }) + + _, err = ct.clients.src.CreateServiceAccount(ctx, ct.params.TestNamespace, k8s.NewServiceAccount(migrateSvcClientDeploymentName), metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("unable to create service account %s: %w", migrateSvcClientDeploymentName, err) + } + _, err = ct.clients.src.CreateDeployment(ctx, ct.params.TestNamespace, migrateSvcClientDeployment, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("unable to create deployment %s: %w", migrateSvcClientDeployment, err) + } + } + } + if ct.params.MultiCluster != "" { if ct.params.ForceDeploy { if err := ct.deleteDeployments(ctx, ct.clients.dst); err != nil { @@ -950,6 +1016,11 @@ func (ct *ConnectivityTest) deploymentList() (srcList []string, dstList []string } } + if ct.params.IncludeUpgradeTest { + srcList = append(srcList, migrateSvcClientDeploymentName) + dstList = append(dstList, migrateSvcServerDeploymentName) + } + if (ct.params.MultiCluster != "" || !ct.params.SingleNode) && !ct.params.Perf { dstList = append(dstList, echoOtherNodeDeploymentName) } @@ -967,10 +1038,14 @@ func (ct *ConnectivityTest) deleteDeployments(ctx context.Context, client *k8s.C _ = client.DeleteDeployment(ctx, ct.params.TestNamespace, echoOtherNodeDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteDeployment(ctx, ct.params.TestNamespace, clientDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteDeployment(ctx, ct.params.TestNamespace, client2DeploymentName, metav1.DeleteOptions{}) + _ = client.DeleteDeployment(ctx, ct.params.TestNamespace, migrateSvcClientDeploymentName, metav1.DeleteOptions{}) + _ = client.DeleteDeployment(ctx, ct.params.TestNamespace, migrateSvcServerDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteServiceAccount(ctx, ct.params.TestNamespace, echoSameNodeDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteServiceAccount(ctx, ct.params.TestNamespace, echoOtherNodeDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteServiceAccount(ctx, ct.params.TestNamespace, clientDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteServiceAccount(ctx, ct.params.TestNamespace, client2DeploymentName, metav1.DeleteOptions{}) + _ = client.DeleteServiceAccount(ctx, ct.params.TestNamespace, migrateSvcClientDeploymentName, metav1.DeleteOptions{}) + _ = client.DeleteServiceAccount(ctx, ct.params.TestNamespace, migrateSvcServerDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteService(ctx, ct.params.TestNamespace, echoSameNodeDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteService(ctx, ct.params.TestNamespace, echoOtherNodeDeploymentName, metav1.DeleteOptions{}) _ = client.DeleteConfigMap(ctx, ct.params.TestNamespace, corednsConfigMapName, metav1.DeleteOptions{}) diff --git a/connectivity/suite.go b/connectivity/suite.go index 6cf16830dd..34d2bd6e5f 100644 --- a/connectivity/suite.go +++ b/connectivity/suite.go @@ -205,6 +205,20 @@ func Run(ctx context.Context, ct *check.ConnectivityTest, addExtraTests func(*ch return ct.Run(ctx) } + // Upgrade Test + if ct.Params().IncludeUpgradeTest { + ct.NewTest("post-upgrade").WithScenarios( + tests.NoInterruptedConnections(), + ) + + if ct.Params().UpgradeTestSetup { + // Exit early, as --upgrade-setup is only needed to deploy pods which + // will be used by another invocation of "cli connectivity test" (with + // include --include-upgrade-tests" + return ct.Run(ctx) + } + } + // Run all tests without any policies in place. noPoliciesScenarios := []check.Scenario{ tests.PodToPod(), diff --git a/connectivity/tests/upgrade.go b/connectivity/tests/upgrade.go new file mode 100644 index 0000000000..0ee50dc87b --- /dev/null +++ b/connectivity/tests/upgrade.go @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Cilium + +package tests + +import ( + "context" + gojson "encoding/json" + "os" + "strconv" + + "github.com/cilium/cilium-cli/connectivity/check" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// NoInterruptedConnections checks whether there are no interruptions in +// long-lived E/W LB connections. The test case is used to validate Cilium +// upgrades. +// +// The test case consists of three steps: +// +// 1. Deploying pods and a service which establish the long-lived connections +// (done by "--upgrade-test-setup"). The client pods ("migrate-svc-client") +// establish connections via ClusterIP ("migrate-svc") to server pods +// ("migrate-svc-server"). As there former pods come first before the latter, +// the former pods can crash which increases the pod restart counter. The step +// is responsible for storing the restart counter too. +// 2. Do Cilium upgrade. +// 3. Run the test ("--include-upgrade-test"). The test checks the restart +// counters, and compares them against the previously stored ones. A mismatch +// indicates that a connection was interrupted. +func NoInterruptedConnections() check.Scenario { + return &noInterruptedConnections{} +} + +type noInterruptedConnections struct{} + +func (n *noInterruptedConnections) Name() string { + return "no-interrupted-connections" +} + +func (n *noInterruptedConnections) Run(ctx context.Context, t *check.Test) { + ct := t.Context() + + client := ct.K8sClient() + pods, err := client.ListPods(ctx, ct.Params().TestNamespace, metav1.ListOptions{LabelSelector: "kind=" + check.KindMigrateSvc}) + if err != nil { + t.Fatalf("Unable to list migrate-svc pods: %s", err) + } + if len(pods.Items) == 0 { + t.Fatal("No migrate-svc-{client,server} pods found") + } + + restartCount := make(map[string]string) + for _, pod := range pods.Items { + restartCount[pod.GetObjectMeta().GetName()] = strconv.Itoa(int(pod.Status.ContainerStatuses[0].RestartCount)) + } + + // Only store restart counters which will be used later when running the same + // test case, but w/o --upgrade-test-setup. + if ct.Params().UpgradeTestSetup { + file, err := os.Create(ct.Params().UpgradeTestResultPath) + if err != nil { + t.Fatalf("Failed to create %q file for writing upgrade test temp results: %s", + ct.Params().UpgradeTestResultPath, err) + } + defer file.Close() + + counts := make(map[string]string) + for pod, count := range restartCount { + counts[pod] = count + } + j, err := gojson.Marshal(counts) + if err != nil { + t.Fatalf("Failed to marshal JSON: %s", err) + } + + if _, err := file.Write(j); err != nil { + t.Fatalf("Failed to write upgrade test temp result into file: %s", err) + } + + return + } + + b, err := os.ReadFile(ct.Params().UpgradeTestResultPath) + if err != nil { + t.Fatalf("Failed to read upgrade test result files: %s", err) + } + prevRestartCount := make(map[string]string) + if err := gojson.Unmarshal(b, &prevRestartCount); err != nil { + t.Fatalf("Failed to unmarshal JSON test result file: %s", err) + } + + for pod, count := range restartCount { + if prevCount, found := prevRestartCount[pod]; !found { + t.Fatalf("Could not found Pod %s restart count", pod) + } else if prevCount != count { + t.Fatalf("Pod %s flow was interrupted (restart count does not match %s != %s)", + pod, prevCount, count) + } + } +} diff --git a/internal/cli/cmd/connectivity.go b/internal/cli/cmd/connectivity.go index ab1d4d7930..18e54f3ad5 100644 --- a/internal/cli/cmd/connectivity.go +++ b/internal/cli/cmd/connectivity.go @@ -172,6 +172,9 @@ func newCmdConnectivityTest(hooks Hooks) *cobra.Command { initSysdumpFlags(cmd, ¶ms.SysdumpOptions, "sysdump-", hooks) + cmd.Flags().BoolVar(¶ms.IncludeUpgradeTest, "include-upgrade-test", false, "Include upgrade test") + cmd.Flags().BoolVar(¶ms.UpgradeTestSetup, "upgrade-test-setup", false, "Set up upgrade test dependencies") + cmd.Flags().StringVar(¶ms.UpgradeTestResultPath, "upgrade-test-result-path", "/tmp/cilium-upgrade-test-restart-counts", "Upgrade test temporary result file (used internally)") cmd.Flags().BoolVar(¶ms.FlushCT, "flush-ct", false, "Flush conntrack of Cilium on each node") hooks.AddConnectivityTestFlags(cmd.Flags())