autoscaler e2e test
Yuvaraj Kakaraparthi committed Apr 12, 2023
1 parent 4f60841 commit debff0a
Showing 13 changed files with 1,233 additions and 5 deletions.
2 changes: 2 additions & 0 deletions Makefile
@@ -506,6 +506,7 @@ generate-e2e-templates-v1.4: $(KUSTOMIZE)
.PHONY: generate-e2e-templates-main
generate-e2e-templates-main: $(KUSTOMIZE)
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-autoscaler --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-autoscaler.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-md-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-md-remediation.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-remediation.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption/step1 --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
@@ -519,6 +520,7 @@ generate-e2e-templates-main: $(KUSTOMIZE)
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-ipv6 --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-ipv6.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-single-node-cluster --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-single-node-cluster.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-autoscaler --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-autoscaler.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-ignition --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-ignition.yaml
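These targets render each flavor directory into a flat cluster-template-<flavor>.yaml, which clusterctl then exposes as the "autoscaler" and "topology-autoscaler" flavors used by the new test. A hypothetical minimal kustomization for the autoscaler flavor, following the pattern of the existing flavors (directory layout and patch file name assumed, not shown in this commit view):

```yaml
# test/e2e/data/infrastructure-docker/main/cluster-template-autoscaler/kustomization.yaml (sketch)
resources:
  - ../cluster-template            # reuse the base docker cluster template
patchesStrategicMerge:
  - md-autoscaler-metadata.yaml    # assumed patch adding the autoscaler min/max size annotations to the MachineDeployment
```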

248 changes: 248 additions & 0 deletions test/e2e/autoscaler.go
@@ -0,0 +1,248 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
"context"
"fmt"
"os"
"path/filepath"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
"k8s.io/utils/pointer"

"sigs.k8s.io/cluster-api/test/framework"
"sigs.k8s.io/cluster-api/test/framework/clusterctl"
"sigs.k8s.io/cluster-api/util"
)

// AutoscalerSpecInput is the input for AutoscalerSpec.
type AutoscalerSpecInput struct {
E2EConfig *clusterctl.E2EConfig
ClusterctlConfigPath string
BootstrapClusterProxy framework.ClusterProxy
ArtifactFolder string
SkipCleanup bool
ControlPlaneWaiters clusterctl.ControlPlaneWaiters

// Flavor, if specified, is the template flavor to be used for this test.
// Note:
// - the file creating the service account to be used by the autoscaler when connecting to the management cluster
//   - must be named "autoscaler-to-workload-management.yaml"
//   - must deploy objects in the $CLUSTER_NAMESPACE
//   - must create a service account with name "cluster-$CLUSTER_NAME" and the RBAC rules required for the autoscaler to work.
//   - must create a secret with name "cluster-$CLUSTER_NAME-token" and type "kubernetes.io/service-account-token".
// - the file creating the autoscaler deployment in the workload cluster
//   - must be named "autoscaler-to-workload-workload.yaml"
//   - must deploy objects in the cluster-autoscaler-system namespace
//   - must create a deployment named "cluster-autoscaler"
//   - must run the autoscaler with --cloud-provider=clusterapi,
//     --node-group-auto-discovery=clusterapi:namespace=${CLUSTER_NAMESPACE},clusterName=${CLUSTER_NAME}
//     and --cloud-config pointing to a kubeconfig that connects to the management cluster
//     using the token above.
//   - can use the following variables to build the management cluster kubeconfig:
//     $MANAGEMENT_CLUSTER_TOKEN, $MANAGEMENT_CLUSTER_CA, $MANAGEMENT_CLUSTER_ADDRESS
// (A minimal sketch of both files is shown right after this struct.)
Flavor *string
InfrastructureProvider string
AutoscalerVersion string
}
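A minimal sketch of the two files described in the Flavor comment above, assuming a standard cluster-autoscaler setup; the RBAC rules are elided, and the image repository and mount path are illustrative assumptions, not taken from this commit:

```yaml
# autoscaler-to-workload-management.yaml (sketch): service account and token in the management cluster
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cluster-${CLUSTER_NAME}
  namespace: ${CLUSTER_NAMESPACE}
---
apiVersion: v1
kind: Secret
metadata:
  name: cluster-${CLUSTER_NAME}-token
  namespace: ${CLUSTER_NAMESPACE}
  annotations:
    kubernetes.io/service-account.name: cluster-${CLUSTER_NAME}
type: kubernetes.io/service-account-token
---
# autoscaler-to-workload-workload.yaml (sketch): the autoscaler Deployment in the workload cluster
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: cluster-autoscaler-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cluster-autoscaler
  template:
    metadata:
      labels:
        app: cluster-autoscaler
    spec:
      containers:
      - name: cluster-autoscaler
        # image repository assumed; the test pins AutoscalerVersion (e.g. v1.26.1)
        image: registry.k8s.io/autoscaling/cluster-autoscaler:${AUTOSCALER_VERSION}
        args:
        - --cloud-provider=clusterapi
        - --node-group-auto-discovery=clusterapi:namespace=${CLUSTER_NAMESPACE},clusterName=${CLUSTER_NAME}
        # path assumed; must point at a kubeconfig built from $MANAGEMENT_CLUSTER_TOKEN, $MANAGEMENT_CLUSTER_CA, $MANAGEMENT_CLUSTER_ADDRESS
        - --cloud-config=/mnt/kubeconfig/management.kubeconfig
```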

// AutoscalerSpec implements a test for the autoscaler, and more specifically for the autoscaler
// being deployed in the workload cluster.
func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) {
var (
specName = "autoscaler"
input AutoscalerSpecInput
namespace *corev1.Namespace
cancelWatches context.CancelFunc
clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
)

BeforeEach(func() {
Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName)
input = inputGetter()
Expect(input.E2EConfig).ToNot(BeNil(), "Invalid argument. input.E2EConfig can't be nil when calling %s spec", specName)
Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling %s spec", specName)
Expect(input.BootstrapClusterProxy).ToNot(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", specName)
Expect(input.InfrastructureProvider).ToNot(BeEmpty(), "Invalid argument. input.InfrastructureProvider can't be empty when calling %s spec", specName)
Expect(input.AutoscalerVersion).ToNot(BeEmpty(), "Invalid argument. input.AutoscalerVersion can't be empty when calling %s spec", specName)
Expect(os.MkdirAll(input.ArtifactFolder, 0750)).To(Succeed(), "Invalid argument. input.ArtifactFolder can't be created for %s spec", specName)

Expect(input.E2EConfig.Variables).To(HaveKey(KubernetesVersion))

// Set up a Namespace to host objects for this spec and create a watcher for the namespace events.
namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder)
clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
})

It("Should create a workload cluster", func() {
By("Creating a workload cluster")

flavor := clusterctl.DefaultFlavor
if input.Flavor != nil {
flavor = *input.Flavor
}

clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
ConfigCluster: clusterctl.ConfigClusterInput{
LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName()),
ClusterctlConfigPath: input.ClusterctlConfigPath,
KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(),
InfrastructureProvider: input.InfrastructureProvider,
Flavor: flavor,
Namespace: namespace.Name,
ClusterName: fmt.Sprintf("%s-%s", specName, util.RandomString(6)),
KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion),
ControlPlaneMachineCount: pointer.Int64(1),
WorkerMachineCount: pointer.Int64(1),
},
ControlPlaneWaiters: input.ControlPlaneWaiters,
WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"),
WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
}, clusterResources)

var nodeGroupMinSize, nodeGroupMaxSize string
managedCluster := clusterResources.Cluster.Spec.Topology != nil
if managedCluster {
// Ensure the MachineDeploymentTopology has the autoscaler annotations.
mdTopology := clusterResources.Cluster.Spec.Topology.Workers.MachineDeployments[0]
Expect(mdTopology.Metadata.Annotations).NotTo(BeNil(), "MachineDeploymentTopology is expected to have autoscaler annotations")
var ok bool
nodeGroupMinSize, ok = mdTopology.Metadata.Annotations[framework.AutoscalerMinSize]
Expect(ok).To(BeTrue(), fmt.Sprintf("MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, framework.AutoscalerMinSize))
nodeGroupMaxSize, ok = mdTopology.Metadata.Annotations[framework.AutoscalerMaxSize]
Expect(ok).To(BeTrue(), fmt.Sprintf("MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, framework.AutoscalerMaxSize))
} else {
// Ensure the MachineDeployment has the autoscaler annotations.
md := clusterResources.MachineDeployments[0]
Expect(md.Annotations).NotTo(BeNil(), "MachineDeployment is expected to have autoscaler annotations")
var ok bool
nodeGroupMinSize, ok = md.Annotations[framework.AutoscalerMinSize]
Expect(ok).To(BeTrue(), fmt.Sprintf("MachineDeployment %s does not have the %q autoscaler annotation", md.Name, framework.AutoscalerMinSize))
nodeGroupMaxSize, ok = md.Annotations[framework.AutoscalerMaxSize]
Expect(ok).To(BeTrue(), fmt.Sprintf("MachineDeployment %s does not have the %q autoscaler annotation", md.Name, framework.AutoscalerMaxSize))
}

// Get a ClusterProxy so we can interact with the workload cluster
workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, clusterResources.Cluster.Namespace, clusterResources.Cluster.Name)
originalReplicas := *clusterResources.MachineDeployments[0].Spec.Replicas

By("Installing the autoscaler on the workload cluster")
infrastructureProviderVersions := input.E2EConfig.GetProviderVersions(input.InfrastructureProvider)
latestProviderVersion := infrastructureProviderVersions[len(infrastructureProviderVersions)-1]

framework.ApplyAutoscalerToWorkloadCluster(ctx, framework.ApplyAutoscalerToWorkloadClusterInput{
ArtifactFolder: input.ArtifactFolder,
InfrastructureProvider: input.InfrastructureProvider,
LatestProviderVersion: latestProviderVersion,
ManagementClusterProxy: input.BootstrapClusterProxy,
WorkloadClusterProxy: workloadClusterProxy,
Cluster: clusterResources.Cluster,
AutoscalerVersion: input.AutoscalerVersion,
}, input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers")...)

By("Creating workload that forces the system to scale up")
framework.AddScaleUpDeploymentAndWait(ctx, framework.AddScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
}, input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-autoscaler")...)

By("Checking the MachineDeployment is scaled up")
scaledUpReplicas := originalReplicas + 1
framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachineDeployment: clusterResources.MachineDeployments[0],
Replicas: scaledUpReplicas,
WaitForMachineDeployment: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-autoscaler"),
})

By("Disable autoscaler on the MachineDeployment")
if managedCluster {
framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineDeploymentTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers"),
})
} else {
framework.DisableAutoscalerForMachineDeploymentAndWait(ctx, framework.DisableAutoscalerForMachineDeploymentAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
MachineDeployment: clusterResources.MachineDeployments[0],
WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers"),
})
}

By("Checking we can manually scale up the MachineDeployment")
// Scale up the MachineDeployment. Since autoscaler is disabled we should be able to do this.
excessReplicas := scaledUpReplicas + 1
if managedCluster {
framework.ScaleAndWaitMachineDeploymentTopology(ctx, framework.ScaleAndWaitMachineDeploymentTopologyInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
Replicas: excessReplicas,
WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
})
} else {
framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
MachineDeployment: clusterResources.MachineDeployments[0],
Replicas: excessReplicas,
WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
})
}

By("Checking enabling autoscaler will scale down the MachineDeployment to correct size")
// Enable autoscaler on the MachineDeployment.
if managedCluster {
framework.EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.EnableAutoscalerForMachineDeploymentTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: nodeGroupMinSize,
NodeGroupMaxSize: nodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-autoscaler"),
})
} else {
framework.EnableAutoscalerForMachineDeploymentAndWait(ctx, framework.EnableAutoscalerForMachineDeploymentAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
MachineDeployment: clusterResources.MachineDeployments[0],
NodeGroupMinSize: nodeGroupMinSize,
NodeGroupMaxSize: nodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers"),
})
}

By("Checking the MachineDeployment is scaled down")
// Since we scaled up the MachineDeployment manually and the workload has not changed, the autoscaler
// should detect that there are unneeded nodes and scale down the MachineDeployment.
framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachineDeployment: clusterResources.MachineDeployments[0],
Replicas: scaledUpReplicas,
WaitForMachineDeployment: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers"),
})

By("PASSED!")
})

AfterEach(func() {
// Dumps all the resources in the spec namespace, then cleans up the cluster object and the spec namespace itself.
dumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup)
})
}
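The spec only asserts that the autoscaler annotations exist on the first MachineDeployment (or MachineDeploymentTopology); the flavors are expected to set them. A sketch of the ClusterClass variant, assuming framework.AutoscalerMinSize and framework.AutoscalerMaxSize resolve to the standard cluster-autoscaler node-group annotations:

```yaml
# Sketch: autoscaler annotations on a MachineDeploymentTopology; the non-ClusterClass
# flavor would set the same annotations directly on the MachineDeployment.
apiVersion: cluster.x-k8s.io/v1beta1
kind: Cluster
metadata:
  name: ${CLUSTER_NAME}
spec:
  topology:
    workers:
      machineDeployments:
      - class: default-worker
        name: md-0
        metadata:
          annotations:
            cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size: "1"
            cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size: "5"
        # replicas is intentionally left unset so the autoscaler, not the topology
        # controller, owns the replica count.
```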
55 changes: 55 additions & 0 deletions test/e2e/autoscaler_test.go
@@ -0,0 +1,55 @@
//go:build e2e
// +build e2e

/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
. "github.com/onsi/ginkgo/v2"
"k8s.io/utils/pointer"
)

var _ = Describe("When using the autoscaler with Cluster API [Autoscaler]", func() {
AutoscalerSpec(ctx, func() AutoscalerSpecInput {
return AutoscalerSpecInput{
E2EConfig: e2eConfig,
ClusterctlConfigPath: clusterctlConfigPath,
BootstrapClusterProxy: bootstrapClusterProxy,
ArtifactFolder: artifactFolder,
SkipCleanup: skipCleanup,
InfrastructureProvider: "docker",
Flavor: pointer.String("autoscaler"),
AutoscalerVersion: "v1.26.1",
}
})
})

var _ = Describe("When using the autoscaler with Cluster API using ClusterClass [ClusterClass] [Autoscaler]", func() {
AutoscalerSpec(ctx, func() AutoscalerSpecInput {
return AutoscalerSpecInput{
E2EConfig: e2eConfig,
ClusterctlConfigPath: clusterctlConfigPath,
BootstrapClusterProxy: bootstrapClusterProxy,
ArtifactFolder: artifactFolder,
SkipCleanup: skipCleanup,
InfrastructureProvider: "docker",
Flavor: pointer.String("topology-autoscaler"),
AutoscalerVersion: "v1.26.1",
}
})
})
7 changes: 7 additions & 0 deletions test/e2e/config/docker.yaml
@@ -293,6 +293,7 @@ providers:
files:
# Add cluster templates
- sourcePath: "../data/infrastructure-docker/main/cluster-template.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-autoscaler.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-md-remediation.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-remediation.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-adoption.yaml"
@@ -304,11 +305,16 @@ providers:
- sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-scale-in.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-ipv6.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-single-node-cluster.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-autoscaler.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-topology.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-ignition.yaml"
- sourcePath: "../data/infrastructure-docker/main/clusterclass-quick-start.yaml"
- sourcePath: "../data/infrastructure-docker/main/clusterclass-quick-start-runtimesdk.yaml"
- sourcePath: "../data/shared/main/metadata.yaml"
# Add the autoscaler deployment.
# Note: the autoscaler manifests are added to this provider's file list because the test looks them up under the latest version of the infrastructure provider in use.
- sourcePath: "../data/autoscaler/autoscaler-to-workload-management.yaml"
- sourcePath: "../data/autoscaler/autoscaler-to-workload-workload.yaml"

- name: test-extension
type: RuntimeExtensionProvider
@@ -357,6 +363,7 @@ intervals:
default/wait-machine-pool-upgrade: ["5m", "10s"]
default/wait-nodes-ready: ["10m", "10s"]
default/wait-machine-remediation: ["5m", "10s"]
default/wait-autoscaler: ["5m", "10s"]
node-drain/wait-deployment-available: ["3m", "10s"]
node-drain/wait-control-plane: ["15m", "10s"]
node-drain/wait-machine-deleted: ["2m", "10s"]
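Each interval is a [timeout, polling interval] pair consumed by the framework's Eventually-style waiters; GetIntervals falls back to the default/ entry when a spec does not define its own. A hypothetical spec-scoped override (name and values assumed):

```yaml
autoscaler/wait-autoscaler: ["10m", "10s"]
```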