diff --git a/Makefile b/Makefile index 3333ad7fbec6..2139245203fb 100644 --- a/Makefile +++ b/Makefile @@ -506,6 +506,7 @@ generate-e2e-templates-v1.4: $(KUSTOMIZE) .PHONY: generate-e2e-templates-main generate-e2e-templates-main: $(KUSTOMIZE) $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template.yaml + $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-autoscaler --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-autoscaler.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-md-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-md-remediation.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-remediation.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption/step1 --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml @@ -519,6 +520,7 @@ generate-e2e-templates-main: $(KUSTOMIZE) $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-ipv6 --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-ipv6.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-single-node-cluster --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-single-node-cluster.yaml + $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-autoscaler --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-autoscaler.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-ignition --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-ignition.yaml diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go new file mode 100644 index 000000000000..4f4f7fcbe334 --- /dev/null +++ b/test/e2e/autoscaler.go @@ -0,0 +1,248 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "os" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/pointer" + + "sigs.k8s.io/cluster-api/test/framework" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" + "sigs.k8s.io/cluster-api/util" +) + +// AutoscalerSpecInput is the input for AutoscalerSpec. 
+type AutoscalerSpecInput struct {
+	E2EConfig             *clusterctl.E2EConfig
+	ClusterctlConfigPath  string
+	BootstrapClusterProxy framework.ClusterProxy
+	ArtifactFolder        string
+	SkipCleanup           bool
+	ControlPlaneWaiters   clusterctl.ControlPlaneWaiters
+
+	// Flavor, if specified, is the template flavor to be used for this test.
+	// Note:
+	// - the file creating the service account to be used by the autoscaler when connecting to the management cluster
+	//   - must be named "autoscaler-to-workload-management.yaml"
+	//   - must deploy objects in the $CLUSTER_NAMESPACE
+	//   - must create a service account with name "cluster-$CLUSTER_NAME" and the RBAC rules required to work.
+	//   - must create a secret with name "cluster-$CLUSTER_NAME-token" and type "kubernetes.io/service-account-token".
+	// - the file creating the autoscaler deployment in the workload cluster
+	//   - must be named "autoscaler-to-workload-workload.yaml"
+	//   - must deploy objects in the cluster-autoscaler-system namespace
+	//   - must create a deployment named "cluster-autoscaler"
+	//   - must run the autoscaler with --cloud-provider=clusterapi,
+	//     --node-group-auto-discovery=clusterapi:namespace=${CLUSTER_NAMESPACE},clusterName=${CLUSTER_NAME}
+	//     and --cloud-config pointing to a kubeconfig to connect to the management cluster
+	//     using the token above.
+	//   - could use the following vars to build the management cluster kubeconfig:
+	//     $MANAGEMENT_CLUSTER_TOKEN, $MANAGEMENT_CLUSTER_CA, $MANAGEMENT_CLUSTER_ADDRESS
+	Flavor                 *string
+	InfrastructureProvider string
+	AutoscalerVersion      string
+}
+
+// AutoscalerSpec implements a test for the autoscaler, and more specifically for the autoscaler
+// being deployed in the workload cluster.
+func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) {
+	var (
+		specName         = "autoscaler"
+		input            AutoscalerSpecInput
+		namespace        *corev1.Namespace
+		cancelWatches    context.CancelFunc
+		clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
+	)
+
+	BeforeEach(func() {
+		Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName)
+		input = inputGetter()
+		Expect(input.E2EConfig).ToNot(BeNil(), "Invalid argument. input.E2EConfig can't be nil when calling %s spec", specName)
+		Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling %s spec", specName)
+		Expect(input.BootstrapClusterProxy).ToNot(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", specName)
+		Expect(input.InfrastructureProvider).ToNot(BeEmpty(), "Invalid argument. input.InfrastructureProvider can't be empty when calling %s spec", specName)
+		Expect(input.AutoscalerVersion).ToNot(BeEmpty(), "Invalid argument. input.AutoscalerVersion can't be empty when calling %s spec", specName)
+		Expect(os.MkdirAll(input.ArtifactFolder, 0750)).To(Succeed(), "Invalid argument. input.ArtifactFolder can't be created for %s spec", specName)
+
+		Expect(input.E2EConfig.Variables).To(HaveKey(KubernetesVersion))
+
+		// Set up a Namespace in which to host objects for this spec and create a watcher for the namespace events.
+ namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder) + clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + }) + + It("Should create a workload cluster", func() { + By("Creating a workload cluster") + + flavor := clusterctl.DefaultFlavor + if input.Flavor != nil { + flavor = *input.Flavor + } + + clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName()), + ClusterctlConfigPath: input.ClusterctlConfigPath, + KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: input.InfrastructureProvider, + Flavor: flavor, + Namespace: namespace.Name, + ClusterName: fmt.Sprintf("%s-%s", specName, util.RandomString(6)), + KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion), + ControlPlaneMachineCount: pointer.Int64(1), + WorkerMachineCount: pointer.Int64(1), + }, + ControlPlaneWaiters: input.ControlPlaneWaiters, + WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }, clusterResources) + + var nodeGroupMinSize, nodeGroupMaxSize string + managedCluster := clusterResources.Cluster.Spec.Topology != nil + if managedCluster { + // Ensure the MachineDeploymentTopology has the autoscaler annotations. + mdTopology := clusterResources.Cluster.Spec.Topology.Workers.MachineDeployments[0] + Expect(mdTopology.Metadata.Annotations).NotTo(BeNil(), "MachineDeployment is expected to have autoscaler annotations") + var ok bool + nodeGroupMinSize, ok = mdTopology.Metadata.Annotations[framework.AutoscalerMinSize] + Expect(ok).To(BeTrue(), fmt.Sprintf("MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, framework.AutoscalerMinSize)) + nodeGroupMaxSize, ok = mdTopology.Metadata.Annotations[framework.AutoscalerMaxSize] + Expect(ok).To(BeTrue(), fmt.Sprintf("MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, framework.AutoscalerMaxSize)) + } else { + // Ensure the MachineDeployment has the autoscaler annotations. 
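+			// For clusters not using ClusterClass the annotations are set directly on the MachineDeployment
+			// (they come from md.yaml in the "autoscaler" cluster template flavor).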
+ md := clusterResources.MachineDeployments[0] + Expect(md.Annotations).NotTo(BeNil(), "MachineDeployment is expected to have autoscaler annotations") + var ok bool + nodeGroupMinSize, ok = md.Annotations[framework.AutoscalerMinSize] + Expect(ok).To(BeTrue(), fmt.Sprintf("MachineDeployment %s does not have the %q autoscaler annotation", md.Name, framework.AutoscalerMinSize)) + nodeGroupMaxSize, ok = md.Annotations[framework.AutoscalerMaxSize] + Expect(ok).To(BeTrue(), fmt.Sprintf("MachineDeployment %s does not have the %q autoscaler annotation", md.Name, framework.AutoscalerMaxSize)) + } + + // Get a ClusterProxy so we can interact with the workload cluster + workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, clusterResources.Cluster.Namespace, clusterResources.Cluster.Name) + originalReplicas := *clusterResources.MachineDeployments[0].Spec.Replicas + + By("Installing the autoscaler on the workload cluster") + infrastructureProviderVersions := input.E2EConfig.GetProviderVersions(input.InfrastructureProvider) + latestProviderVersion := infrastructureProviderVersions[len(infrastructureProviderVersions)-1] + + framework.ApplyAutoscalerToWorkloadCluster(ctx, framework.ApplyAutoscalerToWorkloadClusterInput{ + ArtifactFolder: input.ArtifactFolder, + InfrastructureProvider: input.InfrastructureProvider, + LatestProviderVersion: latestProviderVersion, + ManagementClusterProxy: input.BootstrapClusterProxy, + WorkloadClusterProxy: workloadClusterProxy, + Cluster: clusterResources.Cluster, + AutoscalerVersion: input.AutoscalerVersion, + }, input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers")...) + + By("Creating workload that forces the system to scale up") + framework.AddScaleUpDeploymentAndWait(ctx, framework.AddScaleUpDeploymentAndWaitInput{ + ClusterProxy: workloadClusterProxy, + }, input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-autoscaler")...) + + By("Checking the MachineDeployment is scaled up") + scaledUpReplicas := originalReplicas + 1 + framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + MachineDeployment: clusterResources.MachineDeployments[0], + Replicas: scaledUpReplicas, + WaitForMachineDeployment: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-autoscaler"), + }) + + By("Disable autoscaler on the MachineDeployment") + if managedCluster { + framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineDeploymentTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers"), + }) + } else { + framework.DisableAutoscalerForMachineDeploymentAndWait(ctx, framework.DisableAutoscalerForMachineDeploymentAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + MachineDeployment: clusterResources.MachineDeployments[0], + WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers"), + }) + } + + By("Checking we can manually scale up the MachineDeployment") + // Scale up the MachineDeployment. Since autoscaler is disabled we should be able to do this. 
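+		// Note: if the annotations were still in place, the autoscaler would consider the manually added
+		// machine as unneeded capacity and would scale the MachineDeployment back down after the configured delay.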
+ excessReplicas := scaledUpReplicas + 1 + if managedCluster { + framework.ScaleAndWaitMachineDeploymentTopology(ctx, framework.ScaleAndWaitMachineDeploymentTopologyInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + Replicas: excessReplicas, + WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }) + } else { + framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + MachineDeployment: clusterResources.MachineDeployments[0], + Replicas: excessReplicas, + WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }) + } + + By("Checking enabling autoscaler will scale down the MachineDeployment to correct size") + // Enable autoscaler on the MachineDeployment. + if managedCluster { + framework.EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.EnableAutoscalerForMachineDeploymentTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + NodeGroupMinSize: nodeGroupMinSize, + NodeGroupMaxSize: nodeGroupMaxSize, + WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-autoscaler"), + }) + } else { + framework.EnableAutoscalerForMachineDeploymentAndWait(ctx, framework.EnableAutoscalerForMachineDeploymentAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + MachineDeployment: clusterResources.MachineDeployments[0], + NodeGroupMinSize: nodeGroupMinSize, + NodeGroupMaxSize: nodeGroupMaxSize, + WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers"), + }) + } + + By("Checking the MachineDeployment is scaled down") + // Since we scaled up the MachineDeployment manually and the workload has not changed auto scaler + // should detect that there are unneeded nodes and scale down the MachineDeployment. + framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + MachineDeployment: clusterResources.MachineDeployments[0], + Replicas: scaledUpReplicas, + WaitForMachineDeployment: input.E2EConfig.GetIntervals(input.BootstrapClusterProxy.GetName(), "wait-controllers"), + }) + + By("PASSED!") + }) + + AfterEach(func() { + // Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. + dumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup) + }) +} diff --git a/test/e2e/autoscaler_test.go b/test/e2e/autoscaler_test.go new file mode 100644 index 000000000000..fb6de00cf35f --- /dev/null +++ b/test/e2e/autoscaler_test.go @@ -0,0 +1,55 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package e2e + +import ( + . "github.com/onsi/ginkgo/v2" + "k8s.io/utils/pointer" +) + +var _ = Describe("When using the autoscaler with Cluster API [Autoscaler]", func() { + AutoscalerSpec(ctx, func() AutoscalerSpecInput { + return AutoscalerSpecInput{ + E2EConfig: e2eConfig, + ClusterctlConfigPath: clusterctlConfigPath, + BootstrapClusterProxy: bootstrapClusterProxy, + ArtifactFolder: artifactFolder, + SkipCleanup: skipCleanup, + InfrastructureProvider: "docker", + Flavor: pointer.String("autoscaler"), + AutoscalerVersion: "v1.26.1", + } + }) +}) + +var _ = Describe("When using the autoscaler with Cluster API using ClusterClass [ClusterClass] [Autoscaler]", func() { + AutoscalerSpec(ctx, func() AutoscalerSpecInput { + return AutoscalerSpecInput{ + E2EConfig: e2eConfig, + ClusterctlConfigPath: clusterctlConfigPath, + BootstrapClusterProxy: bootstrapClusterProxy, + ArtifactFolder: artifactFolder, + SkipCleanup: skipCleanup, + InfrastructureProvider: "docker", + Flavor: pointer.String("topology-autoscaler"), + AutoscalerVersion: "v1.26.1", + } + }) +}) diff --git a/test/e2e/config/docker.yaml b/test/e2e/config/docker.yaml index be0c10e84f69..d26c02f904e9 100644 --- a/test/e2e/config/docker.yaml +++ b/test/e2e/config/docker.yaml @@ -293,6 +293,7 @@ providers: files: # Add cluster templates - sourcePath: "../data/infrastructure-docker/main/cluster-template.yaml" + - sourcePath: "../data/infrastructure-docker/main/cluster-template-autoscaler.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-md-remediation.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-remediation.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-adoption.yaml" @@ -304,11 +305,16 @@ providers: - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-scale-in.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-ipv6.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-single-node-cluster.yaml" + - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-autoscaler.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-ignition.yaml" - sourcePath: "../data/infrastructure-docker/main/clusterclass-quick-start.yaml" - sourcePath: "../data/infrastructure-docker/main/clusterclass-quick-start-runtimesdk.yaml" - sourcePath: "../data/shared/main/metadata.yaml" + # Add the autoscaler deployment. + # Note: we are adding the autoscaler deployment here because the test assumes it exists in the latest version of the kubemark provider. 
+ - sourcePath: "../data/autoscaler/autoscaler-to-workload-management.yaml" + - sourcePath: "../data/autoscaler/autoscaler-to-workload-workload.yaml" - name: test-extension type: RuntimeExtensionProvider @@ -357,6 +363,7 @@ intervals: default/wait-machine-pool-upgrade: ["5m", "10s"] default/wait-nodes-ready: ["10m", "10s"] default/wait-machine-remediation: ["5m", "10s"] + default/wait-autoscaler: ["5m", "10s"] node-drain/wait-deployment-available: ["3m", "10s"] node-drain/wait-control-plane: ["15m", "10s"] node-drain/wait-machine-deleted: ["2m", "10s"] diff --git a/test/e2e/data/autoscaler/autoscaler-to-workload-management.yaml b/test/e2e/data/autoscaler/autoscaler-to-workload-management.yaml new file mode 100644 index 000000000000..85d973450786 --- /dev/null +++ b/test/e2e/data/autoscaler/autoscaler-to-workload-management.yaml @@ -0,0 +1,58 @@ +# This yaml creates a service account to be used by the autoscaler running on a workload cluster +# to match against the corresponding Cluster API cluster which is defined into the management cluster. +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cluster-${CLUSTER_NAME} + namespace: ${CLUSTER_NAMESPACE} +--- +apiVersion: v1 +kind: Secret +metadata: + name: cluster-${CLUSTER_NAME}-token + namespace: ${CLUSTER_NAMESPACE} + annotations: + kubernetes.io/service-account.name: cluster-${CLUSTER_NAME} +type: kubernetes.io/service-account-token +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: cluster-${CLUSTER_NAME} + namespace: ${CLUSTER_NAMESPACE} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-${CLUSTER_NAME} +subjects: + - kind: ServiceAccount + name: cluster-${CLUSTER_NAME} +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: cluster-${CLUSTER_NAME} + namespace: ${CLUSTER_NAMESPACE} +rules: + - apiGroups: + - cluster.x-k8s.io + resources: + - machinedeployments + - machinedeployments/scale + - machines + - machinesets + verbs: + - get + - list + - update + - watch + # This is required to allow the autoscaler to scale up dockermachines + # Change this if using with another Cluster API provider. + - apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - dockermachinetemplates + verbs: + - get + - list diff --git a/test/e2e/data/autoscaler/autoscaler-to-workload-workload.yaml b/test/e2e/data/autoscaler/autoscaler-to-workload-workload.yaml new file mode 100644 index 000000000000..e5c87d602ba0 --- /dev/null +++ b/test/e2e/data/autoscaler/autoscaler-to-workload-workload.yaml @@ -0,0 +1,205 @@ +# This yaml deploys the autoscaler on a workload cluster and configures it to match +# against the corresponding Cluster API cluster which is defined into the management cluster. +--- +apiVersion: v1 +kind: Namespace +metadata: + name: cluster-autoscaler-system +--- +# Specify kubeconfig for management cluster +apiVersion: v1 +kind: Secret +metadata: + name: kubeconfig-management-cluster + namespace: cluster-autoscaler-system +stringData: + # NOTE: Using "insecure-skip-tls-verify: true" because kind does not add host.docker.internal to the + # API Server certificate, and we need to use this address in order to access the management cluster + # from the workload cluster. 
+ kubeconfig: | + apiVersion: v1 + kind: Config + clusters: + - name: management-cluster + cluster: + insecure-skip-tls-verify: true + server: ${MANAGEMENT_CLUSTER_ADDRESS} + contexts: + - name: management-context + context: + cluster: management-cluster + namespace: ${CLUSTER_NAMESPACE} + user: cluster-autoscaler-sa + current-context: management-context + users: + - name: cluster-autoscaler-sa + user: + token: ${MANAGEMENT_CLUSTER_TOKEN} + +--- +# Defines the service used by the cluster autoscaler and gives it +# RBAC permissions to look at all the workloads running in this cluster. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cluster-autoscaler + namespace: cluster-autoscaler-system +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: cluster-autoscaler-workload +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler-workload +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: cluster-autoscaler-system +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: cluster-autoscaler-workload +rules: + - apiGroups: + - "" + resources: + - namespaces + - persistentvolumeclaims + - persistentvolumes + - pods + - replicationcontrollers + - services + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - update + - watch + - apiGroups: + - "" + resources: + - pods/eviction + verbs: + - create + - apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch + - apiGroups: + - storage.k8s.io + resources: + - csinodes + - storageclasses + - csidrivers + - csistoragecapacities + verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - replicasets + - statefulsets + verbs: + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - apiGroups: + - "" + resources: + - configmaps + verbs: + - create + - delete + - get + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: cluster-autoscaler-system + labels: + app: cluster-autoscaler +spec: + selector: + matchLabels: + app: cluster-autoscaler + replicas: 1 + template: + metadata: + labels: + app: cluster-autoscaler + spec: + containers: + - image: k8s.gcr.io/autoscaling/cluster-autoscaler:${AUTOSCALER_VERSION} + name: cluster-autoscaler + command: + - /cluster-autoscaler + args: + - --cloud-provider=clusterapi + # Specify kubeconfig for management cluster + - --cloud-config=/management-cluster/kubeconfig + # Limit cluster autoscaler to only match against resources belonging to a single Cluster API cluster + - --node-group-auto-discovery=clusterapi:namespace=${CLUSTER_NAMESPACE},clusterName=${CLUSTER_NAME} + # Set a short scale down unneeded time, so we don't have to wait too long during e2e testing + - --scale-down-unneeded-time=1m + # Set a short scale down delay after time, so we don't have to wait too long during e2e testing + - --scale-down-delay-after-add=1m + volumeMounts: + - name: kubeconfig-management-cluster + mountPath: /management-cluster + readOnly: true + serviceAccountName: cluster-autoscaler + terminationGracePeriodSeconds: 10 + # NOTE: Using "hostNetwork: true" because it is required to resolve host.docker.internal in order to 
+      # access the management cluster from the workload cluster.
+      hostNetwork: true
+      # We are scheduling the autoscaler on the control plane node, so that it keeps running while the
+      # autoscaler scales the worker nodes up and down during the test.
+      nodeSelector:
+        node-role.kubernetes.io/control-plane: ""
+      tolerations:
+        - effect: NoSchedule
+          key: node-role.kubernetes.io/control-plane
+      volumes:
+        - name: kubeconfig-management-cluster
+          secret:
+            secretName: kubeconfig-management-cluster
+            optional: false
diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-autoscaler/kustomization.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-autoscaler/kustomization.yaml
new file mode 100644
index 000000000000..8c79977d6543
--- /dev/null
+++ b/test/e2e/data/infrastructure-docker/main/cluster-template-autoscaler/kustomization.yaml
@@ -0,0 +1,4 @@
+bases:
+- ../bases/cluster-with-kcp.yaml
+- md.yaml
+- ../bases/crs.yaml
diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-autoscaler/md.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-autoscaler/md.yaml
new file mode 100644
index 000000000000..a7cc222d1347
--- /dev/null
+++ b/test/e2e/data/infrastructure-docker/main/cluster-template-autoscaler/md.yaml
@@ -0,0 +1,56 @@
+---
+# DockerMachineTemplate referenced by the MachineDeployment and with
+# - extraMounts for the docker sock, thus allowing self-hosting test
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerMachineTemplate
+metadata:
+  name: "${CLUSTER_NAME}-md-0"
+spec:
+  template:
+    spec:
+      extraMounts:
+        - containerPath: "/var/run/docker.sock"
+          hostPath: "/var/run/docker.sock"
+      # The DOCKER_PRELOAD_IMAGES variable gets set in self-hosted E2E tests to the list of images of the E2E configuration.
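+      # When the variable is not set, the ":-[]" default below makes preLoadImages an empty list.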
+ preLoadImages: ${DOCKER_PRELOAD_IMAGES:-[]} +--- +# KubeadmConfigTemplate referenced by the MachineDeployment +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfigTemplate +metadata: + name: "${CLUSTER_NAME}-md-0" +spec: + template: + spec: + joinConfiguration: + nodeRegistration: + criSocket: unix:///var/run/containerd/containerd.sock + kubeletExtraArgs: + eviction-hard: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%' +--- +# MachineDeployment object +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + annotations: + cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size: "5" + cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size: "2" + name: "${CLUSTER_NAME}-md-0" +spec: + clusterName: "${CLUSTER_NAME}" + selector: + matchLabels: + template: + spec: + clusterName: "${CLUSTER_NAME}" + version: "${KUBERNETES_VERSION}" + bootstrap: + configRef: + name: "${CLUSTER_NAME}-md-0" + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + infrastructureRef: + name: "${CLUSTER_NAME}-md-0" + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachineTemplate + failureDomain: fd4 diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-topology-autoscaler/cluster-autoscaler.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-topology-autoscaler/cluster-autoscaler.yaml new file mode 100644 index 000000000000..bdf04378ef7b --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-topology-autoscaler/cluster-autoscaler.yaml @@ -0,0 +1,42 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: '${CLUSTER_NAME}' + namespace: default + labels: + cni: "${CLUSTER_NAME}-crs-0" +spec: + clusterNetwork: + services: + cidrBlocks: ['${DOCKER_SERVICE_CIDRS}'] + pods: + cidrBlocks: ['${DOCKER_POD_CIDRS}'] + serviceDomain: '${DOCKER_SERVICE_DOMAIN}' + topology: + class: "quick-start" + version: "${KUBERNETES_VERSION}" + controlPlane: + metadata: {} + nodeDeletionTimeout: "30s" + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + workers: + machineDeployments: + - class: "default-worker" + name: "md-0" + nodeDeletionTimeout: "30s" + failureDomain: fd4 + metadata: + annotations: + cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size: "5" + cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size: "2" + variables: + - name: etcdImageTag + value: "" + - name: coreDNSImageTag + value: "" + - name: preLoadImages + value: ${DOCKER_PRELOAD_IMAGES:-[]} + - name: podSecurityStandard + value: + enabled: false + diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-topology-autoscaler/kustomization.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-topology-autoscaler/kustomization.yaml new file mode 100644 index 000000000000..4ca1b0be5dfb --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-topology-autoscaler/kustomization.yaml @@ -0,0 +1,3 @@ +resources: + - ../bases/crs.yaml + - cluster-autoscaler.yaml diff --git a/test/e2e/data/infrastructure-docker/main/clusterclass-quick-start.yaml b/test/e2e/data/infrastructure-docker/main/clusterclass-quick-start.yaml index f3fb40a4417b..5a7329e9a11f 100644 --- a/test/e2e/data/infrastructure-docker/main/clusterclass-quick-start.yaml +++ b/test/e2e/data/infrastructure-docker/main/clusterclass-quick-start.yaml @@ -100,6 +100,32 @@ spec: openAPIV3Schema: type: boolean default: true + - name: podSecurityStandard + required: false + schema: + openAPIV3Schema: + 
properties: + audit: + default: baseline + description: audit sets the level for the audit PodSecurityConfiguration + mode. One of privileged, baseline, restricted. + type: string + enabled: + default: true + description: enabled enables the patches to enable Pod Security Standard + via AdmissionConfiguration. + type: boolean + enforce: + default: baseline + description: enforce sets the level for the enforce PodSecurityConfiguration + mode. One of privileged, baseline, restricted. + type: string + warn: + default: baseline + description: warn sets the level for the warn PodSecurityConfiguration + mode. One of privileged, baseline, restricted. + type: string + type: object patches: - name: lbImageRepository definitions: @@ -290,18 +316,18 @@ spec: apiVersion: pod-security.admission.config.k8s.io/v1{{ if semverCompare "< v1.25" .builtin.controlPlane.version }}beta1{{ end }} kind: PodSecurityConfiguration defaults: - enforce: "baseline" + enforce: "{{ .podSecurityStandard.enforce }}" enforce-version: "latest" - audit: "baseline" + audit: "{{ .podSecurityStandard.audit }}" audit-version: "latest" - warn: "baseline" + warn: "{{ .podSecurityStandard.warn }}" warn-version: "latest" exemptions: usernames: [] runtimeClasses: [] namespaces: [kube-system] path: /etc/kubernetes/kube-apiserver-admission-pss.yaml - enabledIf: '{{ semverCompare ">= v1.24" .builtin.controlPlane.version }}' + enabledIf: '{{ and (semverCompare ">= v1.24" .builtin.controlPlane.version) .podSecurityStandard.enabled }}' --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerClusterTemplate diff --git a/test/framework/autoscaler_helpers.go b/test/framework/autoscaler_helpers.go new file mode 100644 index 000000000000..554d289dfe13 --- /dev/null +++ b/test/framework/autoscaler_helpers.go @@ -0,0 +1,493 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package framework + +import ( + "bytes" + "context" + b64 "encoding/base64" + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + clusterctlclient "sigs.k8s.io/cluster-api/cmd/clusterctl/client" + "sigs.k8s.io/cluster-api/test/framework/internal/log" + "sigs.k8s.io/cluster-api/util/patch" +) + +const ( + AutoscalerMinSize = "cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size" + AutoscalerMaxSize = "cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size" +) + +// ApplyAutoscalerToWorkloadClusterInput is the input for ApplyAutoscalerToWorkloadCluster. 
+type ApplyAutoscalerToWorkloadClusterInput struct { + ClusterctlConfigPath string + + // info about where autoscaler yaml are + // Note: + // - the file creating the service account to be used by the autoscaler when connecting to the management cluster + // - must be named "autoscaler-to-workload-management.yaml" + // - must deploy objects in the $CLUSTER_NAMESPACE + // - must create a service account with name "cluster-$CLUSTER_NAME" and the RBAC rules required to work. + // - must create a secret with name "cluster-$CLUSTER_NAME-token" and type "kubernetes.io/service-account-token". + // - the file creating the autoscaler deployment in the workload cluster + // - must be named "autoscaler-to-workload-workload.yaml" + // - must deploy objects in the cluster-autoscaler-system namespace + // - must create a deployment named "cluster-autoscaler" + // - must run the autoscaler with --cloud-provider=clusterapi, + // --node-group-auto-discovery=clusterapi:namespace=${CLUSTER_NAMESPACE},clusterName=${CLUSTER_NAME} + // and --cloud-config pointing to a kubeconfig to connect to the management cluster + // using the token above. + // - could use following vars to build the management cluster kubeconfig: + // $MANAGEMENT_CLUSTER_TOKEN, $MANAGEMENT_CLUSTER_CA, $MANAGEMENT_CLUSTER_ADDRESS + ArtifactFolder string + InfrastructureProvider string + LatestProviderVersion string + AutoscalerVersion string + + ManagementClusterProxy ClusterProxy + Cluster *clusterv1.Cluster + WorkloadClusterProxy ClusterProxy +} + +// ApplyAutoscalerToWorkloadCluster apply the autoscaler to the workload cluster. +// Please note that it also create a service account in the workload cluster for the autoscaler to use. +func ApplyAutoscalerToWorkloadCluster(ctx context.Context, input ApplyAutoscalerToWorkloadClusterInput, intervals ...interface{}) { + By("Creating the service account to be used by the autoscaler when connecting to the management cluster") + + managementYamlPath := filepath.Join(input.ArtifactFolder, "repository", fmt.Sprintf("infrastructure-%s", input.InfrastructureProvider), input.LatestProviderVersion, "autoscaler-to-workload-management.yaml") + managementYamlTemplate, err := os.ReadFile(managementYamlPath) //nolint:gosec + Expect(err).ToNot(HaveOccurred(), "failed to load %s", managementYamlTemplate) + + managementYaml, err := ProcessYAML(&ProcessYAMLInput{ + Template: managementYamlTemplate, + ClusterctlConfigPath: input.ClusterctlConfigPath, + Env: map[string]string{ + "CLUSTER_NAMESPACE": input.Cluster.Namespace, + "CLUSTER_NAME": input.Cluster.Name, + }, + }) + Expect(err).ToNot(HaveOccurred(), "failed to parse %s", managementYamlTemplate) + Expect(input.ManagementClusterProxy.Apply(ctx, managementYaml)).To(Succeed(), "failed to apply %s", managementYamlTemplate) + + tokenSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: input.Cluster.Namespace, + Name: fmt.Sprintf("cluster-%s-token", input.Cluster.Name), + }, + } + Eventually(func() bool { + err := input.ManagementClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(tokenSecret), tokenSecret) + if err != nil { + return false + } + if _, ok := tokenSecret.Data["token"]; !ok { + return false + } + if _, ok := tokenSecret.Data["ca.crt"]; !ok { + return false + } + return true + }, 5*time.Second, 100*time.Millisecond).Should(BeTrue(), "failed to get token for the autoscaler service account") + + By("Creating the autoscaler deployment in the workload cluster") + + workloadYamlPath := filepath.Join(input.ArtifactFolder, 
"repository", fmt.Sprintf("infrastructure-%s", input.InfrastructureProvider), input.LatestProviderVersion, "autoscaler-to-workload-workload.yaml") + workloadYamlTemplate, err := os.ReadFile(workloadYamlPath) //nolint:gosec + Expect(err).ToNot(HaveOccurred(), "failed to load %s", workloadYamlTemplate) + + serverAddr := input.ManagementClusterProxy.GetRESTConfig().Host + // On CAPD, if not running on Linux, we need to use Docker's proxy to connect back to the host + // to the CAPD cluster. Moby on Linux doesn't use the host.docker.internal DNS name. + if input.InfrastructureProvider == "docker" { + if runtime.GOOS != "linux" { + serverAddr = strings.ReplaceAll(serverAddr, "127.0.0.1", "host.docker.internal") + } else { + serverAddr = strings.ReplaceAll(serverAddr, "127.0.0.1", dockerHostIPForLinux(ctx)) + } + } + + workloadYaml, err := ProcessYAML(&ProcessYAMLInput{ + Template: workloadYamlTemplate, + ClusterctlConfigPath: input.ClusterctlConfigPath, + Env: map[string]string{ + "CLUSTER_NAMESPACE": input.Cluster.Namespace, + "CLUSTER_NAME": input.Cluster.Name, + "MANAGEMENT_CLUSTER_TOKEN": string(tokenSecret.Data["token"]), + "MANAGEMENT_CLUSTER_CA": b64.StdEncoding.EncodeToString(tokenSecret.Data["ca.crt"]), + "MANAGEMENT_CLUSTER_ADDRESS": serverAddr, + "AUTOSCALER_VERSION": input.AutoscalerVersion, + }, + }) + Expect(err).ToNot(HaveOccurred(), "failed to parse %s", workloadYamlTemplate) + Expect(input.WorkloadClusterProxy.Apply(ctx, workloadYaml)).To(Succeed(), "failed to apply %s", workloadYamlTemplate) + + By("Wait for the autoscaler deployment and collect logs") + deployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-autoscaler", + Namespace: "cluster-autoscaler-system", + }, + } + + if err := input.WorkloadClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(deployment), deployment); apierrors.IsNotFound(err) { + WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{ + Getter: input.WorkloadClusterProxy.GetClient(), + Deployment: deployment, + }, intervals...) + + // Start streaming logs from all controller providers + WatchDeploymentLogsByName(ctx, WatchDeploymentLogsByNameInput{ + GetLister: input.WorkloadClusterProxy.GetClient(), + ClientSet: input.WorkloadClusterProxy.GetClientSet(), + Deployment: deployment, + LogPath: filepath.Join(input.ArtifactFolder, "clusters", input.WorkloadClusterProxy.GetName(), "logs", deployment.GetNamespace()), + }) + } +} + +// AddScaleUpDeploymentAndWaitInput is the input for AddScaleUpDeploymentAndWait. +type AddScaleUpDeploymentAndWaitInput struct { + ClusterProxy ClusterProxy +} + +// AddScaleUpDeploymentAndWait create a deployment that will trigger the autoscaler to scale up and create a new machine. +func AddScaleUpDeploymentAndWait(ctx context.Context, input AddScaleUpDeploymentAndWaitInput, intervals ...interface{}) { + By("Create a scale up deployment with resource requests to force scale up") + + // gets the node size + nodes := &corev1.NodeList{} + workers := 0 + Expect(input.ClusterProxy.GetClient().List(ctx, nodes)).To(Succeed(), "failed to list nodes") + var memory *resource.Quantity + for _, n := range nodes.Items { + if _, ok := n.Labels[nodeRoleControlPlane]; ok { + continue + } + if _, ok := n.Labels[nodeRoleOldControlPlane]; ok { + continue + } + memory = n.Status.Capacity.Memory() // Assume that all nodes have the same memory. 
+ workers++ + } + Expect(memory).ToNot(BeNil(), "failed to get memory for the worker node") + + // creates a deployment requesting more memory than the worker has, thus triggering autoscaling + // Each pod should requests memory resource of about 60% of the node capacity so that at most one pod + // fits on each node. Setting a replicas of workers + 1 would ensure we have pods that cannot be scheduled. + // This will force exactly one extra node to be spun up. + replicas := workers + 1 + memoryRequired := int64(float64(memory.Value()) * 0.6) + podMemory := resource.NewQuantity(memoryRequired, resource.BinarySI) + + scalelUpDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "scale-up", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + "app": "scale-up", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: pointer.Int32(int32(replicas)), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "scale-up", + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": "scale-up", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "busybox", + Image: "busybox", + Resources: corev1.ResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: *podMemory, + }, + }, + Command: []string{"/bin/sh", "-c", "echo \"up\" & sleep infinity"}, + }, + }, + }, + }, + }, + } + + By("Scale up deployment created") + Expect(input.ClusterProxy.GetClient().Create(ctx, scalelUpDeployment)).To(Succeed(), "failed to create the scale up pod") + + By("Wait for the scale up deployment to become ready (this implies machines to be created)") + WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{ + Getter: input.ClusterProxy.GetClient(), + Deployment: scalelUpDeployment, + }, intervals...) +} + +type ProcessYAMLInput struct { + Template []byte + ClusterctlConfigPath string + Env map[string]string +} + +func ProcessYAML(input *ProcessYAMLInput) ([]byte, error) { + for n, v := range input.Env { + _ = os.Setenv(n, v) + } + + c, err := clusterctlclient.New(input.ClusterctlConfigPath) + if err != nil { + return nil, err + } + options := clusterctlclient.ProcessYAMLOptions{ + ReaderSource: &clusterctlclient.ReaderSourceOptions{ + Reader: bytes.NewReader(input.Template), + }, + } + + printer, err := c.ProcessYAML(options) + if err != nil { + return nil, err + } + + out, err := printer.Yaml() + if err != nil { + return nil, err + } + + return out, nil +} + +type DisableAutoscalerForMachineDeploymentAndWaitInput struct { + ClusterProxy ClusterProxy + MachineDeployment *clusterv1.MachineDeployment + WaitForAnnotationsToBeDropped []interface{} +} + +func DisableAutoscalerForMachineDeploymentAndWait(ctx context.Context, input DisableAutoscalerForMachineDeploymentAndWaitInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for DisableAutoscalerForMachineDeploymentAndWait") + Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. 
input.ClusterProxy can't be nil when calling DisableAutoscalerForMachineDeploymentAndWait") + + mgmtClient := input.ClusterProxy.GetClient() + + // Get the current MachineDeployment object + currentMD := &clusterv1.MachineDeployment{} + Eventually(func(g Gomega) { + g.Expect(input.ClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(input.MachineDeployment), currentMD)).Should(Succeed()) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed()) + + log.Logf("Dropping the %s and %s annotations from the MachineDeployment", AutoscalerMinSize, AutoscalerMaxSize) + patchHelper, err := patch.NewHelper(currentMD, mgmtClient) + Expect(err).ToNot(HaveOccurred()) + delete(currentMD.Annotations, AutoscalerMinSize) + delete(currentMD.Annotations, AutoscalerMaxSize) + Eventually(func(g Gomega) { + g.Expect(patchHelper.Patch(ctx, currentMD)).Should(Succeed()) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch MachineDeployment to drop autoscaler annotations") + + log.Logf("Wait for the annotations to be dropped from the MachineDeployment") + Eventually(func(g Gomega) { + md := &clusterv1.MachineDeployment{} + g.Expect(input.ClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(input.MachineDeployment), md)).To(Succeed()) + g.Expect(md.Annotations).ToNot(HaveKey(AutoscalerMinSize)) + g.Expect(md.Annotations).ToNot(HaveKey(AutoscalerMaxSize)) + // Verify that disabling auto scaler does not change the current MachineDeployment replicas. + g.Expect(md.Spec.Replicas).To(Equal(currentMD.Spec.Replicas)) + }, input.WaitForAnnotationsToBeDropped...).Should(Succeed(), "Auto scaler annotations are not dropped or replicas changed for the MachineDeployment") +} + +type EnableAutoscalerForMachineDeploymentAndWaitInput struct { + ClusterProxy ClusterProxy + MachineDeployment *clusterv1.MachineDeployment + NodeGroupMinSize string + NodeGroupMaxSize string + WaitForAnnotationsToBeAdded []interface{} +} + +func EnableAutoscalerForMachineDeploymentAndWait(ctx context.Context, input EnableAutoscalerForMachineDeploymentAndWaitInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for EnableAutoscalerForMachineDeploymentAndWait") + Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. 
input.ClusterProxy can't be nil when calling EnableAutoscalerForMachineDeploymentAndWait") + + mgmtClient := input.ClusterProxy.GetClient() + + // Get the current MachineDeployment object + currentMD := &clusterv1.MachineDeployment{} + Eventually(func(g Gomega) { + g.Expect(input.ClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(input.MachineDeployment), currentMD)).Should(Succeed()) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed()) + + log.Logf("Add the %s and %s annotations to the MachineDeployment", AutoscalerMinSize, AutoscalerMaxSize) + patchHelper, err := patch.NewHelper(currentMD, mgmtClient) + Expect(err).ToNot(HaveOccurred()) + if currentMD.Annotations == nil { + currentMD.Annotations = map[string]string{} + } + // Add the autoscaler annotation + currentMD.Annotations[AutoscalerMinSize] = input.NodeGroupMinSize + currentMD.Annotations[AutoscalerMaxSize] = input.NodeGroupMaxSize + Eventually(func(g Gomega) { + g.Expect(patchHelper.Patch(ctx, currentMD)).Should(Succeed()) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch MachineDeployment to add autoscaler annotations") + + log.Logf("Wait for the annotations to applied on the MachineDeployment") + Eventually(func(g Gomega) { + md := &clusterv1.MachineDeployment{} + g.Expect(input.ClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(input.MachineDeployment), md)).To(Succeed()) + g.Expect(md.Annotations).To(HaveKey(AutoscalerMinSize)) + g.Expect(md.Annotations).To(HaveKey(AutoscalerMaxSize)) + }, input.WaitForAnnotationsToBeAdded...).Should(Succeed(), "Auto scaler annotations are missing from the MachineDeployments") +} + +type DisableAutoscalerForMachineDeploymentTopologyAndWaitInput struct { + ClusterProxy ClusterProxy + Cluster *clusterv1.Cluster + WaitForAnnotationsToBeDropped []interface{} +} + +// DisableAutoscalerForMachineDeploymentTopologyAndWait drop the autoscaler annotations from the MachineDeploymentTopology +// and waits till the annotations are dropped from the underlying MachineDeployment. +func DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx context.Context, input DisableAutoscalerForMachineDeploymentTopologyAndWaitInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for DisableAutoscalerForMachineDeploymentTopologyAndWait") + Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling DisableAutoscalerForMachineDeploymentTopologyAndWait") + + mgmtClient := input.ClusterProxy.GetClient() + + // Get the current replicas of the MachineDeployments. 
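+	// They are recorded so we can verify, after the annotations are dropped, that the replica counts are unchanged.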
+ replicas := map[string]*int32{} + mdList := GetMachineDeploymentsByCluster(ctx, GetMachineDeploymentsByClusterInput{ + Lister: mgmtClient, + ClusterName: input.Cluster.Name, + Namespace: input.Cluster.Namespace, + }) + for _, md := range mdList { + replicas[md.Name] = md.Spec.Replicas + } + + log.Logf("Dropping the %s and %s annotations from the MachineDeployments in ClusterTopology", AutoscalerMinSize, AutoscalerMaxSize) + patchHelper, err := patch.NewHelper(input.Cluster, mgmtClient) + Expect(err).ToNot(HaveOccurred()) + for i := range input.Cluster.Spec.Topology.Workers.MachineDeployments { + md := input.Cluster.Spec.Topology.Workers.MachineDeployments[i] + delete(md.Metadata.Annotations, AutoscalerMinSize) + delete(md.Metadata.Annotations, AutoscalerMaxSize) + input.Cluster.Spec.Topology.Workers.MachineDeployments[i] = md + } + Eventually(func(g Gomega) { + g.Expect(patchHelper.Patch(ctx, input.Cluster)).Should(Succeed()) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch Cluster topology to drop autoscaler annotations") + + log.Logf("Wait for the annotations to be dropped from the MachineDeployments") + Eventually(func(g Gomega) { + mdList := GetMachineDeploymentsByCluster(ctx, GetMachineDeploymentsByClusterInput{ + Lister: mgmtClient, + ClusterName: input.Cluster.Name, + Namespace: input.Cluster.Namespace, + }) + for i := range mdList { + md := mdList[i] + g.Expect(md.Annotations).ToNot(HaveKey(AutoscalerMinSize)) + g.Expect(md.Annotations).ToNot(HaveKey(AutoscalerMaxSize)) + // Verify that disabling auto scaler does not change the current MachineDeployment replicas. + g.Expect(md.Spec.Replicas).To(Equal(replicas[md.Name])) + } + }, input.WaitForAnnotationsToBeDropped...).Should(Succeed(), "Auto scaler annotations are not dropped or replicas changed for the MachineDeployments") +} + +type EnableAutoscalerForMachineDeploymentTopologyAndWaitInput struct { + ClusterProxy ClusterProxy + Cluster *clusterv1.Cluster + NodeGroupMinSize string + NodeGroupMaxSize string + WaitForAnnotationsToBeAdded []interface{} +} + +func EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx context.Context, input EnableAutoscalerForMachineDeploymentTopologyAndWaitInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for EnableAutoscalerForMachineDeploymentTopologyAndWait") + Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling EnableAutoscalerForMachineDeploymentTopologyAndWait") + + mgmtClient := input.ClusterProxy.GetClient() + + log.Logf("Add the %s and %s annotations to the MachineDeployments in ClusterTopology", AutoscalerMinSize, AutoscalerMaxSize) + patchHelper, err := patch.NewHelper(input.Cluster, mgmtClient) + Expect(err).ToNot(HaveOccurred()) + for i := range input.Cluster.Spec.Topology.Workers.MachineDeployments { + md := input.Cluster.Spec.Topology.Workers.MachineDeployments[i] + if md.Metadata.Annotations == nil { + md.Metadata.Annotations = map[string]string{} + } + // Add the autoscaler annotation + md.Metadata.Annotations[AutoscalerMinSize] = input.NodeGroupMinSize + md.Metadata.Annotations[AutoscalerMaxSize] = input.NodeGroupMaxSize + // Drop the replicas from MachineDeploymentTopology, or else the topology controller and autoscaler with fight over control. 
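+		// While replicas is set in the MachineDeploymentTopology, the topology controller keeps reconciling the
+		// MachineDeployment back to that value, which would undo any scaling performed by the autoscaler.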
+ md.Replicas = nil + input.Cluster.Spec.Topology.Workers.MachineDeployments[i] = md + } + Eventually(func(g Gomega) { + g.Expect(patchHelper.Patch(ctx, input.Cluster)).Should(Succeed()) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch Cluster topology to add autoscaler annotations") + + log.Logf("Wait for the annotations to applied on the MachineDeployments") + Eventually(func(g Gomega) { + mdList := GetMachineDeploymentsByCluster(ctx, GetMachineDeploymentsByClusterInput{ + Lister: mgmtClient, + ClusterName: input.Cluster.Name, + Namespace: input.Cluster.Namespace, + }) + for i := range mdList { + md := mdList[i] + g.Expect(md.Annotations).To(HaveKey(AutoscalerMinSize)) + g.Expect(md.Annotations).To(HaveKey(AutoscalerMaxSize)) + } + }, input.WaitForAnnotationsToBeAdded...).Should(Succeed(), "Auto scaler annotations are missing from the MachineDeployments") +} + +func dockerHostIPForLinux(ctx context.Context) string { + ipCmd := exec.CommandContext(ctx, "docker", "run", "--rm", "busybox", "ip", "route") + var ipOut, ipErr bytes.Buffer + ipCmd.Stdout = &ipOut + ipCmd.Stderr = &ipErr + Expect(ipCmd.Run()).Should(Succeed(), fmt.Sprintf("Failed to run the command to get the docker host ip: %s", ipErr.String())) + + extractCmd := exec.CommandContext(ctx, "awk", "/default/ { print $3 }") + extractCmd.Stdin = &ipOut + var extractOut, extractErr bytes.Buffer + extractCmd.Stdout = &extractOut + extractCmd.Stderr = &extractErr + Expect(extractCmd.Run()).Should(Succeed(), fmt.Sprintf("Failed to extract ip: %s", extractErr.String())) + return extractOut.String() +} diff --git a/test/framework/machinedeployment_helpers.go b/test/framework/machinedeployment_helpers.go index d4ea0ccb18cf..c447b81b06c6 100644 --- a/test/framework/machinedeployment_helpers.go +++ b/test/framework/machinedeployment_helpers.go @@ -530,7 +530,11 @@ func ScaleAndWaitMachineDeploymentTopology(ctx context.Context, input ScaleAndWa Expect(input.Cluster.Spec.Topology.Workers.MachineDeployments).NotTo(BeEmpty(), "Invalid argument. input.Cluster must have at least one MachineDeployment topology") mdTopology := input.Cluster.Spec.Topology.Workers.MachineDeployments[0] - log.Logf("Scaling machine deployment topology %s from %d to %d replicas", mdTopology.Name, *mdTopology.Replicas, input.Replicas) + if mdTopology.Replicas != nil { + log.Logf("Scaling machine deployment topology %s from %d to %d replicas", mdTopology.Name, *mdTopology.Replicas, input.Replicas) + } else { + log.Logf("Scaling machine deployment topology %s to %d replicas", mdTopology.Name, input.Replicas) + } patchHelper, err := patch.NewHelper(input.Cluster, input.ClusterProxy.GetClient()) Expect(err).ToNot(HaveOccurred()) mdTopology.Replicas = pointer.Int32(input.Replicas) @@ -587,3 +591,28 @@ func ScaleAndWaitMachineDeploymentTopology(ctx context.Context, input ScaleAndWa return nodeRefCount, nil }, input.WaitForMachineDeployments...).Should(Equal(int(*md.Spec.Replicas)), "Timed out waiting for Machine Deployment %s to have %d replicas", klog.KObj(&md), *md.Spec.Replicas) } + +type AssertMachineDeploymentReplicasInput struct { + Getter Getter + MachineDeployment *clusterv1.MachineDeployment + Replicas int32 + WaitForMachineDeployment []interface{} +} + +func AssertMachineDeploymentReplicas(ctx context.Context, input AssertMachineDeploymentReplicasInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for AssertMachineDeploymentReplicas") + Expect(input.Getter).ToNot(BeNil(), "Invalid argument. 
input.Getter can't be nil when calling AssertMachineDeploymentReplicas") + Expect(input.MachineDeployment).ToNot(BeNil(), "Invalid argument. input.MachineDeployment can't be nil when calling AssertMachineDeploymentReplicas") + + Eventually(func(g Gomega) { + // Get the MachineDeployment + md := &clusterv1.MachineDeployment{} + key := client.ObjectKey{ + Namespace: input.MachineDeployment.Namespace, + Name: input.MachineDeployment.Name, + } + g.Expect(input.Getter.Get(ctx, key, md)).To(Succeed()) + g.Expect(md.Spec.Replicas).Should(Not(BeNil())) + g.Expect(*md.Spec.Replicas).Should(Equal(input.Replicas)) + }, input.WaitForMachineDeployment...).Should(Succeed()) +}