Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cluster-autoscaler-release-1.30] backport Azure e2e tests #7204

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cluster-autoscaler/cloudprovider/azure/test/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Default output directory for e2e test artifacts (the Makefile's ARTIFACTS value).
_artifacts
25 changes: 25 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/test/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Repository root as reported by git, and the cluster-autoscaler directory below it.
REPO_ROOT:=$(shell git rev-parse --show-toplevel)
CAS_ROOT:=$(REPO_ROOT)/cluster-autoscaler

# Pull in the cluster-autoscaler Makefile.
# NOTE(review): GOARCH, IMAGE and TAG used by the targets below are presumably
# defined there -- confirm.
include $(CAS_ROOT)/Makefile

# Namespace and ServiceAccount name passed to the e2e suite for the
# cluster-autoscaler installation; both can be overridden by the caller.
CLUSTER_AUTOSCALER_NAMESPACE?=default
CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME?=cluster-autoscaler

# build-e2e runs the build-arch-$(GOARCH) and make-image-arch-$(GOARCH) targets
# of the cluster-autoscaler Makefile, then pushes $(IMAGE)-$(GOARCH):$(TAG) so
# the cluster under test can pull it.
.PHONY: build-e2e
build-e2e:
	$(MAKE) -C $(CAS_ROOT) build-arch-$(GOARCH) make-image-arch-$(GOARCH)
	docker push $(IMAGE)-$(GOARCH):$(TAG)

# Directory handed to ginkgo as --output-dir; the JUnit report is written there.
ARTIFACTS?=_artifacts

# test-e2e builds and pushes the image (build-e2e), then runs the ginkgo suite
# in ./e2e with the "e2e" build tag. Everything after "--" is passed to the test
# binary as flags:
#   -resource-group, -cluster-name and -client-id are looked up with kubectl from
#   the first managedclusters / cluster / userassignedidentities object in the
#   "default" namespace. KUBECONFIG is emptied for those lookups.
#   NOTE(review): presumably so they target the default (management) cluster
#   rather than the cluster under test -- confirm.
#   The remaining flags forward the Makefile variables unchanged.
.PHONY: test-e2e
test-e2e: build-e2e
	go run github.com/onsi/ginkgo/v2/ginkgo --tags e2e -v --trace --output-dir "$(ARTIFACTS)" --junit-report="junit.e2e_suite.1.xml" e2e -- \
		-resource-group="$$(KUBECONFIG= kubectl get managedclusters -n default -o jsonpath='{.items[0].status.nodeResourceGroup}')" \
		-cluster-name="$$(KUBECONFIG= kubectl get cluster -n default -o jsonpath='{.items[0].metadata.name}')" \
		-client-id="$$(KUBECONFIG= kubectl get userassignedidentities -n default -o jsonpath='{.items[0].status.clientId}')" \
		-cas-namespace="$(CLUSTER_AUTOSCALER_NAMESPACE)" \
		-cas-serviceaccount-name="$(CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME)" \
		-cas-image-repository="$(IMAGE)-$(GOARCH)" \
		-cas-image-tag="$(TAG)"
154 changes: 154 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
//go:build e2e

/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_test

import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// Azure Provider specs. Every spec runs in its own generated namespace and only
// starts once allVMSSStable passes, so node counts taken at the beginning of a
// spec describe a settled cluster.
var _ = Describe("Azure Provider", func() {
	var (
		namespace *corev1.Namespace
	)

	// nodeIsReady reports whether the Node carries a Ready condition whose
	// status is True.
	nodeIsReady := func(node corev1.Node) bool {
		for _, cond := range node.Status.Conditions {
			if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
				return true
			}
		}
		return false
	}

	BeforeEach(func() {
		// Wait for any scale set operation left over from a previous spec.
		Eventually(allVMSSStable, "10m", "30s").Should(Succeed())

		namespace = &corev1.Namespace{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: "azure-e2e-",
			},
		}
		Expect(k8s.Create(ctx, namespace)).To(Succeed())
	})

	AfterEach(func() {
		// Deleting the namespace also removes whatever the spec left in it;
		// poll until the API server reports the namespace as gone.
		Expect(k8s.Delete(ctx, namespace)).To(Succeed())
		Eventually(func() bool {
			err := k8s.Get(ctx, client.ObjectKeyFromObject(namespace), &corev1.Namespace{})
			return apierrors.IsNotFound(err)
		}, "1m", "5s").Should(BeTrue(), "Namespace "+namespace.Name+" still exists")
	})

	It("scales up AKS node pools when pending Pods exist", func() {
		// Short scale-down timings so that the scale-down half of this spec
		// can complete within the wait at the end.
		ensureHelmValues(map[string]interface{}{
			"extraArgs": map[string]interface{}{
				"scale-down-delay-after-add":       "10s",
				"scale-down-unneeded-time":         "10s",
				"scale-down-candidates-pool-ratio": "1.0",
				"unremovable-node-recheck-timeout": "10s",
				"skip-nodes-with-system-pods":      "false",
				"skip-nodes-with-local-storage":    "false",
			},
		})

		nodes := &corev1.NodeList{}
		Expect(k8s.List(ctx, nodes)).To(Succeed())
		nodeCountBefore := len(nodes.Items)

		By("Creating 100 Pods")
		// https://raw.githubusercontent.com/kubernetes/website/main/content/en/examples/application/php-apache.yaml
		deploy := &appsv1.Deployment{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "php-apache",
				Namespace: namespace.Name,
			},
			Spec: appsv1.DeploymentSpec{
				Selector: &metav1.LabelSelector{
					MatchLabels: map[string]string{
						"run": "php-apache",
					},
				},
				Replicas: ptr.To[int32](100),
				Template: corev1.PodTemplateSpec{
					ObjectMeta: metav1.ObjectMeta{
						Labels: map[string]string{
							"run": "php-apache",
						},
					},
					Spec: corev1.PodSpec{
						Containers: []corev1.Container{
							{
								Name:  "php-apache",
								Image: "registry.k8s.io/hpa-example",
								Resources: corev1.ResourceRequirements{
									Limits: corev1.ResourceList{
										corev1.ResourceCPU: resource.MustParse("500m"),
									},
									Requests: corev1.ResourceList{
										corev1.ResourceCPU: resource.MustParse("200m"),
									},
								},
							},
						},
					},
				},
			},
		}
		Expect(k8s.Create(ctx, deploy)).To(Succeed())

		By("Waiting for more Ready Nodes to exist")
		Eventually(func() (int, error) {
			current := &corev1.NodeList{}
			if err := k8s.List(ctx, current); err != nil {
				return 0, err
			}
			readyCount := 0
			for _, node := range current.Items {
				if nodeIsReady(node) {
					readyCount++
				}
			}
			return readyCount, nil
		}, "10m", "10s").Should(BeNumerically(">", nodeCountBefore))

		// Let the scale-up finish before triggering the scale-down.
		Eventually(allVMSSStable, "10m", "30s").Should(Succeed())

		By("Deleting 100 Pods")
		Expect(k8s.Delete(ctx, deploy)).To(Succeed())

		By("Waiting for the original number of Nodes to be Ready")
		Eventually(func(g Gomega) {
			current := &corev1.NodeList{}
			g.Expect(k8s.List(ctx, current)).To(Succeed())
			g.Expect(current.Items).To(SatisfyAll(
				HaveLen(nodeCountBefore),
				// Every remaining Node must be Ready. ContainElements with a
				// single matcher would already pass with one Ready Node.
				HaveEach(Satisfy(nodeIsReady)),
			))
		}, "20m", "10s").Should(Succeed())
	})
})
177 changes: 177 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
//go:build e2e

/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_test

import (
"context"
"errors"
"flag"
"fmt"
"os"
"testing"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/chart/loader"
"helm.sh/helm/v3/pkg/cli"
"helm.sh/helm/v3/pkg/storage/driver"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// casReleaseName is the Helm release name under which cluster-autoscaler is
// looked up, installed and upgraded by this suite.
const casReleaseName = "cluster-autoscaler"

// Clients and context shared by every spec in the package.
var (
	// ctx is the context passed to all Kubernetes and Azure calls.
	ctx = context.Background()
	// vmss is the Azure scale set client; it is assigned in BeforeSuite.
	vmss *armcompute.VirtualMachineScaleSetsClient
	// k8s is the Kubernetes client; it is assigned in BeforeSuite.
	k8s client.Client
	// helmEnv supplies the REST client configuration used for both Helm
	// actions and the Kubernetes client.
	helmEnv = cli.New()
)

// Suite parameters, bound to command-line flags in init.
var (
	resourceGroup         string
	clusterName           string
	clientID              string
	casNamespace          string
	casServiceAccountName string
	casImageRepository    string
	casImageTag           string
)

// init registers the suite's string flags on the default flag set. Every flag
// defaults to the empty string.
func init() {
	stringFlags := []struct {
		target *string
		name   string
		usage  string
	}{
		{&resourceGroup, "resource-group", "resource group containing cluster-autoscaler-managed resources"},
		{&clusterName, "cluster-name", "Cluster API Cluster name for the cluster to be managed by cluster-autoscaler"},
		{&clientID, "client-id", "Azure client ID to be used by cluster-autoscaler"},
		{&casNamespace, "cas-namespace", "Namespace in which to install cluster-autoscaler"},
		{&casServiceAccountName, "cas-serviceaccount-name", "Name of the ServiceAccount to be used by cluster-autoscaler"},
		{&casImageRepository, "cas-image-repository", "Repository of the container image for cluster-autoscaler"},
		{&casImageTag, "cas-image-tag", "Tag of the container image for cluster-autoscaler"},
	}
	for _, f := range stringFlags {
		flag.StringVar(f.target, f.name, "", f.usage)
	}
}

// TestE2E is the `go test` entry point: it routes Gomega assertion failures to
// Ginkgo and then runs every spec registered in this package.
func TestE2E(t *testing.T) {
	const suiteDescription = "e2e Suite"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteDescription)
}

// Suite-wide setup: create the Azure scale set client and the Kubernetes
// client, then make sure cluster-autoscaler is deployed with the baseline Azure
// values that every spec builds upon.
var _ = BeforeSuite(func() {
	// Both IDs come from the environment and are used in several places below.
	subscriptionID := os.Getenv("AZURE_SUBSCRIPTION_ID")
	tenantID := os.Getenv("AZURE_TENANT_ID")

	azCred, err := azidentity.NewDefaultAzureCredential(nil)
	Expect(err).NotTo(HaveOccurred())
	vmss, err = armcompute.NewVirtualMachineScaleSetsClient(subscriptionID, azCred, nil)
	Expect(err).NotTo(HaveOccurred())

	// Reuse Helm's REST configuration so Helm and the Kubernetes client talk
	// to the same cluster.
	restConfig, err := helmEnv.RESTClientGetter().ToRESTConfig()
	Expect(err).NotTo(HaveOccurred())
	k8s, err = client.New(restConfig, client.Options{})
	Expect(err).NotTo(HaveOccurred())

	// ServiceAccount annotated for Azure workload identity.
	serviceAccount := map[string]interface{}{
		"name": casServiceAccountName,
		"annotations": map[string]interface{}{
			"azure.workload.identity/tenant-id": tenantID,
			"azure.workload.identity/client-id": clientID,
		},
	}

	// Image built and pushed for this run.
	image := map[string]interface{}{
		"repository": casImageRepository,
		"tag":        casImageTag,
		"pullPolicy": "Always",
	}

	ensureHelmValues(map[string]interface{}{
		"cloudProvider":                     "azure",
		"azureTenantID":                     tenantID,
		"azureSubscriptionID":               subscriptionID,
		"azureUseWorkloadIdentityExtension": true,
		"azureResourceGroup":                resourceGroup,
		"podLabels": map[string]interface{}{
			"azure.workload.identity/use": "true",
		},
		"rbac": map[string]interface{}{
			"serviceAccount": serviceAccount,
		},
		"autoDiscovery": map[string]interface{}{
			"clusterName": clusterName,
		},
		"nodeSelector": map[string]interface{}{
			"kubernetes.io/os": "linux",
		},
		"image": image,
	})
})

// allVMSSStable asserts through g that the cluster has settled:
//
//   - every VM scale set listed in resourceGroup reports the provisioning
//     state "Succeeded", and
//   - the cluster has exactly as many Nodes as the summed SKU capacity of
//     those scale sets, and every one of them is Ready.
//
// It takes a Gomega so that it can be polled, as in
// Eventually(allVMSSStable, "10m", "30s").Should(Succeed()).
func allVMSSStable(g Gomega) {
	expectedNodes := 0
	pager := vmss.NewListPager(resourceGroup, nil)
	for pager.More() {
		page, err := pager.NextPage(ctx)
		g.Expect(err).NotTo(HaveOccurred())
		for _, scaleset := range page.Value {
			// The SDK models these fields as pointers. Assert on them first so
			// that an incomplete response fails this attempt instead of
			// panicking on a nil dereference.
			g.Expect(scaleset).NotTo(BeNil(), "scale set entry is nil")
			g.Expect(scaleset.Properties).NotTo(BeNil(), "scale set has no properties")
			g.Expect(scaleset.Properties.ProvisioningState).NotTo(BeNil(), "scale set has no provisioning state")
			g.Expect(*scaleset.Properties.ProvisioningState).To(Equal("Succeeded"))

			g.Expect(scaleset.SKU).NotTo(BeNil(), "scale set has no SKU")
			g.Expect(scaleset.SKU.Capacity).NotTo(BeNil(), "scale set has no SKU capacity")
			expectedNodes += int(*scaleset.SKU.Capacity)
		}
	}

	nodes := &corev1.NodeList{}
	g.Expect(k8s.List(ctx, nodes)).To(Succeed())
	g.Expect(nodes.Items).To(SatisfyAll(
		HaveLen(expectedNodes),
		// Every Node must be Ready. ContainElements with a single matcher
		// would already pass with one Ready Node.
		HaveEach(Satisfy(func(node corev1.Node) bool {
			for _, cond := range node.Status.Conditions {
				if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
					return true
				}
			}
			return false
		})),
	))
}

func ensureHelmValues(values map[string]interface{}) {
helmCfg := new(action.Configuration)
Expect(helmCfg.Init(helmEnv.RESTClientGetter(), casNamespace, "secret", func(format string, v ...interface{}) {
GinkgoLogr.Info(fmt.Sprintf(format, v...))
})).To(Succeed())

chart, err := loader.Load("../../../../../charts/cluster-autoscaler")
Expect(err).NotTo(HaveOccurred())

get := action.NewGet(helmCfg)
_, err = get.Run(casReleaseName)
if errors.Is(err, driver.ErrReleaseNotFound) {
install := action.NewInstall(helmCfg)
install.Timeout = 5 * time.Minute
install.Wait = true
install.CreateNamespace = true
install.ReleaseName = casReleaseName
install.Namespace = casNamespace
_, err := install.Run(chart, values)
Expect(err).NotTo(HaveOccurred())
return
} else {
Expect(err).NotTo(HaveOccurred())
}

upgrade := action.NewUpgrade(helmCfg)
upgrade.Timeout = 5 * time.Minute
upgrade.Wait = true
upgrade.ReuseValues = true
_, err = upgrade.Run(casReleaseName, chart, values)
Expect(err).NotTo(HaveOccurred())
}
Loading
Loading