Merge pull request #6771 from WhizUs/feat/exoscale-provider-maxnodeslimit-per-nodepool

feat(cluster-autoscaler/exoscale): add support for --nodes
k8s-ci-robot authored Sep 25, 2024
2 parents 77bf580 + 5abbb4a commit 19fe7ab
Showing 11 changed files with 158 additions and 50 deletions.
2 changes: 1 addition & 1 deletion charts/cluster-autoscaler/Chart.yaml
@@ -11,4 +11,4 @@ name: cluster-autoscaler
sources:
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
type: application
version: 9.39.1
version: 9.40.0
22 changes: 15 additions & 7 deletions charts/cluster-autoscaler/README.md
@@ -238,10 +238,20 @@ Additional config parameters available, see the `values.yaml` for more details

### Exoscale

The following parameters are required:
Create a `values.yaml` file with the following content:
```yaml
cloudProvider: exoscale
autoDiscovery:
clusterName: cluster.local # this value is not used, but must be set
```
- `cloudProvider=exoscale`
- `autoDiscovery.clusterName=<CLUSTER NAME>`
Optionally, you may specify the minimum and maximum size of a particular nodepool by adding the following to the `values.yaml` file:
```yaml
autoscalingGroups:
- name: your-nodepool-name
maxSize: 10
minSize: 1
```
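
For reference, each `autoscalingGroups` entry is rendered by the chart into a `--nodes` argument of the form `<minSize>:<maxSize>:<name>` on the Cluster Autoscaler container. A rough sketch of the resulting container spec follows; `your-nodepool-name` is a placeholder and the exact rendered arguments may differ between chart versions:
```yaml
# Illustrative excerpt of the rendered Deployment (not part of the chart docs)
containers:
  - name: cluster-autoscaler
    command:
      - ./cluster-autoscaler
      - --cloud-provider=exoscale
      - --nodes=1:10:your-nodepool-name  # minSize:maxSize:nodepool name from values.yaml
```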

Create an Exoscale API key with appropriate permissions as described in [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md).
A secret named `<release-name>-exoscale-cluster-autoscaler` must be created, containing the API key and secret as well as the zone.
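
The full `kubectl create secret` command is collapsed in this view; a minimal sketch of it, assuming the chart expects the keys `api-key`, `api-secret` and `api-zone` in that secret, would look like:
```console
$ kubectl create secret generic my-release-exoscale-cluster-autoscaler \
    --from-literal=api-key="EXOxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
    --from-literal=api-secret="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
    --from-literal=api-zone="ch-gva-2"
```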
@@ -255,9 +265,7 @@ $ kubectl create secret generic my-release-exoscale-cluster-autoscaler \
After creating the secret, the chart may be installed:

```console
$ helm install my-release autoscaler/cluster-autoscaler \
--set cloudProvider=exoscale \
--set autoDiscovery.clusterName=<CLUSTER NAME>
$ helm install my-release autoscaler/cluster-autoscaler -f values.yaml
```

Read [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md) for further information on the setup without helm.
@@ -391,7 +399,7 @@ vpa:
| autoDiscovery.namespace | string | `nil` | Enable autodiscovery via cluster namespace for `cloudProvider=clusterapi` |
| autoDiscovery.roles | list | `["worker"]` | Magnum node group roles to match. |
| autoDiscovery.tags | list | `["k8s.io/cluster-autoscaler/enabled","k8s.io/cluster-autoscaler/{{ .Values.autoDiscovery.clusterName }}"]` | ASG tags to match, run through `tpl`. |
| autoscalingGroups | list | `[]` | For AWS, Azure AKS or Magnum. At least one element is required if not using `autoDiscovery`. For example: <pre> - name: asg1<br /> maxSize: 2<br /> minSize: 1 </pre> For Hetzner Cloud, the `instanceType` and `region` keys are also required. <pre> - name: mypool<br /> maxSize: 2<br /> minSize: 1<br /> instanceType: CPX21<br /> region: FSN1 </pre> |
| autoscalingGroups | list | `[]` | For AWS, Azure AKS, Exoscale or Magnum. At least one element is required if not using `autoDiscovery`. For example: <pre> - name: asg1<br /> maxSize: 2<br /> minSize: 1 </pre> For Hetzner Cloud, the `instanceType` and `region` keys are also required. <pre> - name: mypool<br /> maxSize: 2<br /> minSize: 1<br /> instanceType: CPX21<br /> region: FSN1 </pre> |
| autoscalingGroupsnamePrefix | list | `[]` | For GCE. At least one element is required if not using `autoDiscovery`. For example: <pre> - name: ig01<br /> maxSize: 10<br /> minSize: 0 </pre> |
| awsAccessKeyID | string | `""` | AWS access key ID ([if AWS user keys used](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#using-aws-credentials)) |
| awsRegion | string | `"us-east-1"` | AWS region (required if `cloudProvider=aws`) |
20 changes: 14 additions & 6 deletions charts/cluster-autoscaler/README.md.gotmpl
@@ -238,10 +238,20 @@ Additional config parameters available, see the `values.yaml` for more details

### Exoscale

The following parameters are required:
Create a `values.yaml` file with the following content:
```yaml
cloudProvider: exoscale
autoDiscovery:
clusterName: cluster.local # this value is not used, but must be set
```

- `cloudProvider=exoscale`
- `autoDiscovery.clusterName=<CLUSTER NAME>`
Optionally, you may specify the minimum and maximum size of a particular nodepool by adding the following to the `values.yaml` file:
```yaml
autoscalingGroups:
- name: your-nodepool-name
maxSize: 10
minSize: 1
```

Create an Exoscale API key with appropriate permissions as described in [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md).
A secret named `<release-name>-exoscale-cluster-autoscaler` must be created, containing the API key and secret as well as the zone.
@@ -255,9 +265,7 @@ $ kubectl create secret generic my-release-exoscale-cluster-autoscaler \
After creating the secret, the chart may be installed:

```console
$ helm install my-release autoscaler/cluster-autoscaler \
--set cloudProvider=exoscale \
--set autoDiscovery.clusterName=<CLUSTER NAME>
$ helm install my-release autoscaler/cluster-autoscaler -f values.yaml
```

Read [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md) for further information on the setup without helm.
2 changes: 1 addition & 1 deletion charts/cluster-autoscaler/values.yaml
@@ -33,7 +33,7 @@ autoDiscovery:
labels: []
# - color: green
# - shape: circle
# autoscalingGroups -- For AWS, Azure AKS or Magnum. At least one element is required if not using `autoDiscovery`. For example:
# autoscalingGroups -- For AWS, Azure AKS, Exoscale or Magnum. At least one element is required if not using `autoDiscovery`. For example:
# <pre>
# - name: asg1<br />
# maxSize: 2<br />
34 changes: 28 additions & 6 deletions cluster-autoscaler/cloudprovider/exoscale/README.md
@@ -3,9 +3,19 @@
The Cluster Autoscaler (CA) for Exoscale scales worker nodes running in
Exoscale SKS Nodepools or Instance Pools.

- [Cluster Autoscaler for Exoscale](#cluster-autoscaler-for-exoscale)
- [Configuration](#configuration)
- [Authenticating to the Exoscale API](#authenticating-to-the-exoscale-api)
- [Optional configuration](#optional-configuration)
- [Deployment](#deployment)
- [Helm](#helm)
- [Manifest](#manifest)
- [⚠️ Important Notes](#️--important-notes)

## Configuration

### Authenticating to the Exoscale API

> Note: the following guide assumes you have the permissions to create
> resources in the `kube-system` namespace of the target Kubernetes cluster.
@@ -49,7 +59,7 @@ environment.
You can restrict the API operations your IAM key can perform:

* When deploying the Cluster Autoscaler in SKS, you can restrict your IAM access key
  to these API operations:

```
evict-sks-nodepool-members
@@ -74,7 +84,19 @@ get-quota
scale-instance-pool
```

### Deploying the Cluster Autoscaler
### Optional configuration

By default, all nodepools in the k8s cluster are considered for scaling.
The flag `--nodes=<min>:<max>:<nodepool-name>` may be specified to set the minimum and
maximum size of a particular nodepool.
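
For example, to constrain two hypothetical nodepools named `pool-standard` and `pool-gpu` while leaving all other nodepools at the defaults, the flag can be repeated in the Cluster Autoscaler container arguments (a sketch; names and sizes are placeholders):
```yaml
# Illustrative excerpt of the Cluster Autoscaler Deployment spec
containers:
  - name: cluster-autoscaler
    command:
      - ./cluster-autoscaler
      - --cloud-provider=exoscale
      - --nodes=1:5:pool-standard  # nodepool "pool-standard": min 1, max 5 nodes
      - --nodes=1:2:pool-gpu       # nodepool "pool-gpu": min 1, max 2 nodes
```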

## Deployment

### Helm

See the [Helm Chart README](https://github.com/kubernetes/autoscaler/tree/master/charts/cluster-autoscaler).

### Manifest

To deploy the CA on your Kubernetes cluster, you can use the manifest provided as an example:

@@ -92,10 +114,10 @@ kubectl apply -f ./examples/cluster-autoscaler.yaml

## ⚠️ Important Notes

* The minimum node group size is 1
* The maximum node group size is computed based on the current [Compute
instances limit][exo-limits] of the Exoscale account the Cluster Autoscaler
is running in.
* The minimum and maximum size of a particular nodepool may be specified via the
  `--nodes` flag. If the flag is omitted (default), the minimum is 1 and the maximum is
  computed based on the current [Compute instances limit][exo-limits]
  of the Exoscale account the Cluster Autoscaler is running in.
* The Instance Pool candidate for scaling is determined from the Compute
  instance the Kubernetes node is running on, based on cluster resource
  constraining events emitted by the Kubernetes scheduler.
@@ -24,6 +24,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
)
@@ -98,10 +99,38 @@ func (e *exoscaleCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
)
}

// nodeGroupSpec contains the configuration spec from the '--nodes' flag
// which includes the min and max size of the node group.
var nodeGroupSpec *dynamic.NodeGroupSpec
for _, spec := range e.manager.discoveryOpts.NodeGroupSpecs {
s, err := dynamic.SpecFromString(spec, scaleToZeroSupported)
if err != nil {
return nil, fmt.Errorf("failed to parse node group spec: %v", err)
}

if s.Name == *sksNodepool.Name {
nodeGroupSpec = s
break
}
}
var minSize, maxSize int
if nodeGroupSpec != nil {
minSize = nodeGroupSpec.MinSize
maxSize = nodeGroupSpec.MaxSize
} else {
minSize = 1
maxSize, err = e.manager.computeInstanceQuota()
if err != nil {
return nil, err
}
}

nodeGroup = &sksNodepoolNodeGroup{
sksNodepool: sksNodepool,
sksCluster: sksCluster,
m: e.manager,
minSize: minSize,
maxSize: maxSize,
}
debugf("found node %s belonging to SKS Nodepool %s", toNodeID(node.Spec.ProviderID), *sksNodepool.ID)
} else {
@@ -196,15 +225,15 @@ func (e *exoscaleCloudProvider) Refresh() error {
}

// BuildExoscale builds the Exoscale cloud provider.
func BuildExoscale(_ config.AutoscalingOptions, _ cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
manager, err := newManager()
func BuildExoscale(_ config.AutoscalingOptions, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
manager, err := newManager(discoveryOpts)
if err != nil {
fatalf("failed to initialize manager: %v", err)
}

// The cloud provider automatically uses all Instance Pools in the k8s cluster.
// This means we don't use the cloudprovider.NodeGroupDiscoveryOptions
// flags (which can be set via '--node-group-auto-discovery' or '-nodes')
// The flag '--nodes=1:5:nodepoolname' may be specified to limit the size of a nodepool.
// The flag '--node-group-auto-discovery' is not implemented.
provider, err := newExoscaleCloudProvider(manager, rl)
if err != nil {
fatalf("failed to create Exoscale cloud provider: %v", err)
@@ -128,7 +128,7 @@ func (ts *cloudProviderTestSuite) SetupTest() {
ts.T().Setenv("EXOSCALE_API_KEY", "x")
ts.T().Setenv("EXOSCALE_API_SECRET", "x")

manager, err := newManager()
manager, err := newManager(cloudprovider.NodeGroupDiscoveryOptions{})
if err != nil {
ts.T().Fatalf("error initializing cloud provider manager: %v", err)
}
@@ -214,6 +214,17 @@ func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroupForNode_Ins
}

func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroupForNode_SKSNodepool() {
ts.p.manager.client.(*exoscaleClientMock).
On("GetQuota", ts.p.manager.ctx, ts.p.manager.zone, testComputeInstanceQuotaName).
Return(
&egoscale.Quota{
Resource: &testComputeInstanceQuotaName,
Usage: &testComputeInstanceQuotaUsage,
Limit: &testComputeInstanceQuotaLimit,
},
nil,
)

ts.p.manager.client.(*exoscaleClientMock).
On("ListSKSClusters", ts.p.manager.ctx, ts.p.manager.zone).
Return(
@@ -313,6 +324,17 @@ func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroups() {
// Nodegroup. If everything works as expected, the
// cloudprovider.NodeGroups() method should return 2 Nodegroups.

ts.p.manager.client.(*exoscaleClientMock).
On("GetQuota", ts.p.manager.ctx, ts.p.manager.zone, testComputeInstanceQuotaName).
Return(
&egoscale.Quota{
Resource: &testComputeInstanceQuotaName,
Usage: &testComputeInstanceQuotaUsage,
Limit: &testComputeInstanceQuotaLimit,
},
nil,
)

ts.p.manager.client.(*exoscaleClientMock).
On("GetInstancePool", ts.p.manager.ctx, ts.p.manager.zone, instancePoolID).
Return(
18 changes: 10 additions & 8 deletions cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go
@@ -43,13 +43,14 @@ const defaultAPIEnvironment = "api"
// Manager handles Exoscale communication and data caching of
// node groups (Instance Pools).
type Manager struct {
ctx context.Context
client exoscaleClient
zone string
nodeGroups []cloudprovider.NodeGroup
ctx context.Context
client exoscaleClient
zone string
nodeGroups []cloudprovider.NodeGroup
discoveryOpts cloudprovider.NodeGroupDiscoveryOptions
}

func newManager() (*Manager, error) {
func newManager(discoveryOpts cloudprovider.NodeGroupDiscoveryOptions) (*Manager, error) {
var (
zone string
apiKey string
@@ -82,9 +83,10 @@ func newManager() (*Manager, error) {
debugf("initializing manager with zone=%s environment=%s", zone, apiEnvironment)

m := &Manager{
ctx: exoapi.WithEndpoint(context.Background(), exoapi.NewReqEndpoint(apiEnvironment, zone)),
client: client,
zone: zone,
ctx: exoapi.WithEndpoint(context.Background(), exoapi.NewReqEndpoint(apiEnvironment, zone)),
client: client,
zone: zone,
discoveryOpts: discoveryOpts,
}

return m, nil
@@ -19,18 +19,19 @@ package exoscale
import (
"os"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2"
)

func (ts *cloudProviderTestSuite) TestNewManager() {
manager, err := newManager()
manager, err := newManager(cloudprovider.NodeGroupDiscoveryOptions{})
ts.Require().NoError(err)
ts.Require().NotNil(manager)

os.Unsetenv("EXOSCALE_API_KEY")
os.Unsetenv("EXOSCALE_API_SECRET")

manager, err = newManager()
manager, err = newManager(cloudprovider.NodeGroupDiscoveryOptions{})
ts.Require().Error(err)
ts.Require().Nil(manager)
}
@@ -28,6 +28,10 @@ import (
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

const (
scaleToZeroSupported = false
)

// sksNodepoolNodeGroup implements cloudprovider.NodeGroup interface for Exoscale SKS Nodepools.
type sksNodepoolNodeGroup struct {
sksNodepool *egoscale.SKSNodepool
@@ -36,21 +40,19 @@ type sksNodepoolNodeGroup struct {
m *Manager

sync.Mutex

minSize int
maxSize int
}

// MaxSize returns maximum size of the node group.
func (n *sksNodepoolNodeGroup) MaxSize() int {
limit, err := n.m.computeInstanceQuota()
if err != nil {
return 0
}

return limit
return n.maxSize
}

// MinSize returns minimum size of the node group.
func (n *sksNodepoolNodeGroup) MinSize() int {
return 1
return n.minSize
}

// TargetSize returns the current target size of the node group. It is possible that the