From 5abbb4a0973bc06096f5926ff6730de61513b7c3 Mon Sep 17 00:00:00 2001 From: Thomas Stadler Date: Mon, 17 Jun 2024 10:31:16 +0200 Subject: [PATCH] feat(cluster-autoscaler/exoscale): add support for --nodes Signed-off-by: Thomas Stadler --- charts/cluster-autoscaler/Chart.yaml | 2 +- charts/cluster-autoscaler/README.md | 22 +++++++---- charts/cluster-autoscaler/README.md.gotmpl | 20 +++++++--- charts/cluster-autoscaler/values.yaml | 2 +- .../cloudprovider/exoscale/README.md | 34 ++++++++++++++--- .../exoscale/exoscale_cloud_provider.go | 37 +++++++++++++++++-- .../exoscale/exoscale_cloud_provider_test.go | 24 +++++++++++- .../exoscale/exoscale_manager.go | 18 +++++---- .../exoscale/exoscale_manager_test.go | 5 ++- .../exoscale_node_group_sks_nodepool.go | 16 ++++---- .../exoscale_node_group_sks_nodepool_test.go | 28 ++++++++++---- 11 files changed, 158 insertions(+), 50 deletions(-) diff --git a/charts/cluster-autoscaler/Chart.yaml b/charts/cluster-autoscaler/Chart.yaml index a532739d7e4d..0dee960052f2 100644 --- a/charts/cluster-autoscaler/Chart.yaml +++ b/charts/cluster-autoscaler/Chart.yaml @@ -11,4 +11,4 @@ name: cluster-autoscaler sources: - https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler type: application -version: 9.39.1 +version: 9.40.0 diff --git a/charts/cluster-autoscaler/README.md b/charts/cluster-autoscaler/README.md index bf4b9bc70b13..eb2be82e4879 100644 --- a/charts/cluster-autoscaler/README.md +++ b/charts/cluster-autoscaler/README.md @@ -238,10 +238,20 @@ Additional config parameters available, see the `values.yaml` for more details ### Exoscale -The following parameters are required: +Create a `values.yaml` file with the following content: +```yaml +cloudProvider: exoscale +autoDiscovery: + clusterName: cluster.local # this value is not used, but must be set +``` -- `cloudProvider=exoscale` -- `autoDiscovery.clusterName=` +Optionally, you may specify the minimum and maximum size of a particular nodepool by adding the following to the `values.yaml` file: +```yaml +autoscalingGroups: + - name: your-nodepool-name + maxSize: 10 + minSize: 1 +``` Create an Exoscale API key with appropriate permissions as described in [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md). A secret of name `-exoscale-cluster-autoscaler` needs to be created, containing the api key and secret, as well as the zone. @@ -255,9 +265,7 @@ $ kubectl create secret generic my-release-exoscale-cluster-autoscaler \ After creating the secret, the chart may be installed: ```console -$ helm install my-release autoscaler/cluster-autoscaler \ - --set cloudProvider=exoscale \ - --set autoDiscovery.clusterName= +$ helm install my-release autoscaler/cluster-autoscaler -f values.yaml ``` Read [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md) for further information on the setup without helm. @@ -391,7 +399,7 @@ vpa: | autoDiscovery.namespace | string | `nil` | Enable autodiscovery via cluster namespace for for `cloudProvider=clusterapi` | | autoDiscovery.roles | list | `["worker"]` | Magnum node group roles to match. | | autoDiscovery.tags | list | `["k8s.io/cluster-autoscaler/enabled","k8s.io/cluster-autoscaler/{{ .Values.autoDiscovery.clusterName }}"]` | ASG tags to match, run through `tpl`. 
| -| autoscalingGroups | list | `[]` | For AWS, Azure AKS or Magnum. At least one element is required if not using `autoDiscovery`. For example:
 - name: asg1
maxSize: 2
minSize: 1
For Hetzner Cloud, the `instanceType` and `region` keys are also required.
 - name: mypool
maxSize: 2
minSize: 1
instanceType: CPX21
region: FSN1
| +| autoscalingGroups | list | `[]` | For AWS, Azure AKS, Exoscale or Magnum. At least one element is required if not using `autoDiscovery`. For example:
 - name: asg1
maxSize: 2
minSize: 1
For Hetzner Cloud, the `instanceType` and `region` keys are also required.
 - name: mypool
maxSize: 2
minSize: 1
instanceType: CPX21
region: FSN1
| | autoscalingGroupsnamePrefix | list | `[]` | For GCE. At least one element is required if not using `autoDiscovery`. For example:
 - name: ig01
maxSize: 10
minSize: 0
| | awsAccessKeyID | string | `""` | AWS access key ID ([if AWS user keys used](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#using-aws-credentials)) | | awsRegion | string | `"us-east-1"` | AWS region (required if `cloudProvider=aws`) | diff --git a/charts/cluster-autoscaler/README.md.gotmpl b/charts/cluster-autoscaler/README.md.gotmpl index a680369ddf40..3d42a9568066 100644 --- a/charts/cluster-autoscaler/README.md.gotmpl +++ b/charts/cluster-autoscaler/README.md.gotmpl @@ -238,10 +238,20 @@ Additional config parameters available, see the `values.yaml` for more details ### Exoscale -The following parameters are required: +Create a `values.yaml` file with the following content: +```yaml +cloudProvider: exoscale +autoDiscovery: + clusterName: cluster.local # this value is not used, but must be set +``` -- `cloudProvider=exoscale` -- `autoDiscovery.clusterName=` +Optionally, you may specify the minimum and maximum size of a particular nodepool by adding the following to the `values.yaml` file: +```yaml +autoscalingGroups: + - name: your-nodepool-name + maxSize: 10 + minSize: 1 +``` Create an Exoscale API key with appropriate permissions as described in [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md). A secret of name `-exoscale-cluster-autoscaler` needs to be created, containing the api key and secret, as well as the zone. @@ -255,9 +265,7 @@ $ kubectl create secret generic my-release-exoscale-cluster-autoscaler \ After creating the secret, the chart may be installed: ```console -$ helm install my-release autoscaler/cluster-autoscaler \ - --set cloudProvider=exoscale \ - --set autoDiscovery.clusterName= +$ helm install my-release autoscaler/cluster-autoscaler -f values.yaml ``` Read [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md) for further information on the setup without helm. diff --git a/charts/cluster-autoscaler/values.yaml b/charts/cluster-autoscaler/values.yaml index 20fcad9be92b..3d9d9315f44b 100644 --- a/charts/cluster-autoscaler/values.yaml +++ b/charts/cluster-autoscaler/values.yaml @@ -33,7 +33,7 @@ autoDiscovery: labels: [] # - color: green # - shape: circle -# autoscalingGroups -- For AWS, Azure AKS or Magnum. At least one element is required if not using `autoDiscovery`. For example: +# autoscalingGroups -- For AWS, Azure AKS, Exoscale or Magnum. At least one element is required if not using `autoDiscovery`. For example: #
 # - name: asg1
# maxSize: 2
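For context on how these chart values reach the provider: the chart templates each `autoscalingGroups` entry into a `--nodes=<minSize>:<maxSize>:<name>` argument on the cluster-autoscaler container, which is the flag the Exoscale provider now parses. A minimal sketch of the corresponding uncommented values, assuming that existing templating; the nodepool name is a placeholder:

```yaml
autoscalingGroups:
  - name: my-sks-nodepool   # placeholder: must match the SKS Nodepool name
    maxSize: 10             # rendered as --nodes=1:10:my-sks-nodepool
    minSize: 1
```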
diff --git a/cluster-autoscaler/cloudprovider/exoscale/README.md b/cluster-autoscaler/cloudprovider/exoscale/README.md index 695ed0df2d25..49826ce38011 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/README.md +++ b/cluster-autoscaler/cloudprovider/exoscale/README.md @@ -3,9 +3,19 @@ The Cluster Autoscaler (CA) for Exoscale scales worker nodes running in Exoscale SKS Nodepools or Instance Pools. +- [Cluster Autoscaler for Exoscale](#cluster-autoscaler-for-exoscale) + - [Configuration](#configuration) + - [Authenticating to the Exoscale API](#authenticating-to-the-exoscale-api) + - [Optional configuration](#optional-configuration) + - [Deployment](#deployment) + - [Helm](#helm) + - [Manifest](#manifest) + - [⚠️ Important Notes](#️--important-notes) ## Configuration +### Authenticating to the Exoscale API + > Note: the following guide assumes you have the permissions to create > resources in the `kube-system` namespace of the target Kubernetes cluster. @@ -49,7 +59,7 @@ environment. You can restrict the API operations your IAM key can perform: * When deploying the Cluster Autoscaler in SKS, you can restrict your IAM access key -to these API operations : +to these API operations: ``` evict-sks-nodepool-members @@ -74,7 +84,19 @@ get-quota scale-instance-pool ``` -### Deploying the Cluster Autoscaler +### Optional configuration + +By default, all nodepools in the k8s cluster are considered for scaling. +The flag `--nodes=<min>:<max>:<nodepool-name>` may be specified to limit the minimum and +maximum size of a particular nodepool. + +## Deployment + +### Helm + +See the [Helm Chart README](https://github.com/kubernetes/autoscaler/tree/master/charts/cluster-autoscaler). + +### Manifest To deploy the CA on your Kubernetes cluster, you can use the manifest provided as an example: @@ -92,10 +114,10 @@ kubectl apply -f ./examples/cluster-autoscaler.yaml ## ⚠️ Important Notes -* The minimum node group size is 1 -* The maximum node group size is computed based on the current [Compute - instances limit][exo-limits] of the Exoscale account the Cluster Autoscaler - is running in. +* The minimum and maximum node group size of particular nodepools + may be specified via the `--nodes` flag. If omitted (the default), + the minimum is 1 and the maximum is computed based on the current [Compute instances limit][exo-limits] + of the Exoscale account the Cluster Autoscaler is running in. * The Instance Pool candidate for scaling is determined based on the Compute instance the Kubernetes node is running on, depending on cluster resource constraining events emitted by the Kubernetes scheduler.
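To make the new flag concrete when deploying via the manifest rather than Helm, the argument would be added to the cluster-autoscaler container. A minimal sketch, assuming the container layout of `examples/cluster-autoscaler.yaml`; the image tag, nodepool name, and sizes are placeholders:

```yaml
containers:
  - name: cluster-autoscaler
    image: registry.k8s.io/autoscaling/cluster-autoscaler:latest  # placeholder tag
    command:
      - ./cluster-autoscaler
      - --cloud-provider=exoscale
      # bound the SKS Nodepool "my-sks-nodepool" to between 1 and 10 nodes;
      # nodepools without a --nodes entry keep the defaults noted above
      - --nodes=1:10:my-sks-nodepool
```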
diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider.go index 405e011916c9..067340e19ad9 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider.go @@ -24,6 +24,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2" "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" ) @@ -98,10 +99,38 @@ func (e *exoscaleCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide ) } + // nodeGroupSpec contains the configuration spec from the '--nodes' flag + // which includes the min and max size of the node group. + var nodeGroupSpec *dynamic.NodeGroupSpec + for _, spec := range e.manager.discoveryOpts.NodeGroupSpecs { + s, err := dynamic.SpecFromString(spec, scaleToZeroSupported) + if err != nil { + return nil, fmt.Errorf("failed to parse node group spec: %v", err) + } + + if s.Name == *sksNodepool.Name { + nodeGroupSpec = s + break + } + } + var minSize, maxSize int + if nodeGroupSpec != nil { + minSize = nodeGroupSpec.MinSize + maxSize = nodeGroupSpec.MaxSize + } else { + minSize = 1 + maxSize, err = e.manager.computeInstanceQuota() + if err != nil { + return nil, err + } + } + nodeGroup = &sksNodepoolNodeGroup{ sksNodepool: sksNodepool, sksCluster: sksCluster, m: e.manager, + minSize: minSize, + maxSize: maxSize, } debugf("found node %s belonging to SKS Nodepool %s", toNodeID(node.Spec.ProviderID), *sksNodepool.ID) } else { @@ -196,15 +225,15 @@ func (e *exoscaleCloudProvider) Refresh() error { } // BuildExoscale builds the Exoscale cloud provider. -func BuildExoscale(_ config.AutoscalingOptions, _ cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { - manager, err := newManager() +func BuildExoscale(_ config.AutoscalingOptions, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { + manager, err := newManager(discoveryOpts) if err != nil { fatalf("failed to initialize manager: %v", err) } // The cloud provider automatically uses all Instance Pools in the k8s cluster. - // This means we don't use the cloudprovider.NodeGroupDiscoveryOptions - // flags (which can be set via '--node-group-auto-discovery' or '-nodes') + // The flag '--nodes=1:5:nodepoolname' may be specified to limit the size of a nodepool. + // The flag '--node-group-auto-discovery' is not implemented. 
provider, err := newExoscaleCloudProvider(manager, rl) if err != nil { fatalf("failed to create Exoscale cloud provider: %v", err) diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider_test.go index 4f0b1994dd24..e4e3ca7beed4 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider_test.go @@ -128,7 +128,7 @@ func (ts *cloudProviderTestSuite) SetupTest() { ts.T().Setenv("EXOSCALE_API_KEY", "x") ts.T().Setenv("EXOSCALE_API_SECRET", "x") - manager, err := newManager() + manager, err := newManager(cloudprovider.NodeGroupDiscoveryOptions{}) if err != nil { ts.T().Fatalf("error initializing cloud provider manager: %v", err) } @@ -214,6 +214,17 @@ func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroupForNode_Ins } func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroupForNode_SKSNodepool() { + ts.p.manager.client.(*exoscaleClientMock). + On("GetQuota", ts.p.manager.ctx, ts.p.manager.zone, testComputeInstanceQuotaName). + Return( + &egoscale.Quota{ + Resource: &testComputeInstanceQuotaName, + Usage: &testComputeInstanceQuotaUsage, + Limit: &testComputeInstanceQuotaLimit, + }, + nil, + ) + ts.p.manager.client.(*exoscaleClientMock). On("ListSKSClusters", ts.p.manager.ctx, ts.p.manager.zone). Return( @@ -313,6 +324,17 @@ func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroups() { // Nodegroup. If everything works as expected, the // cloudprovider.NodeGroups() method should return 2 Nodegroups. + ts.p.manager.client.(*exoscaleClientMock). + On("GetQuota", ts.p.manager.ctx, ts.p.manager.zone, testComputeInstanceQuotaName). + Return( + &egoscale.Quota{ + Resource: &testComputeInstanceQuotaName, + Usage: &testComputeInstanceQuotaUsage, + Limit: &testComputeInstanceQuotaLimit, + }, + nil, + ) + ts.p.manager.client.(*exoscaleClientMock). On("GetInstancePool", ts.p.manager.ctx, ts.p.manager.zone, instancePoolID). Return( diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go index 902c4fabbabb..759890982f54 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go @@ -43,13 +43,14 @@ const defaultAPIEnvironment = "api" // Manager handles Exoscale communication and data caching of // node groups (Instance Pools). 
type Manager struct { - ctx context.Context - client exoscaleClient - zone string - nodeGroups []cloudprovider.NodeGroup + ctx context.Context + client exoscaleClient + zone string + nodeGroups []cloudprovider.NodeGroup + discoveryOpts cloudprovider.NodeGroupDiscoveryOptions } -func newManager() (*Manager, error) { +func newManager(discoveryOpts cloudprovider.NodeGroupDiscoveryOptions) (*Manager, error) { var ( zone string apiKey string @@ -82,9 +83,10 @@ func newManager() (*Manager, error) { debugf("initializing manager with zone=%s environment=%s", zone, apiEnvironment) m := &Manager{ - ctx: exoapi.WithEndpoint(context.Background(), exoapi.NewReqEndpoint(apiEnvironment, zone)), - client: client, - zone: zone, + ctx: exoapi.WithEndpoint(context.Background(), exoapi.NewReqEndpoint(apiEnvironment, zone)), + client: client, + zone: zone, + discoveryOpts: discoveryOpts, } return m, nil diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager_test.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager_test.go index 5933958b4272..2a682a54f4ef 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager_test.go +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager_test.go @@ -19,18 +19,19 @@ package exoscale import ( "os" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2" ) func (ts *cloudProviderTestSuite) TestNewManager() { - manager, err := newManager() + manager, err := newManager(cloudprovider.NodeGroupDiscoveryOptions{}) ts.Require().NoError(err) ts.Require().NotNil(manager) os.Unsetenv("EXOSCALE_API_KEY") os.Unsetenv("EXOSCALE_API_SECRET") - manager, err = newManager() + manager, err = newManager(cloudprovider.NodeGroupDiscoveryOptions{}) ts.Require().Error(err) ts.Require().Nil(manager) } diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go index d40d10ee8e2c..a78f6b8e25a5 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go @@ -28,6 +28,10 @@ import ( schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" ) +const ( + scaleToZeroSupported = false +) + // sksNodepoolNodeGroup implements cloudprovider.NodeGroup interface for Exoscale SKS Nodepools. type sksNodepoolNodeGroup struct { sksNodepool *egoscale.SKSNodepool @@ -36,21 +40,19 @@ type sksNodepoolNodeGroup struct { m *Manager sync.Mutex + + minSize int + maxSize int } // MaxSize returns maximum size of the node group. func (n *sksNodepoolNodeGroup) MaxSize() int { - limit, err := n.m.computeInstanceQuota() - if err != nil { - return 0 - } - - return limit + return n.maxSize } // MinSize returns minimum size of the node group. func (n *sksNodepoolNodeGroup) MinSize() int { - return 1 + return n.minSize } // TargetSize returns the current target size of the node group. 
It is possible that the diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go index 9cc0d10134f5..02ed542541ff 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go @@ -44,7 +44,9 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_MaxSize() { ID: &testSKSClusterID, Name: &testSKSClusterName, }, - m: ts.p.manager, + m: ts.p.manager, + minSize: int(testSKSNodepoolSize), + maxSize: int(testComputeInstanceQuotaLimit), } ts.Require().Equal(int(testComputeInstanceQuotaLimit), nodeGroup.MaxSize()) @@ -60,7 +62,9 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_MinSize() { ID: &testSKSClusterID, Name: &testSKSClusterName, }, - m: ts.p.manager, + m: ts.p.manager, + minSize: int(testSKSNodepoolSize), + maxSize: int(testComputeInstanceQuotaLimit), } ts.Require().Equal(1, nodeGroup.MinSize()) @@ -128,7 +132,9 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_IncreaseSize() { ID: &testSKSClusterID, Name: &testSKSClusterName, }, - m: ts.p.manager, + m: ts.p.manager, + minSize: int(testSKSNodepoolSize), + maxSize: int(testComputeInstanceQuotaLimit), } ts.Require().NoError(nodeGroup.IncreaseSize(int(testInstancePoolSize + 1))) @@ -176,7 +182,9 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_DeleteNodes() { ID: &testSKSClusterID, Name: &testSKSClusterName, }, - m: ts.p.manager, + m: ts.p.manager, + minSize: int(testSKSNodepoolSize), + maxSize: int(testComputeInstanceQuotaLimit), } ts.Require().NoError(nodeGroup.DeleteNodes([]*apiv1.Node{node})) @@ -193,7 +201,9 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Id() { ID: &testSKSClusterID, Name: &testSKSClusterName, }, - m: ts.p.manager, + m: ts.p.manager, + minSize: int(testSKSNodepoolSize), + maxSize: int(testComputeInstanceQuotaLimit), } ts.Require().Equal(testInstancePoolID, nodeGroup.Id()) @@ -227,7 +237,9 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Nodes() { ID: &testSKSClusterID, Name: &testSKSClusterName, }, - m: ts.p.manager, + m: ts.p.manager, + minSize: int(testSKSNodepoolSize), + maxSize: int(testComputeInstanceQuotaLimit), } instances, err := nodeGroup.Nodes() @@ -247,7 +259,9 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Exist() { ID: &testSKSClusterID, Name: &testSKSClusterName, }, - m: ts.p.manager, + m: ts.p.manager, + minSize: int(testSKSNodepoolSize), + maxSize: int(testComputeInstanceQuotaLimit), } ts.Require().True(nodeGroup.Exist())
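Taken together, the behavior these tests pin down can be exercised end to end from the chart. A sketch of a `values.yaml` covering both code paths, an explicitly bounded nodepool and the quota-derived defaults; the nodepool name is a placeholder:

```yaml
cloudProvider: exoscale
autoDiscovery:
  clusterName: cluster.local  # required by the chart, unused by the Exoscale provider
autoscalingGroups:
  - name: bounded-pool        # placeholder: bounded to 1..10 via --nodes
    minSize: 1
    maxSize: 10
# any nodepool not listed here is still autoscaled with the defaults:
# minimum 1, maximum derived from the account's Compute instance quota
```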