From 824c10885318fdc7fcc6912d8645bf055af91876 Mon Sep 17 00:00:00 2001 From: Michael Weibel Date: Fri, 19 Apr 2024 17:56:33 +0200 Subject: [PATCH] feat(clusterapi): per nodeGroup autoscaling options --- .../cloudprovider/clusterapi/README.md | 21 +++ .../clusterapi/clusterapi_nodegroup.go | 55 +++++++- .../clusterapi/clusterapi_nodegroup_test.go | 130 +++++++++++++++++- .../clusterapi/clusterapi_unstructured.go | 21 +-- .../clusterapi/clusterapi_utils.go | 24 ++++ 5 files changed, 238 insertions(+), 13 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/README.md b/cluster-autoscaler/cloudprovider/clusterapi/README.md index d6a73da14949..adf22919d34f 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/README.md +++ b/cluster-autoscaler/cloudprovider/clusterapi/README.md @@ -275,6 +275,27 @@ metadata: capacity.cluster-autoscaler.kubernetes.io/taints: "key1=value1:NoSchedule,key2=value2:NoExecute" ``` +#### Per-NodeGroup autoscaling options + +Custom autoscaling options per node group (MachineDeployment/MachinePool/MachineSet) can be specified as annoations with a common prefix: + +```yaml +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + annotations: + # overrides --scale-down-utilization-threshold global value for that specific MachineDeployment + cluster.x-k8s.io/autoscaling-options-scaledownutilizationthreshold: "0.5" + # overrides --scale-down-gpu-utilization-threshold global value for that specific MachineDeployment + cluster.x-k8s.io/autoscaling-options-scaledowngpuutilizationthreshold: "0.5" + # overrides --scale-down-unneeded-time global value for that specific MachineDeployment + cluster.x-k8s.io/autoscaling-options-scaledownunneededtime: "10m0s" + # overrides --scale-down-unready-time global value for that specific MachineDeployment + cluster.x-k8s.io/autoscaling-options-scaledownunreadytime: "20m0s" + # overrides --max-node-provision-time global value for that specific MachineDeployment + cluster.x-k8s.io/autoscaling-options-maxnodeprovisiontime: "20m0s" +``` + #### CPU Architecture awareness for single-arch clusters Users of single-arch non-amd64 clusters who are using scale from zero diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go index eb66ff8ee8ed..009b9abfca30 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go @@ -20,6 +20,8 @@ import ( "fmt" "k8s.io/klog/v2" "math/rand" + "strconv" + "time" "github.com/pkg/errors" @@ -335,7 +337,28 @@ func (ng *nodegroup) Autoprovisioned() bool { // GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular // NodeGroup. Returning a nil will result in using default options. func (ng *nodegroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { - return nil, cloudprovider.ErrNotImplemented + options := ng.scalableResource.autoscalingOptions + if options == nil || len(options) == 0 { + return &defaults, nil + } + + if opt, ok := getFloat64Option(options, ng.Id(), config.DefaultScaleDownUtilizationThresholdKey); ok { + defaults.ScaleDownUtilizationThreshold = opt + } + if opt, ok := getFloat64Option(options, ng.Id(), config.DefaultScaleDownGpuUtilizationThresholdKey); ok { + defaults.ScaleDownGpuUtilizationThreshold = opt + } + if opt, ok := getDurationOption(options, ng.Id(), config.DefaultScaleDownUnneededTimeKey); ok { + defaults.ScaleDownUnneededTime = opt + } + if opt, ok := getDurationOption(options, ng.Id(), config.DefaultScaleDownUnreadyTimeKey); ok { + defaults.ScaleDownUnreadyTime = opt + } + if opt, ok := getDurationOption(options, ng.Id(), config.DefaultMaxNodeProvisionTimeKey); ok { + defaults.MaxNodeProvisionTime = opt + } + + return &defaults, nil } func newNodeGroupFromScalableResource(controller *machineController, unstructuredScalableResource *unstructured.Unstructured) (*nodegroup, error) { @@ -415,3 +438,33 @@ func setLabelIfNotEmpty(to, from map[string]string, key string) { to[key] = value } } + +func getFloat64Option(options map[string]string, templateName, name string) (float64, bool) { + raw, ok := options[name] + if !ok { + return 0, false + } + + option, err := strconv.ParseFloat(raw, 64) + if err != nil { + klog.Warningf("failed to convert autoscaling_options option %q (value %q) for scalable resource %q to float: %v", name, raw, templateName, err) + return 0, false + } + + return option, true +} + +func getDurationOption(options map[string]string, templateName, name string) (time.Duration, bool) { + raw, ok := options[name] + if !ok { + return 0, false + } + + option, err := time.ParseDuration(raw) + if err != nil { + klog.Warningf("failed to convert autoscaling_options option %q (value %q) for scalable resource %q to duration: %v", name, raw, templateName, err) + return 0, false + } + + return option, true +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go index 079291552245..1dc7e035b723 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go @@ -25,15 +25,15 @@ import ( "testing" "time" - "k8s.io/client-go/tools/cache" - + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/client-go/tools/cache" ) const ( @@ -1507,3 +1507,127 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) { } } + +func TestNodeGroupGetOptions(t *testing.T) { + enableScaleAnnotations := map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + } + + defaultOptions := config.NodeGroupAutoscalingOptions{ + ScaleDownUtilizationThreshold: 0.1, + ScaleDownGpuUtilizationThreshold: 0.2, + ScaleDownUnneededTime: time.Second, + ScaleDownUnreadyTime: time.Minute, + MaxNodeProvisionTime: 15 * time.Minute, + } + + cases := []struct { + desc string + opts map[string]string + expected *config.NodeGroupAutoscalingOptions + }{ + { + desc: "return provided defaults on empty metadata", + opts: map[string]string{}, + expected: &defaultOptions, + }, + { + desc: "return specified options", + opts: map[string]string{ + config.DefaultScaleDownGpuUtilizationThresholdKey: "0.6", + config.DefaultScaleDownUtilizationThresholdKey: "0.7", + config.DefaultScaleDownUnneededTimeKey: "1h", + config.DefaultScaleDownUnreadyTimeKey: "30m", + config.DefaultMaxNodeProvisionTimeKey: "60m", + }, + expected: &config.NodeGroupAutoscalingOptions{ + ScaleDownGpuUtilizationThreshold: 0.6, + ScaleDownUtilizationThreshold: 0.7, + ScaleDownUnneededTime: time.Hour, + ScaleDownUnreadyTime: 30 * time.Minute, + MaxNodeProvisionTime: 60 * time.Minute, + }, + }, + { + desc: "complete partial options specs with defaults", + opts: map[string]string{ + config.DefaultScaleDownGpuUtilizationThresholdKey: "0.1", + config.DefaultScaleDownUnneededTimeKey: "1m", + }, + expected: &config.NodeGroupAutoscalingOptions{ + ScaleDownGpuUtilizationThreshold: 0.1, + ScaleDownUtilizationThreshold: defaultOptions.ScaleDownUtilizationThreshold, + ScaleDownUnneededTime: time.Minute, + ScaleDownUnreadyTime: defaultOptions.ScaleDownUnreadyTime, + MaxNodeProvisionTime: 15 * time.Minute, + }, + }, + { + desc: "keep defaults on unparsable options values", + opts: map[string]string{ + config.DefaultScaleDownGpuUtilizationThresholdKey: "foo", + config.DefaultScaleDownUnneededTimeKey: "bar", + }, + expected: &defaultOptions, + }, + } + + test := func(t *testing.T, testConfig *testConfig, expectedOptions *config.NodeGroupAutoscalingOptions) { + controller, stop := mustCreateTestController(t, testConfig) + defer stop() + + nodegroups, err := controller.nodeGroups() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if l := len(nodegroups); l != 1 { + t.Fatalf("expected 1 nodegroup, got %d", l) + } + + ng := nodegroups[0] + opts, err := ng.GetOptions(defaultOptions) + assert.NoError(t, err) + assert.Equal(t, expectedOptions, opts) + } + + for _, c := range cases { + t.Run(c.desc, func(t *testing.T) { + annotations := map[string]string{} + for k, v := range c.opts { + annotations[nodeGroupAutoscalingOptionsKeyPrefix+k] = v + } + + t.Run("MachineSet", func(t *testing.T) { + test( + t, + createMachineSetTestConfig( + testNamespace, + RandomString(6), + RandomString(6), + 10, + cloudprovider.JoinStringMaps(enableScaleAnnotations, annotations), + nil, + ), + c.expected, + ) + }) + + t.Run("MachineDeployment", func(t *testing.T) { + test( + t, + createMachineDeploymentTestConfig( + testNamespace, + RandomString(6), + RandomString(6), + 10, + cloudprovider.JoinStringMaps(enableScaleAnnotations, annotations), + nil, + ), + c.expected, + ) + }) + }) + } +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go index 4eec0e4bf7ec..f374dc7789f4 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go @@ -35,10 +35,11 @@ import ( ) type unstructuredScalableResource struct { - controller *machineController - unstructured *unstructured.Unstructured - maxSize int - minSize int + controller *machineController + unstructured *unstructured.Unstructured + maxSize int + minSize int + autoscalingOptions map[string]string } func (r unstructuredScalableResource) ID() string { @@ -353,16 +354,18 @@ func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*un } func newUnstructuredScalableResource(controller *machineController, u *unstructured.Unstructured) (*unstructuredScalableResource, error) { - minSize, maxSize, err := parseScalingBounds(u.GetAnnotations()) + annotations := u.GetAnnotations() + minSize, maxSize, err := parseScalingBounds(annotations) if err != nil { return nil, errors.Wrap(err, "error validating min/max annotations") } return &unstructuredScalableResource{ - controller: controller, - unstructured: u, - maxSize: maxSize, - minSize: minSize, + controller: controller, + unstructured: u, + maxSize: maxSize, + minSize: minSize, + autoscalingOptions: autoscalingOptions(annotations), }, nil } diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go index 68c2be164436..a154a0d1128c 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go @@ -98,6 +98,8 @@ var ( nodeGroupMaxSizeAnnotationKey = getNodeGroupMaxSizeAnnotationKey() zeroQuantity = resource.MustParse("0") + nodeGroupAutoscalingOptionsKeyPrefix = getNodeGroupAutoscalingOptionsKeyPrefix() + systemArchitecture *SystemArchitecture once sync.Once ) @@ -132,6 +134,21 @@ func minSize(annotations map[string]string) (int, error) { return i, nil } +func autoscalingOptions(annotations map[string]string) map[string]string { + options := map[string]string{} + for k, v := range annotations { + if !strings.HasPrefix(k, nodeGroupAutoscalingOptionsKeyPrefix) { + continue + } + resourceName := strings.Split(k, nodeGroupAutoscalingOptionsKeyPrefix) + if len(resourceName) < 2 || resourceName[1] == "" || v == "" { + continue + } + options[resourceName[1]] = strings.ToLower(v) + } + return options +} + // maxSize returns the maximum value encoded in the annotations keyed // by nodeGroupMaxSizeAnnotationKey. Returns errMissingMaxAnnotation // if the annotation doesn't exist or errInvalidMaxAnnotation if the @@ -292,6 +309,13 @@ func getNodeGroupMaxSizeAnnotationKey() string { return key } +// getNodeGroupAutoscalingOptionsKeyPrefix returns the key that is used for autoscaling options +// per node group which override autoscaler default options. +func getNodeGroupAutoscalingOptionsKeyPrefix() string { + key := fmt.Sprintf("%s/autoscaling-options-", getCAPIGroup()) + return key +} + // getMachineDeleteAnnotationKey returns the key that is used by cluster-api for marking // machines to be deleted. This function is needed because the user can change the default // group name by using the CAPI_GROUP environment variable.