diff --git a/cluster-autoscaler/cloudprovider/clusterapi/README.md b/cluster-autoscaler/cloudprovider/clusterapi/README.md index 5aa5f0f23543..938dd951b193 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/README.md +++ b/cluster-autoscaler/cloudprovider/clusterapi/README.md @@ -163,6 +163,70 @@ There are two annotations that control how a cluster resource should be scaled: The autoscaler will monitor any `MachineSet` or `MachineDeployment` containing both of these annotations. +### Scale from zero support + +The Cluster API community has defined an opt-in method for infrastructure +providers to enable scaling from zero-sized node groups in the +[Opt-in Autoscaling from Zero enhancement](https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md). +As defined in the enhancement, each provider may add support for scaling from +zero to their provider, but they are not required to do so. If you are expecting +built-in support for scaling from zero, please check with the Cluster API +infrastructure providers that you are using. + +If your Cluster API provider does not have support for scaling from zero, you +may still use this feature through the capacity annotations. You may add these +annotations to your MachineDeployments, or MachineSets if you are not using +MachineDeployments (it is not needed on both), to instruct the cluster +autoscaler about the sizing of the nodes in the node group. At the minimum, +you must specify the CPU and memory annotations, these annotations should +match the expected capacity of the nodes created from the infrastructure. 
+ +For example, if your MachineDeployment will create nodes that have "16000m" CPU, +"128G" memory, 2 NVidia GPUs, and can support 200 max pods, the following +annotations will instruct the autoscaler how to expand the node group from +zero replicas: + +```yaml +apiVersion: cluster.x-k8s.io/v1alpha4 +kind: MachineDeployment +metadata: + annotations: + cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size: "5" + cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size: "0" + capacity.cluster-autoscaler.kubernetes.io/memory: "128G" + capacity.cluster-autoscaler.kubernetes.io/cpu: "16" + capacity.cluster-autoscaler.kubernetes.io/gpu-type: "nvidia.com/gpu" + capacity.cluster-autoscaler.kubernetes.io/gpu-count: "2" + capacity.cluster-autoscaler.kubernetes.io/maxPods: "200" +``` + +*Note* the `maxPods` annotation will default to `110` if it is not supplied. +This value is inspired by the Kubernetes best practices +[Considerations for large clusters](https://kubernetes.io/docs/setup/best-practices/cluster-large/). + +#### RBAC changes for scaling from zero + +If you are using the opt-in support for scaling from zero as defined by the +Cluster API infrastructure provider, you will need to add the infrastructure +machine template types to your role permissions for the service account +associated with the cluster autoscaler deployment. The service account will +need permission to `get` and `list` the infrastructure machine templates for +your infrastructure provider. 
+ +For example, when using the [Kubemark provider](https://github.com/kubernetes-sigs/cluster-api-provider-kubemark) +you will need to set the following permissions: + +```yaml +rules: + - apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - kubemarkmachinetemplates + verbs: + - get + - list +``` + ## Specifying a Custom Resource Group By default all Kubernetes resources consumed by the Cluster API provider will diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_autodiscovery_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_autodiscovery_test.go index ac454051eb2f..6d0a2db3d273 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_autodiscovery_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_autodiscovery_test.go @@ -204,17 +204,17 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch bool }{{ name: "no clustername, namespace, or label selector specified should match any MachineSet", - testSpec: createTestSpec(RandomString(6), RandomString(6), RandomString(6), 1, false, nil), + testSpec: createTestSpec(RandomString(6), RandomString(6), RandomString(6), 1, false, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{labelSelector: labels.NewSelector()}, shouldMatch: true, }, { name: "no clustername, namespace, or label selector specified should match any MachineDeployment", - testSpec: createTestSpec(RandomString(6), RandomString(6), RandomString(6), 1, true, nil), + testSpec: createTestSpec(RandomString(6), RandomString(6), RandomString(6), 1, true, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{labelSelector: labels.NewSelector()}, shouldMatch: true, }, { name: "clustername specified does not match MachineSet, namespace matches, no labels specified", - testSpec: createTestSpec("default", RandomString(6), RandomString(6), 1, false, nil), + testSpec: createTestSpec("default", RandomString(6), RandomString(6), 1, false, nil, nil), 
autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", namespace: "default", @@ -223,7 +223,7 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch: false, }, { name: "clustername specified does not match MachineDeployment, namespace matches, no labels specified", - testSpec: createTestSpec("default", RandomString(6), RandomString(6), 1, true, nil), + testSpec: createTestSpec("default", RandomString(6), RandomString(6), 1, true, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", namespace: "default", @@ -232,7 +232,7 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch: false, }, { name: "namespace specified does not match MachineSet, clusterName matches, no labels specified", - testSpec: createTestSpec(RandomString(6), "foo", RandomString(6), 1, false, nil), + testSpec: createTestSpec(RandomString(6), "foo", RandomString(6), 1, false, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", namespace: "default", @@ -241,7 +241,7 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch: false, }, { name: "clustername specified does not match MachineDeployment, namespace matches, no labels specified", - testSpec: createTestSpec(RandomString(6), "foo", RandomString(6), 1, true, nil), + testSpec: createTestSpec(RandomString(6), "foo", RandomString(6), 1, true, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", namespace: "default", @@ -250,7 +250,7 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch: false, }, { name: "namespace and clusterName matches MachineSet, no labels specified", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", namespace: "default", @@ -259,7 +259,7 @@ func Test_allowedByAutoDiscoverySpec(t 
*testing.T) { shouldMatch: true, }, { name: "namespace and clusterName matches MachineDeployment, no labels specified", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", namespace: "default", @@ -268,7 +268,7 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch: true, }, { name: "namespace and clusterName matches MachineSet, does not match label selector", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", namespace: "default", @@ -277,7 +277,7 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch: false, }, { name: "namespace and clusterName matches MachineDeployment, does not match label selector", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil, nil), autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", namespace: "default", @@ -286,7 +286,7 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch: false, }, { name: "namespace, clusterName, and label selector matches MachineSet", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil, nil), additionalLabels: map[string]string{"color": "green"}, autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", @@ -296,7 +296,7 @@ func Test_allowedByAutoDiscoverySpec(t *testing.T) { shouldMatch: true, }, { name: "namespace, clusterName, and label selector matches MachineDeployment", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil), + testSpec: 
createTestSpec("default", "foo", RandomString(6), 1, true, nil, nil), additionalLabels: map[string]string{"color": "green"}, autoDiscoveryConfig: &clusterAPIAutoDiscoveryConfig{ clusterName: "foo", diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go index 1cc7f1989be1..0f0004e447be 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go @@ -53,6 +53,7 @@ const ( resourceNameMachineSet = "machinesets" resourceNameMachineDeployment = "machinedeployments" failedMachinePrefix = "failed-machine-" + machineTemplateKind = "MachineTemplate" machineDeploymentKind = "MachineDeployment" machineSetKind = "MachineSet" machineKind = "Machine" @@ -80,6 +81,10 @@ type machineController struct { machineDeploymentsAvailable bool accessLock sync.Mutex autoDiscoverySpecs []*clusterAPIAutoDiscoveryConfig + // stopChannel is used for running the shared informers, and for starting + // informers associated with infrastructure machine templates that are + // discovered during operation. + stopChannel <-chan struct{} } func indexMachineByProviderID(obj interface{}) ([]string, error) { @@ -170,9 +175,9 @@ func (c *machineController) findMachineSetOwner(machineSet *unstructured.Unstruc // run starts shared informers and waits for the informer cache to // synchronize. 
-func (c *machineController) run(stopCh <-chan struct{}) error { - c.workloadInformerFactory.Start(stopCh) - c.managementInformerFactory.Start(stopCh) +func (c *machineController) run() error { + c.workloadInformerFactory.Start(c.stopChannel) + c.managementInformerFactory.Start(c.stopChannel) syncFuncs := []cache.InformerSynced{ c.nodeInformer.HasSynced, @@ -184,7 +189,7 @@ func (c *machineController) run(stopCh <-chan struct{}) error { } klog.V(4).Infof("waiting for caches to sync") - if !cache.WaitForCacheSync(stopCh, syncFuncs...) { + if !cache.WaitForCacheSync(c.stopChannel, syncFuncs...) { return fmt.Errorf("syncing caches failed") } @@ -327,6 +332,7 @@ func newMachineController( managementDiscoveryClient discovery.DiscoveryInterface, managementScaleClient scale.ScalesGetter, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, + stopChannel chan struct{}, ) (*machineController, error) { workloadInformerFactory := kubeinformers.NewSharedInformerFactory(workloadClient, 0) @@ -409,6 +415,7 @@ func newMachineController( machineResource: gvrMachine, machineDeploymentResource: gvrMachineDeployment, machineDeploymentsAvailable: machineDeploymentAvailable, + stopChannel: stopChannel, }, nil } @@ -708,3 +715,30 @@ func (c *machineController) allowedByAutoDiscoverySpecs(r *unstructured.Unstruct return false } + +// Get an infrastructure machine template given its GVR, name, and namespace. 
+func (c *machineController) getInfrastructureResource(resource schema.GroupVersionResource, name string, namespace string) (*unstructured.Unstructured, error) { + // get an informer for this type, this will create the informer if it does not exist + informer := c.managementInformerFactory.ForResource(resource) + // since this may be a new informer, we need to restart the informer factory + c.managementInformerFactory.Start(c.stopChannel) + // wait for the informer to sync + klog.V(4).Infof("waiting for cache sync on infrastructure resource") + if !cache.WaitForCacheSync(c.stopChannel, informer.Informer().HasSynced) { + return nil, fmt.Errorf("syncing cache on infrastructure resource failed") + } + // use the informer to get the object we want, this will use the informer cache if possible + obj, err := informer.Lister().ByNamespace(namespace).Get(name) + if err != nil { + klog.V(4).Infof("Unable to read infrastructure reference from informer, error: %v", err) + return nil, err + } + + infra, ok := obj.(*unstructured.Unstructured) + if !ok { + err := fmt.Errorf("Unable to convert infrastructure reference for %s/%s", namespace, name) + klog.V(4).Infof("%v", err) + return nil, err + } + return infra, err +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go index 5413de38a540..4db9b31c725f 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go @@ -43,6 +43,7 @@ import ( fakekube "k8s.io/client-go/kubernetes/fake" fakescale "k8s.io/client-go/scale/fake" clientgotesting "k8s.io/client-go/testing" + klog "k8s.io/klog/v2" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" ) @@ -55,12 +56,14 @@ type testConfig struct { namespace string machineDeployment *unstructured.Unstructured machineSet *unstructured.Unstructured + machineTemplate 
*unstructured.Unstructured machines []*unstructured.Unstructured nodes []*corev1.Node } type testSpec struct { annotations map[string]string + capacity map[string]string machineDeploymentName string machineSetName string clusterName string @@ -91,27 +94,40 @@ func mustCreateTestController(t *testing.T, testConfigs ...*testConfig) (*machin if config.machineDeployment != nil { machineObjects = append(machineObjects, config.machineDeployment) } + + if config.machineTemplate != nil { + machineObjects = append(machineObjects, config.machineTemplate) + } } kubeclientSet := fakekube.NewSimpleClientset(nodeObjects...) dynamicClientset := fakedynamic.NewSimpleDynamicClientWithCustomListKinds( runtime.NewScheme(), map[schema.GroupVersionResource]string{ - {Group: "cluster.x-k8s.io", Version: "v1alpha3", Resource: "machinedeployments"}: "kindList", - {Group: "cluster.x-k8s.io", Version: "v1alpha3", Resource: "machines"}: "kindList", - {Group: "cluster.x-k8s.io", Version: "v1alpha3", Resource: "machinesets"}: "kindList", - {Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "machinedeployments"}: "kindList", - {Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "machines"}: "kindList", - {Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "machinesets"}: "kindList", - {Group: "custom.x-k8s.io", Version: "v1beta1", Resource: "machinedeployments"}: "kindList", - {Group: "custom.x-k8s.io", Version: "v1beta1", Resource: "machines"}: "kindList", - {Group: "custom.x-k8s.io", Version: "v1beta1", Resource: "machinesets"}: "kindList", + {Group: "cluster.x-k8s.io", Version: "v1alpha3", Resource: "machinedeployments"}: "kindList", + {Group: "cluster.x-k8s.io", Version: "v1alpha3", Resource: "machines"}: "kindList", + {Group: "cluster.x-k8s.io", Version: "v1alpha3", Resource: "machinesets"}: "kindList", + {Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "machinedeployments"}: "kindList", + {Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "machines"}: 
"kindList", + {Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "machinesets"}: "kindList", + {Group: "custom.x-k8s.io", Version: "v1beta1", Resource: "machinedeployments"}: "kindList", + {Group: "custom.x-k8s.io", Version: "v1beta1", Resource: "machines"}: "kindList", + {Group: "custom.x-k8s.io", Version: "v1beta1", Resource: "machinesets"}: "kindList", + {Group: "infrastructure.cluster.x-k8s.io", Version: "v1beta1", Resource: "machinetemplates"}: "kindList", }, machineObjects..., ) discoveryClient := &fakediscovery.FakeDiscovery{ Fake: &clientgotesting.Fake{ Resources: []*metav1.APIResourceList{ + { + GroupVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + APIResources: []metav1.APIResource{ + { + Name: "machinetemplates", + }, + }, + }, { GroupVersion: fmt.Sprintf("%s/v1beta1", customCAPIGroup), APIResources: []metav1.APIResource{ @@ -234,13 +250,13 @@ func mustCreateTestController(t *testing.T, testConfigs ...*testConfig) (*machin } scaleClient.AddReactor("*", "*", scaleReactor) - controller, err := newMachineController(dynamicClientset, kubeclientSet, discoveryClient, scaleClient, cloudprovider.NodeGroupDiscoveryOptions{}) + stopCh := make(chan struct{}) + controller, err := newMachineController(dynamicClientset, kubeclientSet, discoveryClient, scaleClient, cloudprovider.NodeGroupDiscoveryOptions{}, stopCh) if err != nil { t.Fatal("failed to create test controller") } - stopCh := make(chan struct{}) - if err := controller.run(stopCh); err != nil { + if err := controller.run(); err != nil { t.Fatalf("failed to run controller: %v", err) } @@ -249,35 +265,36 @@ func mustCreateTestController(t *testing.T, testConfigs ...*testConfig) (*machin } } -func createMachineSetTestConfig(namespace, clusterName, namePrefix string, nodeCount int, annotations map[string]string) *testConfig { - return createTestConfigs(createTestSpecs(namespace, clusterName, namePrefix, 1, nodeCount, false, annotations)...)[0] +func createMachineSetTestConfig(namespace, clusterName, 
namePrefix string, nodeCount int, annotations map[string]string, capacity map[string]string) *testConfig { + return createTestConfigs(createTestSpecs(namespace, clusterName, namePrefix, 1, nodeCount, false, annotations, capacity)...)[0] } -func createMachineSetTestConfigs(namespace, clusterName, namePrefix string, configCount, nodeCount int, annotations map[string]string) []*testConfig { - return createTestConfigs(createTestSpecs(namespace, clusterName, namePrefix, configCount, nodeCount, false, annotations)...) +func createMachineSetTestConfigs(namespace, clusterName, namePrefix string, configCount, nodeCount int, annotations map[string]string, capacity map[string]string) []*testConfig { + return createTestConfigs(createTestSpecs(namespace, clusterName, namePrefix, configCount, nodeCount, false, annotations, capacity)...) } -func createMachineDeploymentTestConfig(namespace, clusterName, namePrefix string, nodeCount int, annotations map[string]string) *testConfig { - return createTestConfigs(createTestSpecs(namespace, clusterName, namePrefix, 1, nodeCount, true, annotations)...)[0] +func createMachineDeploymentTestConfig(namespace, clusterName, namePrefix string, nodeCount int, annotations map[string]string, capacity map[string]string) *testConfig { + return createTestConfigs(createTestSpecs(namespace, clusterName, namePrefix, 1, nodeCount, true, annotations, capacity)...)[0] } -func createMachineDeploymentTestConfigs(namespace, clusterName, namePrefix string, configCount, nodeCount int, annotations map[string]string) []*testConfig { - return createTestConfigs(createTestSpecs(namespace, clusterName, namePrefix, configCount, nodeCount, true, annotations)...) 
+func createMachineDeploymentTestConfigs(namespace, clusterName, namePrefix string, configCount, nodeCount int, annotations map[string]string, capacity map[string]string) []*testConfig { + return createTestConfigs(createTestSpecs(namespace, clusterName, namePrefix, configCount, nodeCount, true, annotations, capacity)...) } -func createTestSpecs(namespace, clusterName, namePrefix string, scalableResourceCount, nodeCount int, isMachineDeployment bool, annotations map[string]string) []testSpec { +func createTestSpecs(namespace, clusterName, namePrefix string, scalableResourceCount, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string) []testSpec { var specs []testSpec for i := 0; i < scalableResourceCount; i++ { - specs = append(specs, createTestSpec(namespace, clusterName, fmt.Sprintf("%s-%d", namePrefix, i), nodeCount, isMachineDeployment, annotations)) + specs = append(specs, createTestSpec(namespace, clusterName, fmt.Sprintf("%s-%d", namePrefix, i), nodeCount, isMachineDeployment, annotations, capacity)) } return specs } -func createTestSpec(namespace, clusterName, name string, nodeCount int, isMachineDeployment bool, annotations map[string]string) testSpec { +func createTestSpec(namespace, clusterName, name string, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string) testSpec { return testSpec{ annotations: annotations, + capacity: capacity, machineDeploymentName: name, machineSetName: name, clusterName: clusterName, @@ -316,6 +333,15 @@ func createTestConfigs(specs ...testSpec) []*testConfig { "spec": map[string]interface{}{ "clusterName": spec.clusterName, "replicas": int64(spec.nodeCount), + "template": map[string]interface{}{ + "spec": map[string]interface{}{ + "infrastructureRef": map[string]interface{}{ + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "kind": machineTemplateKind, + "name": "TestMachineTemplate", + }, + }, + }, }, "status": 
map[string]interface{}{}, }, @@ -345,6 +371,15 @@ func createTestConfigs(specs ...testSpec) []*testConfig { "spec": map[string]interface{}{ "clusterName": spec.clusterName, "replicas": int64(spec.nodeCount), + "template": map[string]interface{}{ + "spec": map[string]interface{}{ + "infrastructureRef": map[string]interface{}{ + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "kind": machineTemplateKind, + "name": "TestMachineTemplate", + }, + }, + }, }, "status": map[string]interface{}{}, }, @@ -371,6 +406,24 @@ func createTestConfigs(specs ...testSpec) []*testConfig { UID: config.machineSet.GetUID(), } + if spec.capacity != nil { + klog.V(4).Infof("adding capacity to machine template") + config.machineTemplate = &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "kind": machineTemplateKind, + "metadata": map[string]interface{}{ + "name": "TestMachineTemplate", + "namespace": spec.namespace, + "uid": "TestMachineTemplate", + }, + }, + } + unstructured.SetNestedStringMap(config.machineTemplate.Object, spec.capacity, "status", "capacity") + } else { + klog.V(4).Infof("not adding capacity") + } + for j := 0; j < spec.nodeCount; j++ { config.nodes[j], config.machines[j] = makeLinkedNodeAndMachine(j, spec.namespace, spec.clusterName, machineOwner, machineSetLabels) } @@ -585,7 +638,7 @@ func TestControllerFindMachine(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) if tc.name == "" { tc.name = testConfig.machines[0].GetName() } @@ -613,7 +666,7 @@ func TestControllerFindMachineOwner(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) controller, stop := 
mustCreateTestController(t, testConfig) defer stop() @@ -662,7 +715,7 @@ func TestControllerFindMachineByProviderID(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) controller, stop := mustCreateTestController(t, testConfig) defer stop() @@ -724,7 +777,7 @@ func TestControllerFindNodeByNodeName(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) controller, stop := mustCreateTestController(t, testConfig) defer stop() @@ -824,7 +877,7 @@ func TestControllerListMachinesForScalableResource(t *testing.T) { testConfig1 := createMachineSetTestConfig(namespace, clusterName, RandomString(6), 5, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) // Construct a second set of objects and add the machines, // nodes and the additional machineset to the existing set of @@ -833,7 +886,7 @@ func TestControllerListMachinesForScalableResource(t *testing.T) { testConfig2 := createMachineSetTestConfig(namespace, clusterName, RandomString(6), 5, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) test(t, testConfig1, testConfig2) }) @@ -844,7 +897,7 @@ func TestControllerListMachinesForScalableResource(t *testing.T) { testConfig1 := createMachineDeploymentTestConfig(namespace, clusterName, RandomString(6), 5, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) // Construct a second set of objects and add the machines, // nodes, machineset, and the additional machineset to the existing set of @@ -853,7 +906,7 @@ func TestControllerListMachinesForScalableResource(t *testing.T) { testConfig2 := 
createMachineDeploymentTestConfig(namespace, clusterName, RandomString(6), 5, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) test(t, testConfig1, testConfig2) }) @@ -884,7 +937,7 @@ func TestControllerLookupNodeGroupForNonExistentNode(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) test(t, testConfig) }) @@ -892,7 +945,7 @@ func TestControllerLookupNodeGroupForNonExistentNode(t *testing.T) { testConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) test(t, testConfig) }) } @@ -923,7 +976,7 @@ func TestControllerNodeGroupForNodeWithMissingMachineOwner(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) test(t, testConfig) }) @@ -931,7 +984,7 @@ func TestControllerNodeGroupForNodeWithMissingMachineOwner(t *testing.T) { testConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) test(t, testConfig) }) } @@ -940,7 +993,7 @@ func TestControllerNodeGroupForNodeWithMissingSetMachineOwner(t *testing.T) { testConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) controller, stop := mustCreateTestController(t, testConfig) defer stop() @@ -982,7 +1035,7 @@ func TestControllerNodeGroupForNodeWithPositiveScalingBounds(t *testing.T) { testConfig := 
createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "1", - }) + }, nil) test(t, testConfig) }) @@ -990,7 +1043,7 @@ func TestControllerNodeGroupForNodeWithPositiveScalingBounds(t *testing.T) { testConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "1", - }) + }, nil) test(t, testConfig) }) } @@ -1022,14 +1075,14 @@ func TestControllerNodeGroups(t *testing.T) { assertNodegroupLen(t, controller, 0) // Test #2: add 5 machineset-based nodegroups - machineSetConfigs := createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 5, 1, annotations) + machineSetConfigs := createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 5, 1, annotations, nil) if err := addTestConfigs(t, controller, machineSetConfigs...); err != nil { t.Fatalf("unexpected error: %v", err) } assertNodegroupLen(t, controller, 5) // Test #2: add 2 machinedeployment-based nodegroups - machineDeploymentConfigs := createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 2, 1, annotations) + machineDeploymentConfigs := createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 2, 1, annotations, nil) if err := addTestConfigs(t, controller, machineDeploymentConfigs...); err != nil { t.Fatalf("unexpected error: %v", err) } @@ -1053,14 +1106,14 @@ func TestControllerNodeGroups(t *testing.T) { } // Test #5: machineset with no scaling bounds results in no nodegroups - machineSetConfigs = createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 5, 1, annotations) + machineSetConfigs = createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 5, 1, annotations, nil) if err := addTestConfigs(t, controller, machineSetConfigs...); err != nil { t.Fatalf("unexpected error: %v", err) } 
assertNodegroupLen(t, controller, 0) // Test #6: machinedeployment with no scaling bounds results in no nodegroups - machineDeploymentConfigs = createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 2, 1, annotations) + machineDeploymentConfigs = createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 2, 1, annotations, nil) if err := addTestConfigs(t, controller, machineDeploymentConfigs...); err != nil { t.Fatalf("unexpected error: %v", err) } @@ -1072,7 +1125,7 @@ func TestControllerNodeGroups(t *testing.T) { } // Test #7: machineset with bad scaling bounds results in an error and no nodegroups - machineSetConfigs = createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 5, 1, annotations) + machineSetConfigs = createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 5, 1, annotations, nil) if err := addTestConfigs(t, controller, machineSetConfigs...); err != nil { t.Fatalf("unexpected error: %v", err) } @@ -1081,7 +1134,7 @@ func TestControllerNodeGroups(t *testing.T) { } // Test #8: machinedeployment with bad scaling bounds results in an error and no nodegroups - machineDeploymentConfigs = createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 2, 1, annotations) + machineDeploymentConfigs = createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 2, 1, annotations, nil) if err := addTestConfigs(t, controller, machineDeploymentConfigs...); err != nil { t.Fatalf("unexpected error: %v", err) } @@ -1145,13 +1198,13 @@ func TestControllerNodeGroupsNodeCount(t *testing.T) { t.Run("MachineSet", func(t *testing.T) { for _, tc := range testCases { - test(t, tc, createMachineSetTestConfigs(RandomString(6), RandomString(6), RandomString(6), tc.nodeGroups, tc.nodesPerGroup, annotations)) + test(t, tc, createMachineSetTestConfigs(RandomString(6), RandomString(6), RandomString(6), tc.nodeGroups, tc.nodesPerGroup, annotations, nil)) } }) t.Run("MachineDeployment", func(t 
*testing.T) { for _, tc := range testCases { - test(t, tc, createMachineDeploymentTestConfigs(RandomString(6), RandomString(6), RandomString(6), tc.nodeGroups, tc.nodesPerGroup, annotations)) + test(t, tc, createMachineDeploymentTestConfigs(RandomString(6), RandomString(6), RandomString(6), tc.nodeGroups, tc.nodesPerGroup, annotations, nil)) } }) } @@ -1160,7 +1213,7 @@ func TestControllerFindMachineFromNodeAnnotation(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) controller, stop := mustCreateTestController(t, testConfig) defer stop() @@ -1208,7 +1261,7 @@ func TestControllerMachineSetNodeNamesWithoutLinkage(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 3, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) controller, stop := mustCreateTestController(t, testConfig) defer stop() @@ -1250,7 +1303,7 @@ func TestControllerMachineSetNodeNamesUsingProviderID(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 3, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) controller, stop := mustCreateTestController(t, testConfig) defer stop() @@ -1302,7 +1355,7 @@ func TestControllerMachineSetNodeNamesUsingStatusNodeRefName(t *testing.T) { testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 3, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) controller, stop := mustCreateTestController(t, testConfig) defer stop() @@ -1393,7 +1446,7 @@ func TestControllerGetAPIVersionGroupWithMachineDeployments(t *testing.T) { testConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), 
RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "1", - }) + }, nil) if err := os.Setenv(CAPIGroupEnvVar, customCAPIGroup); err != nil { t.Fatalf("unexpected error: %v", err) } @@ -1663,7 +1716,7 @@ func Test_machineController_allowedByAutoDiscoverySpecs(t *testing.T) { shouldMatch bool }{{ name: "autodiscovery specs includes permissive spec that should match any MachineSet", - testSpec: createTestSpec(RandomString(6), RandomString(6), RandomString(6), 1, false, nil), + testSpec: createTestSpec(RandomString(6), RandomString(6), RandomString(6), 1, false, nil, nil), autoDiscoverySpecs: []*clusterAPIAutoDiscoveryConfig{ {labelSelector: labels.NewSelector()}, {clusterName: "foo", namespace: "bar", labelSelector: labels.Nothing()}, @@ -1671,7 +1724,7 @@ func Test_machineController_allowedByAutoDiscoverySpecs(t *testing.T) { shouldMatch: true, }, { name: "autodiscovery specs includes permissive spec that should match any MachineDeployment", - testSpec: createTestSpec(RandomString(6), RandomString(6), RandomString(6), 1, true, nil), + testSpec: createTestSpec(RandomString(6), RandomString(6), RandomString(6), 1, true, nil, nil), autoDiscoverySpecs: []*clusterAPIAutoDiscoveryConfig{ {labelSelector: labels.NewSelector()}, {clusterName: "foo", namespace: "bar", labelSelector: labels.Nothing()}, @@ -1679,7 +1732,7 @@ func Test_machineController_allowedByAutoDiscoverySpecs(t *testing.T) { shouldMatch: true, }, { name: "autodiscovery specs includes a restrictive spec that should match specific MachineSet", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil, nil), additionalLabels: map[string]string{"color": "green"}, autoDiscoverySpecs: []*clusterAPIAutoDiscoveryConfig{ {clusterName: "foo", namespace: "default", labelSelector: labels.SelectorFromSet(labels.Set{"color": "green"})}, @@ -1688,7 +1741,7 @@ func 
Test_machineController_allowedByAutoDiscoverySpecs(t *testing.T) { shouldMatch: true, }, { name: "autodiscovery specs includes a restrictive spec that should match specific MachineDeployment", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil, nil), additionalLabels: map[string]string{"color": "green"}, autoDiscoverySpecs: []*clusterAPIAutoDiscoveryConfig{ {clusterName: "foo", namespace: "default", labelSelector: labels.SelectorFromSet(labels.Set{"color": "green"})}, @@ -1697,7 +1750,7 @@ func Test_machineController_allowedByAutoDiscoverySpecs(t *testing.T) { shouldMatch: true, }, { name: "autodiscovery specs does not include any specs that should match specific MachineSet", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, false, nil, nil), additionalLabels: map[string]string{"color": "green"}, autoDiscoverySpecs: []*clusterAPIAutoDiscoveryConfig{ {clusterName: "test", namespace: "default", labelSelector: labels.SelectorFromSet(labels.Set{"color": "blue"})}, @@ -1706,7 +1759,7 @@ func Test_machineController_allowedByAutoDiscoverySpecs(t *testing.T) { shouldMatch: false, }, { name: "autodiscovery specs does not include any specs that should match specific MachineDeployment", - testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil), + testSpec: createTestSpec("default", "foo", RandomString(6), 1, true, nil, nil), additionalLabels: map[string]string{"color": "green"}, autoDiscoverySpecs: []*clusterAPIAutoDiscoveryConfig{ {clusterName: "test", namespace: "default", labelSelector: labels.SelectorFromSet(labels.Set{"color": "blue"})}, @@ -1736,9 +1789,9 @@ func Test_machineController_allowedByAutoDiscoverySpecs(t *testing.T) { } func Test_machineController_listScalableResources(t *testing.T) { - uniqueMDConfig := 
createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, nil) + uniqueMDConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, nil, nil) - mdTestConfigs := createMachineDeploymentTestConfigs(RandomString(6), RandomString(6), RandomString(6), 5, 1, nil) + mdTestConfigs := createMachineDeploymentTestConfigs(RandomString(6), RandomString(6), RandomString(6), 5, 1, nil, nil) mdTestConfigs = append(mdTestConfigs, uniqueMDConfig) allMachineDeployments := make([]*unstructured.Unstructured, 0, len(mdTestConfigs)) @@ -1746,9 +1799,9 @@ func Test_machineController_listScalableResources(t *testing.T) { allMachineDeployments = append(allMachineDeployments, mdTestConfigs[i].machineDeployment) } - uniqueMSConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, nil) + uniqueMSConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, nil, nil) - msTestConfigs := createMachineSetTestConfigs(RandomString(6), RandomString(6), RandomString(6), 5, 1, nil) + msTestConfigs := createMachineSetTestConfigs(RandomString(6), RandomString(6), RandomString(6), 5, 1, nil, nil) msTestConfigs = append(msTestConfigs, uniqueMSConfig) allMachineSets := make([]*unstructured.Unstructured, 0, len(msTestConfigs)) @@ -1851,12 +1904,12 @@ func Test_machineController_nodeGroupForNode(t *testing.T) { uniqueMDConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) mdTestConfigs := createMachineDeploymentTestConfigs(RandomString(6), RandomString(6), RandomString(6), 5, 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) mdTestConfigs = append(mdTestConfigs, uniqueMDConfig) allMachineDeployments := make([]*unstructured.Unstructured, 0, len(mdTestConfigs)) @@ -1867,12 
+1920,12 @@ func Test_machineController_nodeGroupForNode(t *testing.T) { uniqueMSConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) msTestConfigs := createMachineSetTestConfigs(RandomString(6), RandomString(6), RandomString(6), 5, 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) msTestConfigs = append(msTestConfigs, uniqueMSConfig) allMachineSets := make([]*unstructured.Unstructured, 0, len(msTestConfigs)) @@ -1957,12 +2010,12 @@ func Test_machineController_nodeGroups(t *testing.T) { uniqueMDConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) mdTestConfigs := createMachineDeploymentTestConfigs(RandomString(6), RandomString(6), RandomString(6), 5, 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) mdTestConfigs = append(mdTestConfigs, uniqueMDConfig) allMachineDeployments := make([]*unstructured.Unstructured, 0, len(mdTestConfigs)) @@ -1973,12 +2026,12 @@ func Test_machineController_nodeGroups(t *testing.T) { uniqueMSConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) msTestConfigs := createMachineSetTestConfigs(RandomString(6), RandomString(6), RandomString(6), 5, 1, map[string]string{ nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", - }) + }, nil) msTestConfigs = append(msTestConfigs, uniqueMSConfig) allMachineSets := make([]*unstructured.Unstructured, 0, len(msTestConfigs)) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go 
b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go index 87e7803571f6..383d6e886585 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go @@ -18,10 +18,14 @@ package clusterapi import ( "fmt" + "math/rand" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + kubeletapis "k8s.io/kubelet/pkg/apis" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" @@ -30,6 +34,11 @@ import ( const ( debugFormat = "%s (min: %d, max: %d, replicas: %d)" + + // The default for the maximum number of pods is inspired by the Kubernetes + // best practices documentation for large clusters. + // see https://kubernetes.io/docs/setup/best-practices/cluster-large/ + defaultMaxPods = 110 ) type nodegroup struct { @@ -234,7 +243,58 @@ func (ng *nodegroup) Nodes() ([]cloudprovider.Instance, error) { // node by default, using manifest (most likely only kube-proxy). // Implementation optional. 
func (ng *nodegroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { - return nil, cloudprovider.ErrNotImplemented + if !ng.scalableResource.CanScaleFromZero() { + return nil, cloudprovider.ErrNotImplemented + } + + capacity, err := ng.scalableResource.InstanceCapacity() + if err != nil { + return nil, err + } + + nodeName := fmt.Sprintf("%s-asg-%d", ng.scalableResource.Name(), rand.Int63()) + node := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Labels: map[string]string{}, + }, + } + + node.Status.Capacity = capacity + node.Status.Allocatable = capacity + node.Status.Conditions = cloudprovider.BuildReadyConditions() + node.Spec.Taints = ng.scalableResource.Taints() + + node.Labels, err = ng.buildTemplateLabels(nodeName) + if err != nil { + return nil, err + } + + nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(ng.scalableResource.Name())) + nodeInfo.SetNode(&node) + + return nodeInfo, nil +} + +func (ng *nodegroup) buildTemplateLabels(nodeName string) (map[string]string, error) { + labels := cloudprovider.JoinStringMaps(ng.scalableResource.Labels(), buildGenericLabels(nodeName)) + + nodes, err := ng.Nodes() + if err != nil { + return nil, err + } + + if len(nodes) > 0 { + node, err := ng.machineController.findNodeByProviderID(normalizedProviderString(nodes[0].Id)) + if err != nil { + return nil, err + } + + if node != nil { + labels = cloudprovider.JoinStringMaps(labels, extractNodeLabels(node)) + } + } + return labels, nil } // Exist checks if the node group really exists on the cloud nodegroup @@ -289,9 +349,9 @@ func newNodeGroupFromScalableResource(controller *machineController, unstructure return nil, err } - // We don't scale from 0 so nodes must belong to a nodegroup - // that has a scale size of at least 1. - if found && replicas == 0 { + // Ensure that if the nodegroup has 0 replicas it is capable + // of scaling before adding it. 
+ if found && replicas == 0 && !scalableResource.CanScaleFromZero() { return nil, nil } @@ -305,3 +365,47 @@ func newNodeGroupFromScalableResource(controller *machineController, unstructure scalableResource: scalableResource, }, nil } + +func buildGenericLabels(nodeName string) map[string]string { + // TODO revisit this function and add an explanation about what these + // labels are used for, or remove them if not necessary + m := make(map[string]string) + m[kubeletapis.LabelArch] = cloudprovider.DefaultArch + m[corev1.LabelArchStable] = cloudprovider.DefaultArch + + m[kubeletapis.LabelOS] = cloudprovider.DefaultOS + m[corev1.LabelOSStable] = cloudprovider.DefaultOS + + m[corev1.LabelHostname] = nodeName + return m +} + +// extract a predefined list of labels from the existing node +func extractNodeLabels(node *corev1.Node) map[string]string { + m := make(map[string]string) + if node.Labels == nil { + return m + } + + setLabelIfNotEmpty(m, node.Labels, kubeletapis.LabelArch) + setLabelIfNotEmpty(m, node.Labels, corev1.LabelArchStable) + + setLabelIfNotEmpty(m, node.Labels, kubeletapis.LabelOS) + setLabelIfNotEmpty(m, node.Labels, corev1.LabelOSStable) + + setLabelIfNotEmpty(m, node.Labels, corev1.LabelInstanceType) + setLabelIfNotEmpty(m, node.Labels, corev1.LabelInstanceTypeStable) + + setLabelIfNotEmpty(m, node.Labels, corev1.LabelZoneRegion) + setLabelIfNotEmpty(m, node.Labels, corev1.LabelZoneRegionStable) + + setLabelIfNotEmpty(m, node.Labels, corev1.LabelZoneFailureDomain) + + return m +} + +func setLabelIfNotEmpty(to, from map[string]string, key string) { + if value := from[key]; value != "" { + to[key] = value + } +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go index 42eef0957261..a39fccc4a52c 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go +++ 
b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go @@ -19,19 +19,21 @@ package clusterapi import ( "context" "fmt" - "k8s.io/client-go/tools/cache" "path" "sort" "strings" "testing" "time" + "k8s.io/client-go/tools/cache" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" ) const ( @@ -178,10 +180,6 @@ func TestNodeGroupNewNodeGroupConstructor(t *testing.T) { t.Errorf("expected %q, got %q", expectedDebug, ng.Debug()) } - if _, err := ng.TemplateNodeInfo(); err != cloudprovider.ErrNotImplemented { - t.Error("expected error") - } - if exists := ng.Exist(); !exists { t.Errorf("expected %t, got %t", true, exists) } @@ -204,7 +202,7 @@ func TestNodeGroupNewNodeGroupConstructor(t *testing.T) { t.Run("MachineSet", func(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - test(t, tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), tc.nodeCount, tc.annotations)) + test(t, tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), tc.nodeCount, tc.annotations, nil)) }) } }) @@ -212,7 +210,7 @@ func TestNodeGroupNewNodeGroupConstructor(t *testing.T) { t.Run("MachineDeployment", func(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - test(t, tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), tc.nodeCount, tc.annotations)) + test(t, tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), tc.nodeCount, tc.annotations, nil)) }) } }) @@ -296,7 +294,7 @@ func TestNodeGroupIncreaseSizeErrors(t *testing.T) { nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", } - test(t, &tc, createMachineSetTestConfig(RandomString(6), 
RandomString(6), RandomString(6), int(tc.initial), annotations)) + test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) } }) @@ -308,7 +306,7 @@ func TestNodeGroupIncreaseSizeErrors(t *testing.T) { nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", } - test(t, &tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations)) + test(t, &tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) } }) @@ -374,7 +372,7 @@ func TestNodeGroupIncreaseSize(t *testing.T) { expected: 4, delta: 1, } - test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations)) + test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) t.Run("MachineDeployment", func(t *testing.T) { @@ -384,7 +382,7 @@ func TestNodeGroupIncreaseSize(t *testing.T) { expected: 4, delta: 1, } - test(t, &tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations)) + test(t, &tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) } @@ -516,7 +514,7 @@ func TestNodeGroupDecreaseTargetSize(t *testing.T) { delta: -1, expectedError: true, } - test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations)) + test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) t.Run("MachineSet", func(t *testing.T) { @@ -527,7 +525,7 @@ func TestNodeGroupDecreaseTargetSize(t *testing.T) { expected: 3, delta: -1, } - test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), 
annotations)) + test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) t.Run("MachineDeployment", func(t *testing.T) { @@ -539,7 +537,7 @@ func TestNodeGroupDecreaseTargetSize(t *testing.T) { delta: -1, expectedError: true, } - test(t, &tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations)) + test(t, &tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) } @@ -621,7 +619,7 @@ func TestNodeGroupDecreaseSizeErrors(t *testing.T) { nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", } - test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations)) + test(t, &tc, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) } }) @@ -633,7 +631,7 @@ func TestNodeGroupDecreaseSizeErrors(t *testing.T) { nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", } - test(t, &tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations)) + test(t, &tc, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), int(tc.initial), annotations, nil)) }) } }) @@ -711,17 +709,37 @@ func TestNodeGroupDeleteNodes(t *testing.T) { // sorting and the expected semantics in test() will fail. 
t.Run("MachineSet", func(t *testing.T) { - test(t, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{ - nodeGroupMinSizeAnnotationKey: "1", - nodeGroupMaxSizeAnnotationKey: "10", - })) + test( + t, + createMachineSetTestConfig( + RandomString(6), + RandomString(6), + RandomString(6), + 10, + map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }, + nil, + ), + ) }) t.Run("MachineDeployment", func(t *testing.T) { - test(t, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{ - nodeGroupMinSizeAnnotationKey: "1", - nodeGroupMaxSizeAnnotationKey: "10", - })) + test( + t, + createMachineDeploymentTestConfig( + RandomString(6), + RandomString(6), + RandomString(6), + 10, + map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }, + nil, + ), + ) }) } @@ -792,16 +810,16 @@ func TestNodeGroupMachineSetDeleteNodesWithMismatchedNodes(t *testing.T) { t.Run("MachineSet", func(t *testing.T) { namespace := RandomString(6) clusterName := RandomString(6) - testConfig0 := createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 1, 2, annotations) - testConfig1 := createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 1, 2, annotations) + testConfig0 := createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 1, 2, annotations, nil) + testConfig1 := createMachineSetTestConfigs(namespace, clusterName, RandomString(6), 1, 2, annotations, nil) test(t, 2, append(testConfig0, testConfig1...)) }) t.Run("MachineDeployment", func(t *testing.T) { namespace := RandomString(6) clusterName := RandomString(6) - testConfig0 := createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 1, 2, annotations) - testConfig1 := createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 1, 2, annotations) + testConfig0 := 
createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 1, 2, annotations, nil) + testConfig1 := createMachineDeploymentTestConfigs(namespace, clusterName, RandomString(6), 1, 2, annotations, nil) test(t, 2, append(testConfig0, testConfig1...)) }) } @@ -971,17 +989,37 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) { // sorting and the expected semantics in test() will fail. t.Run("MachineSet", func(t *testing.T) { - test(t, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{ - nodeGroupMinSizeAnnotationKey: "1", - nodeGroupMaxSizeAnnotationKey: "10", - })) + test( + t, + createMachineSetTestConfig( + RandomString(6), + RandomString(6), + RandomString(6), + 10, + map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }, + nil, + ), + ) }) t.Run("MachineDeployment", func(t *testing.T) { - test(t, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{ - nodeGroupMinSizeAnnotationKey: "1", - nodeGroupMaxSizeAnnotationKey: "10", - })) + test( + t, + createMachineDeploymentTestConfig( + RandomString(6), + RandomString(6), + RandomString(6), + 10, + map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }, + nil, + ), + ) }) } @@ -1100,17 +1138,37 @@ func TestNodeGroupDeleteNodesSequential(t *testing.T) { // sorting and the expected semantics in test() will fail. 
t.Run("MachineSet", func(t *testing.T) { - test(t, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{ - nodeGroupMinSizeAnnotationKey: "1", - nodeGroupMaxSizeAnnotationKey: "10", - })) + test( + t, + createMachineSetTestConfig( + RandomString(6), + RandomString(6), + RandomString(6), + 10, + map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }, + nil, + ), + ) }) t.Run("MachineDeployment", func(t *testing.T) { - test(t, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{ - nodeGroupMinSizeAnnotationKey: "1", - nodeGroupMaxSizeAnnotationKey: "10", - })) + test( + t, + createMachineDeploymentTestConfig( + RandomString(6), + RandomString(6), + RandomString(6), + 10, + map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }, + nil, + ), + ) }) } @@ -1181,16 +1239,252 @@ func TestNodeGroupWithFailedMachine(t *testing.T) { // sorting and the expected semantics in test() will fail. 
t.Run("MachineSet", func(t *testing.T) { - test(t, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{ - nodeGroupMinSizeAnnotationKey: "1", - nodeGroupMaxSizeAnnotationKey: "10", - })) + test( + t, + createMachineSetTestConfig( + RandomString(6), + RandomString(6), + RandomString(6), + 10, + map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }, + nil, + ), + ) }) t.Run("MachineDeployment", func(t *testing.T) { - test(t, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{ - nodeGroupMinSizeAnnotationKey: "1", - nodeGroupMaxSizeAnnotationKey: "10", - })) + test( + t, + createMachineDeploymentTestConfig( + RandomString(6), + RandomString(6), + RandomString(6), + 10, + map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + }, + nil, + ), + ) }) } + +func TestNodeGroupTemplateNodeInfo(t *testing.T) { + enableScaleAnnotations := map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + } + + type testCaseConfig struct { + nodeLabels map[string]string + nodegroupLabels map[string]string + includeNodes bool + expectedErr error + expectedCapacity map[corev1.ResourceName]int64 + expectedNodeLabels map[string]string + } + + testCases := []struct { + name string + nodeGroupAnnotations map[string]string + config testCaseConfig + }{ + { + name: "When the NodeGroup cannot scale from zero", + config: testCaseConfig{ + expectedErr: cloudprovider.ErrNotImplemented, + }, + }, + { + name: "When the NodeGroup can scale from zero", + nodeGroupAnnotations: map[string]string{ + memoryKey: "2048Mi", + cpuKey: "2", + gpuTypeKey: gpuapis.ResourceNvidiaGPU, + gpuCountKey: "1", + }, + config: testCaseConfig{ + expectedErr: nil, + expectedCapacity: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 2, + corev1.ResourceMemory: 2048 * 1024 * 1024, + 
corev1.ResourcePods: 110, + gpuapis.ResourceNvidiaGPU: 1, + }, + expectedNodeLabels: map[string]string{ + "kubernetes.io/os": "linux", + "beta.kubernetes.io/os": "linux", + "kubernetes.io/arch": "amd64", + "beta.kubernetes.io/arch": "amd64", + }, + }, + }, + { + name: "When the NodeGroup can scale from zero and the nodegroup adds labels to the Node", + nodeGroupAnnotations: map[string]string{ + memoryKey: "2048Mi", + cpuKey: "2", + }, + config: testCaseConfig{ + expectedErr: nil, + nodegroupLabels: map[string]string{ + "nodeGroupLabel": "value", + "anotherLabel": "anotherValue", + }, + expectedCapacity: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 2, + corev1.ResourceMemory: 2048 * 1024 * 1024, + corev1.ResourcePods: 110, + }, + expectedNodeLabels: map[string]string{ + "kubernetes.io/os": "linux", + "beta.kubernetes.io/os": "linux", + "kubernetes.io/arch": "amd64", + "beta.kubernetes.io/arch": "amd64", + "nodeGroupLabel": "value", + "anotherLabel": "anotherValue", + }, + }, + }, + { + name: "When the NodeGroup can scale from zero and the Node still exists, it includes the known node labels", + nodeGroupAnnotations: map[string]string{ + memoryKey: "2048Mi", + cpuKey: "2", + }, + config: testCaseConfig{ + includeNodes: true, + expectedErr: nil, + nodeLabels: map[string]string{ + "kubernetes.io/os": "windows", + "kubernetes.io/arch": "arm64", + "node.kubernetes.io/instance-type": "instance1", + "anotherLabel": "nodeValue", // This should not be copied as it is not a well known label + }, + nodegroupLabels: map[string]string{ + "nodeGroupLabel": "value", + "anotherLabel": "nodeGroupValue", + }, + expectedCapacity: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 2, + corev1.ResourceMemory: 2048 * 1024 * 1024, + corev1.ResourcePods: 110, + }, + expectedNodeLabels: map[string]string{ + "kubernetes.io/os": "windows", + "beta.kubernetes.io/os": "linux", + "kubernetes.io/arch": "arm64", + "beta.kubernetes.io/arch": "amd64", + "nodeGroupLabel": "value", + 
"anotherLabel": "nodeGroupValue", + "node.kubernetes.io/instance-type": "instance1", + }, + }, + }, + } + + test := func(t *testing.T, testConfig *testConfig, config testCaseConfig) { + if testConfig.machineDeployment != nil { + unstructured.SetNestedStringMap(testConfig.machineDeployment.Object, config.nodegroupLabels, "spec", "template", "spec", "metadata", "labels") + } else { + unstructured.SetNestedStringMap(testConfig.machineSet.Object, config.nodegroupLabels, "spec", "template", "spec", "metadata", "labels") + } + + if config.includeNodes { + for i := range testConfig.nodes { + testConfig.nodes[i].SetLabels(config.nodeLabels) + } + } else { + testConfig.nodes = []*corev1.Node{} + } + + controller, stop := mustCreateTestController(t, testConfig) + defer stop() + + nodegroups, err := controller.nodeGroups() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if l := len(nodegroups); l != 1 { + t.Fatalf("expected 1 nodegroup, got %d", l) + } + + ng := nodegroups[0] + nodeInfo, err := ng.TemplateNodeInfo() + if config.expectedErr != nil { + if err != config.expectedErr { + t.Fatalf("expected error: %v, but got: %v", config.expectedErr, err) + } + return + } + + nodeAllocatable := nodeInfo.Node().Status.Allocatable + nodeCapacity := nodeInfo.Node().Status.Capacity + for resource, expectedCapacity := range config.expectedCapacity { + if gotAllocatable, ok := nodeAllocatable[resource]; !ok { + t.Errorf("Expected allocatable to have resource %q, resource not found", resource) + } else if gotAllocatable.Value() != expectedCapacity { + t.Errorf("Expected allocatable %q: %+v, Got: %+v", resource, expectedCapacity, gotAllocatable.Value()) + } + + if gotCapactiy, ok := nodeCapacity[resource]; !ok { + t.Errorf("Expected capacity to have resource %q, resource not found", resource) + } else if gotCapactiy.Value() != expectedCapacity { + t.Errorf("Expected capacity %q: %+v, Got: %+v", resource, expectedCapacity, gotCapactiy.Value()) + } + } + + // 
expectedNodeLabels won't have the hostname label as it is randomized, so +1 to its length + if len(nodeInfo.Node().GetLabels()) != len(config.expectedNodeLabels)+1 { + t.Errorf("Expected node labels to have len: %d, but got: %d", len(config.expectedNodeLabels)+1, len(nodeInfo.Node().GetLabels())) + } + for key, value := range nodeInfo.Node().GetLabels() { + // Exclude the hostname label as it is randomized + if key != corev1.LabelHostname { + if value != config.expectedNodeLabels[key] { + t.Errorf("Expected node label %q: %q, Got: %q", key, config.expectedNodeLabels[key], value) + } + } + } + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Run("MachineSet", func(t *testing.T) { + test( + t, + createMachineSetTestConfig( + testNamespace, + RandomString(6), + RandomString(6), + 10, + cloudprovider.JoinStringMaps(enableScaleAnnotations, tc.nodeGroupAnnotations), + nil, + ), + tc.config, + ) + }) + + t.Run("MachineDeployment", func(t *testing.T) { + test( + t, + createMachineDeploymentTestConfig( + testNamespace, + RandomString(6), + RandomString(6), + 10, + cloudprovider.JoinStringMaps(enableScaleAnnotations, tc.nodeGroupAnnotations), + nil, + ), + tc.config, + ) + }) + }) + } + +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_provider.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_provider.go index 324aeea5bd22..290be55f5384 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_provider.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_provider.go @@ -180,16 +180,16 @@ func BuildClusterAPI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupD klog.Fatalf("create scale client failed: %v", err) } - controller, err := newMachineController(managementClient, workloadClient, managementDiscoveryClient, managementScaleClient, do) - if err != nil { - klog.Fatal(err) - } - // Ideally this would be passed in but the builder is not // currently organised to do so. 
stopCh := make(chan struct{}) - if err := controller.run(stopCh); err != nil { + controller, err := newMachineController(managementClient, workloadClient, managementDiscoveryClient, managementScaleClient, do, stopCh) + if err != nil { + klog.Fatal(err) + } + + if err := controller.run(); err != nil { klog.Fatal(err) } diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go index 488e85f67e40..f8ad7f4e215b 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go @@ -20,12 +20,17 @@ import ( "context" "fmt" "path" + "strings" "time" "github.com/pkg/errors" + apiv1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" + klog "k8s.io/klog/v2" ) type unstructuredScalableResource struct { @@ -163,6 +168,166 @@ func (r unstructuredScalableResource) MarkMachineForDeletion(machine *unstructur return updateErr } +func (r unstructuredScalableResource) Labels() map[string]string { + labels, found, err := unstructured.NestedStringMap(r.unstructured.Object, "spec", "template", "spec", "metadata", "labels") + if !found || err != nil { + return nil + } + return labels +} + +func (r unstructuredScalableResource) Taints() []apiv1.Taint { + taints, found, err := unstructured.NestedSlice(r.unstructured.Object, "spec", "template", "spec", "taints") + if !found || err != nil { + return nil + } + ret := make([]apiv1.Taint, len(taints)) + for i, t := range taints { + if v, ok := t.(apiv1.Taint); ok { + ret[i] = v + } else { + // if we cannot convert the interface to a Taint, return early with zero value + return nil + } + } + return ret +} + +// A node group can scale from zero if it can inform about the 
CPU and memory +// capacity of the nodes within the group. +func (r unstructuredScalableResource) CanScaleFromZero() bool { + capacity, err := r.InstanceCapacity() + if err != nil { + return false + } + // CPU and memory are the minimum necessary for scaling from zero + _, cpuOk := capacity[corev1.ResourceCPU] + _, memOk := capacity[corev1.ResourceMemory] + + return cpuOk && memOk +} + +// Inspect the annotations on the scalable resource, and the status.capacity +// field of the machine template infrastructure resource to build the projected +// capacity for this node group. The returned map will be empty if the +// provider does not support scaling from zero, or the annotations have not +// been added. +func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceName]resource.Quantity, error) { + capacityAnnotations := map[corev1.ResourceName]resource.Quantity{} + + cpu, err := r.InstanceCPUCapacityAnnotation() + if err != nil { + return nil, err + } + if !cpu.IsZero() { + capacityAnnotations[corev1.ResourceCPU] = cpu + } + + mem, err := r.InstanceMemoryCapacityAnnotation() + if err != nil { + return nil, err + } + if !mem.IsZero() { + capacityAnnotations[corev1.ResourceMemory] = mem + } + + gpuCount, err := r.InstanceGPUCapacityAnnotation() + if err != nil { + return nil, err + } + gpuType := r.InstanceGPUTypeAnnotation() + if !gpuCount.IsZero() && gpuType != "" { + capacityAnnotations[corev1.ResourceName(gpuType)] = gpuCount + } + + maxPods, err := r.InstanceMaxPodsCapacityAnnotation() + if err != nil { + return nil, err + } + if maxPods.IsZero() { + maxPods = *resource.NewQuantity(defaultMaxPods, resource.DecimalSI) + } + capacityAnnotations[corev1.ResourcePods] = maxPods + + infraObj, err := r.readInfrastructureReferenceResource() + if err != nil || infraObj == nil { + // because it is possible that the infrastructure provider does not implement + // the capacity in the infrastructure reference, if there are annotations we + // should return 
them here. + // Check against 1 here because the max pods is always set. + if len(capacityAnnotations) > 1 { + return capacityAnnotations, nil + } + return nil, err + } + capacityInfraStatus := resourceCapacityFromInfrastructureObject(infraObj) + + // The annotations should override any values from the status block of the machine template. + // We loop through the status block capacity first, then overwrite any values with the + // annotation capacities. + capacity := map[corev1.ResourceName]resource.Quantity{} + for k, v := range capacityInfraStatus { + capacity[k] = v + } + for k, v := range capacityAnnotations { + capacity[k] = v + } + + return capacity, nil +} + +func (r unstructuredScalableResource) InstanceCPUCapacityAnnotation() (resource.Quantity, error) { + return parseCPUCapacity(r.unstructured.GetAnnotations()) +} + +func (r unstructuredScalableResource) InstanceMemoryCapacityAnnotation() (resource.Quantity, error) { + return parseMemoryCapacity(r.unstructured.GetAnnotations()) +} + +func (r unstructuredScalableResource) InstanceGPUCapacityAnnotation() (resource.Quantity, error) { + return parseGPUCount(r.unstructured.GetAnnotations()) +} + +func (r unstructuredScalableResource) InstanceGPUTypeAnnotation() string { + return parseGPUType(r.unstructured.GetAnnotations()) +} + +func (r unstructuredScalableResource) InstanceMaxPodsCapacityAnnotation() (resource.Quantity, error) { + return parseMaxPodsCapacity(r.unstructured.GetAnnotations()) +} + +func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) { + infraref, found, err := unstructured.NestedStringMap(r.unstructured.Object, "spec", "template", "spec", "infrastructureRef") + if !found || err != nil { + return nil, nil + } + + apiversion, ok := infraref["apiVersion"] + if !ok { + return nil, nil + } + kind, ok := infraref["kind"] + if !ok { + return nil, nil + } + name, ok := infraref["name"] + if !ok { + return nil, nil + } + // kind needs to be 
lower case and plural + kind = fmt.Sprintf("%ss", strings.ToLower(kind)) + gvk := schema.FromAPIVersionAndKind(apiversion, kind) + res := schema.GroupVersionResource{Group: gvk.Group, Version: gvk.Version, Resource: gvk.Kind} + + infra, err := r.controller.getInfrastructureResource(res, name, r.Namespace()) + if err != nil { + klog.V(4).Infof("Unable to read infrastructure reference, error: %v", err) + return nil, err + } + + return infra, nil +} + func newUnstructuredScalableResource(controller *machineController, u *unstructured.Unstructured) (*unstructuredScalableResource, error) { minSize, maxSize, err := parseScalingBounds(u.GetAnnotations()) if err != nil { @@ -176,3 +341,21 @@ func newUnstructuredScalableResource(controller *machineController, u *unstructu minSize: minSize, }, nil } + +func resourceCapacityFromInfrastructureObject(infraobj *unstructured.Unstructured) map[corev1.ResourceName]resource.Quantity { + capacity := map[corev1.ResourceName]resource.Quantity{} + + infracap, found, err := unstructured.NestedStringMap(infraobj.Object, "status", "capacity") + if !found || err != nil { + return capacity + } + + for k, v := range infracap { + // if we cannot parse the quantity, don't add it to the capacity + if value, err := resource.ParseQuantity(v); err == nil { + capacity[corev1.ResourceName(k)] = value + } + } + + return capacity +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go index 7252b49f677f..49a06e6c3a50 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go @@ -19,11 +19,19 @@ package clusterapi import ( "context" "fmt" + "testing" + "time" + + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/client-go/tools/cache" - 
"testing" - "time" +) + +const ( + cpuStatusKey = "cpu" + memoryStatusKey = "memory" + nvidiaGpuStatusKey = "nvidia.com/gpu" ) func TestSetSize(t *testing.T) { @@ -82,6 +90,7 @@ func TestSetSize(t *testing.T) { nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", }, + nil, )) }) @@ -94,6 +103,7 @@ func TestSetSize(t *testing.T) { nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", }, + nil, )) }) } @@ -194,11 +204,11 @@ func TestReplicas(t *testing.T) { } t.Run("MachineSet", func(t *testing.T) { - test(t, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), initialReplicas, nil)) + test(t, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), initialReplicas, nil, nil)) }) t.Run("MachineDeployment", func(t *testing.T) { - test(t, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), initialReplicas, nil)) + test(t, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), initialReplicas, nil, nil)) }) } @@ -253,6 +263,7 @@ func TestSetSizeAndReplicas(t *testing.T) { nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", }, + nil, )) }) @@ -265,6 +276,204 @@ func TestSetSizeAndReplicas(t *testing.T) { nodeGroupMinSizeAnnotationKey: "1", nodeGroupMaxSizeAnnotationKey: "10", }, + nil, )) }) } + +func TestAnnotations(t *testing.T) { + cpuQuantity := resource.MustParse("2") + memQuantity := resource.MustParse("1024Mi") + gpuQuantity := resource.MustParse("1") + maxPodsQuantity := resource.MustParse("42") + annotations := map[string]string{ + cpuKey: cpuQuantity.String(), + memoryKey: memQuantity.String(), + gpuCountKey: gpuQuantity.String(), + maxPodsKey: maxPodsQuantity.String(), + } + + test := func(t *testing.T, testConfig *testConfig, testResource *unstructured.Unstructured) { + controller, stop := mustCreateTestController(t, testConfig) + defer stop() + + sr, err := 
newUnstructuredScalableResource(controller, testResource) + if err != nil { + t.Fatal(err) + } + + if cpu, err := sr.InstanceCPUCapacityAnnotation(); err != nil { + t.Fatal(err) + } else if cpuQuantity.Cmp(cpu) != 0 { + t.Errorf("expected %v, got %v", cpuQuantity, cpu) + } + + if mem, err := sr.InstanceMemoryCapacityAnnotation(); err != nil { + t.Fatal(err) + } else if memQuantity.Cmp(mem) != 0 { + t.Errorf("expected %v, got %v", memQuantity, mem) + } + + if gpu, err := sr.InstanceGPUCapacityAnnotation(); err != nil { + t.Fatal(err) + } else if gpuQuantity.Cmp(gpu) != 0 { + t.Errorf("expected %v, got %v", gpuQuantity, gpu) + } + + if maxPods, err := sr.InstanceMaxPodsCapacityAnnotation(); err != nil { + t.Fatal(err) + } else if maxPodsQuantity.Cmp(maxPods) != 0 { + t.Errorf("expected %v, got %v", maxPodsQuantity, maxPods) + } + } + + t.Run("MachineSet", func(t *testing.T) { + testConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, annotations, nil) + test(t, testConfig, testConfig.machineSet) + }) + + t.Run("MachineDeployment", func(t *testing.T) { + testConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, annotations, nil) + test(t, testConfig, testConfig.machineDeployment) + }) +} + +func TestCanScaleFromZero(t *testing.T) { + testConfigs := []struct { + name string + annotations map[string]string + capacity map[string]string + canScale bool + }{ + { + "can scale from zero", + map[string]string{ + cpuKey: "1", + memoryKey: "1024Mi", + }, + nil, + true, + }, + { + "with missing CPU info cannot scale from zero", + map[string]string{ + memoryKey: "1024Mi", + }, + nil, + false, + }, + { + "with missing Memory info cannot scale from zero", + map[string]string{ + cpuKey: "1", + }, + nil, + false, + }, + { + "with no information cannot scale from zero", + map[string]string{}, + nil, + false, + }, + { + "with capacity in machine template can scale from zero", + map[string]string{}, + 
map[string]string{ + cpuStatusKey: "1", + memoryStatusKey: "4G", + }, + true, + }, + { + "with missing cpu capacity in machine template cannot scale from zero", + map[string]string{}, + map[string]string{ + memoryStatusKey: "4G", + }, + false, + }, + { + "with missing memory capacity in machine template cannot scale from zero", + map[string]string{}, + map[string]string{ + cpuStatusKey: "1", + }, + false, + }, + { + "with both annotations and capacity in machine template can scale from zero", + map[string]string{ + cpuKey: "1", + memoryKey: "1024Mi", + }, + map[string]string{ + cpuStatusKey: "1", + memoryStatusKey: "4G", + }, + true, + }, + { + "with incomplete annotations and capacity in machine template cannot scale from zero", + map[string]string{ + cpuKey: "1", + }, + map[string]string{ + nvidiaGpuStatusKey: "1", + }, + false, + }, + { + "with complete information split across annotations and capacity in machine template can scale from zero", + map[string]string{ + cpuKey: "1", + }, + map[string]string{ + memoryStatusKey: "4G", + }, + true, + }, + } + + for _, tc := range testConfigs { + testname := fmt.Sprintf("MachineSet %s", tc.name) + t.Run(testname, func(t *testing.T) { + msTestConfig := createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, tc.annotations, tc.capacity) + controller, stop := mustCreateTestController(t, msTestConfig) + defer stop() + + testResource := msTestConfig.machineSet + + sr, err := newUnstructuredScalableResource(controller, testResource) + if err != nil { + t.Fatal(err) + } + + canScale := sr.CanScaleFromZero() + if canScale != tc.canScale { + t.Errorf("expected %v, got %v", tc.canScale, canScale) + } + }) + } + + for _, tc := range testConfigs { + testname := fmt.Sprintf("MachineDeployment %s", tc.name) + t.Run(testname, func(t *testing.T) { + msTestConfig := createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 1, tc.annotations, tc.capacity) + controller, stop := 
mustCreateTestController(t, msTestConfig) + defer stop() + + testResource := msTestConfig.machineDeployment + + sr, err := newUnstructuredScalableResource(controller, testResource) + if err != nil { + t.Fatal(err) + } + + canScale := sr.CanScaleFromZero() + if canScale != tc.canScale { + t.Errorf("expected %v, got %v", tc.canScale, canScale) + } + }) + } +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go index bcfcebb968f5..e16dfbcfece6 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go @@ -22,10 +22,23 @@ import ( "strings" "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) +const ( + deprecatedNodeGroupMinSizeAnnotationKey = "cluster.k8s.io/cluster-api-autoscaler-node-group-min-size" + deprecatedNodeGroupMaxSizeAnnotationKey = "cluster.k8s.io/cluster-api-autoscaler-node-group-max-size" + deprecatedClusterNameLabel = "cluster.k8s.io/cluster-name" + + cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu" + memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory" + gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type" + gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count" + maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods" +) + var ( // clusterNameLabel is the label applied to objects(Machine, MachineSet, MachineDeployment) // to identify which cluster they are owned by. Because the label can be @@ -66,6 +79,7 @@ var ( // variable, they are initialized here. 
nodeGroupMinSizeAnnotationKey = getNodeGroupMinSizeAnnotationKey() nodeGroupMaxSizeAnnotationKey = getNodeGroupMaxSizeAnnotationKey() + zeroQuantity = resource.MustParse("0") ) type normalizedProviderID string @@ -157,6 +171,61 @@ func normalizedProviderString(s string) normalizedProviderID { return normalizedProviderID(split[len(split)-1]) } +func scaleFromZeroAnnotationsEnabled(annotations map[string]string) bool { + cpu := annotations[cpuKey] + mem := annotations[memoryKey] + + if cpu != "" && mem != "" { + return true + } + return false +} + +func parseKey(annotations map[string]string, key string) (resource.Quantity, error) { + if val, exists := annotations[key]; exists && val != "" { + return resource.ParseQuantity(val) + } + return zeroQuantity.DeepCopy(), nil +} + +func parseIntKey(annotations map[string]string, key string) (resource.Quantity, error) { + if val, exists := annotations[key]; exists && val != "" { + valInt, err := strconv.ParseInt(val, 10, 0) + if err != nil { + return zeroQuantity.DeepCopy(), fmt.Errorf("value %q from annotation %q expected to be an integer: %v", val, key, err) + } + return *resource.NewQuantity(valInt, resource.DecimalSI), nil + } + return zeroQuantity.DeepCopy(), nil +} + +func parseCPUCapacity(annotations map[string]string) (resource.Quantity, error) { + return parseKey(annotations, cpuKey) +} + +func parseMemoryCapacity(annotations map[string]string) (resource.Quantity, error) { + return parseKey(annotations, memoryKey) +} + +func parseGPUCount(annotations map[string]string) (resource.Quantity, error) { + return parseIntKey(annotations, gpuCountKey) +} + +// The GPU type is not currently considered by the autoscaler when planning +// expansion, but most likely will be in the future. This method is being added +// in expectation of that arrival. 
+// see https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/utils/gpu/gpu.go +func parseGPUType(annotations map[string]string) string { + if val, found := annotations[gpuTypeKey]; found { + return val + } + return "" +} + +func parseMaxPodsCapacity(annotations map[string]string) (resource.Quantity, error) { + return parseIntKey(annotations, maxPodsKey) +} + func clusterNameFromResource(r *unstructured.Unstructured) string { // Use Spec.ClusterName if defined (only available on v1alpha3+ types) clusterName, found, err := unstructured.NestedString(r.Object, "spec", "clusterName") diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go index 70b2710e5ed2..92efe5f41956 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go @@ -23,8 +23,10 @@ import ( "strings" "testing" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" ) const ( @@ -429,6 +431,229 @@ func TestUtilNormalizedProviderID(t *testing.T) { } } +func TestScaleFromZeroEnabled(t *testing.T) { + for _, tc := range []struct { + description string + enabled bool + annotations map[string]string + }{{ + description: "nil annotations", + enabled: false, + }, { + description: "empty annotations", + annotations: map[string]string{}, + enabled: false, + }, { + description: "non-matching annotation", + annotations: map[string]string{"foo": "bar"}, + enabled: false, + }, { + description: "matching key, incomplete annotations", + annotations: map[string]string{ + "foo": "bar", + cpuKey: "1", + }, + enabled: false, + }, { + description: "matching key, complete annotations", + annotations: map[string]string{ + "foo": "bar", + cpuKey: "1", + memoryKey: "2Mi", + }, 
+		enabled: true,
+	}} {
+		t.Run(tc.description, func(t *testing.T) {
+			got := scaleFromZeroAnnotationsEnabled(tc.annotations)
+			if tc.enabled != got {
+				t.Errorf("expected %t, got %t", tc.enabled, got)
+			}
+		})
+	}
+}
+
+func TestParseCPUCapacity(t *testing.T) {
+	for _, tc := range []struct {
+		description      string
+		annotations      map[string]string
+		expectedQuantity resource.Quantity
+		expectedError    bool
+	}{{
+		description:      "nil annotations",
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    false,
+	}, {
+		description:      "empty annotations",
+		annotations:      map[string]string{},
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    false,
+	}, {
+		description:      "bad quantity",
+		annotations:      map[string]string{cpuKey: "not-a-quantity"},
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    true,
+	}, {
+		description:      "valid quantity with units",
+		annotations:      map[string]string{cpuKey: "123m"},
+		expectedError:    false,
+		expectedQuantity: resource.MustParse("123m"),
+	}, {
+		description:      "valid quantity without units",
+		annotations:      map[string]string{cpuKey: "1"},
+		expectedError:    false,
+		expectedQuantity: resource.MustParse("1"),
+	}, {
+		description:      "valid fractional quantity without units",
+		annotations:      map[string]string{cpuKey: "0.1"},
+		expectedError:    false,
+		expectedQuantity: resource.MustParse("0.1"),
+	}} {
+		t.Run(tc.description, func(t *testing.T) {
+			got, err := parseCPUCapacity(tc.annotations)
+			if tc.expectedError && err == nil {
+				t.Fatal("expected an error")
+			}
+			if tc.expectedQuantity.Cmp(got) != 0 { // Cmp compares numeric values, ignoring suffix formatting
+				t.Errorf("expected %v, got %v", tc.expectedQuantity.String(), got.String())
+			}
+		})
+	}
+}
+
+func TestParseMemoryCapacity(t *testing.T) {
+	for _, tc := range []struct {
+		description      string
+		annotations      map[string]string
+		expectedQuantity resource.Quantity
+		expectedError    bool
+	}{{
+		description:      "nil annotations",
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    false,
+	}, {
+		description:      "empty annotations",
+		annotations:      map[string]string{},
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    false,
+	}, {
+		description:      "bad quantity",
+		annotations:      map[string]string{memoryKey: "not-a-quantity"},
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    true,
+	}, {
+		description:      "quantity as with no unit type",
+		annotations:      map[string]string{memoryKey: "1024"},
+		expectedQuantity: *resource.NewQuantity(1024, resource.DecimalSI),
+		expectedError:    false,
+	}, {
+		description:      "quantity with unit type (Mi)",
+		annotations:      map[string]string{memoryKey: "456Mi"},
+		expectedError:    false,
+		expectedQuantity: *resource.NewQuantity(456*units.MiB, resource.DecimalSI), // suffix class differs from "456Mi" but Cmp compares values only
+	}, {
+		description:      "quantity with unit type (Gi)",
+		annotations:      map[string]string{memoryKey: "8Gi"},
+		expectedError:    false,
+		expectedQuantity: *resource.NewQuantity(8*units.GiB, resource.DecimalSI),
+	}} {
+		t.Run(tc.description, func(t *testing.T) {
+			got, err := parseMemoryCapacity(tc.annotations)
+			if tc.expectedError && err == nil {
+				t.Fatal("expected an error")
+			}
+			if tc.expectedQuantity.Cmp(got) != 0 {
+				t.Errorf("expected %v, got %v", tc.expectedQuantity.String(), got.String())
+			}
+		})
+	}
+}
+
+func TestParseGPUCapacity(t *testing.T) {
+	for _, tc := range []struct {
+		description      string
+		annotations      map[string]string
+		expectedQuantity resource.Quantity
+		expectedError    bool
+	}{{
+		description:      "nil annotations",
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    false,
+	}, {
+		description:      "empty annotations",
+		annotations:      map[string]string{},
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    false,
+	}, {
+		description:      "bad quantity",
+		annotations:      map[string]string{gpuCountKey: "not-a-quantity"},
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    true,
+	}, {
+		description:      "valid quantity",
+		annotations:      map[string]string{gpuCountKey: "13"},
+		expectedError:    false,
+		expectedQuantity: resource.MustParse("13"),
+	}, {
+		description:      "valid quantity, bad unit type",
+		annotations:      map[string]string{gpuCountKey: "13Mi"}, // GPU count must be a plain integer; unit suffixes are rejected
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    true,
+	}} {
+		t.Run(tc.description, func(t *testing.T) {
+			got, err := parseGPUCount(tc.annotations)
+			if tc.expectedError && err == nil {
+				t.Fatal("expected an error")
+			}
+			if tc.expectedQuantity.Cmp(got) != 0 {
+				t.Errorf("expected %v, got %v", tc.expectedQuantity.String(), got.String())
+			}
+		})
+	}
+}
+
+func TestParseMaxPodsCapacity(t *testing.T) {
+	for _, tc := range []struct {
+		description      string
+		annotations      map[string]string
+		expectedQuantity resource.Quantity
+		expectedError    bool
+	}{{
+		description:      "nil annotations",
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    false,
+	}, {
+		description:      "empty annotations",
+		annotations:      map[string]string{},
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    false,
+	}, {
+		description:      "bad quantity",
+		annotations:      map[string]string{maxPodsKey: "not-a-quantity"},
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    true,
+	}, {
+		description:      "valid quantity",
+		annotations:      map[string]string{maxPodsKey: "13"},
+		expectedError:    false,
+		expectedQuantity: resource.MustParse("13"),
+	}, {
+		description:      "valid quantity, bad unit type",
+		annotations:      map[string]string{maxPodsKey: "13Mi"}, // maxPods must be a plain integer; unit suffixes are rejected
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    true,
+	}} {
+		t.Run(tc.description, func(t *testing.T) {
+			got, err := parseMaxPodsCapacity(tc.annotations)
+			if tc.expectedError && err == nil {
+				t.Fatal("expected an error")
+			}
+			if tc.expectedQuantity.Cmp(got) != 0 {
+				t.Errorf("expected %v, got %v", tc.expectedQuantity.String(), got.String())
+			}
+		})
+	}
+}
+
 func Test_clusterNameFromResource(t *testing.T) {
 	for _, tc := range []struct {
 		name string