UPSTREAM: <carry>: Implement scale from zero
This allows a Machine{Set,Deployment} to scale up/down from 0,
provided the following annotations are set:

```yaml
apiVersion: v1
items:
- apiVersion: machine.openshift.io/v1beta1
  kind: MachineSet
  metadata:
    annotations:
      machine.openshift.io/cluster-api-autoscaler-node-group-min-size: "0"
      machine.openshift.io/cluster-api-autoscaler-node-group-max-size: "6"
      machine.openshift.io/vCPU: "2"
      machine.openshift.io/memoryMb: 8G
      machine.openshift.io/GPU: "1"
      machine.openshift.io/maxPods: "100"
```

Note that `machine.openshift.io/GPU` and `machine.openshift.io/maxPods`
are optional.
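
As a rough illustration of how these annotations gate the feature: scaling
from zero is only possible when the autoscaler can learn the instance's CPU
and memory capacity from the annotations, so a `CanScaleFromZero`-style check
(used later in the diff) can be sketched as below. This is a minimal sketch
under that assumption; `canScaleFromZero` is a hypothetical helper, not the
exact code in this commit:

```go
package main

import "fmt"

// Annotation keys from the example above.
const (
	cpuKey    = "machine.openshift.io/vCPU"
	memoryKey = "machine.openshift.io/memoryMb"
)

// canScaleFromZero reports whether the autoscaler has enough information
// (CPU and memory capacity) to build a template node for an empty group.
func canScaleFromZero(annotations map[string]string) bool {
	if _, found := annotations[cpuKey]; !found {
		return false
	}
	if _, found := annotations[memoryKey]; !found {
		return false
	}
	return true
}

func main() {
	annotations := map[string]string{
		cpuKey:    "2",
		memoryKey: "8G",
	}
	fmt.Println(canScaleFromZero(annotations)) // true
}
```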

For autoscaling from zero, the autoscaler converts the memory value
received in the annotation to bytes using powers of two, consistent
with other providers, and fails if the received format is not
expected. This gives robust behaviour consistent with cloud provider
APIs and provider implementations.

https://cloud.google.com/compute/all-pricing
https://www.iec.ch/si/binary.htm
https://github.com/openshift/kubernetes-autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/aws_manager.go#L366
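
For example, a conversion consistent with that behaviour might look like
the following. This is a minimal sketch, assuming a value such as "8G"
should be read with binary units; `parseMemoryAnnotation` is a hypothetical
helper, and only the plain `G` suffix is normalized here for brevity:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// parseMemoryAnnotation converts an annotation value to a resource.Quantity,
// treating a decimal-style "G" suffix as its binary equivalent "Gi" so that
// "8G" means 8 * 1024^3 bytes, and failing on unexpected formats.
func parseMemoryAnnotation(val string) (*resource.Quantity, error) {
	if last := len(val) - 1; last >= 0 && val[last] == 'G' {
		val = val[:last] + "Gi"
	}
	q, err := resource.ParseQuantity(val)
	if err != nil {
		return nil, fmt.Errorf("unexpected memory format %q: %v", val, err)
	}
	return &q, nil
}

func main() {
	q, err := parseMemoryAnnotation("8G")
	if err != nil {
		panic(err)
	}
	fmt.Println(q.Value()) // 8589934592 (8 * 1024^3 bytes)
}
```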

Co-authored-by: Enxebre <[email protected]>
Co-authored-by: Joel Speed <[email protected]>
Co-authored-by: Michael McCune <[email protected]>
4 people committed May 31, 2022
1 parent 26d32eb commit 123e4c3
Showing 6 changed files with 780 additions and 10 deletions.
146 changes: 142 additions & 4 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
@@ -18,9 +18,14 @@ package clusterapi

import (
	"fmt"
	"math/rand"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
	kubeletapis "k8s.io/kubelet/pkg/apis"
	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"

	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@@ -29,6 +34,10 @@ import (

const (
	debugFormat = "%s (min: %d, max: %d, replicas: %d)"

	// This default for the maximum number of pods comes from the machine-config-operator
	// see https://github.com/openshift/machine-config-operator/blob/2f1bd6d99131fa4471ed95543a51dec3d5922b2b/templates/worker/01-worker-kubelet/_base/files/kubelet.yaml#L19
	defaultMaxPods = 250
)

type nodegroup struct {
@@ -226,7 +235,92 @@ func (ng *nodegroup) Nodes() ([]cloudprovider.Instance, error) {
// node by default, using manifest (most likely only kube-proxy).
// Implementation optional.
func (ng *nodegroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) {
	if !ng.scalableResource.CanScaleFromZero() {
		return nil, cloudprovider.ErrNotImplemented
	}

	cpu, err := ng.scalableResource.InstanceCPUCapacity()
	if err != nil {
		return nil, err
	}

	mem, err := ng.scalableResource.InstanceMemoryCapacity()
	if err != nil {
		return nil, err
	}

	gpu, err := ng.scalableResource.InstanceGPUCapacity()
	if err != nil {
		return nil, err
	}

	pod, err := ng.scalableResource.InstanceMaxPodsCapacity()
	if err != nil {
		return nil, err
	}

	// CPU and memory capacity are mandatory for building a template node.
	if cpu.IsZero() || mem.IsZero() {
		return nil, cloudprovider.ErrNotImplemented
	}

	// The GPU and max-pods annotations are optional.
	if gpu.IsZero() {
		gpu = zeroQuantity.DeepCopy()
	}

	if pod.IsZero() {
		pod = *resource.NewQuantity(defaultMaxPods, resource.DecimalSI)
	}

	capacity := map[corev1.ResourceName]resource.Quantity{
		corev1.ResourceCPU:        cpu,
		corev1.ResourceMemory:     mem,
		corev1.ResourcePods:       pod,
		gpuapis.ResourceNvidiaGPU: gpu,
	}

	nodeName := fmt.Sprintf("%s-asg-%d", ng.scalableResource.Name(), rand.Int63())
	node := corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name:   nodeName,
			Labels: map[string]string{},
		},
	}

	node.Status.Capacity = capacity
	node.Status.Allocatable = capacity
	node.Status.Conditions = cloudprovider.BuildReadyConditions()
	node.Spec.Taints = ng.scalableResource.Taints()

	node.Labels, err = ng.buildTemplateLabels(nodeName)
	if err != nil {
		return nil, err
	}

	nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(ng.scalableResource.Name()))
	nodeInfo.SetNode(&node)

	return nodeInfo, nil
}

func (ng *nodegroup) buildTemplateLabels(nodeName string) (map[string]string, error) {
	labels := cloudprovider.JoinStringMaps(ng.scalableResource.Labels(), buildGenericLabels(nodeName))

	nodes, err := ng.Nodes()
	if err != nil {
		return nil, err
	}

	if len(nodes) > 0 {
		node, err := ng.machineController.findNodeByProviderID(normalizedProviderString(nodes[0].Id))
		if err != nil {
			return nil, err
		}

		if node != nil {
			labels = cloudprovider.JoinStringMaps(labels, extractNodeLabels(node))
		}
	}
	return labels, nil
}

// Exist checks if the node group really exists on the cloud nodegroup
@@ -281,9 +375,9 @@ func newNodeGroupFromScalableResource(controller *machineController, unstructure
		return nil, err
	}

	// Ensure that if the nodegroup has 0 replicas it is capable
	// of scaling before adding it.
	if found && replicas == 0 && !scalableResource.CanScaleFromZero() {
		return nil, nil
	}

@@ -297,3 +391,47 @@
		scalableResource: scalableResource,
	}, nil
}

func buildGenericLabels(nodeName string) map[string]string {
	// TODO revisit this function and add an explanation about what these
	// labels are used for, or remove them if not necessary
	m := make(map[string]string)
	m[kubeletapis.LabelArch] = cloudprovider.DefaultArch
	m[corev1.LabelArchStable] = cloudprovider.DefaultArch

	m[kubeletapis.LabelOS] = cloudprovider.DefaultOS
	m[corev1.LabelOSStable] = cloudprovider.DefaultOS

	m[corev1.LabelHostname] = nodeName
	return m
}

// extract a predefined list of labels from the existing node
func extractNodeLabels(node *corev1.Node) map[string]string {
	m := make(map[string]string)
	if node.Labels == nil {
		return m
	}

	setLabelIfNotEmpty(m, node.Labels, kubeletapis.LabelArch)
	setLabelIfNotEmpty(m, node.Labels, corev1.LabelArchStable)

	setLabelIfNotEmpty(m, node.Labels, kubeletapis.LabelOS)
	setLabelIfNotEmpty(m, node.Labels, corev1.LabelOSStable)

	setLabelIfNotEmpty(m, node.Labels, corev1.LabelInstanceType)
	setLabelIfNotEmpty(m, node.Labels, corev1.LabelInstanceTypeStable)

	setLabelIfNotEmpty(m, node.Labels, corev1.LabelZoneRegion)
	setLabelIfNotEmpty(m, node.Labels, corev1.LabelZoneRegionStable)

	setLabelIfNotEmpty(m, node.Labels, corev1.LabelZoneFailureDomain)

	return m
}

func setLabelIfNotEmpty(to, from map[string]string, key string) {
	if value := from[key]; value != "" {
		to[key] = value
	}
}
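
For illustration, with the example annotations from the commit message
(2 vCPU, 8G memory, 1 GPU, 100 max pods), the capacity map assembled by
`TemplateNodeInfo` would contain entries along these lines. This is a
sketch for clarity, not a test from this commit; `"nvidia.com/gpu"` is
the value of `gpuapis.ResourceNvidiaGPU`:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Quantities mirroring the example annotations on the MachineSet.
	capacity := map[corev1.ResourceName]resource.Quantity{
		corev1.ResourceCPU:    resource.MustParse("2"),
		corev1.ResourceMemory: resource.MustParse("8G"),
		corev1.ResourcePods:   resource.MustParse("100"),
		"nvidia.com/gpu":      resource.MustParse("1"), // gpuapis.ResourceNvidiaGPU
	}

	for name, quantity := range capacity {
		fmt.Printf("%s: %s\n", name, quantity.String())
	}
}
```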