From 57f66365fae28221cb2be09e4a50b4b7bd5804ab Mon Sep 17 00:00:00 2001 From: Hakan Bostan Date: Tue, 14 Feb 2023 12:16:22 +0000 Subject: [PATCH] Use GpuConfig in utilization calculations for scale-down * Changed the `utilization.Calculate()` function to use GpuConfig instead of GPU label. * Started using GpuConfig in utilization threshold calculations. --- .../core/scaledown/actuation/actuator.go | 3 +- .../core/scaledown/eligibility/eligibility.go | 7 +++-- .../simulator/utilization/info.go | 19 ++++++------ .../simulator/utilization/info_test.go | 31 ++++++++++++------- cluster-autoscaler/utils/test/test_utils.go | 15 +++++++++ 5 files changed, 50 insertions(+), 25 deletions(-) diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator.go b/cluster-autoscaler/core/scaledown/actuation/actuator.go index 168841a04743..fd6c89c9287a 100644 --- a/cluster-autoscaler/core/scaledown/actuation/actuator.go +++ b/cluster-autoscaler/core/scaledown/actuation/actuator.go @@ -307,7 +307,8 @@ func (a *Actuator) scaleDownNodeToReport(node *apiv1.Node, drain bool) (*status. if err != nil { return nil, err } - utilInfo, err := utilization.Calculate(nodeInfo, a.ctx.IgnoreDaemonSetsUtilization, a.ctx.IgnoreMirrorPodsUtilization, a.ctx.CloudProvider.GPULabel(), time.Now()) + gpuConfig := a.ctx.CloudProvider.GetNodeGpuConfig(node) + utilInfo, err := utilization.Calculate(nodeInfo, a.ctx.IgnoreDaemonSetsUtilization, a.ctx.IgnoreMirrorPodsUtilization, gpuConfig, time.Now()) if err != nil { return nil, err } diff --git a/cluster-autoscaler/core/scaledown/eligibility/eligibility.go b/cluster-autoscaler/core/scaledown/eligibility/eligibility.go index 6717ae9a0b4b..04cb7d06e050 100644 --- a/cluster-autoscaler/core/scaledown/eligibility/eligibility.go +++ b/cluster-autoscaler/core/scaledown/eligibility/eligibility.go @@ -26,7 +26,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable" "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/utilization" - "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" "k8s.io/autoscaler/cluster-autoscaler/utils/klogx" apiv1 "k8s.io/api/core/v1" @@ -118,7 +117,8 @@ func (c *Checker) unremovableReasonAndNodeUtilization(context *context.Autoscali return simulator.ScaleDownDisabledAnnotation, nil } - utilInfo, err := utilization.Calculate(nodeInfo, context.IgnoreDaemonSetsUtilization, context.IgnoreMirrorPodsUtilization, context.CloudProvider.GPULabel(), timestamp) + gpuConfig := context.CloudProvider.GetNodeGpuConfig(node) + utilInfo, err := utilization.Calculate(nodeInfo, context.IgnoreDaemonSetsUtilization, context.IgnoreMirrorPodsUtilization, gpuConfig, timestamp) if err != nil { klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err) } @@ -154,7 +154,8 @@ func (c *Checker) unremovableReasonAndNodeUtilization(context *context.Autoscali func (c *Checker) isNodeBelowUtilizationThreshold(context *context.AutoscalingContext, node *apiv1.Node, nodeGroup cloudprovider.NodeGroup, utilInfo utilization.Info) (bool, error) { var threshold float64 var err error - if gpu.NodeHasGpu(context.CloudProvider.GPULabel(), node) { + gpuConfig := context.CloudProvider.GetNodeGpuConfig(node) + if gpuConfig != nil { threshold, err = c.thresholdGetter.GetScaleDownGpuUtilizationThreshold(context, nodeGroup) if err != nil { return false, err diff --git a/cluster-autoscaler/simulator/utilization/info.go b/cluster-autoscaler/simulator/utilization/info.go index 05084ace5e01..806d7c77b17e 100644 --- a/cluster-autoscaler/simulator/utilization/info.go +++ b/cluster-autoscaler/simulator/utilization/info.go @@ -20,8 +20,8 @@ import ( "fmt" "time" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" apiv1 "k8s.io/api/core/v1" @@ -46,17 +46,16 @@ type Info struct { // memory) or gpu utilization based on if the node has GPU or not. Per resource // utilization is the sum of requests for it divided by allocatable. It also // returns the individual cpu, memory and gpu utilization. -func Calculate(nodeInfo *schedulerframework.NodeInfo, skipDaemonSetPods, skipMirrorPods bool, gpuLabel string, currentTime time.Time) (utilInfo Info, err error) { - if gpu.NodeHasGpu(gpuLabel, nodeInfo.Node()) { - gpuUtil, err := calculateUtilizationOfResource(nodeInfo, gpu.ResourceNvidiaGPU, skipDaemonSetPods, skipMirrorPods, currentTime) +func Calculate(nodeInfo *schedulerframework.NodeInfo, skipDaemonSetPods, skipMirrorPods bool, gpuConfig *cloudprovider.GpuConfig, currentTime time.Time) (utilInfo Info, err error) { + if gpuConfig != nil { + gpuUtil, err := calculateUtilizationOfResource(nodeInfo, gpuConfig.ResourceName, skipDaemonSetPods, skipMirrorPods, currentTime) if err != nil { - klog.V(3).Infof("node %s has unready GPU", nodeInfo.Node().Name) - // Return 0 if GPU is unready. This will guarantee we can still scale down a node with unready GPU. - return Info{GpuUtil: 0, ResourceName: gpu.ResourceNvidiaGPU, Utilization: 0}, nil + klog.V(3).Infof("node %s has unready GPU resource: %s", nodeInfo.Node().Name, gpuConfig.ResourceName.String()) + // Return 0 if accelerator is unready. This will guarantee we can still scale down a node with unready accelerator. + return Info{GpuUtil: 0, ResourceName: gpuConfig.ResourceName, Utilization: 0}, nil } - - // Skips cpu and memory utilization calculation for node with GPU. - return Info{GpuUtil: gpuUtil, ResourceName: gpu.ResourceNvidiaGPU, Utilization: gpuUtil}, nil + // Skips cpu and memory utilization calculation for node with accelerator. + return Info{GpuUtil: gpuUtil, ResourceName: gpuConfig.ResourceName, Utilization: gpuUtil}, err } cpu, err := calculateUtilizationOfResource(nodeInfo, apiv1.ResourceCPU, skipDaemonSetPods, skipMirrorPods, currentTime) diff --git a/cluster-autoscaler/simulator/utilization/info_test.go b/cluster-autoscaler/simulator/utilization/info_test.go index e35046b4562f..5dd24ad9f632 100644 --- a/cluster-autoscaler/simulator/utilization/info_test.go +++ b/cluster-autoscaler/simulator/utilization/info_test.go @@ -31,7 +31,6 @@ import ( func TestCalculate(t *testing.T) { testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) - gpuLabel := GetGPULabel() pod := BuildTestPod("p1", 100, 200000) pod2 := BuildTestPod("p2", -1, -1) @@ -39,14 +38,16 @@ func TestCalculate(t *testing.T) { SetNodeReadyState(node, true, time.Time{}) nodeInfo := newNodeInfo(node, pod, pod, pod2) - utilInfo, err := Calculate(nodeInfo, false, false, gpuLabel, testTime) + gpuConfig := GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err := Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01) node2 := BuildTestNode("node1", 2000, -1) nodeInfo = newNodeInfo(node2, pod, pod, pod2) - _, err = Calculate(nodeInfo, false, false, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + _, err = Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.Error(t, err) daemonSetPod3 := BuildTestPod("p3", 100, 200000) @@ -57,19 +58,22 @@ func TestCalculate(t *testing.T) { daemonSetPod4.Annotations = map[string]string{"cluster-autoscaler.kubernetes.io/daemonset-pod": "true"} nodeInfo = newNodeInfo(node, pod, pod, pod2, daemonSetPod3, daemonSetPod4) - utilInfo, err = Calculate(nodeInfo, true, false, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, true, false, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 2.5/10, utilInfo.Utilization, 0.01) nodeInfo = newNodeInfo(node, pod, pod2, daemonSetPod3) - utilInfo, err = Calculate(nodeInfo, false, false, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01) terminatedPod := BuildTestPod("podTerminated", 100, 200000) terminatedPod.DeletionTimestamp = &metav1.Time{Time: testTime.Add(-10 * time.Minute)} nodeInfo = newNodeInfo(node, pod, pod, pod2, terminatedPod) - utilInfo, err = Calculate(nodeInfo, false, false, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01) @@ -79,17 +83,20 @@ func TestCalculate(t *testing.T) { } nodeInfo = newNodeInfo(node, pod, pod, pod2, mirrorPod) - utilInfo, err = Calculate(nodeInfo, false, true, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, false, true, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 2.0/9.0, utilInfo.Utilization, 0.01) nodeInfo = newNodeInfo(node, pod, pod2, mirrorPod) - utilInfo, err = Calculate(nodeInfo, false, false, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01) nodeInfo = newNodeInfo(node, pod, mirrorPod, daemonSetPod3) - utilInfo, err = Calculate(nodeInfo, true, true, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, true, true, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 1.0/8.0, utilInfo.Utilization, 0.01) @@ -99,7 +106,8 @@ func TestCalculate(t *testing.T) { RequestGpuForPod(gpuPod, 1) TolerateGpuForPod(gpuPod) nodeInfo = newNodeInfo(gpuNode, pod, pod, gpuPod) - utilInfo, err = Calculate(nodeInfo, false, false, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 1/1, utilInfo.Utilization, 0.01) @@ -107,7 +115,8 @@ func TestCalculate(t *testing.T) { gpuNode = BuildTestNode("gpu_node", 2000, 2000000) AddGpuLabelToNode(gpuNode) nodeInfo = newNodeInfo(gpuNode, pod, pod) - utilInfo, err = Calculate(nodeInfo, false, false, gpuLabel, testTime) + gpuConfig = GetAcceleratorFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.NoError(t, err) assert.Zero(t, utilInfo.Utilization) } diff --git a/cluster-autoscaler/utils/test/test_utils.go b/cluster-autoscaler/utils/test/test_utils.go index 6e1d84ed7572..042b7d512f16 100644 --- a/cluster-autoscaler/utils/test/test_utils.go +++ b/cluster-autoscaler/utils/test/test_utils.go @@ -28,6 +28,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" kube_types "k8s.io/kubernetes/pkg/kubelet/types" ) @@ -240,6 +241,20 @@ func GetGPULabel() string { return gpuLabel } +// GetAcceleratorFromNode returns the accelerator of the node if it has one. This is only used in unit tests. +func GetAcceleratorFromNode(node *apiv1.Node) *cloudprovider.GpuConfig { + gpuType, hasGpuLabel := node.Labels[gpuLabel] + gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable[resourceNvidiaGPU] + if hasGpuLabel || (hasGpuAllocatable && !gpuAllocatable.IsZero()) { + return &cloudprovider.GpuConfig{ + Label: gpuLabel, + Type: gpuType, + ResourceName: resourceNvidiaGPU, + } + } + return nil +} + // SetNodeReadyState sets node ready state to either ConditionTrue or ConditionFalse. func SetNodeReadyState(node *apiv1.Node, ready bool, lastTransition time.Time) { if ready {