From 2595cee0b6da656be679497bc4567c4923e44939 Mon Sep 17 00:00:00 2001 From: Joel Speed Date: Tue, 12 May 2020 15:47:32 +0100 Subject: [PATCH] Allow 1.5% tolerance in memory capacity when comparing nodegroups --- .../nodegroupset/compare_nodegroups.go | 46 +++++++------------ .../nodegroupset/compare_nodegroups_test.go | 40 ++++++++++++---- 2 files changed, 47 insertions(+), 39 deletions(-) diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go index c54b1923ee5f..15ac835c3448 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go @@ -31,11 +31,9 @@ const ( // MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods) // can differ between groups in the same NodeGroupSet MaxFreeDifferenceRatio = 0.05 -) - -var ( - // MaxMemoryDifference describes how much memory capacity can differ but still be considered equal. - MaxMemoryDifference = resource.MustParse("256Mi") + // MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between + // groups in the same NodeGroupSet + MaxCapacityMemoryDifferenceRatio = 0.015 ) // BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity @@ -58,18 +56,25 @@ type NodeInfoComparator func(n1, n2 *schedulerframework.NodeInfo) bool func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity, maxDifferenceRatio float64) bool { for _, qtyList := range resources { - if len(qtyList) != 2 { - return false - } - larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) - smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) - if larger-smaller > larger*maxDifferenceRatio { + if !compareResourceListWithTolerance(qtyList, maxDifferenceRatio) { return false } } return true } +func compareResourceListWithTolerance(qtyList []resource.Quantity, maxDifferenceRatio float64) bool { + if len(qtyList) != 2 { + return false + } + larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) + smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) + if larger-smaller > larger*maxDifferenceRatio { + return false + } + return true +} + func compareLabels(nodes []*schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool { labels := make(map[string][]string) for _, node := range nodes { @@ -133,11 +138,7 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored } switch kind { case apiv1.ResourceMemory: - // For memory capacity we allow a small tolerance - difference := absSub(qtyList[0], qtyList[1]) - - // Cmp compares two quantities and returns -1 for less than, 0 for equal, 1 for greater than - if difference.Cmp(MaxMemoryDifference) > 0 { + if !compareResourceListWithTolerance(qtyList, MaxCapacityMemoryDifferenceRatio) { return false } default: @@ -164,16 +165,3 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored return true } - -// absSub returns the absolute value of a - b -func absSub(a, b resource.Quantity) *resource.Quantity { - q := &resource.Quantity{} - a.DeepCopyInto(q) - q.Sub(b) - - if q.Sign() == -1 { - q.Neg() - } - - return q -} diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go index d873da8350e9..fc0e29f063b9 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go @@ -101,20 +101,20 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) { func TestNodesSimilarVariousMemoryRequirements(t *testing.T) { comparator := CreateGenericNodeInfoComparator([]string{}) - n1 := BuildTestNode("node1", 1000, MaxMemoryDifference.Value()) + n1 := BuildTestNode("node1", 1000, 1000) // Different memory capacity within tolerance - n2 := BuildTestNode("node2", 1000, MaxMemoryDifference.Value()) - n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifference.Value(), resource.DecimalSI) + n2 := BuildTestNode("node2", 1000, 1000) + n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)+1, resource.DecimalSI) checkNodesSimilar(t, n1, n2, comparator, true) // Different memory capacity exceeds tolerance - n3 := BuildTestNode("node3", 1000, MaxMemoryDifference.Value()) - n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifference.Value()+1, resource.DecimalSI) + n3 := BuildTestNode("node3", 1000, 1000) + n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)+1, resource.DecimalSI) checkNodesSimilar(t, n1, n3, comparator, false) } -func TestNodesSimilarVariousLargeMemoryRequirements(t *testing.T) { +func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) { comparator := CreateGenericNodeInfoComparator([]string{}) // Use realistic memory capacity (taken from real nodes) @@ -130,10 +130,30 @@ func TestNodesSimilarVariousLargeMemoryRequirements(t *testing.T) { checkNodesSimilar(t, n1, n2, comparator, true) // Different memory capacity exceeds tolerance - // Same value as q1 but with MaxMemoryDifference added twice - q3 := resource.MustParse("16116152Ki") - q3.Add(MaxMemoryDifference) - q3.Add(MaxMemoryDifference) + // Value of q1 * 1.02 + q3 := resource.MustParse("16438475Ki") + n3 := BuildTestNode("node3", 1000, q3.Value()) + checkNodesSimilar(t, n1, n3, comparator, false) +} + +func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) { + comparator := CreateGenericNodeInfoComparator([]string{}) + + // Use realistic memory capacity (taken from real nodes) + // 257217528 KB ~= 256GiB (m5.16xLarge) + q1 := resource.MustParse("259970052Ki") + q2 := resource.MustParse("257217528Ki") + + n1 := BuildTestNode("node1", 1000, q1.Value()) + + // Different memory capacity within tolerance + // Value taken from another m5.xLarge in a different zone + n2 := BuildTestNode("node2", 1000, q2.Value()) + checkNodesSimilar(t, n1, n2, comparator, true) + + // Different memory capacity exceeds tolerance + // Value of q1 * 1.02 + q3 := resource.MustParse("265169453Ki") n3 := BuildTestNode("node3", 1000, q3.Value()) checkNodesSimilar(t, n1, n3, comparator, false) }