Skip to content

Commit

Permalink
UPSTREAM: 3124: openshift: Allow small tolerance on memory capacity w…
Browse files Browse the repository at this point in the history
…hen comparing nodegroups

This allows developers to better interpet how the calculations are being
done by leaving the values as "Quantities". For example, the max
difference is now a string converted to a quantity which will be easier
to reason about and update if needed in the future.
Also adds tests that match real values from a real set of nodes that
would be expected to be the same
  • Loading branch information
JoelSpeed committed Jun 11, 2020
1 parent f595858 commit 90751c4
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 20 deletions.
32 changes: 17 additions & 15 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ const (
// MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods)
// can differ between groups in the same NodeGroupSet
MaxFreeDifferenceRatio = 0.05
// MaxMemoryDifferenceInKiloBytes describes how much memory
// capacity can differ but still be considered equal.
MaxMemoryDifferenceInKiloBytes = 256000
// MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between
// groups in the same NodeGroupSet
MaxCapacityMemoryDifferenceRatio = 0.015
)

// BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity
Expand All @@ -53,21 +53,25 @@ var BasicIgnoredLabels = map[string]bool{
// similar enough to be considered a part of a single NodeGroupSet.
type NodeInfoComparator func(n1, n2 *schedulernodeinfo.NodeInfo) bool

func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
func resourceMapsWithinTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
maxDifferenceRatio float64) bool {
for _, qtyList := range resources {
if len(qtyList) != 2 {
return false
}
larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
if larger-smaller > larger*maxDifferenceRatio {
if !resourceListWithinTolerance(qtyList, maxDifferenceRatio) {
return false
}
}
return true
}

func resourceListWithinTolerance(qtyList []resource.Quantity, maxDifferenceRatio float64) bool {
if len(qtyList) != 2 {
return false
}
larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
return larger-smaller <= larger*maxDifferenceRatio
}

func compareLabels(nodes []*schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool {
labels := make(map[string][]string)
for _, node := range nodes {
Expand Down Expand Up @@ -123,9 +127,7 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL
}
switch kind {
case apiv1.ResourceMemory:
// For memory capacity we allow a small tolerance
memoryDifference := math.Abs(float64(qtyList[0].Value()) - float64(qtyList[1].Value()))
if memoryDifference > MaxMemoryDifferenceInKiloBytes {
if !resourceListWithinTolerance(qtyList, MaxCapacityMemoryDifferenceRatio) {
return false
}
default:
Expand All @@ -139,10 +141,10 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL
}

// For allocatable and free we allow resource quantities to be within a few % of each other
if !compareResourceMapsWithTolerance(allocatable, MaxAllocatableDifferenceRatio) {
if !resourceMapsWithinTolerance(allocatable, MaxAllocatableDifferenceRatio) {
return false
}
if !compareResourceMapsWithTolerance(free, MaxFreeDifferenceRatio) {
if !resourceMapsWithinTolerance(free, MaxFreeDifferenceRatio) {
return false
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,56 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
}

func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
n1 := BuildTestNode("node1", 1000, MaxMemoryDifferenceInKiloBytes)
n1 := BuildTestNode("node1", 1000, 1000)

// Different memory capacity within tolerance
n2 := BuildTestNode("node2", 1000, MaxMemoryDifferenceInKiloBytes)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes, resource.DecimalSI)
n2 := BuildTestNode("node2", 1000, 1000)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)+1, resource.DecimalSI)
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
n3 := BuildTestNode("node3", 1000, MaxMemoryDifferenceInKiloBytes)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes+1, resource.DecimalSI)
n3 := BuildTestNode("node3", 1000, 1000)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)-1, resource.DecimalSI)
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) {
// Use realistic memory capacity (taken from real nodes)
// 15944120 KB ~= 16GiB (m5.xLarge)
q1 := resource.MustParse("16116152Ki")
q2 := resource.MustParse("15944120Ki")

n1 := BuildTestNode("node1", 1000, q1.Value())

// Different memory capacity within tolerance
// Value taken from another m5.xLarge in a different zone
n2 := BuildTestNode("node2", 1000, q2.Value())
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
// Value of q1 * 1.02
q3 := resource.MustParse("16438475Ki")
n3 := BuildTestNode("node3", 1000, q3.Value())
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) {
// Use realistic memory capacity (taken from real nodes)
// 257217528 KB ~= 256GiB (m5.16xLarge)
q1 := resource.MustParse("259970052Ki")
q2 := resource.MustParse("257217528Ki")

n1 := BuildTestNode("node1", 1000, q1.Value())

// Different memory capacity within tolerance
// Value taken from another m5.xLarge in a different zone
n2 := BuildTestNode("node2", 1000, q2.Value())
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
// Value of q1 * 1.02
q3 := resource.MustParse("265169453Ki")
n3 := BuildTestNode("node3", 1000, q3.Value())
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

Expand Down

0 comments on commit 90751c4

Please sign in to comment.