Skip to content

Commit

Permalink
Merge pull request #5402 from Bryce-Soghigian/bsoghigian/adding-confi…
Browse files Browse the repository at this point in the history
…gurable-difference-ratios

adding configurable difference ratios
  • Loading branch information
k8s-ci-robot authored Jan 10, 2023
2 parents 90419dc + cb19141 commit b94f340
Show file tree
Hide file tree
Showing 15 changed files with 97 additions and 54 deletions.
30 changes: 30 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,34 @@ type NodeGroupAutoscalingOptions struct {
ScaleDownUnreadyTime time.Duration
}

const (
// DefaultMaxAllocatableDifferenceRatio describes how Node.Status.Allocatable can differ between groups in the same NodeGroupSet
DefaultMaxAllocatableDifferenceRatio = 0.05
// DefaultMaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods)
DefaultMaxFreeDifferenceRatio = 0.05
// DefaultMaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory
DefaultMaxCapacityMemoryDifferenceRatio = 0.015
)

// NodeGroupDifferenceRatios contains various ratios used to determine if two NodeGroups are similar and makes scaling decisions
type NodeGroupDifferenceRatios struct {
// MaxAllocatableDifferenceRatio describes how Node.Status.Allocatable can differ between groups in the same NodeGroupSet
MaxAllocatableDifferenceRatio float64
// MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods) can differ between groups in the same NodeGroupSet
MaxFreeDifferenceRatio float64
// MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between groups in the same NodeGroupSetAutoscalingOptions
MaxCapacityMemoryDifferenceRatio float64
}

// NewDefaultNodeGroupDifferenceRatios returns default NodeGroupDifferenceRatios values
func NewDefaultNodeGroupDifferenceRatios() NodeGroupDifferenceRatios {
return NodeGroupDifferenceRatios{
MaxAllocatableDifferenceRatio: DefaultMaxAllocatableDifferenceRatio,
MaxFreeDifferenceRatio: DefaultMaxFreeDifferenceRatio,
MaxCapacityMemoryDifferenceRatio: DefaultMaxCapacityMemoryDifferenceRatio,
}
}

// AutoscalingOptions contain various options to customize how autoscaling works
type AutoscalingOptions struct {
// NodeGroupDefaults are default values for per NodeGroup options.
Expand Down Expand Up @@ -217,4 +245,6 @@ type AutoscalingOptions struct {
NodeDeleteDelayAfterTaint time.Duration
// ParallelDrain is whether CA can drain nodes in parallel.
ParallelDrain bool
// NodeGroupSetRatio is a collection of ratios used by CA used to make scaling decisions.
NodeGroupSetRatios NodeGroupDifferenceRatios
}
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/test/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func NewTestProcessors(context *context.AutoscalingContext) *processors.Autoscal
podlistprocessor.NewFilterOutSchedulablePodListProcessor(context.PredicateChecker),
),
NodeGroupListProcessor: &nodegroups.NoOpNodeGroupListProcessor{},
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}),
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}, config.NodeGroupDifferenceRatios{}),
ScaleDownSetProcessor: nodes.NewPostFilteringScaleDownNodeProcessor(),
// TODO(bskiba): change scale up test so that this can be a NoOpProcessor
ScaleUpStatusProcessor: &status.EventingScaleUpStatusProcessor{},
Expand Down
10 changes: 9 additions & 1 deletion cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ var (
nodeDeleteDelayAfterTaint = flag.Duration("node-delete-delay-after-taint", 5*time.Second, "How long to wait before deleting a node after tainting it")
scaleDownSimulationTimeout = flag.Duration("scale-down-simulation-timeout", 5*time.Minute, "How long should we run scale down simulation.")
parallelDrain = flag.Bool("parallel-drain", false, "Whether to allow parallel drain of nodes.")
maxCapacityMemoryDifferenceRatio = flag.Float64("memory-difference-ratio", config.DefaultMaxCapacityMemoryDifferenceRatio, "Maximum difference in memory capacity between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's memory capacity.")
maxFreeDifferenceRatio = flag.Float64("max-free-difference-ratio", config.DefaultMaxFreeDifferenceRatio, "Maximum difference in free resources between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's free resource.")
maxAllocatableDifferenceRatio = flag.Float64("max-allocatable-difference-ratio", config.DefaultMaxAllocatableDifferenceRatio, "Maximum difference in allocatable resources between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's allocatable resource.")
)

func createAutoscalingOptions() config.AutoscalingOptions {
Expand Down Expand Up @@ -319,6 +322,11 @@ func createAutoscalingOptions() config.AutoscalingOptions {
NodeDeleteDelayAfterTaint: *nodeDeleteDelayAfterTaint,
ScaleDownSimulationTimeout: *scaleDownSimulationTimeout,
ParallelDrain: *parallelDrain,
NodeGroupSetRatios: config.NodeGroupDifferenceRatios{
MaxCapacityMemoryDifferenceRatio: *maxCapacityMemoryDifferenceRatio,
MaxAllocatableDifferenceRatio: *maxAllocatableDifferenceRatio,
MaxFreeDifferenceRatio: *maxFreeDifferenceRatio,
},
}
}

Expand Down Expand Up @@ -411,7 +419,7 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter
} else if autoscalingOptions.CloudProviderName == cloudprovider.ClusterAPIProviderName {
nodeInfoComparatorBuilder = nodegroupset.CreateClusterAPINodeInfoComparator
}
nodeInfoComparator = nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels)
nodeInfoComparator = nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels, autoscalingOptions.NodeGroupSetRatios)
}

opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
Expand Down
5 changes: 3 additions & 2 deletions cluster-autoscaler/processors/nodegroupset/aws_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ limitations under the License.
package nodegroupset

import (
"k8s.io/autoscaler/cluster-autoscaler/config"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

// CreateAwsNodeInfoComparator returns a comparator that checks if two nodes should be considered
// part of the same NodeGroupSet. This is true if they match usual conditions checked by IsCloudProviderNodeInfoSimilar,
// even if they have different AWS-specific labels.
func CreateAwsNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
func CreateAwsNodeInfoComparator(extraIgnoredLabels []string, ratioOpts config.NodeGroupDifferenceRatios) NodeInfoComparator {
awsIgnoredLabels := map[string]bool{
"alpha.eksctl.io/instance-id": true, // this is a label used by eksctl to identify instances.
"alpha.eksctl.io/nodegroup-name": true, // this is a label used by eksctl to identify "node group" names.
Expand All @@ -42,6 +43,6 @@ func CreateAwsNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator
}

return func(n1, n2 *schedulerframework.NodeInfo) bool {
return IsCloudProviderNodeInfoSimilar(n1, n2, awsIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, awsIgnoredLabels, ratioOpts)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@ package nodegroupset
import (
"testing"

"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/context"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
)

func TestIsAwsNodeInfoSimilar(t *testing.T) {
comparator := CreateAwsNodeInfoComparator([]string{})
comparator := CreateAwsNodeInfoComparator([]string{}, config.NodeGroupDifferenceRatios{})
node1 := BuildTestNode("node1", 1000, 2000)
node2 := BuildTestNode("node2", 1000, 2000)

Expand Down Expand Up @@ -176,6 +177,6 @@ func TestIsAwsNodeInfoSimilar(t *testing.T) {
func TestFindSimilarNodeGroupsAwsBasic(t *testing.T) {
context := &context.AutoscalingContext{}
ni1, ni2, ni3 := buildBasicNodeGroups(context)
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAwsNodeInfoComparator([]string{})}
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAwsNodeInfoComparator([]string{}, config.NodeGroupDifferenceRatios{})}
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package nodegroupset

import (
"k8s.io/autoscaler/cluster-autoscaler/config"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

Expand Down Expand Up @@ -44,7 +45,7 @@ func nodesFromSameAzureNodePoolLegacy(n1, n2 *schedulerframework.NodeInfo) bool
// CreateAzureNodeInfoComparator returns a comparator that checks if two nodes should be considered
// part of the same NodeGroupSet. This is true if they either belong to the same Azure agentpool
// or match usual conditions checked by IsCloudProviderNodeInfoSimilar, even if they have different agentpool labels.
func CreateAzureNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
func CreateAzureNodeInfoComparator(extraIgnoredLabels []string, ratioOpts config.NodeGroupDifferenceRatios) NodeInfoComparator {
azureIgnoredLabels := make(map[string]bool)
for k, v := range BasicIgnoredLabels {
azureIgnoredLabels[k] = v
Expand All @@ -60,6 +61,6 @@ func CreateAzureNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparat
if nodesFromSameAzureNodePool(n1, n2) {
return true
}
return IsCloudProviderNodeInfoSimilar(n1, n2, azureIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, azureIgnoredLabels, ratioOpts)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/context"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
Expand All @@ -29,7 +30,7 @@ import (
)

func TestIsAzureNodeInfoSimilar(t *testing.T) {
comparator := CreateAzureNodeInfoComparator([]string{"example.com/ready"})
comparator := CreateAzureNodeInfoComparator([]string{"example.com/ready"}, config.NodeGroupDifferenceRatios{})
n1 := BuildTestNode("node1", 1000, 2000)
n1.ObjectMeta.Labels["test-label"] = "test-value"
n1.ObjectMeta.Labels["character"] = "thing"
Expand Down Expand Up @@ -75,12 +76,12 @@ func TestIsAzureNodeInfoSimilar(t *testing.T) {
func TestFindSimilarNodeGroupsAzureBasic(t *testing.T) {
context := &context.AutoscalingContext{}
ni1, ni2, ni3 := buildBasicNodeGroups(context)
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{})}
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{}, config.NodeGroupDifferenceRatios{})}
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}

func TestFindSimilarNodeGroupsAzureByLabel(t *testing.T) {
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{})}
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{}, config.NodeGroupDifferenceRatios{})}
context := &context.AutoscalingContext{}

n1 := BuildTestNode("n1", 1000, 1000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/context"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"

Expand Down Expand Up @@ -84,7 +85,7 @@ func basicSimilarNodeGroupsTest(
func TestFindSimilarNodeGroups(t *testing.T) {
context := &context.AutoscalingContext{}
ni1, ni2, ni3 := buildBasicNodeGroups(context)
processor := NewDefaultNodeGroupSetProcessor([]string{})
processor := NewDefaultNodeGroupSetProcessor([]string{}, config.NodeGroupDifferenceRatios{})
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}

Expand All @@ -94,7 +95,7 @@ func TestFindSimilarNodeGroupsCustomLabels(t *testing.T) {
ni1.Node().Labels["example.com/ready"] = "true"
ni2.Node().Labels["example.com/ready"] = "false"

processor := NewDefaultNodeGroupSetProcessor([]string{"example.com/ready"})
processor := NewDefaultNodeGroupSetProcessor([]string{"example.com/ready"}, config.NodeGroupDifferenceRatios{})
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}

Expand All @@ -112,7 +113,7 @@ func TestFindSimilarNodeGroupsCustomComparator(t *testing.T) {
}

func TestBalanceSingleGroup(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor([]string{})
processor := NewDefaultNodeGroupSetProcessor([]string{}, config.NodeGroupDifferenceRatios{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand All @@ -132,7 +133,7 @@ func TestBalanceSingleGroup(t *testing.T) {
}

func TestBalanceUnderMaxSize(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor([]string{})
processor := NewDefaultNodeGroupSetProcessor([]string{}, config.NodeGroupDifferenceRatios{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand Down Expand Up @@ -182,7 +183,7 @@ func TestBalanceUnderMaxSize(t *testing.T) {
}

func TestBalanceHittingMaxSize(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor([]string{})
processor := NewDefaultNodeGroupSetProcessor([]string{}, config.NodeGroupDifferenceRatios{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ limitations under the License.
package nodegroupset

import (
"k8s.io/autoscaler/cluster-autoscaler/config"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

// CreateClusterAPINodeInfoComparator returns a comparator that checks if two nodes should be considered
// part of the same NodeGroupSet. This is true if they match usual conditions checked by IsCloudProviderNodeInfoSimilar,
// even if they have different infrastructure provider-specific labels.
func CreateClusterAPINodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
func CreateClusterAPINodeInfoComparator(extraIgnoredLabels []string, ratioOpts config.NodeGroupDifferenceRatios) NodeInfoComparator {
capiIgnoredLabels := map[string]bool{
"topology.ebs.csi.aws.com/zone": true, // this is a label used by the AWS EBS CSI driver as a target for Persistent Volume Node Affinity
"topology.diskplugin.csi.alibabacloud.com/zone": true, // this is a label used by the Alibaba Cloud CSI driver as a target for Persistent Volume Node Affinity
Expand All @@ -41,6 +42,6 @@ func CreateClusterAPINodeInfoComparator(extraIgnoredLabels []string) NodeInfoCom
}

return func(n1, n2 *schedulerframework.NodeInfo) bool {
return IsCloudProviderNodeInfoSimilar(n1, n2, capiIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, capiIgnoredLabels, ratioOpts)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@ package nodegroupset
import (
"testing"

"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/context"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
)

func TestIsClusterAPINodeInfoSimilar(t *testing.T) {
comparator := CreateClusterAPINodeInfoComparator([]string{})
comparator := CreateClusterAPINodeInfoComparator([]string{}, config.NodeGroupDifferenceRatios{})
node1 := BuildTestNode("node1", 1000, 2000)
node2 := BuildTestNode("node2", 1000, 2000)

Expand Down Expand Up @@ -92,6 +93,6 @@ func TestIsClusterAPINodeInfoSimilar(t *testing.T) {
func TestFindSimilarNodeGroupsClusterAPIBasic(t *testing.T) {
context := &context.AutoscalingContext{}
ni1, ni2, ni3 := buildBasicNodeGroups(context)
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateClusterAPINodeInfoComparator([]string{})}
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateClusterAPINodeInfoComparator([]string{}, config.NodeGroupDifferenceRatios{})}
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}
26 changes: 8 additions & 18 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,11 @@ import (

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

const (
// MaxAllocatableDifferenceRatio describes how Node.Status.Allocatable can differ between
// groups in the same NodeGroupSet
MaxAllocatableDifferenceRatio = 0.05
// MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods)
// can differ between groups in the same NodeGroupSet
MaxFreeDifferenceRatio = 0.05
// MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between
// groups in the same NodeGroupSet
MaxCapacityMemoryDifferenceRatio = 0.015
)

// BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity
// of two nodes. Customized IgnoredLabels can be implemented in the corresponding codes of
// specific cloud provider and the BasicIgnoredLabels should always be considered part of them.
Expand Down Expand Up @@ -92,7 +81,7 @@ func compareLabels(nodes []*schedulerframework.NodeInfo, ignoredLabels map[strin
}

// CreateGenericNodeInfoComparator returns a generic comparator that checks for node group similarity
func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
func CreateGenericNodeInfoComparator(extraIgnoredLabels []string, ratioOpts config.NodeGroupDifferenceRatios) NodeInfoComparator {
genericIgnoredLabels := make(map[string]bool)
for k, v := range BasicIgnoredLabels {
genericIgnoredLabels[k] = v
Expand All @@ -102,7 +91,7 @@ func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoCompar
}

return func(n1, n2 *schedulerframework.NodeInfo) bool {
return IsCloudProviderNodeInfoSimilar(n1, n2, genericIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, genericIgnoredLabels, ratioOpts)
}
}

Expand All @@ -111,7 +100,8 @@ func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoCompar
// somewhat arbitrary, but generally we check if resources provided by both nodes
// are similar enough to likely be the same type of machine and if the set of labels
// is the same (except for a set of labels passed in to be ignored like hostname or zone).
func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool {
func IsCloudProviderNodeInfoSimilar(
n1, n2 *schedulerframework.NodeInfo, ignoredLabels map[string]bool, ratioOpts config.NodeGroupDifferenceRatios) bool {
capacity := make(map[apiv1.ResourceName][]resource.Quantity)
allocatable := make(map[apiv1.ResourceName][]resource.Quantity)
free := make(map[apiv1.ResourceName][]resource.Quantity)
Expand All @@ -136,7 +126,7 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored
}
switch kind {
case apiv1.ResourceMemory:
if !resourceListWithinTolerance(qtyList, MaxCapacityMemoryDifferenceRatio) {
if !resourceListWithinTolerance(qtyList, ratioOpts.MaxCapacityMemoryDifferenceRatio) {
return false
}
default:
Expand All @@ -150,10 +140,10 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored
}

// For allocatable and free we allow resource quantities to be within a few % of each other
if !resourceMapsWithinTolerance(allocatable, MaxAllocatableDifferenceRatio) {
if !resourceMapsWithinTolerance(allocatable, ratioOpts.MaxAllocatableDifferenceRatio) {
return false
}
if !resourceMapsWithinTolerance(free, MaxFreeDifferenceRatio) {
if !resourceMapsWithinTolerance(free, ratioOpts.MaxFreeDifferenceRatio) {
return false
}

Expand Down
Loading

0 comments on commit b94f340

Please sign in to comment.