Commit

Check min size of node group and resource limits for set of nodes
yaroslava-serdiuk committed Feb 13, 2023

1 parent 9f04d4f commit d660b80
Showing 3 changed files with 176 additions and 21 deletions.
68 changes: 48 additions & 20 deletions cluster-autoscaler/core/scaledown/unneeded/nodes.go
@@ -118,20 +118,28 @@ func (n *Nodes) Drop(node string) {
 // into empty and non-empty node lists, as well as a list of nodes that were
 // unneeded, but are not removable, annotated by reason.
 func (n *Nodes) RemovableAt(context *context.AutoscalingContext, ts time.Time, resourcesLeft resource.Limits, resourcesWithLimits []string, as scaledown.ActuationStatus) (empty, needDrain []simulator.NodeToBeRemoved, unremovable []*simulator.UnremovableNode) {
-	nodeGroupSize := utils.GetNodeGroupSizeMap(context.CloudProvider)
-	for nodeName, v := range n.byName {
-		klog.V(2).Infof("%s was unneeded for %s", nodeName, ts.Sub(v.since).String())
+	nodeGroupSize := make(map[string]int)
+	for k, v := range utils.GetNodeGroupSizeMap(context.CloudProvider) {
+		nodeGroupSize[k] = v
+	}
+	resourcesLeftCopy := resourcesLeft.DeepCopy()
+	emptyNodes, drainNodes := n.sortEmptyAndNonEmptyNodes()
 
-		if r := n.unremovableReason(context, v, ts, nodeGroupSize, resourcesLeft, resourcesWithLimits, as); r != simulator.NoReason {
+	for nodeName, v := range emptyNodes {
+		klog.V(2).Infof("%s was unneeded for %s", nodeName, ts.Sub(v.since).String())
+		if r := n.unremovableReason(context, v, ts, nodeGroupSize, resourcesLeftCopy, resourcesWithLimits, as); r != simulator.NoReason {
 			unremovable = append(unremovable, &simulator.UnremovableNode{Node: v.ntbr.Node, Reason: r})
 			continue
 		}
-
-		if len(v.ntbr.PodsToReschedule) > 0 {
-			needDrain = append(needDrain, v.ntbr)
-		} else {
-			empty = append(empty, v.ntbr)
-		}
+		empty = append(empty, v.ntbr)
+	}
+	for nodeName, v := range drainNodes {
+		klog.V(2).Infof("%s was unneeded for %s", nodeName, ts.Sub(v.since).String())
+		if r := n.unremovableReason(context, v, ts, nodeGroupSize, resourcesLeftCopy, resourcesWithLimits, as); r != simulator.NoReason {
+			unremovable = append(unremovable, &simulator.UnremovableNode{Node: v.ntbr.Node, Reason: r})
+			continue
+		}
+		needDrain = append(needDrain, v.ntbr)
 	}
 	return
 }
@@ -177,16 +185,8 @@ func (n *Nodes) unremovableReason(context *context.AutoscalingContext, v *node,
 		}
 	}
 
-	size, found := nodeGroupSize[nodeGroup.Id()]
-	if !found {
-		klog.Errorf("Error while checking node group size %s: group size not found in cache", nodeGroup.Id())
-		return simulator.UnexpectedError
-	}
-
-	deletionsInProgress := as.DeletionsCount(nodeGroup.Id())
-	if size-deletionsInProgress <= nodeGroup.MinSize() {
-		klog.V(1).Infof("Skipping %s - node group min size reached", node.Name)
-		return simulator.NodeGroupMinSizeReached
+	if reason := verifyMinSize(node.Name, nodeGroup, nodeGroupSize, as); reason != simulator.NoReason {
+		return reason
 	}
 
 	resourceDelta, err := n.limitsFinder.DeltaForNode(context, node, nodeGroup, resourcesWithLimits)
@@ -195,7 +195,7 @@ func (n *Nodes) unremovableReason(context *context.AutoscalingContext, v *node,
 		return simulator.UnexpectedError
 	}
 
-	checkResult := resourcesLeft.CheckDeltaWithinLimits(resourceDelta)
+	checkResult := resourcesLeft.TryDecrementBy(resourceDelta)
 	if checkResult.Exceeded() {
 		klog.V(4).Infof("Skipping %s - minimal limit exceeded for %v", node.Name, checkResult.ExceededResources)
 		for _, resource := range checkResult.ExceededResources {
@@ -213,3 +213,31 @@ func (n *Nodes) unremovableReason(context *context.AutoscalingContext, v *node,

 	return simulator.NoReason
 }
+
+func (n *Nodes) sortEmptyAndNonEmptyNodes() (empty, needDrain map[string]*node) {
+	empty = make(map[string]*node)
+	needDrain = make(map[string]*node)
+	for name, v := range n.byName {
+		if len(v.ntbr.PodsToReschedule) > 0 {
+			needDrain[name] = v
+		} else {
+			empty[name] = v
+		}
+	}
+	return
+}
+
+func verifyMinSize(nodeName string, nodeGroup cloudprovider.NodeGroup, nodeGroupSize map[string]int, as scaledown.ActuationStatus) simulator.UnremovableReason {
+	size, found := nodeGroupSize[nodeGroup.Id()]
+	if !found {
+		klog.Errorf("Error while checking node group size %s: group size not found in cache", nodeGroup.Id())
+		return simulator.UnexpectedError
+	}
+	deletionsInProgress := as.DeletionsCount(nodeGroup.Id())
+	if size-deletionsInProgress <= nodeGroup.MinSize() {
+		klog.V(1).Infof("Skipping %s - node group min size reached", nodeName)
+		return simulator.NodeGroupMinSizeReached
+	}
+	nodeGroupSize[nodeGroup.Id()]--
+	return simulator.NoReason
+}
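
Note on the change: RemovableAt now evaluates the whole candidate set against shared, mutable bookkeeping. The node-group size map is copied once and verifyMinSize decrements the cached size for every node it accepts, while the copied resource budget is consumed through TryDecrementBy, so each later candidate sees the capacity already claimed by earlier ones, and empty nodes (processed first) get first claim on that budget. Below is a minimal standalone sketch of the min-size accounting; the group struct, names, and numbers are made up for illustration and are not the cluster-autoscaler cloudprovider.NodeGroup or ActuationStatus APIs.

package main

import "fmt"

// group carries just the fields the sketch needs; it stands in for a cached
// view of a node group, not the real cloudprovider.NodeGroup interface.
type group struct {
	id      string
	minSize int
	size    int // cached target size, shared across all candidates
}

// verifyMinSize mirrors the idea of the new helper: accept a node only if the
// group would stay at or above its minimum after deletions already in
// progress, and record the acceptance by decrementing the cached size.
func verifyMinSize(nodeName string, g *group, deletionsInProgress int) bool {
	if g.size-deletionsInProgress <= g.minSize {
		fmt.Printf("skipping %s: %s at min size\n", nodeName, g.id)
		return false
	}
	g.size-- // later candidates from the same group see the smaller size
	return true
}

func main() {
	g := &group{id: "ng", minSize: 1, size: 3}
	for _, name := range []string{"empty-0", "empty-1", "empty-2"} {
		if verifyMinSize(name, g, 0) {
			fmt.Printf("%s can be removed\n", name)
		}
	}
	// empty-0 and empty-1 are accepted; empty-2 is rejected because removing
	// it would take the group below minSize.
}

This per-set accounting is what the TestRemovableAt cases in the new test file exercise with larger groups and ongoing deletions.
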
127 changes: 127 additions & 0 deletions cluster-autoscaler/core/scaledown/unneeded/nodes_test.go
@@ -17,11 +17,22 @@ limitations under the License.
package unneeded

import (
"fmt"
"testing"
"time"

apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/context"
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/resource"
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
. "k8s.io/autoscaler/cluster-autoscaler/core/test"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
"k8s.io/client-go/kubernetes/fake"

"github.com/stretchr/testify/assert"
)
@@ -115,3 +126,119 @@ func makeNode(name, version string) simulator.NodeToBeRemoved {
func version(n simulator.NodeToBeRemoved) string {
return n.Node.Annotations[testVersion]
}

func TestRemovableAt(t *testing.T) {

testCases := []struct {
name string
numEmpty int
numDrain int
minSize int
targetSize int
numOngoingDeletions int
numEmptyToRemove int
numDrainToRemove int
}{
{
name: "Node group min size is not reached",
numEmpty: 3,
numDrain: 2,
minSize: 1,
targetSize: 10,
numOngoingDeletions: 2,
numEmptyToRemove: 3,
numDrainToRemove: 2,
},
{
name: "Node group min size is reached for drain nodes",
numEmpty: 3,
numDrain: 5,
minSize: 1,
targetSize: 10,
numOngoingDeletions: 2,
numEmptyToRemove: 3,
numDrainToRemove: 4,
},
{
name: "Node group min size is reached for empty and drain nodes",
numEmpty: 3,
numDrain: 5,
minSize: 1,
targetSize: 5,
numOngoingDeletions: 2,
numEmptyToRemove: 2,
numDrainToRemove: 0,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
ng := testprovider.NewTestNodeGroup("ng", 100, tc.minSize, tc.targetSize, true, false, "", nil, nil)
empty := []simulator.NodeToBeRemoved{}
for i := 0; i < tc.numEmpty; i++ {
empty = append(empty, simulator.NodeToBeRemoved{
Node: BuildTestNode(fmt.Sprintf("empty-%d", i), 10, 100),
})
}
drain := []simulator.NodeToBeRemoved{}
for i := 0; i < tc.numDrain; i++ {
drain = append(drain, simulator.NodeToBeRemoved{
Node: BuildTestNode(fmt.Sprintf("drain-%d", i), 10, 100),
PodsToReschedule: []*apiv1.Pod{BuildTestPod(fmt.Sprintf("pod-%d", i), 1, 1)},
})
}

nodes := append(empty, drain...)
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.InsertNodeGroup(ng)
for _, node := range nodes {
provider.AddNode("ng", node.Node)
}

as := &fakeActuationStatus{deletionCount: map[string]int{"ng": tc.numOngoingDeletions}}

rsLister, err := kube_util.NewTestReplicaSetLister(nil)
assert.NoError(t, err)
registry := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, nil, rsLister, nil)
ctx, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{ScaleDownSimulationTimeout: 5 * time.Minute}, &fake.Clientset{}, registry, provider, nil, nil)
assert.NoError(t, err)

n := NewNodes(&fakeScaleDownTimeGetter{}, &resource.LimitsFinder{})
n.Update(nodes, time.Now())
gotEmptyToRemove, gotDrainToRemove, _ := n.RemovableAt(&ctx, time.Now(), resource.Limits{}, []string{}, as)
if len(gotDrainToRemove) != tc.numDrainToRemove || len(gotEmptyToRemove) != tc.numEmptyToRemove {
t.Errorf("%s: RemovableAt() returned %d, %d, want %d, %d", tc.name, len(gotEmptyToRemove), len(gotDrainToRemove), tc.numEmptyToRemove, tc.numDrainToRemove)
}
})
}
}

type fakeActuationStatus struct {
recentEvictions []*apiv1.Pod
deletionCount map[string]int
}

func (f *fakeActuationStatus) RecentEvictions() []*apiv1.Pod {
return f.recentEvictions
}

func (f *fakeActuationStatus) DeletionsInProgress() ([]string, []string) {
return nil, nil
}

func (f *fakeActuationStatus) DeletionResults() (map[string]status.NodeDeleteResult, time.Time) {
return nil, time.Time{}
}

func (f *fakeActuationStatus) DeletionsCount(nodeGroup string) int {
return f.deletionCount[nodeGroup]
}

type fakeScaleDownTimeGetter struct{}

func (f *fakeScaleDownTimeGetter) GetScaleDownUnneededTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
return 0 * time.Second, nil
}

func (f *fakeScaleDownTimeGetter) GetScaleDownUnreadyTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
return 0 * time.Second, nil
}
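
To run just the new test locally, assuming the Go module rooted at cluster-autoscaler/, something like the following should work:

	cd cluster-autoscaler
	go test ./core/scaledown/unneeded/... -run TestRemovableAt -v
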
2 changes: 1 addition & 1 deletion cluster-autoscaler/simulator/cluster.go
@@ -156,7 +156,7 @@ func (r *RemovalSimulator) SimulateNodeRemoval(
klog.V(2).Infof("nodeInfo for %s not found", nodeName)
return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: UnexpectedError}
}

podsToRemove, daemonSetPods, blockingPod, err := GetPodsToMove(nodeInfo, r.deleteOptions, r.listers, pdbs, timestamp)
if err != nil {
klog.V(2).Infof("node %s cannot be removed: %v", nodeName, err)
