diff --git a/pkg/cache/tas_cache_test.go b/pkg/cache/tas_cache_test.go
index 2b5a062d9f..4db40ce601 100644
--- a/pkg/cache/tas_cache_test.go
+++ b/pkg/cache/tas_cache_test.go
@@ -55,6 +55,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 					corev1.ResourceCPU:    resource.MustParse("1"),
 					corev1.ResourceMemory: resource.MustParse("1Gi"),
 				},
+				Conditions: []corev1.NodeCondition{
+					{
+						Type:   corev1.NodeReady,
+						Status: corev1.ConditionTrue,
+					},
+				},
 			},
 		},
 		{
@@ -71,6 +77,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 					corev1.ResourceCPU:    resource.MustParse("1"),
 					corev1.ResourceMemory: resource.MustParse("1Gi"),
 				},
+				Conditions: []corev1.NodeCondition{
+					{
+						Type:   corev1.NodeReady,
+						Status: corev1.ConditionTrue,
+					},
+				},
 			},
 		},
 		{
@@ -87,6 +99,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 					corev1.ResourceCPU:    resource.MustParse("1"),
 					corev1.ResourceMemory: resource.MustParse("1Gi"),
 				},
+				Conditions: []corev1.NodeCondition{
+					{
+						Type:   corev1.NodeReady,
+						Status: corev1.ConditionTrue,
+					},
+				},
 			},
 		},
 		{
@@ -103,6 +121,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 					corev1.ResourceCPU:    resource.MustParse("1"),
 					corev1.ResourceMemory: resource.MustParse("1Gi"),
 				},
+				Conditions: []corev1.NodeCondition{
+					{
+						Type:   corev1.NodeReady,
+						Status: corev1.ConditionTrue,
+					},
+				},
 			},
 		},
 		{
@@ -119,6 +143,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 					corev1.ResourceCPU:    resource.MustParse("1"),
 					corev1.ResourceMemory: resource.MustParse("1Gi"),
 				},
+				Conditions: []corev1.NodeCondition{
+					{
+						Type:   corev1.NodeReady,
+						Status: corev1.ConditionTrue,
+					},
+				},
 			},
 		},
 		{
@@ -135,6 +165,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 					corev1.ResourceCPU:    resource.MustParse("2"),
 					corev1.ResourceMemory: resource.MustParse("4Gi"),
 				},
+				Conditions: []corev1.NodeCondition{
+					{
+						Type:   corev1.NodeReady,
+						Status: corev1.ConditionTrue,
+					},
+				},
 			},
 		},
 	}
@@ -185,6 +221,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 						Allocatable: corev1.ResourceList{
 							corev1.ResourceCPU: resource.MustParse("2"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 				{
@@ -200,6 +242,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 						Allocatable: corev1.ResourceList{
 							corev1.ResourceCPU: resource.MustParse("2"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 				{
@@ -215,6 +263,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 						Allocatable: corev1.ResourceList{
 							corev1.ResourceCPU: resource.MustParse("1"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 				{
@@ -230,6 +284,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 						Allocatable: corev1.ResourceList{
 							corev1.ResourceCPU: resource.MustParse("1"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 				{
@@ -245,6 +305,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 						Allocatable: corev1.ResourceList{
 							corev1.ResourceCPU: resource.MustParse("1"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 				{
@@ -260,6 +326,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 						Allocatable: corev1.ResourceList{
 							corev1.ResourceCPU: resource.MustParse("1"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},
@@ -656,6 +728,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 							corev1.ResourceCPU:    resource.MustParse("1"),
 							corev1.ResourceMemory: resource.MustParse("1Gi"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},
@@ -698,6 +776,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 							corev1.ResourceCPU:    resource.MustParse("1"),
 							corev1.ResourceMemory: resource.MustParse("1Gi"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},
@@ -726,6 +810,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 							corev1.ResourceCPU:    resource.MustParse("1"),
 							corev1.ResourceMemory: resource.MustParse("1Gi"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},
@@ -768,6 +858,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 							corev1.ResourceCPU:    resource.MustParse("1"),
 							corev1.ResourceMemory: resource.MustParse("1Gi"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},
@@ -816,6 +912,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 							corev1.ResourceCPU:    resource.MustParse("1"),
 							corev1.ResourceMemory: resource.MustParse("1Gi"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},
@@ -850,6 +952,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 							corev1.ResourceCPU:    resource.MustParse("1"),
 							corev1.ResourceMemory: resource.MustParse("1Gi"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},
@@ -885,6 +993,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 							corev1.ResourceCPU:    resource.MustParse("1"),
 							corev1.ResourceMemory: resource.MustParse("1Gi"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 				{
@@ -899,6 +1013,12 @@ func TestFindTopologyAssignment(t *testing.T) {
 							corev1.ResourceCPU:    resource.MustParse("1"),
 							corev1.ResourceMemory: resource.MustParse("1Gi"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},
@@ -927,6 +1047,47 @@ func TestFindTopologyAssignment(t *testing.T) {
 				},
 			},
 		},
+		"no assignment as node is not ready": {
+			nodes: []corev1.Node{
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "b1-r1-x1",
+						Labels: map[string]string{
+							"zone":       "zone-a",
+							tasHostLabel: "x1",
+						},
+					},
+					Status: corev1.NodeStatus{
+						Allocatable: corev1.ResourceList{
+							corev1.ResourceCPU:    resource.MustParse("1"),
+							corev1.ResourceMemory: resource.MustParse("1Gi"),
+						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionFalse,
+							},
+							{
+								Type:   corev1.NodeNetworkUnavailable,
+								Status: corev1.ConditionTrue,
+							},
+						},
+					},
+				},
+			},
+			request: kueue.PodSetTopologyRequest{
+				Required: ptr.To(tasHostLabel),
+			},
+			nodeLabels: map[string]string{
+				"zone": "zone-a",
+			},
+			levels: defaultOneLevel,
+			requests: resources.Requests{
+				corev1.ResourceCPU: 1000,
+			},
+			count:          1,
+			wantAssignment: nil,
+		},
 	}
 	for name, tc := range cases {
 		t.Run(name, func(t *testing.T) {
diff --git a/pkg/cache/tas_flavor.go b/pkg/cache/tas_flavor.go
index 7f4d832fa5..b29a091c62 100644
--- a/pkg/cache/tas_flavor.go
+++ b/pkg/cache/tas_flavor.go
@@ -108,12 +108,25 @@ func (c *TASFlavorCache) snapshotForNodes(log logr.Logger, nodes []corev1.Node,
 	snapshot := newTASFlavorSnapshot(log, c.Levels)
 	nodeToDomain := make(map[string]utiltas.TopologyDomainID)
 	for _, node := range nodes {
-		levelValues := utiltas.LevelValues(c.Levels, node.Labels)
-		capacity := resources.NewRequests(node.Status.Allocatable)
-		domainID := utiltas.DomainID(levelValues)
-		snapshot.levelValuesPerDomain[domainID] = levelValues
-		snapshot.addCapacity(domainID, capacity)
-		nodeToDomain[node.Name] = domainID
+		ready := false
+		for _, cond := range node.Status.Conditions {
+			// Only nodes that are healthy and ready to accept pods are considered for the scheduling calculation.
+			// The condition list can hold several entries in any order, so decide on the NodeReady entry itself
+			// instead of letting a later, unrelated condition overwrite the result.
+			if cond.Type == corev1.NodeReady {
+				ready = cond.Status == corev1.ConditionTrue
+				break
+			}
+		}
+		if ready {
+			levelValues := utiltas.LevelValues(c.Levels, node.Labels)
+			capacity := resources.NewRequests(node.Status.Allocatable)
+			domainID := utiltas.DomainID(levelValues)
+			snapshot.levelValuesPerDomain[domainID] = levelValues
+			snapshot.addCapacity(domainID, capacity)
+			nodeToDomain[node.Name] = domainID
+		} else {
+			log.V(3).Info("Node was excluded from TAS Flavor snapshot", "nodeName", node.Name, "nodeStatusConditions", node.Status.Conditions)
+		}
 	}
 	snapshot.initialize()
 	for domainID, usage := range c.usage {
diff --git a/pkg/scheduler/scheduler_test.go b/pkg/scheduler/scheduler_test.go
index 36a4816cf6..f8842b6746 100644
--- a/pkg/scheduler/scheduler_test.go
+++ b/pkg/scheduler/scheduler_test.go
@@ -3881,6 +3881,12 @@ func TestScheduleForTAS(t *testing.T) {
 					corev1.ResourceCPU:    resource.MustParse("1"),
 					corev1.ResourceMemory: resource.MustParse("1Gi"),
 				},
+				Conditions: []corev1.NodeCondition{
+					{
+						Type:   corev1.NodeReady,
+						Status: corev1.ConditionTrue,
+					},
+				},
 			},
 		},
 	}
@@ -4202,6 +4208,12 @@ func TestScheduleForTAS(t *testing.T) {
 						Allocatable: corev1.ResourceList{
 							corev1.ResourceCPU: resource.MustParse("1"),
 						},
+						Conditions: []corev1.NodeCondition{
+							{
+								Type:   corev1.NodeReady,
+								Status: corev1.ConditionTrue,
+							},
+						},
 					},
 				},
 			},