Skip to content

Commit

Permalink
feature(eviction): add event when EvictPod failed
Browse files Browse the repository at this point in the history
  • Loading branch information
googs1025 committed Nov 12, 2024
1 parent ef0c2c1 commit 7cbf849
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 56 deletions.
6 changes: 5 additions & 1 deletion pkg/descheduler/evictions/evictions.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictTotal)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: total eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictTotal)
return err
}

Expand All @@ -145,6 +146,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: node eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictPerNode)
return err
}
}
Expand All @@ -156,6 +158,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: namespace eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictPerNamespace)
return err
}

Expand All @@ -167,6 +170,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: %v", pod.Spec.NodeName, err.Error())
return err
}

Expand All @@ -191,7 +195,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
reason = "NotSet"
}
}
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
}
return nil
}
Expand Down
243 changes: 188 additions & 55 deletions pkg/descheduler/evictions/evictions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,17 @@ package evictions

import (
"context"
"fmt"
"reflect"
"testing"
"time"

v1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"k8s.io/client-go/tools/events"
Expand All @@ -33,42 +38,69 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
notFoundText = "pod not found when evicting \"%s\": pods \"%s\" not found"
tooManyRequests = "error when evicting pod (ignoring) \"%s\": Too many requests: too many requests"
)

func TestEvictPod(t *testing.T) {
ctx := context.Background()
node1 := test.BuildTestNode("node1", 1000, 2000, 9, nil)
pod1 := test.BuildTestPod("p1", 400, 0, "node1", nil)
tests := []struct {
description string
node *v1.Node
pod *v1.Pod
pods []v1.Pod
want error
evictedPod *v1.Pod
pods []runtime.Object
wantErr bool
wantErrMsg string
}{
{
description: "test pod eviction - pod present",
node: node1,
pod: pod1,
pods: []v1.Pod{*pod1},
want: nil,
evictedPod: pod1,
pods: []runtime.Object{pod1},
wantErr: false,
},
{
description: "test pod eviction - pod absent (not found error)",
node: node1,
evictedPod: pod1,
pods: []runtime.Object{test.BuildTestPod("p2", 400, 0, "node1", nil), test.BuildTestPod("p3", 450, 0, "node1", nil)},
wantErr: true,
wantErrMsg: fmt.Sprintf("pod not found when evicting \"%s\": pods \"%s\" not found", pod1.Name, pod1.Name),
},
{
description: "test pod eviction - pod absent",
description: "test pod eviction - pod absent (too many requests error)",
node: node1,
pod: pod1,
pods: []v1.Pod{*test.BuildTestPod("p2", 400, 0, "node1", nil), *test.BuildTestPod("p3", 450, 0, "node1", nil)},
want: nil,
evictedPod: pod1,
pods: []runtime.Object{test.BuildTestPod("p2", 400, 0, "node1", nil), test.BuildTestPod("p3", 450, 0, "node1", nil)},
wantErr: true,
wantErrMsg: fmt.Sprintf("error when evicting pod (ignoring) \"%s\": Too many requests: too many requests", pod1.Name),
},
}

for _, test := range tests {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: test.pods}, nil
t.Run(test.description, func(t *testing.T) {
fakeClient := fake.NewClientset(test.pods...)
fakeClient.PrependReactor("create", "pods/eviction", func(action core.Action) (handled bool, ret runtime.Object, err error) {
if test.wantErrMsg == fmt.Sprintf(notFoundText, test.evictedPod.Name, test.evictedPod.Name) {
return true, nil, k8serrors.NewNotFound(v1.Resource("pods"), test.evictedPod.Name)
}
if test.wantErrMsg == fmt.Sprintf(tooManyRequests, test.evictedPod.Name) {
return true, nil, k8serrors.NewTooManyRequestsError("too many requests")
}
return true, nil, nil
})

gotErr := evictPod(ctx, fakeClient, test.evictedPod, "v1")
if (gotErr != nil) != test.wantErr {
t.Errorf("Test error for Desc: %s. Expected error: %t, got: %v", test.description, test.wantErr, gotErr)
}
if gotErr != nil && test.wantErrMsg != "" && gotErr.Error() != test.wantErrMsg {
t.Errorf("Test error for Desc: %s. Expected error message: %q, got: %q", test.description, test.wantErrMsg, gotErr.Error())
}
})
got := evictPod(ctx, fakeClient, test.pod, "v1")
if got != test.want {
t.Errorf("Test error for Desc: %s. Expected %v pod eviction to be %v, got %v", test.description, test.pod.Name, test.want, got)
}
}
}

Expand Down Expand Up @@ -117,51 +149,152 @@ func TestPodTypes(t *testing.T) {
}
}

func TestNewPodEvictor(t *testing.T) {
func TestPodEvictor(t *testing.T) {
pod1 := test.BuildTestPod("pod", 400, 0, "node", nil)

fakeClient := fake.NewSimpleClientset(pod1)

eventRecorder := &events.FakeRecorder{}

podEvictor := NewPodEvictor(
fakeClient,
eventRecorder,
NewOptions().WithMaxPodsToEvictPerNode(utilptr.To[uint](1)),
)

stubNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node"}}

// 0 evictions expected
if evictions := podEvictor.NodeEvicted(stubNode); evictions != 0 {
t.Errorf("Expected 0 node evictions, got %q instead", evictions)
type podEvictorTest struct {
description string
pod *v1.Pod
dryRun bool
maxPodsToEvictTotal *uint
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
expectedNodeEvictions uint
expectedTotalEvictions uint
expectedError error
// expectedEvent is a slice of strings representing expected events.
// Each string in the slice should follow the format: "EventType Reason Message".
// - "Warning Failed processing failed"
events []string
}
// 0 evictions expected
if evictions := podEvictor.TotalEvicted(); evictions != 0 {
t.Errorf("Expected 0 total evictions, got %q instead", evictions)
tests := []podEvictorTest{
{
description: "one eviction expected",
pod: pod1,
maxPodsToEvictTotal: utilptr.To[uint](1),
maxPodsToEvictPerNode: utilptr.To[uint](1),
maxPodsToEvictPerNamespace: utilptr.To[uint](1),
expectedNodeEvictions: 1,
expectedTotalEvictions: 1,
expectedError: nil,
events: []string{"Normal NotSet pod eviction from node node by sigs.k8s.io/descheduler"},
},
{
description: "eviction limit exceeded on total",
pod: pod1,
maxPodsToEvictTotal: utilptr.To[uint](0),
maxPodsToEvictPerNode: utilptr.To[uint](1),
maxPodsToEvictPerNamespace: utilptr.To[uint](1),
expectedNodeEvictions: 0,
expectedTotalEvictions: 0,
expectedError: NewEvictionTotalLimitError(),
events: []string{"Warning EvictionFailed pod eviction from node node by sigs.k8s.io/descheduler failed: total eviction limit exceeded (0)"},
},
{
description: "eviction limit exceeded on node",
pod: pod1,
maxPodsToEvictTotal: utilptr.To[uint](1),
maxPodsToEvictPerNode: utilptr.To[uint](0),
maxPodsToEvictPerNamespace: utilptr.To[uint](1),
expectedNodeEvictions: 0,
expectedTotalEvictions: 0,
expectedError: NewEvictionNodeLimitError("node"),
events: []string{"Warning EvictionFailed pod eviction from node node by sigs.k8s.io/descheduler failed: node eviction limit exceeded (0)"},
},
{
description: "eviction limit exceeded on node",
pod: pod1,
maxPodsToEvictTotal: utilptr.To[uint](1),
maxPodsToEvictPerNode: utilptr.To[uint](1),
maxPodsToEvictPerNamespace: utilptr.To[uint](0),
expectedNodeEvictions: 0,
expectedTotalEvictions: 0,
expectedError: NewEvictionNamespaceLimitError("default"),
events: []string{"Warning EvictionFailed pod eviction from node node by sigs.k8s.io/descheduler failed: namespace eviction limit exceeded (0)"},
},
{
description: "eviction error",
pod: pod1,
maxPodsToEvictTotal: utilptr.To[uint](1),
maxPodsToEvictPerNode: utilptr.To[uint](1),
maxPodsToEvictPerNamespace: utilptr.To[uint](1),
expectedNodeEvictions: 0,
expectedTotalEvictions: 0,
expectedError: fmt.Errorf("eviction error"),
events: []string{"Warning EvictionFailed pod eviction from node node by sigs.k8s.io/descheduler failed: eviction error"},
},
{
description: "eviction with dryRun",
pod: pod1,
dryRun: true,
maxPodsToEvictTotal: utilptr.To[uint](1),
maxPodsToEvictPerNode: utilptr.To[uint](1),
maxPodsToEvictPerNamespace: utilptr.To[uint](1),
expectedNodeEvictions: 1,
expectedTotalEvictions: 1,
expectedError: nil,
},
}

if err := podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{}); err != nil {
t.Errorf("Expected a pod eviction, got an eviction error instead: %v", err)
}
for _, test := range tests {
t.Run(test.description, func(t *testing.T) {
fakeClient := fake.NewSimpleClientset(pod1)
fakeClient.PrependReactor("create", "pods/eviction", func(action core.Action) (handled bool, ret runtime.Object, err error) {
if test.expectedError != nil {
return true, nil, test.expectedError
}
return true, nil, nil
})
eventRecorder := events.NewFakeRecorder(100)
podEvictor := NewPodEvictor(
fakeClient,
eventRecorder,
NewOptions().
WithDryRun(test.dryRun).
WithMaxPodsToEvictTotal(test.maxPodsToEvictTotal).
WithMaxPodsToEvictPerNode(test.maxPodsToEvictPerNode).
WithMaxPodsToEvictPerNamespace(test.maxPodsToEvictPerNamespace),
)
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
stubNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node"}}

// 1 node eviction expected
if evictions := podEvictor.NodeEvicted(stubNode); evictions != 1 {
t.Errorf("Expected 1 node eviction, got %q instead", evictions)
}
// 1 total eviction expected
if evictions := podEvictor.TotalEvicted(); evictions != 1 {
t.Errorf("Expected 1 total evictions, got %q instead", evictions)
if actualErr := podEvictor.EvictPod(ctx, test.pod, EvictOptions{}); actualErr != nil && actualErr.Error() != test.expectedError.Error() {
t.Errorf("Expected error: %v, got: %v", test.expectedError, actualErr)
}

if evictions := podEvictor.NodeEvicted(stubNode); evictions != test.expectedNodeEvictions {
t.Errorf("Expected %d node evictions, got %d instead", test.expectedNodeEvictions, evictions)
}

if evictions := podEvictor.TotalEvicted(); evictions != test.expectedTotalEvictions {
t.Errorf("Expected %d total evictions, got %d instead", test.expectedTotalEvictions, evictions)
}
// Assert that the events are correct.
assertEqualEvents(t, test.events, eventRecorder.Events)
})
}
}

err := podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{})
if err == nil {
t.Errorf("Expected a pod eviction error, got nil instead")
func assertEqualEvents(t *testing.T, expected []string, actual <-chan string) {
t.Logf("Assert for events: %v", expected)
c := time.After(wait.ForeverTestTimeout)
for _, e := range expected {
select {
case a := <-actual:
if !reflect.DeepEqual(a, e) {
t.Errorf("Expected event %q, got %q instead", e, a)
}
case <-c:
t.Errorf("Expected event %q, got nothing", e)
// continue iterating to print all expected events
}
}
switch err.(type) {
case *EvictionNodeLimitError:
// all good
default:
t.Errorf("Expected a pod eviction EvictionNodeLimitError error, got a different error instead: %v", err)
for {
select {
case a := <-actual:
t.Errorf("Unexpected event: %q", a)
default:
return // No more events, as expected.
}
}
}

0 comments on commit 7cbf849

Please sign in to comment.