Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make sure the startup taint will eventually being removed after efs driver ready #1287

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pkg/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"net"
"strings"
"time"

"github.com/container-storage-interface/spec/lib/go/csi"
"google.golang.org/grpc"
Expand Down Expand Up @@ -129,10 +130,9 @@ func (d *Driver) Run() error {

// Remove taint from node to indicate driver startup success
// This is done at the last possible moment to prevent race conditions or false positive removals
err = removeNotReadyTaint(cloud.DefaultKubernetesAPIClient)
if err != nil {
klog.ErrorS(err, "Unexpected failure when attempting to remove node taint(s)")
}
go tryRemoveNotReadyTaintUntilSucceed(time.Second, func() error {
return removeNotReadyTaint(cloud.DefaultKubernetesAPIClient)
})

klog.Infof("Listening for connections on address: %#v", listener.Addr())
return d.srv.Serve(listener)
Expand Down
16 changes: 15 additions & 1 deletion pkg/driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"

"github.com/container-storage-interface/spec/lib/go/csi"
"github.com/kubernetes-sigs/aws-efs-csi-driver/pkg/cloud"
Expand Down Expand Up @@ -452,7 +453,7 @@ type JSONPatch struct {
Value interface{} `json:"value"`
}

// removeNotReadyTaint removes the taint ebs.csi.aws.com/agent-not-ready from the local node
// removeNotReadyTaint removes the taint efs.csi.aws.com/agent-not-ready from the local node
// This taint can be optionally applied by users to prevent startup race conditions such as
// https://github.com/kubernetes/kubernetes/issues/95911
func removeNotReadyTaint(k8sClient cloud.KubernetesAPIClient) error {
Expand Down Expand Up @@ -512,3 +513,16 @@ func removeNotReadyTaint(k8sClient cloud.KubernetesAPIClient) error {
klog.InfoS("Removed taint(s) from local node", "node", nodeName)
return nil
}

// remove taint may failed, this keep retring until succeed, make sure the taint will eventually being removed
func tryRemoveNotReadyTaintUntilSucceed(interval time.Duration, removeFn func() error) {
for {
err := removeFn()
if err == nil {
return
}

klog.ErrorS(err, "Unexpected failure when attempting to remove node taint(s)")
time.Sleep(interval)
}
}
30 changes: 30 additions & 0 deletions pkg/driver/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package driver

import (
"context"
"errors"
"fmt"
"os"
"reflect"
Expand Down Expand Up @@ -988,3 +989,32 @@ func getNodeMock(mockCtl *gomock.Controller, nodeName string, returnNode *corev1

return mockClient, mockNode
}

func TestTryRemoveNotReadyTaintUntilSucceed(t *testing.T) {
{
i := 0
tryRemoveNotReadyTaintUntilSucceed(time.Second, func() error {
i++
if i < 3 {
return errors.New("test")
}

return nil
})

if i != 3 {
t.Fatalf("unexpected result")
}
}
{
i := 0
tryRemoveNotReadyTaintUntilSucceed(time.Second, func() error {
i++
return nil
})

if i != 1 {
t.Fatalf("unexpected result")
}
}
}