From cbdca5ad5f98dde1ff6933a74cfcf3dc63e88333 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Sun, 13 Nov 2022 13:01:21 +0000 Subject: [PATCH] retry on CRD permissions failures at bootstrap --- pkg/crd/crd.go | 31 ++++++++++++++++---- pkg/crd/crd_test.go | 70 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 9 deletions(-) diff --git a/pkg/crd/crd.go b/pkg/crd/crd.go index dd0cead71b..164632f3c9 100644 --- a/pkg/crd/crd.go +++ b/pkg/crd/crd.go @@ -31,6 +31,10 @@ import ( ) const ( + // Sleep interval to check the CRD is present. + checkCRDPresentInterval = time.Second + // Timeout to check the CRD is present. + checkCRDPresentTimeout = 60 * time.Second // Sleep interval to check the Established condition of CRD. checkCRDEstablishedInterval = time.Second // Timeout for checking the Established condition of CRD. @@ -78,16 +82,33 @@ func (h *CRDHandler) EnsureCRD(meta *CRDMeta, namespacedScoped bool) (*apiextens } func (h *CRDHandler) createOrUpdateCRD(meta *CRDMeta, namespacedScoped bool) (*apiextensionsv1.CustomResourceDefinition, error) { + updateCRD := false crd := crd(meta, namespacedScoped) - existingCRD, err := h.client.ApiextensionsV1().CustomResourceDefinitions().Get(context.TODO(), crd.Name, metav1.GetOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - return nil, fmt.Errorf("failed to verify the existence of %v CRD: %v", meta.kind, err) + if err := wait.PollImmediate(checkCRDPresentInterval, checkCRDPresentTimeout, func() (bool, error) { + existingCRD, err := h.client.ApiextensionsV1().CustomResourceDefinitions().Get(context.TODO(), crd.Name, metav1.GetOptions{}) + // Retry until the RBAC permissions are propagated to be able to read the CRD + if apierrors.IsForbidden(err) { + return false, nil + } + // CRD doesn't exist, create it + if apierrors.IsNotFound(err) { + return true, nil + } + // Fail on any other error + if err != nil { + return false, fmt.Errorf("failed to verify the existence of %v CRD: %v", meta.kind, err) + } + // CRD exists, get current resource version and update it + updateCRD = true + crd.ResourceVersion = existingCRD.ResourceVersion + return true, nil + }); err != nil { + return nil, fmt.Errorf("timed out waiting to Get %v CRD: %v", meta.kind, err) } // Update CRD if already present. - if err == nil { + if updateCRD { klog.V(0).Infof("Updating existing %v CRD...", meta.kind) - crd.ResourceVersion = existingCRD.ResourceVersion return h.client.ApiextensionsV1().CustomResourceDefinitions().Update(context.TODO(), crd, metav1.UpdateOptions{}) } diff --git a/pkg/crd/crd_test.go b/pkg/crd/crd_test.go index 8002384a55..027e9983fe 100644 --- a/pkg/crd/crd_test.go +++ b/pkg/crd/crd_test.go @@ -18,13 +18,17 @@ package crd import ( "context" + "fmt" "testing" "github.com/google/go-cmp/cmp" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" crdclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" crdclientfake "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/fake" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + k8stesting "k8s.io/client-go/testing" "k8s.io/kube-openapi/pkg/common" ) @@ -48,8 +52,9 @@ func testGetOpenAPIDefinitions(common.ReferenceCallback) map[string]common.OpenA func TestCreateOrUpdateCRD(t *testing.T) { testCases := []struct { - desc string - initFunc func(clientset crdclient.Interface, namespaceScoped bool) error + desc string + initFunc func(clientset crdclient.Interface, namespaceScoped bool) error + retryOnForbidden bool }{ { desc: "Create CRD when not exist", @@ -77,12 +82,41 @@ func TestCreateOrUpdateCRD(t *testing.T) { return nil }, }, + { + desc: "Create CRD when not exist after receiving a forbidden error the first time", + retryOnForbidden: true, + }, + { + desc: "Update CRD when exist with wrongname after receiving a forbidden error the first time", + initFunc: func(clientset crdclient.Interface, namespaceScoped bool) error { + crd := crd(crdMeta, namespaceScoped) + crd.Spec.Names.Kind = "wrongname" + crd.Spec.Names.ListKind = "wrongnameList" + if _, err := clientset.ApiextensionsV1().CustomResourceDefinitions().Create(context.TODO(), crd, metav1.CreateOptions{}); err != nil { + return err + } + return nil + }, + retryOnForbidden: true, + }, + { + desc: "Update CRD when pruning is not enabled after receiving a forbidden error the first time", + initFunc: func(clientset crdclient.Interface, namespaceScoped bool) error { + crd := crd(crdMeta, namespaceScoped) + crd.Spec.PreserveUnknownFields = trueValue + if _, err := clientset.ApiextensionsV1().CustomResourceDefinitions().Create(context.TODO(), crd, metav1.CreateOptions{}); err != nil { + return err + } + return nil + }, + retryOnForbidden: true, + }, } for _, namespacedScoped := range []bool{true, false} { expectedCRD := crd(crdMeta, namespacedScoped) for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { + t.Run(tc.desc+fmt.Sprintf(" namespaced scoped %v", namespacedScoped), func(t *testing.T) { fakeCRDClient := crdclientfake.NewSimpleClientset() fakeCRDHandler := NewCRDHandler(fakeCRDClient) if tc.initFunc != nil { @@ -90,7 +124,20 @@ func TestCreateOrUpdateCRD(t *testing.T) { t.Errorf("Unexpected error in initFunc(): %v", err) } } - + // test createOrUpdateCRD retries on http forbidden errors + numRetries := 3 + if tc.retryOnForbidden { + counter := 0 + fakeCRDClient.PrependReactor("get", "customresourcedefinitions", k8stesting.ReactionFunc(func(a k8stesting.Action) (bool, runtime.Object, error) { + counter++ + if counter < numRetries { + return true, &apiextensionsv1.CustomResourceDefinition{}, apierrors.NewForbidden(a.GetResource().GroupResource(), a.(k8stesting.GetAction).GetName(), fmt.Errorf(`User "system:controller:glbc" cannot get resource`)) + } + + // delegate to the next reactor in the chain + return false, &apiextensionsv1.CustomResourceDefinition{}, nil + })) + } crd, err := fakeCRDHandler.createOrUpdateCRD(crdMeta, namespacedScoped) if err != nil { t.Errorf("Unexpected error in createOrUpdateCRD(): %v", err) @@ -123,6 +170,21 @@ func TestCreateOrUpdateCRD(t *testing.T) { t.Errorf("CRD should be cluster scoped but found %s", crd.Spec.Scope) } + getActions := 0 + for _, a := range fakeCRDClient.Actions() { + if a.GetVerb() == "get" && a.GetResource().Resource == "customresourcedefinitions" { + getActions++ + } + } + expectedGetActions := 1 + if tc.retryOnForbidden { + expectedGetActions = numRetries + } + + if getActions != expectedGetActions { + t.Errorf("Unexpected number of Get requests, got %d expected %d", getActions, expectedGetActions) + } + // Nuke CRD status before comparing. crd.Status = apiextensionsv1.CustomResourceDefinitionStatus{} if diff := cmp.Diff(expectedCRD, crd); diff != "" {