From 8d734fa7bbfaae85fa98b6f8a3aa705057bc0642 Mon Sep 17 00:00:00 2001 From: Max Dribinsky Date: Sat, 7 May 2022 18:03:21 -0400 Subject: [PATCH] Restarting cilium daemonset on upgrade workflow to address upgrade issue (#2057) * Restarting cilium daemonset on upgrade workflow to address https://github.com/aws/eks-anywhere/issues/1888 * moving cilium restart to cluster manager * Fixing unit tests * Improving unit test coverage * Renaming daemonset restart method to be generic * Renaming tests to decouple from cilium --- pkg/clustermanager/cluster_manager.go | 10 +++++ pkg/clustermanager/cluster_manager_test.go | 40 +++++++++++++++++++ .../mocks/client_and_networking.go | 14 +++++++ pkg/executables/kubectl.go | 12 ++++++ pkg/executables/kubectl_test.go | 34 +++++++++++++++- 5 files changed, 109 insertions(+), 1 deletion(-) diff --git a/pkg/clustermanager/cluster_manager.go b/pkg/clustermanager/cluster_manager.go index 184ac2041b1c..85c9650735dc 100644 --- a/pkg/clustermanager/cluster_manager.go +++ b/pkg/clustermanager/cluster_manager.go @@ -87,6 +87,7 @@ type ClusterClient interface { GetEksaVSphereMachineConfig(ctx context.Context, VSphereDatacenterName string, kubeconfigFile string, namespace string) (*v1alpha1.VSphereMachineConfig, error) GetEksaCloudStackMachineConfig(ctx context.Context, cloudstackMachineConfigName string, kubeconfigFile string, namespace string) (*v1alpha1.CloudStackMachineConfig, error) SetEksaControllerEnvVar(ctx context.Context, envVar, envVarVal, kubeconfig string) error + DaemonSetRolloutRestart(ctx context.Context, dsName, dsNamespace, kubeconfig string) error CreateNamespace(ctx context.Context, kubeconfig string, namespace string) error GetNamespace(ctx context.Context, kubeconfig string, namespace string) error ValidateControlPlaneNodes(ctx context.Context, cluster *types.Cluster, clusterName string) error @@ -395,6 +396,15 @@ func (c *ClusterManager) UpgradeCluster(ctx context.Context, managementCluster, return fmt.Errorf("waiting for workload cluster control plane to be ready: %v", err) } + if provider.Name() == constants.CloudStackProviderName { + // TODO: Move this logic to provider implementation: https://github.com/aws/eks-anywhere/issues/2061 + logger.V(3).Info("Restarting cilium daemonset after upgrade") + err = c.clusterClient.DaemonSetRolloutRestart(ctx, "cilium", constants.KubeSystemNamespace, eksaMgmtCluster.KubeconfigFile) + if err != nil { + return fmt.Errorf("restarting cilium daemonset after upgrade: %v", err) + } + } + logger.V(3).Info("Waiting for workload cluster control plane replicas to be ready after upgrade") err = c.waitForControlPlaneReplicasReady(ctx, managementCluster, newClusterSpec) if err != nil { diff --git a/pkg/clustermanager/cluster_manager_test.go b/pkg/clustermanager/cluster_manager_test.go index 3b6192e39d32..20e8fed05bdc 100644 --- a/pkg/clustermanager/cluster_manager_test.go +++ b/pkg/clustermanager/cluster_manager_test.go @@ -467,6 +467,7 @@ func TestClusterManagerUpgradeSelfManagedClusterSuccess(t *testing.T) { } tt := newSpecChangedTest(t) + tt.mocks.provider.EXPECT().Name().Return("test") tt.mocks.client.EXPECT().GetEksaCluster(tt.ctx, tt.cluster, tt.clusterSpec.Cluster.Name).Return(tt.oldClusterConfig, nil) tt.mocks.client.EXPECT().GetBundles(tt.ctx, tt.cluster.KubeconfigFile, tt.cluster.Name, "").Return(test.Bundles(t), nil) tt.mocks.client.EXPECT().GetEksdRelease(tt.ctx, gomock.Any(), constants.EksaSystemNamespace, gomock.Any()) @@ -502,6 +503,7 @@ func TestClusterManagerUpgradeWorkloadClusterSuccess(t *testing.T) { } tt := newSpecChangedTest(t) + tt.mocks.provider.EXPECT().Name().Return("test") tt.mocks.client.EXPECT().GetEksaCluster(tt.ctx, mCluster, mgmtClusterName).Return(tt.oldClusterConfig, nil) tt.mocks.client.EXPECT().GetBundles(tt.ctx, mCluster.KubeconfigFile, mCluster.Name, "").Return(test.Bundles(t), nil) tt.mocks.client.EXPECT().GetEksdRelease(tt.ctx, gomock.Any(), constants.EksaSystemNamespace, gomock.Any()) @@ -524,6 +526,43 @@ func TestClusterManagerUpgradeWorkloadClusterSuccess(t *testing.T) { } } +func TestClusterManagerUpgradeCloudStackWorkloadClusterSuccess(t *testing.T) { + mgmtClusterName := "cluster-name" + workClusterName := "cluster-name-w" + + mCluster := &types.Cluster{ + Name: mgmtClusterName, + ExistingManagement: true, + } + wCluster := &types.Cluster{ + Name: workClusterName, + } + + tt := newSpecChangedTest(t) + tt.mocks.provider.EXPECT().Name().Return(constants.CloudStackProviderName) + tt.mocks.client.EXPECT().GetEksaCluster(tt.ctx, mCluster, mgmtClusterName).Return(tt.oldClusterConfig, nil) + tt.mocks.client.EXPECT().GetBundles(tt.ctx, mCluster.KubeconfigFile, mCluster.Name, "").Return(test.Bundles(t), nil) + tt.mocks.client.EXPECT().GetEksdRelease(tt.ctx, gomock.Any(), constants.EksaSystemNamespace, gomock.Any()) + tt.mocks.provider.EXPECT().GenerateCAPISpecForUpgrade(tt.ctx, mCluster, mCluster, tt.clusterSpec, tt.clusterSpec.DeepCopy()) + tt.mocks.client.EXPECT().ApplyKubeSpecFromBytesWithNamespace(tt.ctx, mCluster, test.OfType("[]uint8"), constants.EksaSystemNamespace).Times(2) + tt.mocks.provider.EXPECT().RunPostControlPlaneUpgrade(tt.ctx, tt.clusterSpec, tt.clusterSpec, wCluster, mCluster) + tt.mocks.client.EXPECT().WaitForControlPlaneReady(tt.ctx, mCluster, "60m", mgmtClusterName).MaxTimes(2) + tt.mocks.client.EXPECT().WaitForControlPlaneNotReady(tt.ctx, mCluster, "1m", mgmtClusterName) + tt.mocks.client.EXPECT().GetMachines(tt.ctx, mCluster, mCluster.Name).Return([]types.Machine{}, nil).Times(2) + tt.mocks.provider.EXPECT().MachineDeploymentsToDelete(mCluster, tt.clusterSpec, tt.clusterSpec.DeepCopy()).Return([]string{}) + tt.mocks.client.EXPECT().WaitForDeployment(tt.ctx, mCluster, "30m", "Available", gomock.Any(), gomock.Any()).MaxTimes(10) + tt.mocks.client.EXPECT().ValidateControlPlaneNodes(tt.ctx, mCluster, mCluster.Name).Return(nil) + tt.mocks.client.EXPECT().ValidateWorkerNodes(tt.ctx, mCluster.Name, mCluster.KubeconfigFile).Return(nil) + tt.mocks.provider.EXPECT().GetDeployments() + tt.mocks.writer.EXPECT().Write(mgmtClusterName+"-eks-a-cluster.yaml", gomock.Any(), gomock.Not(gomock.Nil())) + tt.mocks.client.EXPECT().GetEksaOIDCConfig(tt.ctx, tt.clusterSpec.Cluster.Spec.IdentityProviderRefs[0].Name, mCluster.KubeconfigFile, tt.clusterSpec.Cluster.Namespace).Return(nil, nil) + tt.mocks.client.EXPECT().DaemonSetRolloutRestart(tt.ctx, "cilium", constants.KubeSystemNamespace, tt.cluster.KubeconfigFile) + + if err := tt.clusterManager.UpgradeCluster(tt.ctx, mCluster, wCluster, tt.clusterSpec, tt.mocks.provider); err != nil { + t.Errorf("ClusterManager.UpgradeCluster() error = %v, wantErr nil", err) + } +} + func TestClusterManagerUpgradeWorkloadClusterWaitForMachinesTimeout(t *testing.T) { ctx := context.Background() clusterName := "cluster-name" @@ -610,6 +649,7 @@ func TestClusterManagerUpgradeWorkloadClusterWaitForCAPITimeout(t *testing.T) { } tt := newSpecChangedTest(t) + tt.mocks.provider.EXPECT().Name().Return("test") tt.mocks.client.EXPECT().GetEksaCluster(tt.ctx, tt.cluster, tt.clusterSpec.Cluster.Name).Return(tt.oldClusterConfig, nil) tt.mocks.client.EXPECT().GetBundles(tt.ctx, tt.cluster.KubeconfigFile, tt.cluster.Name, "").Return(test.Bundles(t), nil) tt.mocks.client.EXPECT().GetEksdRelease(tt.ctx, gomock.Any(), constants.EksaSystemNamespace, gomock.Any()) diff --git a/pkg/clustermanager/mocks/client_and_networking.go b/pkg/clustermanager/mocks/client_and_networking.go index 23dc7bfc1240..2e912ff6fabd 100644 --- a/pkg/clustermanager/mocks/client_and_networking.go +++ b/pkg/clustermanager/mocks/client_and_networking.go @@ -99,6 +99,20 @@ func (mr *MockClusterClientMockRecorder) CreateNamespace(arg0, arg1, arg2 interf return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateNamespace", reflect.TypeOf((*MockClusterClient)(nil).CreateNamespace), arg0, arg1, arg2) } +// DaemonSetRolloutRestart mocks base method. +func (m *MockClusterClient) DaemonSetRolloutRestart(arg0 context.Context, arg1, arg2, arg3 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DaemonSetRolloutRestart", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// DaemonSetRolloutRestart indicates an expected call of DaemonSetRolloutRestart. +func (mr *MockClusterClientMockRecorder) DaemonSetRolloutRestart(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DaemonSetRolloutRestart", reflect.TypeOf((*MockClusterClient)(nil).DaemonSetRolloutRestart), arg0, arg1, arg2, arg3) +} + // DeleteAWSIamConfig mocks base method. func (m *MockClusterClient) DeleteAWSIamConfig(arg0 context.Context, arg1 *types.Cluster, arg2, arg3 string) error { m.ctrl.T.Helper() diff --git a/pkg/executables/kubectl.go b/pkg/executables/kubectl.go index ce0c59f5b659..4e9d9e9665bf 100644 --- a/pkg/executables/kubectl.go +++ b/pkg/executables/kubectl.go @@ -690,6 +690,18 @@ func (k *Kubectl) ValidateEKSAClustersCRD(ctx context.Context, cluster *types.Cl return nil } +func (k *Kubectl) DaemonSetRolloutRestart(ctx context.Context, dsName, dsNamespace, kubeconfig string) error { + params := []string{ + "rollout", "restart", "ds", dsName, + "--kubeconfig", kubeconfig, "--namespace", dsNamespace, + } + _, err := k.Execute(ctx, params...) + if err != nil { + return fmt.Errorf("restarting %s daemonset in namespace %s: %v", dsName, dsNamespace, err) + } + return nil +} + func (k *Kubectl) SetEksaControllerEnvVar(ctx context.Context, envVar, envVarVal, kubeconfig string) error { params := []string{ "set", "env", "deployment/eksa-controller-manager", fmt.Sprintf("%s=%s", envVar, envVarVal), diff --git a/pkg/executables/kubectl_test.go b/pkg/executables/kubectl_test.go index 9af8c44ac6e9..03871afa05fa 100644 --- a/pkg/executables/kubectl_test.go +++ b/pkg/executables/kubectl_test.go @@ -1017,7 +1017,39 @@ func TestKubectlSetControllerEnvVarSuccess(t *testing.T) { err := k.SetEksaControllerEnvVar(ctx, envVar, envVarValue, cluster.KubeconfigFile) if err != nil { - t.Fatalf("Kubectl.GetApiServerUrl() error = %v, want nil", err) + t.Fatalf("Kubectl.DaemonSetRolloutRestart() error = %v, want nil", err) + } +} + +func TestKubectlDaemonSetRolloutRestartSuccess(t *testing.T) { + k, ctx, cluster, e := newKubectl(t) + e.EXPECT().Execute( + ctx, + []string{ + "rollout", "restart", "ds", "cilium", + "--kubeconfig", cluster.KubeconfigFile, "--namespace", constants.KubeSystemNamespace, + }, + ).Return(bytes.Buffer{}, nil) + + err := k.DaemonSetRolloutRestart(ctx, "cilium", constants.KubeSystemNamespace, cluster.KubeconfigFile) + if err != nil { + t.Fatalf("Kubectl.DaemonSetRolloutRestart() error = %v, want nil", err) + } +} + +func TestKubectlDaemonSetRolloutRestartError(t *testing.T) { + k, ctx, cluster, e := newKubectl(t) + e.EXPECT().Execute( + ctx, + []string{ + "rollout", "restart", "ds", "cilium", + "--kubeconfig", cluster.KubeconfigFile, "--namespace", constants.KubeSystemNamespace, + }, + ).Return(bytes.Buffer{}, fmt.Errorf("error")) + + err := k.DaemonSetRolloutRestart(ctx, "cilium", constants.KubeSystemNamespace, cluster.KubeconfigFile) + if err == nil { + t.Fatalf("Kubectl.DaemonSetRolloutRestart() expected error, but was nil") } }