diff --git a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go index 8ec12e0d1417..fbc8263ba8d8 100644 --- a/controlplane/kubeadm/controllers/controller.go +++ b/controlplane/kubeadm/controllers/controller.go @@ -536,7 +536,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster") } - removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx, nodeNames) + kubernetesVersion := controlPlane.KCP.Spec.Version + parsedVersion, err := semver.ParseTolerant(kubernetesVersion) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion) + } + removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx, nodeNames, parsedVersion) if err != nil { return ctrl.Result{}, errors.Wrap(err, "failed attempt to reconcile etcd members") } diff --git a/controlplane/kubeadm/controllers/fakes_test.go b/controlplane/kubeadm/controllers/fakes_test.go index ef5bcc1821ee..c3ffc92b9fca 100644 --- a/controlplane/kubeadm/controllers/fakes_test.go +++ b/controlplane/kubeadm/controllers/fakes_test.go @@ -64,7 +64,7 @@ func (f fakeWorkloadCluster) ForwardEtcdLeadership(_ context.Context, _ *cluster return nil } -func (f fakeWorkloadCluster) ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error) { +func (f fakeWorkloadCluster) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) { return nil, nil } @@ -100,7 +100,7 @@ func (f fakeWorkloadCluster) RemoveEtcdMemberForMachine(ctx context.Context, mac return nil } -func (f fakeWorkloadCluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error { +func (f fakeWorkloadCluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error { return nil } diff --git a/controlplane/kubeadm/controllers/remediation.go b/controlplane/kubeadm/controllers/remediation.go index 14c2a1bc142e..994aacb98551 100644 --- a/controlplane/kubeadm/controllers/remediation.go +++ b/controlplane/kubeadm/controllers/remediation.go @@ -20,6 +20,7 @@ import ( "context" "fmt" + "github.com/blang/semver" "github.com/pkg/errors" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3" @@ -135,7 +136,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C } } - if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToBeRemediated); err != nil { + kubernetesVersion := controlPlane.KCP.Spec.Version + parsedVersion, err := semver.ParseTolerant(kubernetesVersion) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion) + } + if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToBeRemediated, parsedVersion); err != nil { logger.Error(err, "Failed to remove machine from kubeadm ConfigMap") return ctrl.Result{}, err } @@ -164,7 +170,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C // - etc. // // NOTE: this func assumes the list of members in sync with the list of machines/nodes, it is required to call reconcileEtcdMembers -// ans well as reconcileControlPlaneConditions before this. +// and well as reconcileControlPlaneConditions before this. func (r *KubeadmControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Context, controlPlane *internal.ControlPlane, machineToBeRemediated *clusterv1.Machine) (bool, error) { logger := r.Log.WithValues("namespace", controlPlane.KCP.Namespace, "kubeadmControlPlane", controlPlane.KCP.Name, "cluster", controlPlane.Cluster.Name) diff --git a/controlplane/kubeadm/controllers/scale.go b/controlplane/kubeadm/controllers/scale.go index c01114eae604..fa3e4dd68bee 100644 --- a/controlplane/kubeadm/controllers/scale.go +++ b/controlplane/kubeadm/controllers/scale.go @@ -20,6 +20,7 @@ import ( "context" "strings" + "github.com/blang/semver" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -128,7 +129,12 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane( } } - if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete); err != nil { + kubernetesVersion := controlPlane.KCP.Spec.Version + parsedVersion, err := semver.ParseTolerant(kubernetesVersion) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion) + } + if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete, parsedVersion); err != nil { logger.Error(err, "Failed to remove machine from kubeadm ConfigMap") return ctrl.Result{}, err } diff --git a/controlplane/kubeadm/internal/workload_cluster.go b/controlplane/kubeadm/internal/workload_cluster.go index add63ff1944d..a70083e26274 100644 --- a/controlplane/kubeadm/internal/workload_cluster.go +++ b/controlplane/kubeadm/internal/workload_cluster.go @@ -82,13 +82,13 @@ type WorkloadCluster interface { UpdateKubeProxyImageInfo(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane) error UpdateCoreDNS(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane) error RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error - RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error - RemoveNodeFromKubeadmConfigMap(ctx context.Context, nodeName string) error + RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error + RemoveNodeFromKubeadmConfigMap(ctx context.Context, nodeName string, version semver.Version) error ForwardEtcdLeadership(ctx context.Context, machine *clusterv1.Machine, leaderCandidate *clusterv1.Machine) error AllowBootstrapTokensToGetNodes(ctx context.Context) error // State recovery tasks. - ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error) + ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) } // Workload defines operations on workload clusters. @@ -296,17 +296,28 @@ func (w *Workload) UpdateSchedulerInKubeadmConfigMap(ctx context.Context, schedu } // RemoveMachineFromKubeadmConfigMap removes the entry for the machine from the kubeadm configmap. -func (w *Workload) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error { +func (w *Workload) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error { if machine == nil || machine.Status.NodeRef == nil { // Nothing to do, no node for Machine return nil } - return w.RemoveNodeFromKubeadmConfigMap(ctx, machine.Status.NodeRef.Name) + return w.RemoveNodeFromKubeadmConfigMap(ctx, machine.Status.NodeRef.Name, version) } +var ( + // Starting from v1.22.0 kubeadm dropped usage of the ClusterStatus entry from the kubeadm-config ConfigMap + // so it isn't necessary anymore to remove API endpoints for control plane nodes after deletion. + // NOTE: This assume kubeadm version equals to Kubernetes version. + minKubernetesVersionWithoutClusterStatus = semver.MustParse("1.22.0") +) + // RemoveNodeFromKubeadmConfigMap removes the entry for the node from the kubeadm configmap. -func (w *Workload) RemoveNodeFromKubeadmConfigMap(ctx context.Context, name string) error { +func (w *Workload) RemoveNodeFromKubeadmConfigMap(ctx context.Context, name string, version semver.Version) error { + if version.GTE(minKubernetesVersionWithoutClusterStatus) { + return nil + } + return util.Retry(func() (bool, error) { configMapKey := ctrlclient.ObjectKey{Name: kubeadmConfigKey, Namespace: metav1.NamespaceSystem} kubeadmConfigMap, err := w.getConfigMap(ctx, configMapKey) diff --git a/controlplane/kubeadm/internal/workload_cluster_etcd.go b/controlplane/kubeadm/internal/workload_cluster_etcd.go index b7c57ea548a9..89b939890bab 100644 --- a/controlplane/kubeadm/internal/workload_cluster_etcd.go +++ b/controlplane/kubeadm/internal/workload_cluster_etcd.go @@ -19,6 +19,7 @@ package internal import ( "context" + "github.com/blang/semver" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kerrors "k8s.io/apimachinery/pkg/util/errors" @@ -35,7 +36,7 @@ type etcdClientFor interface { // ReconcileEtcdMembers iterates over all etcd members and finds members that do not have corresponding nodes. // If there are any such members, it deletes them from etcd and removes their nodes from the kubeadm configmap so that kubeadm does not run etcd health checks on them. -func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error) { +func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) { removedMembers := []string{} errs := []error{} for _, nodeName := range nodeNames { @@ -73,7 +74,7 @@ func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string) errs = append(errs, err) } - if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name); err != nil { + if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name, version); err != nil { errs = append(errs, err) } } diff --git a/controlplane/kubeadm/internal/workload_cluster_etcd_test.go b/controlplane/kubeadm/internal/workload_cluster_etcd_test.go index 952e94113890..4600edbb3de3 100644 --- a/controlplane/kubeadm/internal/workload_cluster_etcd_test.go +++ b/controlplane/kubeadm/internal/workload_cluster_etcd_test.go @@ -21,6 +21,7 @@ import ( "errors" "testing" + "github.com/blang/semver" . "github.com/onsi/gomega" "go.etcd.io/etcd/clientv3" @@ -466,21 +467,24 @@ func TestReconcileEtcdMembers(t *testing.T) { Namespace: metav1.NamespaceSystem, }, Data: map[string]string{ - clusterStatusKey: `apiEndpoints: - ip-10-0-0-1.ec2.internal: - advertiseAddress: 10.0.0.1 - bindPort: 6443 - ip-10-0-0-2.ec2.internal: - advertiseAddress: 10.0.0.2 - bindPort: 6443 - someFieldThatIsAddedInTheFuture: bar - ip-10-0-0-3.ec2.internal: - advertiseAddress: 10.0.0.3 - bindPort: 6443 -apiVersion: kubeadm.k8s.io/vNbetaM -kind: ClusterStatus`, + clusterStatusKey: "apiEndpoints:\n" + + " ip-10-0-0-1.ec2.internal:\n" + + " advertiseAddress: 10.0.0.1\n" + + " bindPort: 6443\n" + + " ip-10-0-0-2.ec2.internal:\n" + + " advertiseAddress: 10.0.0.2\n" + + " bindPort: 6443\n" + + " someFieldThatIsAddedInTheFuture: bar\n" + + " ip-10-0-0-3.ec2.internal:\n" + + " advertiseAddress: 10.0.0.3\n" + + " bindPort: 6443\n" + + "apiVersion: kubeadm.k8s.io/vNbetaM\n" + + "kind: ClusterStatus\n", }, } + kubeadmConfigWithoutClusterStatus := kubeadmConfig.DeepCopy() + delete(kubeadmConfigWithoutClusterStatus.Data, clusterStatusKey) + node1 := &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "ip-10-0-0-1.ec2.internal", @@ -508,25 +512,62 @@ kind: ClusterStatus`, tests := []struct { name string + kubernetesVersion semver.Version objs []runtime.Object nodes []string etcdClientGenerator etcdClientFor expectErr bool - assert func(*WithT) + assert func(*WithT, client.Client) }{ { // the node to be removed is ip-10-0-0-3.ec2.internal since the // other two have nodes - name: "successfully removes the etcd member without a node and removes the node from kubeadm config", - objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfig.DeepCopy()}, - nodes: []string{node1.Name, node2.Name}, + name: "successfully removes the etcd member without a node and removes the node from kubeadm config for Kubernetes version < 1.22.0", + kubernetesVersion: kubernetesVersionWithClusterStatus, // Kubernetes version < 1.22.0 has ClusterStatus + objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfig.DeepCopy()}, + nodes: []string{node1.Name, node2.Name}, + etcdClientGenerator: &fakeEtcdClientGenerator{ + forNodesClient: &etcd.Client{ + EtcdClient: fakeEtcdClient, + }, + }, + expectErr: false, + assert: func(g *WithT, c client.Client) { + g.Expect(fakeEtcdClient.RemovedMember).To(Equal(uint64(3))) + + var actualConfig corev1.ConfigMap + g.Expect(c.Get( + ctx, + ctrlclient.ObjectKey{Name: kubeadmConfigKey, Namespace: metav1.NamespaceSystem}, + &actualConfig, + )).To(Succeed()) + g.Expect(actualConfig.Data[clusterStatusKey]).To(Equal("apiEndpoints:\n" + + " ip-10-0-0-1.ec2.internal:\n" + + " advertiseAddress: 10.0.0.1\n" + + " bindPort: 6443\n" + + " ip-10-0-0-2.ec2.internal:\n" + + " advertiseAddress: 10.0.0.2\n" + + " bindPort: 6443\n" + + " someFieldThatIsAddedInTheFuture: bar\n" + + "apiVersion: kubeadm.k8s.io/vNbetaM\n" + + "kind: ClusterStatus\n")) + + }, + }, + { + // the node to be removed is ip-10-0-0-3.ec2.internal since the + // other two have nodes + name: "successfully removes the etcd member without a node for Kubernetes version >= 1.22.0", + kubernetesVersion: minKubernetesVersionWithoutClusterStatus, // Kubernetes version >= 1.22.0 should not manage ClusterStatus + objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfigWithoutClusterStatus.DeepCopy()}, + nodes: []string{node1.Name, node2.Name}, etcdClientGenerator: &fakeEtcdClientGenerator{ forNodesClient: &etcd.Client{ EtcdClient: fakeEtcdClient, }, }, expectErr: false, - assert: func(g *WithT) { + assert: func(g *WithT, c client.Client) { g.Expect(fakeEtcdClient.RemovedMember).To(Equal(uint64(3))) }, }, @@ -559,7 +600,7 @@ kind: ClusterStatus`, etcdClientGenerator: tt.etcdClientGenerator, } ctx := context.TODO() - _, err := w.ReconcileEtcdMembers(ctx, tt.nodes) + _, err := w.ReconcileEtcdMembers(ctx, tt.nodes, tt.kubernetesVersion) if tt.expectErr { g.Expect(err).To(HaveOccurred()) return @@ -567,7 +608,7 @@ kind: ClusterStatus`, g.Expect(err).ToNot(HaveOccurred()) if tt.assert != nil { - tt.assert(g) + tt.assert(g, testEnv.Client) } }) } diff --git a/controlplane/kubeadm/internal/workload_cluster_test.go b/controlplane/kubeadm/internal/workload_cluster_test.go index aad05bbe8677..548d87b82413 100644 --- a/controlplane/kubeadm/internal/workload_cluster_test.go +++ b/controlplane/kubeadm/internal/workload_cluster_test.go @@ -171,6 +171,12 @@ func TestUpdateKubeProxyImageInfo(t *testing.T) { } } +var kubernetesVersionWithClusterStatus = semver.Version{ + Major: minKubernetesVersionWithoutClusterStatus.Major, + Minor: minKubernetesVersionWithoutClusterStatus.Minor - 1, + Patch: minKubernetesVersionWithoutClusterStatus.Patch, +} + func TestRemoveMachineFromKubeadmConfigMap(t *testing.T) { machine := &clusterv1.Machine{ Status: clusterv1.MachineStatus{ @@ -185,29 +191,30 @@ func TestRemoveMachineFromKubeadmConfigMap(t *testing.T) { Namespace: metav1.NamespaceSystem, }, Data: map[string]string{ - clusterStatusKey: `apiEndpoints: - ip-10-0-0-1.ec2.internal: - advertiseAddress: 10.0.0.1 - bindPort: 6443 - ip-10-0-0-2.ec2.internal: - advertiseAddress: 10.0.0.2 - bindPort: 6443 - someFieldThatIsAddedInTheFuture: bar -apiVersion: kubeadm.k8s.io/vNbetaM -kind: ClusterStatus`, + clusterStatusKey: "apiEndpoints:\n" + + " ip-10-0-0-1.ec2.internal:\n" + + " advertiseAddress: 10.0.0.1\n" + + " bindPort: 6443\n" + + " ip-10-0-0-2.ec2.internal:\n" + + " advertiseAddress: 10.0.0.2\n" + + " bindPort: 6443\n" + + " someFieldThatIsAddedInTheFuture: bar\n" + + "apiVersion: kubeadm.k8s.io/vNbetaM\n" + + "kind: ClusterStatus\n", }, BinaryData: map[string][]byte{ "": nil, }, } - kconfWithoutKey := kubeadmConfig.DeepCopy() - delete(kconfWithoutKey.Data, clusterStatusKey) + kubeadmConfigWithoutClusterStatus := kubeadmConfig.DeepCopy() + delete(kubeadmConfigWithoutClusterStatus.Data, clusterStatusKey) g := NewWithT(t) scheme := runtime.NewScheme() g.Expect(corev1.AddToScheme(scheme)).To(Succeed()) tests := []struct { name string + kubernetesVersion semver.Version machine *clusterv1.Machine objs []runtime.Object expectErr bool @@ -227,29 +234,38 @@ kind: ClusterStatus`, expectErr: false, }, { - name: "returns error if unable to find kubeadm-config", - machine: machine, - expectErr: true, + name: "returns error if unable to find kubeadm-config for Kubernetes version < 1.22.0", + kubernetesVersion: kubernetesVersionWithClusterStatus, // Kubernetes version < 1.22.0 has ClusterStatus + machine: machine, + expectErr: true, }, { - name: "returns error if unable to remove api endpoint", - machine: machine, - objs: []runtime.Object{kconfWithoutKey}, - expectErr: true, + name: "returns error if unable to remove api endpoint for Kubernetes version < 1.22.0", + kubernetesVersion: kubernetesVersionWithClusterStatus, // Kubernetes version < 1.22.0 has ClusterStatus + machine: machine, + objs: []runtime.Object{kubeadmConfigWithoutClusterStatus}, + expectErr: true, }, { - name: "removes the machine node ref from kubeadm config", - machine: machine, - objs: []runtime.Object{kubeadmConfig}, - expectErr: false, - expectedEndpoints: `apiEndpoints: - ip-10-0-0-2.ec2.internal: - advertiseAddress: 10.0.0.2 - bindPort: 6443 - someFieldThatIsAddedInTheFuture: bar -apiVersion: kubeadm.k8s.io/vNbetaM -kind: ClusterStatus -`, + name: "removes the machine node ref from kubeadm config for Kubernetes version < 1.22.0", + kubernetesVersion: kubernetesVersionWithClusterStatus, // Kubernetes version < 1.22.0 has ClusterStatus + machine: machine, + objs: []runtime.Object{kubeadmConfig}, + expectErr: false, + expectedEndpoints: "apiEndpoints:\n" + + " ip-10-0-0-2.ec2.internal:\n" + + " advertiseAddress: 10.0.0.2\n" + + " bindPort: 6443\n" + + " someFieldThatIsAddedInTheFuture: bar\n" + + "apiVersion: kubeadm.k8s.io/vNbetaM\n" + + "kind: ClusterStatus\n", + }, + { + name: "no op for Kubernetes version >= 1.22.0", + kubernetesVersion: minKubernetesVersionWithoutClusterStatus, // Kubernetes version >= 1.22.0 should not manage ClusterStatus + machine: machine, + objs: []runtime.Object{kubeadmConfigWithoutClusterStatus}, + expectErr: false, }, } @@ -261,7 +277,7 @@ kind: ClusterStatus Client: fakeClient, } ctx := context.TODO() - err := w.RemoveMachineFromKubeadmConfigMap(ctx, tt.machine) + err := w.RemoveMachineFromKubeadmConfigMap(ctx, tt.machine, tt.kubernetesVersion) if tt.expectErr { g.Expect(err).To(HaveOccurred()) return