diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go index 5a5480801e3f..da51e4da5458 100644 --- a/controlplane/kubeadm/internal/controllers/status.go +++ b/controlplane/kubeadm/internal/controllers/status.go @@ -527,13 +527,10 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl // Determine control plane availability looking at machines conditions, which at this stage are // already surfacing status from etcd member and all control plane pods hosted on every machine. - // Note: we intentionally use the number of etcd members to determine the etcd quorum because - // etcd members might not match with machines, e.g. while provisioning a new machine. - etcdQuorum := (len(etcdMembers) / 2.0) + 1 k8sControlPlaneHealthy := 0 k8sControlPlaneNotHealthy := 0 - etcdMembersHealthy := 0 - etcdMembersNotHealthy := 0 + k8sControlPlaneNotHealthyButNotReportedYet := 0 + for _, machine := range machines { // if external etcd, only look at the status of the K8s control plane components on this machine. if !etcdIsManaged { @@ -546,6 +543,8 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition) { k8sControlPlaneNotHealthy++ + } else { + k8sControlPlaneNotHealthyButNotReportedYet++ } continue } @@ -556,14 +555,6 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl // - API server on one machine only connect to the local etcd member // - ControllerManager and scheduler on a machine connect to the local API server (not to the control plane endpoint) // As a consequence, we consider the K8s control plane on this machine healthy only if everything is healthy. 
- - if v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition) { - etcdMembersHealthy++ - } else if shouldSurfaceWhenAvailableTrue(machine, - controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition) { - etcdMembersNotHealthy++ - } - if v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition) && v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition) && v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition) && @@ -577,9 +568,83 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition) { k8sControlPlaneNotHealthy++ + } else { + k8sControlPlaneNotHealthyButNotReportedYet++ } } + // Determine etcd members availability by using etcd members as a source of truth because + // etcd members might not match with machines, e.g. while provisioning a new machine. + // Also in this case, we leverage info on machines to determine member health. + votingEtcdMembers := 0 + learnerEtcdMembers := 0 + etcdMembersHealthy := 0 + etcdMembersNotHealthy := 0 + etcdMembersNotHealthyButNotReportedYet := 0 + + if etcdIsManaged { + // Maps machines to members + memberToMachineMap := map[string]*clusterv1.Machine{} + provisioningMachines := []*clusterv1.Machine{} + for _, machine := range machines { + if machine.Status.NodeRef == nil { + provisioningMachines = append(provisioningMachines, machine) + continue + } + for _, member := range etcdMembers { + if machine.Status.NodeRef.Name == member.Name { + memberToMachineMap[member.Name] = machine + break + } + } + } + + for _, etcdMember := range etcdMembers { + // Note. 
We consider etcd members without a name yet as learners, because this prevents them from impacting quorum (this is + // a temporary state that usually goes away very quickly). + if etcdMember.IsLearner || etcdMember.Name == "" { + learnerEtcdMembers++ + } else { + votingEtcdMembers++ + } + + // In case the etcd member does not yet have a name it is not possible to find a corresponding machine, + // but we consider the member healthy because this is a transient state that usually goes away quickly. + if etcdMember.Name == "" { + etcdMembersHealthy++ + continue + } + + // Look for the corresponding machine. + machine := memberToMachineMap[etcdMember.Name] + if machine == nil { + // If there is only one provisioning machine (a machine yet without the node name), considering that KCP + // only creates one machine at a time, we can make the assumption this is the machine hosting the etcd member without a match + if len(provisioningMachines) == 1 { + machine = provisioningMachines[0] + provisioningMachines = nil + } else { + // In case we cannot match an etcd member with a machine, we consider this an issue (it should + // never happen with KCP). + etcdMembersNotHealthy++ + continue + } + } + + // Otherwise read the status of the etcd member from the EtcdMemberHealthy condition. + if v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition) { + etcdMembersHealthy++ + } else if shouldSurfaceWhenAvailableTrue(machine, + controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition) { + etcdMembersNotHealthy++ + } else { + etcdMembersNotHealthyButNotReportedYet++ + } + } + } + etcdQuorum := (votingEtcdMembers / 2.0) + 1 + + // If the control plane and etcd (if managed) are available, set the condition to true taking care of surfacing partial unavailability if any. 
if kcp.DeletionTimestamp.IsZero() && (!etcdIsManaged || etcdMembersHealthy >= etcdQuorum) && k8sControlPlaneHealthy >= 1 && @@ -587,20 +652,31 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl messages := []string{} if etcdIsManaged && etcdMembersNotHealthy > 0 { - switch len(etcdMembers) - etcdMembersNotHealthy { + etcdLearnersMsg := "" + if learnerEtcdMembers > 0 { + etcdLearnersMsg = fmt.Sprintf(" %d learner etcd member,", learnerEtcdMembers) + } + + // Note: When Available is true, we surface failures only after 10s they exist to avoid flakes; + // Accordingly for this message NotHealthyButNotReportedYet sums up to Healthy. + etcdMembersHealthyAndNotHealthyButNotReportedYet := etcdMembersHealthy + etcdMembersNotHealthyButNotReportedYet + switch etcdMembersHealthyAndNotHealthyButNotReportedYet { case 1: - messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy, at least %d required for etcd quorum", len(etcdMembers), etcdQuorum)) + messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy,%s at least %d healthy member required for etcd quorum", len(etcdMembers), etcdLearnersMsg, etcdQuorum)) default: - messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy, at least %d required for etcd quorum", len(etcdMembers)-etcdMembersNotHealthy, len(etcdMembers), etcdQuorum)) + messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy,%s at least %d healthy member required for etcd quorum", etcdMembersHealthyAndNotHealthyButNotReportedYet, len(etcdMembers), etcdLearnersMsg, etcdQuorum)) } } if k8sControlPlaneNotHealthy > 0 { - switch len(machines) - k8sControlPlaneNotHealthy { + // Note: When Available is true, we surface failures only after 10s they exist to avoid flakes; + // Accordingly for this message NotHealthyButNotReportedYet sums up to Healthy. 
+ k8sControlPlaneHealthyAndNotHealthyButNotReportedYet := k8sControlPlaneHealthy + k8sControlPlaneNotHealthyButNotReportedYet + switch k8sControlPlaneHealthyAndNotHealthyButNotReportedYet { case 1: messages = append(messages, fmt.Sprintf("* 1 of %d Machines has healthy control plane components, at least 1 required", len(machines))) default: - messages = append(messages, fmt.Sprintf("* %d of %d Machines have healthy control plane components, at least 1 required", len(machines)-k8sControlPlaneNotHealthy, len(machines))) + messages = append(messages, fmt.Sprintf("* %d of %d Machines have healthy control plane components, at least 1 required", k8sControlPlaneHealthyAndNotHealthyButNotReportedYet, len(machines))) } } @@ -623,13 +699,17 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl } if etcdIsManaged && etcdMembersHealthy < etcdQuorum { + etcdLearnersMsg := "" + if learnerEtcdMembers > 0 { + etcdLearnersMsg = fmt.Sprintf(" %d learner etcd member,", learnerEtcdMembers) + } switch etcdMembersHealthy { case 0: - messages = append(messages, fmt.Sprintf("* There are no healthy etcd member, at least %d required for etcd quorum", etcdQuorum)) + messages = append(messages, fmt.Sprintf("* There are no healthy etcd member,%s at least %d healthy member required for etcd quorum", etcdLearnersMsg, etcdQuorum)) case 1: - messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy, at least %d required for etcd quorum", len(etcdMembers), etcdQuorum)) + messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy,%s at least %d healthy member required for etcd quorum", len(etcdMembers), etcdLearnersMsg, etcdQuorum)) default: - messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy, at least %d required for etcd quorum", etcdMembersHealthy, len(etcdMembers), etcdQuorum)) + messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy,%s at least %d healthy member required for etcd 
quorum", etcdMembersHealthy, len(etcdMembers), etcdLearnersMsg, etcdQuorum)) } } diff --git a/controlplane/kubeadm/internal/controllers/status_test.go b/controlplane/kubeadm/internal/controllers/status_test.go index 94f544179f56..289a2bbe8580 100644 --- a/controlplane/kubeadm/internal/controllers/status_test.go +++ b/controlplane/kubeadm/internal/controllers/status_test.go @@ -728,14 +728,24 @@ func Test_setAvailableCondition(t *testing.T) { etcdMemberHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, LastTransitionTime: metav1.Time{Time: reconcileTime}} etcdMemberNotHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, LastTransitionTime: metav1.Time{Time: reconcileTime}} + etcdMemberNotHealthy11s := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, LastTransitionTime: metav1.Time{Time: reconcileTime.Add(-11 * time.Second)}} + + apiServerPodHealthyUnknown := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, LastTransitionTime: metav1.Time{Time: reconcileTime}} + controllerManagerPodHealthyUnknown := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, LastTransitionTime: metav1.Time{Time: reconcileTime}} + schedulerPodHealthyUnknown := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, LastTransitionTime: metav1.Time{Time: reconcileTime}} + etcdPodHealthyUnknown := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, LastTransitionTime: metav1.Time{Time: reconcileTime}} + 
etcdMemberHealthyUnknown11s := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, LastTransitionTime: metav1.Time{Time: reconcileTime.Add(-11 * time.Second)}} tests := []struct { name string controlPlane *internal.ControlPlane expectCondition metav1.Condition }{ + + // Not initialized + { - name: "Kcp not yet initialized", + name: "KCP is not available, not yet initialized", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ @@ -756,8 +766,96 @@ func Test_setAvailableCondition(t *testing.T) { Message: "Control plane not yet initialized", }, }, + + // Available (all good) + { - name: "Failed to get etcd members right after being initialized", + name: "KCP is available (1 CP)", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + ), + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + }, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + }, + }, + { + name: "KCP is available (3 CP)", + controlPlane: &internal.ControlPlane{ 
+ KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + ), + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + }, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + }, + }, + + // With not healthy etcd members / other etcd failures + + { + name: "KCP is not available, failed to get etcd members right after being initialized", controlPlane: &internal.ControlPlane{ KCP: 
&controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ @@ -786,7 +884,7 @@ func Test_setAvailableCondition(t *testing.T) { }, }, { - name: "Failed to get etcd members, 2m after the cluster was initialized", + name: "KCP is not available, failed to get etcd members, 2m after the cluster was initialized", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ @@ -815,7 +913,7 @@ func Test_setAvailableCondition(t *testing.T) { }, }, { - name: "Etcd members do not agree on member list", + name: "KCP is not available, etcd members do not agree on member list", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ @@ -838,7 +936,7 @@ func Test_setAvailableCondition(t *testing.T) { }, }, { - name: "Etcd members do not agree on cluster ID", + name: "KCP is not available, etcd members do not agree on cluster ID", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ @@ -862,7 +960,7 @@ func Test_setAvailableCondition(t *testing.T) { }, }, { - name: "Etcd members and machines list do not match", + name: "KCP is not available, etcd members and machines list do not match", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ @@ -886,8 +984,9 @@ func Test_setAvailableCondition(t *testing.T) { Message: "The list of etcd members does not match the list of Machines and Nodes", }, }, + { - name: "KCP is available", + name: "KCP is available, one not healthy etcd member, but within quorum (not reported)", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Status: controlplanev1.KubeadmControlPlaneStatus{ @@ -898,9 +997,33 @@ func Test_setAvailableCondition(t *testing.T) { }, }, Machines: collections.FromMachines( - 
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}, + }, + }, ), - EtcdMembers: []*etcd.Member{}, + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + }, EtcdMembersAgreeOnMemberList: true, EtcdMembersAgreeOnClusterID: true, EtcdMembersAndMachinesAreMatching: true, @@ -912,7 +1035,7 @@ func Test_setAvailableCondition(t *testing.T) { }, }, { - name: "KCP is available, some control plane failures to be reported", + name: "KCP is available, one not healthy etcd member, but within quorum (reported)", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Status: controlplanev1.KubeadmControlPlaneStatus{ @@ -923,24 +1046,97 @@ func 
Test_setAvailableCondition(t *testing.T) { }, }, Machines: collections.FromMachines( - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy11s, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy11s}}, + }, + }, ), - EtcdMembers: []*etcd.Member{}, + 
EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + }, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 2 of 3 etcd members are healthy, at least 2 healthy member required for etcd quorum\n" + + "* 2 of 3 Machines have healthy control plane components, at least 1 required", // Note, when an etcd member is not healthy, also the corresponding CP is considered not healthy. + }, + }, + { + name: "KCP is not available, Not enough healthy etcd members", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: 
&clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}, + }, + }, + ), + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + }, EtcdMembersAgreeOnMemberList: true, EtcdMembersAgreeOnClusterID: true, EtcdMembersAndMachinesAreMatching: true, }, expectCondition: metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, - Message: "* 2 of 3 Machines have healthy control plane components, at least 1 required", // two are not healthy, but one just flipped recently and 10s safeguard against flake did not expired yet + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "* 1 of 3 etcd members is healthy, at least 2 healthy member required for etcd quorum", }, }, { - name: "One not healthy etcd members, but within quorum", + name: "KCP is available, etcd members without name are considered healthy and not voting", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Status: controlplanev1.KubeadmControlPlaneStatus{ @@ -951,11 +1147,41 @@ func Test_setAvailableCondition(t *testing.T) { }, }, Machines: collections.FromMachines( - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, 
etcdMemberHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}}}, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy11s}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m4"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m4"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy11s}}, + }, + }, ), - EtcdMembers: []*etcd.Member{{}, {}, {}}, + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + {Name: "", IsLearner: false}, + }, EtcdMembersAgreeOnMemberList: true, EtcdMembersAgreeOnClusterID: true, 
EtcdMembersAndMachinesAreMatching: true, @@ -964,10 +1190,12 @@ func Test_setAvailableCondition(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 3 of 4 etcd members are healthy, 1 learner etcd member, at least 2 healthy member required for etcd quorum\n" + // m4 is considered learner, so we have 3 voting members, quorum 2 + "* 2 of 4 Machines have healthy control plane components, at least 1 required", }, }, { - name: "Two not healthy k8s control plane, but one working", + name: "KCP is available, etcd members without a machine are bound to provisioning machines (focus on binding)", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Status: controlplanev1.KubeadmControlPlaneStatus{ @@ -978,11 +1206,43 @@ func Test_setAvailableCondition(t *testing.T) { }, }, Machines: collections.FromMachines( - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: 
&clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy11s}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m4"}, + Status: clusterv1.MachineStatus{ + NodeRef: nil, + // Note this is not a real use case, but it helps to validate that machine m4 is bound to an etcd member and counted as healthy. + // If instead we use unknown or false conditions, it would not be possible to understand if the best effort binding happened or the etcd member was considered unhealthy because without a machine match. 
+ V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, ), - EtcdMembers: []*etcd.Member{{}, {}, {}}, + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + {Name: "m4", IsLearner: false}, + }, EtcdMembersAgreeOnMemberList: true, EtcdMembersAgreeOnClusterID: true, EtcdMembersAndMachinesAreMatching: true, @@ -991,15 +1251,73 @@ func Test_setAvailableCondition(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 3 of 4 etcd members are healthy, at least 3 healthy member required for etcd quorum\n" + // member m4 is linked to machine m4 eve if it doesn't have a node yet + "* 3 of 4 Machines have healthy control plane components, at least 1 required", }, }, { - name: "KCP is deleting", + name: "KCP is available, etcd members without a machine are bound to provisioning machines", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ - ObjectMeta: metav1.ObjectMeta{ - DeletionTimestamp: ptr.To(metav1.Now()), + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: 
&corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m4"}, + Status: clusterv1.MachineStatus{ + NodeRef: nil, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthyUnknown, controllerManagerPodHealthyUnknown, schedulerPodHealthyUnknown, etcdPodHealthyUnknown, etcdMemberHealthyUnknown11s}}, + }, + }, + ), + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + {Name: "m4", IsLearner: false}, + }, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 3 of 4 etcd members are healthy, at least 3 healthy member required for etcd quorum\n" + // member m4 is linked to machine m4 even if it doesn't have a node yet + "* 3 of 4 Machines have healthy control plane components, at least 1 required", + }, + }, + { + name: "KCP is available, members without a machine are considered not healthy", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ Status: controlplanev1.KubeadmControlPlaneStatus{ Initialized: true, V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ @@
-1008,48 +1326,108 @@ func Test_setAvailableCondition(t *testing.T) { }, }, Machines: collections.FromMachines( - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3-does-not-exist"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, ), - EtcdMembers: []*etcd.Member{}, + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + }, EtcdMembersAgreeOnMemberList: true, EtcdMembersAgreeOnClusterID: true, EtcdMembersAndMachinesAreMatching: true, }, expectCondition: metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, - Message: "* Control plane metadata.deletionTimestamp is set", + Status: 
metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 2 of 3 etcd members are healthy, at least 2 healthy member required for etcd quorum", }, }, { - name: "Certificates are not available", + name: "KCP is available, learner etcd members are not considered for quorum", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Status: controlplanev1.KubeadmControlPlaneStatus{ Initialized: true, V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ - Conditions: []metav1.Condition{certificatesNotReady}, + Conditions: []metav1.Condition{certificatesReady}, }, }, }, Machines: collections.FromMachines( - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy11s}}, + }, + }, + 
&clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m4"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m4"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy11s}}, + }, + }, ), - EtcdMembers: []*etcd.Member{}, + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + {Name: "m4", IsLearner: true}, + }, EtcdMembersAgreeOnMemberList: true, EtcdMembersAgreeOnClusterID: true, EtcdMembersAndMachinesAreMatching: true, }, expectCondition: metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, - Message: "* Control plane certificates are not available", + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 2 of 4 etcd members are healthy, 1 learner etcd member, at least 2 healthy member required for etcd quorum\n" + // m4 is learner, so we have 3 voting members, quorum 2 + "* 2 of 4 Machines have healthy control plane components, at least 1 required", }, }, + + // With not healthy K8s control planes + { - name: "Not enough healthy etcd members", + name: "KCP is available, but with not healthy K8s control planes (one to be reported, one not yet)", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Status: controlplanev1.KubeadmControlPlaneStatus{ @@ -1060,24 +1438,46 @@ func Test_setAvailableCondition(t *testing.T) { }, }, Machines: collections.FromMachines( - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: 
[]metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}}}, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m2"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m3"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy11s, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, ), - EtcdMembers: []*etcd.Member{{}, {}, {}}, + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + {Name: "m2", IsLearner: false}, + {Name: "m3", IsLearner: false}, + }, EtcdMembersAgreeOnMemberList: true, EtcdMembersAgreeOnClusterID: true, 
EtcdMembersAndMachinesAreMatching: true, }, expectCondition: metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, - Message: "* 1 of 3 etcd members is healthy, at least 2 required for etcd quorum", + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 2 of 3 Machines have healthy control plane components, at least 1 required", // two are not healthy, but one just flipped recently and 10s safeguard against flake did not expire yet }, }, { - name: "Not enough healthy K8s control planes", + name: "KCP is not available, not enough healthy K8s control planes", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Status: controlplanev1.KubeadmControlPlaneStatus{ @@ -1104,8 +1504,11 @@ func Test_setAvailableCondition(t *testing.T) { Message: "* There are no Machines with healthy control plane components, at least 1 required", }, }, + + // With external etcd + { - name: "External etcd, at least one K8s control plane", + name: "KCP is available, but with not healthy K8s control planes (one to be reported, one not yet) (external etcd)", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ @@ -1125,7 +1528,7 @@ func Test_setAvailableCondition(t *testing.T) { Machines: collections.FromMachines( &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, -
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy11s, controllerManagerPodHealthy, schedulerPodHealthy}}}}, ), EtcdMembers: nil, EtcdMembersAgreeOnMemberList: false, @@ -1133,13 +1536,14 @@ func Test_setAvailableCondition(t *testing.T) { EtcdMembersAndMachinesAreMatching: false, }, expectCondition: metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 2 of 3 Machines have healthy control plane components, at least 1 required", // two are not healthy, but one just flipped recently and 10s safeguard against flake did not expire yet }, }, { - name: "External etcd, at least one K8s control plane, some control plane failures to be reported", + name: "KCP is not available, not enough healthy K8s control planes (external etcd)", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ @@ -1157,9 +1561,9 @@ func Test_setAvailableCondition(t *testing.T) { }, }, Machines: collections.FromMachines( - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status:
clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy11s, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, ), EtcdMembers: nil, EtcdMembersAgreeOnMemberList: false, @@ -1168,22 +1572,58 @@ func Test_setAvailableCondition(t *testing.T) { }, expectCondition: metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, - Message: "* 2 of 3 Machines have healthy control plane components, at least 1 required", // two are not healthy, but one just flipped recently and 10s safeguard against flake did not expired yet + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "* There are no Machines with healthy control plane components, at least 1 required", }, }, + + // With certificates not available + { - name: "External etcd, not enough healthy K8s control planes", + name: "Certificates are not available", controlPlane: &internal.ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{ - Spec: controlplanev1.KubeadmControlPlaneSpec{ - KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ - 
ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ - Etcd: bootstrapv1.Etcd{External: &bootstrapv1.ExternalEtcd{}}, - }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesNotReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, }, }, + ), + EtcdMembers: []*etcd.Member{ + {Name: "m1", IsLearner: false}, + }, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "* Control plane certificates are not available", + }, + }, + + // Deleted + + { + name: "KCP is deleting", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: ptr.To(metav1.Now()), + }, Status: controlplanev1.KubeadmControlPlaneStatus{ Initialized: true, V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ @@ -1192,20 +1632,24 @@ func Test_setAvailableCondition(t *testing.T) { }, }, Machines: collections.FromMachines( - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: 
&clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, - &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{Name: "m1"}, + V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}, + }, + }, ), - EtcdMembers: nil, - EtcdMembersAgreeOnMemberList: false, - EtcdMembersAgreeOnClusterID: false, - EtcdMembersAndMachinesAreMatching: false, + EtcdMembers: []*etcd.Member{{Name: "m1"}}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, }, expectCondition: metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, - Message: "* There are no Machines with healthy control plane components, at least 1 required", + Message: "* Control plane metadata.deletionTimestamp is set", }, }, }