Skip to content

Commit

Permalink
Merge pull request #11394 from sbueringer/pr-cluster-topology-conditions
Browse files Browse the repository at this point in the history
🌱 Implement Cluster TopologyReconciled v1beta2 condition
  • Loading branch information
k8s-ci-robot authored Nov 11, 2024
2 parents e13abea + 1ea263b commit e22e2ca
Show file tree
Hide file tree
Showing 7 changed files with 525 additions and 92 deletions.
53 changes: 53 additions & 0 deletions api/v1beta1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,59 @@ const (
// ClusterTopologyReconciledV1Beta2Condition is true if the topology controller is working properly.
// Note: This condition is added only if the Cluster is referencing a ClusterClass / defining a managed Topology.
ClusterTopologyReconciledV1Beta2Condition = "TopologyReconciled"

// ClusterTopologyReconcileSucceededV1Beta2Reason documents the reconciliation of a Cluster topology succeeded.
ClusterTopologyReconcileSucceededV1Beta2Reason = "TopologyReconcileSucceeded"

// ClusterTopologyReconciledFailedV1Beta2Reason documents the reconciliation of a Cluster topology
// failing due to an error.
ClusterTopologyReconciledFailedV1Beta2Reason = "TopologyReconcileFailed"

// ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because Control Plane is not yet updated to match the desired topology spec.
ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason = "ControlPlaneUpgradePending"

// ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachineDeployments is yet to be created.
// This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable.
ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason = "MachineDeploymentsCreatePending"

// ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec.
ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason = "MachineDeploymentsUpgradePending"

// ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because the upgrade for at least one of the MachineDeployments has been deferred.
ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason = "MachineDeploymentsUpgradeDeferred"

// ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec.
ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason = "MachinePoolsUpgradePending"

// ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is yet to be created.
// This generally happens because new MachinePool creations are held off while the ControlPlane is not stable.
ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason = "MachinePoolsCreatePending"

// ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because the upgrade for at least one of the MachinePools has been deferred.
ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason = "MachinePoolsUpgradeDeferred"

// ClusterTopologyReconciledHookBlockingV1Beta2Reason documents reconciliation of a Cluster topology
// not yet completed because at least one of the lifecycle hooks is blocking.
ClusterTopologyReconciledHookBlockingV1Beta2Reason = "LifecycleHookBlocking"

// ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason documents reconciliation of a Cluster topology not
// yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
// with the ClusterClass surfaced in the ClusterClass status or controller logs.
ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason = "ClusterClassNotReconciled"

// ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason surfaces when the Cluster is deleting because the
// DeletionTimestamp is set.
ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason

// ClusterTopologyReconcilePausedV1Beta2Reason surfaces when the Cluster is paused.
ClusterTopologyReconcilePausedV1Beta2Reason = PausedV1Beta2Reason
)

// Cluster's InfrastructureReady condition and corresponding reasons that will be used in v1Beta2 API version.
Expand Down
3 changes: 3 additions & 0 deletions api/v1beta1/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,9 @@ const (
// yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
// with the ClusterClass surfaced in the ClusterClass status or controller logs.
TopologyReconciledClusterClassNotReconciledReason = "ClusterClassNotReconciled"

// TopologyReconciledPausedReason (Severity=Info) surfaces when the Cluster is paused.
TopologyReconciledPausedReason = "Paused"
)

// Conditions and condition reasons for ClusterClass.
Expand Down
6 changes: 6 additions & 0 deletions internal/controllers/cluster/cluster_controller_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,12 @@ func (c clusterConditionCustomMergeStrategy) Merge(conditions []v1beta2condition
return v1beta2conditions.InfoMergePriority
}
}

// Treat all reasons except TopologyReconcileFailed and ClusterClassNotReconciled of TopologyReconciled condition as info.
if condition.Type == clusterv1.ClusterTopologyReconciledV1Beta2Condition && condition.Status == metav1.ConditionFalse &&
condition.Reason != clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason && condition.Reason != clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason {
return v1beta2conditions.InfoMergePriority
}
return v1beta2conditions.GetDefaultMergePriorityFunc(c.negativePolarityConditionTypes)(condition)
}).Merge(conditions, conditionTypes)
}
Expand Down
225 changes: 225 additions & 0 deletions internal/controllers/cluster/cluster_controller_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1949,6 +1949,231 @@ func TestSetAvailableCondition(t *testing.T) {
Reason: v1beta2conditions.MultipleInfoReportedReason,
},
},
{
name: "Surfaces message from TopologyReconciled for reason that doesn't affect availability (no other issues)",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "machine-test",
Namespace: metav1.NamespaceDefault,
},
Spec: clusterv1.ClusterSpec{
Topology: &clusterv1.Topology{}, // using CC
},
Status: clusterv1.ClusterStatus{
V1Beta2: &clusterv1.ClusterV1Beta2Status{
Conditions: []metav1.Condition{
{
Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterDeletingV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "Foo",
},
{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason,
Message: "Control plane rollout and upgrade to version v1.29.0 on hold.",
},
},
},
},
},
expectCondition: metav1.Condition{
Type: clusterv1.ClusterAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: v1beta2conditions.MultipleInfoReportedReason,
Message: "TopologyReconciled: Control plane rollout and upgrade to version v1.29.0 on hold.",
},
},
{
name: "Drops messages from TopologyReconciled for reason that doesn't affect availability (when there is another issue)",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "machine-test",
Namespace: metav1.NamespaceDefault,
},
Spec: clusterv1.ClusterSpec{
Topology: &clusterv1.Topology{}, // using CC
},
Status: clusterv1.ClusterStatus{
V1Beta2: &clusterv1.ClusterV1Beta2Status{
Conditions: []metav1.Condition{
{
Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason,
Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
},
{
Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterDeletingV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "Foo",
},
{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason,
Message: "Control plane rollout and upgrade to version v1.29.0 on hold.",
},
},
},
},
},
expectCondition: metav1.Condition{
Type: clusterv1.ClusterAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason, // Note: There is only one condition that is an issue, but it has the MultipleIssuesReported reason.
Message: "WorkersAvailable: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
},
},
{
name: "Takes into account messages from TopologyReconciled for reason that affects availability (no other issues)",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "machine-test",
Namespace: metav1.NamespaceDefault,
},
Spec: clusterv1.ClusterSpec{
Topology: &clusterv1.Topology{}, // using CC
},
Status: clusterv1.ClusterStatus{
V1Beta2: &clusterv1.ClusterV1Beta2Status{
Conditions: []metav1.Condition{
{
Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterDeletingV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "Foo",
},
{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
},
},
},
},
},
expectCondition: metav1.Condition{
Type: clusterv1.ClusterAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
Message: "TopologyReconciled: ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
},
},
{
name: "Takes into account messages from TopologyReconciled for reason that affects availability (when there is another issue)",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "machine-test",
Namespace: metav1.NamespaceDefault,
},
Spec: clusterv1.ClusterSpec{
Topology: &clusterv1.Topology{}, // using CC
},
Status: clusterv1.ClusterStatus{
V1Beta2: &clusterv1.ClusterV1Beta2Status{
Conditions: []metav1.Condition{
{
Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason,
Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
},
{
Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: "Foo",
},
{
Type: clusterv1.ClusterDeletingV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "Foo",
},
{
Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
},
},
},
},
},
expectCondition: metav1.Condition{
Type: clusterv1.ClusterAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason,
Message: "WorkersAvailable: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1; TopologyReconciled: ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if.status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
},
},
}

for _, tc := range testCases {
Expand Down
18 changes: 9 additions & 9 deletions internal/controllers/topology/cluster/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), predicateLog)),
).
WithOptions(options).
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
Build(r)

if err != nil {
Expand Down Expand Up @@ -175,13 +175,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
return ctrl.Result{}, nil
}

// Return early if the Cluster is paused.
// TODO: What should we do if the cluster class is paused?
if annotations.IsPaused(cluster, cluster) {
log.Info("Reconciliation is paused for this object")
return ctrl.Result{}, nil
}

patchHelper, err := patch.NewHelper(cluster, r.Client)
if err != nil {
return ctrl.Result{}, err
Expand All @@ -200,14 +193,21 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
clusterv1.TopologyReconciledCondition,
}},
patch.WithForceOverwriteConditions{},
patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
clusterv1.ClusterTopologyReconciledV1Beta2Condition,
}},
}
if err := patchHelper.Patch(ctx, cluster, options...); err != nil {
reterr = kerrors.NewAggregate([]error{reterr, err})
return
}
}()

// Return early if the Cluster is paused.
if cluster.Spec.Paused || annotations.HasPaused(cluster) {
return ctrl.Result{}, nil
}

// In case the object is deleted, the managed topology stops to reconcile;
// (the other controllers will take care of deletion).
if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
Expand Down
Loading

0 comments on commit e22e2ca

Please sign in to comment.