Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CIAINFRA-282: Cluster V1 CR: add observedGeneration and OperatorQuiescent #201

Merged
merged 2 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/go/k8s/api/vectorized/v1alpha1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,9 @@ type ClusterStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
// Important: Run "make" to regenerate code after modifying this file

// If set, this represents the .metadata.generation that was observed by the controller.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
// Replicas show how many nodes have been created for the cluster
// +optional
Replicas int32 `json:"replicas"`
Expand Down Expand Up @@ -436,13 +439,15 @@ type ClusterCondition struct {
}

// ClusterConditionType is a valid value for ClusterCondition.Type
// +kubebuilder:validation:Enum=ClusterConfigured
// +kubebuilder:validation:Enum=ClusterConfigured;OperatorQuiescent
type ClusterConditionType string

// These are valid conditions of the cluster.
const (
// ClusterConfiguredConditionType indicates whether the Redpanda cluster configuration is in sync with the desired one
ClusterConfiguredConditionType ClusterConditionType = "ClusterConfigured"
// OperatorQuiescentConditionType indicates that the operator has no outstanding work to do, based on the observedGeneration.
OperatorQuiescentConditionType ClusterConditionType = "OperatorQuiescent"
chrisseto marked this conversation as resolved.
Show resolved Hide resolved
)

// GetCondition returns the condition of the given type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,7 @@ spec:
description: Type is the type of the condition
enum:
- ClusterConfigured
- OperatorQuiescent
type: string
required:
- status
Expand Down Expand Up @@ -1496,6 +1497,11 @@ spec:
type: string
type: object
type: object
observedGeneration:
description: If set, this represents the .metadata.generation that
was observed by the controller.
format: int64
type: integer
readyReplicas:
description: ReadyReplicas is the number of Pods belonging to the
cluster that have a Ready Condition.
Expand Down
70 changes: 69 additions & 1 deletion src/go/k8s/internal/controller/redpanda/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ type ClusterReconciler struct {
//nolint:funlen,gocyclo // todo break down
func (r *ClusterReconciler) Reconcile(
c context.Context, req ctrl.Request,
) (ctrl.Result, error) {
) (result ctrl.Result, err error) {
ctx, done := context.WithCancel(c)
defer done()
log := ctrl.LoggerFrom(ctx).WithName("ClusterReconciler.Reconcile")
Expand All @@ -133,6 +133,32 @@ func (r *ClusterReconciler) Reconcile(
return ctrl.Result{}, fmt.Errorf("unable to retrieve Cluster resource: %w", err)
}

// After every reconciliation, update status:
// - Set observedGeneration. The reconciler finished, every action
// performed in this run - including updating status - has been finished, and has
// observed this generation.
// - Set OperatorQuiescent condition, based on our best knowledge if there is
// any outstanding work to do for the controller.
defer func() {
_, patchErr := patchStatus(ctx, r.Client, &vectorizedCluster, func(cluster *vectorizedv1alpha1.Cluster) {
// Set quiescent
cond := getQuiescentCondition(cluster)

flipped := cluster.Status.SetCondition(cond.Type, cond.Status, cond.Reason, cond.Message)
if flipped {
log.Info("Changing OperatorQuiescent condition after reconciliation", "status", cond.Status, "reason", cond.Reason, "message", cond.Message)
}

// Only set observedGeneration if there's no error.
if err == nil {
cluster.Status.ObservedGeneration = vectorizedCluster.Generation
}
})
if patchErr != nil {
log.Error(patchErr, "failed to patchStatus with observedGeneration and quiescent")
}
}()

// Previous usage of finalizer handlers was unreliable in the case of
// flipping Kubernetes Nodes ready status. Local SSD disks that could be
// attached to Redpanda Pod prevents rescheduling as the Persistent Volume
Expand Down Expand Up @@ -1103,3 +1129,45 @@ func isRedpandaClusterVersionManaged(
}
return true
}

// patchStatus runs mutator against observedCluster and submits the resulting
// change to the status subresource as a merge patch.
//
// A deep copy of observedCluster taken before the mutation serves as the
// patch base, so only what mutator changed is sent. observedCluster itself is
// modified in place by mutator and then passed to Status().Patch.
//
// On success the Status of observedCluster is returned. On failure a zero
// ClusterStatus and a wrapped error are returned.
func patchStatus(ctx context.Context, c client.Client, observedCluster *vectorizedv1alpha1.Cluster, mutator func(cluster *vectorizedv1alpha1.Cluster)) (vectorizedv1alpha1.ClusterStatus, error) {
	// Snapshot the pre-mutation state; the patch is computed relative to it.
	base := observedCluster.DeepCopy()
	patch := client.MergeFrom(base)

	mutator(observedCluster)

	patchErr := c.Status().Patch(ctx, observedCluster, patch)
	if patchErr != nil {
		var empty vectorizedv1alpha1.ClusterStatus
		return empty, fmt.Errorf("failed to update cluster status: %w", patchErr)
	}

	return observedCluster.Status, nil
}

// getQuiescentCondition derives the OperatorQuiescent condition from the
// given cluster's spec and status.
//
// The condition is reported as False, with a Reason and Message, for the
// first of the following that holds:
//   - Status.Restarting is true ("Restarting");
//   - Status.DecommissioningNode is non-nil ("DecommissioningInProgress");
//   - Status.Version is non-empty and differs from Spec.Version
//     ("UpgradeInProgress").
//
// If none of them holds, the condition is True with empty Reason and Message.
func getQuiescentCondition(redpandaCluster *vectorizedv1alpha1.Cluster) vectorizedv1alpha1.ClusterCondition {
	condition := vectorizedv1alpha1.ClusterCondition{
		Type: vectorizedv1alpha1.OperatorQuiescentConditionType,
	}

	if redpandaCluster.Status.Restarting {
		condition.Status = corev1.ConditionFalse
		condition.Reason = "Restarting"
		condition.Message = "Cluster is restarting"
		return condition
	}

	if redpandaCluster.Status.DecommissioningNode != nil {
		condition.Status = corev1.ConditionFalse
		condition.Reason = "DecommissioningInProgress"
		// Dereference the pointer: formatting the pointer itself with %d
		// would print its address rather than the node id.
		condition.Message = fmt.Sprintf("Decommissioning of node_id=%d in progress", *redpandaCluster.Status.DecommissioningNode)
		return condition
	}

	// An empty Status.Version means no version has been recorded yet, which is
	// not treated as an upgrade.
	if redpandaCluster.Spec.Version != redpandaCluster.Status.Version && redpandaCluster.Status.Version != "" {
		condition.Status = corev1.ConditionFalse
		condition.Reason = "UpgradeInProgress"
		// The upgrade moves from the version recorded in status to the one
		// requested in spec.
		// NOTE(review): assumes Status.Version is the currently running version - confirm.
		condition.Message = fmt.Sprintf("Upgrade from %s to %s in progress", redpandaCluster.Status.Version, redpandaCluster.Spec.Version)
		return condition
	}

	// No reason found (no early return), so claim the controller is quiescent.
	condition.Status = corev1.ConditionTrue
	return condition
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@ status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/decomm-on-delete --timeout 300s --namespace $NAMESPACE
---
apiVersion: v1
kind: Pod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/decomm-on-delete --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@ status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/decomm-on-delete --timeout 300s --namespace $NAMESPACE
---
apiVersion: v1
kind: Pod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/decomm-on-delete --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@ status:
replicas: 3
currentReplicas: 3
readyReplicas: 3
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/decomm-on-delete --timeout 300s --namespace $NAMESPACE
---
apiVersion: v1
kind: Pod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ status:
replicas: 1
upgrading: false
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/cluster-additional-cmdline-arguments --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ metadata:
status:
replicas: 2
restarting: false
conditions:
- type: ClusterConfigured
status: "False"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=False cluster/centralized-configuration-bootstrap --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ metadata:
status:
replicas: 2
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/centralized-configuration-bootstrap --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ metadata:
status:
replicas: 2
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/centralized-configuration-drift --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@ metadata:
name: centralized-configuration-drift
status:
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/centralized-configuration-drift --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@ metadata:
name: centralized-configuration-drift
status:
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/centralized-configuration-drift --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ metadata:
status:
replicas: 2
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/centralized-configuration-tls --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ metadata:
status:
replicas: 2
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/centralized-configuration-tls --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ metadata:
status:
replicas: 2
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/centralized-configuration-tls --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ metadata:
status:
replicas: 2
restarting: false
conditions:
- type: ClusterConfigured
status: "True"
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
- timeout: 300
script: |
kubectl wait --for=condition=ClusterConfigured=True cluster/centralized-configuration-tls --timeout 300s --namespace $NAMESPACE
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
Expand Down
Loading