Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add license status to cluster output #312

Merged
merged 8 commits into from
Nov 19, 2024
4 changes: 4 additions & 0 deletions operator/api/redpanda/v1alpha2/redpanda_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ const (
// ClusterConfigSynced is a condition indicating whether or not the
// redpanda cluster's configuration is up to date with the desired config.
ClusterConfigSynced = "ClusterConfigSynced"
// ClusterLicenseValid is a condition indicating whether or not the
// redpanda cluster has a valid license.
ClusterLicenseValid = "ClusterLicenseValid"
)

type ChartRef struct {
Expand Down Expand Up @@ -160,6 +163,7 @@ type HelmUpgrade struct {
// +kubebuilder:subresource:status
// +kubebuilder:resource:path=redpandas
// +kubebuilder:resource:shortName=rp
// +kubebuilder:printcolumn:name="License",type="string",JSONPath=`.status.conditions[?(@.type=="ClusterLicenseValid")].message`,description=""
// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type==\"Ready\")].status",description=""
// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[?(@.type==\"Ready\")].message",description=""
// +kubebuilder:storageversion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9804,6 +9804,9 @@ spec:
subresources:
status: {}
- additionalPrinterColumns:
- jsonPath: .status.conditions[?(@.type=="ClusterLicenseValid")].message
name: License
type: string
- jsonPath: .status.conditions[?(@.type=="Ready")].status
name: Ready
type: string
Expand Down
104 changes: 81 additions & 23 deletions operator/internal/controller/redpanda/redpanda_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/yaml"

"github.com/redpanda-data/common-go/rpadmin"
"github.com/redpanda-data/helm-charts/charts/redpanda"
"github.com/redpanda-data/helm-charts/pkg/gotohelm/helmette"
"github.com/redpanda-data/helm-charts/pkg/kube"
Expand Down Expand Up @@ -174,10 +175,10 @@ func (r *RedpandaReconciler) Reconcile(c context.Context, req ctrl.Request) (ctr

defer func() {
durationMsg := fmt.Sprintf("reconciliation finished in %s", time.Since(start).String())
log.Info(durationMsg)
log.V(logger.TraceLevel).Info(durationMsg)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should really just be relying on a time series metric for this anyhow 😓

}()

log.Info("Starting reconcile loop")
log.V(logger.TraceLevel).Info("Starting reconcile loop")

rp := &v1alpha2.Redpanda{}
if err := r.Client.Get(ctx, req.NamespacedName, rp); err != nil {
Expand All @@ -203,7 +204,7 @@ func (r *RedpandaReconciler) Reconcile(c context.Context, req ctrl.Request) (ctr

_, ok := rp.GetAnnotations()[resources.ManagedDecommissionAnnotation]
if ok {
log.Info("Managed decommission")
log.V(logger.TraceLevel).Info("Managed decommission")
return ctrl.Result{}, nil
}

Expand All @@ -230,6 +231,10 @@ func (r *RedpandaReconciler) Reconcile(c context.Context, req ctrl.Request) (ctr
return ctrl.Result{}, err
}

if err := r.reconcileLicense(ctx, rp); err != nil {
return ctrl.Result{}, err
}

if err := r.reconcileClusterConfig(ctx, rp); err != nil {
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -310,7 +315,7 @@ func (r *RedpandaReconciler) reconcileDefluxed(ctx context.Context, rp *v1alpha2
log := ctrl.LoggerFrom(ctx)

if ptr.Deref(rp.Spec.ChartRef.UseFlux, true) {
log.Info("useFlux is true; skipping non-flux reconciliation...")
log.V(logger.TraceLevel).Info("useFlux is true; skipping non-flux reconciliation...")
return nil
}

Expand Down Expand Up @@ -373,7 +378,7 @@ func (r *RedpandaReconciler) reconcileDefluxed(ctx context.Context, rp *v1alpha2
obj.SetAnnotations(annos)

if _, ok := annos["helm.sh/hook"]; ok {
log.Info(fmt.Sprintf("skipping helm hook %T: %q", obj, obj.GetName()))
log.V(logger.TraceLevel).Info(fmt.Sprintf("skipping helm hook %T: %q", obj, obj.GetName()))
continue
}

Expand All @@ -396,7 +401,7 @@ func (r *RedpandaReconciler) reconcileDefluxed(ctx context.Context, rp *v1alpha2
// to be a no-op.
// This check could likely be hoisted above the deployment loop as well.
if rp.Generation == rp.Status.ObservedGeneration && rp.Generation != 0 {
log.Info("observed generation is up to date. skipping garbage collection", "generation", rp.Generation, "observedGeneration", rp.Status.ObservedGeneration)
log.V(logger.TraceLevel).Info("observed generation is up to date. skipping garbage collection", "generation", rp.Generation, "observedGeneration", rp.Status.ObservedGeneration)
return nil
}

Expand All @@ -408,24 +413,13 @@ func (r *RedpandaReconciler) reconcileDefluxed(ctx context.Context, rp *v1alpha2
return nil
}

func (r *RedpandaReconciler) reconcileClusterConfig(ctx context.Context, rp *v1alpha2.Redpanda) error {
func (r *RedpandaReconciler) ratelimitCondition(ctx context.Context, rp *v1alpha2.Redpanda, conditionType string) bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: add a comment here noting that this function requires the Ready condition to be true.

shouldReconcileCondition might be a bit more descriptive and ergonomic?

log := ctrl.LoggerFrom(ctx)

if ptr.Deref(rp.Spec.ChartRef.UseFlux, true) {
apimeta.SetStatusCondition(rp.GetConditions(), metav1.Condition{
Type: v1alpha2.ClusterConfigSynced,
Status: metav1.ConditionUnknown,
ObservedGeneration: rp.Generation,
Reason: "HandledByFlux",
Message: "cluster configuration is not managed by the operator when Flux is enabled",
})
return nil
}

cond := apimeta.FindStatusCondition(rp.Status.Conditions, v1alpha2.ClusterConfigSynced)
cond := apimeta.FindStatusCondition(rp.Status.Conditions, conditionType)
if cond == nil {
cond = &metav1.Condition{
Type: v1alpha2.ClusterConfigSynced,
Type: conditionType,
Status: metav1.ConditionUnknown,
}
}
Expand All @@ -437,15 +431,15 @@ func (r *RedpandaReconciler) reconcileClusterConfig(ctx context.Context, rp *v1a
// NB: This controller re-queues fairly frequently as is (Watching STS
// which watches Pods), so we're largely relying on that to ensure we eventually run our rechecks.
if previouslySynced && !(generationChanged || recheck) {
return nil
return true
}

redpandaReady := apimeta.IsStatusConditionTrue(rp.Status.Conditions, meta.ReadyCondition)

if !(rp.GenerationObserved() || redpandaReady) {
log.Info("redpanda not yet ready. skipping cluster config reconciliation.")
log.V(logger.TraceLevel).Info(fmt.Sprintf("redpanda not yet ready. skipping %s reconciliation.", conditionType))
apimeta.SetStatusCondition(rp.GetConditions(), metav1.Condition{
Type: v1alpha2.ClusterConfigSynced,
Type: conditionType,
Status: metav1.ConditionUnknown,
ObservedGeneration: rp.Generation,
Reason: "RedpandaNotReady",
Expand All @@ -454,6 +448,70 @@ func (r *RedpandaReconciler) reconcileClusterConfig(ctx context.Context, rp *v1a

// NB: Redpanda becoming ready and/or observing its generation will
// trigger a re-queue for us.
return true
}

return false
}

func (r *RedpandaReconciler) reconcileLicense(ctx context.Context, rp *v1alpha2.Redpanda) error {
if r.ratelimitCondition(ctx, rp, v1alpha2.ClusterLicenseValid) {
return nil
}

client, err := r.ClientFactory.RedpandaAdminClient(ctx, rp)
if err != nil {
return err
}

features, err := client.GetEnterpriseFeatures(ctx)
if err != nil {
return err
}

var message string
var reason string
status := metav1.ConditionUnknown

switch features.LicenseStatus {
case rpadmin.LicenseStatusExpired:
status = metav1.ConditionFalse
reason = "LicenseExpired"
message = "Expired"
case rpadmin.LicenseStatusNotPresent:
status = metav1.ConditionFalse
reason = "LicenseNotPresent"
message = "Not Present"
case rpadmin.LicenseStatusValid:
status = metav1.ConditionTrue
reason = "LicenseValid"
message = "Valid"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for future proofing and better debuggability:

default:
    reason = "LicenseStatusUnknown"
    message = fmt.Sprintf("unknown license status: %q", features.LicenseStatus)

I've been bitten by missing defaults too many times.

}

apimeta.SetStatusCondition(rp.GetConditions(), metav1.Condition{
Type: v1alpha2.ClusterLicenseValid,
Status: status,
ObservedGeneration: rp.Generation,
Reason: reason,
Message: message,
})

return nil
}

func (r *RedpandaReconciler) reconcileClusterConfig(ctx context.Context, rp *v1alpha2.Redpanda) error {
if ptr.Deref(rp.Spec.ChartRef.UseFlux, true) {
apimeta.SetStatusCondition(rp.GetConditions(), metav1.Condition{
Type: v1alpha2.ClusterConfigSynced,
Status: metav1.ConditionUnknown,
ObservedGeneration: rp.Generation,
Reason: "HandledByFlux",
Message: "cluster configuration is not managed by the operator when Flux is enabled",
})
return nil
}

if r.ratelimitCondition(ctx, rp, v1alpha2.ClusterConfigSynced) {
return nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,88 @@ func (s *RedpandaControllerSuite) TestClusterSettings() {
s.deleteAndWait(rp)
}

// TestLicense applies a minimal Redpanda resource for each image/license
// combination, waits until the ClusterLicenseValid condition leaves the
// Unknown status, and asserts on the message carried by that condition.
func (s *RedpandaControllerSuite) TestLicense() {
	type image struct {
		repository string
		tag        string
	}

	// The two images exercised below; each is run with and without a license.
	unstable := image{repository: "redpandadata/redpanda-unstable", tag: "v24.3.1-rc4"}
	stable := image{repository: "redpandadata/redpanda", tag: "v24.2.9"}

	scenarios := []struct {
		image    image
		license  bool
		expected string
	}{
		{image: unstable, license: false, expected: "Expired"},
		{image: unstable, license: true, expected: "Valid"},
		{image: stable, license: false, expected: "Not Present"},
		{image: stable, license: true, expected: "Not Present"},
	}

	for _, tc := range scenarios {
		rp := s.minimalRP(false)
		rp.Spec.ClusterSpec.Image = &redpandav1alpha2.RedpandaImage{
			Repository: ptr.To(tc.image.repository),
			Tag:        ptr.To(tc.image.tag),
		}

		if !tc.license {
			// Unlicensed cases set this env var on the redpanda container
			// (presumably switching off the built-in trial license — confirm
			// against the Redpanda docs).
			disableTrial := corev1.EnvVar{Name: "__REDPANDA_DISABLE_BUILTIN_TRIAL_LICENSE", Value: "true"}
			rp.Spec.ClusterSpec.Statefulset.PodTemplate = &redpandav1alpha2.PodTemplate{
				Spec: &redpandav1alpha2.PodSpec{
					Containers: []redpandav1alpha2.Container{{
						Name: "redpanda",
						Env:  []corev1.EnvVar{disableTrial},
					}},
				},
			}
		}

		var condition metav1.Condition

		// licenseResolved reports whether the license condition has settled,
		// capturing the first non-unknown condition it sees.
		licenseResolved := func(o client.Object) bool {
			observed := o.(*redpandav1alpha2.Redpanda)

			for _, cond := range observed.Status.Conditions {
				if cond.Type != redpandav1alpha2.ClusterLicenseValid {
					continue
				}
				if cond.Status == metav1.ConditionUnknown {
					return false
				}
				condition = cond
				return true
			}
			return false
		}

		s.applyAndWaitFor(licenseResolved, rp)

		name := fmt.Sprintf("%s/%s (license: %t)", tc.image.repository, tc.image.tag, tc.license)
		message := fmt.Sprintf("%s - %s != %s", name, tc.expected, condition.Message)
		s.Require().Equal(tc.expected, condition.Message, message)

		s.deleteAndWait(rp)
	}
}

func (s *RedpandaControllerSuite) SetupSuite() {
t := s.T()

Expand Down