Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DFBUGS-925: [release-4.18] controllers: new controller for maintenance mode #280

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/v1alpha1/storageclient_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ type StorageClientSpec struct {
type StorageClientStatus struct {
Phase storageClientPhase `json:"phase,omitempty"`

// InMaintenanceMode is the maintenance-mode state recorded for this client.
// The JSON tag has no omitempty, so the field is always serialized.
InMaintenanceMode bool `json:"inMaintenanceMode"`

// ConsumerID will hold the identity of this cluster inside the attached provider cluster
ConsumerID string `json:"id,omitempty"`
}
Expand Down
30 changes: 29 additions & 1 deletion bundle/manifests/ocs-client-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ metadata:
categories: Storage
console.openshift.io/plugins: '["odf-client-console"]'
containerImage: quay.io/ocs-dev/ocs-client-operator:latest
createdAt: "2024-11-22T04:24:54Z"
createdAt: "2024-11-22T06:22:20Z"
description: OpenShift Data Foundation client operator enables consumption of
storage services from a remote centralized OpenShift Data Foundation provider
cluster.
Expand Down Expand Up @@ -241,6 +241,15 @@ spec:
- patch
- update
- watch
- apiGroups:
- ocs.openshift.io
resources:
- storageclaims
- storageclients
verbs:
- get
- list
- watch
- apiGroups:
- ocs.openshift.io
resources:
Expand Down Expand Up @@ -315,6 +324,25 @@ spec:
- list
- update
- watch
- apiGroups:
- ramendr.openshift.io
resources:
- maintenancemodes
verbs:
- create
- delete
- get
- list
- update
- watch
- apiGroups:
- ramendr.openshift.io
resources:
- maintenancemodes/status
verbs:
- get
- patch
- update
- apiGroups:
- replication.storage.openshift.io
resources:
Expand Down
4 changes: 4 additions & 0 deletions bundle/manifests/ocs.openshift.io_storageclients.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,12 @@ spec:
description: ConsumerID will hold the identity of this cluster inside
the attached provider cluster
type: string
inMaintenanceMode:
type: boolean
phase:
type: string
required:
- inMaintenanceMode
type: object
type: object
served: true
Expand Down
13 changes: 12 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func main() {
os.Exit(1)
}

_, err = getAvailableCRDNames(context.Background(), apiClient)
availCrds, err := getAvailableCRDNames(context.Background(), apiClient)
if err != nil {
setupLog.Error(err, "Unable get a list of available CRD names")
os.Exit(1)
Expand Down Expand Up @@ -204,11 +204,22 @@ func main() {
Scheme: mgr.GetScheme(),
OperatorNamespace: utils.GetOperatorNamespace(),
ConsolePort: int32(consolePort),
AvailableCrds: availCrds,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "OperatorConfigMapReconciler")
os.Exit(1)
}

if availCrds[controller.MaintenanceModeCRDName] {
if err = (&controller.MaintenanceModeReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "MaintenanceMode")
os.Exit(1)
}
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/ocs.openshift.io_storageclients.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,12 @@ spec:
description: ConsumerID will hold the identity of this cluster inside
the attached provider cluster
type: string
inMaintenanceMode:
type: boolean
phase:
type: string
required:
- inMaintenanceMode
type: object
type: object
served: true
Expand Down
28 changes: 28 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,15 @@ rules:
- patch
- update
- watch
- apiGroups:
- ocs.openshift.io
resources:
- storageclaims
- storageclients
verbs:
- get
- list
- watch
- apiGroups:
- ocs.openshift.io
resources:
Expand Down Expand Up @@ -272,6 +281,25 @@ rules:
- list
- update
- watch
- apiGroups:
- ramendr.openshift.io
resources:
- maintenancemodes
verbs:
- create
- delete
- get
- list
- update
- watch
- apiGroups:
- ramendr.openshift.io
resources:
- maintenancemodes/status
verbs:
- get
- patch
- update
- apiGroups:
- replication.storage.openshift.io
resources:
Expand Down
192 changes: 192 additions & 0 deletions internal/controller/maintenancemode_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package controller

import (
"context"
"fmt"
"github.com/go-logr/logr"
ramenv1alpha1 "github.com/ramendr/ramen/api/v1alpha1"
"github.com/red-hat-storage/ocs-client-operator/api/v1alpha1"
"github.com/red-hat-storage/ocs-client-operator/pkg/utils"
providerclient "github.com/red-hat-storage/ocs-operator/services/provider/api/v4/client"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"slices"
)

// MaintenanceModeCRDName is the name of the MaintenanceMode
// CustomResourceDefinition.
const MaintenanceModeCRDName = "maintenancemodes.ramendr.openshift.io"

// MaintenanceModeReconciler reconciles MaintenanceMode objects: it resolves
// each one to a StorageClient through the targeted StorageClaim and keeps the
// maintenance-mode state of the StorageClients in line with them.
type MaintenanceModeReconciler struct {
client.Client
Scheme *runtime.Scheme

// log and ctx are overwritten at the start of every Reconcile call and are
// read by the helper methods of this type.
log logr.Logger
ctx context.Context
}

// SetupWithManager sets up the controller with the Manager.
//
// The controller is triggered by MaintenanceMode, StorageClaim and
// StorageClient events. Reconcile ignores the incoming request and always
// processes every MaintenanceMode and StorageClient, so the watches only
// decide when a reconcile happens, not what it works on.
func (r *MaintenanceModeReconciler) SetupWithManager(mgr ctrl.Manager) error {
	generationChangePredicate := predicate.GenerationChangedPredicate{}

	// Lets an update through when the StorageClient's maintenance-mode status
	// flips. Only UpdateFunc is set, so the other event kinds are not filtered
	// by this predicate.
	maintenanceModeChangedPredicate := predicate.Funcs{
		UpdateFunc: func(e event.UpdateEvent) bool {
			if e.ObjectOld == nil || e.ObjectNew == nil {
				return false
			}
			oldObj, ok := e.ObjectOld.(*v1alpha1.StorageClient)
			if !ok {
				return false
			}
			newObj, ok := e.ObjectNew.(*v1alpha1.StorageClient)
			if !ok {
				return false
			}
			return oldObj.Status.InMaintenanceMode != newObj.Status.InMaintenanceMode
		},
	}

	return ctrl.NewControllerManagedBy(mgr).
		Named("MaintenanceMode").
		Watches(
			&ramenv1alpha1.MaintenanceMode{},
			&handler.EnqueueRequestForObject{},
			builder.WithPredicates(generationChangePredicate),
		).
		Watches(
			&v1alpha1.StorageClaim{},
			&handler.EnqueueRequestForObject{},
			builder.WithPredicates(generationChangePredicate),
		).
		Watches(
			&v1alpha1.StorageClient{},
			&handler.EnqueueRequestForObject{},
			// builder.WithPredicates requires every listed predicate to pass.
			// A status-only change does not bump metadata.generation, so the
			// two predicates must be OR-ed; listing them side by side would
			// drop exactly the status updates this watch exists for.
			builder.WithPredicates(
				predicate.Or(generationChangePredicate, maintenanceModeChangedPredicate),
			),
		).
		Complete(r)
}

//+kubebuilder:rbac:groups=ramendr.openshift.io,resources=maintenancemodes,verbs=get;list;update;create;watch;delete
//+kubebuilder:rbac:groups=ramendr.openshift.io,resources=maintenancemodes/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=ocs.openshift.io,resources=storageclients;storageclaims,verbs=get;list;watch

// Reconcile brings the maintenance-mode state of every StorageClient in line
// with the MaintenanceMode objects that currently exist. The request is
// ignored; each call processes all MaintenanceModes and all StorageClients.
//
// First pass: every MaintenanceMode is resolved, through the StorageClaim named
// by its TargetID, to a StorageClient. When that client already reports
// InMaintenanceMode, the MaintenanceMode status is marked completed.
//
// Second pass: every StorageClient whose InMaintenanceMode status differs from
// whether some MaintenanceMode targets it is toggled.
//
// The first error encountered aborts the reconcile and is returned.
func (r *MaintenanceModeReconciler) Reconcile(ctx context.Context, _ ctrl.Request) (ctrl.Result, error) {
	r.ctx = ctx
	r.log = log.FromContext(ctx)
	r.log.Info("Starting reconcile")

	// StorageClients targeted by at least one MaintenanceMode, keyed by name.
	targetedClients := map[string]*v1alpha1.StorageClient{}

	mmList := &ramenv1alpha1.MaintenanceModeList{}
	if err := r.list(mmList); err != nil {
		r.log.Error(err, "failed to list the MaintenanceMode CRs")
		return reconcile.Result{}, err
	}

	for idx := range mmList.Items {
		maintenanceMode := &mmList.Items[idx]

		// MMode's TargetID is replicationID, which in our case is storageClaim name
		claim := &v1alpha1.StorageClaim{}
		claim.Name = maintenanceMode.Spec.TargetID
		if err := r.get(claim); err != nil {
			return ctrl.Result{}, err
		}

		targetName := claim.Spec.StorageClient
		if targetName == "" {
			return ctrl.Result{}, fmt.Errorf("StorageClaim %s does not have a StorageClient defined", claim.Name)
		}

		// Fetch each StorageClient at most once per reconcile.
		target := targetedClients[targetName]
		if target == nil {
			target = &v1alpha1.StorageClient{}
			target.Name = targetName
			if err := r.get(target); err != nil {
				return ctrl.Result{}, err
			}
			targetedClients[targetName] = target
		}

		if !target.Status.InMaintenanceMode {
			continue
		}
		if err := r.updateStatusCompletedForMM(maintenanceMode); err != nil {
			return ctrl.Result{}, fmt.Errorf("failed to update status for MaintenanceMode %s: %w", maintenanceMode.Name, err)
		}
	}

	clientList := &v1alpha1.StorageClientList{}
	if err := r.list(clientList); err != nil {
		r.log.Error(err, "failed to list the Storage Clients")
		return reconcile.Result{}, err
	}

	for idx := range clientList.Items {
		candidate := &clientList.Items[idx]
		_, wanted := targetedClients[candidate.Name]
		if wanted == candidate.Status.InMaintenanceMode {
			continue
		}
		if err := r.toggleMaintenanceModeForClient(candidate, wanted); err != nil {
			return ctrl.Result{}, err
		}
	}

	return ctrl.Result{}, nil
}

// toggleMaintenanceModeForClient asks the provider behind storageClient to
// enable or disable maintenance mode for the client's consumer ID.
//
// It opens a provider client against the StorageClient's provider endpoint,
// issues the request and closes the connection before returning. The
// underlying error is wrapped with %w so callers can still inspect it with
// errors.Is / errors.As.
func (r *MaintenanceModeReconciler) toggleMaintenanceModeForClient(storageClient *v1alpha1.StorageClient, enable bool) error {
	providerClient, err := providerclient.NewProviderClient(
		r.ctx,
		storageClient.Spec.StorageProviderEndpoint,
		utils.OcsClientTimeout,
	)
	if err != nil {
		return fmt.Errorf(
			"failed to create provider client with endpoint %v: %w",
			storageClient.Spec.StorageProviderEndpoint,
			err,
		)
	}
	// Close client-side connections.
	defer providerClient.Close()

	if _, err = providerClient.RequestMaintenanceMode(r.ctx, storageClient.Status.ConsumerID, enable); err != nil {
		return fmt.Errorf("failed to Request maintenance mode: %w", err)
	}
	return nil
}

// updateStatusCompletedForMM marks maintenanceMode as completed: it sets the
// status State to completed, records the observed generation and sets the
// FailoverActivated condition to true.
//
// The status update is skipped when the object already carries all of that
// for its current generation. Any error from the status update is returned.
func (r *MaintenanceModeReconciler) updateStatusCompletedForMM(maintenanceMode *ramenv1alpha1.MaintenanceMode) error {
	// Ramen reads the State and Conditions in order to determine that the MaintenanceMode is Completed

	condition := metav1.Condition{
		Type:               string(ramenv1alpha1.MModeConditionFailoverActivated),
		Status:             metav1.ConditionTrue,
		Reason:             string(ramenv1alpha1.MModeStateCompleted),
		ObservedGeneration: maintenanceMode.Generation,
	}

	// Compare field by field and leave LastTransitionTime out: it is stamped
	// when the condition is stored, so a whole-struct equality check against
	// the freshly built condition would never match.
	conditionCurrent := slices.ContainsFunc(maintenanceMode.Status.Conditions, func(c metav1.Condition) bool {
		return c.Type == condition.Type &&
			c.Status == condition.Status &&
			c.Reason == condition.Reason &&
			c.Message == condition.Message &&
			c.ObservedGeneration == condition.ObservedGeneration
	})
	stateCurrent := maintenanceMode.Status.State == ramenv1alpha1.MModeStateCompleted &&
		maintenanceMode.Status.ObservedGeneration == maintenanceMode.Generation

	if stateCurrent && conditionCurrent {
		return nil
	}

	maintenanceMode.Status.State = ramenv1alpha1.MModeStateCompleted
	maintenanceMode.Status.ObservedGeneration = maintenanceMode.Generation
	meta.SetStatusCondition(&maintenanceMode.Status.Conditions, condition)

	return r.Client.Status().Update(r.ctx, maintenanceMode)
}

// list populates obj through the embedded client, using the context stored on
// the reconciler by the current Reconcile call.
func (r *MaintenanceModeReconciler) list(obj client.ObjectList, opts ...client.ListOption) error {
	return r.Client.List(r.ctx, obj, opts...)
}

// get fetches obj in place through the embedded client. The lookup key is
// derived from the name and namespace already set on obj, and the context is
// the one stored on the reconciler by the current Reconcile call.
func (r *MaintenanceModeReconciler) get(obj client.Object, opts ...client.GetOption) error {
	key := client.ObjectKeyFromObject(obj)
	return r.Client.Get(r.ctx, key, obj, opts...)
}
25 changes: 24 additions & 1 deletion internal/controller/operatorconfigmap_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ type OperatorConfigMapReconciler struct {
OperatorNamespace string
ConsolePort int32
Scheme *runtime.Scheme
AvailableCrds map[string]bool

log logr.Logger
ctx context.Context
Expand Down Expand Up @@ -157,7 +158,20 @@ func (c *OperatorConfigMapReconciler) SetupWithManager(mgr ctrl.Manager) error {
Owns(&csiopv1a1.OperatorConfig{}, builder.WithPredicates(generationChangePredicate)).
Owns(&csiopv1a1.Driver{}, builder.WithPredicates(generationChangePredicate)).
Watches(&configv1.ClusterVersion{}, enqueueConfigMapRequest, clusterVersionPredicates).
Watches(&extv1.CustomResourceDefinition{}, enqueueConfigMapRequest, builder.OnlyMetadata).
Watches(
&extv1.CustomResourceDefinition{},
enqueueConfigMapRequest,
builder.WithPredicates(
utils.NamePredicate(MaintenanceModeCRDName),
utils.EventTypePredicate(
!c.AvailableCrds[MaintenanceModeCRDName],
false,
c.AvailableCrds[MaintenanceModeCRDName],
false,
),
),
builder.OnlyMetadata,
).
Watches(&opv1a1.Subscription{}, enqueueConfigMapRequest, subscriptionPredicates).
Watches(&admrv1.ValidatingWebhookConfiguration{}, enqueueConfigMapRequest, webhookPredicates).
Watches(&v1alpha1.StorageClient{}, enqueueConfigMapRequest, builder.WithPredicates(predicate.AnnotationChangedPredicate{}))
Expand Down Expand Up @@ -188,6 +202,15 @@ func (c *OperatorConfigMapReconciler) Reconcile(ctx context.Context, req ctrl.Re
c.log = log.FromContext(ctx, "OperatorConfigMap", req)
c.log.Info("Reconciling OperatorConfigMap")

crd := &metav1.PartialObjectMetadata{}
crd.SetGroupVersionKind(extv1.SchemeGroupVersion.WithKind("CustomResourceDefinition"))
crd.Name = MaintenanceModeCRDName
if err := c.Client.Get(ctx, client.ObjectKeyFromObject(crd), crd); client.IgnoreNotFound(err) != nil {
c.log.Error(err, "Failed to get CRD", "CRD", crd.Name)
return reconcile.Result{}, err
}
utils.AssertEqual(c.AvailableCrds[crd.Name], crd.UID != "", utils.ExitCodeThatShouldRestartTheProcess)

c.operatorConfigMap = &corev1.ConfigMap{}
c.operatorConfigMap.Name = req.Name
c.operatorConfigMap.Namespace = req.Namespace
Expand Down
Loading
Loading