diff --git a/bundle/manifests/lvms-operator.clusterserviceversion.yaml b/bundle/manifests/lvms-operator.clusterserviceversion.yaml
index fc2356dff..6f72addd7 100644
--- a/bundle/manifests/lvms-operator.clusterserviceversion.yaml
+++ b/bundle/manifests/lvms-operator.clusterserviceversion.yaml
@@ -146,6 +146,8 @@ spec:
           verbs:
           - get
           - list
+          - patch
+          - update
           - watch
         - apiGroups:
           - ""
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index 498674867..8cd41078d 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -52,6 +52,8 @@ rules:
   verbs:
   - get
   - list
+  - patch
+  - update
   - watch
 - apiGroups:
   - ""
diff --git a/controllers/lvmcluster_controller.go b/controllers/lvmcluster_controller.go
index 56171d7ea..98e912b76 100644
--- a/controllers/lvmcluster_controller.go
+++ b/controllers/lvmcluster_controller.go
@@ -213,73 +213,109 @@ func (r *LVMClusterReconciler) reconcile(ctx context.Context, instance *lvmv1alp
     return ctrl.Result{}, nil
 }
 
-func (r *LVMClusterReconciler) updateLVMClusterStatus(ctx context.Context, instance *lvmv1alpha1.LVMCluster) error {
-
-    vgNodeMap := make(map[string][]lvmv1alpha1.NodeStatus)
+func (r *LVMClusterReconciler) getNodes(ctx context.Context) (*corev1.NodeList, error) {
+    nodes := &corev1.NodeList{}
+    err := r.Client.List(ctx, nodes)
+    if err != nil {
+        return nil, fmt.Errorf("could not list nodes: %w", err)
+    }
+    return nodes, nil
+}
 
-    vgNodeStatusList := &lvmv1alpha1.LVMVolumeGroupNodeStatusList{}
-    err := r.Client.List(ctx, vgNodeStatusList, client.InNamespace(r.Namespace))
+func (r *LVMClusterReconciler) updateLVMClusterStatus(ctx context.Context, lvmCluster *lvmv1alpha1.LVMCluster) error {
+    // first get all nodes from the cluster
+    nodes, err := r.getNodes(ctx)
     if err != nil {
-        r.Log.Error(err, "failed to list LVMVolumeGroupNodeStatus")
         return err
     }
 
-    expectedVgCount, err := r.getExpectedVgCount(ctx, instance)
+    // group the nodes by device class, honoring each device class's node selector (all nodes if no selector is set)
+    nodesByDeviceClassNodeSelector, err := r.nodesByDeviceClass(nodes, lvmCluster)
     if err != nil {
         r.Log.Error(err, "failed to calculate expected VG count")
         return err
     }
 
-    var readyVGCount int
-    var isReady, isDegraded, isFailed bool
-
-    for _, nodeItem := range vgNodeStatusList.Items {
-        for _, item := range nodeItem.Spec.LVMVGStatus {
-            if item.Status == lvmv1alpha1.VGStatusReady {
-                readyVGCount++
-                isReady = true
-            } else if item.Status == lvmv1alpha1.VGStatusDegraded {
-                isDegraded = true
-            } else if item.Status == lvmv1alpha1.VGStatusFailed {
-                isFailed = true
-            }
+    nodeStatusList := &lvmv1alpha1.LVMVolumeGroupNodeStatusList{}
+    if err := r.Client.List(ctx, nodeStatusList, client.InNamespace(r.Namespace)); err != nil {
+        r.Log.Error(err, "failed to list LVMVolumeGroupNodeStatus")
+        return err
+    }
 
-            vgNodeMap[item.Name] = append(vgNodeMap[item.Name],
-                lvmv1alpha1.NodeStatus{
-                    Node:    nodeItem.Name,
-                    Reason:  item.Reason,
-                    Status:  item.Status,
-                    Devices: item.Devices,
-                },
-            )
-        }
+    // index the node status objects by node name to correlate VG information to a node
+    nodeStatusByNodeName := make(map[string]lvmv1alpha1.LVMVolumeGroupNodeStatus, len(nodeStatusList.Items))
+    for _, nodeStatus := range nodeStatusList.Items {
+        nodeStatusByNodeName[nodeStatus.GetName()] = nodeStatus
     }
 
-    instance.Status.State = lvmv1alpha1.LVMStatusProgressing
-    instance.Status.Ready = false
+    lvmCluster.Status.State = lvmv1alpha1.LVMStatusProgressing
+    lvmCluster.Status.Ready = false
+    deviceClassStatus := make([]lvmv1alpha1.DeviceClassStatus, 0, len(lvmCluster.Spec.Storage.DeviceClasses))
+    // all device classes in the lvmCluster are expected to become ready in the end
+    expectedReadyDeviceClasses, readyDeviceClasses := len(lvmCluster.Spec.Storage.DeviceClasses), 0
+
+    for deviceClassName, nodesBySelector := range nodesByDeviceClassNodeSelector {
+        logger := r.Log.WithValues("deviceClass", deviceClassName)
+        var nodeStatusForDeviceClass []lvmv1alpha1.NodeStatus
+        // for every device class, all nodes selected for it must report a ready VG
+        expectedReadyNodes, readyNodes := len(nodesBySelector), 0
+        for _, node := range nodesBySelector {
+            logger = logger.WithValues("node", node.GetName())
+            nodeStatus := nodeStatusByNodeName[node.GetName()]
+            for _, vgStatusOnNode := range nodeStatus.Spec.LVMVGStatus {
+                if vgStatusOnNode.Name != deviceClassName {
+                    continue
+                }
+                nodeStatusForDeviceClass = append(nodeStatusForDeviceClass, lvmv1alpha1.NodeStatus{
+                    Node:    node.GetName(),
+                    Status:  vgStatusOnNode.Status,
+                    Reason:  vgStatusOnNode.Reason,
+                    Devices: vgStatusOnNode.Devices,
+                })
+                logger = logger.WithValues(
+                    "status", vgStatusOnNode.Status,
+                    "reason", vgStatusOnNode.Reason,
+                    "devices", vgStatusOnNode.Devices)
+                // if the VG on this node is ready, count it towards the ready nodes; otherwise
+                // the whole cluster is marked degraded or failed accordingly
+                if vgStatusOnNode.Status == lvmv1alpha1.VGStatusReady {
+                    readyNodes++
+                    logger.V(1).Info("vg on node is ready")
+                } else if vgStatusOnNode.Status == lvmv1alpha1.VGStatusDegraded {
+                    lvmCluster.Status.State = lvmv1alpha1.LVMStatusDegraded
+                    logger.V(1).Info("vg on node is degraded")
+                } else if vgStatusOnNode.Status == lvmv1alpha1.VGStatusFailed {
+                    lvmCluster.Status.State = lvmv1alpha1.LVMStatusFailed
+                    logger.V(1).Info("vg on node is failed")
+                }
+            }
+        }
+        if expectedReadyNodes == readyNodes {
+            logger.V(1).Info("deviceClass is ready")
+            readyDeviceClasses++
+        } else {
+            logger.V(1).Info("deviceClass is not ready")
+        }
 
-    if isFailed {
-        instance.Status.State = lvmv1alpha1.LVMStatusFailed
-    } else if isDegraded {
-        instance.Status.State = lvmv1alpha1.LVMStatusDegraded
-    } else if isReady && expectedVgCount == readyVGCount {
-        instance.Status.State = lvmv1alpha1.LVMStatusReady
-        instance.Status.Ready = true
+        deviceClassStatus = append(deviceClassStatus, lvmv1alpha1.DeviceClassStatus{
+            Name:       deviceClassName,
+            NodeStatus: nodeStatusForDeviceClass,
+        })
     }
 
-    allVgStatuses := []lvmv1alpha1.DeviceClassStatus{}
-    for key, val := range vgNodeMap {
-        allVgStatuses = append(allVgStatuses,
-            lvmv1alpha1.DeviceClassStatus{
-                Name:       key,
-                NodeStatus: val,
-            },
-        )
+    logger := r.Log.V(1).
+        WithValues("expectedReady", expectedReadyDeviceClasses, "ready", readyDeviceClasses)
+    if expectedReadyDeviceClasses == readyDeviceClasses {
+        logger.Info("all device classes are ready")
+        lvmCluster.Status.State = lvmv1alpha1.LVMStatusReady
+        lvmCluster.Status.Ready = true
+    } else {
+        logger.Info("not all device classes are ready")
     }
 
-    instance.Status.DeviceClassStatuses = allVgStatuses
+    lvmCluster.Status.DeviceClassStatuses = deviceClassStatus
 
     // Apply status changes
-    err = r.Client.Status().Update(ctx, instance)
+    err = r.Client.Status().Update(ctx, lvmCluster)
     if err != nil {
         if errors.IsNotFound(err) {
             r.Log.Error(err, "failed to update status")
@@ -292,37 +328,32 @@ func (r *LVMClusterReconciler) updateLVMClusterStatus(ctx context.Context, insta
     return nil
 }
 
-func (r *LVMClusterReconciler) getExpectedVgCount(ctx context.Context, instance *lvmv1alpha1.LVMCluster) (int, error) {
-
-    var vgCount int
-
-    nodeList := &corev1.NodeList{}
-    err := r.Client.List(ctx, nodeList)
-    if err != nil {
-        r.Log.Error(err, "failed to list Nodes")
-        return 0, err
-    }
+func (r *LVMClusterReconciler) nodesByDeviceClass(allNodes *corev1.NodeList, instance *lvmv1alpha1.LVMCluster) (map[string][]corev1.Node, error) {
+    nodesForDeviceClasses := map[string][]corev1.Node{}
 
     for _, deviceClass := range instance.Spec.Storage.DeviceClasses {
         if deviceClass.NodeSelector == nil {
-            vgCount += len(nodeList.Items)
+            nodesForDeviceClasses[deviceClass.Name] = allNodes.Items
             continue
         }
 
-        for i := range nodeList.Items {
-            matches, err := corev1helper.MatchNodeSelectorTerms(&nodeList.Items[i], deviceClass.NodeSelector)
+        nodes := make([]corev1.Node, 0, len(allNodes.Items))
+        for i := range allNodes.Items {
+            matches, err := corev1helper.MatchNodeSelectorTerms(&allNodes.Items[i], deviceClass.NodeSelector)
             if err != nil {
-                r.Log.Error(err, "failed to match node selector")
-                return 0, err
+                r.Log.Error(err, "failed to match node selector for deviceClass",
+                    "node", allNodes.Items[i].GetName(), "deviceClass", deviceClass.Name)
+                continue
             }
             if matches {
-                vgCount++
+                nodes = append(nodes, allNodes.Items[i])
             }
         }
+        nodesForDeviceClasses[deviceClass.Name] = nodes
     }
 
-    return vgCount, nil
+    return nodesForDeviceClasses, nil
 }
 
 // checkIfOpenshift checks to see if the operator is running on an OCP cluster.
diff --git a/controllers/node_removal_controller.go b/controllers/node_removal_controller.go
index b72b60771..0450b98fa 100644
--- a/controllers/node_removal_controller.go
+++ b/controllers/node_removal_controller.go
@@ -11,14 +11,14 @@ import (
     "sigs.k8s.io/controller-runtime/pkg/log"
 )
 
-const cleanupFinalizer = "removal.node.lvm.topolvm.io"
+const cleanupFinalizer = "lvm.topolvm.io/node-removal-hook"
 const fieldOwner = "lvms"
 
 type NodeRemovalController struct {
     client.Client
 }
 
-//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch
+//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;patch;update;watch
 //+kubebuilder:rbac:groups=lvm.topolvm.io,resources=lvmvolumegroupnodestatuses,verbs=get;list;watch;create;update;patch;delete
 //+kubebuilder:rbac:groups=lvm.topolvm.io,resources=lvmvolumegroupnodestatuses/status,verbs=get;update;patch
 //+kubebuilder:rbac:groups=lvm.topolvm.io,resources=lvmvolumegroupnodestatuses/finalizers,verbs=update
@@ -41,7 +41,7 @@ func (r *NodeRemovalController) Reconcile(ctx context.Context, req ctrl.Request)
     if node.DeletionTimestamp.IsZero() {
         // Add a finalizer in case the node is fresh or the controller newly deployed
         if needsUpdate := controllerutil.AddFinalizer(node, cleanupFinalizer); needsUpdate {
-            if err := r.Patch(ctx, node, client.Apply, client.ForceOwnership, client.FieldOwner(fieldOwner)); err != nil {
+            if err := r.Update(ctx, node, client.FieldOwner(fieldOwner)); err != nil {
                 return ctrl.Result{}, fmt.Errorf("node finalizer could not be updated: %w", err)
             }
         }
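
For context, outside the patch itself: a minimal sketch of the mutate-finalizer-then-Update pattern that the last hunk switches to (instead of a server-side apply Patch), assuming a standard controller-runtime client. The reconcileNodeFinalizer helper is hypothetical; only the finalizer name and the "lvms" field owner are taken from the patch, and the cleanup step on deletion is elided.

package controllers

import (
    "context"
    "fmt"

    corev1 "k8s.io/api/core/v1"
    "sigs.k8s.io/controller-runtime/pkg/client"
    "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// reconcileNodeFinalizer is a hypothetical helper illustrating the pattern:
// mutate the object's finalizer list in memory with controllerutil, then
// persist the change with a plain Update carrying a field owner.
func reconcileNodeFinalizer(ctx context.Context, c client.Client, node *corev1.Node) error {
    const finalizer = "lvm.topolvm.io/node-removal-hook" // matches the new cleanupFinalizer
    if node.DeletionTimestamp.IsZero() {
        // Node is not being deleted: make sure the finalizer is present.
        if controllerutil.AddFinalizer(node, finalizer) {
            if err := c.Update(ctx, node, client.FieldOwner("lvms")); err != nil {
                return fmt.Errorf("could not add finalizer: %w", err)
            }
        }
        return nil
    }
    // Node is being deleted: run any cleanup (elided), then release the finalizer.
    if controllerutil.RemoveFinalizer(node, finalizer) {
        if err := c.Update(ctx, node, client.FieldOwner("lvms")); err != nil {
            return fmt.Errorf("could not remove finalizer: %w", err)
        }
    }
    return nil
}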