From 31c7fee4eb12e8cc3e9e3965fd66fbe8de893717 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 5 Nov 2024 15:49:43 +0200 Subject: [PATCH] redesign device plugin always deploy sriov network device plugin and use a label to enable or disable it on the nodes Signed-off-by: Sebastian Sch --- .../sriovnetworknodepolicy_controller.go | 29 +++++++++-- controllers/sriovoperatorconfig_controller.go | 8 +-- .../sriovoperatorconfig_controller_test.go | 52 +------------------ controllers/suite_test.go | 7 +++ deploy/clusterrole.yaml | 6 --- deploy/role.yaml | 12 ++--- .../templates/clusterrole.yaml | 6 --- .../templates/role.yaml | 11 ++-- pkg/consts/constants.go | 4 ++ pkg/utils/cluster.go | 46 ++++++++++++++-- 10 files changed, 92 insertions(+), 89 deletions(-) diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index be46880b7c..628bfb103d 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" "reflect" "sort" "strings" @@ -133,10 +134,6 @@ func (r *SriovNetworkNodePolicyReconciler) Reconcile(ctx context.Context, req ct if err = r.syncDevicePluginConfigMap(ctx, defaultOpConf, policyList, nodeList); err != nil { return reconcile.Result{}, err } - // Render and sync Daemon objects - if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultOpConf, policyList); err != nil { - return reconcile.Result{}, err - } // All was successful. Request that this be re-triggered after ResyncPeriod, // so we can reconcile state again. @@ -219,6 +216,30 @@ func (r *SriovNetworkNodePolicyReconciler) syncDevicePluginConfigMap(ctx context return err } configData[node.Name] = string(config) + + if data.ResourceList == nil || len(data.ResourceList) == 0 { + // if we don't have policies we should add the disabled label for the device plugin + err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginLabel, constants.SriovDevicePluginLabelDisabled, r.Client) + if err != nil { + logger.Error(err, "failed to label node for device plugin label", + "labelKey", + constants.SriovDevicePluginLabel, + "labelValue", + constants.SriovDevicePluginLabelDisabled) + return err + } + } else { + // if we have policies we should add the enabled label for the device plugin + err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginLabel, constants.SriovDevicePluginLabelEnabled, r.Client) + if err != nil { + logger.Error(err, "failed to label node for device plugin label", + "labelKey", + constants.SriovDevicePluginLabel, + "labelValue", + constants.SriovDevicePluginLabelEnabled) + return err + } + } } cm := &corev1.ConfigMap{ diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index c9f21f428a..f79614c442 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -44,12 +44,12 @@ import ( machinev1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - apply "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply" - consts "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" - render "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -140,7 +140,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. return reconcile.Result{}, err } - if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig, policyList); err != nil { + if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig); err != nil { return reconcile.Result{}, err } diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 47e4fc09df..8be8087b1f 100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -2,7 +2,6 @@ package controllers import ( "context" - "fmt" "os" "strings" "sync" @@ -30,7 +29,7 @@ import ( mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" - util "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" ) var _ = Describe("SriovOperatorConfig controller", Ordered, func() { @@ -427,7 +426,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { metricsDaemonset := appsv1.DaemonSet{} err := util.WaitForNamespacedObject(&metricsDaemonset, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(metricsDaemonset.Spec.Template.Spec.NodeSelector).To((Equal(nodeSelector))) + g.Expect(metricsDaemonset.Spec.Template.Spec.NodeSelector).To(Equal(nodeSelector)) }).Should(Succeed()) }) @@ -521,53 +520,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n"))) }, "1s").Should(Succeed()) }) - - It("should reconcile to a converging state when multiple node policies are set", func() { - By("Creating a consistent number of node policies") - for i := 0; i < 30; i++ { - p := &sriovnetworkv1.SriovNetworkNodePolicy{ - ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: fmt.Sprintf("p%d", i)}, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - Priority: 99, - NodeSelector: map[string]string{"foo": fmt.Sprintf("v%d", i)}, - }, - } - err := k8sClient.Create(context.Background(), p) - Expect(err).NotTo(HaveOccurred()) - } - - By("Triggering a the reconcile loop") - config := &sriovnetworkv1.SriovOperatorConfig{} - err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) - Expect(err).NotTo(HaveOccurred()) - if config.ObjectMeta.Labels == nil { - config.ObjectMeta.Labels = make(map[string]string) - } - config.ObjectMeta.Labels["trigger-test"] = "test-reconcile-daemonset" - err = k8sClient.Update(context.Background(), config) - Expect(err).NotTo(HaveOccurred()) - - By("Wait until device-plugin Daemonset's affinity has been calculated") - var expectedAffinity *corev1.Affinity - - Eventually(func(g Gomega) { - daemonSet := &appsv1.DaemonSet{} - err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) - g.Expect(err).NotTo(HaveOccurred()) - // Wait until the last policy (with NodeSelector foo=v29) has been considered at least one time - g.Expect(daemonSet.Spec.Template.Spec.Affinity.String()).To(ContainSubstring("v29")) - expectedAffinity = daemonSet.Spec.Template.Spec.Affinity - }, "3s", "1s").Should(Succeed()) - - By("Verify device-plugin Daemonset's affinity doesn't change over time") - Consistently(func(g Gomega) { - daemonSet := &appsv1.DaemonSet{} - err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(daemonSet.Spec.Template.Spec.Affinity). - To(Equal(expectedAffinity)) - }, "3s", "1s").Should(Succeed()) - }) }) }) diff --git a/controllers/suite_test.go b/controllers/suite_test.go index bc2f13b8e2..9d5492e212 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -188,6 +188,13 @@ var _ = BeforeSuite(func() { } Expect(k8sClient.Create(context.Background(), ns)).Should(Succeed()) + sa := &corev1.ServiceAccount{TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + Namespace: testNamespace, + }} + Expect(k8sClient.Create(context.Background(), sa)).Should(Succeed()) + // Create openshift Infrastructure infra := &openshiftconfigv1.Infrastructure{ ObjectMeta: metav1.ObjectMeta{ diff --git a/deploy/clusterrole.yaml b/deploy/clusterrole.yaml index e7a5960616..e7a84394e1 100644 --- a/deploy/clusterrole.yaml +++ b/deploy/clusterrole.yaml @@ -45,12 +45,6 @@ rules: - apiGroups: [""] resources: ["nodes"] verbs: ["get", "list", "watch", "patch", "update"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["*"] -- apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["get"] - apiGroups: [ "config.openshift.io" ] resources: [ "infrastructures" ] verbs: [ "get", "list", "watch" ] diff --git a/deploy/role.yaml b/deploy/role.yaml index 0a6c27a218..3bdcdc1458 100644 --- a/deploy/role.yaml +++ b/deploy/role.yaml @@ -1,7 +1,6 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - creationTimestamp: null name: sriov-network-operator rules: - apiGroups: @@ -76,13 +75,10 @@ rules: resources: - pods verbs: - - '*' -- apiGroups: - - apps - resources: - - daemonsets - verbs: - - '*' + - "get" + - "list" + - "watch" + - "delete" - apiGroups: - sriovnetwork.openshift.io resources: diff --git a/deployment/sriov-network-operator-chart/templates/clusterrole.yaml b/deployment/sriov-network-operator-chart/templates/clusterrole.yaml index 7cd8fd014e..519d2c05ca 100644 --- a/deployment/sriov-network-operator-chart/templates/clusterrole.yaml +++ b/deployment/sriov-network-operator-chart/templates/clusterrole.yaml @@ -49,12 +49,6 @@ rules: - apiGroups: [""] resources: ["nodes"] verbs: ["get", "list", "watch", "patch", "update"] - - apiGroups: [""] - resources: ["pods"] - verbs: ["*"] - - apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["get"] - apiGroups: [ "config.openshift.io" ] resources: [ "infrastructures" ] verbs: [ "get", "list", "watch" ] diff --git a/deployment/sriov-network-operator-chart/templates/role.yaml b/deployment/sriov-network-operator-chart/templates/role.yaml index 6551b57753..56e5a54879 100644 --- a/deployment/sriov-network-operator-chart/templates/role.yaml +++ b/deployment/sriov-network-operator-chart/templates/role.yaml @@ -82,13 +82,10 @@ rules: resources: - pods verbs: - - '*' - - apiGroups: - - apps - resources: - - daemonsets - verbs: - - '*' + - "get" + - "list" + - "watch" + - "delete" - apiGroups: - sriovnetwork.openshift.io resources: diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index f7025c90d7..539b2d7d8b 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -67,6 +67,10 @@ const ( MachineConfigPoolPausedAnnotationIdle = "Idle" MachineConfigPoolPausedAnnotationPaused = "Paused" + SriovDevicePluginLabel = "sriovnetwork.openshift.io/device-plugin" + SriovDevicePluginLabelEnabled = "Enabled" + SriovDevicePluginLabelDisabled = "Disabled" + NodeDrainAnnotation = "sriovnetwork.openshift.io/state" NodeStateDrainAnnotation = "sriovnetwork.openshift.io/desired-state" NodeStateDrainAnnotationCurrent = "sriovnetwork.openshift.io/current-state" diff --git a/pkg/utils/cluster.go b/pkg/utils/cluster.go index 6f8d72e079..943c7fa779 100644 --- a/pkg/utils/cluster.go +++ b/pkg/utils/cluster.go @@ -128,16 +128,17 @@ func ObjectHasAnnotation(obj metav1.Object, annoKey string, value string) bool { // AnnotateObject adds annotation to a kubernetes object func AnnotateObject(ctx context.Context, obj client.Object, key, value string, c client.Client) error { - log.Log.V(2).Info("AnnotateObject(): Annotate object", - "objectName", obj.GetName(), - "objectKind", obj.GetObjectKind(), - "annotation", value) newObj := obj.DeepCopyObject().(client.Object) if newObj.GetAnnotations() == nil { newObj.SetAnnotations(map[string]string{}) } if newObj.GetAnnotations()[key] != value { + log.Log.V(2).Info("AnnotateObject(): Annotate object", + "objectName", obj.GetName(), + "objectKind", obj.GetObjectKind(), + "annotationKey", key, + "annotationValue", value) newObj.GetAnnotations()[key] = value patch := client.MergeFrom(obj) err := c.Patch(ctx, @@ -161,3 +162,40 @@ func AnnotateNode(ctx context.Context, nodeName string, key, value string, c cli return AnnotateObject(ctx, node, key, value, c) } + +// LabelObject adds label to a kubernetes object +func LabelObject(ctx context.Context, obj client.Object, key, value string, c client.Client) error { + newObj := obj.DeepCopyObject().(client.Object) + if newObj.GetLabels() == nil { + newObj.SetLabels(map[string]string{}) + } + + if newObj.GetLabels()[key] != value { + log.Log.V(2).Info("LabelObject(): label object", + "objectName", obj.GetName(), + "objectKind", obj.GetObjectKind(), + "labelKey", key, + "labelValue", value) + newObj.GetLabels()[key] = value + patch := client.MergeFrom(obj) + err := c.Patch(ctx, + newObj, patch) + if err != nil { + log.Log.Error(err, "LabelObject(): Failed to patch object") + return err + } + } + + return nil +} + +// LabelNode add label to a node +func LabelNode(ctx context.Context, nodeName string, key, value string, c client.Client) error { + node := &corev1.Node{} + err := c.Get(context.TODO(), client.ObjectKey{Name: nodeName}, node) + if err != nil { + return err + } + + return LabelObject(ctx, node, key, value, c) +}