Skip to content

Commit

Permalink
redesign device plugin
Browse files Browse the repository at this point in the history
always deploy sriov network device plugin and use a label to enable or
disable it on the nodes

Signed-off-by: Sebastian Sch <[email protected]>
  • Loading branch information
SchSeba committed Nov 5, 2024
1 parent 823b4d4 commit 31c7fee
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 89 deletions.
29 changes: 25 additions & 4 deletions controllers/sriovnetworknodepolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"

Check failure on line 23 in controllers/sriovnetworknodepolicy_controller.go

View workflow job for this annotation

GitHub Actions / Golangci-lint

File is not `goimports`-ed with -local github.com/k8snetworkplumbingwg/sriov-network-operator (goimports)
"reflect"
"sort"
"strings"
Expand Down Expand Up @@ -133,10 +134,6 @@ func (r *SriovNetworkNodePolicyReconciler) Reconcile(ctx context.Context, req ct
if err = r.syncDevicePluginConfigMap(ctx, defaultOpConf, policyList, nodeList); err != nil {
return reconcile.Result{}, err
}
// Render and sync Daemon objects
if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultOpConf, policyList); err != nil {
return reconcile.Result{}, err
}

// All was successful. Request that this be re-triggered after ResyncPeriod,
// so we can reconcile state again.
Expand Down Expand Up @@ -219,6 +216,30 @@ func (r *SriovNetworkNodePolicyReconciler) syncDevicePluginConfigMap(ctx context
return err
}
configData[node.Name] = string(config)

if data.ResourceList == nil || len(data.ResourceList) == 0 {
// if we don't have policies we should add the disabled label for the device plugin
err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginLabel, constants.SriovDevicePluginLabelDisabled, r.Client)
if err != nil {
logger.Error(err, "failed to label node for device plugin label",
"labelKey",
constants.SriovDevicePluginLabel,
"labelValue",
constants.SriovDevicePluginLabelDisabled)
return err
}
} else {
// if we have policies we should add the enabled label for the device plugin
err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginLabel, constants.SriovDevicePluginLabelEnabled, r.Client)
if err != nil {
logger.Error(err, "failed to label node for device plugin label",
"labelKey",
constants.SriovDevicePluginLabel,
"labelValue",
constants.SriovDevicePluginLabelEnabled)
return err
}
}
}

cm := &corev1.ConfigMap{
Expand Down
8 changes: 4 additions & 4 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ import (
machinev1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
apply "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply"
consts "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms"
render "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
)

Expand Down Expand Up @@ -140,7 +140,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
return reconcile.Result{}, err
}

if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig, policyList); err != nil {
if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig); err != nil {
return reconcile.Result{}, err
}

Expand Down
52 changes: 2 additions & 50 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package controllers

import (
"context"
"fmt"
"os"
"strings"
"sync"
Expand Down Expand Up @@ -30,7 +29,7 @@ import (
mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
util "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util"
)

var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
Expand Down Expand Up @@ -427,7 +426,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
metricsDaemonset := appsv1.DaemonSet{}
err := util.WaitForNamespacedObject(&metricsDaemonset, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(metricsDaemonset.Spec.Template.Spec.NodeSelector).To((Equal(nodeSelector)))
g.Expect(metricsDaemonset.Spec.Template.Spec.NodeSelector).To(Equal(nodeSelector))
}).Should(Succeed())
})

Expand Down Expand Up @@ -521,53 +520,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n")))
}, "1s").Should(Succeed())
})

It("should reconcile to a converging state when multiple node policies are set", func() {
By("Creating a consistent number of node policies")
for i := 0; i < 30; i++ {
p := &sriovnetworkv1.SriovNetworkNodePolicy{
ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: fmt.Sprintf("p%d", i)},
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
Priority: 99,
NodeSelector: map[string]string{"foo": fmt.Sprintf("v%d", i)},
},
}
err := k8sClient.Create(context.Background(), p)
Expect(err).NotTo(HaveOccurred())
}

By("Triggering a the reconcile loop")
config := &sriovnetworkv1.SriovOperatorConfig{}
err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config)
Expect(err).NotTo(HaveOccurred())
if config.ObjectMeta.Labels == nil {
config.ObjectMeta.Labels = make(map[string]string)
}
config.ObjectMeta.Labels["trigger-test"] = "test-reconcile-daemonset"
err = k8sClient.Update(context.Background(), config)
Expect(err).NotTo(HaveOccurred())

By("Wait until device-plugin Daemonset's affinity has been calculated")
var expectedAffinity *corev1.Affinity

Eventually(func(g Gomega) {
daemonSet := &appsv1.DaemonSet{}
err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet)
g.Expect(err).NotTo(HaveOccurred())
// Wait until the last policy (with NodeSelector foo=v29) has been considered at least one time
g.Expect(daemonSet.Spec.Template.Spec.Affinity.String()).To(ContainSubstring("v29"))
expectedAffinity = daemonSet.Spec.Template.Spec.Affinity
}, "3s", "1s").Should(Succeed())

By("Verify device-plugin Daemonset's affinity doesn't change over time")
Consistently(func(g Gomega) {
daemonSet := &appsv1.DaemonSet{}
err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(daemonSet.Spec.Template.Spec.Affinity).
To(Equal(expectedAffinity))
}, "3s", "1s").Should(Succeed())
})
})
})

Expand Down
7 changes: 7 additions & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ var _ = BeforeSuite(func() {
}
Expect(k8sClient.Create(context.Background(), ns)).Should(Succeed())

sa := &corev1.ServiceAccount{TypeMeta: metav1.TypeMeta{},
ObjectMeta: metav1.ObjectMeta{
Name: "default",
Namespace: testNamespace,
}}
Expect(k8sClient.Create(context.Background(), sa)).Should(Succeed())

// Create openshift Infrastructure
infra := &openshiftconfigv1.Infrastructure{
ObjectMeta: metav1.ObjectMeta{
Expand Down
6 changes: 0 additions & 6 deletions deploy/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "watch", "patch", "update"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["*"]
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get"]
- apiGroups: [ "config.openshift.io" ]
resources: [ "infrastructures" ]
verbs: [ "get", "list", "watch" ]
12 changes: 4 additions & 8 deletions deploy/role.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
creationTimestamp: null
name: sriov-network-operator
rules:
- apiGroups:
Expand Down Expand Up @@ -76,13 +75,10 @@ rules:
resources:
- pods
verbs:
- '*'
- apiGroups:
- apps
resources:
- daemonsets
verbs:
- '*'
- "get"
- "list"
- "watch"
- "delete"
- apiGroups:
- sriovnetwork.openshift.io
resources:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,6 @@ rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "watch", "patch", "update"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["*"]
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get"]
- apiGroups: [ "config.openshift.io" ]
resources: [ "infrastructures" ]
verbs: [ "get", "list", "watch" ]
11 changes: 4 additions & 7 deletions deployment/sriov-network-operator-chart/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,10 @@ rules:
resources:
- pods
verbs:
- '*'
- apiGroups:
- apps
resources:
- daemonsets
verbs:
- '*'
- "get"
- "list"
- "watch"
- "delete"
- apiGroups:
- sriovnetwork.openshift.io
resources:
Expand Down
4 changes: 4 additions & 0 deletions pkg/consts/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ const (
MachineConfigPoolPausedAnnotationIdle = "Idle"
MachineConfigPoolPausedAnnotationPaused = "Paused"

SriovDevicePluginLabel = "sriovnetwork.openshift.io/device-plugin"
SriovDevicePluginLabelEnabled = "Enabled"
SriovDevicePluginLabelDisabled = "Disabled"

NodeDrainAnnotation = "sriovnetwork.openshift.io/state"
NodeStateDrainAnnotation = "sriovnetwork.openshift.io/desired-state"
NodeStateDrainAnnotationCurrent = "sriovnetwork.openshift.io/current-state"
Expand Down
46 changes: 42 additions & 4 deletions pkg/utils/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,17 @@ func ObjectHasAnnotation(obj metav1.Object, annoKey string, value string) bool {

// AnnotateObject adds annotation to a kubernetes object
func AnnotateObject(ctx context.Context, obj client.Object, key, value string, c client.Client) error {
log.Log.V(2).Info("AnnotateObject(): Annotate object",
"objectName", obj.GetName(),
"objectKind", obj.GetObjectKind(),
"annotation", value)
newObj := obj.DeepCopyObject().(client.Object)
if newObj.GetAnnotations() == nil {
newObj.SetAnnotations(map[string]string{})
}

if newObj.GetAnnotations()[key] != value {
log.Log.V(2).Info("AnnotateObject(): Annotate object",
"objectName", obj.GetName(),
"objectKind", obj.GetObjectKind(),
"annotationKey", key,
"annotationValue", value)
newObj.GetAnnotations()[key] = value
patch := client.MergeFrom(obj)
err := c.Patch(ctx,
Expand All @@ -161,3 +162,40 @@ func AnnotateNode(ctx context.Context, nodeName string, key, value string, c cli

return AnnotateObject(ctx, node, key, value, c)
}

// LabelObject adds label to a kubernetes object
func LabelObject(ctx context.Context, obj client.Object, key, value string, c client.Client) error {
newObj := obj.DeepCopyObject().(client.Object)
if newObj.GetLabels() == nil {
newObj.SetLabels(map[string]string{})
}

if newObj.GetLabels()[key] != value {
log.Log.V(2).Info("LabelObject(): label object",
"objectName", obj.GetName(),
"objectKind", obj.GetObjectKind(),
"labelKey", key,
"labelValue", value)
newObj.GetLabels()[key] = value
patch := client.MergeFrom(obj)
err := c.Patch(ctx,
newObj, patch)
if err != nil {
log.Log.Error(err, "LabelObject(): Failed to patch object")
return err
}
}

return nil
}

// LabelNode add label to a node
func LabelNode(ctx context.Context, nodeName string, key, value string, c client.Client) error {
node := &corev1.Node{}
err := c.Get(context.TODO(), client.ObjectKey{Name: nodeName}, node)
if err != nil {
return err
}

return LabelObject(ctx, node, key, value, c)
}

0 comments on commit 31c7fee

Please sign in to comment.