Skip to content

Commit

Permalink
Merge pull request #569 from adrianchiris/add-disable-plugins
Browse files Browse the repository at this point in the history
Add disable plugins
  • Loading branch information
zeeke authored Feb 2, 2024
2 parents 1a3019e + baa18d0 commit e409337
Show file tree
Hide file tree
Showing 20 changed files with 428 additions and 91 deletions.
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,37 @@ This feature was created to support deployments where the user want to use some
communication like storage network or out of band managment and the virtual functions must exist on boot and not only
after the operator and config-daemon are running.

#### Disabling SR-IOV Config Daemon plugins

It is possible to disable SR-IOV network operator config daemon plugins in case their operation
is not needed or un-desirable.

As an example, some plugins perform vendor specific firmware configuration
to enable SR-IOV (e.g `mellanox` plugin). certain deployment environments may prefer to perform such configuration
once during node provisioning, while ensuring the configuration will be compatible with any sriov network node policy
defined for the particular environment. This will reduce or completely eliminate the need for reboot of nodes during SR-IOV
configurations by the operator.

This can be done by setting SriovOperatorConfig `default` CR `spec.disablePlugins` with the list of desired plugins
to disable.

**Example**:

```yaml
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovOperatorConfig
metadata:
name: default
namespace: sriov-network-operator
spec:
...
disablePlugins:
- mellanox
...
```

> **NOTE**: Currently only `mellanox` plugin can be disabled.

## Components and design

This operator is split into 2 components:
Expand Down
18 changes: 18 additions & 0 deletions api/v1/sriovoperatorconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// PluginNameValue defines the plugin name
// +kubebuilder:validation:Enum=mellanox
type PluginNameValue string

// PluginNameSlice defines a slice of PluginNameValue
type PluginNameSlice []PluginNameValue

// ToStringSlice converts PluginNameSlice to string slice
func (pns PluginNameSlice) ToStringSlice() []string {
ss := make([]string, 0, len(pns))
for _, v := range pns {
ss = append(ss, string(v))
}
return ss
}

// SriovOperatorConfigSpec defines the desired state of SriovOperatorConfig
type SriovOperatorConfigSpec struct {
// NodeSelector selects the nodes to be configured
Expand All @@ -45,6 +61,8 @@ type SriovOperatorConfigSpec struct {
ConfigurationMode ConfigurationModeType `json:"configurationMode,omitempty"`
// Flag to enable Container Device Interface mode for SR-IOV Network Device Plugin
UseCDI bool `json:"useCDI,omitempty"`
// DisablePlugins is a list of sriov-network-config-daemon plugins to disable
DisablePlugins PluginNameSlice `json:"disablePlugins,omitempty"`
}

// SriovOperatorConfigStatus defines the observed state of SriovOperatorConfig
Expand Down
24 changes: 24 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions bindata/manifests/daemon/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ spec:
{{- if .UsedSystemdMode}}
- --use-systemd-service
{{- end }}
{{- with index . "DisablePlugins" }}
- --disable-plugins={{.}}
{{- end }}
env:
- name: NODE_NAME
valueFrom:
Expand Down
38 changes: 35 additions & 3 deletions cmd/sriov-network-config-daemon/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,29 @@ import (
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
)

// stringList is a list of strings, implements pflag.Value interface
type stringList []string

func (sl *stringList) String() string {
return strings.Join(*sl, ",")
}

func (sl *stringList) Set(arg string) error {
elems := strings.Split(arg, ",")

for _, elem := range elems {
if len(elem) == 0 {
return fmt.Errorf("empty plugin name")
}
*sl = append(*sl, elem)
}
return nil
}

func (sl *stringList) Type() string {
return "CommaSeparatedString"
}

var (
startCmd = &cobra.Command{
Use: "start",
Expand All @@ -55,9 +78,10 @@ var (
}

startOpts struct {
kubeconfig string
nodeName string
systemd bool
kubeconfig string
nodeName string
systemd bool
disabledPlugins stringList
}
)

Expand All @@ -66,6 +90,7 @@ func init() {
startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)")
startCmd.PersistentFlags().StringVar(&startOpts.nodeName, "node-name", "", "kubernetes node name daemon is managing")
startCmd.PersistentFlags().BoolVar(&startOpts.systemd, "use-systemd-service", false, "use config daemon in systemd mode")
startCmd.PersistentFlags().VarP(&startOpts.disabledPlugins, "disable-plugins", "", "comma-separated list of plugins to disable")
}

func runStartCmd(cmd *cobra.Command, args []string) error {
Expand All @@ -88,6 +113,12 @@ func runStartCmd(cmd *cobra.Command, args []string) error {
}
vars.NodeName = startOpts.nodeName

for _, p := range startOpts.disabledPlugins {
if _, ok := vars.DisableablePlugins[p]; !ok {
return fmt.Errorf("%s plugin cannot be disabled", p)
}
}

// This channel is used to ensure all spawned goroutines exit when we exit.
stopCh := make(chan struct{})
defer close(stopCh)
Expand Down Expand Up @@ -243,6 +274,7 @@ func runStartCmd(cmd *cobra.Command, args []string) error {
syncCh,
refreshCh,
eventRecorder,
startOpts.disabledPlugins,
).Run(stopCh, exitCh)
if err != nil {
setupLog.Error(err, "failed to run daemon")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ spec:
disableDrain:
description: Flag to disable nodes drain during debugging
type: boolean
disablePlugins:
description: DisablePlugins is a list of sriov-network-config-daemon
plugins to disable
items:
description: PluginNameValue defines the plugin name
enum:
- mellanox
type: string
type: array
enableInjector:
description: Flag to control whether the network resource injector
webhook shall be deployed
Expand Down
6 changes: 6 additions & 0 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,12 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context,
logger.V(1).Info("New cni bin found", "CNIBinPath", envCniBinPath)
data.Data["CNIBinPath"] = envCniBinPath
}

if len(dc.Spec.DisablePlugins) > 0 {
logger.V(1).Info("DisablePlugins provided", "DisablePlugins", dc.Spec.DisablePlugins)
data.Data["DisablePlugins"] = strings.Join(dc.Spec.DisablePlugins.ToStringSlice(), ",")
}

objs, err := render.RenderDir(consts.ConfigDaemonPath, &data)
if err != nil {
logger.Error(err, "Fail to render config daemon manifests")
Expand Down
38 changes: 36 additions & 2 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package controllers

import (
goctx "context"
"strings"

admv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -32,11 +33,11 @@ var _ = Describe("Operator", func() {

It("should have webhook enable", func() {
mutateCfg := &admv1.MutatingWebhookConfiguration{}
err := util.WaitForNamespacedObject(mutateCfg, k8sClient, testNamespace, "sriov-operator-webhook-config", util.RetryInterval, util.APITimeout)
err := util.WaitForNamespacedObject(mutateCfg, k8sClient, testNamespace, "sriov-operator-webhook-config", util.RetryInterval, util.APITimeout*3)
Expect(err).NotTo(HaveOccurred())

validateCfg := &admv1.ValidatingWebhookConfiguration{}
err = util.WaitForNamespacedObject(validateCfg, k8sClient, testNamespace, "sriov-operator-webhook-config", util.RetryInterval, util.APITimeout)
err = util.WaitForNamespacedObject(validateCfg, k8sClient, testNamespace, "sriov-operator-webhook-config", util.RetryInterval, util.APITimeout*3)
Expect(err).NotTo(HaveOccurred())
})

Expand Down Expand Up @@ -173,5 +174,38 @@ var _ = Describe("Operator", func() {
}, util.APITimeout, util.RetryInterval).Should(Equal(config.Spec.ConfigDaemonNodeSelector))
})

It("should not render disable-plugins cmdline flag of sriov-network-config-daemon if disablePlugin not provided in spec", func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
err := util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

Eventually(func() string {
daemonSet := &appsv1.DaemonSet{}
err := k8sClient.Get(goctx.TODO(), types.NamespacedName{Name: "sriov-network-config-daemon", Namespace: testNamespace}, daemonSet)
if err != nil {
return ""
}
return strings.Join(daemonSet.Spec.Template.Spec.Containers[0].Args, " ")
}, util.APITimeout*10, util.RetryInterval).Should(And(Not(BeEmpty()), Not(ContainSubstring("disable-plugins"))))
})

It("should render disable-plugins cmdline flag of sriov-network-config-daemon if disablePlugin provided in spec", func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
err := util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

config.Spec.DisablePlugins = sriovnetworkv1.PluginNameSlice{"mellanox"}
err = k8sClient.Update(goctx.TODO(), config)
Expect(err).NotTo(HaveOccurred())

Eventually(func() string {
daemonSet := &appsv1.DaemonSet{}
err := k8sClient.Get(goctx.TODO(), types.NamespacedName{Name: "sriov-network-config-daemon", Namespace: testNamespace}, daemonSet)
if err != nil {
return ""
}
return strings.Join(daemonSet.Spec.Template.Spec.Containers[0].Args, " ")
}, util.APITimeout*10, util.RetryInterval).Should(ContainSubstring("disable-plugins=mellanox"))
})
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ spec:
disableDrain:
description: Flag to disable nodes drain during debugging
type: boolean
disablePlugins:
description: DisablePlugins is a list of sriov-network-config-daemon
plugins to disable
items:
description: PluginNameValue defines the plugin name
enum:
- mellanox
type: string
type: array
enableInjector:
description: Flag to control whether the network resource injector
webhook shall be deployed
Expand Down
29 changes: 17 additions & 12 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ type Daemon struct {

nodeState *sriovnetworkv1.SriovNetworkNodeState

enabledPlugins map[string]plugin.VendorPlugin
// list of disabled plugins
disabledPlugins []string

loadedPlugins map[string]plugin.VendorPlugin

HostHelpers helper.HostHelpersInterface

Expand Down Expand Up @@ -133,6 +136,7 @@ func New(
syncCh <-chan struct{},
refreshCh chan<- Message,
er *EventRecorder,
disabledPlugins []string,
) *Daemon {
return &Daemon{
client: client,
Expand Down Expand Up @@ -165,7 +169,8 @@ func New(
workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.NewMaxOfRateLimiter(
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(updateDelay), 1)},
workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, maxUpdateBackoff)), "SriovNetworkNodeState"),
eventRecorder: er,
eventRecorder: er,
disabledPlugins: disabledPlugins,
}
}

Expand Down Expand Up @@ -508,8 +513,8 @@ func (dn *Daemon) nodeStateSyncHandler() error {
latestState.Status = updatedState.Status

// load plugins if it has not loaded
if len(dn.enabledPlugins) == 0 {
dn.enabledPlugins, err = enablePlugins(latestState, dn.HostHelpers)
if len(dn.loadedPlugins) == 0 {
dn.loadedPlugins, err = loadPlugins(latestState, dn.HostHelpers, dn.disabledPlugins)
if err != nil {
log.Log.Error(err, "nodeStateSyncHandler(): failed to enable vendor plugins")
return err
Expand All @@ -520,7 +525,7 @@ func (dn *Daemon) nodeStateSyncHandler() error {
reqDrain := false

// check if any of the plugins required to drain or reboot the node
for k, p := range dn.enabledPlugins {
for k, p := range dn.loadedPlugins {
d, r := false, false
if dn.nodeState.GetName() == "" {
log.Log.V(0).Info("nodeStateSyncHandler(): calling OnNodeStateChange for a new node state")
Expand Down Expand Up @@ -570,7 +575,7 @@ func (dn *Daemon) nodeStateSyncHandler() error {
log.Log.V(0).Info("nodeStateSyncHandler(): aggregated daemon",
"drain-required", reqDrain, "reboot-required", reqReboot, "disable-drain", dn.disableDrain)

for k, p := range dn.enabledPlugins {
for k, p := range dn.loadedPlugins {
// Skip both the general and virtual plugin apply them last
if k != GenericPluginName && k != VirtualPluginName {
err := p.Apply()
Expand Down Expand Up @@ -617,23 +622,23 @@ func (dn *Daemon) nodeStateSyncHandler() error {

if !reqReboot && !vars.UsingSystemdMode {
// For BareMetal machines apply the generic plugin
selectedPlugin, ok := dn.enabledPlugins[GenericPluginName]
selectedPlugin, ok := dn.loadedPlugins[GenericPluginName]
if ok {
// Apply generic_plugin last
// Apply generic plugin last
err = selectedPlugin.Apply()
if err != nil {
log.Log.Error(err, "nodeStateSyncHandler(): generic_plugin fail to apply")
log.Log.Error(err, "nodeStateSyncHandler(): generic plugin fail to apply")
return err
}
}

// For Virtual machines apply the virtual plugin
selectedPlugin, ok = dn.enabledPlugins[VirtualPluginName]
selectedPlugin, ok = dn.loadedPlugins[VirtualPluginName]
if ok {
// Apply virtual_plugin last
// Apply virtual plugin last
err = selectedPlugin.Apply()
if err != nil {
log.Log.Error(err, "nodeStateSyncHandler(): virtual_plugin failed to apply")
log.Log.Error(err, "nodeStateSyncHandler(): virtual plugin failed to apply")
return err
}
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/daemon/daemon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,10 @@ var _ = Describe("Config Daemon", func() {
syncCh,
refreshCh,
er,
nil,
)

sut.enabledPlugins = map[string]plugin.VendorPlugin{generic.PluginName: &fake.FakePlugin{}}
sut.loadedPlugins = map[string]plugin.VendorPlugin{generic.PluginName: &fake.FakePlugin{PluginName: "fake"}}

go func() {
defer GinkgoRecover()
Expand Down
Loading

0 comments on commit e409337

Please sign in to comment.