Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into sync-20240201
Browse files Browse the repository at this point in the history
  • Loading branch information
zeeke committed Feb 8, 2024
2 parents e1d5400 + f8405cf commit 3db20ae
Show file tree
Hide file tree
Showing 30 changed files with 772 additions and 302 deletions.
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,37 @@ This feature was created to support deployments where the user want to use some
communication like storage network or out of band management and the virtual functions must exist on boot and not only
after the operator and config-daemon are running.

#### Disabling SR-IOV Config Daemon plugins

It is possible to disable SR-IOV network operator config daemon plugins when their operation
is not needed or is undesirable.

As an example, some plugins perform vendor-specific firmware configuration
to enable SR-IOV (e.g. the `mellanox` plugin). Certain deployment environments may prefer to perform such configuration
once during node provisioning, while ensuring the configuration is compatible with any SR-IOV network node policy
defined for the particular environment. This reduces or completely eliminates the need to reboot nodes during SR-IOV
configuration by the operator.

This can be done by setting `spec.disablePlugins` in the `default` SriovOperatorConfig CR to the list of plugins
to disable.

**Example**:

```yaml
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovOperatorConfig
metadata:
  name: default
  namespace: sriov-network-operator
spec:
  ...
  disablePlugins:
    - mellanox
  ...
```

> **NOTE**: Currently only the `mellanox` plugin can be disabled.

## Components and design

This operator is split into 2 components:
Expand Down
18 changes: 18 additions & 0 deletions api/v1/sriovoperatorconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// PluginNameValue defines the plugin name
// +kubebuilder:validation:Enum=mellanox
type PluginNameValue string

// PluginNameSlice defines a slice of PluginNameValue
type PluginNameSlice []PluginNameValue

// ToStringSlice returns the plugin names as plain strings, preserving order.
// The result is always a non-nil slice whose length equals len(pns), so a nil
// or empty receiver yields an empty (but allocated) slice.
func (pns PluginNameSlice) ToStringSlice() []string {
	names := make([]string, len(pns))
	for i, name := range pns {
		names[i] = string(name)
	}
	return names
}

// SriovOperatorConfigSpec defines the desired state of SriovOperatorConfig
type SriovOperatorConfigSpec struct {
// NodeSelector selects the nodes to be configured
Expand All @@ -45,6 +61,8 @@ type SriovOperatorConfigSpec struct {
ConfigurationMode ConfigurationModeType `json:"configurationMode,omitempty"`
// Flag to enable Container Device Interface mode for SR-IOV Network Device Plugin
UseCDI bool `json:"useCDI,omitempty"`
// DisablePlugins is a list of sriov-network-config-daemon plugins to disable
DisablePlugins PluginNameSlice `json:"disablePlugins,omitempty"`
}

// SriovOperatorConfigStatus defines the observed state of SriovOperatorConfig
Expand Down
24 changes: 24 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions bindata/manifests/daemon/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ spec:
{{- if .UsedSystemdMode}}
- --use-systemd-service
{{- end }}
{{- with index . "DisablePlugins" }}
- --disable-plugins={{.}}
{{- end }}
env:
- name: NODE_NAME
valueFrom:
Expand Down
38 changes: 35 additions & 3 deletions cmd/sriov-network-config-daemon/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,29 @@ import (
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
)

// stringList is a list of strings, implements pflag.Value interface.
// It is populated from a comma-separated flag argument; every call to Set
// appends to the values collected so far.
type stringList []string

// String returns the collected elements joined with commas.
// A nil receiver yields an empty string rather than panicking.
func (sl *stringList) String() string {
	if sl == nil {
		return ""
	}
	return strings.Join(*sl, ",")
}

// Set splits arg on commas and appends the resulting elements to the list.
// It returns an error if any element is empty (for example "", "a,,b" or
// "a,"). In that case the list is left exactly as it was before the call.
func (sl *stringList) Set(arg string) error {
	elems := strings.Split(arg, ",")

	// Validate the whole argument before touching the list so that a
	// rejected argument never leaves a partially appended result behind.
	for _, elem := range elems {
		if len(elem) == 0 {
			return fmt.Errorf("empty plugin name")
		}
	}

	*sl = append(*sl, elems...)
	return nil
}

// Type returns the name used to describe this flag's value type.
func (sl *stringList) Type() string {
	return "CommaSeparatedString"
}

var (
startCmd = &cobra.Command{
Use: "start",
Expand All @@ -55,9 +78,10 @@ var (
}

startOpts struct {
kubeconfig string
nodeName string
systemd bool
kubeconfig string
nodeName string
systemd bool
disabledPlugins stringList
}
)

Expand All @@ -66,6 +90,7 @@ func init() {
startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)")
startCmd.PersistentFlags().StringVar(&startOpts.nodeName, "node-name", "", "kubernetes node name daemon is managing")
startCmd.PersistentFlags().BoolVar(&startOpts.systemd, "use-systemd-service", false, "use config daemon in systemd mode")
startCmd.PersistentFlags().VarP(&startOpts.disabledPlugins, "disable-plugins", "", "comma-separated list of plugins to disable")
}

func runStartCmd(cmd *cobra.Command, args []string) error {
Expand All @@ -88,6 +113,12 @@ func runStartCmd(cmd *cobra.Command, args []string) error {
}
vars.NodeName = startOpts.nodeName

for _, p := range startOpts.disabledPlugins {
if _, ok := vars.DisableablePlugins[p]; !ok {
return fmt.Errorf("%s plugin cannot be disabled", p)
}
}

// This channel is used to ensure all spawned goroutines exit when we exit.
stopCh := make(chan struct{})
defer close(stopCh)
Expand Down Expand Up @@ -243,6 +274,7 @@ func runStartCmd(cmd *cobra.Command, args []string) error {
syncCh,
refreshCh,
eventRecorder,
startOpts.disabledPlugins,
).Run(stopCh, exitCh)
if err != nil {
setupLog.Error(err, "failed to run daemon")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ spec:
disableDrain:
description: Flag to disable nodes drain during debugging
type: boolean
disablePlugins:
description: DisablePlugins is a list of sriov-network-config-daemon
plugins to disable
items:
description: PluginNameValue defines the plugin name
enum:
- mellanox
type: string
type: array
enableInjector:
description: Flag to control whether the network resource injector
webhook shall be deployed
Expand Down
67 changes: 50 additions & 17 deletions controllers/sriovibnetwork_controller_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package controllers

import (
goctx "context"
"context"
"fmt"
"io"
"strings"
"sync"
"time"

netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
Expand All @@ -21,7 +22,39 @@ import (
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util"
)

var _ = Describe("SriovIBNetwork Controller", func() {
var _ = Describe("SriovIBNetwork Controller", Ordered, func() {
var cancel context.CancelFunc
var ctx context.Context

BeforeAll(func() {
By("Setup controller manager")
k8sManager, err := setupK8sManagerForTest()
Expect(err).ToNot(HaveOccurred())

err = (&SriovIBNetworkReconciler{
Client: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
}).SetupWithManager(k8sManager)
Expect(err).ToNot(HaveOccurred())

ctx, cancel = context.WithCancel(context.Background())

wg := sync.WaitGroup{}
wg.Add(1)
go func() {
defer wg.Done()
defer GinkgoRecover()
By("Start controller manager")
err := k8sManager.Start(ctx)
Expect(err).ToNot(HaveOccurred())
}()

DeferCleanup(func() {
By("Shutdown controller manager")
cancel()
wg.Wait()
})
})

Context("with SriovIBNetwork", func() {
specs := map[string]sriovnetworkv1.SriovIBNetworkSpec{
Expand Down Expand Up @@ -52,7 +85,7 @@ var _ = Describe("SriovIBNetwork Controller", func() {

By("Create the SriovIBNetwork Custom Resource")
// get global framework variables
err = k8sClient.Create(goctx.TODO(), &cr)
err = k8sClient.Create(ctx, &cr)
Expect(err).NotTo(HaveOccurred())
ns := testNamespace
if cr.Spec.NetworkNamespace != "" {
Expand All @@ -68,9 +101,9 @@ var _ = Describe("SriovIBNetwork Controller", func() {

By("Delete the SriovIBNetwork Custom Resource")
found := &sriovnetworkv1.SriovIBNetwork{}
err = k8sClient.Get(goctx.TODO(), types.NamespacedName{Namespace: cr.GetNamespace(), Name: cr.GetName()}, found)
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: cr.GetNamespace(), Name: cr.GetName()}, found)
Expect(err).NotTo(HaveOccurred())
err = k8sClient.Delete(goctx.TODO(), found, []dynclient.DeleteOption{}...)
err = k8sClient.Delete(ctx, found, []dynclient.DeleteOption{}...)
Expect(err).NotTo(HaveOccurred())

netAttDef = &netattdefv1.NetworkAttachmentDefinition{}
Expand Down Expand Up @@ -98,25 +131,25 @@ var _ = Describe("SriovIBNetwork Controller", func() {
DescribeTable("should be possible to update net-att-def",
func(old, new sriovnetworkv1.SriovIBNetwork) {
old.Name = new.GetName()
err := k8sClient.Create(goctx.TODO(), &old)
err := k8sClient.Create(ctx, &old)
Expect(err).NotTo(HaveOccurred())
defer func() {
// Cleanup the test resource
Expect(k8sClient.Delete(goctx.TODO(), &old)).To(Succeed())
Expect(k8sClient.Delete(ctx, &old)).To(Succeed())
}()
found := &sriovnetworkv1.SriovIBNetwork{}
expect := generateExpectedIBNetConfig(&new)

retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
// Retrieve the latest version of SriovIBNetwork before attempting update
// RetryOnConflict uses exponential backoff to avoid exhausting the apiserver
getErr := k8sClient.Get(goctx.TODO(), types.NamespacedName{Namespace: old.GetNamespace(), Name: old.GetName()}, found)
getErr := k8sClient.Get(ctx, types.NamespacedName{Namespace: old.GetNamespace(), Name: old.GetName()}, found)
if getErr != nil {
io.WriteString(GinkgoWriter, fmt.Sprintf("Failed to get latest version of SriovIBNetwork: %v", getErr))
}
found.Spec = new.Spec
found.Annotations = new.Annotations
updateErr := k8sClient.Update(goctx.TODO(), found)
updateErr := k8sClient.Update(ctx, found)
if getErr != nil {
io.WriteString(GinkgoWriter, fmt.Sprintf("Failed to update latest version of SriovIBNetwork: %v", getErr))
}
Expand Down Expand Up @@ -164,7 +197,7 @@ var _ = Describe("SriovIBNetwork Controller", func() {
var err error
expect := generateExpectedIBNetConfig(&cr)

err = k8sClient.Create(goctx.TODO(), &cr)
err = k8sClient.Create(ctx, &cr)
Expect(err).NotTo(HaveOccurred())
ns := testNamespace
if cr.Spec.NetworkNamespace != "" {
Expand All @@ -174,7 +207,7 @@ var _ = Describe("SriovIBNetwork Controller", func() {
err = util.WaitForNamespacedObject(netAttDef, k8sClient, ns, cr.GetName(), util.RetryInterval, util.Timeout)
Expect(err).NotTo(HaveOccurred())

err = k8sClient.Delete(goctx.TODO(), netAttDef)
err = k8sClient.Delete(ctx, netAttDef)
Expect(err).NotTo(HaveOccurred())
time.Sleep(3 * time.Second)
err = util.WaitForNamespacedObject(netAttDef, k8sClient, ns, cr.GetName(), util.RetryInterval, util.Timeout)
Expand All @@ -184,9 +217,9 @@ var _ = Describe("SriovIBNetwork Controller", func() {
Expect(strings.TrimSpace(netAttDef.Spec.Config)).To(Equal(expect))

found := &sriovnetworkv1.SriovIBNetwork{}
err = k8sClient.Get(goctx.TODO(), types.NamespacedName{Namespace: cr.GetNamespace(), Name: cr.GetName()}, found)
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: cr.GetNamespace(), Name: cr.GetName()}, found)
Expect(err).NotTo(HaveOccurred())
err = k8sClient.Delete(goctx.TODO(), found, []dynclient.DeleteOption{}...)
err = k8sClient.Delete(ctx, found, []dynclient.DeleteOption{}...)
Expect(err).NotTo(HaveOccurred())
})
})
Expand All @@ -207,11 +240,11 @@ var _ = Describe("SriovIBNetwork Controller", func() {
var err error
expect := generateExpectedIBNetConfig(&cr)

err = k8sClient.Create(goctx.TODO(), &cr)
err = k8sClient.Create(ctx, &cr)
Expect(err).NotTo(HaveOccurred())

DeferCleanup(func() {
err = k8sClient.Delete(goctx.TODO(), &cr)
err = k8sClient.Delete(ctx, &cr)
Expect(err).NotTo(HaveOccurred())
})

Expand All @@ -227,10 +260,10 @@ var _ = Describe("SriovIBNetwork Controller", func() {
nsObj := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{Name: "ib-ns-xxx"},
}
err = k8sClient.Create(goctx.TODO(), nsObj)
err = k8sClient.Create(ctx, nsObj)
Expect(err).NotTo(HaveOccurred())
DeferCleanup(func() {
err = k8sClient.Delete(goctx.TODO(), nsObj)
err = k8sClient.Delete(ctx, nsObj)
Expect(err).NotTo(HaveOccurred())
})

Expand Down
Loading

0 comments on commit 3db20ae

Please sign in to comment.