From b62accf18598894f13f8e473cb4253674ab1c538 Mon Sep 17 00:00:00 2001 From: Fred Rolland Date: Thu, 17 Feb 2022 13:02:23 +0200 Subject: [PATCH] Skip unsupported NICs in DiscoverSriovDevices Before creating the InterfaceExt in utils.DiscoverSriovDevices, check that the discovered NIC is a supported model as configured in the `supported-nic-ids` config map. With this logic, the plugins will not get unsupported NIC to configure. It is possible to get all the NICs discovered by using an env parameter: DEV_MODE with value TRUE in the pod of the sriov-network-config-daemon. Also, if DEV_MODE is set to TRUE, the webhook will allow unsupported NICS in `SriovNetworkNodePolicy`. Signed-off-by: Fred Rolland --- bindata/manifests/daemon/daemonset.yaml | 2 ++ .../manifests/operator-webhook/server.yaml | 2 ++ cmd/sriov-network-config-daemon/start.go | 10 ++++++- controllers/sriovoperatorconfig_controller.go | 2 ++ deploy/operator.yaml | 2 ++ doc/hacking.md | 11 +++++++ hack/env.sh | 1 + pkg/daemon/writer.go | 24 ++++++++------- pkg/utils/utils.go | 9 +++++- pkg/webhook/validate.go | 30 ++++++++++++------- pkg/webhook/validate_test.go | 22 ++++++++++++++ 11 files changed, 91 insertions(+), 24 deletions(-) diff --git a/bindata/manifests/daemon/daemonset.yaml b/bindata/manifests/daemon/daemonset.yaml index 6221d9b2a..df72795cd 100644 --- a/bindata/manifests/daemon/daemonset.yaml +++ b/bindata/manifests/daemon/daemonset.yaml @@ -74,6 +74,8 @@ spec: fieldPath: metadata.namespace - name: CLUSTER_TYPE value: "{{.ClusterType}}" + - name: DEV_MODE + value: "{{.DevMode}}" resources: requests: cpu: 100m diff --git a/bindata/manifests/operator-webhook/server.yaml b/bindata/manifests/operator-webhook/server.yaml index afe029e14..460f85240 100644 --- a/bindata/manifests/operator-webhook/server.yaml +++ b/bindata/manifests/operator-webhook/server.yaml @@ -58,6 +58,8 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + - name: DEV_MODE + value: "{{.DevMode}}" resources: requests: cpu: 10m diff --git a/cmd/sriov-network-config-daemon/start.go b/cmd/sriov-network-config-daemon/start.go index 48c60ba0f..cd01d57ef 100644 --- a/cmd/sriov-network-config-daemon/start.go +++ b/cmd/sriov-network-config-daemon/start.go @@ -130,8 +130,16 @@ func runStartCmd(cmd *cobra.Command, args []string) { config.Timeout = 5 * time.Second writerclient := snclientset.NewForConfigOrDie(config) + + mode := os.Getenv("DEV_MODE") + devMode := false + if mode == "TRUE" { + devMode = true + glog.V(0).Info("dev mode enabled") + } + glog.V(0).Info("starting node writer") - nodeWriter := daemon.NewNodeStateStatusWriter(writerclient, startOpts.nodeName, closeAllConns) + nodeWriter := daemon.NewNodeStateStatusWriter(writerclient, startOpts.nodeName, closeAllConns, devMode) destdir := os.Getenv("DEST_DIR") if destdir == "" { diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index bee4bbfec..9b4d43742 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -179,6 +179,7 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(dc *sriovnetworkv1.S data.Data["SRIOVInfiniBandCNIImage"] = os.Getenv("SRIOV_INFINIBAND_CNI_IMAGE") data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") data.Data["ClusterType"] = utils.ClusterType + data.Data["DevMode"] = os.Getenv("DEV_MODE") envCniBinPath := os.Getenv("SRIOV_CNI_BIN_PATH") if envCniBinPath == "" { data.Data["CNIBinPath"] = "/var/lib/cni/bin" @@ -231,6 +232,7 @@ func (r *SriovOperatorConfigReconciler) syncWebhookObjs(dc *sriovnetworkv1.Sriov data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") data.Data["ClusterType"] = utils.ClusterType data.Data["CaBundle"] = os.Getenv("WEBHOOK_CA_BUNDLE") + data.Data["DevMode"] = os.Getenv("DEV_MODE") objs, err := render.RenderDir(path, &data) if err != nil { logger.Error(err, "Fail to render webhook manifests") diff --git a/deploy/operator.yaml b/deploy/operator.yaml index ad450dc01..b92e23c46 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -63,6 +63,8 @@ spec: value: $RESOURCE_PREFIX - name: ENABLE_ADMISSION_CONTROLLER value: "$ENABLE_ADMISSION_CONTROLLER" + - name: DEV_MODE + value: "$DEV_MODE" - name: NAMESPACE valueFrom: fieldRef: diff --git a/doc/hacking.md b/doc/hacking.md index 5b6be6f9e..2929d4fd9 100644 --- a/doc/hacking.md +++ b/doc/hacking.md @@ -97,3 +97,14 @@ Before deploying the Operator, you want to export these variables to use that cu ```bash export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE= (...) + +## Enable Unsupported NICs + +By default, unsupported NICs are not reported in `SriovNetworkNodeState` and +are not allowed in `SriovNetworkNodePolicy` by the webhook. + +If you want to allow unsupported NICs, set the `DEV_MODE` env var to `TRUE`. + +```bash +export DEV_MODE=TRUE +``` \ No newline at end of file diff --git a/hack/env.sh b/hack/env.sh index 52f8f7097..30d24980c 100755 --- a/hack/env.sh +++ b/hack/env.sh @@ -35,3 +35,4 @@ export ENABLE_ADMISSION_CONTROLLER=${ENABLE_ADMISSION_CONTROLLER:-"true"} export CLUSTER_TYPE=${CLUSTER_TYPE:-openshift} export NAMESPACE=${NAMESPACE:-"openshift-sriov-network-operator"} export WEBHOOK_CA_BUNDLE=${WEBHOOK_CA_BUNDLE:-""} +export DEV_MODE=${DEV_MODE:-"FALSE"} diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index eb6684631..2c4686fbe 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -23,20 +23,22 @@ const ( ) type NodeStateStatusWriter struct { - client snclientset.Interface - node string - status sriovnetworkv1.SriovNetworkNodeStateStatus - OnHeartbeatFailure func() - metaData *utils.OSPMetaData - networkData *utils.OSPNetworkData + client snclientset.Interface + node string + status sriovnetworkv1.SriovNetworkNodeStateStatus + OnHeartbeatFailure func() + metaData *utils.OSPMetaData + networkData *utils.OSPNetworkData + withUnsupportedDevices bool } // NewNodeStateStatusWriter Create a new NodeStateStatusWriter -func NewNodeStateStatusWriter(c snclientset.Interface, n string, f func()) *NodeStateStatusWriter { +func NewNodeStateStatusWriter(c snclientset.Interface, n string, f func(), devMode bool) *NodeStateStatusWriter { return &NodeStateStatusWriter{ - client: c, - node: n, - OnHeartbeatFailure: f, + client: c, + node: n, + OnHeartbeatFailure: f, + withUnsupportedDevices: devMode, } } @@ -100,7 +102,7 @@ func (writer *NodeStateStatusWriter) pollNicStatus(platformType utils.PlatformTy if platformType == utils.VirtualOpenStack { iface, err = utils.DiscoverSriovDevicesVirtual(platformType, writer.metaData, writer.networkData) } else { - iface, err = utils.DiscoverSriovDevices() + iface, err = utils.DiscoverSriovDevices(writer.withUnsupportedDevices) } if err != nil { return err diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index af7a9c267..f834c2659 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -48,7 +48,7 @@ func init() { ClusterType = os.Getenv("CLUSTER_TYPE") } -func DiscoverSriovDevices() ([]sriovnetworkv1.InterfaceExt, error) { +func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt, error) { glog.V(2).Info("DiscoverSriovDevices") pfList := []sriovnetworkv1.InterfaceExt{} @@ -96,6 +96,13 @@ func DiscoverSriovDevices() ([]sriovnetworkv1.InterfaceExt, error) { continue } + if !withUnsupported { + if !sriovnetworkv1.IsSupportedModel(device.Vendor.ID, device.Product.ID) { + glog.Infof("DiscoverSriovDevices(): unsupported device %+v", device) + continue + } + } + iface := sriovnetworkv1.InterfaceExt{ PciAddress: device.Address, Driver: driver, diff --git a/pkg/webhook/validate.go b/pkg/webhook/validate.go index 1a2b6920c..09119cbf5 100644 --- a/pkg/webhook/validate.go +++ b/pkg/webhook/validate.go @@ -96,18 +96,26 @@ func staticValidateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePol return false, fmt.Errorf("at least one of these parameters (vendor, deviceID, pfNames, rootDevices or netFilter) has to be defined in nicSelector in CR %s", cr.GetName()) } - if cr.Spec.NicSelector.Vendor != "" { - if !sriovnetworkv1.IsSupportedVendor(cr.Spec.NicSelector.Vendor) { - return false, fmt.Errorf("vendor %s is not supported", cr.Spec.NicSelector.Vendor) - } - if cr.Spec.NicSelector.DeviceID != "" { - if !sriovnetworkv1.IsSupportedModel(cr.Spec.NicSelector.Vendor, cr.Spec.NicSelector.DeviceID) { - return false, fmt.Errorf("vendor/device %s/%s is not supported", cr.Spec.NicSelector.Vendor, cr.Spec.NicSelector.DeviceID) + devMode := false + if os.Getenv("DEV_MODE") == "TRUE" { + devMode = true + glog.V(0).Info("dev mode enabled - Admitting not supported NICs") + } + + if !devMode { + if cr.Spec.NicSelector.Vendor != "" { + if !sriovnetworkv1.IsSupportedVendor(cr.Spec.NicSelector.Vendor) { + return false, fmt.Errorf("vendor %s is not supported", cr.Spec.NicSelector.Vendor) + } + if cr.Spec.NicSelector.DeviceID != "" { + if !sriovnetworkv1.IsSupportedModel(cr.Spec.NicSelector.Vendor, cr.Spec.NicSelector.DeviceID) { + return false, fmt.Errorf("vendor/device %s/%s is not supported", cr.Spec.NicSelector.Vendor, cr.Spec.NicSelector.DeviceID) + } + } + } else if cr.Spec.NicSelector.DeviceID != "" { + if !sriovnetworkv1.IsSupportedDevice(cr.Spec.NicSelector.DeviceID) { + return false, fmt.Errorf("device %s is not supported", cr.Spec.NicSelector.DeviceID) } - } - } else if cr.Spec.NicSelector.DeviceID != "" { - if !sriovnetworkv1.IsSupportedDevice(cr.Spec.NicSelector.DeviceID) { - return false, fmt.Errorf("device %s is not supported", cr.Spec.NicSelector.DeviceID) } } diff --git a/pkg/webhook/validate_test.go b/pkg/webhook/validate_test.go index 2de71ddbe..8324bc669 100644 --- a/pkg/webhook/validate_test.go +++ b/pkg/webhook/validate_test.go @@ -339,6 +339,28 @@ func TestStaticValidateSriovNetworkNodePolicyWithInvalidVendor(t *testing.T) { g.Expect(ok).To(Equal(false)) } +func TestStaticValidateSriovNetworkNodePolicyWithInvalidVendorDevMode(t *testing.T) { + t.Setenv("DEV_MODE", "TRUE") + policy := &SriovNetworkNodePolicy{ + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + Vendor: "8087", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 63, + Priority: 99, + ResourceName: "p0", + }, + } + g := NewGomegaWithT(t) + ok, err := staticValidateSriovNetworkNodePolicy(policy) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(ok).To(Equal(true)) +} + func TestStaticValidateSriovNetworkNodePolicyWithInvalidDevice(t *testing.T) { policy := &SriovNetworkNodePolicy{ Spec: SriovNetworkNodePolicySpec{