From 78bde32285684f1300d2c689b974edc9421bbe75 Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Thu, 6 Jun 2024 18:20:01 +0300 Subject: [PATCH 1/4] Remove webhook validation for InfiniBand and RDMA Since we always deploy both mutation and validating webhooks there is no need to validate policy for InfiniBand and RDMA enabled because mutating webhook will wet 'isRdma' flag to 'true' for InfiniBand'. --- pkg/webhook/validate.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/webhook/validate.go b/pkg/webhook/validate.go index 6937aecc5..95c481470 100644 --- a/pkg/webhook/validate.go +++ b/pkg/webhook/validate.go @@ -213,10 +213,6 @@ func staticValidateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePol if cr.Spec.DeviceType == consts.DeviceTypeVfioPci && cr.Spec.IsRdma { return false, fmt.Errorf("'deviceType: vfio-pci' conflicts with 'isRdma: true'; Set 'deviceType' to (string)'netdevice' Or Set 'isRdma' to (bool)'false'") } - if strings.EqualFold(cr.Spec.LinkType, consts.LinkTypeIB) && !cr.Spec.IsRdma { - return false, fmt.Errorf("'linkType: ib or IB' requires 'isRdma: true'; Set 'isRdma' to (bool)'true'") - } - // vdpa: deviceType must be set to 'netdevice' if cr.Spec.DeviceType != consts.DeviceTypeNetDevice && (cr.Spec.VdpaType == consts.VdpaTypeVirtio || cr.Spec.VdpaType == consts.VdpaTypeVhost) { return false, fmt.Errorf("'deviceType: %s' conflicts with '%s'; Set 'deviceType' to (string)'netdevice' Or Remove 'vdpaType'", cr.Spec.DeviceType, cr.Spec.VdpaType) From fb5485a9b723270ef1edd95c9d926b9bc70d8c56 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Tue, 28 May 2024 10:00:42 +0200 Subject: [PATCH 2/4] e2e: Use `helm` to deploy on k8s cluster Using `helm` to deploy the operator in vanilla k8s CI lanes helps avoid error in helm charts changes. Instead of adding a new helm e2e lane (as it would increase the CI load), we can keep the already in place lanes as: - OpenShift: use the `hack/deploy-setup.sh` script - K8s: use helm chart Signed-off-by: Andrea Panattoni --- Makefile | 6 +++++ hack/run-e2e-conformance-virtual-cluster.sh | 30 ++++++++++++++------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 1b40241c5..737f10aa2 100644 --- a/Makefile +++ b/Makefile @@ -177,6 +177,12 @@ skopeo: fakechroot: if ! which fakechroot; then if [ -f /etc/redhat-release ]; then dnf -y install fakechroot; elif [ -f /etc/lsb-release ]; then sudo apt-get -y update; sudo apt-get -y install fakechroot; fi; fi +$(BIN_DIR)/helm helm: + mkdir -p $(BIN_DIR) + curl -fsSL -o $(BIN_DIR)/get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 + chmod 700 $(BIN_DIR)/get_helm.sh + HELM_INSTALL_DIR=$(BIN_DIR) $(BIN_DIR)/get_helm.sh + deploy-setup: export ADMISSION_CONTROLLERS_ENABLED?=false deploy-setup: skopeo install hack/deploy-setup.sh $(NAMESPACE) diff --git a/hack/run-e2e-conformance-virtual-cluster.sh b/hack/run-e2e-conformance-virtual-cluster.sh index 70752b5af..8dbf51593 100755 --- a/hack/run-e2e-conformance-virtual-cluster.sh +++ b/hack/run-e2e-conformance-virtual-cluster.sh @@ -365,6 +365,8 @@ do done +source hack/env.sh + export ADMISSION_CONTROLLERS_ENABLED=true export ADMISSION_CONTROLLERS_CERTIFICATES_CERT_MANAGER_ENABLED=true export SKIP_VAR_SET="" @@ -372,12 +374,26 @@ export NAMESPACE="sriov-network-operator" export OPERATOR_NAMESPACE="sriov-network-operator" export CNI_BIN_PATH=/opt/cni/bin export OPERATOR_EXEC=kubectl -export CLUSTER_TYPE=kubernetes -export DEV_MODE=TRUE export CLUSTER_HAS_EMULATED_PF=TRUE -echo "## deploy namespace" -envsubst< $root/deploy/namespace.yaml | ${OPERATOR_EXEC} apply -f - + +HELM_VALUES_OPTS="\ + --set images.operator=${SRIOV_NETWORK_OPERATOR_IMAGE} \ + --set images.sriovConfigDaemon=${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE} \ + --set images.sriovCni=${SRIOV_CNI_IMAGE} \ + --set images.sriovDevicePlugin=${SRIOV_DEVICE_PLUGIN_IMAGE} \ + --set images.resourcesInjector=${NETWORK_RESOURCES_INJECTOR_IMAGE} \ + --set images.webhook=${SRIOV_NETWORK_WEBHOOK_IMAGE} \ + --set operator.admissionControllers.enabled=${ADMISSION_CONTROLLERS_ENABLED} \ + --set operator.admissionControllers.certificates.certManager.enabled=${ADMISSION_CONTROLLERS_CERTIFICATES_CERT_MANAGER_ENABLED} \ + --set sriovOperatorConfig.deploy=true" + +PATH=$PATH:${root}/bin +make helm +helm install -n ${NAMESPACE} --create-namespace \ + $HELM_VALUES_OPTS \ + --wait sriov-network-operator ./deployment/sriov-network-operator-chart + echo "## create certificates for webhook" cat < Date: Tue, 25 Jun 2024 18:06:28 +0200 Subject: [PATCH 3/4] helm: Quote `.operator.metricsExporter.port` value Signed-off-by: Andrea Panattoni --- deployment/sriov-network-operator-chart/templates/operator.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/sriov-network-operator-chart/templates/operator.yaml b/deployment/sriov-network-operator-chart/templates/operator.yaml index 52da05721..073393299 100644 --- a/deployment/sriov-network-operator-chart/templates/operator.yaml +++ b/deployment/sriov-network-operator-chart/templates/operator.yaml @@ -73,7 +73,7 @@ spec: - name: METRICS_EXPORTER_SECRET_NAME value: {{ .Values.operator.admissionControllers.certificates.secretNames.metricsExporter }} - name: METRICS_EXPORTER_PORT - value: {{ .Values.operator.metricsExporter.port }} + value: "{{ .Values.operator.metricsExporter.port }}" - name: METRICS_EXPORTER_SECRET_NAME value: {{ .Values.operator.metricsExporter.certificates.secretName }} - name: METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE From 8b7f22ad028a7af28e48a80f7cf64e4fd6ddef16 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Mon, 10 Jun 2024 14:00:35 +0200 Subject: [PATCH 4/4] Avoid reconfiguring unmentioned DPDK VFs If a Virtual Function is configured with a DPDK driver (e.g. `vfio-pci`) and it is not referred by any SriovNetworkNodePolicy, `NeedToUpdateSriov` function must not trigger a reconfiguration. This may happen if a PF is configured by multiple policies (via PF partitioning) and a policy is deleted by the user. In these cases, the VF is not reconfigured [1] and a drain loop is started The same logic applies to VDPA devices. refs: [1] https://github.com/k8snetworkplumbingwg/sriov-network-operator/blob/5f3c4e903f789aa177fe54686efd6c18576b7ab1/pkg/host/internal/sriov/sriov.go#L457 Signed-off-by: Andrea Panattoni --- README.md | 4 +++ api/v1/helper.go | 8 ----- api/v1/helper_test.go | 68 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ba36699d0..914dd42f0 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,10 @@ VF groups** (when #-notation is used in pfName field) are merged, otherwise only the highest priority policy is applied. In case of same-priority policies and overlapping VF groups, only the last processed policy is applied. +When using #-notation to define VF group, no actions are taken on virtual functions that +are not mentioned in any policy (e.g. if a policy defines a `vfio-pci` device group for a device, when +it is deleted the VF are not reset to the default driver). + #### Externally Manage virtual functions When `ExternallyManage` is request on a policy the operator will only skip the virtual function creation. diff --git a/api/v1/helper.go b/api/v1/helper.go index 782240ba8..8c51d4530 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -280,10 +280,8 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool { if ifaceSpec.NumVfs > 0 { for _, vfStatus := range ifaceStatus.VFs { - ingroup := false for _, groupSpec := range ifaceSpec.VfGroups { if IndexInRange(vfStatus.VfID, groupSpec.VfRange) { - ingroup = true if vfStatus.Driver == "" { log.V(2).Info("NeedToUpdateSriov(): Driver needs update - has no driver", "desired", groupSpec.DeviceType) @@ -332,12 +330,6 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool { break } } - if !ingroup && (StringInArray(vfStatus.Driver, vars.DpdkDrivers) || vfStatus.VdpaType != "") { - // need to reset VF if it is not a part of a group and: - // a. has DPDK driver loaded - // b. has VDPA device - return true - } } } return false diff --git a/api/v1/helper_test.go b/api/v1/helper_test.go index cae4e2a00..4c899bcc9 100644 --- a/api/v1/helper_test.go +++ b/api/v1/helper_test.go @@ -1054,3 +1054,71 @@ func TestSriovNetworkPoolConfig_MaxUnavailable(t *testing.T) { }) } } + +func TestNeedToUpdateSriov(t *testing.T) { + type args struct { + ifaceSpec *v1.Interface + ifaceStatus *v1.InterfaceExt + } + tests := []struct { + name string + args args + want bool + }{ + { + name: "number of VFs changed", + args: args{ + ifaceSpec: &v1.Interface{NumVfs: 1}, + ifaceStatus: &v1.InterfaceExt{NumVfs: 0}, + }, + want: true, + }, + { + name: "no update", + args: args{ + ifaceSpec: &v1.Interface{NumVfs: 1}, + ifaceStatus: &v1.InterfaceExt{NumVfs: 1}, + }, + want: false, + }, + { + name: "vfio-pci VF is not configured for any group", + args: args{ + ifaceSpec: &v1.Interface{ + NumVfs: 3, + VfGroups: []v1.VfGroup{ + { + VfRange: "1-2", + DeviceType: consts.DeviceTypeNetDevice, + }, + }, + }, + ifaceStatus: &v1.InterfaceExt{ + NumVfs: 3, + VFs: []v1.VirtualFunction{ + { + VfID: 0, + Driver: "vfio-pci", + }, + { + VfID: 1, + Driver: "iavf", + }, + { + VfID: 2, + Driver: "iavf", + }, + }, + }, + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := v1.NeedToUpdateSriov(tt.args.ifaceSpec, tt.args.ifaceStatus); got != tt.want { + t.Errorf("NeedToUpdateSriov() = %v, want %v", got, tt.want) + } + }) + } +}