From ab693dc145c30437cf3d3a0009cba6d798ff7e00 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Tue, 12 Mar 2024 14:55:38 +0100 Subject: [PATCH] Support `sriov-network-metrics-exporter` Deploy `sriov-network-metrics-exporter` DaemonSet and related configuration. The feature is activated by the feature gate `metricsExporter`. Add deployment logic to the SriovOperatorConfig reconcile loop. The operator's environment variable `SRIOV_NETWORK_METRICS_EXPORTER_IMAGE` controls the exporter image to deploy. Update helm charts with `.Values.images.metricsExporter` with the same semantic. Signed-off-by: Andrea Panattoni --- .../metrics-exporter/metrics-config-map.yaml | 12 ++ .../metrics-exporter/metrics-daemonset.yaml | 122 ++++++++++++++++++ .../metrics-exporter/metrics-rbac.yaml | 66 ++++++++++ .../metrics-exporter/metrics-service.yaml | 15 +++ controllers/sriovoperatorconfig_controller.go | 87 +++++++++++-- .../sriovoperatorconfig_controller_test.go | 30 +++++ controllers/suite_test.go | 8 ++ deploy/operator.yaml | 8 ++ deployment/sriov-network-operator/README.md | 3 + .../templates/operator.yaml | 8 ++ deployment/sriov-network-operator/values.yaml | 4 + hack/env.sh | 5 + hack/run-e2e-test.sh | 1 + pkg/consts/constants.go | 4 + test/conformance/tests/test_sriov_operator.go | 64 ++++++++- 15 files changed, 424 insertions(+), 13 deletions(-) create mode 100644 bindata/manifests/metrics-exporter/metrics-config-map.yaml create mode 100644 bindata/manifests/metrics-exporter/metrics-daemonset.yaml create mode 100644 bindata/manifests/metrics-exporter/metrics-rbac.yaml create mode 100644 bindata/manifests/metrics-exporter/metrics-service.yaml diff --git a/bindata/manifests/metrics-exporter/metrics-config-map.yaml b/bindata/manifests/metrics-exporter/metrics-config-map.yaml new file mode 100644 index 0000000000..df112c47c8 --- /dev/null +++ b/bindata/manifests/metrics-exporter/metrics-config-map.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: 
sriov-network-metrics-exporter-config + namespace: {{.Namespace}} +data: + drivers.yaml: |- + drivers: + - name: ice + version: 1.9.11 + - name: mlx5_core + version: 5.15.0-53-generic diff --git a/bindata/manifests/metrics-exporter/metrics-daemonset.yaml b/bindata/manifests/metrics-exporter/metrics-daemonset.yaml new file mode 100644 index 0000000000..dc71b0161b --- /dev/null +++ b/bindata/manifests/metrics-exporter/metrics-daemonset.yaml @@ -0,0 +1,122 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app: sriov-network-metrics-exporter + name: sriov-network-metrics-exporter + namespace: {{.Namespace}} +spec: + selector: + matchLabels: + app: sriov-network-metrics-exporter + template: + metadata: + labels: + app: sriov-network-metrics-exporter + spec: + hostNetwork: true + serviceAccountName: sriov-network-config-daemon + {{- if .ImagePullSecrets }} + imagePullSecrets: + {{- range .ImagePullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} + containers: + - args: + - --web.listen-address=127.0.0.1:{{.MetricsExporterPort}} + - --path.kubecgroup=/sys/fs/cgroup + - --path.sysbuspci=/host/sys/bus/pci/devices/ + - --path.sysclassnet=/host/sys/class/net/ + - --path.cpucheckpoint=/host/cpu_manager_state + - --path.kubeletsocket=/host/kubelet.sock + - --collector.kubepoddevice=true + - --collector.vfstatspriority=sysfs,netlink + image: {{.Image}} + imagePullPolicy: IfNotPresent + name: metrics-exporter + resources: + requests: + memory: 100Mi + cpu: 100m + securityContext: + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + volumeMounts: + - mountPath: /host/kubelet.sock + name: kubeletsocket + - mountPath: /host/sys/bus/pci/devices + name: sysbuspcidevices + readOnly: true + - mountPath: /host/sys/devices + name: sysdevices + readOnly: true + - mountPath: /host/sys/class/net + name: sysclassnet + readOnly: true + - mountPath: /host/cpu_manager_state + name: cpucheckpoint + readOnly: true + - name: 
sriov-network-metrics-exporter-config + mountPath: /etc/sriov-network-metrics-exporter + - name: kube-rbac-proxy + image: '{{.KubeRbacProxyImage}}' + imagePullPolicy: IfNotPresent + args: + - --logtostderr + - --secure-listen-address=[$(HOST_IP)]:{{.MetricsExporterPort}} + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:{{.MetricsExporterPort}}/ + - --tls-private-key-file=/etc/metrics/tls.key + - --tls-cert-file=/etc/metrics/tls.crt + ports: + - containerPort: {{.MetricsExporterPort}} + name: https-metrics + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + resources: + requests: + cpu: 10m + memory: 20Mi + volumeMounts: + - name: metrics-certs + mountPath: /etc/metrics + readOnly: true + nodeSelector: + kubernetes.io/os: linux + node-role.kubernetes.io/worker: "" + restartPolicy: Always + volumes: + - hostPath: + path: /var/lib/kubelet/pod-resources/kubelet.sock + type: "Socket" + name: kubeletsocket + - hostPath: + path: /var/lib/kubelet/cpu_manager_state + type: "File" + name: cpucheckpoint + - hostPath: + path: /sys/class/net + type: "Directory" + name: sysclassnet + - hostPath: + path: /sys/bus/pci/devices + type: "Directory" + name: sysbuspcidevices + - hostPath: + path: /sys/devices + type: "Directory" + name: sysdevices + - name: sriov-network-metrics-exporter-config + configMap: + name: sriov-network-metrics-exporter-config + - name: metrics-certs + secret: + defaultMode: 420 + secretName: {{ .MetricsExporterSecretName }} diff --git a/bindata/manifests/metrics-exporter/metrics-rbac.yaml b/bindata/manifests/metrics-exporter/metrics-rbac.yaml new file mode 100644 index 0000000000..79a337b970 --- /dev/null +++ b/bindata/manifests/metrics-exporter/metrics-rbac.yaml @@ -0,0 +1,66 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: 
metrics-exporter-sa + namespace: {{.Namespace}} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: metrics-exporter-role + namespace: {{.Namespace}} +rules: + - apiGroups: + - security.openshift.io + resourceNames: + - hostaccess + resources: + - securitycontextconstraints + verbs: + - use +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: metrics-exporter-rb + namespace: {{.Namespace}} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: metrics-exporter-role +subjects: + - kind: ServiceAccount + name: metrics-exporter-sa + namespace: {{.Namespace}} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: sriov-metrics-kube-rbac-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: sriov-metrics-kube-rbac-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: sriov-metrics-kube-rbac-role +subjects: +- kind: ServiceAccount + name: metrics-exporter-sa + namespace: {{.Namespace}} diff --git a/bindata/manifests/metrics-exporter/metrics-service.yaml b/bindata/manifests/metrics-exporter/metrics-service.yaml new file mode 100644 index 0000000000..1268dff57e --- /dev/null +++ b/bindata/manifests/metrics-exporter/metrics-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: sriov-network-metrics-exporter-service + namespace: {{.Namespace}} + annotations: + prometheus.io/target: "true" + service.beta.openshift.io/serving-cert-secret-name: {{ .MetricsExporterSecretName }} +spec: + selector: + app: sriov-network-metrics-exporter + ports: + - protocol: TCP + port: {{ .MetricsExporterPort }} + targetPort: {{ .MetricsExporterPort }} diff --git 
a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 8ac029c521..941c515eab 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -117,6 +117,10 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. return reconcile.Result{}, err } + if err = r.syncMetricsExporter(ctx, defaultConfig); err != nil { + return reconcile.Result{}, err + } + // For Openshift we need to create the systemd files using a machine config if vars.ClusterType == consts.ClusterTypeOpenshift { // TODO: add support for hypershift as today there is no MCO on hypershift clusters @@ -162,7 +166,6 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context, data.Data["Namespace"] = vars.Namespace data.Data["SRIOVCNIImage"] = os.Getenv("SRIOV_CNI_IMAGE") data.Data["SRIOVInfiniBandCNIImage"] = os.Getenv("SRIOV_INFINIBAND_CNI_IMAGE") - data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") data.Data["ClusterType"] = vars.ClusterType data.Data["DevMode"] = os.Getenv("DEV_MODE") data.Data["ImagePullSecrets"] = GetImagePullSecrets() @@ -196,27 +199,89 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context, } // Sync DaemonSets for _, obj := range objs { - if obj.GetKind() == "DaemonSet" && len(dc.Spec.ConfigDaemonNodeSelector) > 0 { - scheme := kscheme.Scheme - ds := &appsv1.DaemonSet{} - err = scheme.Convert(obj, ds, nil) + err = updateDaemonsetNodeSelector(obj, dc.Spec.ConfigDaemonNodeSelector) + if err != nil { + return err + } + + err = r.syncK8sResource(ctx, dc, obj) + if err != nil { + logger.Error(err, "Couldn't sync SR-IoV daemons objects") + return err + } + } + return nil +} + +func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string]string) error { + if obj.GetKind() != "DaemonSet" { + return nil + } + + if len(nodeSelector) == 0 { + return nil + } + + ds := &appsv1.DaemonSet{} + 
scheme := kscheme.Scheme + err := scheme.Convert(obj, ds, nil) + if err != nil { + return fmt.Errorf("failed to convert Unstructured [%s] to DaemonSet: %v", obj.GetName(), err) + } + + ds.Spec.Template.Spec.NodeSelector = nodeSelector + + err = scheme.Convert(ds, obj, nil) + if err != nil { + return fmt.Errorf("failed to convert DaemonSet [%s] to Unstructured: %v", obj.GetName(), err) + } + return nil +} + +func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig) error { + logger := log.Log.WithName("syncMetricsExporter") + logger.V(1).Info("Start to sync metrics exporter") + + data := render.MakeRenderData() + data.Data["Image"] = os.Getenv("SRIOV_NETWORK_METRICS_EXPORTER_IMAGE") + data.Data["Namespace"] = vars.Namespace + data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") + data.Data["ImagePullSecrets"] = GetImagePullSecrets() + data.Data["MetricsExporterSecretName"] = os.Getenv("METRICS_EXPORTER_SECRET_NAME") + data.Data["MetricsExporterPort"] = os.Getenv("METRICS_EXPORTER_PORT") + data.Data["KubeRbacProxyImage"] = os.Getenv("KUBE_RBAC_PROXY_IMAGE") + + objs, err := render.RenderDir(consts.MetricsExporterPath, &data) + if err != nil { + logger.Error(err, "Fail to render metrics exporter manifests") + return err + } + + deployMetricsExporter, ok := dc.Spec.FeatureGates[consts.MetricsExporterFeatureGate] + if ok && deployMetricsExporter { + for _, obj := range objs { + + err = updateDaemonsetNodeSelector(obj, dc.Spec.ConfigDaemonNodeSelector) if err != nil { - logger.Error(err, "Fail to convert to DaemonSet") return err } - ds.Spec.Template.Spec.NodeSelector = dc.Spec.ConfigDaemonNodeSelector - err = scheme.Convert(ds, obj, nil) + + err = r.syncK8sResource(ctx, dc, obj) if err != nil { - logger.Error(err, "Fail to convert to Unstructured") + logger.Error(err, "Couldn't sync metrics exporter objects") return err } } - err = r.syncK8sResource(ctx, dc, obj) + return nil + } + + for _, obj := range 
objs { + err = r.deleteK8sResource(ctx, obj) if err != nil { - logger.Error(err, "Couldn't sync SR-IoV daemons objects") return err } } + return nil } diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 567c17a659..1a35e65405 100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -7,6 +7,7 @@ import ( admv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" @@ -325,5 +326,34 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { }) Expect(err).ToNot(HaveOccurred()) }) + It("should deploy the metrics-exporter when the feature gate is enabled", func() { + config := &sriovnetworkv1.SriovOperatorConfig{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred()) + + daemonSet := &appsv1.DaemonSet{} + err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-network-metrics-exporter", Namespace: testNamespace}, daemonSet) + Expect(err).To(HaveOccurred()) + Expect(errors.IsNotFound(err)).To(BeTrue()) + + config.Spec.FeatureGates = map[string]bool{constants.MetricsExporterFeatureGate: true} + err = k8sClient.Update(ctx, config) + Expect(err).NotTo(HaveOccurred()) + + DeferCleanup(func() { + config.Spec.FeatureGates = map[string]bool{} + err = k8sClient.Update(ctx, config) + Expect(err).NotTo(HaveOccurred()) + }) + + err = util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout) + Expect(err).NotTo(HaveOccurred()) + + err = util.WaitForNamespacedObject(&v1.ConfigMap{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-config", util.RetryInterval, util.APITimeout) + Expect(err).NotTo(HaveOccurred()) + + err = 
util.WaitForNamespacedObject(&v1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout) + Expect(err).ToNot(HaveOccurred()) + }) + }) }) diff --git a/controllers/suite_test.go b/controllers/suite_test.go index bc5870f34c..984e319ae9 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -121,6 +121,14 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) err = os.Setenv("OPERATOR_NAME", "sriov-network-operator") Expect(err).NotTo(HaveOccurred()) + err = os.Setenv("SRIOV_NETWORK_METRICS_EXPORTER_IMAGE", "mock-image") + Expect(err).NotTo(HaveOccurred()) + err = os.Setenv("METRICS_EXPORTER_SECRET_NAME", "metrics-exporter-cert") + Expect(err).NotTo(HaveOccurred()) + err = os.Setenv("METRICS_EXPORTER_PORT", "9110") + Expect(err).NotTo(HaveOccurred()) + err = os.Setenv("KUBE_RBAC_PROXY_IMAGE", "mock-image") + Expect(err).NotTo(HaveOccurred()) By("bootstrapping test environment") testEnv = &envtest.Environment{ diff --git a/deploy/operator.yaml b/deploy/operator.yaml index 069f2eff1c..c94374cbb0 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -64,6 +64,8 @@ spec: value: $SRIOV_NETWORK_CONFIG_DAEMON_IMAGE - name: SRIOV_NETWORK_WEBHOOK_IMAGE value: $SRIOV_NETWORK_WEBHOOK_IMAGE + - name: SRIOV_NETWORK_METRICS_EXPORTER_IMAGE + value: $SRIOV_NETWORK_METRICS_EXPORTER_IMAGE - name: RESOURCE_PREFIX value: $RESOURCE_PREFIX - name: DEV_MODE @@ -96,3 +98,9 @@ spec: value: $ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT - name: ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT value: $ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT + - name: METRICS_EXPORTER_SECRET_NAME + value: $METRICS_EXPORTER_SECRET_NAME + - name: METRICS_EXPORTER_PORT + value: $METRICS_EXPORTER_PORT + - name: KUBE_RBAC_PROXY_IMAGE + value: $KUBE_RBAC_PROXY_IMAGE diff --git a/deployment/sriov-network-operator/README.md b/deployment/sriov-network-operator/README.md index b2a57c4185..eec20b272a 100644 
--- a/deployment/sriov-network-operator/README.md +++ b/deployment/sriov-network-operator/README.md @@ -72,6 +72,7 @@ We have introduced the following Chart parameters. | `operator.resourcePrefix` | string | `openshift.io` | Device plugin resource prefix | | `operator.cniBinPath` | string | `/opt/cni/bin` | Path for CNI binary | | `operator.clustertype` | string | `kubernetes` | Cluster environment type | +| `operator.metricsExporterPort` | string | `9110` | Port where the Network Metrics Exporter listens | #### Admission Controllers parameters @@ -127,3 +128,5 @@ This section contains general parameters that apply to both the operator and dae | `images.sriovDevicePlugin` | SR-IOV device plugin image | | `images.resourcesInjector` | Resources Injector image | | `images.webhook` | Operator Webhook image | +| `images.metricsExporter` | Network Metrics Exporter image | +| `images.kubeRbacProxy` | Kube RBAC Proxy image | diff --git a/deployment/sriov-network-operator/templates/operator.yaml b/deployment/sriov-network-operator/templates/operator.yaml index d3fea3d1c1..a79fffc843 100644 --- a/deployment/sriov-network-operator/templates/operator.yaml +++ b/deployment/sriov-network-operator/templates/operator.yaml @@ -66,6 +66,14 @@ spec: value: {{ .Values.images.sriovConfigDaemon }} - name: SRIOV_NETWORK_WEBHOOK_IMAGE value: {{ .Values.images.webhook }} + - name: SRIOV_NETWORK_METRICS_EXPORTER_IMAGE + value: {{ .Values.images.metricsExporter }} + - name: METRICS_EXPORTER_SECRET_NAME + value: {{ .Values.operator.admissionControllers.certificates.secretNames.metricsExporter }} + - name: METRICS_EXPORTER_PORT + value: "{{ .Values.operator.metricsExporterPort }}" + - name: KUBE_RBAC_PROXY_IMAGE + value: {{ .Values.images.kubeRbacProxy }} - name: RESOURCE_PREFIX value: {{ .Values.operator.resourcePrefix }} - name: IMAGE_PULL_SECRETS diff --git a/deployment/sriov-network-operator/values.yaml b/deployment/sriov-network-operator/values.yaml index 81b3d11729..558ae445f9 100644 --- 
a/deployment/sriov-network-operator/values.yaml +++ b/deployment/sriov-network-operator/values.yaml @@ -27,12 +27,14 @@ operator: resourcePrefix: "openshift.io" cniBinPath: "/opt/cni/bin" clusterType: "kubernetes" + metricsExporterPort: "9110" admissionControllers: enabled: false certificates: secretNames: operator: "operator-webhook-cert" injector: "network-resources-injector-cert" + metricsExporter: "metrics-exporter-cert" certManager: # When enabled, makes use of certificates managed by cert-manager. enabled: false @@ -98,5 +100,7 @@ images: sriovDevicePlugin: ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin resourcesInjector: ghcr.io/k8snetworkplumbingwg/network-resources-injector webhook: ghcr.io/k8snetworkplumbingwg/sriov-network-operator-webhook + metricsExporter: ghcr.io/k8snetworkplumbingwg/sriov-network-metrics-exporter + kubeRbacProxy: gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0 imagePullSecrets: [] diff --git a/hack/env.sh b/hack/env.sh index 1dccb157e5..b0cd170a3e 100755 --- a/hack/env.sh +++ b/hack/env.sh @@ -5,6 +5,7 @@ if [ -z $SKIP_VAR_SET ]; then export NETWORK_RESOURCES_INJECTOR_IMAGE=${NETWORK_RESOURCES_INJECTOR_IMAGE:-ghcr.io/k8snetworkplumbingwg/network-resources-injector} export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE=${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator-config-daemon} export SRIOV_NETWORK_WEBHOOK_IMAGE=${SRIOV_NETWORK_WEBHOOK_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator-webhook} + export SRIOV_NETWORK_METRICS_EXPORTER_IMAGE=${SRIOV_NETWORK_METRICS_EXPORTER_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-metrics-exporter} export SRIOV_NETWORK_OPERATOR_IMAGE=${SRIOV_NETWORK_OPERATOR_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator} else [ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 @@ -13,6 +14,7 @@ else [ -z $NETWORK_RESOURCES_INJECTOR_IMAGE ] && echo "NETWORK_RESOURCES_INJECTOR_IMAGE is empty but SKIP_VAR_SET is set" 
&& exit 1 [ -z $SRIOV_NETWORK_CONFIG_DAEMON_IMAGE ] && echo "SRIOV_NETWORK_CONFIG_DAEMON_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 [ -z $SRIOV_NETWORK_WEBHOOK_IMAGE ] && echo "SRIOV_NETWORK_WEBHOOK_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 + [ -z $SRIOV_NETWORK_METRICS_EXPORTER_IMAGE ] && echo "SRIOV_NETWORK_METRICS_EXPORTER_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 [ -z $SRIOV_NETWORK_OPERATOR_IMAGE ] && echo "SRIOV_NETWORK_OPERATOR_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 fi @@ -30,3 +32,6 @@ export ADMISSION_CONTROLLERS_CERTIFICATES_CERT_MANAGER_ENABLED=${ADMISSION_CONTR export ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT=${ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT:-""} export ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT=${ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT:-""} export DEV_MODE=${DEV_MODE:-"FALSE"} +export METRICS_EXPORTER_SECRET_NAME=${METRICS_EXPORTER_SECRET_NAME:-"metrics-exporter-cert"} +export METRICS_EXPORTER_PORT=${METRICS_EXPORTER_PORT:-"9110"} +export KUBE_RBAC_PROXY_IMAGE=${KUBE_RBAC_PROXY_IMAGE:-"gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0"} diff --git a/hack/run-e2e-test.sh b/hack/run-e2e-test.sh index 24cfc934ae..b2f6a79528 100755 --- a/hack/run-e2e-test.sh +++ b/hack/run-e2e-test.sh @@ -12,5 +12,6 @@ echo ${NETWORK_RESOURCES_INJECTOR_IMAGE} echo ${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE} echo ${SRIOV_NETWORK_OPERATOR_IMAGE} echo ${SRIOV_NETWORK_WEBHOOK_IMAGE} +echo ${SRIOV_NETWORK_METRICS_EXPORTER_IMAGE} envsubst < deploy/operator.yaml > deploy/operator-init.yaml go test ./test/e2e/... 
-root=$(pwd) -kubeconfig=$KUBECONFIG -globalMan deploy/crds/sriovnetwork.openshift.io_sriovnetworks_crd.yaml -namespacedMan deploy/operator-init.yaml -v -singleNamespace true diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index efbefe1981..bf03bd6566 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -19,6 +19,7 @@ const ( ConfigDaemonPath = "./bindata/manifests/daemon" InjectorWebHookPath = "./bindata/manifests/webhook" OperatorWebHookPath = "./bindata/manifests/operator-webhook" + MetricsExporterPath = "./bindata/manifests/metrics-exporter" SystemdServiceOcpPath = "./bindata/manifests/sriov-config-service/openshift" SystemdServiceOcpMachineConfigName = "sriov-config-service" ServiceCAConfigMapAnnotation = "service.beta.openshift.io/inject-cabundle" @@ -124,6 +125,9 @@ const ( // ResourceInjectorMatchConditionFeatureGate: switch injector to fail policy and add mactch condition // this will make the mutating webhook to be called only when a pod has 'k8s.v1.cni.cncf.io/networks' annotation ResourceInjectorMatchConditionFeatureGate = "resourceInjectorMatchCondition" + + // MetricsExporterFeatureGate: enable SriovNetworkMetricsExporter on the same nodes where the config-daemon runs + MetricsExporterFeatureGate = "metricsExporter" ) const ( diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index 9ad400cbc3..c211fa3c59 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -276,6 +276,29 @@ var _ = Describe("[sriov] operator", func() { }, 3*time.Minute, 5*time.Second).Should(Succeed()) }) }) + + Context("SriovNetworkMetricsExporter", func() { + It("should be deployed if the feature gate is enabled", func() { + if discovery.Enabled() { + Skip("Test unsuitable to be run in discovery mode") + } + + initialValue := isFeatureFlagEnabled("metricsExporter") + DeferCleanup(func() { + By("Restoring initial feature flag value") + 
setFeatureFlag("metricsExporter", initialValue) + }) + + By("Enabling `metricsExporter` feature flag") + setFeatureFlag("metricsExporter", true) + + By("Checking that a daemon is scheduled on selected node") + Eventually(func() bool { + return isDaemonsetScheduledOnNodes("node-role.kubernetes.io/worker", "app=sriov-network-metrics-exporter") + }, 1*time.Minute, 1*time.Second).Should(Equal(true)) + + }) + }) }) Describe("Generic SriovNetworkNodePolicy", func() { @@ -2355,13 +2378,17 @@ func podVFIndexInHost(hostNetPod *corev1.Pod, targetPod *corev1.Pod, interfaceNa } func daemonsScheduledOnNodes(selector string) bool { + return isDaemonsetScheduledOnNodes(selector, "app=sriov-network-config-daemon") +} + +func isDaemonsetScheduledOnNodes(nodeSelector, daemonsetLabelSelector string) bool { nn, err := clients.CoreV1Interface.Nodes().List(context.Background(), metav1.ListOptions{ - LabelSelector: selector, + LabelSelector: nodeSelector, }) Expect(err).ToNot(HaveOccurred()) nodes := nn.Items - daemons, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: "app=sriov-network-config-daemon"}) + daemons, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: daemonsetLabelSelector}) Expect(err).ToNot(HaveOccurred()) for _, d := range daemons.Items { foundNode := false @@ -2627,6 +2654,39 @@ func getOperatorConfigLogLevel() int { return cfg.Spec.LogLevel } +func isFeatureFlagEnabled(featureFlag string) bool { + cfg := sriovv1.SriovOperatorConfig{} + err := clients.Get(context.TODO(), runtimeclient.ObjectKey{ + Name: "default", + Namespace: operatorNamespace, + }, &cfg) + Expect(err).ToNot(HaveOccurred()) + + ret, ok := cfg.Spec.FeatureGates[featureFlag] + return ok && ret +} + +func setFeatureFlag(featureFlag string, value bool) { + Eventually(func(g Gomega) { + cfg := sriovv1.SriovOperatorConfig{} + err := clients.Get(context.TODO(), runtimeclient.ObjectKey{ + Name: "default", + 
Namespace: operatorNamespace, + }, &cfg) + g.Expect(err).ToNot(HaveOccurred()) + if previousValue, ok := cfg.Spec.FeatureGates[featureFlag]; ok && previousValue == value { + return + } + // Assigning into a nil map panics, so allocate FeatureGates before the first write. + if cfg.Spec.FeatureGates == nil { + cfg.Spec.FeatureGates = map[string]bool{} + } + cfg.Spec.FeatureGates[featureFlag] = value + err = clients.Update(context.TODO(), &cfg) + g.Expect(err).ToNot(HaveOccurred()) + }, 1*time.Minute, 5*time.Second).Should(Succeed()) +} + func getOperatorLogs(since time.Time) []string { podList, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{ LabelSelector: "name=sriov-network-operator",