Skip to content

Commit

Permalink
Support sriov-network-metrics-exporter
Browse files Browse the repository at this point in the history
Deploy `sriov-network-metrics-exporter` DaemonSet and related
configuration. The feature is activated by the feature gate
`metricsExporter`.

Add deployment logic to the SriovOperatorConfig reconcile loop.

The operator's environment variable `SRIOV_NETWORK_METRICS_EXPORTER_IMAGE`
controls the exporter image to deploy. Update the Helm chart to expose
`.Values.images.metricsExporter` with the same semantics.

Signed-off-by: Andrea Panattoni <[email protected]>
  • Loading branch information
zeeke committed Mar 12, 2024
1 parent 82a6d6f commit e85417e
Show file tree
Hide file tree
Showing 13 changed files with 216 additions and 0 deletions.
12 changes: 12 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-config-map.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# ConfigMap carrying a single drivers.yaml document that lists driver names
# with a version each. The metrics exporter DaemonSet mounts it under
# /etc/sriov-network-metrics-exporter.
# NOTE(review): presumably the exporter reads this as its supported-driver
# list; confirm against the exporter's documentation.
# {{.Namespace}} is filled in when the manifest is rendered.
apiVersion: v1
kind: ConfigMap
metadata:
name: sriov-network-metrics-exporter-config
namespace: {{.Namespace}}
data:
drivers.yaml: |-
drivers:
- name: ice
version: 1.9.11
- name: mlx5_core
version: 5.15.0-53-generic
105 changes: 105 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# DaemonSet running the sriov-network-metrics-exporter container on the host
# network. {{.Namespace}}, {{.ReleaseVersion}}, {{.Image}} and
# .ImagePullSecrets are template values filled in when the manifest is
# rendered.
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app.kubernetes.io/name: sriov-network-metrics-exporter
name: sriov-network-metrics-exporter
namespace: {{.Namespace}}
annotations:
release.openshift.io/version: "{{.ReleaseVersion}}"
spec:
revisionHistoryLimit: 10
selector:
matchLabels:
app.kubernetes.io/name: sriov-network-metrics-exporter
template:
metadata:
labels:
app.kubernetes.io/name: sriov-network-metrics-exporter
spec:
hostNetwork: true
{{- if .ImagePullSecrets }}
imagePullSecrets:
{{- range .ImagePullSecrets }}
- name: {{ . }}
{{- end }}
{{- end }}
containers:
# Every --path.* flag points under /host, where the hostPath volumes
# declared at the bottom of this manifest are mounted.
- args:
- --path.kubecgroup=/host/kubecgroup
- --path.sysbuspci=/host/sys/bus/pci/devices/
- --path.sysclassnet=/host/sys/class/net/
- --path.cpucheckpoint=/host/cpu_manager_state
- --path.kubeletsocket=/host/kubelet.sock
- --collector.kubepoddevice=true
- --collector.vfstatspriority=sysfs,netlink
image: {{.Image}}
imagePullPolicy: Always
name: metrics-exporter
# Requests equal limits for both memory and CPU.
resources:
requests:
memory: 100Mi
cpu: 100m
limits:
memory: 100Mi
cpu: 100m
# Drop all capabilities, keep the root filesystem read-only and forbid
# privilege escalation.
securityContext:
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
# All host mounts except the kubelet socket are read-only.
volumeMounts:
- mountPath: /host/kubelet.sock
name: kubeletsocket
- mountPath: /host/sys/bus/pci/devices
name: sysbuspcidevices
readOnly: true
# No --path.* flag references this mount.
# NOTE(review): presumably needed so symlinks under
# /sys/bus/pci/devices resolve inside the container - confirm.
- mountPath: /host/sys/devices
name: sysdevices
readOnly: true
- mountPath: /host/sys/class/net
name: sysclassnet
readOnly: true
- mountPath: /host/kubecgroup
name: kubecgroup
readOnly: true
- mountPath: /host/cpu_manager_state
name: cpucheckpoint
readOnly: true
# Content of the sriov-network-metrics-exporter-config ConfigMap.
- name: sriov-network-metrics-exporter-config
mountPath: /etc/sriov-network-metrics-exporter
# Schedule only on Linux nodes labelled as SR-IOV capable.
nodeSelector:
kubernetes.io/os: linux
feature.node.kubernetes.io/network-sriov.capable: "true"
restartPolicy: Always
# An Exists toleration without a key matches every taint.
tolerations:
- operator: Exists
# hostPath volumes carry an explicit type, so the named socket, file or
# directory has to exist on the node.
volumes:
- hostPath:
path: /var/lib/kubelet/pod-resources/kubelet.sock
type: "Socket"
name: kubeletsocket
- hostPath:
path: /sys/fs/cgroup/cpuset/kubepods.slice/
type: "Directory"
name: kubecgroup
- hostPath:
path: /var/lib/kubelet/cpu_manager_state
type: "File"
name: cpucheckpoint
- hostPath:
path: /sys/class/net
type: "Directory"
name: sysclassnet
- hostPath:
path: /sys/bus/pci/devices
type: "Directory"
name: sysbuspcidevices
- hostPath:
path: /sys/devices
type: "Directory"
name: sysdevices
- name: sriov-network-metrics-exporter-config
configMap:
name: sriov-network-metrics-exporter-config
14 changes: 14 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Service exposing the sriov-network-metrics-exporter pods on TCP port 9808.
# {{.Namespace}} is filled in when the manifest is rendered.
apiVersion: v1
kind: Service
metadata:
  name: sriov-network-metrics-exporter-service
  namespace: {{.Namespace}}
  annotations:
    prometheus.io/target: "true"
spec:
  selector:
    # Must equal the app.kubernetes.io/name label on the exporter DaemonSet's
    # pod template; any other value leaves the Service without endpoints.
    app.kubernetes.io/name: sriov-network-metrics-exporter
  ports:
    - protocol: TCP
      port: 9808
      targetPort: 9808
41 changes: 41 additions & 0 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
return reconcile.Result{}, err
}

if err = r.syncMetricsExporter(ctx, defaultConfig); err != nil {
return reconcile.Result{}, err
}

// For Openshift we need to create the systemd files using a machine config
if vars.ClusterType == consts.ClusterTypeOpenshift {
// TODO: add support for hypershift as today there is no MCO on hypershift clusters
Expand Down Expand Up @@ -220,6 +224,43 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context,
return nil
}

// syncMetricsExporter renders the manifests found under
// consts.MetricsExporterPath and reconciles them against the cluster.
//
// When the consts.MetricsExporterFeatureGate entry of dc.Spec.FeatureGates is
// true, every rendered object is applied through syncK8sResource. Otherwise
// (gate false or absent) every rendered object is handed to
// deleteWebhookObject so a previously deployed exporter is removed.
//
// It returns the first render, sync or delete error encountered, after
// logging it.
func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig) error {
	logger := log.Log.WithName("syncMetricsExporter")
	logger.V(1).Info("Start to sync metrics exporter")

	data := render.MakeRenderData()
	data.Data["Image"] = os.Getenv("SRIOV_NETWORK_METRICS_EXPORTER_IMAGE")
	data.Data["Namespace"] = vars.Namespace
	// NOTE(review): the test suite exports RELEASE_VERSION (with an
	// underscore); confirm which variable name the operator deployment sets.
	data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION")
	data.Data["ImagePullSecrets"] = GetImagePullSecrets()

	// The manifests are rendered in both cases: the delete path needs the
	// same object identities as the deploy path.
	objs, err := render.RenderDir(consts.MetricsExporterPath, &data)
	if err != nil {
		logger.Error(err, "Fail to render metrics exporter manifests")
		return err
	}

	// Indexing a map with a missing key (or a nil map) yields false, so an
	// unset gate means "do not deploy".
	if dc.Spec.FeatureGates[consts.MetricsExporterFeatureGate] {
		for _, obj := range objs {
			if err := r.syncK8sResource(ctx, dc, obj); err != nil {
				logger.Error(err, "Couldn't sync metrics exporter objects")
				return err
			}
		}
		return nil
	}

	// Feature gate disabled: remove whatever was deployed earlier. The helper
	// name notwithstanding, it is applied here to the metrics exporter objects.
	for _, obj := range objs {
		if err := r.deleteWebhookObject(ctx, obj); err != nil {
			logger.Error(err, "Couldn't delete metrics exporter objects")
			return err
		}
	}

	return nil
}

func (r *SriovOperatorConfigReconciler) syncWebhookObjs(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig) error {
logger := log.Log.WithName("syncWebhookObjs")
logger.V(1).Info("Start to sync webhook objects")
Expand Down
32 changes: 32 additions & 0 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (

admv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"

"github.com/golang/mock/gomock"
Expand Down Expand Up @@ -284,5 +286,35 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
return strings.Join(daemonSet.Spec.Template.Spec.Containers[0].Args, " ")
}, util.APITimeout*10, util.RetryInterval).Should(ContainSubstring("disable-plugins=mellanox"))
})

// Enables the metricsExporter feature gate on the default
// SriovOperatorConfig and waits for the exporter DaemonSet, ConfigMap and
// Service to appear in the test namespace.
It("should deploy the metrics-exporter when the feature gate is enabled", func() {
	config := &sriovnetworkv1.SriovOperatorConfig{}
	Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

	// Precondition: the exporter is not deployed yet. The lookup has to use
	// the same DaemonSet name that is waited for below, otherwise the
	// NotFound assertion passes regardless of cluster state.
	daemonSet := &appsv1.DaemonSet{}
	err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-network-metrics-exporter", Namespace: testNamespace}, daemonSet)
	Expect(err).To(HaveOccurred())
	Expect(errors.IsNotFound(err)).To(BeTrue())

	config.Spec.FeatureGates = map[string]bool{constants.MetricsExporterFeatureGate: true}
	err = k8sClient.Update(ctx, config)
	Expect(err).NotTo(HaveOccurred())

	// Reset the feature gates after the spec.
	DeferCleanup(func() {
		config.Spec.FeatureGates = map[string]bool{}
		err = k8sClient.Update(ctx, config)
		Expect(err).NotTo(HaveOccurred())
	})

	err = util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
	Expect(err).NotTo(HaveOccurred())

	err = util.WaitForNamespacedObject(&v1.ConfigMap{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-config", util.RetryInterval, util.APITimeout)
	Expect(err).NotTo(HaveOccurred())

	err = util.WaitForNamespacedObject(&v1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
	Expect(err).ToNot(HaveOccurred())
})

})
})
1 change: 1 addition & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ var _ = BeforeSuite(func() {
os.Setenv("NETWORK_RESOURCES_INJECTOR_IMAGE", "mock-image")
os.Setenv("SRIOV_NETWORK_CONFIG_DAEMON_IMAGE", "mock-image")
os.Setenv("SRIOV_NETWORK_WEBHOOK_IMAGE", "mock-image")
os.Setenv("SRIOV_NETWORK_METRICS_EXPORTER_IMAGE", "mock-image")
os.Setenv("RELEASE_VERSION", "4.7.0")
os.Setenv("OPERATOR_NAME", "sriov-network-operator")

Expand Down
2 changes: 2 additions & 0 deletions deploy/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ spec:
value: $SRIOV_NETWORK_CONFIG_DAEMON_IMAGE
- name: SRIOV_NETWORK_WEBHOOK_IMAGE
value: $SRIOV_NETWORK_WEBHOOK_IMAGE
- name: SRIOV_NETWORK_METRICS_EXPORTER_IMAGE
value: $SRIOV_NETWORK_METRICS_EXPORTER_IMAGE
- name: RESOURCE_PREFIX
value: $RESOURCE_PREFIX
- name: DEV_MODE
Expand Down
1 change: 1 addition & 0 deletions deployment/sriov-network-operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,4 @@ This section contains general parameters that apply to both the operator and dae
| `images.sriovDevicePlugin` | SR-IOV device plugin image |
| `images.resourcesInjector` | Resources Injector image |
| `images.webhook` | Operator Webhook image |
| `images.metricsExporter` | Network Metrics Exporter image |
2 changes: 2 additions & 0 deletions deployment/sriov-network-operator/templates/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ spec:
value: {{ .Values.images.sriovConfigDaemon }}
- name: SRIOV_NETWORK_WEBHOOK_IMAGE
value: {{ .Values.images.webhook }}
- name: SRIOV_NETWORK_METRICS_EXPORTER_IMAGE
value: {{ .Values.images.metricsExporter }}
- name: RESOURCE_PREFIX
value: {{ .Values.operator.resourcePrefix }}
- name: IMAGE_PULL_SECRETS
Expand Down
1 change: 1 addition & 0 deletions deployment/sriov-network-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,6 @@ images:
sriovDevicePlugin: ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin
resourcesInjector: ghcr.io/k8snetworkplumbingwg/network-resources-injector
webhook: ghcr.io/k8snetworkplumbingwg/sriov-network-operator-webhook
metricsExporter: ghcr.io/k8snetworkplumbingwg/sriov-network-metrics-exporter

imagePullSecrets: []
2 changes: 2 additions & 0 deletions hack/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ if [ -z $SKIP_VAR_SET ]; then
export NETWORK_RESOURCES_INJECTOR_IMAGE=${NETWORK_RESOURCES_INJECTOR_IMAGE:-ghcr.io/k8snetworkplumbingwg/network-resources-injector}
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE=${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator-config-daemon}
export SRIOV_NETWORK_WEBHOOK_IMAGE=${SRIOV_NETWORK_WEBHOOK_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator-webhook}
export SRIOV_NETWORK_METRICS_EXPORTER_IMAGE=${SRIOV_NETWORK_METRICS_EXPORTER_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-metrics-exporter}
export SRIOV_NETWORK_OPERATOR_IMAGE=${SRIOV_NETWORK_OPERATOR_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator}
else
[ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
Expand All @@ -13,6 +14,7 @@ else
[ -z $NETWORK_RESOURCES_INJECTOR_IMAGE ] && echo "NETWORK_RESOURCES_INJECTOR_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
[ -z $SRIOV_NETWORK_CONFIG_DAEMON_IMAGE ] && echo "SRIOV_NETWORK_CONFIG_DAEMON_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
[ -z $SRIOV_NETWORK_WEBHOOK_IMAGE ] && echo "SRIOV_NETWORK_WEBHOOK_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
[ -z $SRIOV_NETWORK_METRICS_EXPORTER_IMAGE ] && echo "SRIOV_NETWORK_METRICS_EXPORTER_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
[ -z $SRIOV_NETWORK_OPERATOR_IMAGE ] && echo "SRIOV_NETWORK_OPERATOR_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
fi

Expand Down
1 change: 1 addition & 0 deletions hack/run-e2e-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ echo ${NETWORK_RESOURCES_INJECTOR_IMAGE}
echo ${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}
echo ${SRIOV_NETWORK_OPERATOR_IMAGE}
echo ${SRIOV_NETWORK_WEBHOOK_IMAGE}
echo ${SRIOV_NETWORK_METRICS_EXPORTER_IMAGE}
envsubst < deploy/operator.yaml > deploy/operator-init.yaml
go test ./test/e2e/... -root=$(pwd) -kubeconfig=$KUBECONFIG -globalMan deploy/crds/sriovnetwork.openshift.io_sriovnetworks_crd.yaml -namespacedMan deploy/operator-init.yaml -v -singleNamespace true
2 changes: 2 additions & 0 deletions pkg/consts/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const (
ConfigDaemonPath = "./bindata/manifests/daemon"
InjectorWebHookPath = "./bindata/manifests/webhook"
OperatorWebHookPath = "./bindata/manifests/operator-webhook"
MetricsExporterPath = "./bindata/manifests/metrics-exporter"
SystemdServiceOcpPath = "./bindata/manifests/sriov-config-service/openshift"
SystemdServiceOcpMachineConfigName = "sriov-config-service"
ServiceCAConfigMapAnnotation = "service.beta.openshift.io/inject-cabundle"
Expand Down Expand Up @@ -112,6 +113,7 @@ const (
KernelArgIommuPt = "iommu=pt"

ParallelNicConfigFeatureGate = "parallelNicConfig"
MetricsExporterFeatureGate = "metricsExporter"
)

const (
Expand Down

0 comments on commit e85417e

Please sign in to comment.