Skip to content

Commit

Permalink
Support sriov-network-metrics-exporter
Browse files Browse the repository at this point in the history
Deploy `sriov-network-metrics-exporter` DaemonSet and related
configuration. The feature is activated by the feature gate
`metricsExporter`.

Add deployment logic to the SriovOperatorConfig reconcile loop.

The operator's environment variable `METRICS_EXPORTER_IMAGE`
controls the exporter image to deploy. Update the helm charts with
`.Values.images.metricsExporter`, which has the same semantics.

Signed-off-by: Andrea Panattoni <[email protected]>
  • Loading branch information
zeeke committed Jun 19, 2024
1 parent a84770c commit 4db4c48
Show file tree
Hide file tree
Showing 17 changed files with 446 additions and 13 deletions.
118 changes: 118 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# DaemonSet for the sriov-network-metrics-exporter.
# This manifest is a Go template: the placeholders in double curly braces are
# filled in when the operator renders it.
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app: sriov-network-metrics-exporter
name: sriov-network-metrics-exporter
namespace: {{.Namespace}}
spec:
selector:
matchLabels:
app: sriov-network-metrics-exporter
template:
metadata:
labels:
app: sriov-network-metrics-exporter
spec:
hostNetwork: true
serviceAccountName: metrics-exporter-sa
{{- if .ImagePullSecrets }}
imagePullSecrets:
{{- range .ImagePullSecrets }}
- name: {{ . }}
{{- end }}
{{- end }}
containers:
# The exporter listens on loopback only; the kube-rbac-proxy container below
# serves on the host IP over TLS and forwards to this address.
- name: metrics-exporter
args:
- --web.listen-address=127.0.0.1:{{.MetricsExporterPort}}
- --path.kubecgroup=/sys/fs/cgroup
- --path.sysbuspci=/host/sys/bus/pci/devices/
- --path.sysclassnet=/host/sys/class/net/
- --path.cpucheckpoint=/host/cpu_manager_state
- --path.kubeletsocket=/host/kubelet.sock
- --collector.kubepoddevice=true
- --collector.vfstatspriority=netlink,sysfs
image: {{.Image}}
imagePullPolicy: IfNotPresent
resources:
requests:
memory: 100Mi
cpu: 100m
# Run with no capabilities, a read-only root filesystem and no privilege escalation.
securityContext:
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
# Host files and directories exposed to the exporter under /host; the backing
# host paths are listed in the volumes section at the end of this manifest.
volumeMounts:
- mountPath: /host/kubelet.sock
name: kubeletsocket
- mountPath: /host/sys/bus/pci/devices
name: sysbuspcidevices
readOnly: true
- mountPath: /host/sys/devices
name: sysdevices
readOnly: true
- mountPath: /host/sys/class/net
name: sysclassnet
readOnly: true
- mountPath: /host/cpu_manager_state
name: cpucheckpoint
readOnly: true
# TLS front end for the exporter: listens on the node's host IP and proxies to
# the exporter's loopback listener on the same port.
- name: kube-rbac-proxy
image: '{{.MetricsExporterKubeRbacProxyImage}}'
imagePullPolicy: IfNotPresent
args:
- --logtostderr
- --secure-listen-address=[$(HOST_IP)]:{{.MetricsExporterPort}}
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:{{.MetricsExporterPort}}/
- --tls-private-key-file=/etc/metrics/tls.key
- --tls-cert-file=/etc/metrics/tls.crt
ports:
- containerPort: {{.MetricsExporterPort}}
name: https-metrics
# HOST_IP is taken from the pod's status.hostIP and used by --secure-listen-address.
env:
- name: HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
resources:
requests:
cpu: 10m
memory: 20Mi
volumeMounts:
- name: metrics-certs
mountPath: /etc/metrics
readOnly: true
# One nodeSelector entry is emitted per key/value pair of NodeSelectorField.
nodeSelector:
{{- range $key, $value := .NodeSelectorField }}
{{ $key }}: {{ $value }}
{{- end }}
restartPolicy: Always
volumes:
- hostPath:
path: /var/lib/kubelet/pod-resources/kubelet.sock
type: "Socket"
name: kubeletsocket
- hostPath:
path: /var/lib/kubelet/cpu_manager_state
type: "File"
name: cpucheckpoint
- hostPath:
path: /sys/class/net
type: "Directory"
name: sysclassnet
- hostPath:
path: /sys/bus/pci/devices
type: "Directory"
name: sysbuspcidevices
- hostPath:
path: /sys/devices
type: "Directory"
name: sysdevices
# Secret holding tls.key and tls.crt, mounted by kube-rbac-proxy at /etc/metrics.
- name: metrics-certs
secret:
defaultMode: 420
secretName: {{ .MetricsExporterSecretName }}
66 changes: 66 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
---
# Service account referenced by the namespaced RoleBinding and the
# ClusterRoleBinding defined below.
apiVersion: v1
kind: ServiceAccount
metadata:
name: metrics-exporter-sa
namespace: {{.Namespace}}
---
# Namespaced role allowing use of the OpenShift SecurityContextConstraints
# named "privileged".
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: metrics-exporter-role
namespace: {{.Namespace}}
rules:
- apiGroups:
- security.openshift.io
resourceNames:
- privileged
resources:
- securitycontextconstraints
verbs:
- use
---
# Binds metrics-exporter-role to the metrics-exporter-sa service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: metrics-exporter-rb
namespace: {{.Namespace}}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: metrics-exporter-role
subjects:
- kind: ServiceAccount
name: metrics-exporter-sa
namespace: {{.Namespace}}
---
# Cluster-wide permission to create TokenReviews and SubjectAccessReviews.
# NOTE(review): presumably required by the kube-rbac-proxy sidecar to
# authenticate and authorize incoming requests - confirm.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: sriov-metrics-kube-rbac-role
rules:
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
---
# Binds sriov-metrics-kube-rbac-role to the metrics-exporter-sa service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: sriov-metrics-kube-rbac-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: sriov-metrics-kube-rbac-role
subjects:
- kind: ServiceAccount
name: metrics-exporter-sa
namespace: {{.Namespace}}
20 changes: 20 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Service selecting the pods labelled app=sriov-network-metrics-exporter and
# exposing the metrics port. Rendered as a Go template by the operator.
apiVersion: v1
kind: Service
metadata:
name: sriov-network-metrics-exporter-service
namespace: {{.Namespace}}
annotations:
prometheus.io/target: "true"
# The annotation below is only emitted when ClusterType is "openshift".
# NOTE(review): presumably it asks the OpenShift service CA to create the
# serving certificate secret with this name - confirm.
{{- if eq .ClusterType "openshift" }}
service.beta.openshift.io/serving-cert-secret-name: {{ .MetricsExporterSecretName }}
{{- end }}
labels:
name: sriov-network-metrics-exporter-service
spec:
selector:
app: sriov-network-metrics-exporter
# The service port and the target port are the same rendered value.
ports:
- protocol: TCP
name: sriov-network-metrics
port: {{ .MetricsExporterPort }}
targetPort: {{ .MetricsExporterPort }}
21 changes: 21 additions & 0 deletions controllers/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,24 @@ func syncDaemonSet(ctx context.Context, client k8sclient.Client, scheme *runtime
}
return nil
}

// updateDaemonsetNodeSelector sets the pod template's node selector of the
// DaemonSet held in obj to nodeSelector.
//
// obj is converted to a typed appsv1.DaemonSet, modified, and converted back
// into obj in place. A nil or empty nodeSelector leaves obj untouched.
// Conversion failures are returned wrapped with %w, so callers can still
// inspect the underlying cause with errors.Is / errors.As.
func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string]string) error {
	if len(nodeSelector) == 0 {
		return nil
	}

	ds := &appsv1.DaemonSet{}
	scheme := kscheme.Scheme
	if err := scheme.Convert(obj, ds, nil); err != nil {
		return fmt.Errorf("failed to convert Unstructured [%s] to DaemonSet: %w", obj.GetName(), err)
	}

	ds.Spec.Template.Spec.NodeSelector = nodeSelector

	if err := scheme.Convert(ds, obj, nil); err != nil {
		return fmt.Errorf("failed to convert DaemonSet [%s] to Unstructured: %w", obj.GetName(), err)
	}
	return nil
}
63 changes: 52 additions & 11 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
return reconcile.Result{}, err
}

if err = r.syncMetricsExporter(ctx, defaultConfig); err != nil {
return reconcile.Result{}, err
}

// For Openshift we need to create the systemd files using a machine config
if vars.ClusterType == consts.ClusterTypeOpenshift {
// TODO: add support for hypershift as today there is no MCO on hypershift clusters
Expand Down Expand Up @@ -199,27 +203,64 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context,
}
// Sync DaemonSets
for _, obj := range objs {
if obj.GetKind() == "DaemonSet" && len(dc.Spec.ConfigDaemonNodeSelector) > 0 {
scheme := kscheme.Scheme
ds := &appsv1.DaemonSet{}
err = scheme.Convert(obj, ds, nil)
if obj.GetKind() == "DaemonSet" {
err = updateDaemonsetNodeSelector(obj, dc.Spec.ConfigDaemonNodeSelector)
if err != nil {
logger.Error(err, "Fail to convert to DaemonSet")
return err
}
ds.Spec.Template.Spec.NodeSelector = dc.Spec.ConfigDaemonNodeSelector
err = scheme.Convert(ds, obj, nil)
}

err = r.syncK8sResource(ctx, dc, obj)
if err != nil {
logger.Error(err, "Couldn't sync SR-IOV daemons objects")
return err
}
}
return nil
}

// syncMetricsExporter reconciles the sriov-network-metrics-exporter objects
// with the `metricsExporter` feature gate of dc.
//
// The manifests under consts.MetricsExporterPath are always rendered. The
// rendered objects are synced when the feature gate is enabled and deleted
// otherwise. Images, port and secret name are read from the operator's
// environment variables. The first error encountered is logged and returned.
func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig) error {
	logger := log.Log.WithName("syncMetricsExporter")
	logger.V(1).Info("Start to sync metrics exporter")

	data := render.MakeRenderData()
	data.Data["Image"] = os.Getenv("METRICS_EXPORTER_IMAGE")
	data.Data["Namespace"] = vars.Namespace
	data.Data["ImagePullSecrets"] = GetImagePullSecrets()
	data.Data["MetricsExporterSecretName"] = os.Getenv("METRICS_EXPORTER_SECRET_NAME")
	data.Data["MetricsExporterPort"] = os.Getenv("METRICS_EXPORTER_PORT")
	data.Data["MetricsExporterKubeRbacProxyImage"] = os.Getenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE")
	data.Data["ClusterType"] = vars.ClusterType

	// The node selector from the operator config, when set, replaces the default one.
	data.Data["NodeSelectorField"] = GetDefaultNodeSelector()
	if dc.Spec.ConfigDaemonNodeSelector != nil {
		data.Data["NodeSelectorField"] = dc.Spec.ConfigDaemonNodeSelector
	}

	objs, err := render.RenderDir(consts.MetricsExporterPath, &data)
	if err != nil {
		logger.Error(err, "Fail to render metrics exporter manifests")
		return err
	}

	// A feature gate that is absent from the map reads as false, i.e. disabled.
	if dc.Spec.FeatureGates[consts.MetricsExporterFeatureGate] {
		for _, obj := range objs {
			if err = r.syncK8sResource(ctx, dc, obj); err != nil {
				logger.Error(err, "Couldn't sync metrics exporter objects")
				return err
			}
		}
		return nil
	}

	// Feature gate disabled: remove whatever a previous reconcile deployed.
	for _, obj := range objs {
		if err = r.deleteK8sResource(ctx, obj); err != nil {
			logger.Error(err, "Couldn't delete metrics exporter objects")
			return err
		}
	}

	return nil
}

Expand Down Expand Up @@ -387,7 +428,7 @@ func (r SriovOperatorConfigReconciler) setLabelInsideObject(ctx context.Context,
}
err := r.syncK8sResource(ctx, cr, obj)
if err != nil {
logger.Error(err, "Couldn't sync SR-IoV daemons objects")
logger.Error(err, "Couldn't sync SR-IOV daemons objects")
return err
}
}
Expand Down
39 changes: 39 additions & 0 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

admv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -327,5 +328,43 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
})
Expect(err).ToNot(HaveOccurred())
})

// Verifies that turning the `metricsExporter` feature gate on creates the
// exporter DaemonSet and Service, and that turning it off removes them.
It("should deploy the metrics-exporter when the feature gate is enabled", func() {
	config := &sriovnetworkv1.SriovOperatorConfig{}
	Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

	// Precondition: the exporter must not be deployed yet. The name has to be
	// the one used by the manifest and by the waits below, otherwise the
	// NotFound check passes trivially.
	daemonSet := &appsv1.DaemonSet{}
	err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-network-metrics-exporter", Namespace: testNamespace}, daemonSet)
	Expect(err).To(HaveOccurred())
	Expect(errors.IsNotFound(err)).To(BeTrue())

	By("Turn `metricsExporter` feature gate on")
	config.Spec.FeatureGates = map[string]bool{constants.MetricsExporterFeatureGate: true}
	err = k8sClient.Update(ctx, config)
	Expect(err).NotTo(HaveOccurred())

	// Restore the feature gates even if an assertion below fails.
	DeferCleanup(func() {
		config.Spec.FeatureGates = map[string]bool{}
		err = k8sClient.Update(ctx, config)
		Expect(err).NotTo(HaveOccurred())
	})

	err = util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
	Expect(err).NotTo(HaveOccurred())

	err = util.WaitForNamespacedObject(&v1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
	Expect(err).ToNot(HaveOccurred())

	By("Turn `metricsExporter` feature gate off")
	config.Spec.FeatureGates = map[string]bool{}
	err = k8sClient.Update(ctx, config)
	// Fail here if the update was rejected, rather than timing out on the
	// deletion waits below with a misleading error.
	Expect(err).NotTo(HaveOccurred())

	err = util.WaitForNamespacedObjectDeleted(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
	Expect(err).NotTo(HaveOccurred())

	err = util.WaitForNamespacedObjectDeleted(&v1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
	Expect(err).ToNot(HaveOccurred())
})

})
})
8 changes: 8 additions & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("OPERATOR_NAME", "sriov-network-operator")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_SECRET_NAME", "metrics-exporter-cert")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_PORT", "9110")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())

By("bootstrapping test environment")
testEnv = &envtest.Environment{
Expand Down
Loading

0 comments on commit 4db4c48

Please sign in to comment.