Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[metrics 1/x] Support sriov-network-metrics-exporter #655

Merged
merged 3 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app: sriov-network-metrics-exporter
name: sriov-network-metrics-exporter
namespace: {{.Namespace}}
spec:
selector:
matchLabels:
app: sriov-network-metrics-exporter
template:
metadata:
labels:
app: sriov-network-metrics-exporter
spec:
hostNetwork: true
serviceAccountName: metrics-exporter-sa
{{- if .ImagePullSecrets }}
imagePullSecrets:
{{- range .ImagePullSecrets }}
- name: {{ . }}
{{- end }}
{{- end }}
containers:
- name: metrics-exporter
args:
- --web.listen-address=127.0.0.1:{{.MetricsExporterPort}}
- --path.kubecgroup=/sys/fs/cgroup
- --path.sysbuspci=/host/sys/bus/pci/devices/
- --path.sysclassnet=/host/sys/class/net/
- --path.cpucheckpoint=/host/cpu_manager_state
- --path.kubeletsocket=/host/kubelet.sock
- --collector.kubepoddevice=true
- --collector.vfstatspriority=netlink,sysfs
image: {{.Image}}
SchSeba marked this conversation as resolved.
Show resolved Hide resolved
imagePullPolicy: IfNotPresent
resources:
requests:
memory: 100Mi
cpu: 100m
securityContext:
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
volumeMounts:
- mountPath: /host/kubelet.sock
name: kubeletsocket
- mountPath: /host/sys/bus/pci/devices
name: sysbuspcidevices
readOnly: true
- mountPath: /host/sys/devices
name: sysdevices
readOnly: true
- mountPath: /host/sys/class/net
name: sysclassnet
readOnly: true
- mountPath: /host/cpu_manager_state
name: cpucheckpoint
readOnly: true
- name: kube-rbac-proxy
image: '{{.MetricsExporterKubeRbacProxyImage}}'
imagePullPolicy: IfNotPresent
args:
- --logtostderr
- --secure-listen-address=[$(HOST_IP)]:{{.MetricsExporterPort}}
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:{{.MetricsExporterPort}}/
- --tls-private-key-file=/etc/metrics/tls.key
- --tls-cert-file=/etc/metrics/tls.crt
ports:
- containerPort: {{.MetricsExporterPort}}
name: https-metrics
env:
- name: HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
resources:
requests:
cpu: 10m
memory: 20Mi
volumeMounts:
- name: metrics-certs
mountPath: /etc/metrics
readOnly: true
# Selector values are quoted so that label values such as "true" or "123"
# stay YAML strings; nodeSelector is a string-to-string map.
nodeSelector:
{{- range $key, $value := .NodeSelectorField }}
{{ $key }}: "{{ $value }}"
{{- end }}
restartPolicy: Always
volumes:
- hostPath:
path: /var/lib/kubelet/pod-resources/kubelet.sock
type: "Socket"
name: kubeletsocket
- hostPath:
path: /var/lib/kubelet/cpu_manager_state
type: "File"
name: cpucheckpoint
- hostPath:
path: /sys/class/net
type: "Directory"
name: sysclassnet
- hostPath:
path: /sys/bus/pci/devices
type: "Directory"
name: sysbuspcidevices
- hostPath:
path: /sys/devices
type: "Directory"
name: sysdevices
- name: metrics-certs
secret:
defaultMode: 420
secretName: {{ .MetricsExporterSecretName }}
66 changes: 66 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: metrics-exporter-sa
namespace: {{.Namespace}}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: metrics-exporter-role
namespace: {{.Namespace}}
rules:
- apiGroups:
- security.openshift.io
resourceNames:
- privileged
resources:
- securitycontextconstraints
verbs:
- use
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: metrics-exporter-rb
namespace: {{.Namespace}}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: metrics-exporter-role
subjects:
- kind: ServiceAccount
name: metrics-exporter-sa
namespace: {{.Namespace}}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: sriov-metrics-kube-rbac-role
rules:
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: sriov-metrics-kube-rbac-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: sriov-metrics-kube-rbac-role
subjects:
- kind: ServiceAccount
name: metrics-exporter-sa
namespace: {{.Namespace}}
20 changes: 20 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
apiVersion: v1
kind: Service
metadata:
name: sriov-network-metrics-exporter-service
namespace: {{.Namespace}}
annotations:
prometheus.io/target: "true"
{{- if eq .ClusterType "openshift" }}
service.beta.openshift.io/serving-cert-secret-name: {{ .MetricsExporterSecretName }}
SchSeba marked this conversation as resolved.
Show resolved Hide resolved
adrianchiris marked this conversation as resolved.
Show resolved Hide resolved
{{- end }}
labels:
name: sriov-network-metrics-exporter-service
spec:
selector:
app: sriov-network-metrics-exporter
ports:
- protocol: TCP
name: sriov-network-metrics
port: {{ .MetricsExporterPort }}
targetPort: {{ .MetricsExporterPort }}
21 changes: 21 additions & 0 deletions controllers/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,24 @@ func syncDaemonSet(ctx context.Context, client k8sclient.Client, scheme *runtime
}
return nil
}

// updateDaemonsetNodeSelector overrides the pod-template node selector of the
// DaemonSet carried by obj with nodeSelector.
//
// obj is converted to a typed appsv1.DaemonSet, its
// Spec.Template.Spec.NodeSelector is replaced, and the result is converted
// back into obj in place. A nil or empty nodeSelector is a no-op, so whatever
// selector obj already carries is kept.
//
// Conversion failures are returned wrapped with %w so callers can still
// inspect the underlying error with errors.Is / errors.As.
func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string]string) error {
	if len(nodeSelector) == 0 {
		return nil
	}

	ds := &appsv1.DaemonSet{}
	scheme := kscheme.Scheme
	if err := scheme.Convert(obj, ds, nil); err != nil {
		return fmt.Errorf("failed to convert Unstructured [%s] to DaemonSet: %w", obj.GetName(), err)
	}

	ds.Spec.Template.Spec.NodeSelector = nodeSelector

	if err := scheme.Convert(ds, obj, nil); err != nil {
		return fmt.Errorf("failed to convert DaemonSet [%s] to Unstructured: %w", obj.GetName(), err)
	}
	return nil
}
63 changes: 52 additions & 11 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
return reconcile.Result{}, err
}

if err = r.syncMetricsExporter(ctx, defaultConfig); err != nil {
return reconcile.Result{}, err
}

// For Openshift we need to create the systemd files using a machine config
if vars.ClusterType == consts.ClusterTypeOpenshift {
// TODO: add support for hypershift as today there is no MCO on hypershift clusters
Expand Down Expand Up @@ -199,27 +203,64 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context,
}
// Sync DaemonSets
for _, obj := range objs {
if obj.GetKind() == "DaemonSet" && len(dc.Spec.ConfigDaemonNodeSelector) > 0 {
scheme := kscheme.Scheme
ds := &appsv1.DaemonSet{}
err = scheme.Convert(obj, ds, nil)
if obj.GetKind() == "DaemonSet" {
err = updateDaemonsetNodeSelector(obj, dc.Spec.ConfigDaemonNodeSelector)
if err != nil {
logger.Error(err, "Fail to convert to DaemonSet")
return err
}
ds.Spec.Template.Spec.NodeSelector = dc.Spec.ConfigDaemonNodeSelector
err = scheme.Convert(ds, obj, nil)
}

err = r.syncK8sResource(ctx, dc, obj)
if err != nil {
logger.Error(err, "Couldn't sync SR-IOV daemons objects")
return err
}
}
return nil
}

// syncMetricsExporter reconciles the objects rendered from
// consts.MetricsExporterPath against the cluster.
//
// The manifests are rendered with the operator namespace, cluster type, image
// pull secrets and the METRICS_EXPORTER_IMAGE, METRICS_EXPORTER_SECRET_NAME,
// METRICS_EXPORTER_PORT and METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE environment
// variables. The node selector defaults to GetDefaultNodeSelector() and is
// replaced by dc.Spec.ConfigDaemonNodeSelector whenever that field is non-nil.
//
// When consts.MetricsExporterFeatureGate is present and true in
// dc.Spec.FeatureGates every rendered object is synced; otherwise every
// rendered object is deleted. The first render, sync or delete error is
// logged and returned.
func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig) error {
	logger := log.Log.WithName("syncMetricsExporter")
	logger.V(1).Info("Start to sync metrics exporter")

	data := render.MakeRenderData()
	data.Data["Image"] = os.Getenv("METRICS_EXPORTER_IMAGE")
	data.Data["Namespace"] = vars.Namespace
	data.Data["ImagePullSecrets"] = GetImagePullSecrets()
	data.Data["MetricsExporterSecretName"] = os.Getenv("METRICS_EXPORTER_SECRET_NAME")
	data.Data["MetricsExporterPort"] = os.Getenv("METRICS_EXPORTER_PORT")
	data.Data["MetricsExporterKubeRbacProxyImage"] = os.Getenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE")
	data.Data["ClusterType"] = vars.ClusterType
	data.Data["NodeSelectorField"] = GetDefaultNodeSelector()
	if dc.Spec.ConfigDaemonNodeSelector != nil {
		data.Data["NodeSelectorField"] = dc.Spec.ConfigDaemonNodeSelector
	}

	objs, err := render.RenderDir(consts.MetricsExporterPath, &data)
	if err != nil {
		logger.Error(err, "Fail to render metrics exporter manifests")
		return err
	}

	// Feature gate on: make sure every rendered object exists and is up to date.
	deployMetricsExporter, ok := dc.Spec.FeatureGates[consts.MetricsExporterFeatureGate]
	if ok && deployMetricsExporter {
		for _, obj := range objs {
			err = r.syncK8sResource(ctx, dc, obj)
			if err != nil {
				logger.Error(err, "Couldn't sync metrics exporter objects")
				return err
			}
		}
		return nil
	}

	// Feature gate off (or absent): remove every rendered object.
	for _, obj := range objs {
		err = r.deleteK8sResource(ctx, obj)
		if err != nil {
			logger.Error(err, "Couldn't delete metrics exporter objects")
			return err
		}
	}

	return nil
}

Expand Down Expand Up @@ -387,7 +428,7 @@ func (r SriovOperatorConfigReconciler) setLabelInsideObject(ctx context.Context,
}
err := r.syncK8sResource(ctx, cr, obj)
if err != nil {
logger.Error(err, "Couldn't sync SR-IoV daemons objects")
logger.Error(err, "Couldn't sync SR-IOV daemons objects")
return err
}
}
Expand Down
39 changes: 39 additions & 0 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

admv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -327,5 +328,43 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
})
Expect(err).ToNot(HaveOccurred())
})

// Verifies that toggling the metricsExporter feature gate creates and then
// removes the metrics exporter DaemonSet and Service.
It("should deploy the metrics-exporter when the feature gate is enabled", func() {
	config := &sriovnetworkv1.SriovOperatorConfig{}
	Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

	// Precondition: the exporter DaemonSet must not exist while the gate is
	// off. The name has to match the one waited on below, otherwise this
	// check passes regardless of cluster state.
	daemonSet := &appsv1.DaemonSet{}
	err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-network-metrics-exporter", Namespace: testNamespace}, daemonSet)
	Expect(err).To(HaveOccurred())
	Expect(errors.IsNotFound(err)).To(BeTrue())

	By("Turn `metricsExporter` feature gate on")
	config.Spec.FeatureGates = map[string]bool{constants.MetricsExporterFeatureGate: true}
	err = k8sClient.Update(ctx, config)
	Expect(err).NotTo(HaveOccurred())

	// Always leave the gate off for subsequent specs, even if this one fails midway.
	DeferCleanup(func() {
		config.Spec.FeatureGates = map[string]bool{}
		err = k8sClient.Update(ctx, config)
		Expect(err).NotTo(HaveOccurred())
	})

	err = util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
	Expect(err).NotTo(HaveOccurred())

	err = util.WaitForNamespacedObject(&v1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
	Expect(err).ToNot(HaveOccurred())

	By("Turn `metricsExporter` feature gate off")
	config.Spec.FeatureGates = map[string]bool{}
	err = k8sClient.Update(ctx, config)
	// A failed update would otherwise only surface as a timeout in the waits below.
	Expect(err).NotTo(HaveOccurred())

	err = util.WaitForNamespacedObjectDeleted(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
	Expect(err).NotTo(HaveOccurred())

	err = util.WaitForNamespacedObjectDeleted(&v1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
	Expect(err).ToNot(HaveOccurred())
})

})
})
8 changes: 8 additions & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("OPERATOR_NAME", "sriov-network-operator")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_SECRET_NAME", "metrics-exporter-cert")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_PORT", "9110")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())

By("bootstrapping test environment")
testEnv = &envtest.Environment{
Expand Down
Loading
Loading