From 4e28a8311a8b0fbc1b9ebda1a39771db1fc7fd1c Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Wed, 13 Mar 2024 13:30:53 +1000 Subject: [PATCH] POWERMON-251: ensure redfish config change restart kepler This commit modifies kepler reconciler to add the hash of redfish spec to pod annotations so that any change to the redfish spec will redeploy the pod. This commit also fixes POWERMON-250 by removing the default redfish config added to kepler configmap. Signed-off-by: Sunil Thaha --- go.mod | 2 +- pkg/components/exporter/exporter.go | 36 ++++++++++++------------ pkg/components/exporter/exporter_test.go | 26 +++++++++-------- pkg/controllers/kepler_internal.go | 7 +++-- pkg/reconciler/kepler.go | 33 ++++++++++++---------- 5 files changed, 55 insertions(+), 49 deletions(-) diff --git a/go.mod b/go.mod index 2baaba41..c29e6fb3 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.21 toolchain go1.21.7 require ( + github.com/cespare/xxhash/v2 v2.2.0 github.com/go-logr/logr v1.4.1 github.com/openshift/api v0.0.0-20240212125214-04ea3891d9cb github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 @@ -20,7 +21,6 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/evanphx/json-patch v4.12.0+incompatible // indirect diff --git a/pkg/components/exporter/exporter.go b/pkg/components/exporter/exporter.go index bd74ba0a..deab75da 100644 --- a/pkg/components/exporter/exporter.go +++ b/pkg/components/exporter/exporter.go @@ -49,6 +49,7 @@ const ( RedfishArgs = "-redfish-cred-file-path=/etc/redfish/redfish.csv" RedfishCSV = "redfish.csv" RedfishSecretAnnotation = "kepler.system.sustainable.computing.io/redfish-secret-ref" + RedfishConfigHash = "kepler.system.sustainable.computing.io/redfish-config-hash" ) const ( @@ -140,7 +141,7 @@ func NewDaemonSet(detail components.Detail, k *v1alpha1.KeplerInternal) *appsv1. } } -func MountRedfishSecretToDaemonSet(ds *appsv1.DaemonSet, secret *corev1.Secret) { +func MountRedfishSecretToDaemonSet(ds *appsv1.DaemonSet, secret *corev1.Secret, hash uint64) { spec := &ds.Spec.Template.Spec keplerContainer := &spec.Containers[KeplerContainerIndex] keplerContainer.Command = append(keplerContainer.Command, RedfishArgs) @@ -154,6 +155,7 @@ func MountRedfishSecretToDaemonSet(ds *appsv1.DaemonSet, secret *corev1.Secret) // forces pods to be reployed if the secret chanage ds.Spec.Template.Annotations = map[string]string{ RedfishSecretAnnotation: secret.ResourceVersion, + RedfishConfigHash: fmt.Sprint(hash), } } @@ -246,23 +248,21 @@ func NewConfigMap(d components.Detail, k *v1alpha1.KeplerInternal) *corev1.Confi } exporterConfigMap := k8s.StringMap{ - "KEPLER_NAMESPACE": k.Namespace(), - "KEPLER_LOG_LEVEL": "1", - "METRIC_PATH": "/metrics", - "BIND_ADDRESS": bindAddress, - "ENABLE_GPU": "true", - "ENABLE_QAT": "false", - "ENABLE_EBPF_CGROUPID": "true", - "EXPOSE_HW_COUNTER_METRICS": "true", - "EXPOSE_IRQ_COUNTER_METRICS": "true", - "EXPOSE_KUBELET_METRICS": "true", - "EXPOSE_CGROUP_METRICS": "true", - "ENABLE_PROCESS_METRICS": "false", - "CPU_ARCH_OVERRIDE": "", - "CGROUP_METRICS": "*", - "REDFISH_PROBE_INTERVAL_IN_SECONDS": "60", - "REDFISH_SKIP_SSL_VERIFY": "true", - "MODEL_CONFIG": modelConfig, + "KEPLER_NAMESPACE": k.Namespace(), + "KEPLER_LOG_LEVEL": "1", + "METRIC_PATH": "/metrics", + "BIND_ADDRESS": bindAddress, + "ENABLE_GPU": "true", + "ENABLE_QAT": "false", + "ENABLE_EBPF_CGROUPID": "true", + "EXPOSE_HW_COUNTER_METRICS": "true", + "EXPOSE_IRQ_COUNTER_METRICS": "true", + "EXPOSE_KUBELET_METRICS": "true", + "EXPOSE_CGROUP_METRICS": "true", + "ENABLE_PROCESS_METRICS": "false", + "CPU_ARCH_OVERRIDE": "", + "CGROUP_METRICS": "*", + "MODEL_CONFIG": modelConfig, } ms := k.Spec.ModelServer diff --git a/pkg/components/exporter/exporter_test.go b/pkg/components/exporter/exporter_test.go index a37e326a..309d741e 100644 --- a/pkg/components/exporter/exporter_test.go +++ b/pkg/components/exporter/exporter_test.go @@ -182,7 +182,9 @@ func TestDaemonSet(t *testing.T) { }, }, annotation: map[string]string{ - "kepler.system.sustainable.computing.io/redfish-secret-ref": "123", + + RedfishSecretAnnotation: "123", + RedfishConfigHash: "1337", }, scenario: "redfish case", }, @@ -202,23 +204,23 @@ func TestDaemonSet(t *testing.T) { } ds := NewDaemonSet(components.Full, &k) if tc.addRedfish { - MountRedfishSecretToDaemonSet(ds, tc.redfishSecret) + MountRedfishSecretToDaemonSet(ds, tc.redfishSecret, 1337) } - actual_hostPID := k8s.HostPIDFromDS(ds) - assert.Equal(t, actual_hostPID, tc.hostPID) + actualHostPID := k8s.HostPIDFromDS(ds) + assert.Equal(t, tc.hostPID, actualHostPID) - actual_exporterCommand := k8s.CommandFromDS(ds, KeplerContainerIndex) - assert.Equal(t, actual_exporterCommand, tc.exporterCommand) + actualExporterCommand := k8s.CommandFromDS(ds, KeplerContainerIndex) + assert.Equal(t, tc.exporterCommand, actualExporterCommand) - actual_volumeMounts := k8s.VolumeMountsFromDS(ds, KeplerContainerIndex) - assert.Equal(t, actual_volumeMounts, tc.volumeMounts) + actualVolumeMounts := k8s.VolumeMountsFromDS(ds, KeplerContainerIndex) + assert.Equal(t, tc.volumeMounts, actualVolumeMounts) - actual_Volumes := k8s.VolumesFromDS(ds) - assert.Equal(t, actual_Volumes, tc.volumes) + actualVolumes := k8s.VolumesFromDS(ds) + assert.Equal(t, tc.volumes, actualVolumes) - actual_Annotation := k8s.AnnotationFromDS(ds) - assert.Equal(t, actual_Annotation, tc.annotation) + actualAnnotation := k8s.AnnotationFromDS(ds) + assert.Equal(t, tc.annotation, actualAnnotation) }) } } diff --git a/pkg/controllers/kepler_internal.go b/pkg/controllers/kepler_internal.go index e5c6c4e3..75810858 100644 --- a/pkg/controllers/kepler_internal.go +++ b/pkg/controllers/kepler_internal.go @@ -541,6 +541,7 @@ func exporterReconcilers(ki *v1alpha1.KeplerInternal, cluster k8s.Cluster) []rec exporter.NewServiceMonitor(ki), exporter.NewPrometheusRule(ki), )...) + if ki.Spec.Exporter.Redfish == nil { rs = append(rs, resourceReconcilers(updateResource, exporter.NewDaemonSet(components.Full, ki), @@ -548,12 +549,12 @@ func exporterReconcilers(ki *v1alpha1.KeplerInternal, cluster k8s.Cluster) []rec )...) } else { rs = append(rs, - reconciler.KeplerDaemonSetReconciler{ - Ki: *ki, + reconciler.KeplerReconciler{ + Ki: ki, Ds: exporter.NewDaemonSet(components.Full, ki), }, reconciler.KeplerConfigMapReconciler{ - Ki: *ki, + Ki: ki, Cfm: exporter.NewConfigMap(components.Full, ki), }, ) diff --git a/pkg/reconciler/kepler.go b/pkg/reconciler/kepler.go index 62039f36..977f8bd4 100644 --- a/pkg/reconciler/kepler.go +++ b/pkg/reconciler/kepler.go @@ -18,27 +18,33 @@ package reconciler import ( "context" + "encoding/json" "fmt" "strconv" + "github.com/cespare/xxhash/v2" "github.com/sustainable.computing.io/kepler-operator/pkg/api/v1alpha1" "github.com/sustainable.computing.io/kepler-operator/pkg/components/exporter" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) -type KeplerDaemonSetReconciler struct { - Ki v1alpha1.KeplerInternal +type KeplerReconciler struct { + Ki *v1alpha1.KeplerInternal Ds *appsv1.DaemonSet } -func (r KeplerDaemonSetReconciler) Reconcile(ctx context.Context, cli client.Client, s *runtime.Scheme) Result { +func (r KeplerReconciler) Reconcile(ctx context.Context, cli client.Client, s *runtime.Scheme) Result { + redfish := r.Ki.Spec.Exporter.Redfish + redfishBytes, err := json.Marshal(redfish) + if err != nil { + return Result{Action: Stop, Error: fmt.Errorf("Error occurred while marshaling Redfish spec %w", err)} + } - secretRef := r.Ki.Spec.Exporter.Redfish.SecretRef + secretRef := redfish.SecretRef secret, err := r.getRedfishSecret(ctx, cli, secretRef) if err != nil { @@ -55,12 +61,12 @@ func (r KeplerDaemonSetReconciler) Reconcile(ctx context.Context, cli client.Cli return Result{Action: Stop, Error: fmt.Errorf("Redfish secret is missing %q key", exporter.RedfishCSV)} } - exporter.MountRedfishSecretToDaemonSet(r.Ds, secret) - - return Updater{Owner: &r.Ki, Resource: r.Ds}.Reconcile(ctx, cli, s) + redfishHash := xxhash.Sum64(redfishBytes) + exporter.MountRedfishSecretToDaemonSet(r.Ds, secret, redfishHash) + return Updater{Owner: r.Ki, Resource: r.Ds}.Reconcile(ctx, cli, s) } -func (r KeplerDaemonSetReconciler) getRedfishSecret(ctx context.Context, cli client.Client, secretName string) (*corev1.Secret, error) { +func (r KeplerReconciler) getRedfishSecret(ctx context.Context, cli client.Client, secretName string) (*corev1.Secret, error) { ns := r.Ki.Spec.Exporter.Deployment.Namespace redfishSecret := corev1.Secret{} if err := cli.Get(ctx, types.NamespacedName{Namespace: ns, Name: secretName}, &redfishSecret); err != nil { @@ -70,16 +76,13 @@ func (r KeplerDaemonSetReconciler) getRedfishSecret(ctx context.Context, cli cli } type KeplerConfigMapReconciler struct { - Ki v1alpha1.KeplerInternal + Ki *v1alpha1.KeplerInternal Cfm *corev1.ConfigMap } func (r KeplerConfigMapReconciler) Reconcile(ctx context.Context, cli client.Client, s *runtime.Scheme) Result { rf := r.Ki.Spec.Exporter.Redfish - zero := metav1.Duration{} - if rf.ProbeInterval != zero { - r.Cfm.Data["REDFISH_PROBE_INTERVAL_IN_SECONDS"] = fmt.Sprintf("%f", rf.ProbeInterval.Duration.Seconds()) - } + r.Cfm.Data["REDFISH_PROBE_INTERVAL_IN_SECONDS"] = fmt.Sprint(int64(rf.ProbeInterval.Duration.Seconds())) r.Cfm.Data["REDFISH_SKIP_SSL_VERIFY"] = strconv.FormatBool(rf.SkipSSLVerify) - return Updater{Owner: &r.Ki, Resource: r.Cfm}.Reconcile(ctx, cli, s) + return Updater{Owner: r.Ki, Resource: r.Cfm}.Reconcile(ctx, cli, s) }