From aec3e840c08866cd9c7e674ab02a370cd99a5d8b Mon Sep 17 00:00:00 2001 From: Jaromir Wysoglad Date: Tue, 12 Nov 2024 17:39:51 -0500 Subject: [PATCH 1/2] [OSPRH-9848] enhance NodeExporter metrics This adds a "hostname" label to NodeExporter metrics to make the identification of the node of origin a little easier than just having the IP address. As a side effect of needing a little different parsing for NodeExporter and Kepler ScrapeConfigs, the code to get compute node IPs from ansible facts and from IPsets is now executed only once (in comparison to executing it twice), which greatly reduces k8s api calls. --- controllers/metricstorage_controller.go | 85 +++++++++++++++++++------ pkg/metricstorage/scrape_config.go | 39 +++++++++--- 2 files changed, 94 insertions(+), 30 deletions(-) diff --git a/controllers/metricstorage_controller.go b/controllers/metricstorage_controller.go index 66dcb500..fa79fccc 100644 --- a/controllers/metricstorage_controller.go +++ b/controllers/metricstorage_controller.go @@ -96,6 +96,13 @@ type MetricStorageReconciler struct { Cache cache.Cache } +// ConnectionInfo holds information about connection to a compute node +type ConnectionInfo struct { + IP string + Hostname string + TLS bool +} + // GetLogger returns a logger object with a prefix of "conroller.name" and aditional controller context fields func (r *MetricStorageReconciler) GetLogger(ctx context.Context) logr.Logger { return log.FromContext(ctx).WithName("Controllers").WithName("MetricStorage") @@ -506,7 +513,7 @@ func (r *MetricStorageReconciler) createServiceScrapeConfig( log logr.Logger, description string, serviceName string, - targets []string, + targets interface{}, tlsEnabled bool, ) error { scrapeConfig := &monv1alpha1.ScrapeConfig{ @@ -593,16 +600,17 @@ func (r *MetricStorageReconciler) createScrapeConfigs( return ctrl.Result{}, err } - // ScrapeConfigs for NodeExporters - endpointsNonTLS, endpointsTLS, err := getMetricExporterTargets(instance, helper, telemetryv1.DefaultNodeExporterPort) + connectionInfo, err := getComputeNodesConnectionInfo(instance, helper) if err != nil { - Log.Info(fmt.Sprintf("Cannot get node exporter targets. Scrape configs not created. Error: %s", err)) + Log.Info(fmt.Sprintf("Cannot get compute node connection info. Scrape configs not created. Error: %s", err)) } + // ScrapeConfigs for NodeExporters + neTargetsTLS, neTargetsNonTLS := getNodeExporterTargets(connectionInfo) // ScrapeConfig for non-tls nodes neServiceName := fmt.Sprintf("%s-node-exporter", telemetry.ServiceName) err = r.createServiceScrapeConfig(ctx, instance, Log, "Node Exporter", - neServiceName, endpointsNonTLS, false) + neServiceName, neTargetsNonTLS, false) if err != nil { return ctrl.Result{}, err } @@ -610,13 +618,13 @@ func (r *MetricStorageReconciler) createScrapeConfigs( // ScrapeConfig for tls nodes neServiceName = fmt.Sprintf("%s-node-exporter-tls", telemetry.ServiceName) err = r.createServiceScrapeConfig(ctx, instance, Log, "Node Exporter", - neServiceName, endpointsTLS, true) + neServiceName, neTargetsTLS, true) if err != nil { return ctrl.Result{}, err } // kepler scrape endpoints - _, keplerEndpoints, err := getMetricExporterTargets(instance, helper, telemetryv1.DefaultKeplerPort) + keplerEndpoints, _ := getKeplerTargets(connectionInfo) if err != nil { Log.Info(fmt.Sprintf("Cannot get Kepler targets. Scrape configs not created. Error: %s", err)) } @@ -633,6 +641,37 @@ func (r *MetricStorageReconciler) createScrapeConfigs( return ctrl.Result{}, err } +func getNodeExporterTargets(nodes []ConnectionInfo) ([]metricstorage.LabeledTarget, []metricstorage.LabeledTarget) { + tls := []metricstorage.LabeledTarget{} + nonTLS := []metricstorage.LabeledTarget{} + for _, node := range nodes { + target := metricstorage.LabeledTarget{ + IP: fmt.Sprintf("%s:%d", node.IP, telemetryv1.DefaultNodeExporterPort), + Hostname: node.Hostname, + } + if node.TLS { + tls = append(tls, target) + } else { + nonTLS = append(nonTLS, target) + } + } + return tls, nonTLS +} + +func getKeplerTargets(nodes []ConnectionInfo) ([]string, []string) { + tls := []string{} + nonTLS := []string{} + for _, node := range nodes { + target := fmt.Sprintf("%s:%d", node.IP, telemetryv1.DefaultKeplerPort) + if node.TLS { + tls = append(tls, target) + } else { + nonTLS = append(nonTLS, target) + } + } + return tls, nonTLS +} + func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, eventHandler handler.EventHandler) (ctrl.Result, error) { Log := r.GetLogger(ctx) uiPluginObj := &obsui.UIPlugin{ @@ -795,26 +834,24 @@ func (r *MetricStorageReconciler) ensureWatches( return err } -func getMetricExporterTargets( +func getComputeNodesConnectionInfo( instance *telemetryv1.MetricStorage, helper *helper.Helper, - defaultMetricExporterPort int32, -) ([]string, []string, error) { +) ([]ConnectionInfo, error) { ipSetList, err := getIPSetList(instance, helper) if err != nil { - return []string{}, []string{}, err + return []ConnectionInfo{}, err } inventorySecretList, err := getInventorySecretList(instance, helper) if err != nil { - return []string{}, []string{}, err + return []ConnectionInfo{}, err } var address string - addressesNonTLS := []string{} - addressesTLS := []string{} + connectionInfo := []ConnectionInfo{} for _, secret := range inventorySecretList.Items { inventory, err := ansible.UnmarshalYAML(secret.Data["inventory"]) if err != nil { - return []string{}, []string{}, err + return []ConnectionInfo{}, err } nodeSetGroup := inventory.Groups[secret.Labels["openstackdataplanenodeset"]] containsTelemetry := false @@ -842,20 +879,28 @@ func getMetricExporterTargets( address, _ = getAddressFromAnsibleHost(&item) } else { // we were unable to find an IP or HostName for a node, so we do not go further - return addressesNonTLS, addressesTLS, fmt.Errorf("failed to find an IP or HostName for node %s", name) + return connectionInfo, fmt.Errorf("failed to find an IP or HostName for node %s", name) } if address == "" { // we were unable to find an IP or HostName for a node, so we do not go further - return addressesNonTLS, addressesTLS, fmt.Errorf("failed to find an IP or HostName for node %s", name) + return connectionInfo, fmt.Errorf("failed to find an IP or HostName for node %s", name) } if TLSEnabled, ok := nodeSetGroup.Vars["edpm_tls_certs_enabled"].(bool); ok && TLSEnabled { - addressesTLS = append(addressesTLS, fmt.Sprintf("%s:%d", address, defaultMetricExporterPort)) + connectionInfo = append(connectionInfo, ConnectionInfo{ + IP: address, + Hostname: name, + TLS: true, + }) } else { - addressesNonTLS = append(addressesNonTLS, fmt.Sprintf("%s:%d", address, defaultMetricExporterPort)) + connectionInfo = append(connectionInfo, ConnectionInfo{ + IP: address, + Hostname: name, + TLS: false, + }) } } } - return addressesNonTLS, addressesTLS, nil + return connectionInfo, nil } func getIPSetList(instance *telemetryv1.MetricStorage, helper *helper.Helper) (*infranetworkv1.IPSetList, error) { diff --git a/pkg/metricstorage/scrape_config.go b/pkg/metricstorage/scrape_config.go index e80d4ad9..8152459e 100644 --- a/pkg/metricstorage/scrape_config.go +++ b/pkg/metricstorage/scrape_config.go @@ -27,11 +27,16 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +type LabeledTarget struct { + IP string + Hostname string +} + // ScrapeConfig creates a ScrapeConfig CR func ScrapeConfig( instance *telemetryv1.MetricStorage, labels map[string]string, - targets []string, + targets interface{}, tlsEnabled bool, ) *monv1alpha1.ScrapeConfig { var scrapeInterval monv1.Duration @@ -46,10 +51,28 @@ func ScrapeConfig( scrapeInterval = telemetryv1.DefaultScrapeInterval } - sort.Strings(targets) - var convertedTargets []monv1alpha1.Target - for _, t := range targets { - convertedTargets = append(convertedTargets, monv1alpha1.Target(t)) + var staticConfigs []monv1alpha1.StaticConfig + if ips, ok := targets.([]string); ok { + sort.Strings(ips) + var convertedTargets []monv1alpha1.Target + for _, t := range ips { + convertedTargets = append(convertedTargets, monv1alpha1.Target(t)) + } + staticConfigs = append(staticConfigs, monv1alpha1.StaticConfig{ + Targets: convertedTargets, + }) + } else if labeledTargets, ok := targets.([]LabeledTarget); ok { + sort.Slice(labeledTargets, func(i, j int) bool { + return labeledTargets[i].IP < labeledTargets[j].IP + }) + for _, t := range labeledTargets { + staticConfigs = append(staticConfigs, monv1alpha1.StaticConfig{ + Targets: []monv1alpha1.Target{monv1alpha1.Target(t.IP)}, + Labels: map[monv1.LabelName]string{ + "hostname": t.Hostname, + }, + }) + } } scrapeConfig := &monv1alpha1.ScrapeConfig{ @@ -82,11 +105,7 @@ func ScrapeConfig( }, }, ScrapeInterval: &scrapeInterval, - StaticConfigs: []monv1alpha1.StaticConfig{ - { - Targets: convertedTargets, - }, - }, + StaticConfigs: staticConfigs, }, } From 5130e45b6230032820ce2a9dc211e19ce8d85f93 Mon Sep 17 00:00:00 2001 From: Jaromir Wysoglad Date: Thu, 14 Nov 2024 09:18:00 -0500 Subject: [PATCH 2/2] Modify kuttl-tests --- .../suites/metricstorage/tests/01-assert.yaml | 15 +++++++++++++-- .../suites/metricstorage/tests/04-assert.yaml | 15 +++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/tests/kuttl/suites/metricstorage/tests/01-assert.yaml b/tests/kuttl/suites/metricstorage/tests/01-assert.yaml index f42db17c..e45fb4a1 100644 --- a/tests/kuttl/suites/metricstorage/tests/01-assert.yaml +++ b/tests/kuttl/suites/metricstorage/tests/01-assert.yaml @@ -81,8 +81,19 @@ metadata: - kind: MetricStorage name: telemetry-kuttl spec: - staticConfigs: - - {} + scrapeInterval: 30s +--- +apiVersion: monitoring.rhobs/v1alpha1 +kind: ScrapeConfig +metadata: + labels: + service: metricStorage + name: telemetry-node-exporter-tls + ownerReferences: + - kind: MetricStorage + name: telemetry-kuttl +spec: + scrapeInterval: 30s --- apiVersion: monitoring.rhobs/v1alpha1 kind: ScrapeConfig diff --git a/tests/kuttl/suites/metricstorage/tests/04-assert.yaml b/tests/kuttl/suites/metricstorage/tests/04-assert.yaml index 511d7103..6ba6623f 100644 --- a/tests/kuttl/suites/metricstorage/tests/04-assert.yaml +++ b/tests/kuttl/suites/metricstorage/tests/04-assert.yaml @@ -93,8 +93,19 @@ metadata: - kind: MetricStorage name: telemetry-kuttl spec: - staticConfigs: - - {} + scrapeInterval: 40s +--- +apiVersion: monitoring.rhobs/v1alpha1 +kind: ScrapeConfig +metadata: + labels: + service: metricStorage + name: telemetry-node-exporter-tls + ownerReferences: + - kind: MetricStorage + name: telemetry-kuttl +spec: + scrapeInterval: 40s --- apiVersion: monitoring.rhobs/v1alpha1 kind: ScrapeConfig