From f0537084f77ceb1873d6909e68891fcb2039f35c Mon Sep 17 00:00:00 2001 From: Jaromir Wysoglad Date: Tue, 9 Jul 2024 08:34:21 -0400 Subject: [PATCH 1/2] [OSPRH-8406] Switch to using ScrapeConfigs We recently discovered issues with authentication, IPv6 and ServiceMonitors in STF. This PR is proactively switching to use ScrapeConfigs instead of ServiceMonitors. The functionality should be equivalent to before. Old ServiceMonitors owned by the MetricStorage controller are deleted. There is a slight difference in the labels associated with the collected metrics. - The Node Exporter metrics are now missing the "job" label, which didn't seem useful and it follows how ceilometer and rabbit metrics are collected. - Ceilometer and RabbitMQ metrics don't have the "service" label anymore, because ScrapeConfigs don't have the information to create that label. Instead they now have the "instance" label. The "instance" label is now used to differentiate between different Rabbit clusters in dashboards instead of the "service" label. I used this opportunity to move the ScrapeConfig creation code into its own function, following the example of dashboard code. 
--- api/v1beta1/conditions.go | 16 +- config/rbac/role.yaml | 4 - controllers/metricstorage_controller.go | 322 ++++++++++-------- pkg/dashboards/openstack-rabbitmq.go | 78 ++--- pkg/metricstorage/const.go | 20 ++ pkg/metricstorage/scrape_config.go | 28 +- pkg/metricstorage/service_monitor.go | 105 ------ .../kuttl/suites/default/tests/01-assert.yaml | 6 +- .../suites/metricstorage/tests/01-assert.yaml | 67 ++-- .../suites/metricstorage/tests/04-assert.yaml | 68 ++-- tests/kuttl/suites/tls/tests/02-assert.yaml | 20 +- 11 files changed, 326 insertions(+), 408 deletions(-) create mode 100644 pkg/metricstorage/const.go delete mode 100644 pkg/metricstorage/service_monitor.go diff --git a/api/v1beta1/conditions.go b/api/v1beta1/conditions.go index 20cd72ce..01e08852 100644 --- a/api/v1beta1/conditions.go +++ b/api/v1beta1/conditions.go @@ -36,11 +36,8 @@ const ( // MonitoringStackReadyCondition Status=True condition which indicates if the MonitoringStack is configured and operational MonitoringStackReadyCondition condition.Type = "MonitoringStackReady" - // ServiceMonitorReadyCondition Status=True condition which indicates if the Ceilometer ServiceMonitor is configured and operational - ServiceMonitorReadyCondition condition.Type = "CeilometerServiceMonitorReady" - - // ScrapeConfigReadyCondition Status=True condition which indicates if the Node Exporter ScrapeConfig is configured and operational - ScrapeConfigReadyCondition condition.Type = "NodeExporterScrapeConfigReady" + // ScrapeConfigReadyCondition Status=True condition which indicates if the ScrapeConfig is configured and operational + ScrapeConfigReadyCondition condition.Type = "ScrapeConfigReady" // PrometheusReadyCondition Status=True condition which indicates if the Prometheus watch is operational PrometheusReadyCondition condition.Type = "PrometheusReady" @@ -140,15 +137,6 @@ const ( // MonitoringStackReadyMisconfiguredMessage MonitoringStackReadyMisconfiguredMessage = "MonitoringStack isn't configured 
properly: %s" - // - // ServiceMonitorReady condition messages - // - // ServiceMonitorReadyInitMessage - ServiceMonitorReadyInitMessage = "ServiceMonitor not started" - - // ServiceMonitorUnableToOwnMessage - ServiceMonitorUnableToOwnMessage = "Error occured when trying to own %s" - // // ScrapeConfigReady condition messages // diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index f49ac332..f10c4a60 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -236,13 +236,9 @@ rules: resources: - servicemonitors verbs: - - create - delete - get - list - - patch - - update - - watch - apiGroups: - network.openstack.org resources: diff --git a/controllers/metricstorage_controller.go b/controllers/metricstorage_controller.go index 7b0bc6ed..9ab70b5f 100644 --- a/controllers/metricstorage_controller.go +++ b/controllers/metricstorage_controller.go @@ -52,6 +52,7 @@ import ( common "github.com/openstack-k8s-operators/lib-common/modules/common" condition "github.com/openstack-k8s-operators/lib-common/modules/common/condition" helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + object "github.com/openstack-k8s-operators/lib-common/modules/common/object" tls "github.com/openstack-k8s-operators/lib-common/modules/common/tls" infranetworkv1 "github.com/openstack-k8s-operators/infra-operator/apis/network/v1beta1" @@ -59,6 +60,7 @@ import ( ceilometer "github.com/openstack-k8s-operators/telemetry-operator/pkg/ceilometer" "github.com/openstack-k8s-operators/telemetry-operator/pkg/dashboards" metricstorage "github.com/openstack-k8s-operators/telemetry-operator/pkg/metricstorage" + telemetry "github.com/openstack-k8s-operators/telemetry-operator/pkg/telemetry" rabbitmqv1 "github.com/rabbitmq/cluster-operator/api/v1beta1" monv1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1" monv1alpha1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1alpha1" @@ -101,7 +103,7 @@ func (r *MetricStorageReconciler) 
GetLogger(ctx context.Context) logr.Logger { //+kubebuilder:rbac:groups=telemetry.openstack.org,resources=metricstorages/status,verbs=get;update;patch //+kubebuilder:rbac:groups=telemetry.openstack.org,resources=metricstorages/finalizers,verbs=update;patch //+kubebuilder:rbac:groups=monitoring.rhobs,resources=monitoringstacks,verbs=get;list;watch;create;update;patch;delete -//+kubebuilder:rbac:groups=monitoring.rhobs,resources=servicemonitors,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=monitoring.rhobs,resources=servicemonitors,verbs=get;list;delete //+kubebuilder:rbac:groups=monitoring.rhobs,resources=scrapeconfigs,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=monitoring.rhobs,resources=prometheusrules,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=monitoring.rhobs,resources=prometheuses,verbs=get;list;watch;update;patch;delete @@ -173,7 +175,6 @@ func (r *MetricStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reques cl := condition.CreateList( condition.UnknownCondition(condition.ReadyCondition, condition.InitReason, condition.ReadyInitMessage), condition.UnknownCondition(telemetryv1.MonitoringStackReadyCondition, condition.InitReason, telemetryv1.MonitoringStackReadyInitMessage), - condition.UnknownCondition(telemetryv1.ServiceMonitorReadyCondition, condition.InitReason, telemetryv1.ServiceMonitorReadyInitMessage), condition.UnknownCondition(telemetryv1.ScrapeConfigReadyCondition, condition.InitReason, telemetryv1.ScrapeConfigReadyInitMessage), condition.UnknownCondition(telemetryv1.DashboardPrometheusRuleReadyCondition, condition.InitReason, telemetryv1.DashboardPrometheusRuleReadyInitMessage), condition.UnknownCondition(telemetryv1.DashboardPluginReadyCondition, condition.InitReason, telemetryv1.DashboardPluginReadyInitMessage), @@ -219,6 +220,54 @@ func (r *MetricStorageReconciler) reconcileDelete( return ctrl.Result{}, nil } +// TODO: call the function 
appropriately +// +//nolint:all +func (r *MetricStorageReconciler) reconcileUpdate( + ctx context.Context, + instance *telemetryv1.MetricStorage, + helper *helper.Helper, +) (ctrl.Result, error) { + Log := r.GetLogger(ctx) + Log.Info("Reconciling Service update") + + err := r.deleteOldServiceMonitors(ctx, instance) + if err != nil { + return ctrl.Result{}, err + } + + Log.Info(fmt.Sprintf("Reconciled Service '%s' update successfully", instance.Name)) + + return ctrl.Result{}, nil +} + +// Delete old ServiceMonitors +// ServiceMonitors were used when deploying with RHOSO 18 GA telemetry-operator. +// +//nolint:all +func (r *MetricStorageReconciler) deleteOldServiceMonitors( + ctx context.Context, + instance *telemetryv1.MetricStorage, +) error { + monitorList := &monv1.ServiceMonitorList{} + listOpts := []client.ListOption{ + client.InNamespace(instance.GetNamespace()), + } + err := r.Client.List(ctx, monitorList, listOpts...) + if err != nil && !k8s_errors.IsNotFound(err) { + return err + } + for _, monitor := range monitorList.Items { + if object.CheckOwnerRefExist(instance.ObjectMeta.UID, monitor.ObjectMeta.OwnerReferences) { + err = r.Client.Delete(ctx, monitor) + if err != nil { + return err + } + } + } + return nil +} + func (r *MetricStorageReconciler) reconcileNormal( ctx context.Context, instance *telemetryv1.MetricStorage, @@ -366,43 +415,123 @@ func (r *MetricStorageReconciler) reconcileNormal( instance.Status.Conditions.MarkTrue(telemetryv1.MonitoringStackReadyCondition, condition.ReadyMessage) } - // Deploy ServiceMonitors - err = r.ensureWatches(ctx, "servicemonitors.monitoring.rhobs", &monv1.ServiceMonitor{}, eventHandler) + // Deploy ScrapeConfigs + if res, err := r.createScrapeConfigs(ctx, instance, eventHandler, helper); err != nil { + return res, err + } + + if !instance.Spec.DashboardsEnabled { + if res, err := metricstorage.DeleteDashboardObjects(ctx, instance, helper); err != nil { + return res, err + } + 
instance.Status.Conditions.MarkTrue(telemetryv1.DashboardPrometheusRuleReadyCondition, telemetryv1.DashboardsNotEnabledMessage) + instance.Status.Conditions.MarkTrue(telemetryv1.DashboardDatasourceReadyCondition, telemetryv1.DashboardsNotEnabledMessage) + instance.Status.Conditions.MarkTrue(telemetryv1.DashboardDefinitionReadyCondition, telemetryv1.DashboardsNotEnabledMessage) + instance.Status.Conditions.MarkTrue(telemetryv1.DashboardPluginReadyCondition, telemetryv1.DashboardsNotEnabledMessage) + } else { + if res, err := r.createDashboardObjects(ctx, instance, eventHandler); err != nil { + return res, err + } + } + // + // TLS input validation + // + // Validate the CA cert secret if provided + if instance.Spec.PrometheusTLS.CaBundleSecretName != "" { + _, ctrlResult, err := tls.ValidateCACertSecret( + ctx, + helper.GetClient(), + types.NamespacedName{ + Name: instance.Spec.PrometheusTLS.CaBundleSecretName, + Namespace: instance.Namespace, + }, + ) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.TLSInputReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + condition.TLSInputErrorMessage, + err.Error())) + return ctrlResult, err + } else if (ctrlResult != ctrl.Result{}) { + return ctrlResult, nil + } + } + + // Validate API service certs secrets + if instance.Spec.PrometheusTLS.Enabled() { + _, ctrlResult, err := instance.Spec.PrometheusTLS.ValidateCertSecret(ctx, helper, instance.Namespace) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.TLSInputReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + condition.TLSInputErrorMessage, + err.Error())) + return ctrlResult, err + } else if (ctrlResult != ctrl.Result{}) { + return ctrlResult, nil + } + } + + // all cert input checks out so report InputReady + instance.Status.Conditions.MarkTrue(condition.TLSInputReadyCondition, condition.InputReadyMessage) + + if 
instance.Status.Conditions.AllSubConditionIsTrue() { + instance.Status.Conditions.MarkTrue( + condition.ReadyCondition, condition.ReadyMessage) + } + Log.Info("Reconciled Service successfully") + return ctrl.Result{}, nil +} +func (r *MetricStorageReconciler) createScrapeConfigs( + ctx context.Context, + instance *telemetryv1.MetricStorage, + eventHandler handler.EventHandler, + helper *helper.Helper, +) (ctrl.Result, error) { + Log := r.GetLogger(ctx) + err := r.ensureWatches(ctx, "scrapeconfigs.monitoring.rhobs", &monv1alpha1.ScrapeConfig{}, eventHandler) if err != nil { - instance.Status.Conditions.MarkFalse(telemetryv1.ServiceMonitorReadyCondition, - condition.Reason("Can't own ServiceMonitor resource"), + instance.Status.Conditions.MarkFalse(telemetryv1.ScrapeConfigReadyCondition, + condition.Reason("Can't own ScrapeConfig resource"), condition.SeverityError, - telemetryv1.ServiceMonitorUnableToOwnMessage, err) - Log.Info("Can't own ServiceMonitor resource") + telemetryv1.ScrapeConfigUnableToOwnMessage, err) + Log.Info("Can't own ScrapeConfig resource") return ctrl.Result{RequeueAfter: telemetryv1.PauseBetweenWatchAttempts}, nil } - // ServiceMonitor for ceilometer monitoring + // ScrapeConfig for ceilometer monitoring ceilometerServerName := fmt.Sprintf("%s-internal.%s.svc", ceilometer.ServiceName, instance.Namespace) - ceilometerMonitor := &monv1.ServiceMonitor{ + ceilometerScrapeConfig := &monv1alpha1.ScrapeConfig{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", instance.Name, ceilometerServerName), + Name: fmt.Sprintf("%s-ceilometer", telemetry.ServiceName), Namespace: instance.Namespace, }, } - op, err = controllerutil.CreateOrPatch(ctx, r.Client, ceilometerMonitor, func() error { - ceilometerLabels := map[string]string{ - common.AppSelector: ceilometer.ServiceName, - } - desiredCeilometerMonitor := metricstorage.ServiceMonitor(instance, serviceLabels, ceilometerLabels, ceilometerServerName, "") - 
desiredCeilometerMonitor.Spec.DeepCopyInto(&ceilometerMonitor.Spec) - ceilometerMonitor.ObjectMeta.Labels = desiredCeilometerMonitor.ObjectMeta.Labels - err = controllerutil.SetControllerReference(instance, ceilometerMonitor, r.Scheme) + op, err := controllerutil.CreateOrPatch(ctx, r.Client, ceilometerScrapeConfig, func() error { + desiredCeilometerScrapeConfig := metricstorage.ScrapeConfig(instance, + serviceLabels, + []string{fmt.Sprintf("%s:%d", ceilometerServerName, ceilometer.CeilometerPrometheusPort)}, + instance.Spec.PrometheusTLS.Enabled()) + desiredCeilometerScrapeConfig.Spec.DeepCopyInto(&ceilometerScrapeConfig.Spec) + ceilometerScrapeConfig.ObjectMeta.Labels = desiredCeilometerScrapeConfig.ObjectMeta.Labels + err = controllerutil.SetControllerReference(instance, ceilometerScrapeConfig, r.Scheme) return err }) if err != nil { return ctrl.Result{}, err } if op != controllerutil.OperationResultNone { - Log.Info(fmt.Sprintf("Ceilometer ServiceMonitor %s successfully changed - operation: %s", ceilometerMonitor.Name, string(op))) + Log.Info(fmt.Sprintf("Ceilometer ScrapeConfig %s successfully changed - operation: %s", ceilometerScrapeConfig.Name, string(op))) } - // ServiceMonitors for RabbitMQ monitoring + // ScrapeConfigs for RabbitMQ monitoring + // NOTE: We're watching Rabbits and reconciling with each of their change + // that should keep the targets inside the ScrapeConfig always + // up to date. 
+ rabbitTargets := []string{} rabbitList := &rabbitmqv1.RabbitmqClusterList{} listOpts := []client.ListOption{ client.InNamespace(instance.GetNamespace()), @@ -413,77 +542,35 @@ func (r *MetricStorageReconciler) reconcileNormal( } for _, rabbit := range rabbitList.Items { rabbitServerName := fmt.Sprintf("%s.%s.svc", rabbit.Name, rabbit.Namespace) - rabbitMonitor := &monv1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", instance.Name, rabbitServerName), - Namespace: instance.Namespace, - }, - } - op, err = controllerutil.CreateOrPatch(ctx, r.Client, rabbitMonitor, func() error { - rabbitLabels := map[string]string{ - "app.kubernetes.io/name": rabbit.Name, - } - desiredRabbitMonitor := metricstorage.ServiceMonitor(instance, serviceLabels, rabbitLabels, rabbitServerName, "prometheus-tls") - desiredRabbitMonitor.Spec.DeepCopyInto(&rabbitMonitor.Spec) - rabbitMonitor.ObjectMeta.Labels = desiredRabbitMonitor.ObjectMeta.Labels - err = controllerutil.SetControllerReference(instance, rabbitMonitor, r.Scheme) - return err - }) - if err != nil { - return ctrl.Result{}, err - } - if op != controllerutil.OperationResultNone { - Log.Info(fmt.Sprintf("Rabbit ServiceMonitor %s successfully changed - operation: %s", rabbitMonitor.Name, string(op))) - } + rabbitTargets = append(rabbitTargets, fmt.Sprintf("%s:%d", rabbitServerName, metricstorage.RabbitMQPrometheusPort)) } - // Check that RabbitMQ monitor's RabbitMQs still exist - // Delete the ServiceMonitors, which don't have a RabbitMQ anymore - svcMonitorList := &monv1.ServiceMonitorList{} - err = r.Client.List(ctx, svcMonitorList, listOpts...) 
- if err != nil && !k8s_errors.IsNotFound(err) { + rabbitScrapeConfig := &monv1alpha1.ScrapeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-rabbitmq", telemetry.ServiceName), + Namespace: instance.Namespace, + }, + } + op, err = controllerutil.CreateOrPatch(ctx, r.Client, rabbitScrapeConfig, func() error { + desiredRabbitScrapeConfig := metricstorage.ScrapeConfig(instance, serviceLabels, rabbitTargets, instance.Spec.PrometheusTLS.Enabled()) + desiredRabbitScrapeConfig.Spec.DeepCopyInto(&rabbitScrapeConfig.Spec) + rabbitScrapeConfig.ObjectMeta.Labels = desiredRabbitScrapeConfig.ObjectMeta.Labels + err = controllerutil.SetControllerReference(instance, rabbitScrapeConfig, r.Scheme) + return err + }) + if err != nil { return ctrl.Result{}, err } - for _, svcMonitor := range svcMonitorList.Items { - if svcMonitor.OwnerReferences == nil || - len(svcMonitor.OwnerReferences) < 1 || - svcMonitor.OwnerReferences[0].Name != instance.Name { - continue - } - if svcMonitor.Name == fmt.Sprintf("%s-ceilometer-internal.%s.svc", instance.Name, instance.Namespace) { - continue - } - rabbitmqExists := false - for _, rabbit := range rabbitList.Items { - if svcMonitor.Name == fmt.Sprintf("%s-%s.%s.svc", instance.Name, rabbit.Name, instance.Namespace) { - rabbitmqExists = true - } - } - if !rabbitmqExists { - err = r.Client.Delete(ctx, svcMonitor) - if err != nil { - return ctrl.Result{}, err - } - Log.Info(fmt.Sprintf("Deleted ServiceMonitor: %s because its RabbitMQ doesn't exist", svcMonitor.Name)) - } + if op != controllerutil.OperationResultNone { + Log.Info(fmt.Sprintf("Rabbit ScrapeConfig %s successfully changed - operation: %s", rabbitScrapeConfig.Name, string(op))) } - instance.Status.Conditions.MarkTrue(telemetryv1.ServiceMonitorReadyCondition, condition.ReadyMessage) + // ScrapeConfigs for NodeExporters endpointsNonTLS, endpointsTLS, err := getNodeExporterTargets(instance, helper) - // scrapeConfig for non-tls nodes - err = r.ensureWatches(ctx, 
"scrapeconfigs.monitoring.rhobs", &monv1alpha1.ScrapeConfig{}, eventHandler) - - if err != nil { - instance.Status.Conditions.MarkFalse(telemetryv1.ScrapeConfigReadyCondition, - condition.Reason("Can't own ScrapeConfig resource"), - condition.SeverityError, - telemetryv1.ScrapeConfigUnableToOwnMessage, err) - Log.Info("Can't own ScrapeConfig resource") - return ctrl.Result{RequeueAfter: telemetryv1.PauseBetweenWatchAttempts}, nil - } + // ScrapeConfig for non-tls nodes scrapeConfig := &monv1alpha1.ScrapeConfig{ ObjectMeta: metav1.ObjectMeta{ - Name: instance.Name, + Name: telemetry.ServiceName, Namespace: instance.Namespace, }, } @@ -501,9 +588,10 @@ func (r *MetricStorageReconciler) reconcileNormal( Log.Info(fmt.Sprintf("Node Exporter ScrapeConfig %s successfully changed - operation: %s", scrapeConfig.GetName(), string(op))) } + // ScrapeConfig for tls nodes scrapeConfigTLS := &monv1alpha1.ScrapeConfig{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-tls", instance.Name), + Name: fmt.Sprintf("%s-tls", telemetry.ServiceName), Namespace: instance.Namespace, }, } @@ -521,71 +609,7 @@ func (r *MetricStorageReconciler) reconcileNormal( Log.Info(fmt.Sprintf("Node Exporter ScrapeConfig %s successfully changed - operation: %s", scrapeConfig.GetName(), string(op))) } instance.Status.Conditions.MarkTrue(telemetryv1.ScrapeConfigReadyCondition, condition.ReadyMessage) - - if !instance.Spec.DashboardsEnabled { - if res, err := metricstorage.DeleteDashboardObjects(ctx, instance, helper); err != nil { - return res, err - } - instance.Status.Conditions.MarkTrue(telemetryv1.DashboardPrometheusRuleReadyCondition, telemetryv1.DashboardsNotEnabledMessage) - instance.Status.Conditions.MarkTrue(telemetryv1.DashboardDatasourceReadyCondition, telemetryv1.DashboardsNotEnabledMessage) - instance.Status.Conditions.MarkTrue(telemetryv1.DashboardDefinitionReadyCondition, telemetryv1.DashboardsNotEnabledMessage) - 
instance.Status.Conditions.MarkTrue(telemetryv1.DashboardPluginReadyCondition, telemetryv1.DashboardsNotEnabledMessage) - } else { - if res, err := r.createDashboardObjects(ctx, instance, eventHandler); err != nil { - return res, err - } - } - // - // TLS input validation - // - // Validate the CA cert secret if provided - if instance.Spec.PrometheusTLS.CaBundleSecretName != "" { - _, ctrlResult, err := tls.ValidateCACertSecret( - ctx, - helper.GetClient(), - types.NamespacedName{ - Name: instance.Spec.PrometheusTLS.CaBundleSecretName, - Namespace: instance.Namespace, - }, - ) - if err != nil { - instance.Status.Conditions.Set(condition.FalseCondition( - condition.TLSInputReadyCondition, - condition.ErrorReason, - condition.SeverityWarning, - condition.TLSInputErrorMessage, - err.Error())) - return ctrlResult, err - } else if (ctrlResult != ctrl.Result{}) { - return ctrlResult, nil - } - } - - // Validate API service certs secrets - if instance.Spec.PrometheusTLS.Enabled() { - _, ctrlResult, err := instance.Spec.PrometheusTLS.ValidateCertSecret(ctx, helper, instance.Namespace) - if err != nil { - instance.Status.Conditions.Set(condition.FalseCondition( - condition.TLSInputReadyCondition, - condition.ErrorReason, - condition.SeverityWarning, - condition.TLSInputErrorMessage, - err.Error())) - return ctrlResult, err - } else if (ctrlResult != ctrl.Result{}) { - return ctrlResult, nil - } - } - - // all cert input checks out so report InputReady - instance.Status.Conditions.MarkTrue(condition.TLSInputReadyCondition, condition.InputReadyMessage) - - if instance.Status.Conditions.AllSubConditionIsTrue() { - instance.Status.Conditions.MarkTrue( - condition.ReadyCondition, condition.ReadyMessage) - } - Log.Info("Reconciled Service successfully") - return ctrl.Result{}, nil + return ctrl.Result{}, err } func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, eventHandler handler.EventHandler) (ctrl.Result, error) { 
diff --git a/pkg/dashboards/openstack-rabbitmq.go b/pkg/dashboards/openstack-rabbitmq.go index 4156d7a6..35ab7b4a 100644 --- a/pkg/dashboards/openstack-rabbitmq.go +++ b/pkg/dashboards/openstack-rabbitmq.go @@ -101,7 +101,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "pluginVersion": "6.7.6", "targets": [ { - "expr": "sum(rabbitmq_queues{service=\"$cluster\"})", + "expr": "sum(rabbitmq_queues{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -156,7 +156,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "pluginVersion": "6.7.6", "targets": [ { - "expr": "sum(rabbitmq_consumers{service=\"$cluster\"})", + "expr": "sum(rabbitmq_consumers{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -211,7 +211,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "pluginVersion": "6.7.6", "targets": [ { - "expr": "sum(rabbitmq_connections{service=\"$cluster\"})", + "expr": "sum(rabbitmq_connections{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -266,7 +266,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "pluginVersion": "6.7.6", "targets": [ { - "expr": "sum(rabbitmq_channels{service=\"$cluster\"})", + "expr": "sum(rabbitmq_channels{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -321,7 +321,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "pluginVersion": "6.7.6", "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_received_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_received_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -376,7 +376,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "pluginVersion": "6.7.6", "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_redelivered_total{service=\"$cluster\"}[60s])) + 
sum(rate(rabbitmq_global_messages_delivered_consume_auto_ack_total{service=\"$cluster\"}[60s])) + sum(rate(rabbitmq_global_messages_delivered_consume_manual_ack_total{service=\"$cluster\"}[60s])) + sum(rate(rabbitmq_global_messages_delivered_get_auto_ack_total{service=\"$cluster\"}[60s])) + sum(rate(rabbitmq_global_messages_delivered_get_manual_ack_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_redelivered_total{instance=\"$cluster\"}[60s])) + sum(rate(rabbitmq_global_messages_delivered_consume_auto_ack_total{instance=\"$cluster\"}[60s])) + sum(rate(rabbitmq_global_messages_delivered_consume_manual_ack_total{instance=\"$cluster\"}[60s])) + sum(rate(rabbitmq_global_messages_delivered_get_auto_ack_total{instance=\"$cluster\"}[60s])) + sum(rate(rabbitmq_global_messages_delivered_get_manual_ack_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -431,7 +431,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "pluginVersion": "6.7.6", "targets": [ { - "expr": "sum(rabbitmq_queue_messages_ready{service=\"$cluster\"})", + "expr": "sum(rabbitmq_queue_messages_ready{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -486,7 +486,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "pluginVersion": "6.7.6", "targets": [ { - "expr": "sum(rabbitmq_queue_messages_unacked{service=\"$cluster\"})", + "expr": "sum(rabbitmq_queue_messages_unacked{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -561,7 +561,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "(rabbitmq_resident_memory_limit_bytes{service=\"$cluster\"}) -\n(rabbitmq_process_resident_memory_bytes{service=\"$cluster\"})", + "expr": "(rabbitmq_resident_memory_limit_bytes{instance=\"$cluster\"}) -\n(rabbitmq_process_resident_memory_bytes{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ 
-653,7 +653,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "rabbitmq_disk_space_available_bytes{service=\"$cluster\"}", + "expr": "rabbitmq_disk_space_available_bytes{instance=\"$cluster\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -745,7 +745,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "(rabbitmq_process_max_fds{service=\"$cluster\"}) -\n(rabbitmq_process_open_fds{service=\"$cluster\"})", + "expr": "(rabbitmq_process_max_fds{instance=\"$cluster\"}) -\n(rabbitmq_process_open_fds{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -837,7 +837,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "(rabbitmq_process_max_tcp_sockets{service=\"$cluster\"}) -\n(rabbitmq_process_open_tcp_sockets{service=\"$cluster\"})", + "expr": "(rabbitmq_process_max_tcp_sockets{instance=\"$cluster\"}) -\n(rabbitmq_process_open_tcp_sockets{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -949,7 +949,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rabbitmq_queue_messages_ready{service=\"$cluster\"})", + "expr": "sum(rabbitmq_queue_messages_ready{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -1042,7 +1042,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rabbitmq_queue_messages_unacked{service=\"$cluster\"})", + "expr": "sum(rabbitmq_queue_messages_unacked{instance=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -1153,7 +1153,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_received_total{service=\"$cluster\"}[60s]))", + "expr": 
"sum(rate(rabbitmq_global_messages_received_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -1245,7 +1245,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_confirmed_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_confirmed_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -1337,7 +1337,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_routed_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_routed_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -1429,7 +1429,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_received_confirm_total{service=\"$cluster\"}[60s]) - \nrate(rabbitmq_global_messages_confirmed_total{service=\"$cluster\"}[60s])\n)", + "expr": "sum(rate(rabbitmq_global_messages_received_confirm_total{instance=\"$cluster\"}[60s]) - \nrate(rabbitmq_global_messages_confirmed_total{instance=\"$cluster\"}[60s])\n)", "interval": "", "legendFormat": "", "refId": "A" @@ -1521,7 +1521,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_unroutable_dropped_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_unroutable_dropped_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -1613,7 +1613,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_unroutable_returned_total{service=\"$cluster\"}[60s]))", + "expr": 
"sum(rate(rabbitmq_global_messages_unroutable_returned_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -1724,7 +1724,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(\n rate(rabbitmq_global_messages_delivered_consume_auto_ack_total{service=\"$cluster\"}[60s])+\n rate(rabbitmq_global_messages_delivered_consume_manual_ack_total{service=\"$cluster\"}[60s])\n)", + "expr": "sum(\n rate(rabbitmq_global_messages_delivered_consume_auto_ack_total{instance=\"$cluster\"}[60s])+\n rate(rabbitmq_global_messages_delivered_consume_manual_ack_total{instance=\"$cluster\"}[60s])\n)", "interval": "", "legendFormat": "", "refId": "A" @@ -1816,7 +1816,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_redelivered_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_redelivered_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -1908,7 +1908,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_delivered_consume_manual_ack_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_delivered_consume_manual_ack_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2000,7 +2000,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_delivered_consume_auto_ack_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_delivered_consume_auto_ack_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2092,7 +2092,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(rabbitmq_global_messages_acknowledged_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_acknowledged_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2184,7 +2184,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_delivered_get_auto_ack_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_delivered_get_auto_ack_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2276,7 +2276,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_get_empty_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_get_empty_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2368,7 +2368,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_global_messages_delivered_get_manual_ack_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_global_messages_delivered_get_manual_ack_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2479,7 +2479,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "rabbitmq_queues{service=\"$cluster\"}", + "expr": "rabbitmq_queues{instance=\"$cluster\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -2571,7 +2571,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_queues_declared_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_queues_declared_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2663,7 +2663,7 @@ func OpenstackRabbitmq(dsName string) 
*corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_queues_created_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_queues_created_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2755,7 +2755,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_queues_deleted_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_queues_deleted_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -2866,7 +2866,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "rabbitmq_channels{service=\"$cluster\"}", + "expr": "rabbitmq_channels{instance=\"$cluster\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -2957,7 +2957,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_channels_opened_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_channels_opened_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -3049,7 +3049,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_channels_closed_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_channels_closed_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -3160,7 +3160,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "rabbitmq_connections{service=\"$cluster\"}", + "expr": "rabbitmq_connections{instance=\"$cluster\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -3251,7 +3251,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(rabbitmq_connections_opened_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_connections_opened_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -3343,7 +3343,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "steppedLine": false, "targets": [ { - "expr": "sum(rate(rabbitmq_connections_closed_total{service=\"$cluster\"}[60s]))", + "expr": "sum(rate(rabbitmq_connections_closed_total{instance=\"$cluster\"}[60s]))", "interval": "", "legendFormat": "", "refId": "A" @@ -3422,7 +3422,7 @@ func OpenstackRabbitmq(dsName string) *corev1.ConfigMap { "name": "cluster", "options": [ ], - "query": "label_values(rabbitmq_identity_info, service)", + "query": "label_values(rabbitmq_identity_info, instance)", "skipUrlSync": false, "type": "query" } diff --git a/pkg/metricstorage/const.go b/pkg/metricstorage/const.go new file mode 100644 index 00000000..4e0f981c --- /dev/null +++ b/pkg/metricstorage/const.go @@ -0,0 +1,20 @@ +/* + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metricstorage + +const ( + RabbitMQPrometheusPort = 15691 +) diff --git a/pkg/metricstorage/scrape_config.go b/pkg/metricstorage/scrape_config.go index 852974ea..f2c1362c 100644 --- a/pkg/metricstorage/scrape_config.go +++ b/pkg/metricstorage/scrape_config.go @@ -17,7 +17,7 @@ limitations under the License. 
package metricstorage import ( - "fmt" + "sort" tls "github.com/openstack-k8s-operators/lib-common/modules/common/tls" telemetryv1 "github.com/openstack-k8s-operators/telemetry-operator/api/v1beta1" @@ -42,10 +42,13 @@ func ScrapeConfig( } else { scrapeInterval = telemetryv1.DefaultScrapeInterval } + + sort.Strings(targets) var convertedTargets []monv1alpha1.Target for _, t := range targets { convertedTargets = append(convertedTargets, monv1alpha1.Target(t)) } + scrapeConfig := &monv1alpha1.ScrapeConfig{ ObjectMeta: metav1.ObjectMeta{ Name: instance.Name, @@ -53,6 +56,28 @@ func ScrapeConfig( Labels: labels, }, Spec: monv1alpha1.ScrapeConfigSpec{ + MetricRelabelConfigs: []*monv1.RelabelConfig{ + { + Action: "labeldrop", + Regex: "pod", + SourceLabels: []monv1.LabelName{}, + }, + { + Action: "labeldrop", + Regex: "namespace", + SourceLabels: []monv1.LabelName{}, + }, + { + Action: "labeldrop", + Regex: "job", + SourceLabels: []monv1.LabelName{}, + }, + { + Action: "labeldrop", + Regex: "publisher", + SourceLabels: []monv1.LabelName{}, + }, + }, ScrapeInterval: &scrapeInterval, StaticConfigs: []monv1alpha1.StaticConfig{ { @@ -76,7 +101,6 @@ func ScrapeConfig( scheme := "HTTPS" scrapeConfig.Spec.Scheme = &scheme scrapeConfig.Spec.TLSConfig = &tlsConfig - scrapeConfig.ObjectMeta.Name = fmt.Sprintf("%s-tls", instance.Name) } return scrapeConfig diff --git a/pkg/metricstorage/service_monitor.go b/pkg/metricstorage/service_monitor.go deleted file mode 100644 index 77ffe4cf..00000000 --- a/pkg/metricstorage/service_monitor.go +++ /dev/null @@ -1,105 +0,0 @@ -/* -Copyright 2022. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package metricstorage - -import ( - "fmt" - - tls "github.com/openstack-k8s-operators/lib-common/modules/common/tls" - telemetryv1 "github.com/openstack-k8s-operators/telemetry-operator/api/v1beta1" - monv1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// ServiceMonitor creates a ServiceMonitor CR -func ServiceMonitor( - instance *telemetryv1.MetricStorage, - labels map[string]string, - selector map[string]string, - serverName string, - port string, -) *monv1.ServiceMonitor { - var scrapeInterval monv1.Duration - if instance.Spec.MonitoringStack != nil && instance.Spec.MonitoringStack.ScrapeInterval != "" { - scrapeInterval = monv1.Duration(instance.Spec.MonitoringStack.ScrapeInterval) - } else if instance.Spec.CustomMonitoringStack != nil && - instance.Spec.CustomMonitoringStack.PrometheusConfig != nil && - instance.Spec.CustomMonitoringStack.PrometheusConfig.ScrapeInterval != nil && - *instance.Spec.CustomMonitoringStack.PrometheusConfig.ScrapeInterval != monv1.Duration("") { - scrapeInterval = *instance.Spec.CustomMonitoringStack.PrometheusConfig.ScrapeInterval - } else { - scrapeInterval = telemetryv1.DefaultScrapeInterval - } - - serviceMonitor := &monv1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", instance.Name, serverName), - Namespace: instance.Namespace, - Labels: labels, - }, - Spec: monv1.ServiceMonitorSpec{ - Endpoints: []monv1.Endpoint{ - { - Interval: scrapeInterval, - MetricRelabelConfigs: []*monv1.RelabelConfig{ - { - Action: "labeldrop", - 
Regex: "pod", - SourceLabels: []monv1.LabelName{}, - }, - { - Action: "labeldrop", - Regex: "namespace", - SourceLabels: []monv1.LabelName{}, - }, - { - Action: "labeldrop", - Regex: "instance", - SourceLabels: []monv1.LabelName{}, - }, - { - Action: "labeldrop", - Regex: "job", - SourceLabels: []monv1.LabelName{}, - }, - { - Action: "labeldrop", - Regex: "publisher", - SourceLabels: []monv1.LabelName{}, - }, - }, - }, - }, - Selector: metav1.LabelSelector{ - MatchLabels: selector, - }, - }, - } - if port != "" { - serviceMonitor.Spec.Endpoints[0].Port = port - } - if instance.Spec.PrometheusTLS.Enabled() { - serviceMonitor.Spec.Endpoints[0].Scheme = "https" - serviceMonitor.Spec.Endpoints[0].TLSConfig = &monv1.TLSConfig{ - CAFile: fmt.Sprintf("/etc/prometheus/secrets/%s/%s", instance.Spec.PrometheusTLS.CaBundleSecretName, tls.CABundleKey), - SafeTLSConfig: monv1.SafeTLSConfig{ - ServerName: serverName, - }, - } - } - return serviceMonitor -} diff --git a/tests/kuttl/suites/default/tests/01-assert.yaml b/tests/kuttl/suites/default/tests/01-assert.yaml index 84467974..7c1620b3 100644 --- a/tests/kuttl/suites/default/tests/01-assert.yaml +++ b/tests/kuttl/suites/default/tests/01-assert.yaml @@ -103,8 +103,6 @@ status: conditions: - type: Ready status: "True" - - type: CeilometerServiceMonitorReady - status: "True" - type: DashboardDatasourceReady status: "True" - type: DashboardDefinitionReady @@ -115,10 +113,10 @@ status: status: "True" - type: MonitoringStackReady status: "True" - - type: NodeExporterScrapeConfigReady - status: "True" - type: PrometheusReady status: "True" + - type: ScrapeConfigReady + status: "True" - type: TLSInputReady status: "True" --- diff --git a/tests/kuttl/suites/metricstorage/tests/01-assert.yaml b/tests/kuttl/suites/metricstorage/tests/01-assert.yaml index 8444f2d1..891277ea 100644 --- a/tests/kuttl/suites/metricstorage/tests/01-assert.yaml +++ b/tests/kuttl/suites/metricstorage/tests/01-assert.yaml @@ -30,68 +30,53 @@ spec: protocol: 
TCP targetPort: 9090 --- -apiVersion: monitoring.rhobs/v1 -kind: ServiceMonitor +apiVersion: monitoring.rhobs/v1alpha1 +kind: ScrapeConfig metadata: labels: service: metricStorage - name: telemetry-kuttl-ceilometer-internal.telemetry-kuttl-tests.svc + name: telemetry-ceilometer ownerReferences: - kind: MetricStorage name: telemetry-kuttl spec: - endpoints: - - interval: 30s - metricRelabelings: - - action: labeldrop - regex: pod - - action: labeldrop - regex: namespace - - action: labeldrop - regex: instance - - action: labeldrop - regex: job - - action: labeldrop - regex: publisher - namespaceSelector: {} - selector: - matchLabels: - service: ceilometer + scrapeInterval: 30s + metricRelabelings: + - action: labeldrop + regex: pod + - action: labeldrop + regex: namespace + - action: labeldrop + regex: job + - action: labeldrop + regex: publisher --- -apiVersion: monitoring.rhobs/v1 -kind: ServiceMonitor +apiVersion: monitoring.rhobs/v1alpha1 +kind: ScrapeConfig metadata: labels: service: metricStorage - name: telemetry-kuttl-rabbitmq.telemetry-kuttl-tests.svc + name: telemetry-rabbitmq ownerReferences: - kind: MetricStorage name: telemetry-kuttl spec: - endpoints: - - interval: 30s - metricRelabelings: - - action: labeldrop - regex: pod - - action: labeldrop - regex: namespace - - action: labeldrop - regex: instance - - action: labeldrop - regex: job - - action: labeldrop - regex: publisher - namespaceSelector: {} - selector: - matchLabels: - app.kubernetes.io/name: rabbitmq + scrapeInterval: 30s + metricRelabelings: + - action: labeldrop + regex: pod + - action: labeldrop + regex: namespace + - action: labeldrop + regex: job + - action: labeldrop --- apiVersion: monitoring.rhobs/v1alpha1 kind: ScrapeConfig metadata: labels: service: metricStorage - name: telemetry-kuttl + name: telemetry ownerReferences: - kind: MetricStorage name: telemetry-kuttl diff --git a/tests/kuttl/suites/metricstorage/tests/04-assert.yaml 
b/tests/kuttl/suites/metricstorage/tests/04-assert.yaml index f26c0e08..d44d5a92 100644 --- a/tests/kuttl/suites/metricstorage/tests/04-assert.yaml +++ b/tests/kuttl/suites/metricstorage/tests/04-assert.yaml @@ -41,68 +41,54 @@ spec: protocol: TCP targetPort: 9090 --- -apiVersion: monitoring.rhobs/v1 -kind: ServiceMonitor +apiVersion: monitoring.rhobs/v1alpha1 +kind: ScrapeConfig metadata: labels: service: metricStorage - name: telemetry-kuttl-ceilometer-internal.telemetry-kuttl-tests.svc + name: telemetry-ceilometer ownerReferences: - kind: MetricStorage name: telemetry-kuttl spec: - endpoints: - - interval: 40s - metricRelabelings: - - action: labeldrop - regex: pod - - action: labeldrop - regex: namespace - - action: labeldrop - regex: instance - - action: labeldrop - regex: job - - action: labeldrop - regex: publisher - namespaceSelector: {} - selector: - matchLabels: - service: ceilometer + scrapeInterval: 40s + metricRelabelings: + - action: labeldrop + regex: pod + - action: labeldrop + regex: namespace + - action: labeldrop + regex: job + - action: labeldrop + regex: publisher --- -apiVersion: monitoring.rhobs/v1 -kind: ServiceMonitor +apiVersion: monitoring.rhobs/v1alpha1 +kind: ScrapeConfig metadata: labels: service: metricStorage - name: telemetry-kuttl-rabbitmq.telemetry-kuttl-tests.svc + name: telemetry-rabbitmq ownerReferences: - kind: MetricStorage name: telemetry-kuttl spec: - endpoints: - - interval: 40s - metricRelabelings: - - action: labeldrop - regex: pod - - action: labeldrop - regex: namespace - - action: labeldrop - regex: instance - - action: labeldrop - regex: job - - action: labeldrop - regex: publisher - namespaceSelector: {} - selector: - matchLabels: - app.kubernetes.io/name: rabbitmq + scrapeInterval: 40s + metricRelabelings: + - action: labeldrop + regex: pod + - action: labeldrop + regex: namespace + - action: labeldrop + regex: job + - action: labeldrop + regex: publisher --- apiVersion: monitoring.rhobs/v1alpha1 kind: ScrapeConfig 
metadata: labels: service: metricStorage - name: telemetry-kuttl + name: telemetry ownerReferences: - kind: MetricStorage name: telemetry-kuttl diff --git a/tests/kuttl/suites/tls/tests/02-assert.yaml b/tests/kuttl/suites/tls/tests/02-assert.yaml index 71c05c5d..fa1a7f45 100644 --- a/tests/kuttl/suites/tls/tests/02-assert.yaml +++ b/tests/kuttl/suites/tls/tests/02-assert.yaml @@ -272,27 +272,29 @@ spec: secrets: - combined-ca-bundle --- -apiVersion: monitoring.rhobs/v1 -kind: ServiceMonitor +apiVersion: monitoring.rhobs/v1alpha1 +kind: ScrapeConfig metadata: labels: service: metricStorage - name: telemetry-kuttl-metricstorage-ceilometer-internal.telemetry-kuttl-tests.svc + name: telemetry-ceilometer ownerReferences: - kind: MetricStorage name: telemetry-kuttl-metricstorage spec: - endpoints: - - scheme: https - tlsConfig: - caFile: /etc/prometheus/secrets/combined-ca-bundle/tls-ca-bundle.pem + scheme: HTTPS + tlsConfig: + ca: + secret: + key: tls-ca-bundle.pem + name: combined-ca-bundle --- apiVersion: monitoring.rhobs/v1alpha1 kind: ScrapeConfig metadata: labels: service: metricStorage - name: telemetry-kuttl-metricstorage-tls + name: telemetry-tls ownerReferences: - kind: MetricStorage name: telemetry-kuttl-metricstorage @@ -309,7 +311,7 @@ kind: ScrapeConfig metadata: labels: service: metricStorage - name: telemetry-kuttl-metricstorage + name: telemetry ownerReferences: - kind: MetricStorage name: telemetry-kuttl-metricstorage From 5eb13cf813573e330a12b4bc98596f6ee8f32ade Mon Sep 17 00:00:00 2001 From: Jaromir Wysoglad Date: Mon, 15 Jul 2024 10:43:46 -0400 Subject: [PATCH 2/2] Fix ScrapeConfigs for CustomMonitoringStack --- pkg/metricstorage/scrape_config.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/metricstorage/scrape_config.go b/pkg/metricstorage/scrape_config.go index f2c1362c..e80d4ad9 100644 --- a/pkg/metricstorage/scrape_config.go +++ b/pkg/metricstorage/scrape_config.go @@ -37,7 +37,10 @@ func ScrapeConfig( var 
scrapeInterval monv1.Duration if instance.Spec.MonitoringStack != nil && instance.Spec.MonitoringStack.ScrapeInterval != "" { scrapeInterval = monv1.Duration(instance.Spec.MonitoringStack.ScrapeInterval) - } else if instance.Spec.CustomMonitoringStack != nil && *instance.Spec.CustomMonitoringStack.PrometheusConfig.ScrapeInterval != monv1.Duration("") { + } else if instance.Spec.CustomMonitoringStack != nil && + instance.Spec.CustomMonitoringStack.PrometheusConfig != nil && + instance.Spec.CustomMonitoringStack.PrometheusConfig.ScrapeInterval != nil && + *instance.Spec.CustomMonitoringStack.PrometheusConfig.ScrapeInterval != monv1.Duration("") { scrapeInterval = *instance.Spec.CustomMonitoringStack.PrometheusConfig.ScrapeInterval } else { scrapeInterval = telemetryv1.DefaultScrapeInterval