Skip to content

Commit

Permalink
[OSPRH-8406] Switch to using ScrapeConfigs
Browse files Browse the repository at this point in the history
We recently discovered issues with authentication, IPv6
and ServiceMonitors in STF. This PR is proactively switching
to use ScrapeConfigs instead of ServiceMonitors. The
functionality should be equivalent to before. Old ServiceMonitors
owned by the MetricStorage controller are deleted.

There is a slight difference in the labels associated with
the collected metrics.
 - The Node Exporter metrics are now missing the "job" label,
   which didn't seem useful and it follows how ceilometer
   and rabbit metrics are collected.
 - Ceilometer and RabbitMQ metrics don't have the "service"
   label anymore, because ScrapeConfigs don't have the information
   to create that label. Instead they now have the "instance"
   label.

The "instance" label is now used to differentiate between different
Rabbit clusters in dashboards instead of the "service" label.

I used this opportunity to move the ScrapeConfig creation code
into its own function, following the example of dashboard code.
  • Loading branch information
vyzigold committed Jul 15, 2024
1 parent 87722b7 commit 5331c64
Show file tree
Hide file tree
Showing 11 changed files with 309 additions and 396 deletions.
16 changes: 2 additions & 14 deletions api/v1beta1/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,8 @@ const (
// MonitoringStackReadyCondition Status=True condition which indicates if the MonitoringStack is configured and operational
MonitoringStackReadyCondition condition.Type = "MonitoringStackReady"

// ServiceMonitorReadyCondition Status=True condition which indicates if the Ceilometer ServiceMonitor is configured and operational
ServiceMonitorReadyCondition condition.Type = "CeilometerServiceMonitorReady"

// ScrapeConfigReadyCondition Status=True condition which indicates if the Node Exporter ScrapeConfig is configured and operational
ScrapeConfigReadyCondition condition.Type = "NodeExporterScrapeConfigReady"
// ScrapeConfigReadyCondition Status=True condition which indicates if the ScrapeConfig is configured and operational
ScrapeConfigReadyCondition condition.Type = "ScrapeConfigReady"

// PrometheusReadyCondition Status=True condition which indicates if the Prometheus watch is operational
PrometheusReadyCondition condition.Type = "PrometheusReady"
Expand Down Expand Up @@ -140,15 +137,6 @@ const (
// MonitoringStackReadyMisconfiguredMessage
MonitoringStackReadyMisconfiguredMessage = "MonitoringStack isn't configured properly: %s"

//
// ServiceMonitorReady condition messages
//
// ServiceMonitorReadyInitMessage
ServiceMonitorReadyInitMessage = "ServiceMonitor not started"

// ServiceMonitorUnableToOwnMessage
ServiceMonitorUnableToOwnMessage = "Error occured when trying to own %s"

//
// ScrapeConfigReady condition messages
//
Expand Down
4 changes: 0 additions & 4 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -236,13 +236,9 @@ rules:
resources:
- servicemonitors
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- network.openstack.org
resources:
Expand Down
313 changes: 166 additions & 147 deletions controllers/metricstorage_controller.go

Large diffs are not rendered by default.

78 changes: 39 additions & 39 deletions pkg/dashboards/openstack-rabbitmq.go

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions pkg/metricstorage/const.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metricstorage

// RabbitMQPrometheusPort holds the port number 15691.
// NOTE(review): judging by the name, this is presumably the port on which
// RabbitMQ serves its Prometheus metrics endpoint; confirm against the code
// that consumes this constant.
const RabbitMQPrometheusPort = 15691
28 changes: 26 additions & 2 deletions pkg/metricstorage/scrape_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ limitations under the License.
package metricstorage

import (
"fmt"
"sort"

tls "github.com/openstack-k8s-operators/lib-common/modules/common/tls"
telemetryv1 "github.com/openstack-k8s-operators/telemetry-operator/api/v1beta1"
Expand All @@ -42,17 +42,42 @@ func ScrapeConfig(
} else {
scrapeInterval = telemetryv1.DefaultScrapeInterval
}

sort.Strings(targets)
var convertedTargets []monv1alpha1.Target
for _, t := range targets {
convertedTargets = append(convertedTargets, monv1alpha1.Target(t))
}

scrapeConfig := &monv1alpha1.ScrapeConfig{
ObjectMeta: metav1.ObjectMeta{
Name: instance.Name,
Namespace: instance.Namespace,
Labels: labels,
},
Spec: monv1alpha1.ScrapeConfigSpec{
MetricRelabelConfigs: []*monv1.RelabelConfig{
{
Action: "labeldrop",
Regex: "pod",
SourceLabels: []monv1.LabelName{},
},
{
Action: "labeldrop",
Regex: "namespace",
SourceLabels: []monv1.LabelName{},
},
{
Action: "labeldrop",
Regex: "job",
SourceLabels: []monv1.LabelName{},
},
{
Action: "labeldrop",
Regex: "publisher",
SourceLabels: []monv1.LabelName{},
},
},
ScrapeInterval: &scrapeInterval,
StaticConfigs: []monv1alpha1.StaticConfig{
{
Expand All @@ -76,7 +101,6 @@ func ScrapeConfig(
scheme := "HTTPS"
scrapeConfig.Spec.Scheme = &scheme
scrapeConfig.Spec.TLSConfig = &tlsConfig
scrapeConfig.ObjectMeta.Name = fmt.Sprintf("%s-tls", instance.Name)
}

return scrapeConfig
Expand Down
105 changes: 0 additions & 105 deletions pkg/metricstorage/service_monitor.go

This file was deleted.

4 changes: 1 addition & 3 deletions tests/kuttl/suites/default/tests/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@ status:
conditions:
- type: Ready
status: "True"
- type: CeilometerServiceMonitorReady
status: "True"
- type: DashboardDatasourceReady
status: "True"
- type: DashboardDefinitionReady
Expand All @@ -115,7 +113,7 @@ status:
status: "True"
- type: MonitoringStackReady
status: "True"
- type: NodeExporterScrapeConfigReady
- type: ScrapeConfigReady
status: "True"
- type: PrometheusReady
status: "True"
Expand Down
61 changes: 23 additions & 38 deletions tests/kuttl/suites/metricstorage/tests/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,60 +31,45 @@ spec:
targetPort: 9090
---
apiVersion: monitoring.rhobs/v1
kind: ServiceMonitor
kind: ScrapeConfig
metadata:
labels:
service: metricStorage
name: telemetry-kuttl-ceilometer-internal.telemetry-kuttl-tests.svc
name: telemetry-kuttl-ceilometer
ownerReferences:
- kind: MetricStorage
name: telemetry-kuttl
spec:
endpoints:
- interval: 30s
metricRelabelings:
- action: labeldrop
regex: pod
- action: labeldrop
regex: namespace
- action: labeldrop
regex: instance
- action: labeldrop
regex: job
- action: labeldrop
regex: publisher
namespaceSelector: {}
selector:
matchLabels:
service: ceilometer
scrapeInterval: 30s
metricRelabelings:
- action: labeldrop
regex: pod
- action: labeldrop
regex: namespace
- action: labeldrop
regex: job
- action: labeldrop
regex: publisher
---
apiVersion: monitoring.rhobs/v1
kind: ServiceMonitor
kind: ScrapeConfig
metadata:
labels:
service: metricStorage
name: telemetry-kuttl-rabbitmq.telemetry-kuttl-tests.svc
name: telemetry-kuttl-rabbitmq
ownerReferences:
- kind: MetricStorage
name: telemetry-kuttl
spec:
endpoints:
- interval: 30s
metricRelabelings:
- action: labeldrop
regex: pod
- action: labeldrop
regex: namespace
- action: labeldrop
regex: instance
- action: labeldrop
regex: job
- action: labeldrop
regex: publisher
namespaceSelector: {}
selector:
matchLabels:
app.kubernetes.io/name: rabbitmq
scrapeInterval: 30s
metricRelabelings:
- action: labeldrop
regex: pod
- action: labeldrop
regex: namespace
- action: labeldrop
regex: job
- action: labeldrop
---
apiVersion: monitoring.rhobs/v1alpha1
kind: ScrapeConfig
Expand Down
Loading

0 comments on commit 5331c64

Please sign in to comment.