Skip to content

Commit

Permalink
Create kepler scrape config and dashboard only when power monitoring …
Browse files Browse the repository at this point in the history
…is enabled
  • Loading branch information
yadneshk authored and openshift-cherrypick-robot committed Nov 21, 2024
1 parent df16dea commit 2156b26
Show file tree
Hide file tree
Showing 11 changed files with 334 additions and 56 deletions.
2 changes: 2 additions & 0 deletions api/v1beta1/telemetry_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ const (
PauseBetweenWatchAttempts = time.Duration(60) * time.Second
// DefaultKeplerPort -
DefaultKeplerPort = 8888
// TelemetryPowerMonitoring - Dataplane power monitoring service name
TelemetryPowerMonitoring = "telemetry-power-monitoring"
)

// PrometheusReplicas -
Expand Down
51 changes: 33 additions & 18 deletions controllers/metricstorage_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ func (r *MetricStorageReconciler) reconcileNormal(
instance.Status.Conditions.MarkTrue(telemetryv1.DashboardDefinitionReadyCondition, telemetryv1.DashboardsNotEnabledMessage)
instance.Status.Conditions.MarkTrue(telemetryv1.DashboardPluginReadyCondition, telemetryv1.DashboardsNotEnabledMessage)
} else {
if res, err := r.createDashboardObjects(ctx, instance, eventHandler); err != nil {
if res, err := r.createDashboardObjects(ctx, instance, helper, eventHandler); err != nil {
return res, err
}
}
Expand Down Expand Up @@ -601,7 +601,7 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
return ctrl.Result{}, err
}

connectionInfo, err := getComputeNodesConnectionInfo(instance, helper)
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper, telemetry.ServiceName)
if err != nil {
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Scrape configs not created. Error: %s", err))
}
Expand All @@ -624,18 +624,26 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
return ctrl.Result{}, err
}

connectionInfo, err = getComputeNodesConnectionInfo(instance, helper, telemetryv1.TelemetryPowerMonitoring)
if err != nil {
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Scrape configs not created. Error: %s", err))
}

// kepler scrape endpoints
keplerEndpoints, _ := getKeplerTargets(connectionInfo)
if err != nil {
Log.Info(fmt.Sprintf("Cannot get Kepler targets. Scrape configs not created. Error: %s", err))
}

// Kepler ScrapeConfig for non-tls nodes
keplerServiceName := fmt.Sprintf("%s-kepler", telemetry.ServiceName)
err = r.createServiceScrapeConfig(ctx, instance, Log, "Kepler",
keplerServiceName, keplerEndpoints, false) // Currently Kepler doesn't support TLS so tlsEnabled is set to false
if err != nil {
return ctrl.Result{}, err
// keplerEndpoints is an empty slice when the telemetry-power-monitoring service is not enabled
if len(keplerEndpoints) > 0 {
// Kepler ScrapeConfig for non-tls nodes
keplerServiceName := fmt.Sprintf("%s-kepler", telemetry.ServiceName)
err = r.createServiceScrapeConfig(ctx, instance, Log, "Kepler",
keplerServiceName, keplerEndpoints, false) // Currently Kepler doesn't support TLS so tlsEnabled is set to false
if err != nil {
return ctrl.Result{}, err
}
}

instance.Status.Conditions.MarkTrue(telemetryv1.ScrapeConfigReadyCondition, condition.ReadyMessage)
Expand Down Expand Up @@ -673,7 +681,7 @@ func getKeplerTargets(nodes []ConnectionInfo) ([]string, []string) {
return tls, nonTLS
}

func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, eventHandler handler.EventHandler) (ctrl.Result, error) {
func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, helper *helper.Helper, eventHandler handler.EventHandler) (ctrl.Result, error) {
Log := r.GetLogger(ctx)
uiPluginObj := &obsui.UIPlugin{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -766,7 +774,14 @@ func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, in
"grafana-dashboard-openstack-node": dashboards.OpenstackNode(datasourceName),
"grafana-dashboard-openstack-vm": dashboards.OpenstackVM(datasourceName),
"grafana-dashboard-openstack-rabbitmq": dashboards.OpenstackRabbitmq(datasourceName),
"grafana-dashboard-openstack-kepler": dashboards.OpenstackKepler(datasourceName),
}

// At least one nodeset must have the "telemetry-power-monitoring" service enabled for the kepler dashboard to be created
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper, telemetryv1.TelemetryPowerMonitoring)
if err != nil {
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Power monitoring dashboard not created. Error: %s", err))
} else if len(connectionInfo) > 0 {
dashboardCMs["grafana-dashboard-openstack-kepler"] = dashboards.OpenstackKepler(datasourceName)
}

for dashboardName, desiredCM := range dashboardCMs {
Expand Down Expand Up @@ -838,6 +853,7 @@ func (r *MetricStorageReconciler) ensureWatches(
func getComputeNodesConnectionInfo(
instance *telemetryv1.MetricStorage,
helper *helper.Helper,
telemetryServiceName string,
) ([]ConnectionInfo, error) {
ipSetList, err := getIPSetList(instance, helper)
if err != nil {
Expand All @@ -855,24 +871,23 @@ func getComputeNodesConnectionInfo(
return []ConnectionInfo{}, err
}
nodeSetGroup := inventory.Groups[secret.Labels["openstackdataplanenodeset"]]
containsTelemetry := false
containsTargetService := false
for _, svc := range nodeSetGroup.Vars["edpm_services"].([]interface{}) {
if svc.(string) == "telemetry" {
containsTelemetry = true
if svc.(string) == telemetryServiceName {
containsTargetService = true
}
}
if !containsTelemetry {
// Telemetry isn't deployed on this nodeset
// there is no reason to include these nodes
// for scraping by prometheus
if !containsTargetService {
// If the requested service (telemetryServiceName) isn't
// deployed on this nodeset, there is no reason
// to include these nodes for scraping by prometheus
continue
}
for name, item := range nodeSetGroup.Hosts {
namespacedName := &types.NamespacedName{
Name: name,
Namespace: instance.GetNamespace(),
}

if len(ipSetList.Items) > 0 {
// if we have IPSets, lets go to search for the IPs there
address, _ = getAddressFromIPSet(instance, &item, namespacedName, helper)
Expand Down
14 changes: 14 additions & 0 deletions kuttl-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,17 @@ testDirs:
- tests/kuttl/suites/tls/
suppress:
- events # Remove spammy event logs
commands:
- script: |
if [ ! -f ansibleee-ssh-key-id_rsa ]; then
ssh-keygen -f ansibleee-ssh-key-id_rsa -N "" -t rsa -b 4096
fi
oc create secret generic dataplane-ansible-ssh-private-key-secret \
--save-config \
--dry-run=client \
--from-file=authorized_keys=ansibleee-ssh-key-id_rsa.pub \
--from-file=ssh-privatekey=ansibleee-ssh-key-id_rsa \
--from-file=ssh-publickey=ansibleee-ssh-key-id_rsa.pub \
-n telemetry-kuttl-tests \
-o yaml | \
oc apply -f -
25 changes: 25 additions & 0 deletions tests/kuttl/suites/metricstorage/deps/dnsmasq.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
apiVersion: network.openstack.org/v1beta1
kind: DNSMasq
metadata:
name: dnsmasq
namespace: telemetry-kuttl-tests
spec:
replicas: 1
options:
- key: server
values:
- 192.168.122.1
- key: local
values:
- '/example.com/'
debug:
service: false
override:
service:
metadata:
annotations:
metallb.universe.tf/address-pool: ctlplane
metallb.universe.tf/allow-shared-ip: ctlplane
metallb.universe.tf/loadBalancerIPs: 192.168.122.80
spec:
type: ClusterIP
119 changes: 119 additions & 0 deletions tests/kuttl/suites/metricstorage/deps/inventory
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
openstack-edpm-ipam:
vars:
edpm_network_config_template: |
---
{% set mtu_list = [ctlplane_mtu] %}
{% for network in nodeset_networks %}
{{ mtu_list.append(lookup('vars', networks_lower[network] ~ '_mtu')) }}
{%- endfor %}
{% set min_viable_mtu = mtu_list | max %}
network_config:
- type: interface
name: nic1
use_dhcp: false
- type: interface
name: nic2
use_dhcp: false
addresses:
- ip_netmask: {{ ctlplane_ip }}/{{ ctlplane_cidr }}
routes:
- default: true
next_hop: {{ ctlplane_gateway_ip }}

- type: linux_bond
name: bond_api
use_dhcp: false
bonding_options: "mode=active-backup"
dns_servers: {{ ctlplane_dns_nameservers }}
members:
- type: interface
name: nic3

- type: vlan
vlan_id: {{ lookup('vars', networks_lower['internalapi'] ~ '_vlan_id') }}
device: bond_api
addresses:
- ip_netmask: {{ lookup('vars', networks_lower['internalapi'] ~ '_ip') }}/{{ lookup('vars', networks_lower['internalapi'] ~ '_cidr') }}

- type: vlan
vlan_id: {{ lookup('vars', networks_lower['storage'] ~ '_vlan_id') }}
device: bond_api
addresses:
- ip_netmask: {{ lookup('vars', networks_lower['storage'] ~ '_ip') }}/{{ lookup('vars', networks_lower['storage'] ~ '_cidr') }}
edpm_service_types:
- bootstrap
- download-cache
- reboot-os
- configure-ovs-dpdk
- configure-network
- validate-network
- install-os
- configure-os
- ssh-known-hosts
- run-os
- install-certs
- ovn
- neutron-ovn
- neutron-metadata
- neutron-sriov
- libvirt
- nova
- telemetry
- telemetry-power-monitoring
edpm_services:
- bootstrap
- download-cache
- reboot-os
- configure-ovs-dpdk
- configure-network
- validate-network
- install-os
- configure-os
- ssh-known-hosts
- run-os
- install-certs
- ovn
- neutron-ovn-igmp
- neutron-metadata
- neutron-sriov
- libvirt
- nova
- telemetry
- telemetry-power-monitoring
edpm_tls_certs_enabled: true
hosts:
edpm-compute-0:
ansible_host: edpm-compute-0
canonical_hostname: edpm-compute-0.ctlplane.example.com
ctlplane_cidr: 24
ctlplane_dns_nameservers:
- 192.168.122.80
ctlplane_gateway_ip: 192.168.122.1
ctlplane_host_routes:
- destination: 0.0.0.0/0
nexthop: 192.168.122.1
ctlplane_ip: 192.168.122.100
ctlplane_mtu: 1500
dns_search_domains:
- ctlplane.example.com
- internalapi.example.com
- storage.example.com
- tenant.example.com
internalapi_cidr: 24
internalapi_gateway_ip: null
internalapi_host_routes: []
internalapi_ip: 172.17.0.100
internalapi_mtu: 1496
internalapi_vlan_id: 52
storage_cidr: 24
storage_gateway_ip: null
storage_host_routes: []
storage_ip: 172.18.0.100
storage_mtu: 1496
storage_vlan_id: 53
tenant_cidr: 24
tenant_gateway_ip: null
tenant_host_routes: []
tenant_ip: 172.19.0.100
tenant_mtu: 1496
tenant_vlan_id: 54
19 changes: 0 additions & 19 deletions tests/kuttl/suites/metricstorage/tests/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,19 +95,6 @@ metadata:
spec:
scrapeInterval: 30s
---
apiVersion: monitoring.rhobs/v1alpha1
kind: ScrapeConfig
metadata:
labels:
service: metricStorage
name: telemetry-kepler
ownerReferences:
- kind: MetricStorage
name: telemetry-kuttl
spec:
staticConfigs:
- {}
---
apiVersion: observability.openshift.io/v1alpha1
kind: UIPlugin
metadata:
Expand Down Expand Up @@ -159,9 +146,3 @@ kind: ConfigMap
metadata:
name: grafana-dashboard-openstack-rabbitmq
namespace: openshift-config-managed
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-openstack-kepler
namespace: openshift-config-managed
6 changes: 0 additions & 6 deletions tests/kuttl/suites/metricstorage/tests/02-errors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,3 @@ kind: ConfigMap
metadata:
name: grafana-dashboard-openstack-rabbitmq
namespace: openshift-config-managed
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-openstack-kepler
namespace: openshift-config-managed
13 changes: 0 additions & 13 deletions tests/kuttl/suites/metricstorage/tests/04-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,3 @@ metadata:
name: telemetry-kuttl
spec:
scrapeInterval: 40s
---
apiVersion: monitoring.rhobs/v1alpha1
kind: ScrapeConfig
metadata:
labels:
service: metricStorage
name: telemetry-kepler
ownerReferences:
- kind: MetricStorage
name: telemetry-kuttl
spec:
staticConfigs:
- {}
Loading

0 comments on commit 2156b26

Please sign in to comment.