diff --git a/tests/e2e-openshift/monitoring/00-assert.yaml b/tests/e2e-openshift/monitoring/00-assert.yaml
new file mode 100644
index 0000000000..f9cbcf22cc
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/00-assert.yaml
@@ -0,0 +1,6 @@
+# Step 00 assertion: runs check_user_workload_monitoring.sh, which fails
+# unless the user workload monitoring components report replicas.
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+commands:
+- script: ./tests/e2e-openshift/monitoring/check_user_workload_monitoring.sh
diff --git a/tests/e2e-openshift/monitoring/00-workload-monitoring.yaml b/tests/e2e-openshift/monitoring/00-workload-monitoring.yaml
new file mode 100644
index 0000000000..af526ecbe1
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/00-workload-monitoring.yaml
@@ -0,0 +1,16 @@
+# Enables monitoring for user-defined projects (user workload monitoring).
+# Inspect the resulting pods with:
+#   oc -n openshift-user-workload-monitoring get pod
+# Reference:
+#   https://docs.openshift.com/container-platform/4.13/monitoring/enabling-monitoring-for-user-defined-projects.html#accessing-metrics-from-outside-cluster_enabling-monitoring-for-user-defined-projects
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: cluster-monitoring-config
+  namespace: openshift-monitoring
+data:
+  config.yaml: |
+    enableUserWorkload: true
+    alertmanagerMain:
+      enableUserAlertmanagerConfig: true
diff --git a/tests/e2e-openshift/monitoring/01-assert.yaml b/tests/e2e-openshift/monitoring/01-assert.yaml
new file mode 100644
index 0000000000..170c0f4148
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/01-assert.yaml
@@ -0,0 +1,113 @@
+# Step 01 assertion: resources expected for the cluster-collector instance -
+# a ready Deployment, a ServiceMonitor, and the OTLP, headless and monitoring Services.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  annotations:
+    prometheus.io/path: /metrics
+    prometheus.io/port: "8888"
+    prometheus.io/scrape: "true"
+  labels:
+    app.kubernetes.io/component: opentelemetry-collector
+    app.kubernetes.io/managed-by: opentelemetry-operator
+    app.kubernetes.io/name: cluster-collector-collector
+    app.kubernetes.io/part-of: opentelemetry
+  name: cluster-collector-collector
+status:
+  availableReplicas: 1
+  readyReplicas: 1
+  replicas: 1
+
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: opentelemetry-operator
+    app.kubernetes.io/name: cluster-collector-collector
+  name: cluster-collector-collector
+spec:
+  endpoints:
+  - port: monitoring
+  selector:
+    matchLabels:
+      app.kubernetes.io/managed-by: opentelemetry-operator
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/component: opentelemetry-collector
+    app.kubernetes.io/managed-by: opentelemetry-operator
+    app.kubernetes.io/name: cluster-collector-collector
+    app.kubernetes.io/part-of: opentelemetry
+  name: cluster-collector-collector
+spec:
+  ports:
+  - appProtocol: grpc
+    name: otlp-grpc
+    port: 4317
+    protocol: TCP
+    targetPort: 4317
+  - appProtocol: http
+    name: otlp-http
+    port: 4318
+    protocol: TCP
+    targetPort: 4318
+  selector:
+    app.kubernetes.io/component: opentelemetry-collector
+    app.kubernetes.io/managed-by: opentelemetry-operator
+    app.kubernetes.io/part-of: opentelemetry
+  type: ClusterIP
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/component: opentelemetry-collector
+    app.kubernetes.io/managed-by: opentelemetry-operator
+    app.kubernetes.io/name: cluster-collector-collector
+    app.kubernetes.io/part-of: opentelemetry
+    operator.opentelemetry.io/collector-headless-service: Exists
+  name: cluster-collector-collector-headless
+spec:
+  ports:
+  - appProtocol: grpc
+    name: otlp-grpc
+    port: 4317
+    protocol: TCP
+    targetPort: 4317
+  - appProtocol: http
+    name: otlp-http
+    port: 4318
+    protocol: TCP
+    targetPort: 4318
+  selector:
+    app.kubernetes.io/component: opentelemetry-collector
+    app.kubernetes.io/managed-by: opentelemetry-operator
+    app.kubernetes.io/part-of: opentelemetry
+  type: ClusterIP
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/component: opentelemetry-collector
+    app.kubernetes.io/managed-by: opentelemetry-operator
+    app.kubernetes.io/name: cluster-collector-collector-monitoring
+    app.kubernetes.io/part-of: opentelemetry
+  name: cluster-collector-collector-monitoring
+spec:
+  ports:
+  - name: monitoring
+    port: 8888
+    protocol: TCP
+    targetPort: 8888
+  selector:
+    app.kubernetes.io/component: opentelemetry-collector
+    app.kubernetes.io/managed-by: opentelemetry-operator
+    app.kubernetes.io/part-of: opentelemetry
+  type: ClusterIP
diff --git a/tests/e2e-openshift/monitoring/01-otel-collector.yaml b/tests/e2e-openshift/monitoring/01-otel-collector.yaml
new file mode 100644
index 0000000000..8420879089
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/01-otel-collector.yaml
@@ -0,0 +1,26 @@
+# Collector in deployment mode with metrics enabled under spec.observability;
+# its traces pipeline receives OTLP (gRPC and HTTP) and exports to debug.
+apiVersion: opentelemetry.io/v1alpha1
+kind: OpenTelemetryCollector
+metadata:
+  name: cluster-collector
+spec:
+  mode: deployment
+  observability:
+    metrics:
+      enableMetrics: true
+  config: |
+    receivers:
+      otlp:
+        protocols:
+          grpc:
+          http:
+    processors:
+    exporters:
+      debug:
+    service:
+      pipelines:
+        traces:
+          receivers: [otlp]
+          processors: []
+          exporters: [debug]
diff --git a/tests/e2e-openshift/monitoring/02-assert.yaml b/tests/e2e-openshift/monitoring/02-assert.yaml
new file mode 100644
index 0000000000..701adbdf76
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/02-assert.yaml
@@ -0,0 +1,7 @@
+# Step 02 assertion: the telemetrygen-traces Job reports one active pod.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: telemetrygen-traces
+status:
+  active: 1
diff --git a/tests/e2e-openshift/monitoring/02-generate-traces.yaml b/tests/e2e-openshift/monitoring/02-generate-traces.yaml
new file mode 100644
index 0000000000..ac16943d4b
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/02-generate-traces.yaml
@@ -0,0 +1,27 @@
+# Job that runs telemetrygen (--rate=1, --duration=5s) and sends traces over
+# insecure OTLP to port 4317 of the collector's headless Service.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: telemetrygen-traces
+spec:
+  completions: 1
+  parallelism: 1
+  template:
+    metadata:
+      labels:
+        app: telemetrygen-traces
+    spec:
+      containers:
+      - name: telemetrygen-traces
+        image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:latest
+        command: ["./telemetrygen"]
+        args:
+        - "--otlp-endpoint=cluster-collector-collector-headless:4317"
+        - "--otlp-insecure=true"
+        - "--rate=1"
+        - "--duration=5s"
+        - "--otlp-attributes=telemetrygen=\"traces\""
+        - "--otlp-header=telemetrygen=\"traces\""
+        - "traces"
+      restartPolicy: Never
diff --git a/tests/e2e-openshift/monitoring/03-assert.yaml b/tests/e2e-openshift/monitoring/03-assert.yaml
new file mode 100644
index 0000000000..46f97913db
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/03-assert.yaml
@@ -0,0 +1,6 @@
+---
+# Step 03 assertion: runs check_metrics.sh, which fails if a queried metric has no data.
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+commands:
+- script: ./tests/e2e-openshift/monitoring/check_metrics.sh
diff --git a/tests/e2e-openshift/monitoring/check_metrics.sh b/tests/e2e-openshift/monitoring/check_metrics.sh
new file mode 100755
index 0000000000..fac40fdda0
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/check_metrics.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Queries the Thanos Querier route for each OpenTelemetry Collector metric
+# listed below and exits with status 1 as soon as one of them has no series.
+# Uses oc, jq, curl and base64.
+
+# Pick the first prometheus-user-workload token secret and decode its token.
+SECRET=$(oc get secret -n openshift-user-workload-monitoring | grep prometheus-user-workload-token | head -n 1 | awk '{print $1}')
+if [[ -z "$SECRET" ]]; then
+  echo "No prometheus-user-workload-token secret found. Exiting with status 1."
+  exit 1
+fi
+TOKEN=$(oc get secret "$SECRET" -n openshift-user-workload-monitoring -o json | jq -r '.data.token' | base64 -d)
+THANOS_QUERIER_HOST=$(oc get route thanos-querier -n openshift-monitoring -o json | jq -r '.spec.host')
+
+# Check metrics used in the prometheus rules created for TempoStack. Refer issue https://issues.redhat.com/browse/TRACING-3399 for skipped metrics.
+metrics="otelcol_exporter_enqueue_failed_spans otelcol_exporter_sent_spans otelcol_process_cpu_seconds otelcol_process_memory_rss otelcol_process_runtime_heap_alloc_bytes otelcol_process_runtime_total_alloc_bytes otelcol_process_runtime_total_sys_memory_bytes otelcol_process_uptime otelcol_receiver_accepted_spans otelcol_receiver_refused_spans"
+
+for metric in $metrics; do
+  query="$metric"
+
+  # -sS hides the progress meter but still prints transfer errors.
+  response=$(curl -sS -k -H "Authorization: Bearer $TOKEN" -H "Content-type: application/json" "https://$THANOS_QUERIER_HOST/api/v1/query?query=$query")
+
+  count=$(echo "$response" | jq -r '.data.result | length')
+
+  # A failed request or a non-JSON reply leaves count empty or non-numeric.
+  if ! [[ $count =~ ^[0-9]+$ ]]; then
+    echo "Unexpected response for metric '$metric': $response. Exiting with status 1."
+    exit 1
+  fi
+
+  if [[ $count -eq 0 ]]; then
+    echo "No metric '$metric' with value present. Exiting with status 1."
+    exit 1
+  else
+    echo "Metric '$metric' with value is present."
+  fi
+done
diff --git a/tests/e2e-openshift/monitoring/check_user_workload_monitoring.sh b/tests/e2e-openshift/monitoring/check_user_workload_monitoring.sh
new file mode 100755
index 0000000000..5ecaebe3df
--- /dev/null
+++ b/tests/e2e-openshift/monitoring/check_user_workload_monitoring.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Verifies that the user workload monitoring components in the
+# openshift-user-workload-monitoring namespace report replicas.
+
+set -e
+
+# check_replicas KIND NAME
+# Exits with status 1 unless .status.availableReplicas, .status.readyReplicas
+# and .status.replicas of the given resource are all at least 1.
+check_replicas() {
+  local kind="$1" name="$2"
+  local counts available ready total count
+
+  # Assigned separately from 'local' so that a failing 'oc get' trips 'set -e'.
+  counts=$(oc get "$kind" "$name" -n openshift-user-workload-monitoring -o 'jsonpath={.status.availableReplicas},{.status.readyReplicas},{.status.replicas}')
+  IFS=',' read -r available ready total <<< "$counts"
+
+  # A counter that is missing from the status renders as an empty string;
+  # treat it as 0 so it fails the check instead of being skipped.
+  for count in "${available:-0}" "${ready:-0}" "${total:-0}"; do
+    if [[ $count =~ ^[0-9]+$ ]]; then
+      if ((count < 1)); then
+        echo "The number of replicas is 0 for $kind $name"
+        exit 1
+      fi
+    else
+      echo "Error: Replica count is not a valid number for $kind $name"
+      exit 1
+    fi
+  done
+}
+
+check_replicas deployment prometheus-operator
+check_replicas statefulset prometheus-user-workload
+check_replicas statefulset thanos-ruler-user-workload