Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Chore] Add e2e test case for OpenTelemetry collector instance monitoring. #2246

Merged
merged 3 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tests/e2e-openshift/monitoring/00-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# kuttl assertion for this step: runs the user workload monitoring check script
# as an assertion command.
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
  - script: ./tests/e2e-openshift/monitoring/check_user_workload_monitoring.sh
13 changes: 13 additions & 0 deletions tests/e2e-openshift/monitoring/00-workload-monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Enables monitoring for user-defined projects on the cluster.
# To inspect the resulting pods: oc -n openshift-user-workload-monitoring get pod
# https://docs.openshift.com/container-platform/4.13/monitoring/enabling-monitoring-for-user-defined-projects.html#accessing-metrics-from-outside-cluster_enabling-monitoring-for-user-defined-projects

apiVersion: v1
kind: ConfigMap
metadata:
  name: cluster-monitoring-config
  namespace: openshift-monitoring
data:
  # The value is an embedded YAML document, so it must stay inside the block scalar.
  config.yaml: |
    enableUserWorkload: true
    alertmanagerMain:
      enableUserAlertmanagerConfig: true
111 changes: 111 additions & 0 deletions tests/e2e-openshift/monitoring/01-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Expected collector Deployment: Prometheus scrape annotations on port 8888,
# operator-managed labels, and exactly one replica that is available and ready.
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    prometheus.io/path: /metrics
    prometheus.io/port: "8888"
    prometheus.io/scrape: "true"
  labels:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector
    app.kubernetes.io/part-of: opentelemetry
  name: cluster-collector-collector
status:
  availableReplicas: 1
  readyReplicas: 1
  replicas: 1

---
# Expected ServiceMonitor: scrapes the endpoint port named "monitoring" on
# services labelled as managed by the opentelemetry-operator.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector
  name: cluster-collector-collector
spec:
  endpoints:
    - port: monitoring
  selector:
    matchLabels:
      app.kubernetes.io/managed-by: opentelemetry-operator

---
# Expected collector Service exposing the OTLP gRPC (4317) and HTTP (4318) ports.
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector
    app.kubernetes.io/part-of: opentelemetry
  name: cluster-collector-collector
spec:
  ports:
    - appProtocol: grpc
      name: otlp-grpc
      port: 4317
      protocol: TCP
      targetPort: 4317
    - appProtocol: http
      name: otlp-http
      port: 4318
      protocol: TCP
      targetPort: 4318
  selector:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/part-of: opentelemetry
  type: ClusterIP

---
# Expected headless collector Service (marked by the collector-headless-service
# label) exposing the same OTLP gRPC (4317) and HTTP (4318) ports.
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector
    app.kubernetes.io/part-of: opentelemetry
    operator.opentelemetry.io/collector-headless-service: Exists
  name: cluster-collector-collector-headless
spec:
  ports:
    - appProtocol: grpc
      name: otlp-grpc
      port: 4317
      protocol: TCP
      targetPort: 4317
    - appProtocol: http
      name: otlp-http
      port: 4318
      protocol: TCP
      targetPort: 4318
  selector:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/part-of: opentelemetry
  type: ClusterIP

---
# Expected monitoring Service: exposes the collector's metrics port 8888 under
# the port name "monitoring".
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector-monitoring
    app.kubernetes.io/part-of: opentelemetry
  name: cluster-collector-collector-monitoring
spec:
  ports:
    - name: monitoring
      port: 8888
      protocol: TCP
      targetPort: 8888
  selector:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/part-of: opentelemetry
  type: ClusterIP
24 changes: 24 additions & 0 deletions tests/e2e-openshift/monitoring/01-otel-collector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Collector under test: deployment mode with metrics enabled under
# spec.observability, receiving OTLP traces and exporting them to the debug exporter.
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
  name: cluster-collector
spec:
  mode: deployment
  observability:
    metrics:
      enableMetrics: true
  # The collector configuration is an embedded YAML document; its nesting must
  # be preserved inside the block scalar.
  config: |
    receivers:
      otlp:
        protocols:
          grpc:
          http:
    processors:
    exporters:
      debug:
    service:
      pipelines:
        traces:
          receivers: [otlp]
          processors: []
          exporters: [debug]
6 changes: 6 additions & 0 deletions tests/e2e-openshift/monitoring/02-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# The trace generator Job must report one active pod.
apiVersion: batch/v1
kind: Job
metadata:
  name: telemetrygen-traces
status:
  active: 1
25 changes: 25 additions & 0 deletions tests/e2e-openshift/monitoring/02-generate-traces.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Job that runs telemetrygen once to send traces to the collector's headless
# service over OTLP gRPC (port 4317) at a rate of 1 for 5 seconds.
apiVersion: batch/v1
kind: Job
metadata:
  name: telemetrygen-traces
spec:
  completions: 1
  parallelism: 1
  template:
    metadata:
      labels:
        app: telemetrygen-traces
    spec:
      containers:
        - name: telemetrygen-traces
          image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:latest
          command: ["./telemetrygen"]
          args:
            - "--otlp-endpoint=cluster-collector-collector-headless:4317"
            - "--otlp-insecure=true"
            - "--rate=1"
            - "--duration=5s"
            - "--otlp-attributes=telemetrygen=\"traces\""
            - "--otlp-header=telemetrygen=\"traces\""
            - "traces"
      restartPolicy: Never
5 changes: 5 additions & 0 deletions tests/e2e-openshift/monitoring/03-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
# kuttl assertion for this step: runs the metrics check script as an assertion
# command.
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
  - script: ./tests/e2e-openshift/monitoring/check_metrics.sh
23 changes: 23 additions & 0 deletions tests/e2e-openshift/monitoring/check_metrics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Queries the thanos-querier route for each expected collector metric, using the
# prometheus-user-workload service account token, and exits with status 1 as
# soon as one metric returns no series.

SECRET=$(oc get secret -n openshift-user-workload-monitoring | grep prometheus-user-workload-token | head -n 1 | awk '{print $1}')
if [[ -z "$SECRET" ]]; then
  echo "No prometheus-user-workload-token secret found in openshift-user-workload-monitoring. Exiting with status 1."
  exit 1
fi

# '// empty' stops a missing field from being printed as the literal "null",
# which would otherwise be base64-decoded into a bogus token / used as a host.
TOKEN=$(oc get secret "$SECRET" -n openshift-user-workload-monitoring -o json | jq -r '.data.token // empty' | base64 -d)
THANOS_QUERIER_HOST=$(oc get route thanos-querier -n openshift-monitoring -o json | jq -r '.spec.host // empty')
if [[ -z "$TOKEN" || -z "$THANOS_QUERIER_HOST" ]]; then
  echo "Could not determine the bearer token or the thanos-querier route host. Exiting with status 1."
  exit 1
fi

# Metrics (all otelcol_*) that must be queryable. Refer issue https://issues.redhat.com/browse/TRACING-3399 for skipped metrics.
metrics=(
  otelcol_exporter_enqueue_failed_spans
  otelcol_exporter_sent_spans
  otelcol_process_cpu_seconds
  otelcol_process_memory_rss
  otelcol_process_runtime_heap_alloc_bytes
  otelcol_process_runtime_total_alloc_bytes
  otelcol_process_runtime_total_sys_memory_bytes
  otelcol_process_uptime
  otelcol_receiver_accepted_spans
  otelcol_receiver_refused_spans
)

for metric in "${metrics[@]}"; do
  # -sS hides the progress meter but still prints transport errors.
  response=$(curl -k -sS -H "Authorization: Bearer $TOKEN" -H "Content-type: application/json" "https://$THANOS_QUERIER_HOST/api/v1/query?query=$metric")

  count=$(echo "$response" | jq -r '.data.result | length')

  # An empty count means the response could not be parsed; treat it as "no data".
  if [[ -z "$count" || "$count" -eq 0 ]]; then
    echo "No metric '$metric' with value present. Exiting with status 1."
    exit 1
  else
    echo "Metric '$metric' with value is present."
  fi
done
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Stop the script at the first command that exits with a non-zero status.
set -o errexit

# Verifies that a workload in the openshift-user-workload-monitoring namespace
# reports at least one available, one ready and one total replica.
#   $1 - resource kind passed to `oc get` (e.g. deployment, statefulset)
#   $2 - resource name
# Exits with a non-zero status when `oc get` fails or when any of the three
# counters is missing, non-numeric or below 1.
check_replicas() {
  local replicas count
  local -a counts

  # Declared separately from the assignment so the exit status of `oc` is not masked.
  replicas=$(oc get "$1" "$2" -n openshift-user-workload-monitoring -o 'jsonpath={.status.availableReplicas} {.status.readyReplicas} {.status.replicas}') || exit "$?"

  read -r -a counts <<< "$replicas"

  # Counters absent from .status are printed as empty strings, so fewer than
  # three values means at least one is missing. Without this guard the loop
  # below would see nothing to reject and the check would pass vacuously.
  if (( ${#counts[@]} < 3 )); then
    echo "Error: Replica count is missing for $1 $2"
    exit 1
  fi

  for count in "${counts[@]}"; do
    if [[ $count =~ ^[0-9]+$ ]]; then
      # 10# forces base 10 so a value with a leading zero is not read as octal.
      if (( 10#$count < 1 )); then
        echo "The number of replicas is 0 for $1 $2"
        exit 1
      fi
    else
      echo "Error: Replica count is not a valid number for $1 $2"
      exit 1
    fi
  done
}

# Workloads checked for user workload monitoring: the operator deployment first,
# then the two statefulsets.
check_replicas deployment prometheus-operator
for sts_name in prometheus-user-workload thanos-ruler-user-workload; do
  check_replicas statefulset "$sts_name"
done
Loading