diff --git a/.github/workflows/verify-dashboards-alerts.yaml b/.github/workflows/verify-dashboards-alerts.yaml new file mode 100644 index 000000000..47a6b868e --- /dev/null +++ b/.github/workflows/verify-dashboards-alerts.yaml @@ -0,0 +1,122 @@ +name: Verify Dashboards and Alerts OK + +on: + push: + branches: main + paths: + # Dashboards + - examples/dashboards/app_developer.json + - examples/dashboards/business_user.json + - examples/dashboards/platform_engineer.json + # Alerts + - examples/alerts/prometheusrules_policies_missing.yaml + - examples/alerts/slo-availability.yaml + - examples/alerts/slo-latency.yaml +jobs: + verify-dashboards-alerts: + name: Verify Dashboards and Alerts OK + runs-on: ubuntu-latest + defaults: + run: + shell: bash -eo pipefail {0} + steps: + - uses: actions/checkout@v2 + + - name: Set up golang + run: | + sudo apt-get update -y + sudo apt-get install -y golang + + - name: Deploy observability stack (Grafana and Prometheus) + run: | + kind create cluster + # Install Istio + kubectl apply -k config/dependencies/istio/sail + kubectl -n istio-system wait --for=condition=Available deployment istio-operator --timeout=300s + kubectl apply -f config/dependencies/istio/sail/istio.yaml + + # Install Observability Stack (Grafana and Prometheus) + kubectl kustomize config/observability/ | docker run --rm -i ryane/kfilt -i kind=CustomResourceDefinition | kubectl apply --server-side -f - + kubectl kustomize config/observability/ | docker run --rm -i ryane/kfilt -x kind=CustomResourceDefinition | kubectl apply -f - + kubectl kustomize examples/dashboards/ | kubectl apply --server-side -f - + kubectl kustomize examples/alerts/ | kubectl apply --server-side -f - + + - name: Port forward grafana + run: | + # Port forward Grafana + kubectl -n monitoring wait --for=condition=available deployment grafana --timeout=600s + kubectl -n monitoring port-forward service/grafana 3000:3000 & + echo "Successfully port forwarded Grafana service." + + - name: Port forward Prometheus. + run: | + kubectl -n monitoring wait --for=condition=ready pod prometheus-k8s-0 --timeout=600s + # Port forward Prometheus + kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 & + echo "Successfully port forwarded Prometheus service." + + - name: Check if Grafana contains dashboards. + run: | + # Make API Call and save response to variable. + grafana_api_call=$(curl -u admin:admin http://127.0.0.1:3000/api/search) + + # Compare the content in json file with field containing dashboard names + + app_developer=$(jq -r '.panels[1].title' examples/dashboards/app_developer.json) + business_user=$(jq -r '.panels[1].title' examples/dashboards/business_user.json) + platform_engineer=$(jq -r '.panels[1].title' examples/dashboards/platform_engineer.json) + + declare -a missing_dashboards=() + + if [[ "$grafana_api_call" != *"$app_developer"* ]]; then + echo "Grafana does not have $app_developer dashboard." + missing_dashboards+=("$app_developer") + fi + if [[ "$grafana_api_call" != *"$business_user"* ]]; then + echo "Grafana does not have $business_user dashboard." + missing_dashboards+=("$business_user") + fi + if [[ "$grafana_api_call" != *"$platform_engineer"* ]]; then + echo "Grafana does not have $platform_engineer dashboard." + missing_dashboards+=("$platform_engineer") + fi + + if [[ ${#missing_dashboards[@]} -gt 0 ]]; then + echo "Grafana is missing the following dashboards:" + printf '%s\n' "${missing_dashboards[@]}" + echo "Exiting..." + exit 1 + fi + + echo "Grafana contains dashboards $app_developer, $business_user and $platform_engineer. Continuing to Prometheus..." + + - name: Check if Prometheus contains alert rules. + run: | + # Make API Call and save response to variable + prometheus_api_call=$(curl http://localhost:9090/api/v1/rules) + + # Compare the content in json file with field containing dashboard names. + + readarray -t prometheusrules_policies_missing_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/prometheusrules_policies_missing.yaml) + readarray -t slo_availability_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-availability.yaml) + readarray -t slo_latency_alerts < <(yq e '.spec.groups[].rules[].alert' examples/alerts/slo-latency.yaml) + + combined_alerts=("${prometheusrules_policies_missing_alerts[@]}" "${slo_availability_alerts[@]}" "${slo_latency_alerts[@]}") + + declare -a missing_alerts=() + + for alert in "${combined_alerts[@]}"; do + if [[ "$prometheus_api_call" != *"$alert"* && "$alert" != "null" ]]; then + echo "Prometheus does not have $alert rule." + missing_alerts+=("$alert") + fi + done + + if [[ ${#missing_alerts[@]} -gt 0 ]]; then + echo "Prometheus is missing the following alerts:" + printf '%s\n' "${missing_alerts[@]}" + echo "Exiting..." + exit 1 + fi + + echo "Prometheus has all alert rules." \ No newline at end of file