From 9961618c02ef1856e75b0be0e4d1e31842bb52fe Mon Sep 17 00:00:00 2001 From: Ayoub Nasr Date: Tue, 22 Aug 2023 18:23:05 +0200 Subject: [PATCH] Fix KubeJobNotCompleted alert rule rendering and changelog entry --- CHANGELOG.md | 4 ++-- .../deployed/kube-alerts-rules.sls | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f6100968d..25a86a6387 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,9 @@ ## Release 125.0.6 (In development) -### Additions +### Enhancements -- Add alert configuration for KubeJobNotCompleted alert +- Make `KubeJobNotCompleted` alert time configurable (PR[#4128](https://github.com/scality/metalk8s/pull/4128)) ## Release 125.0.5 diff --git a/salt/metalk8s/addons/prometheus-operator/deployed/kube-alerts-rules.sls b/salt/metalk8s/addons/prometheus-operator/deployed/kube-alerts-rules.sls index 840c451305..811e93724e 100644 --- a/salt/metalk8s/addons/prometheus-operator/deployed/kube-alerts-rules.sls +++ b/salt/metalk8s/addons/prometheus-operator/deployed/kube-alerts-rules.sls @@ -1,3 +1,5 @@ +#!jinja | metalk8s_kubernetes + {%- from "metalk8s/repo/macro.sls" import build_image_name with context %} {%- set prometheus_defaults = salt.slsutil.renderer( 'salt://metalk8s/addons/prometheus-operator/config/prometheus.yaml', @@ -11,6 +13,15 @@ {%- set rules = prometheus.get('spec', {}).get('rules', {}) %} {%- raw %} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/part-of: metalk8s + metalk8s.scality.com/monitor: '' + release: prometheus-operator + name: metalk8s-kube-apps.rules + namespace: metalk8s-monitoring spec: groups: - name: kubernetes-apps @@ -18,13 +29,13 @@ spec: - alert: KubeJobNotCompleted annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking - more than {% endraw %} rules.kube_apps.kube_job_not_completed.warning.hours {% raw %} hours to complete. 
+ more than {% endraw %}{{ rules.kube_apps.kube_job_not_completed.warning.hours }}{% raw %} hours to complete. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted summary: Job did not complete in time expr: |- time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"} and - kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > {% endraw %} rules.kube_apps.kube_job_not_completed.warning.hours {% raw %}*60*60 + kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > {% endraw %}{{ rules.kube_apps.kube_job_not_completed.warning.hours }}{% raw %}*60*60 labels: severity: warning {%- endraw %} \ No newline at end of file