Skip to content

Commit

Permalink
BB-311 add oplog populator alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerkesni committed Jan 3, 2023
1 parent d1d9e63 commit 525eed9
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 0 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,12 @@ jobs:
alert_inputs: >-
namespace=zenko,job_notification_producer=artesca-data-backbeat-notification-producer-headless,job_notification_processors=artesca-data-backbeat-notification-processor,notificationFailuresWarningThreshold=0.1,notificationFailuresCriticalThreshold=0.5,notification_producer_replicas=3,notification_processor_replicas=2
github_token: ${{ secrets.GIT_ACCESS_TOKEN }}

# Workflow step that, per its name, renders the oplog populator alert rules and
# runs their unit tests.
# NOTE(review): the `uses` reference below looks mangled by e-mail obfuscation
# in this capture; confirm the intended "<owner>/<action>@<version>" value.
- name: Render and test oplog populator
  uses: scality/[email protected]
  with:
    # Rule template and the test file that exercises it.
    alert_file_path: monitoring/oplog-populator/alerts.yaml
    test_file_path: monitoring/oplog-populator/alerts.test.yaml
    # Comma-separated key=value pairs; the keys match the x-inputs names
    # declared in the alert file.
    alert_inputs: >-
      namespace=zenko,oplog_populator_job=artesca-data-backbeat-oplog-populator-headless
    github_token: ${{ secrets.GIT_ACCESS_TOKEN }}
27 changes: 27 additions & 0 deletions monitoring/oplog-populator/alerts.test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Unit tests for the oplog populator alert rules (promtool rule-test format).
evaluation_interval: 1m
rule_files:
  - alerts.rendered.yaml

tests:
  - name: Oplog Populator Replicas
    interval: 1m
    input_series:
      # One populator pod, sampled every minute: up, up, then down.
      - series: 'up{namespace="zenko",job="artesca-data-backbeat-oplog-populator-headless",pod="oplog-populator-1"}'
        values: '1 1 0'
    alert_rule_test:
      # The pod is still reporting up: nothing fires.
      - alertname: OplogPopulatorUnavailable
        eval_time: 1m
        exp_alerts: []
      # First sample with the pod down: the rule's hold duration has not
      # elapsed yet, so no alert is expected to be firing.
      - alertname: OplogPopulatorUnavailable
        eval_time: 2m
        exp_alerts: []
      # One evaluation later the critical alert is expected to fire.
      - alertname: OplogPopulatorUnavailable
        eval_time: 3m
        exp_alerts:
          - exp_labels:
              severity: critical
            exp_annotations:
              zenko_service: backbeat-oplog-populator
              description: 'Oplog populator pod is down'
              summary: 'Oplog populator service is in critical state'
22 changes: 22 additions & 0 deletions monitoring/oplog-populator/alerts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Alert rule template for the backbeat oplog populator.
# The `${...}` placeholders in the expression are presumably substituted from
# the x-inputs declared below when the file is rendered (names match).
x-inputs:
  - name: namespace
    type: constant
    value: zenko
  - name: oplog_populator_job
    type: config
    value: artesca-data-backbeat-oplog-populator-headless

groups:
  - name: Oplog Populator
    rules:
      # Critical when fewer than one populator target reports `up` for 30s.
      # Rule fields use the lowercase names of the Prometheus rule-file format.
      # NOTE(review): `sum(up{...})` returns no sample when no matching series
      # exists at all, so the alert stays silent in that case; consider
      # `or absent(...)` if a vanished target must also alert.
      - alert: OplogPopulatorUnavailable
        expr: |
          sum(up{namespace="${namespace}",job="${oplog_populator_job}"}) < 1
        for: "30s"
        labels:
          severity: critical
        annotations:
          zenko_service: backbeat-oplog-populator
          description: "Oplog populator pod is down"
          summary: "Oplog populator service is in critical state"

0 comments on commit 525eed9

Please sign in to comment.