diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index b1c6192051..80575ec73e 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -15,7 +15,6 @@ package inhibit import ( "context" - "fmt" "sync" "time" @@ -81,12 +80,7 @@ func (ih *Inhibitor) run(ctx context.Context) { level.Error(ih.logger).Log("msg", "Error iterating alerts", "err", err) continue } - if a.Resolved() { - // As alerts can also time out without an update, we never - // handle new resolved alerts but invalidate the cache on read. - continue - } - // Populate the inhibition rules' cache. + // Update the inhibition rules' cache. for _, r := range ih.rules { if r.SourceMatchers.Match(a.Labels) { r.set(a) @@ -145,7 +139,7 @@ func (ih *Inhibitor) Mutes(lset model.LabelSet) bool { for _, r := range ih.rules { // Only inhibit if target matchers match but source matchers don't. if inhibitedByFP, eq := r.hasEqual(lset); !r.SourceMatchers.Match(lset) && r.TargetMatchers.Match(lset) && eq { - ih.marker.SetInhibited(fp, fmt.Sprintf("%d", inhibitedByFP)) + ih.marker.SetInhibited(fp, inhibitedByFP.String()) return true } } diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 9695c998af..d269c06f23 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -18,13 +18,20 @@ import ( "testing" "time" + "github.com/go-kit/kit/log" "github.com/kylelemons/godebug/pretty" + "github.com/prometheus/common/model" + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/provider" "github.com/prometheus/alertmanager/types" - "github.com/prometheus/common/model" ) +var nopLogger = log.NewNopLogger() + func TestInhibitRuleHasEqual(t *testing.T) { + t.Parallel() + now := time.Now() cases := []struct { initial map[model.Fingerprint]*types.Alert @@ -135,6 +142,8 @@ func TestInhibitRuleHasEqual(t *testing.T) { } func TestInhibitRuleMatches(t *testing.T) { + t.Parallel() + // Simple inhibut rule cr := config.InhibitRule{ SourceMatch: map[string]string{"s": "1"}, @@ -142,7 +151,7 @@ func TestInhibitRuleMatches(t *testing.T) { Equal: model.LabelNames{"e"}, } m := types.NewMarker() - ih := NewInhibitor(nil, []*config.InhibitRule{&cr}, m, nil) + ih := NewInhibitor(nil, []*config.InhibitRule{&cr}, m, nopLogger) ir := ih.rules[0] now := time.Now() // Active alert that matches the source filter @@ -226,3 +235,151 @@ func TestInhibitRuleGC(t *testing.T) { t.Errorf(pretty.Compare(r.scache, after)) } } + +type fakeAlerts struct { + alerts []*types.Alert + finished chan struct{} +} + +func newFakeAlerts(alerts []*types.Alert) *fakeAlerts { + return &fakeAlerts{ + alerts: alerts, + finished: make(chan struct{}), + } +} + +func (f *fakeAlerts) GetPending() provider.AlertIterator { return nil } +func (f *fakeAlerts) Get(model.Fingerprint) (*types.Alert, error) { return nil, nil } +func (f *fakeAlerts) Put(...*types.Alert) error { return nil } +func (f *fakeAlerts) Subscribe() provider.AlertIterator { + ch := make(chan *types.Alert) + done := make(chan struct{}) + go func() { + for _, a := range f.alerts { + ch <- a + } + // Send another (meaningless) alert to make sure that the inhibitor has + // processed everything. + ch <- &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{}, + StartsAt: time.Now(), + }, + } + close(f.finished) + <-done + }() + return provider.NewAlertIterator(ch, done, nil) +} + +func TestInhibit(t *testing.T) { + t.Parallel() + + now := time.Now() + inhibitRule := func() *config.InhibitRule { + return &config.InhibitRule{ + SourceMatch: map[string]string{"s": "1"}, + TargetMatch: map[string]string{"t": "1"}, + Equal: model.LabelNames{"e"}, + } + } + // alertOne is muted by alertTwo when it is active. + alertOne := func() *types.Alert { + return &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"t": "1", "e": "f"}, + StartsAt: now.Add(-time.Minute), + EndsAt: now.Add(time.Hour), + }, + } + } + alertTwo := func(resolved bool) *types.Alert { + var end time.Time + if resolved { + end = now.Add(-time.Second) + } else { + end = now.Add(time.Hour) + } + return &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"s": "1", "e": "f"}, + StartsAt: now.Add(-time.Minute), + EndsAt: end, + }, + } + } + + type exp struct { + lbls model.LabelSet + muted bool + } + for i, tc := range []struct { + alerts []*types.Alert + expected []exp + }{ + { + // alertOne shouldn't be muted since alertTwo hasn't fired. + alerts: []*types.Alert{alertOne()}, + expected: []exp{ + { + lbls: model.LabelSet{"t": "1", "e": "f"}, + muted: false, + }, + }, + }, + { + // alertOne should be muted by alertTwo which is active. + alerts: []*types.Alert{alertOne(), alertTwo(false)}, + expected: []exp{ + { + lbls: model.LabelSet{"t": "1", "e": "f"}, + muted: true, + }, + { + lbls: model.LabelSet{"s": "1", "e": "f"}, + muted: false, + }, + }, + }, + { + // alertOne shouldn't be muted since alertTwo is resolved. + alerts: []*types.Alert{alertOne(), alertTwo(false), alertTwo(true)}, + expected: []exp{ + { + lbls: model.LabelSet{"t": "1", "e": "f"}, + muted: false, + }, + { + lbls: model.LabelSet{"s": "1", "e": "f"}, + muted: false, + }, + }, + }, + } { + ap := newFakeAlerts(tc.alerts) + mk := types.NewMarker() + inhibitor := NewInhibitor(ap, []*config.InhibitRule{inhibitRule()}, mk, nopLogger) + + go func() { + for ap.finished != nil { + select { + case <-ap.finished: + ap.finished = nil + default: + } + } + inhibitor.Stop() + }() + inhibitor.Run() + + for _, expected := range tc.expected { + if inhibitor.Mutes(expected.lbls) != expected.muted { + mute := "unmuted" + if expected.muted { + mute = "muted" + } + t.Errorf("tc: %d, expected alert with labels %q to be %s", i, expected.lbls, mute) + } + } + } +} diff --git a/test/acceptance/inhibit_test.go b/test/acceptance/inhibit_test.go index 1e8cacdf5e..43d317c67c 100644 --- a/test/acceptance/inhibit_test.go +++ b/test/acceptance/inhibit_test.go @@ -24,6 +24,10 @@ import ( func TestInhibiting(t *testing.T) { t.Parallel() + // This integration test checks that alerts can be inhibited and that an + // inhibited alert will be notified again as soon as the inhibiting alert + // gets resolved. + conf := ` route: receiver: "default" @@ -64,6 +68,10 @@ inhibit_rules: // second batch of notifications. am.Push(At(2.2), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa")) + // InstanceDown in zone aa should fire again in the third batch of + // notifications once JobDown in zone aa gets resolved. + am.Push(At(3.6), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2, 3.6)) + co.Want(Between(2, 2.5), Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1), @@ -76,5 +84,69 @@ inhibit_rules: Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2), ) + co.Want(Between(4, 4.5), + Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1), + Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1), + Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1), + Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2, 3.6), + ) + + at.Run() +} + +func TestAlwaysInhibiting(t *testing.T) { + t.Parallel() + + // This integration test checks that when inhibited and inhibiting alerts + // gets resolved at the same time, the final notification contains both + // alerts. + + conf := ` +route: + receiver: "default" + group_by: [] + group_wait: 1s + group_interval: 1s + repeat_interval: 1s + +receivers: +- name: "default" + webhook_configs: + - url: 'http://%s' + +inhibit_rules: +- source_match: + alertname: JobDown + target_match: + alertname: InstanceDown + equal: + - job + - zone +` + + at := NewAcceptanceTest(t, &AcceptanceOpts{ + Tolerance: 150 * time.Millisecond, + }) + + co := at.Collector("webhook") + wh := NewWebhook(co) + + am := at.Alertmanager(fmt.Sprintf(conf, wh.Address())) + + am.Push(At(1), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa")) + am.Push(At(1), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa")) + + am.Push(At(2.6), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(1, 2.6)) + am.Push(At(2.6), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1, 2.6)) + + co.Want(Between(2, 2.5), + Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(1), + ) + + co.Want(Between(3, 3.5), + Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1, 2.6), + Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(1, 2.6), + ) + at.Run() }