Skip to content

Commit

Permalink
[Heartbeat] Only retry down monitors on first down check (elastic#36719)
Browse files Browse the repository at this point in the history
* [Heartbeat] Only retry down monitors on first down check

When monitors go down we don't want to keep using retries when
it was previously down, we only want to retry when it goes down
the first time per the retries spec.

* Update failing test
  • Loading branch information
andrewvc authored and Scholar-Li committed Feb 5, 2024
1 parent 9d0b0c1 commit 3b056ba
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 11 deletions.
11 changes: 9 additions & 2 deletions heartbeat/monitors/wrappers/summarizer/plugstatestat.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,15 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions {
// determine if a retry is needed
lastStatus := ssp.stateTracker.GetCurrentStatus(ssp.sf)

// FinalAttempt is true if no retries will occur
retry := ssp.js.Status == monitorstate.StatusDown && ssp.js.Attempt < ssp.js.MaxAttempts
curCheckDown := ssp.js.Status == monitorstate.StatusDown
lastStateUp := ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusUp
hasAttemptsRemaining := ssp.js.Attempt < ssp.js.MaxAttempts

// retry if...
retry := curCheckDown && // the current check is down
lastStateUp && // we were previously up, if we were previously down we just check once
hasAttemptsRemaining // and we are configured to actually make multiple attempts
// if we aren't retrying this is the final attempt
ssp.js.FinalAttempt = !retry

ms := ssp.stateTracker.RecordStatus(ssp.sf, ssp.js.Status, ssp.js.FinalAttempt)
Expand Down
6 changes: 3 additions & 3 deletions heartbeat/monitors/wrappers/summarizer/summarizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func TestSummarizer(t *testing.T) {
2,
"du",
"du",
"12",
"11",
2,
testURL,
},
Expand All @@ -80,7 +80,7 @@ func TestSummarizer(t *testing.T) {
2,
"dddddddd",
"dddddddd",
"12121212",
"11111111",
8,
testURL,
},
Expand All @@ -89,7 +89,7 @@ func TestSummarizer(t *testing.T) {
2,
"udddduuu",
"uuddduuu",
"11212111",
"11211111",
8,
testURL,
},
Expand Down
10 changes: 4 additions & 6 deletions heartbeat/monitors/wrappers/wrappers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,10 @@ func TestMultiJobConts(t *testing.T) {
})
}

// TestRetryMultiCont is of somewhat dubious utility at the moment,
// it mostly tests that we __don't__ retry on an initial down.
// retry logic is better and more completely tested in the summarizer
// and scenario tests.
func TestRetryMultiCont(t *testing.T) {
uniqScope := isdef.ScopedIsUnique()

Expand Down Expand Up @@ -466,12 +470,6 @@ func TestRetryMultiCont(t *testing.T) {
summarizertesthelper.SummaryValidator(expected.js.Up, expected.js.Down),
hbtestllext.MaybeHasDuration,
),
contJobValidator("http://foo.com", "1st"),
lookslike.Compose(
contJobValidator("http://foo.com", "2nd"),
summarizertesthelper.SummaryValidator(expected.js.Up, expected.js.Down),
hbtestllext.MaybeHasDuration,
),
},
nil,
nil,
Expand Down

0 comments on commit 3b056ba

Please sign in to comment.