Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add separate history for expired failed probe results #517

Merged
merged 6 commits into from
Sep 10, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions history.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ type result struct {
}

type resultHistory struct {
mu sync.Mutex
nextId int64
results []*result
maxResults uint
mu sync.Mutex
nextId int64
results []*result
maxResults uint
preservedFailedResults []*result
maxPreservedFailedResults uint
}

// Add a result to the history.
Expand All @@ -48,6 +50,14 @@ func (rh *resultHistory) Add(moduleName, target, debugOutput string, success boo

rh.results = append(rh.results, r)
if uint(len(rh.results)) > rh.maxResults {
if !rh.results[0].success {
rh.preservedFailedResults = append(rh.preservedFailedResults, rh.results[0])
if uint(len(rh.preservedFailedResults)) > rh.maxPreservedFailedResults {
preservedFailedResults := make([]*result, len(rh.preservedFailedResults)-1)
copy(preservedFailedResults, rh.preservedFailedResults[1:])
rh.preservedFailedResults = preservedFailedResults
}
}
results := make([]*result, len(rh.results)-1)
copy(results, rh.results[1:])
rh.results = results
Expand All @@ -62,11 +72,24 @@ func (rh *resultHistory) List() []*result {
return rh.results[:]
}

// ListPreservedFailures returns the failed results that were kept after being
// pushed out of the main result list. The mutex is held while the slice is
// read.
func (rh *resultHistory) ListPreservedFailures() []*result {
	rh.mu.Lock()
	preserved := rh.preservedFailedResults[:]
	rh.mu.Unlock()

	return preserved
}

// Get returns a given result.
func (rh *resultHistory) Get(id int64) *result {
rh.mu.Lock()
defer rh.mu.Unlock()

for _, r := range rh.preservedFailedResults {
if r.id == id {
return r
}
}
for _, r := range rh.results {
if r.id == id {
return r
Expand Down
99 changes: 99 additions & 0 deletions history_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"fmt"
"testing"
)

// TestHistoryKeepsLatestResults adds one more successful result than
// maxResults allows and checks that each entry returned by List carries the
// debug output of the result added one position later than its index.
func TestHistoryKeepsLatestResults(t *testing.T) {
	history := &resultHistory{maxResults: 3, maxPreservedFailedResults: 3}
	for n := 0; n < 4; n++ {
		history.Add("module", "target", fmt.Sprintf("result %d", n), true)
	}

	// "result 0" is expected to have been dropped, so index idx should hold
	// "result idx+1".
	for idx, saved := range history.List() {
		want := fmt.Sprintf("result %d", idx+1)
		if saved.debugOutput != want {
			t.Errorf("History contained the wrong result at index %d", idx)
		}
	}
}

// FillHistoryWithMaxSuccesses adds h.maxResults successful results to h. Each
// result's debug output is "result <n>", where n is h.nextId at the moment the
// result is added.
func FillHistoryWithMaxSuccesses(h *resultHistory) {
	for remaining := h.maxResults; remaining > 0; remaining-- {
		label := fmt.Sprintf("result %d", h.nextId)
		h.Add("module", "target", label, true)
	}
}

// FillHistoryWithMaxPreservedFailures adds h.maxPreservedFailedResults failed
// results to h. Each result's debug output is "result <n>", where n is
// h.nextId at the moment the result is added.
func FillHistoryWithMaxPreservedFailures(h *resultHistory) {
	for remaining := h.maxPreservedFailedResults; remaining > 0; remaining-- {
		label := fmt.Sprintf("result %d", h.nextId)
		h.Add("module", "target", label, false)
	}
}

// TestHistoryPreservesExpiredFailedResults walks the history through three
// phases and checks, after each one, both the main result list and the list of
// preserved failures:
//
//  1. successes are pushed out by failures: nothing is preserved;
//  2. failures are pushed out by more failures: the evicted ones are preserved;
//  3. failures are pushed out by failures and then successes: the preserved
//     list holds only the most recently evicted failures.
//
// Expected debug outputs are derived from the insertion order, since the fill
// helpers label each result with the history's nextId at insertion time.
func TestHistoryPreservesExpiredFailedResults(t *testing.T) {
	history := &resultHistory{maxResults: 3, maxPreservedFailedResults: 3}

	// Successes are expired, no failures are expired.
	FillHistoryWithMaxSuccesses(history)
	FillHistoryWithMaxPreservedFailures(history)
	savedResults := history.List()
	savedFailedResults := history.ListPreservedFailures()
	if len(savedFailedResults) > 0 {
		t.Errorf("Preserved failures contains failures unnecessarily.")
	}
	// Guard against the loop below passing vacuously on a short list.
	if uint(len(savedResults)) != history.maxResults {
		t.Errorf("History contained %d results, expected %d", len(savedResults), history.maxResults)
	}
	for i := uint(0); i < uint(len(savedResults)); i++ {
		expectedDebugOutput := fmt.Sprintf("result %d", i+history.maxResults)
		if savedResults[i].debugOutput != expectedDebugOutput {
			t.Errorf("History contained the wrong result at index %d. Expected: %s, Actual: %s", i, expectedDebugOutput, savedResults[i].debugOutput)
		}
	}

	// Failures are expired, should all be preserved.
	FillHistoryWithMaxPreservedFailures(history)
	savedResults = history.List()
	savedFailedResults = history.ListPreservedFailures()
	if uint(len(savedFailedResults)) != history.maxPreservedFailedResults {
		t.Errorf("Preserved failures contained %d results, expected %d", len(savedFailedResults), history.maxPreservedFailedResults)
	}
	for i := uint(0); i < uint(len(savedFailedResults)); i++ {
		expectedDebugOutput := fmt.Sprintf("result %d", i+history.maxResults)
		if savedFailedResults[i].debugOutput != expectedDebugOutput {
			// Report the preserved entry itself, not the main-history entry at the same index.
			t.Errorf("History contained the wrong result at index %d. Expected: %s, Actual: %s", i, expectedDebugOutput, savedFailedResults[i].debugOutput)
		}
	}
	if uint(len(savedResults)) != history.maxResults {
		t.Errorf("History contained %d results, expected %d", len(savedResults), history.maxResults)
	}
	for i := uint(0); i < uint(len(savedResults)); i++ {
		expectedDebugOutput := fmt.Sprintf("result %d", i+history.maxResults+history.maxPreservedFailedResults)
		if savedResults[i].debugOutput != expectedDebugOutput {
			t.Errorf("History contained the wrong result at index %d. Expected: %s, Actual: %s", i, expectedDebugOutput, savedResults[i].debugOutput)
		}
	}

	// New expired failures are preserved, new successes are not expired.
	FillHistoryWithMaxPreservedFailures(history)
	FillHistoryWithMaxSuccesses(history)
	savedResults = history.List()
	savedFailedResults = history.ListPreservedFailures()
	if uint(len(savedFailedResults)) != history.maxPreservedFailedResults {
		t.Errorf("Preserved failures contained %d results, expected %d", len(savedFailedResults), history.maxPreservedFailedResults)
	}
	for i := uint(0); i < uint(len(savedFailedResults)); i++ {
		expectedDebugOutput := fmt.Sprintf("result %d", i+history.maxResults+history.maxPreservedFailedResults*2)
		if savedFailedResults[i].debugOutput != expectedDebugOutput {
			// Report the preserved entry itself, not the main-history entry at the same index.
			t.Errorf("History contained the wrong result at index %d. Expected: %s, Actual: %s", i, expectedDebugOutput, savedFailedResults[i].debugOutput)
		}
	}
	if uint(len(savedResults)) != history.maxResults {
		t.Errorf("History contained %d results, expected %d", len(savedResults), history.maxResults)
	}
	for i := uint(0); i < uint(len(savedResults)); i++ {
		expectedDebugOutput := fmt.Sprintf("result %d", i+history.maxResults+history.maxPreservedFailedResults*3)
		if savedResults[i].debugOutput != expectedDebugOutput {
			t.Errorf("History contained the wrong result at index %d. Expected: %s, Actual: %s", i, expectedDebugOutput, savedResults[i].debugOutput)
		}
	}
}
32 changes: 25 additions & 7 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,12 @@ var (
C: &config.Config{},
}

configFile = kingpin.Flag("config.file", "Blackbox exporter configuration file.").Default("blackbox.yml").String()
listenAddress = kingpin.Flag("web.listen-address", "The address to listen on for HTTP requests.").Default(":9115").String()
timeoutOffset = kingpin.Flag("timeout-offset", "Offset to subtract from timeout in seconds.").Default("0.5").Float64()
configCheck = kingpin.Flag("config.check", "If true validate the config file and then exit.").Default().Bool()
historyLimit = kingpin.Flag("history.limit", "The maximum amount of items to keep in the history.").Default("100").Uint()
configFile = kingpin.Flag("config.file", "Blackbox exporter configuration file.").Default("blackbox.yml").String()
listenAddress = kingpin.Flag("web.listen-address", "The address to listen on for HTTP requests.").Default(":9115").String()
timeoutOffset = kingpin.Flag("timeout-offset", "Offset to subtract from timeout in seconds.").Default("0.5").Float64()
configCheck = kingpin.Flag("config.check", "If true validate the config file and then exit.").Default().Bool()
historyLimit = kingpin.Flag("history.limit", "The maximum amount of items to keep in the history.").Default("100").Uint()
historyPreservedFailedLimit = kingpin.Flag("history.preserved-failed-limit", "The maximum amount of failed items to preserve after expiration.").Default("5").Uint()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need an extra setting here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the extra flag it's less obvious to me how this should be implemented. Should the history.limit flag be the cardinality of standard history and the cardinality of preserved failed history? Should that flag be the cardinality of the combined history? Should the preserved failed history be a constant size separate from the history.limit flag?

I'm open to anything, though I'll admit that without the extra flag, the semantics feel confusing to me: "I set the history.limit flag to 10, why do I have 20 items?"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd have it be the limit of each.


Probers = map[string]prober.ProbeFn{
"http": prober.ProbeHTTP,
Expand Down Expand Up @@ -200,7 +201,7 @@ func run() int {
kingpin.HelpFlag.Short('h')
kingpin.Parse()
logger := promlog.New(promlogConfig)
rh := &resultHistory{maxResults: *historyLimit}
rh := &resultHistory{maxResults: *historyLimit, maxPreservedFailedResults: *historyPreservedFailedLimit}

level.Info(logger).Log("msg", "Starting blackbox_exporter", "version", version.Info())
level.Info(logger).Log("msg", "Build context", version.BuildContext())
Expand Down Expand Up @@ -287,7 +288,24 @@ func run() int {
html.EscapeString(r.moduleName), html.EscapeString(r.target), success, r.id)
}

w.Write([]byte(`</table></body>
w.Write([]byte(`</table>
<h2>Preserved Failed Probes</h2>
<table border='1'><tr><th>Module</th><th>Target</th><th>Result</th><th>Debug</th>`))

preservedFailedResults := rh.ListPreservedFailures()

for i := len(preservedFailedResults) - 1; i >= 0; i-- {
r := preservedFailedResults[i]
success := "Success"
if !r.success {
success = "<strong>Failure</strong>"
}
fmt.Fprintf(w, "<tr><td>%s</td><td>%s</td><td>%s</td><td><a href='logs?id=%d'>Logs</a></td></td>",
html.EscapeString(r.moduleName), html.EscapeString(r.target), success, r.id)
}

w.Write([]byte(`</table>
</body>
</html>`))
})

Expand Down