Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Commit

Permalink
Add new metrics.
Browse files Browse the repository at this point in the history
1. 'prometheus_tsdb_wal_truncate_fail' counts failed WAL truncations.
2. 'prometheus_tsdb_checkpoint_delete_fail' counts failed deletions of old checkpoints.

Signed-off-by: Ganesh Vernekar <[email protected]>
  • Loading branch information
codesome committed Sep 25, 2018
1 parent a971f52 commit 632dfb3
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 17 deletions.
4 changes: 3 additions & 1 deletion checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/tsdb/fileutil"
"github.com/prometheus/tsdb/wal"
)
Expand Down Expand Up @@ -102,7 +103,7 @@ const checkpointPrefix = "checkpoint."
// it with the original WAL.
//
// Non-critical errors are logged and not returned.
func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bool, mint int64) (*CheckpointStats, error) {
func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bool, mint int64, checkpointDeleteFail prometheus.Counter) (*CheckpointStats, error) {
if logger == nil {
logger = log.NewNopLogger()
}
Expand Down Expand Up @@ -283,6 +284,7 @@ func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bo
// occupying disk space.
// They will just be ignored since a higher checkpoint exists.
level.Error(logger).Log("msg", "delete old checkpoints", "err", err)
checkpointDeleteFail.Add(float64(1))
}
return stats, nil
}
3 changes: 2 additions & 1 deletion checkpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"path/filepath"
"testing"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/tsdb/fileutil"
"github.com/prometheus/tsdb/labels"
"github.com/prometheus/tsdb/testutil"
Expand Down Expand Up @@ -139,7 +140,7 @@ func TestCheckpoint(t *testing.T) {

_, err = Checkpoint(nil, w, 100, 106, func(x uint64) bool {
return x%2 == 0
}, last/2)
}, last/2, prometheus.NewCounter(prometheus.CounterOpts{}))
testutil.Ok(t, err)

// Only the new checkpoint should be left.
Expand Down
34 changes: 20 additions & 14 deletions head.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,20 @@ type Head struct {
}

type headMetrics struct {
activeAppenders prometheus.Gauge
series prometheus.Gauge
seriesCreated prometheus.Counter
seriesRemoved prometheus.Counter
seriesNotFound prometheus.Counter
chunks prometheus.Gauge
chunksCreated prometheus.Counter
chunksRemoved prometheus.Counter
gcDuration prometheus.Summary
minTime prometheus.GaugeFunc
maxTime prometheus.GaugeFunc
samplesAppended prometheus.Counter
walTruncateDuration prometheus.Summary
activeAppenders prometheus.Gauge
series prometheus.Gauge
seriesCreated prometheus.Counter
seriesRemoved prometheus.Counter
seriesNotFound prometheus.Counter
chunks prometheus.Gauge
chunksCreated prometheus.Counter
chunksRemoved prometheus.Counter
gcDuration prometheus.Summary
minTime prometheus.GaugeFunc
maxTime prometheus.GaugeFunc
samplesAppended prometheus.Counter
walTruncateDuration prometheus.Summary
checkpointDeleteFail prometheus.Counter
}

func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
Expand Down Expand Up @@ -150,6 +151,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
Name: "prometheus_tsdb_head_samples_appended_total",
Help: "Total number of appended samples.",
})
m.checkpointDeleteFail = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_checkpoint_delete_fail",
Help: "Number of times deletion of old checkpoint failed.",
})

if r != nil {
r.MustRegister(
Expand All @@ -166,6 +171,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
m.gcDuration,
m.walTruncateDuration,
m.samplesAppended,
m.checkpointDeleteFail,
)
}
return m
Expand Down Expand Up @@ -469,7 +475,7 @@ func (h *Head) Truncate(mint int64) error {
keep := func(id uint64) bool {
return h.series.getByID(id) != nil
}
if _, err = Checkpoint(h.logger, h.wal, m, n, keep, mint); err != nil {
if _, err = Checkpoint(h.logger, h.wal, m, n, keep, mint, h.metrics.checkpointDeleteFail); err != nil {
return errors.Wrap(err, "create checkpoint")
}
h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds())
Expand Down
9 changes: 8 additions & 1 deletion wal/wal.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ type WAL struct {
fsyncDuration prometheus.Summary
pageFlushes prometheus.Counter
pageCompletions prometheus.Counter
truncateFail prometheus.Counter
}

// New returns a new WAL over the given directory.
Expand Down Expand Up @@ -201,8 +202,12 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi
Name: "prometheus_tsdb_wal_completed_pages_total",
Help: "Total number of completed pages.",
})
w.truncateFail = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_wal_truncate_fail",
Help: "Number of times WAL truncation failed.",
})
if reg != nil {
reg.MustRegister(w.fsyncDuration, w.pageFlushes, w.pageCompletions)
reg.MustRegister(w.fsyncDuration, w.pageFlushes, w.pageCompletions, w.truncateFail)
}

_, j, err := w.Segments()
Expand Down Expand Up @@ -530,13 +535,15 @@ func (w *WAL) Segments() (m, n int, err error) {
func (w *WAL) Truncate(i int) error {
refs, err := listSegments(w.dir)
if err != nil {
w.truncateFail.Add(float64(1))
return err
}
for _, r := range refs {
if r.n >= i {
break
}
if err := os.Remove(filepath.Join(w.dir, r.s)); err != nil {
w.truncateFail.Add(float64(1))
return err
}
}
Expand Down

0 comments on commit 632dfb3

Please sign in to comment.