Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Add new metrics. #396

Merged
merged 3 commits into from
Oct 1, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/tsdb/fileutil"
"github.com/prometheus/tsdb/wal"
)
Expand Down Expand Up @@ -102,7 +103,7 @@ const checkpointPrefix = "checkpoint."
// it with the original WAL.
//
// Non-critical errors are logged and not returned.
func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bool, mint int64) (*CheckpointStats, error) {
func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bool, mint int64, checkpointDeleteFail prometheus.Counter) (*CheckpointStats, error) {
if logger == nil {
logger = log.NewNopLogger()
}
Expand Down Expand Up @@ -283,6 +284,7 @@ func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bo
// occupying disk space.
// They will just be ignored since a higher checkpoint exists.
level.Error(logger).Log("msg", "delete old checkpoints", "err", err)
checkpointDeleteFail.Add(float64(1))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

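You could use checkpointDeleteFail.Inc() here instead of checkpointDeleteFail.Add(float64(1)); Inc() is the idiomatic way to increment a counter by one.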

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

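You could use checkpointDeleteFail.Inc() here instead of checkpointDeleteFail.Add(float64(1)); Inc() is the idiomatic way to increment a counter by one.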

}
simonpasquier marked this conversation as resolved.
Show resolved Hide resolved
return stats, nil
}
3 changes: 2 additions & 1 deletion checkpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"path/filepath"
"testing"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/tsdb/fileutil"
"github.com/prometheus/tsdb/labels"
"github.com/prometheus/tsdb/testutil"
Expand Down Expand Up @@ -139,7 +140,7 @@ func TestCheckpoint(t *testing.T) {

_, err = Checkpoint(nil, w, 100, 106, func(x uint64) bool {
return x%2 == 0
}, last/2)
}, last/2, prometheus.NewCounter(prometheus.CounterOpts{}))
testutil.Ok(t, err)

// Only the new checkpoint should be left.
Expand Down
34 changes: 20 additions & 14 deletions head.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,20 @@ type Head struct {
}

type headMetrics struct {
activeAppenders prometheus.Gauge
series prometheus.Gauge
seriesCreated prometheus.Counter
seriesRemoved prometheus.Counter
seriesNotFound prometheus.Counter
chunks prometheus.Gauge
chunksCreated prometheus.Counter
chunksRemoved prometheus.Counter
gcDuration prometheus.Summary
minTime prometheus.GaugeFunc
maxTime prometheus.GaugeFunc
samplesAppended prometheus.Counter
walTruncateDuration prometheus.Summary
activeAppenders prometheus.Gauge
series prometheus.Gauge
seriesCreated prometheus.Counter
seriesRemoved prometheus.Counter
seriesNotFound prometheus.Counter
chunks prometheus.Gauge
chunksCreated prometheus.Counter
chunksRemoved prometheus.Counter
gcDuration prometheus.Summary
minTime prometheus.GaugeFunc
maxTime prometheus.GaugeFunc
samplesAppended prometheus.Counter
walTruncateDuration prometheus.Summary
checkpointDeleteFail prometheus.Counter
}

func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
Expand Down Expand Up @@ -150,6 +151,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
Name: "prometheus_tsdb_head_samples_appended_total",
Help: "Total number of appended samples.",
})
m.checkpointDeleteFail = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_checkpoint_delete_fail",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested metric name: prometheus_tsdb_checkpoint_deletions_failed_total (counter names should end in the _total suffix).

Help: "Number of times deletion of old checkpoint failed.",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested help text: "Total number of checkpoint deletions that failed."

})
simonpasquier marked this conversation as resolved.
Show resolved Hide resolved

if r != nil {
r.MustRegister(
Expand All @@ -166,6 +171,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
m.gcDuration,
m.walTruncateDuration,
m.samplesAppended,
m.checkpointDeleteFail,
)
}
return m
Expand Down Expand Up @@ -469,7 +475,7 @@ func (h *Head) Truncate(mint int64) error {
keep := func(id uint64) bool {
return h.series.getByID(id) != nil
}
if _, err = Checkpoint(h.logger, h.wal, m, n, keep, mint); err != nil {
if _, err = Checkpoint(h.logger, h.wal, m, n, keep, mint, h.metrics.checkpointDeleteFail); err != nil {
return errors.Wrap(err, "create checkpoint")
}
h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds())
Expand Down
9 changes: 8 additions & 1 deletion wal/wal.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ type WAL struct {
fsyncDuration prometheus.Summary
pageFlushes prometheus.Counter
pageCompletions prometheus.Counter
truncateFail prometheus.Counter
}

// New returns a new WAL over the given directory.
Expand Down Expand Up @@ -201,8 +202,12 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi
Name: "prometheus_tsdb_wal_completed_pages_total",
Help: "Total number of completed pages.",
})
w.truncateFail = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_wal_truncate_fail",
Help: "Number of times WAL truncation failed.",
})
if reg != nil {
reg.MustRegister(w.fsyncDuration, w.pageFlushes, w.pageCompletions)
reg.MustRegister(w.fsyncDuration, w.pageFlushes, w.pageCompletions, w.truncateFail)
}

_, j, err := w.Segments()
Expand Down Expand Up @@ -530,13 +535,15 @@ func (w *WAL) Segments() (m, n int, err error) {
func (w *WAL) Truncate(i int) error {
refs, err := listSegments(w.dir)
if err != nil {
w.truncateFail.Add(float64(1))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use w.truncateFail.Inc() here instead of w.truncateFail.Add(float64(1)).

return err
}
for _, r := range refs {
if r.n >= i {
break
}
if err := os.Remove(filepath.Join(w.dir, r.s)); err != nil {
w.truncateFail.Add(float64(1))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto: use w.truncateFail.Inc() here as well.

return err
}
}
Expand Down