-
Notifications
You must be signed in to change notification settings - Fork 179
Add new metrics. #396
Add new metrics. #396
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ import ( | |
"github.com/go-kit/kit/log" | ||
"github.com/go-kit/kit/log/level" | ||
"github.com/pkg/errors" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/prometheus/tsdb/fileutil" | ||
"github.com/prometheus/tsdb/wal" | ||
) | ||
|
@@ -102,7 +103,7 @@ const checkpointPrefix = "checkpoint." | |
// it with the original WAL. | ||
// | ||
// Non-critical errors are logged and not returned. | ||
func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bool, mint int64) (*CheckpointStats, error) { | ||
func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bool, mint int64, checkpointDeleteFail prometheus.Counter) (*CheckpointStats, error) { | ||
if logger == nil { | ||
logger = log.NewNopLogger() | ||
} | ||
|
@@ -283,6 +284,7 @@ func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bo | |
// occupying disk space. | ||
// They will just be ignored since a higher checkpoint exists. | ||
level.Error(logger).Log("msg", "delete old checkpoints", "err", err) | ||
checkpointDeleteFail.Add(float64(1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. you could use `checkpointDeleteFail.Inc()` |
||
} | ||
simonpasquier marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return stats, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,19 +76,20 @@ type Head struct { | |
} | ||
|
||
type headMetrics struct { | ||
activeAppenders prometheus.Gauge | ||
series prometheus.Gauge | ||
seriesCreated prometheus.Counter | ||
seriesRemoved prometheus.Counter | ||
seriesNotFound prometheus.Counter | ||
chunks prometheus.Gauge | ||
chunksCreated prometheus.Counter | ||
chunksRemoved prometheus.Counter | ||
gcDuration prometheus.Summary | ||
minTime prometheus.GaugeFunc | ||
maxTime prometheus.GaugeFunc | ||
samplesAppended prometheus.Counter | ||
walTruncateDuration prometheus.Summary | ||
activeAppenders prometheus.Gauge | ||
series prometheus.Gauge | ||
seriesCreated prometheus.Counter | ||
seriesRemoved prometheus.Counter | ||
seriesNotFound prometheus.Counter | ||
chunks prometheus.Gauge | ||
chunksCreated prometheus.Counter | ||
chunksRemoved prometheus.Counter | ||
gcDuration prometheus.Summary | ||
minTime prometheus.GaugeFunc | ||
maxTime prometheus.GaugeFunc | ||
samplesAppended prometheus.Counter | ||
walTruncateDuration prometheus.Summary | ||
checkpointDeleteFail prometheus.Counter | ||
} | ||
|
||
func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { | ||
|
@@ -150,6 +151,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { | |
Name: "prometheus_tsdb_head_samples_appended_total", | ||
Help: "Total number of appended samples.", | ||
}) | ||
m.checkpointDeleteFail = prometheus.NewCounter(prometheus.CounterOpts{ | ||
Name: "prometheus_tsdb_checkpoint_delete_fail", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
Help: "Number of times deletion of old checkpoint failed.", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "Total number of checkpoint deletions that failed." |
||
}) | ||
simonpasquier marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if r != nil { | ||
r.MustRegister( | ||
|
@@ -166,6 +171,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { | |
m.gcDuration, | ||
m.walTruncateDuration, | ||
m.samplesAppended, | ||
m.checkpointDeleteFail, | ||
) | ||
} | ||
return m | ||
|
@@ -469,7 +475,7 @@ func (h *Head) Truncate(mint int64) error { | |
keep := func(id uint64) bool { | ||
return h.series.getByID(id) != nil | ||
} | ||
if _, err = Checkpoint(h.logger, h.wal, m, n, keep, mint); err != nil { | ||
if _, err = Checkpoint(h.logger, h.wal, m, n, keep, mint, h.metrics.checkpointDeleteFail); err != nil { | ||
return errors.Wrap(err, "create checkpoint") | ||
} | ||
h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -162,6 +162,7 @@ type WAL struct { | |
fsyncDuration prometheus.Summary | ||
pageFlushes prometheus.Counter | ||
pageCompletions prometheus.Counter | ||
truncateFail prometheus.Counter | ||
} | ||
|
||
// New returns a new WAL over the given directory. | ||
|
@@ -201,8 +202,12 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi | |
Name: "prometheus_tsdb_wal_completed_pages_total", | ||
Help: "Total number of completed pages.", | ||
}) | ||
w.truncateFail = prometheus.NewCounter(prometheus.CounterOpts{ | ||
Name: "prometheus_tsdb_wal_truncate_fail", | ||
Help: "Number of times WAL truncation failed.", | ||
}) | ||
if reg != nil { | ||
reg.MustRegister(w.fsyncDuration, w.pageFlushes, w.pageCompletions) | ||
reg.MustRegister(w.fsyncDuration, w.pageFlushes, w.pageCompletions, w.truncateFail) | ||
} | ||
|
||
_, j, err := w.Segments() | ||
|
@@ -530,13 +535,15 @@ func (w *WAL) Segments() (m, n int, err error) { | |
func (w *WAL) Truncate(i int) error { | ||
refs, err := listSegments(w.dir) | ||
if err != nil { | ||
w.truncateFail.Add(float64(1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
return err | ||
} | ||
for _, r := range refs { | ||
if r.n >= i { | ||
break | ||
} | ||
if err := os.Remove(filepath.Join(w.dir, r.s)); err != nil { | ||
w.truncateFail.Add(float64(1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
return err | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you could use `checkpointDeleteFail.Inc()`