Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compactor: adds downsample duration histogram #4552

Merged
merged 7 commits into from
Aug 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#4487](https://github.com/thanos-io/thanos/pull/4487) Query: Add memcached auto discovery support.
- [#4444](https://github.com/thanos-io/thanos/pull/4444) UI: Add search block UI.
- [#4509](https://github.com/thanos-io/thanos/pull/4509) Logging: Adds duration_ms in int64 to the logs.
- [#4462](https://github.com/thanos-io/thanos/pull/4462) UI: Add find overlap block UI
- [#4462](https://github.com/thanos-io/thanos/pull/4462) UI: Add find overlap block UI.
- [#4469](https://github.com/thanos-io/thanos/pull/4469) Compact: Add flag `compact.skip-block-with-out-of-order-chunks` to skip blocks with out-of-order chunks during compaction instead of halting.
- [#4552](https://github.com/thanos-io/thanos/pull/4552) Compact: Adds `thanos_compact_downsample_duration_seconds` histogram.

### Fixed

Expand Down
23 changes: 20 additions & 3 deletions cmd/thanos/downsample.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
// DownsampleMetrics groups the Prometheus collectors recorded while
// downsampling blocks.
type DownsampleMetrics struct {
// downsamples is a counter vector; its registration is not visible in this
// view — presumably it counts downsampling attempts (TODO confirm).
downsamples *prometheus.CounterVec
// downsampleFailures is incremented, labelled with the block's group key,
// when processing a block for downsampling returns an error.
downsampleFailures *prometheus.CounterVec
// downsampleDuration is the thanos_compact_downsample_duration_seconds
// histogram; it observes the downsample duration in seconds, labelled with
// the block's group key.
downsampleDuration *prometheus.HistogramVec
}

func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics {
Expand All @@ -51,6 +52,11 @@ func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics {
Name: "thanos_compact_downsample_failures_total",
Help: "Total number of failed downsampling attempts.",
}, []string{"group"})
m.downsampleDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
Name: "thanos_compact_downsample_duration_seconds",
Help: "Duration of downsample runs",
Buckets: []float64{60, 300, 900, 1800, 3600, 7200, 14400}, // 1m, 5m, 15m, 30m, 60m, 120m, 240m
}, []string{"group"})

return m
}
Expand Down Expand Up @@ -237,7 +243,7 @@ func downsampleBucket(
resolution = downsample.ResLevel2
errMsg = "downsampling to 60 min"
}
if err := processDownsampling(ctx, logger, bkt, m, dir, resolution, hashFunc); err != nil {
if err := processDownsampling(ctx, logger, bkt, m, dir, resolution, hashFunc, metrics); err != nil {
metrics.downsampleFailures.WithLabelValues(compact.DefaultGroupKey(m.Thanos)).Inc()
return errors.Wrap(err, errMsg)
}
Expand Down Expand Up @@ -309,7 +315,16 @@ func downsampleBucket(
return nil
}

func processDownsampling(ctx context.Context, logger log.Logger, bkt objstore.Bucket, m *metadata.Meta, dir string, resolution int64, hashFunc metadata.HashFunc) error {
func processDownsampling(
ctx context.Context,
logger log.Logger,
bkt objstore.Bucket,
m *metadata.Meta,
dir string,
resolution int64,
hashFunc metadata.HashFunc,
metrics *DownsampleMetrics,
) error {
begin := time.Now()
bdir := filepath.Join(dir, m.ULID.String())

Expand Down Expand Up @@ -344,8 +359,10 @@ func processDownsampling(ctx context.Context, logger log.Logger, bkt objstore.Bu
}
resdir := filepath.Join(dir, id.String())

downsampleDuration := time.Since(begin)
level.Info(logger).Log("msg", "downsampled block",
"from", m.ULID, "to", id, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds())
"from", m.ULID, "to", id, "duration", downsampleDuration, "duration_ms", downsampleDuration.Milliseconds())
metrics.downsampleDuration.WithLabelValues(compact.DefaultGroupKey(m.Thanos)).Observe(downsampleDuration.Seconds())

if err := block.VerifyIndex(logger, filepath.Join(resdir, block.IndexFilename), m.MinTime, m.MaxTime); err != nil {
return errors.Wrap(err, "output block index not valid")
Expand Down