From cdb8957c9f0593adeaca39b8ab257814c5f958a8 Mon Sep 17 00:00:00 2001 From: Matt Cottingham Date: Thu, 6 Jun 2019 14:33:57 +0100 Subject: [PATCH] Propagate retriable/haltable errors from compactor (#1183) * Propagate retriable/haltable errors from compactor Previously, a new error was created from all worker errors so the custom type was not propagated. If all errors returned are retriable, we assume it is safe to retry the compaction. Since the compactor may still return a single error this logic is preserved. * Update MultiError import for tsdb 0.8.0 * Update import path in tests --- cmd/thanos/compact.go | 6 +++--- pkg/compact/compact.go | 31 +++++++++++++++++++++++++------ pkg/compact/compact_test.go | 26 ++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index b65a389f83e..a789b70842b 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -285,9 +285,9 @@ func runCompact( if err == nil { return nil } + // The HaltError type signals that we hit a critical bug and should block - // for investigation. - // You should alert on this being halted. + // for investigation. You should alert on this being halted. if compact.IsHaltError(err) { if haltOnError { level.Error(logger).Log("msg", "critical error detected; halting", "err", err) @@ -299,7 +299,7 @@ func runCompact( } // The RetryError signals that we hit an retriable error (transient error, no connection). - // You should alert on this being triggered to frequently. + // You should alert on this being triggered too frequently. if compact.IsRetryError(err) { level.Error(logger).Log("msg", "retriable error", "err", err) retried.Inc() diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index c9c21cdeb02..61a9f0be1dc 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -8,21 +8,20 @@ import ( "path" "path/filepath" "sort" - "strings" "sync" "time" - "github.com/improbable-eng/thanos/pkg/block/metadata" - "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/improbable-eng/thanos/pkg/block" + "github.com/improbable-eng/thanos/pkg/block/metadata" "github.com/improbable-eng/thanos/pkg/compact/downsample" "github.com/improbable-eng/thanos/pkg/objstore" "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/tsdb" + terrors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/labels" ) @@ -613,7 +612,17 @@ func (e HaltError) Error() string { } // IsHaltError returns true if the base error is a HaltError. +// If a multierror is passed, any halt error will return true. func IsHaltError(err error) bool { + if multiErr, ok := err.(terrors.MultiError); ok { + for _, err := range multiErr { + if _, ok := errors.Cause(err).(HaltError); ok { + return true + } + } + return false + } + _, ok := errors.Cause(err).(HaltError) return ok } @@ -636,7 +645,17 @@ func (e RetryError) Error() string { } // IsRetryError returns true if the base error is a RetryError. +// If a multierror is passed, all errors must be retriable. func IsRetryError(err error) bool { + if multiErr, ok := err.(terrors.MultiError); ok { + for _, err := range multiErr { + if _, ok := errors.Cause(err).(RetryError); !ok { + return false + } + } + return true + } + _, ok := errors.Cause(err).(RetryError) return ok } @@ -1037,12 +1056,12 @@ func (c *BucketCompactor) Compact(ctx context.Context) error { close(errChan) workCtxCancel() if err != nil { - errMsgs := []string{err.Error()} + errs := terrors.MultiError{err} // Collect any other errors reported by the workers. for e := range errChan { - errMsgs = append(errMsgs, e.Error()) + errs.Add(e) } - return errors.New(strings.Join(errMsgs, "; ")) + return errs } if finishedAllGroups { diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index 47155161f90..4639fff67af 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -11,6 +11,7 @@ import ( "github.com/improbable-eng/thanos/pkg/testutil" "github.com/oklog/ulid" "github.com/pkg/errors" + terrors "github.com/prometheus/tsdb/errors" ) func TestHaltError(t *testing.T) { @@ -27,6 +28,31 @@ func TestHaltError(t *testing.T) { testutil.Assert(t, IsHaltError(err), "not a halt error") } +func TestHaltMultiError(t *testing.T) { + haltErr := halt(errors.New("halt error")) + nonHaltErr := errors.New("not a halt error") + + errs := terrors.MultiError{nonHaltErr} + testutil.Assert(t, !IsHaltError(errs), "should not be a halt error") + + errs.Add(haltErr) + testutil.Assert(t, IsHaltError(errs), "if any halt errors are present this should return true") +} + +func TestRetryMultiError(t *testing.T) { + retryErr := retry(errors.New("retry error")) + nonRetryErr := errors.New("not a retry error") + + errs := terrors.MultiError{nonRetryErr} + testutil.Assert(t, !IsRetryError(errs), "should not be a retry error") + + errs = terrors.MultiError{retryErr} + testutil.Assert(t, IsRetryError(errs), "if all errors are retriable this should return true") + + errs = terrors.MultiError{nonRetryErr, retryErr} + testutil.Assert(t, !IsRetryError(errs), "mixed errors should return false") +} + func TestRetryError(t *testing.T) { err := errors.New("test") testutil.Assert(t, !IsRetryError(err), "retry error")