Skip to content

Commit

Permalink
Propagate retriable/haltable errors from compactor
Browse files Browse the repository at this point in the history
Previously, a new error was created from all worker errors so the custom
type was not propagated.

If all errors returned are retriable, we assume it is safe to retry the compaction.

Since the compactor may still return a single error this logic is preserved.
  • Loading branch information
mattrco committed Jun 5, 2019
1 parent 7a11fe5 commit 3dfb893
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 9 deletions.
6 changes: 3 additions & 3 deletions cmd/thanos/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,9 @@ func runCompact(
if err == nil {
return nil
}

// The HaltError type signals that we hit a critical bug and should block
// for investigation.
// You should alert on this being halted.
// for investigation. You should alert on this being halted.
if compact.IsHaltError(err) {
if haltOnError {
level.Error(logger).Log("msg", "critical error detected; halting", "err", err)
Expand All @@ -299,7 +299,7 @@ func runCompact(
}

// The RetryError signals that we hit an retriable error (transient error, no connection).
// You should alert on this being triggered to frequently.
// You should alert on this being triggered too frequently.
if compact.IsRetryError(err) {
level.Error(logger).Log("msg", "retriable error", "err", err)
retried.Inc()
Expand Down
30 changes: 24 additions & 6 deletions pkg/compact/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,13 @@ import (
"path"
"path/filepath"
"sort"
"strings"
"sync"
"time"

"github.com/improbable-eng/thanos/pkg/block/metadata"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/improbable-eng/thanos/pkg/block"
"github.com/improbable-eng/thanos/pkg/block/metadata"
"github.com/improbable-eng/thanos/pkg/compact/downsample"
"github.com/improbable-eng/thanos/pkg/objstore"
"github.com/oklog/ulid"
Expand Down Expand Up @@ -613,7 +611,17 @@ func (e HaltError) Error() string {
}

// IsHaltError returns true if the base error is a HaltError.
// If a multierror is passed, any halt error will return true.
func IsHaltError(err error) bool {
if multiErr, ok := err.(tsdb.MultiError); ok {
for _, err := range multiErr {
if _, ok := errors.Cause(err).(HaltError); ok {
return true
}
}
return false
}

_, ok := errors.Cause(err).(HaltError)
return ok
}
Expand All @@ -636,7 +644,17 @@ func (e RetryError) Error() string {
}

// IsRetryError returns true if the base error is a RetryError.
// If a multierror is passed, all errors must be retriable.
func IsRetryError(err error) bool {
if multiErr, ok := err.(tsdb.MultiError); ok {
for _, err := range multiErr {
if _, ok := errors.Cause(err).(RetryError); !ok {
return false
}
}
return true
}

_, ok := errors.Cause(err).(RetryError)
return ok
}
Expand Down Expand Up @@ -1037,12 +1055,12 @@ func (c *BucketCompactor) Compact(ctx context.Context) error {
close(errChan)
workCtxCancel()
if err != nil {
errMsgs := []string{err.Error()}
errs := tsdb.MultiError{err}
// Collect any other errors reported by the workers.
for e := range errChan {
errMsgs = append(errMsgs, e.Error())
errs.Add(e)
}
return errors.New(strings.Join(errMsgs, "; "))
return errs
}

if finishedAllGroups {
Expand Down
26 changes: 26 additions & 0 deletions pkg/compact/compact_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/improbable-eng/thanos/pkg/testutil"
"github.com/oklog/ulid"
"github.com/pkg/errors"
"github.com/prometheus/tsdb"
)

func TestHaltError(t *testing.T) {
Expand All @@ -27,6 +28,31 @@ func TestHaltError(t *testing.T) {
testutil.Assert(t, IsHaltError(err), "not a halt error")
}

func TestHaltMultiError(t *testing.T) {
haltErr := halt(errors.New("halt error"))
nonHaltErr := errors.New("not a halt error")

errs := tsdb.MultiError{nonHaltErr}
testutil.Assert(t, !IsHaltError(errs), "should not be a halt error")

errs.Add(haltErr)
testutil.Assert(t, IsHaltError(errs), "if any halt errors are present this should return true")
}

func TestRetryMultiError(t *testing.T) {
retryErr := retry(errors.New("retry error"))
nonRetryErr := errors.New("not a retry error")

errs := tsdb.MultiError{nonRetryErr}
testutil.Assert(t, !IsRetryError(errs), "should not be a retry error")

errs = tsdb.MultiError{retryErr}
testutil.Assert(t, IsRetryError(errs), "if all errors are retriable this should return true")

errs = tsdb.MultiError{nonRetryErr, retryErr}
testutil.Assert(t, !IsRetryError(errs), "mixed errors should return false")
}

func TestRetryError(t *testing.T) {
err := errors.New("test")
testutil.Assert(t, !IsRetryError(err), "retry error")
Expand Down

0 comments on commit 3dfb893

Please sign in to comment.