Skip to content

Commit

Permalink
Merge #81005
Browse files Browse the repository at this point in the history
81005: kvserver: retry failures to rebalance decommissioning replicas r=aayushshah15 a=aayushshah15

Related to #80993
Relates to #79453


This commit makes it such that failures to rebalance replicas on
decommissioning nodes no longer move the replica out of the
replicateQueue as they previously used to. Instead, these failures now
put these replicas into the replicateQueue's purgatory, which will retry
these replicas every minute.

All this is intended to improve the speed of decommissioning towards
its tail end, since previously, failures to rebalance these replicas
meant that they were only retried after about 10 minutes.

Release note: None


Co-authored-by: Aayush Shah <[email protected]>
  • Loading branch information
craig[bot] and aayushshah15 committed Jun 12, 2022
2 parents c9c8a7c + 6f5122b commit 87da966
Show file tree
Hide file tree
Showing 14 changed files with 242 additions and 74 deletions.
5 changes: 2 additions & 3 deletions pkg/kv/kvserver/allocator_impl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"go.etcd.io/etcd/raft/v3"
"go.etcd.io/etcd/raft/v3/tracker"
)
Expand Down Expand Up @@ -254,7 +253,7 @@ func TestAllocatorThrottled(t *testing.T) {

// First test to make sure we would send the replica to purgatory.
_, _, err := a.AllocateVoter(ctx, simpleSpanConfig, []roachpb.ReplicaDescriptor{}, nil)
if !errors.HasInterface(err, (*purgatoryError)(nil)) {
if _, ok := IsPurgatoryError(err); !ok {
t.Fatalf("expected a purgatory error, got: %+v", err)
}

Expand All @@ -278,7 +277,7 @@ func TestAllocatorThrottled(t *testing.T) {
storeDetail.ThrottledUntil = timeutil.Now().Add(24 * time.Hour)
a.StorePool.DetailsMu.Unlock()
_, _, err = a.AllocateVoter(ctx, simpleSpanConfig, []roachpb.ReplicaDescriptor{}, nil)
if errors.HasInterface(err, (*purgatoryError)(nil)) {
if _, ok := IsPurgatoryError(err); ok {
t.Fatalf("expected a non purgatory error, got: %+v", err)
}
}
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/consistency_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,7 @@ func (q *consistencyQueue) timer(duration time.Duration) time.Duration {
func (*consistencyQueue) purgatoryChan() <-chan time.Time {
return nil
}

func (*consistencyQueue) updateChan() <-chan time.Time {
return nil
}
6 changes: 5 additions & 1 deletion pkg/kv/kvserver/merge_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ type rangeMergePurgatoryError struct{ error }

func (rangeMergePurgatoryError) PurgatoryErrorMarker() {}

var _ purgatoryError = rangeMergePurgatoryError{}
var _ PurgatoryError = rangeMergePurgatoryError{}

func (mq *mergeQueue) requestRangeStats(
ctx context.Context, key roachpb.Key,
Expand Down Expand Up @@ -433,3 +433,7 @@ func (mq *mergeQueue) timer(time.Duration) time.Duration {
func (mq *mergeQueue) purgatoryChan() <-chan time.Time {
return mq.purgChan
}

func (mq *mergeQueue) updateChan() <-chan time.Time {
return nil
}
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/mvcc_gc_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,3 +644,7 @@ func (*mvccGCQueue) timer(_ time.Duration) time.Duration {
func (*mvccGCQueue) purgatoryChan() <-chan time.Time {
return nil
}

func (*mvccGCQueue) updateChan() <-chan time.Time {
return nil
}
131 changes: 77 additions & 54 deletions pkg/kv/kvserver/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ func makeRateLimitedTimeoutFunc(rateSetting *settings.ByteSizeSetting) queueProc
// the operations's timeout.
const permittedRangeScanSlowdown = 10

// a purgatoryError indicates a replica processing failure which indicates
// the replica can be placed into purgatory for faster retries when the
// failure condition changes.
type purgatoryError interface {
// PurgatoryError indicates a replica processing failure which indicates the
// replica can be placed into purgatory for faster retries than the replica
// scanner's interval.
type PurgatoryError interface {
error
PurgatoryErrorMarker() // dummy method for unique interface
}
Expand Down Expand Up @@ -270,6 +270,11 @@ type queueImpl interface {
// purgatory due to failures. If purgatoryChan returns nil, failing
// replicas are not sent to purgatory.
purgatoryChan() <-chan time.Time

// updateChan returns a channel that is signalled whenever there is an update
// to the cluster state that might impact the replicas in the queue's
// purgatory.
updateChan() <-chan time.Time
}

// queueProcessTimeoutFunc controls the timeout for queue processing for a
Expand Down Expand Up @@ -380,7 +385,7 @@ type queueConfig struct {
//
// A queueImpl can opt into a purgatory by returning a non-nil channel from the
// `purgatoryChan` method. A replica is put into purgatory when the `process`
// method returns an error with a `purgatoryError` as an entry somewhere in the
// method returns an error with a `PurgatoryError` as an entry somewhere in the
// `Cause` chain. A replica in purgatory is not processed again until the
// channel is signaled, at which point every replica in purgatory is immediately
// processed. This catchup is run without the `timer` rate limiting but shares
Expand Down Expand Up @@ -414,7 +419,7 @@ type baseQueue struct {
syncutil.Mutex // Protects all variables in the mu struct
replicas map[roachpb.RangeID]*replicaItem // Map from RangeID to replicaItem
priorityQ priorityQueue // The priority queue
purgatory map[roachpb.RangeID]purgatoryError // Map of replicas to processing errors
purgatory map[roachpb.RangeID]PurgatoryError // Map of replicas to processing errors
stopped bool
// Some tests in this package disable queues.
disabled bool
Expand Down Expand Up @@ -987,8 +992,9 @@ func isBenign(err error) bool {
return errors.HasType(err, (*benignError)(nil))
}

func isPurgatoryError(err error) (purgatoryError, bool) {
var purgErr purgatoryError
// IsPurgatoryError returns true iff the given error is a purgatory error.
func IsPurgatoryError(err error) (PurgatoryError, bool) {
var purgErr PurgatoryError
return purgErr, errors.As(err, &purgErr)
}

Expand Down Expand Up @@ -1084,7 +1090,7 @@ func (bq *baseQueue) finishProcessingReplica(
// the failing replica to purgatory. Note that even if the item was
// scheduled to be requeued, we ignore this if we add the replica to
// purgatory.
if purgErr, ok := isPurgatoryError(err); ok {
if purgErr, ok := IsPurgatoryError(err); ok {
bq.mu.Lock()
bq.addToPurgatoryLocked(ctx, stopper, repl, purgErr)
bq.mu.Unlock()
Expand All @@ -1106,7 +1112,7 @@ func (bq *baseQueue) finishProcessingReplica(
// addToPurgatoryLocked adds the specified replica to the purgatory queue, which
// holds replicas which have failed processing.
func (bq *baseQueue) addToPurgatoryLocked(
ctx context.Context, stopper *stop.Stopper, repl replicaInQueue, purgErr purgatoryError,
ctx context.Context, stopper *stop.Stopper, repl replicaInQueue, purgErr PurgatoryError,
) {
bq.mu.AssertHeld()

Expand Down Expand Up @@ -1144,7 +1150,7 @@ func (bq *baseQueue) addToPurgatoryLocked(
}

// Otherwise, create purgatory and start processing.
bq.mu.purgatory = map[roachpb.RangeID]purgatoryError{
bq.mu.purgatory = map[roachpb.RangeID]PurgatoryError{
repl.GetRangeID(): purgErr,
}

Expand All @@ -1153,51 +1159,14 @@ func (bq *baseQueue) addToPurgatoryLocked(
ticker := time.NewTicker(purgatoryReportInterval)
for {
select {
case <-bq.impl.updateChan():
if bq.processReplicasInPurgatory(ctx, stopper) {
return
}
case <-bq.impl.purgatoryChan():
func() {
// Acquire from the process semaphore, release when done.
bq.processSem <- struct{}{}
defer func() { <-bq.processSem }()

// Remove all items from purgatory into a copied slice.
bq.mu.Lock()
ranges := make([]*replicaItem, 0, len(bq.mu.purgatory))
for rangeID := range bq.mu.purgatory {
item := bq.mu.replicas[rangeID]
if item == nil {
log.Fatalf(ctx, "r%d is in purgatory but not in replicas", rangeID)
}
item.setProcessing()
ranges = append(ranges, item)
bq.removeFromPurgatoryLocked(item)
}
bq.mu.Unlock()

for _, item := range ranges {
repl, err := bq.getReplica(item.rangeID)
if err != nil || item.replicaID != repl.ReplicaID() {
continue
}
annotatedCtx := repl.AnnotateCtx(ctx)
if stopper.RunTask(
annotatedCtx, bq.processOpName(), func(ctx context.Context) {
err := bq.processReplica(ctx, repl)
bq.finishProcessingReplica(ctx, stopper, repl, err)
}) != nil {
return
}
}
}()

// Clean up purgatory, if empty.
bq.mu.Lock()
if len(bq.mu.purgatory) == 0 {
log.Infof(ctx, "purgatory is now empty")
bq.mu.purgatory = nil
bq.mu.Unlock()
if bq.processReplicasInPurgatory(ctx, stopper) {
return
}
bq.mu.Unlock()
case <-ticker.C:
// Report purgatory status.
bq.mu.Lock()
Expand All @@ -1213,7 +1182,61 @@ func (bq *baseQueue) addToPurgatoryLocked(
return
}
}
})
},
)
}

// processReplicasInPurgatory processes replicas currently in the queue's
// purgatory.
func (bq *baseQueue) processReplicasInPurgatory(
ctx context.Context, stopper *stop.Stopper,
) (purgatoryCleared bool) {
func() {
// Acquire from the process semaphore, release when done.
bq.processSem <- struct{}{}
defer func() { <-bq.processSem }()

// Remove all items from purgatory into a copied slice.
bq.mu.Lock()
ranges := make([]*replicaItem, 0, len(bq.mu.purgatory))
for rangeID := range bq.mu.purgatory {
item := bq.mu.replicas[rangeID]
if item == nil {
log.Fatalf(ctx, "r%d is in purgatory but not in replicas", rangeID)
}
item.setProcessing()
ranges = append(ranges, item)
bq.removeFromPurgatoryLocked(item)
}
bq.mu.Unlock()

for _, item := range ranges {
repl, err := bq.getReplica(item.rangeID)
if err != nil || item.replicaID != repl.ReplicaID() {
continue
}
annotatedCtx := repl.AnnotateCtx(ctx)
if stopper.RunTask(
annotatedCtx, bq.processOpName(), func(ctx context.Context) {
err := bq.processReplica(ctx, repl)
bq.finishProcessingReplica(ctx, stopper, repl, err)
},
) != nil {
return
}
}
}()

// Clean up purgatory, if empty.
bq.mu.Lock()
if len(bq.mu.purgatory) == 0 {
log.Infof(ctx, "purgatory is now empty")
bq.mu.purgatory = nil
bq.mu.Unlock()
return true /* purgatoryCleared */
}
bq.mu.Unlock()
return false /* purgatoryCleared */
}

// pop dequeues the highest priority replica, if any, in the queue. The
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/queue_concurrency_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ func (fakeQueueImpl) purgatoryChan() <-chan time.Time {
return time.After(time.Nanosecond)
}

func (fakeQueueImpl) updateChan() <-chan time.Time {
return nil
}

type fakeReplica struct {
rangeID roachpb.RangeID
replicaID roachpb.ReplicaID
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ func (tq *testQueueImpl) purgatoryChan() <-chan time.Time {
return tq.pChan
}

func (tq *testQueueImpl) updateChan() <-chan time.Time {
return nil
}

func makeTestBaseQueue(name string, impl queueImpl, store *Store, cfg queueConfig) *baseQueue {
if !cfg.acceptsUnsplitRanges {
// Needed in order to pass the validation in newBaseQueue.
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/raft_log_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,10 @@ func (*raftLogQueue) purgatoryChan() <-chan time.Time {
return nil
}

func (*raftLogQueue) updateChan() <-chan time.Time {
return nil
}

func isLooselyCoupledRaftLogTruncationEnabled(
ctx context.Context, settings *cluster.Settings,
) bool {
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/raft_snapshot_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,7 @@ func (*raftSnapshotQueue) timer(_ time.Duration) time.Duration {
func (rq *raftSnapshotQueue) purgatoryChan() <-chan time.Time {
return nil
}

func (rq *raftSnapshotQueue) updateChan() <-chan time.Time {
return nil
}
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/replica_gc_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,3 +373,7 @@ func (*replicaGCQueue) timer(_ time.Duration) time.Duration {
func (*replicaGCQueue) purgatoryChan() <-chan time.Time {
return nil
}

func (*replicaGCQueue) updateChan() <-chan time.Time {
return nil
}
Loading

0 comments on commit 87da966

Please sign in to comment.