Skip to content

Commit

Permalink
kvserver: add support for allocator range check via store
Browse files Browse the repository at this point in the history
This change exposes support via a store for checking the allocator
action and upreplication target (if applicable) for any range descriptor.
The range does not need to have a replica on the given store, nor does it
need to be evaluated against the current state of the cluster (i.e. the
store's configured `StorePool`), as a node liveness override can be
provided in order to evaluate possible future states.

Depends on #92176.

Part of #91570.

Release note: None
  • Loading branch information
AlexTalks committed Nov 23, 2022
1 parent 345f771 commit f9a087e
Showing 1 changed file with 58 additions and 0 deletions.
58 changes: 58 additions & 0 deletions pkg/kv/kvserver/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/multiqueue"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftentry"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed"
Expand Down Expand Up @@ -3605,6 +3606,63 @@ func (s *Store) AllocatorDryRun(ctx context.Context, repl *Replica) (tracingpb.R
return collectAndFinish(), nil
}

// AllocatorCheckRange reports which allocator action is needed to repair the
// range described by desc and, where applicable, the upreplication target for
// that action. Alongside these it returns the tracing recording collected
// during the check and any error that was encountered.
//
// The method waits for the span config subscription, looks up the span config
// for the range's start key, and then asks the replicate queue to check the
// range against a store pool. If nodeLivenessOverride is nil, the store's
// configured StorePool is used as is. Otherwise the configured StorePool is
// wrapped in an override store pool whose liveness function consults
// nodeLivenessOverride first and falls back to the configured StorePool's
// liveness function whenever the override returns UNKNOWN.
//
// The range does not need to have a replica on this store in order to be
// checked.
//
// If the span config cannot be obtained, AllocatorNoop and an empty target are
// returned together with the error. Otherwise the action and target returned
// by the replicate queue are passed through unchanged, even when accompanied
// by an error.
//
// NB: In the case of removal or rebalance actions, a target cannot be
// evaluated, as a leaseholder is required for evaluation.
func (s *Store) AllocatorCheckRange(
	ctx context.Context,
	desc *roachpb.RangeDescriptor,
	nodeLivenessOverride storepool.NodeLivenessFunc,
) (allocatorimpl.AllocatorAction, roachpb.ReplicationTarget, tracingpb.Recording, error) {
	ctx, collectAndFinish := tracing.ContextWithRecordingSpan(ctx, s.cfg.AmbientCtx.Tracer, "allocator check range")
	// collectAndFinish is deferred here and also invoked explicitly on every
	// return path below to obtain the recording.
	// NOTE(review): this relies on collectAndFinish being safe to call more than
	// once -- confirm against the tracing package.
	defer collectAndFinish()

	// The span config lookup below requires the subscription to be ready.
	if err := s.WaitForSpanConfigSubscription(ctx); err != nil {
		log.Eventf(ctx, "span configs unavailable: %s", err)
		return allocatorimpl.AllocatorNoop, roachpb.ReplicationTarget{}, collectAndFinish(), err
	}

	conf, err := s.cfg.SpanConfigSubscriber.GetSpanConfigForKey(ctx, desc.StartKey)
	if err != nil {
		log.Eventf(ctx, "error retrieving span config for range %s: %s", desc, err)
		return allocatorimpl.AllocatorNoop, roachpb.ReplicationTarget{}, collectAndFinish(), err
	}

	// Pick the store pool to evaluate against. When neither case applies,
	// storePool is left as the nil interface value.
	var storePool storepool.AllocatorStorePool
	switch {
	case nodeLivenessOverride != nil:
		// Consult the caller's override first; UNKNOWN means "no opinion", in
		// which case the configured store pool's liveness function decides.
		livenessWithFallback := func(
			nid roachpb.NodeID, now time.Time, timeUntilStoreDead time.Duration,
		) livenesspb.NodeLivenessStatus {
			if overridden := nodeLivenessOverride(nid, now, timeUntilStoreDead); overridden != livenesspb.NodeLivenessStatus_UNKNOWN {
				return overridden
			}
			return s.cfg.StorePool.NodeLivenessFn(nid, now, timeUntilStoreDead)
		}
		storePool = storepool.NewOverrideStorePool(s.cfg.StorePool, livenessWithFallback)
	case s.cfg.StorePool != nil:
		// Assigned only when non-nil; presumably this avoids wrapping a nil
		// pointer in a non-nil interface value -- verify against StoreConfig.
		storePool = s.cfg.StorePool
	}

	action, target, err := s.replicateQueue.CheckRangeAction(ctx, storePool, desc, conf)
	if err != nil {
		// The error is recorded in the trace; action and target are still
		// returned to the caller as produced by the replicate queue.
		log.Eventf(ctx, "error simulating allocator on range %s: %s", desc, err)
	}

	return action, target, collectAndFinish(), err
}

// Enqueue runs the given replica through the requested queue. If `async` is
// specified, the replica is enqueued into the requested queue for asynchronous
// processing and this method returns nothing. Otherwise, it returns all trace
Expand Down

0 comments on commit f9a087e

Please sign in to comment.