kvserver: introduce Allocator.ValidLeaseTargets()
This commit is a minor refactor of the `Allocator.TransferLeaseTarget` logic in
order to make it more readable and to abstract out a new exported `Allocator`
method called `ValidLeaseTargets()`.

The contract of `ValidLeaseTargets()` is as follows:
```
// ValidLeaseTargets returns a set of candidate stores that are suitable to be
// transferred a lease for the given range.
//
// - It excludes stores that are dead, or marked draining or suspect.
// - If the range has lease_preferences, and there are any non-draining,
// non-suspect nodes that match those preferences, it excludes stores that don't
// match those preferences.
// - It excludes replicas that may need snapshots. If the replica calling this
// method is not the Raft leader (meaning that it doesn't know whether follower
// replicas need a snapshot or not), this method produces no results.

```

Previously, there were multiple places where we were performing the logic
that's encapsulated by `ValidLeaseTargets()`, which was a potential source of
bugs. This is an attempt to unify this logic in one place that's relatively
well-tested. This commit is only a refactor, and does not attempt to change any
behavior. As such, no existing tests have been changed, with the exception of a
subtest inside `TestAllocatorTransferLeaseTargetDraining`. See the comment over
that subtest to understand why the behavior change made by this patch is
desirable.
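
For readers skimming the diff, the post-refactor call pattern looks roughly like the following. This is a minimal sketch, not code added by the commit: `pickLeaseTransferTarget` is a hypothetical helper assumed to live in package `kvserver` next to the allocator, and it elides the lease-count and follow-the-workload balancing that the real `TransferLeaseTarget` still performs.
```go
// Hypothetical illustration of how TransferLeaseTarget/ShouldTransferLease
// consume ValidLeaseTargets after this refactor (not part of this commit).
func pickLeaseTransferTarget(
	ctx context.Context,
	a *Allocator,
	conf roachpb.SpanConfig,
	existing []roachpb.ReplicaDescriptor,
	leaseRepl interface {
		RaftStatus() *raft.Status
		StoreID() roachpb.StoreID
	},
) (roachpb.ReplicaDescriptor, bool) {
	// If the current leaseholder violates a lease preference that some other
	// replica can satisfy, it must not appear in the result set.
	excludeLeaseRepl := a.leaseholderShouldMoveDueToPreferences(ctx, conf, leaseRepl, existing)

	// One call now performs all the filtering: dead/draining/suspect stores,
	// replicas that may be waiting for a snapshot, and lease preferences.
	candidates := a.ValidLeaseTargets(ctx, conf, existing, leaseRepl, excludeLeaseRepl)
	if len(candidates) == 0 ||
		(len(candidates) == 1 && candidates[0].StoreID == leaseRepl.StoreID()) {
		return roachpb.ReplicaDescriptor{}, false // no valid transfer target
	}
	// Lease-count / follow-the-workload balancing then picks among the
	// candidates; that logic is unchanged by this commit.
	return candidates[0], true
}
```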

The next commit in this PR uses this method to fix (at least part of) cockroachdb#74691.

Release note: none
aayushshah15 committed Sep 26, 2022
1 parent efe32e6 commit 0a1b5e1
Showing 3 changed files with 164 additions and 98 deletions.
228 changes: 139 additions & 89 deletions pkg/kv/kvserver/allocator.go
@@ -1237,6 +1237,118 @@ func (a *Allocator) scorerOptions() scorerOptions {
}
}

// ValidLeaseTargets returns a set of candidate stores that are suitable to be
// transferred a lease for the given range.
//
// - It excludes stores that are dead, or marked draining or suspect.
// - If the range has lease_preferences, and there are any non-draining,
// non-suspect nodes that match those preferences, it excludes stores that don't
// match those preferences.
// - It excludes replicas that may need snapshots. If the replica calling this
// method is not the Raft leader (meaning that it doesn't know whether follower
// replicas need a snapshot or not), this method produces no results.
func (a *Allocator) ValidLeaseTargets(
ctx context.Context,
conf roachpb.SpanConfig,
existing []roachpb.ReplicaDescriptor,
leaseRepl interface {
RaftStatus() *raft.Status
StoreID() roachpb.StoreID
},
// excludeLeaseRepl dictates whether the result set can include the source
// replica.
excludeLeaseRepl bool,
) []roachpb.ReplicaDescriptor {
candidates := make([]roachpb.ReplicaDescriptor, 0, len(existing))
for i := range existing {
if existing[i].GetType() != roachpb.VOTER_FULL {
continue
}
// If we're not allowed to include the current replica, remove it from
// consideration here.
if existing[i].StoreID == leaseRepl.StoreID() && excludeLeaseRepl {
continue
}
candidates = append(candidates, existing[i])
}
candidates, _ = a.storePool.liveAndDeadReplicas(
candidates, false, /* includeSuspectAndDrainingStores */
)

// Only proceed with the lease transfer if we are also the raft leader (we
// already know we are the leaseholder at this point), and only consider
// replicas that are in `StateReplicate` as potential candidates.
//
// NB: The RaftStatus() only returns a non-empty and non-nil result on the
// Raft leader (since Raft followers do not track the progress of other
// replicas, only the leader does).
//
// NB: On every Raft tick, we try to ensure that leadership is collocated with
// leaseholdership (see
// Replica.maybeTransferRaftLeadershipToLeaseholderLocked()). This means that
// on a range that is not already borked (i.e. can accept writes), periods of
// leader/leaseholder misalignment should be ephemeral and rare. We choose to
// be pessimistic here and choose to bail on the lease transfer, as opposed to
// potentially transferring the lease to a replica that may be waiting for a
// snapshot (which will wedge the range until the replica applies that
// snapshot).
candidates = excludeReplicasInNeedOfSnapshots(ctx, leaseRepl.RaftStatus(), candidates)

// Determine which store(s) is preferred based on user-specified preferences.
// If any stores match, only consider those stores as candidates.
preferred := a.preferredLeaseholders(conf, candidates)
if len(preferred) > 0 {
candidates = preferred
}
return candidates
}

// leaseholderShouldMoveDueToPreferences returns true if the current leaseholder
// is in violation of lease preferences _that can otherwise be satisfied_ by
// some existing replica.
//
// INVARIANT: This method should only be called with an `allExistingReplicas`
// slice that contains `leaseRepl`.
func (a *Allocator) leaseholderShouldMoveDueToPreferences(
ctx context.Context,
conf roachpb.SpanConfig,
leaseRepl interface {
RaftStatus() *raft.Status
StoreID() roachpb.StoreID
},
allExistingReplicas []roachpb.ReplicaDescriptor,
) bool {
// Defensive check to ensure that this is never called with a replica set that
// does not contain the leaseholder.
var leaseholderInExisting bool
for _, repl := range allExistingReplicas {
if repl.StoreID == leaseRepl.StoreID() {
leaseholderInExisting = true
break
}
}
if !leaseholderInExisting {
log.Errorf(ctx, "programming error: expected leaseholder store to be in the slice of existing replicas")
}

// Exclude suspect/draining/dead stores.
candidates, _ := a.storePool.liveAndDeadReplicas(
allExistingReplicas, false, /* includeSuspectAndDrainingStores */
)
// If there are any replicas that do match lease preferences, then we check if
// the existing leaseholder is one of them.
preferred := a.preferredLeaseholders(conf, candidates)
if len(preferred) == 0 {
return false
}
for _, repl := range preferred {
if repl.StoreID == leaseRepl.StoreID() {
return false
}
}
return true
}

// TransferLeaseTarget returns a suitable replica to transfer the range lease
// to from the provided list. It excludes the current lease holder replica
// unless asked to do otherwise by the checkTransferLeaseSource parameter.
@@ -1265,78 +1377,25 @@ func (a *Allocator) TransferLeaseTarget(
checkCandidateFullness bool,
alwaysAllowDecisionWithoutStats bool,
) roachpb.ReplicaDescriptor {
if a.leaseholderShouldMoveDueToPreferences(ctx, conf, leaseRepl, existing) {
// Explicitly exclude the current leaseholder from the result set if it is
// in violation of lease preferences that can be satisfied by some other
// replica.
checkTransferLeaseSource = false
}
excludeLeaseRepl := !checkTransferLeaseSource

sl, _, _ := a.storePool.getStoreList(storeFilterSuspect)
sl = sl.filter(conf.Constraints)
sl = sl.filter(conf.VoterConstraints)
// The only thing we use the storeList for is for the lease mean across the
// eligible stores, make that explicit here.
candidateLeasesMean := sl.candidateLeases.mean

source, ok := a.storePool.getStoreDescriptor(leaseRepl.StoreID())
if !ok {
return roachpb.ReplicaDescriptor{}
}

// Determine which store(s) is preferred based on user-specified preferences.
// If any stores match, only consider those stores as candidates. If only one
// store matches, it's where the lease should be (unless the preferred store
// is the current one and checkTransferLeaseSource is false).
var preferred []roachpb.ReplicaDescriptor
if checkTransferLeaseSource {
preferred = a.preferredLeaseholders(conf, existing)
} else {
// TODO(a-robinson): Should we just always remove the source store from
// existing when checkTransferLeaseSource is false? I'd do it now, but
// it's too big a change to make right before a major release.
var candidates []roachpb.ReplicaDescriptor
for _, repl := range existing {
if repl.StoreID != leaseRepl.StoreID() {
candidates = append(candidates, repl)
}
}
preferred = a.preferredLeaseholders(conf, candidates)
}
if len(preferred) == 1 {
if preferred[0].StoreID == leaseRepl.StoreID() {
return roachpb.ReplicaDescriptor{}
}
// Verify that the preferred replica is eligible to receive the lease.
preferred, _ = a.storePool.liveAndDeadReplicas(preferred, false /* includeSuspectAndDrainingStores */)
if len(preferred) == 1 {
return preferred[0]
}
return roachpb.ReplicaDescriptor{}
} else if len(preferred) > 1 {
// If the current leaseholder is not preferred, set checkTransferLeaseSource
// to false to motivate the below logic to transfer the lease.
existing = preferred
if !storeHasReplica(leaseRepl.StoreID(), roachpb.MakeReplicaSet(preferred).ReplicationTargets()) {
checkTransferLeaseSource = false
}
}

// Only consider live, non-draining, non-suspect replicas.
existing, _ = a.storePool.liveAndDeadReplicas(existing, false /* includeSuspectAndDrainingStores */)

// Only proceed with the lease transfer if we are also the raft leader (we
// already know we are the leaseholder at this point), and only consider
// replicas that are in `StateReplicate` as potential candidates.
//
// NB: The RaftStatus() only returns a non-empty and non-nil result on the
// Raft leader (since Raft followers do not track the progress of other
// replicas, only the leader does).
//
// NB: On every Raft tick, we try to ensure that leadership is collocated with
// leaseholdership (see
// Replica.maybeTransferRaftLeadershipToLeaseholderLocked()). This means that
// on a range that is not already borked (i.e. can accept writes), periods of
// leader/leaseholder misalignment should be ephemeral and rare. We choose to
// be pessimistic here and choose to bail on the lease transfer, as opposed to
// potentially transferring the lease to a replica that may be waiting for a
// snapshot (which will wedge the range until the replica applies that
// snapshot).
existing = excludeReplicasInNeedOfSnapshots(ctx, leaseRepl.RaftStatus(), existing)

existing = a.ValidLeaseTargets(ctx, conf, existing, leaseRepl, excludeLeaseRepl)
// Short-circuit if there are no valid targets out there.
if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseRepl.StoreID()) {
log.VEventf(ctx, 2, "no lease transfer target found for r%d", leaseRepl.GetRangeID())
@@ -1411,41 +1470,30 @@ func (a *Allocator) ShouldTransferLease(
ctx context.Context,
conf roachpb.SpanConfig,
existing []roachpb.ReplicaDescriptor,
leaseStoreID roachpb.StoreID,
leaseRepl interface {
RaftStatus() *raft.Status
StoreID() roachpb.StoreID
},
stats *replicaStats,
) bool {
source, ok := a.storePool.getStoreDescriptor(leaseStoreID)
if !ok {
return false
if a.leaseholderShouldMoveDueToPreferences(ctx, conf, leaseRepl, existing) {
return true
}
existing = a.ValidLeaseTargets(ctx, conf, existing, leaseRepl, false /* excludeLeaseRepl */)

// Determine which store(s) is preferred based on user-specified preferences.
// If any stores match, only consider those stores as options. If only one
// store matches, it's where the lease should be.
preferred := a.preferredLeaseholders(conf, existing)
if len(preferred) == 1 {
return preferred[0].StoreID != leaseStoreID
} else if len(preferred) > 1 {
existing = preferred
// If the current leaseholder isn't one of the preferred stores, then we
// should try to transfer the lease.
if !storeHasReplica(leaseStoreID, roachpb.MakeReplicaSet(existing).ReplicationTargets()) {
return true
}
// Short-circuit if there are no valid targets out there.
if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseRepl.StoreID()) {
return false
}
source, ok := a.storePool.getStoreDescriptor(leaseRepl.StoreID())
if !ok {
return false
}

sl, _, _ := a.storePool.getStoreList(storeFilterSuspect)
sl = sl.filter(conf.Constraints)
sl = sl.filter(conf.VoterConstraints)
log.VEventf(ctx, 3, "ShouldTransferLease (lease-holder=%d):\n%s", leaseStoreID, sl)

// Only consider live, non-draining, non-suspect replicas.
existing, _ = a.storePool.liveAndDeadReplicas(existing, false /* includeSuspectNodes */)

// Short-circuit if there are no valid targets out there.
if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == source.StoreID) {
return false
}
log.VEventf(ctx, 3, "ShouldTransferLease (lease-holder=%d):\n%s", leaseRepl, sl)

transferDec, _ := a.shouldTransferLeaseUsingStats(ctx, source, existing, stats, nil, sl.candidateLeases.mean)
var result bool
@@ -1460,7 +1508,9 @@ func (a *Allocator) ShouldTransferLease(
log.Fatalf(ctx, "unexpected transfer decision %d", transferDec)
}

log.VEventf(ctx, 3, "ShouldTransferLease decision (lease-holder=%d): %t", leaseStoreID, result)
log.VEventf(
ctx, 3, "ShouldTransferLease decision (lease-holder=s%d): %t", leaseRepl.StoreID(), result,
)
return result
}

31 changes: 24 additions & 7 deletions pkg/kv/kvserver/allocator_test.go
@@ -1697,9 +1697,14 @@ func TestAllocatorTransferLeaseTargetDraining(t *testing.T) {
// node 1 because it's draining.
{existing: existing, leaseholder: 3, check: true, expected: 2, conf: emptySpanConfig()},
{existing: existing, leaseholder: 3, check: false, expected: 2, conf: emptySpanConfig()},
// Verify that lease preferences don't impact draining
// Verify that lease preferences don't impact draining.
// If the store that is within the lease preferences (store 1) is draining,
// we'd like the lease to stay on the next best store (which is store 2).
{existing: existing, leaseholder: 2, check: true, expected: 0, conf: roachpb.SpanConfig{LeasePreferences: preferDC1}},
{existing: existing, leaseholder: 2, check: false, expected: 0, conf: roachpb.SpanConfig{LeasePreferences: preferDC1}},
// If the current lease on store 2 needs to be shed (indicated by
// checkTransferLeaseSource = false), and store 1 is draining, then store 3
// is the only reasonable lease transfer target.
{existing: existing, leaseholder: 2, check: false, expected: 3, conf: roachpb.SpanConfig{LeasePreferences: preferDC1}},
{existing: existing, leaseholder: 2, check: true, expected: 3, conf: roachpb.SpanConfig{LeasePreferences: preferRegion1}},
{existing: existing, leaseholder: 2, check: false, expected: 3, conf: roachpb.SpanConfig{LeasePreferences: preferRegion1}},
}
@@ -1969,7 +1974,10 @@ func TestAllocatorShouldTransferLease(t *testing.T) {
context.Background(),
emptySpanConfig(),
c.existing,
c.leaseholder,
&mockRepl{
storeID: c.leaseholder,
replicationFactor: int32(len(c.existing)),
},
nil, /* replicaStats */
)
if c.expected != result {
@@ -2031,7 +2039,10 @@ func TestAllocatorShouldTransferLeaseDraining(t *testing.T) {
context.Background(),
emptySpanConfig(),
c.existing,
c.leaseholder,
&mockRepl{
storeID: c.leaseholder,
replicationFactor: int32(len(c.existing)),
},
nil, /* replicaStats */
)
if c.expected != result {
@@ -2072,7 +2083,7 @@ func TestAllocatorShouldTransferSuspected(t *testing.T) {
context.Background(),
emptySpanConfig(),
replicas(1, 2, 3),
2,
&mockRepl{storeID: 2, replicationFactor: 3},
nil, /* replicaStats */
)
require.Equal(t, expected, result)
@@ -2211,7 +2222,10 @@ func TestAllocatorLeasePreferences(t *testing.T) {
context.Background(),
conf,
c.existing,
c.leaseholder,
&mockRepl{
storeID: c.leaseholder,
replicationFactor: int32(len(c.existing)),
},
nil, /* replicaStats */
)
expectTransfer := c.expectedCheckTrue != 0
@@ -2306,7 +2320,10 @@ func TestAllocatorLeasePreferencesMultipleStoresPerLocality(t *testing.T) {
expectedCheckFalse roachpb.StoreID /* checkTransferLeaseSource = false */
}{
{1, replicas(1, 3, 5), preferEast, 0, 3},
{1, replicas(1, 2, 3), preferEast, 0, 2},
// When `checkTransferLeaseSource` = false, we'd expect either store 2 or 3
// to be produced by `TransferLeaseTarget` (since both of them have
// less-than-mean leases). In this case, the rng should produce 3.
{1, replicas(1, 2, 3), preferEast, 0, 3},
{3, replicas(1, 3, 5), preferEast, 0, 1},
{5, replicas(1, 4, 5), preferEast, 1, 1},
{5, replicas(3, 4, 5), preferEast, 3, 3},
3 changes: 1 addition & 2 deletions pkg/kv/kvserver/replicate_queue.go
@@ -277,7 +277,7 @@ func (rq *replicateQueue) shouldQueue(
status := repl.LeaseStatusAt(ctx, now)
if status.IsValid() &&
rq.canTransferLeaseFrom(ctx, repl) &&
rq.allocator.ShouldTransferLease(ctx, conf, voterReplicas, status.Lease.Replica.StoreID, repl.leaseholderStats) {
rq.allocator.ShouldTransferLease(ctx, conf, voterReplicas, repl, repl.leaseholderStats) {

log.VEventf(ctx, 2, "lease transfer needed, enqueuing")
return true, 0
@@ -823,7 +823,6 @@ func (rq *replicateQueue) maybeTransferLeaseAway(
desc,
conf,
transferLeaseOptions{
goal: leaseCountConvergence,
dryRun: dryRun,
// NB: This option means that the allocator is asked to not consider the
// current replica in its set of potential candidates.
