diff --git a/pkg/kv/kvclient/kvcoord/replica_slice.go b/pkg/kv/kvclient/kvcoord/replica_slice.go
index 623285042c11..fc3e010ffdb1 100644
--- a/pkg/kv/kvclient/kvcoord/replica_slice.go
+++ b/pkg/kv/kvclient/kvcoord/replica_slice.go
@@ -81,7 +81,7 @@ func NewReplicaSlice(
 		}
 	}
 	canReceiveLease := func(rDesc roachpb.ReplicaDescriptor) bool {
-		if err := roachpb.CheckCanReceiveLease(rDesc, desc.Replicas()); err != nil {
+		if err := roachpb.CheckCanReceiveLease(rDesc, desc.Replicas(), true /* leaseHolderRemovalAllowed */); err != nil {
 			return false
 		}
 		return true
diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
index 776171b394ef..23d50fff1bb4 100644
--- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
+++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
@@ -1479,8 +1479,9 @@ func (a *Allocator) ValidLeaseTargets(
 ) []roachpb.ReplicaDescriptor {
 	candidates := make([]roachpb.ReplicaDescriptor, 0, len(existing))
 	replDescs := roachpb.MakeReplicaSet(existing)
+	lhRemovalAllowed := a.StorePool.St.Version.IsActive(ctx, clusterversion.EnableLeaseHolderRemoval)
 	for i := range existing {
-		if err := roachpb.CheckCanReceiveLease(existing[i], replDescs); err != nil {
+		if err := roachpb.CheckCanReceiveLease(existing[i], replDescs, lhRemovalAllowed); err != nil {
 			continue
 		}
 		// If we're not allowed to include the current replica, remove it from
diff --git a/pkg/kv/kvserver/batcheval/cmd_lease_request.go b/pkg/kv/kvserver/batcheval/cmd_lease_request.go
index c5f9b55b9f4c..2758a3861f1c 100644
--- a/pkg/kv/kvserver/batcheval/cmd_lease_request.go
+++ b/pkg/kv/kvserver/batcheval/cmd_lease_request.go
@@ -14,6 +14,7 @@ import (
 	"context"
 	"time"
 
+	"github.com/cockroachdb/cockroach/pkg/clusterversion"
 	"github.com/cockroachdb/cockroach/pkg/keys"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary/rspb"
@@ -67,9 +68,13 @@ func RequestLease(
 		Requested: args.Lease,
 	}
 
+	lhRemovalAllowed :=
+		cArgs.EvalCtx.ClusterSettings().Version.IsActive(ctx, clusterversion.EnableLeaseHolderRemoval)
 	// If this check is removed at some point, the filtering of learners on the
 	// sending side would have to be removed as well.
-	if err := roachpb.CheckCanReceiveLease(args.Lease.Replica, cArgs.EvalCtx.Desc().Replicas()); err != nil {
+	if err := roachpb.CheckCanReceiveLease(args.Lease.Replica, cArgs.EvalCtx.Desc().Replicas(),
+		lhRemovalAllowed,
+	); err != nil {
 		rErr.Message = err.Error()
 		return newFailedLeaseTrigger(false /* isTransfer */), rErr
 	}
diff --git a/pkg/kv/kvserver/batcheval/cmd_lease_test.go b/pkg/kv/kvserver/batcheval/cmd_lease_test.go
index e2afe6cd879b..41454001fbb2 100644
--- a/pkg/kv/kvserver/batcheval/cmd_lease_test.go
+++ b/pkg/kv/kvserver/batcheval/cmd_lease_test.go
@@ -130,9 +130,10 @@ func TestLeaseCommandLearnerReplica(t *testing.T) {
 	clock := hlc.NewClock(timeutil.NewManualTime(timeutil.Unix(0, 123)), time.Nanosecond /* maxOffset */)
 	cArgs := CommandArgs{
 		EvalCtx: (&MockEvalCtx{
-			StoreID: voterStoreID,
-			Desc:    &desc,
-			Clock:   clock,
+			ClusterSettings: cluster.MakeTestingClusterSettings(),
+			StoreID:         voterStoreID,
+			Desc:            &desc,
+			Clock:           clock,
 		}).EvalContext(),
 		Args: &roachpb.TransferLeaseRequest{
 			Lease: roachpb.Lease{
@@ -265,17 +266,37 @@ func TestCheckCanReceiveLease(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 
+	const none = roachpb.ReplicaType(-1)
+
 	for _, tc := range []struct {
-		leaseholderType roachpb.ReplicaType
-		eligible        bool
+		leaseholderType           roachpb.ReplicaType
+		anotherReplicaType        roachpb.ReplicaType
+		eligibleLhRemovalEnabled  bool
+		eligibleLhRemovalDisabled bool
 	}{
-		{leaseholderType: roachpb.VOTER_FULL, eligible: true},
-		{leaseholderType: roachpb.VOTER_INCOMING, eligible: true},
-		{leaseholderType: roachpb.VOTER_OUTGOING, eligible: false},
-		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, eligible: false},
-		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, eligible: false},
-		{leaseholderType: roachpb.LEARNER, eligible: false},
-		{leaseholderType: roachpb.NON_VOTER, eligible: false},
+		{leaseholderType: roachpb.VOTER_FULL, anotherReplicaType: none, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: true},
+		{leaseholderType: roachpb.VOTER_INCOMING, anotherReplicaType: none, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: true},
+
+		// A VOTER_OUTGOING should only be able to get the lease if there's a VOTER_INCOMING.
+		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+
+		// A VOTER_DEMOTING_LEARNER should only be able to get the lease if there's a VOTER_INCOMING.
+		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+
+		// A VOTER_DEMOTING_NON_VOTER should only be able to get the lease if there's a VOTER_INCOMING.
+		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+
+		{leaseholderType: roachpb.LEARNER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.NON_VOTER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
 	} {
 		t.Run(tc.leaseholderType.String(), func(t *testing.T) {
 			repDesc := roachpb.ReplicaDescriptor{
@@ -285,14 +306,25 @@ func TestCheckCanReceiveLease(t *testing.T) {
 			rngDesc := roachpb.RangeDescriptor{
 				InternalReplicas: []roachpb.ReplicaDescriptor{repDesc},
 			}
-			err := roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas())
-			require.Equal(t, tc.eligible, err == nil, "err: %v", err)
+			if tc.anotherReplicaType != none {
+				anotherDesc := roachpb.ReplicaDescriptor{
+					ReplicaID: 2,
+					Type:      &tc.anotherReplicaType,
+				}
+				rngDesc.InternalReplicas = append(rngDesc.InternalReplicas, anotherDesc)
+			}
+			err := roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas(), true)
+			require.Equal(t, tc.eligibleLhRemovalEnabled, err == nil, "err: %v", err)
+
+			err = roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas(), false)
+			require.Equal(t, tc.eligibleLhRemovalDisabled, err == nil, "err: %v", err)
 		})
 	}
 
 	t.Run("replica not in range desc", func(t *testing.T) {
 		repDesc := roachpb.ReplicaDescriptor{ReplicaID: 1}
 		rngDesc := roachpb.RangeDescriptor{}
-		require.Regexp(t, "replica.*not found", roachpb.CheckCanReceiveLease(repDesc, rngDesc.Replicas()))
+		require.Regexp(t, "replica.*not found", roachpb.CheckCanReceiveLease(repDesc,
+			rngDesc.Replicas(), true))
 	})
 }
diff --git a/pkg/kv/kvserver/batcheval/cmd_lease_transfer.go b/pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
index 3c75cbfbe27b..095b5fc65cc6 100644
--- a/pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
+++ b/pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
@@ -14,6 +14,7 @@ import (
 	"context"
 	"time"
 
+	"github.com/cockroachdb/cockroach/pkg/clusterversion"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary/rspb"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
@@ -76,9 +77,13 @@ func TransferLease(
 	newLease := args.Lease
 	args.Lease = roachpb.Lease{} // prevent accidental use below
 
+	lhRemovalAllowed := cArgs.EvalCtx.ClusterSettings().Version.IsActive(ctx,
+		clusterversion.EnableLeaseHolderRemoval)
 	// If this check is removed at some point, the filtering of learners on the
 	// sending side would have to be removed as well.
-	if err := roachpb.CheckCanReceiveLease(newLease.Replica, cArgs.EvalCtx.Desc().Replicas()); err != nil {
+	if err := roachpb.CheckCanReceiveLease(
+		newLease.Replica, cArgs.EvalCtx.Desc().Replicas(), lhRemovalAllowed,
+	); err != nil {
 		return newFailedLeaseTrigger(true /* isTransfer */), err
 	}
 
diff --git a/pkg/kv/kvserver/client_lease_test.go b/pkg/kv/kvserver/client_lease_test.go
index a3e3e4777667..6cdb9c56976b 100644
--- a/pkg/kv/kvserver/client_lease_test.go
+++ b/pkg/kv/kvserver/client_lease_test.go
@@ -164,8 +164,9 @@ func TestGossipNodeLivenessOnLeaseChange(t *testing.T) {
 }
 
 // TestCannotTransferLeaseToVoterOutgoing ensures that the evaluation of lease
-// requests for nodes which are already in the VOTER_OUTGOING state will fail.
-func TestCannotTransferLeaseToVoterOutgoing(t *testing.T) {
+// requests for nodes which are already in the VOTER_DEMOTING_LEARNER state will fail
+// (in this test, there is no VOTER_INCOMING node).
+func TestCannotTransferLeaseToVoterDemoting(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 	ctx := context.Background()
@@ -173,16 +174,15 @@ func TestCannotTransferLeaseToVoterOutgoing(t *testing.T) {
 	knobs, ltk := makeReplicationTestKnobs()
 	// Add a testing knob to allow us to block the change replicas command
 	// while it is being proposed. When we detect that the change replicas
-	// command to move n3 to VOTER_OUTGOING has been evaluated, we'll send
-	// the request to transfer the lease to n3. The hope is that it will
-	// get past the sanity above latch acquisition prior to change replicas
-	// command committing.
-	var scratchRangeID atomic.Value
-	scratchRangeID.Store(roachpb.RangeID(0))
+	// command to move n3 to VOTER_DEMOTING_LEARNER has been evaluated, we'll
+	// send the request to transfer the lease to n3. The hope is that it will
+	// get past the sanity check above latch acquisition prior to the change
+	// replicas command committing.
+	var scratchRangeID int64
 	changeReplicasChan := make(chan chan struct{}, 1)
 	shouldBlock := func(args kvserverbase.ProposalFilterArgs) bool {
 		// Block if a ChangeReplicas command is removing a node from our range.
-		return args.Req.RangeID == scratchRangeID.Load().(roachpb.RangeID) &&
+		return args.Req.RangeID == roachpb.RangeID(atomic.LoadInt64(&scratchRangeID)) &&
 			args.Cmd.ReplicatedEvalResult.ChangeReplicas != nil &&
 			len(args.Cmd.ReplicatedEvalResult.ChangeReplicas.Removed()) > 0
 	}
@@ -205,7 +205,7 @@ func TestCannotTransferLeaseToVoterOutgoing(t *testing.T) {
 
 	scratchStartKey := tc.ScratchRange(t)
 	desc := tc.AddVotersOrFatal(t, scratchStartKey, tc.Targets(1, 2)...)
-	scratchRangeID.Store(desc.RangeID)
+	atomic.StoreInt64(&scratchRangeID, int64(desc.RangeID))
 	// Make sure n1 has the lease to start with.
 	err := tc.Server(0).DB().AdminTransferLease(context.Background(),
 		scratchStartKey, tc.Target(0).StoreID)
@@ -214,7 +214,7 @@ func TestCannotTransferLeaseToVoterOutgoing(t *testing.T) {
 	// The test proceeds as follows:
 	//
 	// - Send an AdminChangeReplicasRequest to remove n3 and add n4
-	// - Block the step that moves n3 to VOTER_OUTGOING on changeReplicasChan
+	// - Block the step that moves n3 to VOTER_DEMOTING_LEARNER on changeReplicasChan
 	// - Send an AdminLeaseTransfer to make n3 the leaseholder
 	// - Try really hard to make sure that the lease transfer at least gets to
 	//   latch acquisition before unblocking the ChangeReplicas.
@@ -229,7 +229,6 @@ func TestCannotTransferLeaseToVoterOutgoing(t *testing.T) {
 			_, err = tc.Server(0).DB().AdminChangeReplicas(ctx,
 				scratchStartKey, desc, []roachpb.ReplicationChange{
 					{ChangeType: roachpb.REMOVE_VOTER, Target: tc.Target(2)},
-					{ChangeType: roachpb.ADD_VOTER, Target: tc.Target(3)},
 				})
 			require.NoError(t, err)
 		}()
@@ -262,7 +261,191 @@ func TestCannotTransferLeaseToVoterOutgoing(t *testing.T) {
 		close(ch)
 		wg.Wait()
 	})
+}
+
+// TestTransferLeaseToVoterDemotingWithIncoming ensures that the evaluation of lease
+// requests for nodes which are already in the VOTER_DEMOTING_LEARNER state succeeds
+// when there is a VOTER_INCOMING node.
+func TestTransferLeaseToVoterDemotingWithIncoming(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+	ctx := context.Background()
+
+	knobs, ltk := makeReplicationTestKnobs()
+	// Add a testing knob to allow us to block the change replicas command
+	// while it is being proposed. When we detect that the change replicas
+	// command to move n3 to VOTER_DEMOTING_LEARNER has been evaluated, we'll
+	// send the request to transfer the lease to n3. The hope is that it will
+	// get past the sanity check above latch acquisition prior to the change
+	// replicas command committing.
+	var scratchRangeID int64
+	changeReplicasChan := make(chan chan struct{}, 1)
+	shouldBlock := func(args kvserverbase.ProposalFilterArgs) bool {
+		// Block if a ChangeReplicas command is removing a node from our range.
+		return args.Req.RangeID == roachpb.RangeID(atomic.LoadInt64(&scratchRangeID)) &&
+			args.Cmd.ReplicatedEvalResult.ChangeReplicas != nil &&
+			len(args.Cmd.ReplicatedEvalResult.ChangeReplicas.Removed()) > 0
+	}
+	blockIfShould := func(args kvserverbase.ProposalFilterArgs) {
+		if shouldBlock(args) {
+			ch := make(chan struct{})
+			changeReplicasChan <- ch
+			<-ch
+		}
+	}
+	knobs.Store.(*kvserver.StoreTestingKnobs).TestingProposalFilter = func(args kvserverbase.ProposalFilterArgs) *roachpb.Error {
+		blockIfShould(args)
+		return nil
+	}
+	tc := testcluster.StartTestCluster(t, 4, base.TestClusterArgs{
+		ServerArgs:      base.TestServerArgs{Knobs: knobs},
+		ReplicationMode: base.ReplicationManual,
+	})
+	defer tc.Stopper().Stop(ctx)
+
+	scratchStartKey := tc.ScratchRange(t)
+	desc := tc.AddVotersOrFatal(t, scratchStartKey, tc.Targets(1, 2)...)
+	atomic.StoreInt64(&scratchRangeID, int64(desc.RangeID))
+	// Make sure n1 has the lease to start with.
+	err := tc.Server(0).DB().AdminTransferLease(context.Background(),
+		scratchStartKey, tc.Target(0).StoreID)
+	require.NoError(t, err)
+
+	// The test proceeds as follows:
+	//
+	// - Send an AdminChangeReplicasRequest to remove n3 and add n4
+	// - Block the step that moves n3 to VOTER_DEMOTING_LEARNER on changeReplicasChan
+	// - Send an AdminLeaseTransfer to make n3 the leaseholder
+	// - Try really hard to make sure that the lease transfer at least gets to
+	//   latch acquisition before unblocking the ChangeReplicas.
+	// - Unblock the ChangeReplicas.
+	// - Make sure the lease transfer succeeds.
+	ltk.withStopAfterJointConfig(func() {
+		var wg sync.WaitGroup
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			_, err = tc.Server(0).DB().AdminChangeReplicas(ctx,
+				scratchStartKey, desc, []roachpb.ReplicationChange{
+					{ChangeType: roachpb.REMOVE_VOTER, Target: tc.Target(2)},
+					{ChangeType: roachpb.ADD_VOTER, Target: tc.Target(3)},
+				})
+			require.NoError(t, err)
+		}()
+		ch := <-changeReplicasChan
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			// Make sure the lease is currently on n1.
+			desc, err := tc.LookupRange(scratchStartKey)
+			require.NoError(t, err)
+			leaseHolder, err := tc.FindRangeLeaseHolder(desc, nil)
+			require.NoError(t, err)
+			require.Equal(t, tc.Target(0), leaseHolder,
+				errors.Errorf("Leaseholder supposed to be on n1."))
+			// Move the lease to n3, the VOTER_DEMOTING_LEARNER.
+			err = tc.Server(0).DB().AdminTransferLease(context.Background(),
+				scratchStartKey, tc.Target(2).StoreID)
+			require.NoError(t, err)
+			// Make sure the lease moved to n3.
+			leaseHolder, err = tc.FindRangeLeaseHolder(desc, nil)
+			require.NoError(t, err)
+			require.Equal(t, tc.Target(2), leaseHolder,
+				errors.Errorf("Leaseholder supposed to be on n3."))
+		}()
+		// Try really hard to make sure that our request makes it past the
+		// sanity check error to the evaluation error.
+		for i := 0; i < 100; i++ {
+			runtime.Gosched()
+			time.Sleep(time.Microsecond)
+		}
+		close(ch)
+		wg.Wait()
+	})
+}
+
+// TestTransferLeaseFailureDuringJointConfig reproduces
+// https://github.com/cockroachdb/cockroach/issues/83687
+// and makes sure that if lease transfer fails during a joint configuration
+// the previous leaseholder will successfully re-acquire the lease.
+// The test proceeds as follows:
+// - Creates a range with 3 replicas n1, n2, n3, and makes sure the lease is on n1
+// - Makes sure lease transfers on this range fail from now on
+// - Invokes AdminChangeReplicas to remove n1 and add n4
+// - This causes the range to go into a joint configuration. A lease transfer
+//   is attempted to move the lease from n1 to n4 before exiting the joint config,
+//   but that fails, causing us to remain in the joint configuration with the original
+//   leaseholder having revoked its lease, but everyone else thinking it's still
+//   the leaseholder. In this situation, only n1 can re-acquire the lease as long as it is live.
+// - We re-enable lease transfers on this range.
+// - n1 is able to re-acquire the lease, due to the fix in #83686 which enables a
+//   VOTER_DEMOTING_LEARNER (n1) replica to get the lease if there's also a VOTER_INCOMING
+//   which is the case here (n4).
+// - n1 transfers the lease away and the range leaves the joint configuration.
+func TestTransferLeaseFailureDuringJointConfig(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+	ctx := context.Background()
+
+	knobs := base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{}}
+	// Add a testing knob to allow us to fail all lease transfer commands on this
+	// range when they are being proposed.
+	var scratchRangeID int64
+	shouldFailProposal := func(args kvserverbase.ProposalFilterArgs) bool {
+		// Fail if a lease transfer request is being proposed on our range.
+		return args.Req.RangeID == roachpb.RangeID(atomic.LoadInt64(&scratchRangeID)) &&
+			args.Req.IsSingleTransferLeaseRequest()
+	}
+	knobs.Store.(*kvserver.StoreTestingKnobs).TestingProposalFilter = func(args kvserverbase.ProposalFilterArgs) *roachpb.Error {
+		if shouldFailProposal(args) {
+			return roachpb.NewErrorf("Injecting lease transfer failure")
+		}
+		return nil
+	}
+	tc := testcluster.StartTestCluster(t, 4, base.TestClusterArgs{
+		ServerArgs:      base.TestServerArgs{Knobs: knobs},
+		ReplicationMode: base.ReplicationManual,
+	})
+	defer tc.Stopper().Stop(ctx)
+
+	scratchStartKey := tc.ScratchRange(t)
+	desc := tc.AddVotersOrFatal(t, scratchStartKey, tc.Targets(1, 2)...)
+	// Make sure n1 has the lease to start with.
+	err := tc.Server(0).DB().AdminTransferLease(context.Background(),
+		scratchStartKey, tc.Target(0).StoreID)
+	require.NoError(t, err)
+
+	// The next lease transfer should fail.
+	atomic.StoreInt64(&scratchRangeID, int64(desc.RangeID))
+
+	_, err = tc.Server(0).DB().AdminChangeReplicas(ctx,
+		scratchStartKey, desc, []roachpb.ReplicationChange{
+			{ChangeType: roachpb.REMOVE_VOTER, Target: tc.Target(0)},
+			{ChangeType: roachpb.ADD_VOTER, Target: tc.Target(3)},
+		})
+	require.Error(t, err)
+	require.Regexp(t, "Injecting lease transfer failure", err)
+
+	// We're now in a joint configuration, n1 already revoked its lease but all
+	// other replicas think n1 is the leaseholder. As long as n1 is alive, it is
+	// the only one that can get the lease. It will retry and be able to do that
+	// thanks to the fix in #83686.
+	desc, err = tc.LookupRange(scratchStartKey)
+	require.NoError(t, err)
+	require.True(t, desc.Replicas().InAtomicReplicationChange())
+
+	// Further lease transfers should succeed, allowing the atomic replication change to complete.
+	atomic.StoreInt64(&scratchRangeID, 0)
+	store := tc.GetFirstStoreFromServer(t, 0)
+	repl := store.LookupReplica(roachpb.RKey(scratchStartKey))
+	_, _, err = store.Enqueue(
+		ctx, "replicate", repl, true /* skipShouldQueue */, false, /* async */
+	)
+	require.NoError(t, err)
+	desc, err = tc.LookupRange(scratchStartKey)
+	require.NoError(t, err)
+	require.False(t, desc.Replicas().InAtomicReplicationChange())
 }
 
 // TestStoreLeaseTransferTimestampCacheRead verifies that the timestamp cache on
diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go
index b93a79e17640..b68a30f2a287 100644
--- a/pkg/kv/kvserver/replica_proposal_buf.go
+++ b/pkg/kv/kvserver/replica_proposal_buf.go
@@ -15,6 +15,7 @@ import (
 	"sync"
 	"sync/atomic"
 
+	"github.com/cockroachdb/cockroach/pkg/clusterversion"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/tracker"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftutil"
@@ -150,7 +151,7 @@ type proposer interface {
 	// The following require the proposer to hold an exclusive lock.
 	withGroupLocked(func(proposerRaft) error) error
 	registerProposalLocked(*ProposalData)
-	leaderStatusRLocked(raftGroup proposerRaft) rangeLeaderInfo
+	leaderStatusRLocked(ctx context.Context, raftGroup proposerRaft) rangeLeaderInfo
 	ownsValidLeaseRLocked(ctx context.Context, now hlc.ClockTimestamp) bool
 	// rejectProposalWithRedirectLocked rejects a proposal and redirects the
 	// proposer to try it on another node. This is used to sometimes reject lease
@@ -713,7 +714,7 @@ func (b *propBuf) maybeRejectUnsafeProposalLocked(
 // leaderStatusRLocked returns the rangeLeaderInfo for the provided raft group,
 // or an empty rangeLeaderInfo if the raftGroup is nil.
 func (b *propBuf) leaderStatusRLocked(ctx context.Context, raftGroup proposerRaft) rangeLeaderInfo {
-	leaderInfo := b.p.leaderStatusRLocked(raftGroup)
+	leaderInfo := b.p.leaderStatusRLocked(ctx, raftGroup)
 	// Sanity check.
 	if leaderInfo.leaderKnown && leaderInfo.leader == b.p.getReplicaID() &&
 		!leaderInfo.iAmTheLeader {
@@ -1185,7 +1186,9 @@ func (rp *replicaProposer) registerProposalLocked(p *ProposalData) {
 	rp.mu.proposals[p.idKey] = p
 }
 
-func (rp *replicaProposer) leaderStatusRLocked(raftGroup proposerRaft) rangeLeaderInfo {
+func (rp *replicaProposer) leaderStatusRLocked(
+	ctx context.Context, raftGroup proposerRaft,
+) rangeLeaderInfo {
 	r := (*Replica)(rp)
 
 	status := raftGroup.Status()
@@ -1205,7 +1208,9 @@ func (rp *replicaProposer) leaderStatusRLocked(raftGroup proposerRaft) rangeLead
 			// lease again, and by then hopefully we will have caught up.
 			leaderEligibleForLease = true
 		} else {
-			err := roachpb.CheckCanReceiveLease(leaderRep, rangeDesc.Replicas())
+			lhRemovalAllowed := r.store.cfg.Settings.Version.IsActive(
+				ctx, clusterversion.EnableLeaseHolderRemoval)
+			err := roachpb.CheckCanReceiveLease(leaderRep, rangeDesc.Replicas(), lhRemovalAllowed)
 			leaderEligibleForLease = err == nil
 		}
 	}
diff --git a/pkg/kv/kvserver/replica_proposal_buf_test.go b/pkg/kv/kvserver/replica_proposal_buf_test.go
index 08b7efe6879d..e620fb47231e 100644
--- a/pkg/kv/kvserver/replica_proposal_buf_test.go
+++ b/pkg/kv/kvserver/replica_proposal_buf_test.go
@@ -177,7 +177,9 @@ func (t *testProposer) ownsValidLeaseRLocked(ctx context.Context, now hlc.ClockT
 	return t.ownsValidLease
 }
 
-func (t *testProposer) leaderStatusRLocked(raftGroup proposerRaft) rangeLeaderInfo {
+func (t *testProposer) leaderStatusRLocked(
+	ctx context.Context, raftGroup proposerRaft,
+) rangeLeaderInfo {
 	lead := raftGroup.Status().Lead
 	leaderKnown := lead != raft.None
 	var leaderRep roachpb.ReplicaID
@@ -196,7 +198,7 @@ func (t *testProposer) leaderStatusRLocked(raftGroup proposerRaft) rangeLeaderIn
 		rngDesc := roachpb.RangeDescriptor{
 			InternalReplicas: []roachpb.ReplicaDescriptor{repDesc},
 		}
-		err := roachpb.CheckCanReceiveLease(repDesc, rngDesc.Replicas())
+		err := roachpb.CheckCanReceiveLease(repDesc, rngDesc.Replicas(), true)
 		leaderEligibleForLease = err == nil
 	} else {
 		// This matches replicaProposed.leaderStatusRLocked().
diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go
index 0ca8a730de6c..39f4a23cf094 100644
--- a/pkg/kv/kvserver/replica_raft.go
+++ b/pkg/kv/kvserver/replica_raft.go
@@ -364,27 +364,39 @@ func (r *Replica) propose(
 		// replicas to 4, and if region1 goes down, we loose a quorum. Instead,
 		// we move to a joint config where v1 (VOTER_DEMOTING_LEARNER) transfer the
 		// lease to v4 (VOTER_INCOMING) directly.
+		//
+		// Our implementation assumes that the intention of the caller is for the
+		// VOTER_INCOMING node to be the replacement replica, and hence get the
+		// lease. We therefore don't dynamically select a lease target during the
+		// joint config, and hand it to the VOTER_INCOMING node. This means,
+		// however, that we only allow a VOTER_DEMOTING to have the lease in a
+		// joint configuration, when there's also a VOTER_INCOMING node (that
+		// will be used as a target for the lease transfer). Otherwise, the caller
+		// is expected to shed the lease before entering a joint configuration.
 		// See also https://github.com/cockroachdb/cockroach/issues/67740.
-		replID := r.ReplicaID()
-		rDesc, ok := p.command.ReplicatedEvalResult.State.Desc.GetReplicaDescriptorByID(replID)
-		hasVoterIncoming := p.command.ReplicatedEvalResult.State.Desc.ContainsVoterIncoming()
-		lhRemovalAllowed := hasVoterIncoming && r.store.cfg.Settings.Version.IsActive(ctx,
-			clusterversion.EnableLeaseHolderRemoval)
-		// Previously, we were not allowed to enter a joint config where the
-		// leaseholder is being removed (i.e., not a full voter). In the new version
-		// we're allowed to enter such a joint config (if it has a VOTER_INCOMING),
-		// but not to exit it in this state, i.e., the leaseholder must be some
-		// kind of voter in the next new config (potentially VOTER_DEMOTING).
-		if !ok ||
-			(lhRemovalAllowed && !rDesc.IsAnyVoter()) ||
-			(!lhRemovalAllowed && !rDesc.IsVoterNewConfig()) {
-			err := errors.Mark(errors.Newf("received invalid ChangeReplicasTrigger %s to remove "+
-				"self (leaseholder); hasVoterIncoming: %v, lhRemovalAllowed: %v; proposed descriptor: %v",
-				crt, hasVoterIncoming, lhRemovalAllowed, p.command.ReplicatedEvalResult.State.Desc),
-				errMarkInvalidReplicationChange)
-			log.Errorf(p.ctx, "%v", err)
+		lhRemovalAllowed := r.store.cfg.Settings.Version.IsActive(
+			ctx, clusterversion.EnableLeaseHolderRemoval)
+		lhDescriptor, err := r.GetReplicaDescriptor()
+		if err != nil {
 			return roachpb.NewError(err)
 		}
+		proposedDesc := p.command.ReplicatedEvalResult.State.Desc
+		// This is a reconfiguration command, we make sure the proposed
+		// config is legal w.r.t. the current leaseholder: we now allow the
+		// leaseholder to be a VOTER_DEMOTING as long as there is a VOTER_INCOMING.
+		// Otherwise, the leaseholder must be a full voter in the target config.
+		// This check won't allow exiting the joint config before the lease is
+		// transferred away. The previous leaseholder is a LEARNER in the target config,
+		// and therefore shouldn't continue holding the lease.
+		if err := roachpb.CheckCanReceiveLease(
+			lhDescriptor, proposedDesc.Replicas(), lhRemovalAllowed,
+		); err != nil {
+			e := errors.Mark(errors.Wrapf(err, "received invalid ChangeReplicasTrigger %s to "+
+				"remove self (leaseholder); lhRemovalAllowed: %v; proposed descriptor: %v", crt,
+				lhRemovalAllowed, proposedDesc), errMarkInvalidReplicationChange)
+			log.Errorf(p.ctx, "%v", e)
+			return roachpb.NewError(e)
+		}
 	} else if p.command.ReplicatedEvalResult.AddSSTable != nil {
 		log.VEvent(p.ctx, 4, "sideloadable proposal detected")
 		version = kvserverbase.RaftVersionSideloaded
diff --git a/pkg/roachpb/metadata.go b/pkg/roachpb/metadata.go
index 226391d1eb2e..a3206cf32b01 100644
--- a/pkg/roachpb/metadata.go
+++ b/pkg/roachpb/metadata.go
@@ -322,16 +322,6 @@ func (r *RangeDescriptor) GetReplicaDescriptorByID(replicaID ReplicaID) (Replica
 	return r.Replicas().GetReplicaDescriptorByID(replicaID)
 }
 
-// ContainsVoterIncoming returns true if the descriptor contains a VOTER_INCOMING replica.
-func (r *RangeDescriptor) ContainsVoterIncoming() bool {
-	for _, repDesc := range r.Replicas().Descriptors() {
-		if repDesc.GetType() == VOTER_INCOMING {
-			return true
-		}
-	}
-	return false
-}
-
 // IsInitialized returns false if this descriptor represents an
 // uninitialized range.
 // TODO(bdarnell): unify this with Validate().
diff --git a/pkg/roachpb/metadata_replicas.go b/pkg/roachpb/metadata_replicas.go
index ceb84c2a720f..903a58cf0b2f 100644
--- a/pkg/roachpb/metadata_replicas.go
+++ b/pkg/roachpb/metadata_replicas.go
@@ -114,6 +114,15 @@ func predVoterFullOrIncoming(rDesc ReplicaDescriptor) bool {
 	return false
 }
 
+func predVoterIncoming(rDesc ReplicaDescriptor) bool {
+	switch rDesc.GetType() {
+	case VOTER_INCOMING:
+		return true
+	default:
+	}
+	return false
+}
+
 func predLearner(rDesc ReplicaDescriptor) bool {
 	return rDesc.GetType() == LEARNER
 }
@@ -152,6 +161,10 @@ func (d ReplicaSet) VoterDescriptors() []ReplicaDescriptor {
 	return d.FilterToDescriptors(predVoterFullOrIncoming)
 }
 
+func (d ReplicaSet) containsVoterIncoming() bool {
+	return len(d.FilterToDescriptors(predVoterIncoming)) > 0
+}
+
 // LearnerDescriptors returns a slice of ReplicaDescriptors corresponding to
 // learner replicas in `d`. This may allocate, but it also may return the
 // underlying slice as a performance optimization, so it's not safe to modify
@@ -527,12 +540,28 @@ var errReplicaCannotHoldLease = errors.Errorf("replica cannot hold lease")
 // CheckCanReceiveLease checks whether `wouldbeLeaseholder` can receive a lease.
 // Returns an error if the respective replica is not eligible.
 //
+// Previously, we were not allowed to enter a joint config where the
+// leaseholder is being removed (i.e., not a full voter). In the new version
+// we're allowed to enter such a joint config (if it has a VOTER_INCOMING),
+// but not to exit it in this state, i.e., the leaseholder must be some
+// kind of voter in the next new config (potentially VOTER_DEMOTING).
+//
 // An error is also returned is the replica is not part of `replDescs`.
-func CheckCanReceiveLease(wouldbeLeaseholder ReplicaDescriptor, replDescs ReplicaSet) error {
+// leaseHolderRemovalAllowed is intended to check if the cluster version is
+// EnableLeaseHolderRemoval or higher.
+// TODO(shralex): remove this flag in 23.1
+func CheckCanReceiveLease(
+	wouldbeLeaseholder ReplicaDescriptor, replDescs ReplicaSet, leaseHolderRemovalAllowed bool,
+) error {
 	repDesc, ok := replDescs.GetReplicaDescriptorByID(wouldbeLeaseholder.ReplicaID)
 	if !ok {
 		return errReplicaNotFound
-	} else if !repDesc.IsVoterNewConfig() {
+	}
+	if !(repDesc.IsVoterNewConfig() ||
+		(repDesc.IsVoterOldConfig() && replDescs.containsVoterIncoming() && leaseHolderRemovalAllowed)) {
+		// We allow a demoting / outgoing voter to receive the lease if there's an incoming voter.
+		// In this case, when exiting the joint config, we will transfer the lease to the incoming
+		// voter.
 		return errReplicaCannotHoldLease
 	}
 	return nil