Skip to content

Commit

Permalink
Merge #129684
Browse files Browse the repository at this point in the history
129684: kv: don't campaign on non-epoch lease redirect r=nvanbenschoten a=nvanbenschoten

Informs #125254.

Doing so is not needed and will be a no-op if the follower is fortified anyway (see next commit).

Also, add a comment mentioning that calls to `campaignLocked` will be no-ops (as of #129158) for fortified followers.

Release note: None

Co-authored-by: Nathan VanBenschoten <[email protected]>
  • Loading branch information
craig[bot] and nvanbenschoten committed Aug 26, 2024
2 parents 0122d9c + a5f997d commit 2cc17ab
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 18 deletions.
10 changes: 6 additions & 4 deletions pkg/kv/kvserver/replica_proposal_buf.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ type proposer interface {
leaseAppliedIndex() kvpb.LeaseAppliedIndex
enqueueUpdateCheck()
closedTimestampTarget() hlc.Timestamp
shouldCampaignOnRedirect(raftGroup proposerRaft) bool
shouldCampaignOnRedirect(raftGroup proposerRaft, leaseType roachpb.LeaseType) bool

// The following require the proposer to hold an exclusive lock.
withGroupLocked(func(proposerRaft) error) error
Expand Down Expand Up @@ -663,7 +663,7 @@ func (b *propBuf) maybeRejectUnsafeProposalLocked(

// TODO(nvanbenschoten): move this to replica_range_lease.go when we
// support build-time verification for lease acquisition. See #118435.
if p.Request.IsSingleRequestLeaseRequest() && b.p.shouldCampaignOnRedirect(raftGroup) {
if p.Request.IsSingleRequestLeaseRequest() && b.p.shouldCampaignOnRedirect(raftGroup, nextLease.Type()) {
const format = "campaigning because Raft leader (id=%d) not live in node liveness map"
lead := raftGroup.BasicStatus().Lead
if logCampaignOnRejectLease.ShouldLog() {
Expand Down Expand Up @@ -1221,14 +1221,16 @@ func (rp *replicaProposer) registerProposalLocked(p *ProposalData) {
rp.mu.proposals[p.idKey] = p
}

func (rp *replicaProposer) shouldCampaignOnRedirect(raftGroup proposerRaft) bool {
func (rp *replicaProposer) shouldCampaignOnRedirect(
raftGroup proposerRaft, leaseType roachpb.LeaseType,
) bool {
r := (*Replica)(rp)
livenessMap, _ := r.store.livenessMap.Load().(livenesspb.IsLiveMap)
return shouldCampaignOnLeaseRequestRedirect(
raftGroup.BasicStatus(),
livenessMap,
r.descRLocked(),
r.shouldUseExpirationLeaseRLocked(),
leaseType,
r.store.Clock().Now(),
)
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/kv/kvserver/replica_proposal_buf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ func (t *testProposer) registerProposalLocked(p *ProposalData) {
t.registered++
}

func (t *testProposer) shouldCampaignOnRedirect(raftGroup proposerRaft) bool {
func (t *testProposer) shouldCampaignOnRedirect(
raftGroup proposerRaft, leaseType roachpb.LeaseType,
) bool {
return t.leaderNotLive
}

Expand Down
26 changes: 21 additions & 5 deletions pkg/kv/kvserver/replica_raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -2336,7 +2336,7 @@ func shouldCampaignOnLeaseRequestRedirect(
raftStatus raft.BasicStatus,
livenessMap livenesspb.IsLiveMap,
desc *roachpb.RangeDescriptor,
shouldUseExpirationLease bool,
leaseType roachpb.LeaseType,
now hlc.Timestamp,
) bool {
// If we're already campaigning don't start a new term.
Expand All @@ -2351,14 +2351,14 @@ func shouldCampaignOnLeaseRequestRedirect(
if raftStatus.Lead == raft.None {
return true
}
// If we should be using an expiration lease then we don't need to campaign
// If we don't want to use an epoch-based lease then we don't need to campaign
// based on liveness state because there can never be a case where a node can
// retain Raft leadership but still be unable to acquire the lease. This is
// possible on ranges that use epoch-based leases because the Raft leader may
// be partitioned from the liveness range.
// See TestRequestsOnFollowerWithNonLiveLeaseholder for an example of a test
// that demonstrates this case.
if shouldUseExpirationLease {
if leaseType != roachpb.LeaseEpoch {
return false
}
// Determine if we think the leader is alive, if we don't have the leader in
Expand Down Expand Up @@ -2386,14 +2386,26 @@ func shouldCampaignOnLeaseRequestRedirect(

// campaignLocked campaigns for raft leadership, using PreVote and, if
// CheckQuorum is enabled, the recent leader condition. That is, followers will
// not grant prevotes if we're behind on the log and, with CheckQuorum, if
// not grant (pre)votes if we're behind on the log and, with CheckQuorum, if
// they've heard from a leader in the past election timeout interval.
// Additionally, the local replica will not even begin to campaign if the recent
// leader condition does not allow it to (i.e. this method will be a no-op).
//
// The "recent leader condition" is based on raft heartbeats for ranges that are
// not using the leader fortification protocol. Followers will not vote against
// a leader if they have recently received a heartbeat (or other message) from
// it. For ranges that are using the leader fortification protocol, the "recent
// leader condition" is based on whether a follower is supporting a fortified
// leader. Followers will not campaign or vote against a leader who's fortified
// store liveness epoch they currently support.
//
// The CheckQuorum condition can delay elections, particularly with quiesced
// ranges that don't tick. However, it is necessary to avoid spurious elections
// and stolen leaderships during partial/asymmetric network partitions, which
// can lead to permanent unavailability if the leaseholder can no longer reach
// the leader.
// the leader. For ranges using the leader fortification protocol, it is also
// necessary to implement irrevocable leader support upon which leader leases
// are built.
//
// Only followers enforce the CheckQuorum recent leader condition though, so if
// a quorum of followers consider the leader dead and choose to become
Expand All @@ -2420,6 +2432,10 @@ func (r *Replica) campaignLocked(ctx context.Context) {
// under partial/asymmetric network partitions. It should only be used when the
// caller is certain that the current leader is actually dead, and we're not
// simply partitioned away from it and/or liveness.
//
// TODO(nvanbenschoten): this is the remaining logic which needs work in order
// to complete #125254. See the comment in raft.go about how even a local
// fortification check is not enough to make MsgTimeoutNow safe.
func (r *Replica) forceCampaignLocked(ctx context.Context) {
log.VEventf(ctx, 3, "force campaigning")
msg := raftpb.Message{To: raftpb.PeerID(r.replicaID), Type: raftpb.MsgTimeoutNow}
Expand Down
20 changes: 12 additions & 8 deletions pkg/kv/kvserver/replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11574,11 +11574,11 @@ func TestReplicaShouldCampaignOnLeaseRequestRedirect(t *testing.T) {
defer log.Scope(t).Close(t)

type params struct {
raftStatus raft.BasicStatus
livenessMap livenesspb.IsLiveMap
desc *roachpb.RangeDescriptor
shouldUseExpirationLease bool
now hlc.Timestamp
raftStatus raft.BasicStatus
livenessMap livenesspb.IsLiveMap
desc *roachpb.RangeDescriptor
leaseType roachpb.LeaseType
now hlc.Timestamp
}

// Set up a base state that we can vary, representing this node n1 being a
Expand Down Expand Up @@ -11608,7 +11608,8 @@ func TestReplicaShouldCampaignOnLeaseRequestRedirect(t *testing.T) {
2: livenesspb.IsLiveMapEntry{IsLive: false},
3: livenesspb.IsLiveMapEntry{IsLive: false},
},
now: hlc.Timestamp{Logical: 10},
leaseType: roachpb.LeaseEpoch,
now: hlc.Timestamp{Logical: 10},
}

testcases := map[string]struct {
Expand All @@ -11632,7 +11633,10 @@ func TestReplicaShouldCampaignOnLeaseRequestRedirect(t *testing.T) {
p.raftStatus.Lead = raft.None
}},
"should use expiration lease": {false, func(p *params) {
p.shouldUseExpirationLease = true
p.leaseType = roachpb.LeaseExpiration
}},
"should use leader lease": {false, func(p *params) {
p.leaseType = roachpb.LeaseLeader
}},
"leader not in desc": {false, func(p *params) {
p.raftStatus.Lead = 4
Expand Down Expand Up @@ -11678,7 +11682,7 @@ func TestReplicaShouldCampaignOnLeaseRequestRedirect(t *testing.T) {
}
tc.modify(&p)
require.Equal(t, tc.expect, shouldCampaignOnLeaseRequestRedirect(
p.raftStatus, p.livenessMap, p.desc, p.shouldUseExpirationLease, p.now))
p.raftStatus, p.livenessMap, p.desc, p.leaseType, p.now))
})
}
}
Expand Down

0 comments on commit 2cc17ab

Please sign in to comment.