From 9a5bb74a6a07ac45b01234bb5672dc8a9a935c1a Mon Sep 17 00:00:00 2001
From: Nathan VanBenschoten
Date: Sun, 19 Dec 2021 21:17:01 -0500
Subject: [PATCH] kv: add to replicaGCQueue in replicaMsgAppDropper, not gcQueue

Fixes #73838.

This commit is the first of the three "next steps" identified in #73838.
It fixes a case where we were accidentally adding a replica to the wrong
queue. When dropping a `MsgApp` in `maybeDropMsgApp`, we want to GC the
replica on the LHS of the split if it has been removed from its range.
However, we were instead passing it to the MVCC GC queue, which was both
irrelevant and a no-op because the LHS was not the leaseholder.

It's possible that we have seen the effects of this in roachtests like
`splits/largerange`. This bug could have delayed a snapshot to the RHS of
a split for up to `maxDelaySplitTriggerTicks * 200ms = 20s` in some rare
cases. We've seen the logs corresponding to this issue in a few tests over
the past year:
https://github.com/cockroachdb/cockroach/issues?q=is%3Aissue+%22would+have+dropped+incoming+MsgApp+to+wait+for+split+trigger%22+is%3Aclosed.
---
 pkg/kv/kvserver/client_merge_test.go    | 4 ++--
 pkg/kv/kvserver/split_trigger_helper.go | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go
index 42e951e61210..994286262fc3 100644
--- a/pkg/kv/kvserver/client_merge_test.go
+++ b/pkg/kv/kvserver/client_merge_test.go
@@ -2756,7 +2756,7 @@ func TestStoreRangeMergeSlowUnabandonedFollower_WithSplit(t *testing.T) {
 		t.Fatal(pErr)
 	}
 
-	// Now split the newly merged range splits back out at exactly the same key.
+	// Now split the newly merged range back out at exactly the same key.
 	// When the replica GC queue looks in meta2 it will find the new RHS range, of
 	// which store2 is a member. Note that store2 does not yet have an initialized
 	// replica for this range, since it would intersect with the old RHS replica.
@@ -2769,7 +2769,7 @@ func TestStoreRangeMergeSlowUnabandonedFollower_WithSplit(t *testing.T) {
 	tc.RemoveVotersOrFatal(t, lhsDesc.StartKey.AsRawKey(), tc.Target(2))
 
 	// Transfer the lease on the new RHS to store2 and wait for it to apply. This
-	// will force its replica to of the new RHS to become up to date, which
+	// will force its replica of the new RHS to become up to date, which
 	// indirectly tests that the replica GC queue cleans up both the LHS replica
 	// and the old RHS replica.
 	tc.TransferRangeLeaseOrFatal(t, *newRHSDesc, tc.Target(2))
diff --git a/pkg/kv/kvserver/split_trigger_helper.go b/pkg/kv/kvserver/split_trigger_helper.go
index fb719a5a8b13..4c123161df46 100644
--- a/pkg/kv/kvserver/split_trigger_helper.go
+++ b/pkg/kv/kvserver/split_trigger_helper.go
@@ -37,7 +37,7 @@ func (rd *replicaMsgAppDropper) ShouldDrop(startKey roachpb.RKey) (fmt.Stringer,
 	if lhsRepl == nil {
 		return nil, false
 	}
-	lhsRepl.store.gcQueue.AddAsync(context.Background(), lhsRepl, replicaGCPriorityDefault)
+	lhsRepl.store.replicaGCQueue.AddAsync(context.Background(), lhsRepl, replicaGCPriorityDefault)
 	return lhsRepl, true
 }
 
@@ -48,7 +48,7 @@ type msgAppDropper interface {
 
 // maybeDropMsgApp returns true if the incoming Raft message should be dropped.
 // It does so if the recipient replica is uninitialized (i.e. has no state) and
-// is waiting for a split trigger to apply,in which case delivering the message
+// is waiting for a split trigger to apply, in which case delivering the message
 // in this situation would result in an unnecessary Raft snapshot: the MsgApp
 // would be rejected and the rejection would prompt the leader to send a
 // snapshot, while the split trigger would likely populate the replica "for