From 4be1c75a3a7bdd2f16a7450f237fc460f930e1fe Mon Sep 17 00:00:00 2001 From: Austen McClernon Date: Wed, 15 Mar 2023 21:02:57 +0000 Subject: [PATCH] kvserver: deflake and unskip split race uninit rhs `TestStoreRangeSplitRaceUninitializedRHS` was skipped some time ago, in mid 2021 (#67082). The test was skipped due to flakes that appeared related to untimely test cluster startup. This commit unskips the test and makes minor adjustments in order to be current with the semantics of the Raft transport. Without these adjustments, the `MsgVote` messages sent every microsecond with the intention of triggering a race would completely fill up the Raft transport send queue. Once the queue was full, the test would fail as requests were dropped. This commit updates the `MsgVote` send loop logic to not require every `MsgVote` request to be sent for the test to succeed. Resolves: #66480 Release note: None --- pkg/kv/kvserver/client_split_test.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pkg/kv/kvserver/client_split_test.go b/pkg/kv/kvserver/client_split_test.go index 6b97f1fd3f88..28f1f978c983 100644 --- a/pkg/kv/kvserver/client_split_test.go +++ b/pkg/kv/kvserver/client_split_test.go @@ -50,7 +50,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" - "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" "github.com/cockroachdb/cockroach/pkg/ts" @@ -2025,7 +2024,6 @@ func TestStoreSplitGCHint(t *testing.T) { // and the uninitialized replica reacting to messages. 
func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) { defer leaktest.AfterTest(t)() - skip.WithIssue(t, 66480, "flaky test") defer log.Scope(t).Close(t) currentTrigger := make(chan *roachpb.SplitTrigger, 1) @@ -2103,6 +2101,7 @@ func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) { for i := 0; i < 10; i++ { errChan := make(chan *kvpb.Error) + failedSendLog := log.Every(time.Second) // Closed when the split goroutine is done. splitDone := make(chan struct{}) @@ -2147,7 +2146,15 @@ func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) { Term: term, }, }, rpc.DefaultClass); !sent { - t.Error("transport failed to send vote request") + // SendAsync can return false, indicating the message didn't send. + // The most likely reason this test encounters a message failing to + // send is the outgoing message queue being full. The queue filling + // up is expected given it has fixed capacity and this loop is + // attempting to send 1 MsgVote every microsecond. See comments + // below and above for the frequency rationale. + if failedSendLog.ShouldLog() { + log.Infof(ctx, "transport failed to send vote request") + } } select { case <-splitDone: