kvserver: allow certain read-only requests to drop latches before evaluation

This commit changes the way certain types of read-only requests are evaluated.
Traditionally, read-only requests have held their latches throughout their
execution. This commit allows certain qualifying reads to release their latches
earlier.

At a high level, reads may attempt to resolve all conflicts upfront by
performing a sort of "validation" phase before they perform their MVCC scan.
This validation phase performs a scan of the lock table keyspace in order to
find any conflicting intents that may need to be resolved before the actual
evaluation of the request over the MVCC keyspace. If no conflicting intents are
found, then (since cockroachdb#76312), the
request is guaranteed to be fully isolated against all other concurrent
requests and can be allowed to release its latches at this point. This allows
the actual evaluation of the read (over the MVCC part of the keyspace) to
proceed without latches being held, which is the main motivation of this work.
This validation phase can be thought of as an extension of the validation that
the concurrency manager already performs when requests are sequenced through
it: it tries to detect any conflicting intents that have not yet been pulled
into the in-memory lock table.
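
As a rough illustration only, the two-phase flow described above might be
sketched like this in Go; the types and helpers (`span`, `scanLockTable`,
`dropLatches`, `mvccScan`) are hypothetical stand-ins rather than the actual
CockroachDB code paths:

```go
// Sketch only: hypothetical stand-ins for the real validation, latching, and
// evaluation machinery described in the commit message.
package main

import (
	"errors"
	"fmt"
)

type span struct{ start, end string }

// scanLockTable models the validation phase: look for intents in the
// lock-table keyspace covered by the request's span that conflict with it.
func scanLockTable(s span) []string { return nil }

// dropLatches models releasing the request's latches before evaluation.
func dropLatches(s span) { fmt.Println("latches dropped for", s.start, "-", s.end) }

// mvccScan models evaluating the read over the MVCC keyspace.
func mvccScan(s span) string { return "scan result" }

func evalReadOnly(s span) (string, error) {
	// Phase 1 (validation): any conflicting intent found here must be
	// resolved before the request can proceed, and latches stay held.
	if conflicts := scanLockTable(s); len(conflicts) > 0 {
		return "", errors.New("conflicting intents; cannot drop latches early")
	}
	// No conflicting intents: the read is isolated from concurrent writers,
	// so its latches can be released before evaluation.
	dropLatches(s)
	// Phase 2 (evaluation): the MVCC scan runs without latches held.
	return mvccScan(s), nil
}

func main() {
	res, err := evalReadOnly(span{start: "a", end: "z"})
	fmt.Println(res, err)
}
```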

Additionally, for certain types of requests that can drop their latches early,
and do not need to access the `IntentHistory` for any of their parent txn's
intents, this commit attempts to make their MVCC scan cheaper by eliminating
the need for an `intentInterleavingIterator`. This is enabled by the
observation that once the validation phase is complete, the only remaining
intents in the read's declared span must be intents belonging to the reader's
transaction. So if the reader doesn't need to read an intent that isn't the
latest intent on a key, then it doesn't need access to the key's
`IntentHistory` (which lives in the lock-table keyspace), and doesn't need to
use an `intentInterleavingIterator`.
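
As a loose sketch of that decision (the helper below is hypothetical; in the
actual change, a `DontInterleaveIntents` flag is plumbed from `CommandArgs`
into the MVCC get/scan options, as the diff below shows):

```go
// Sketch only: after a clean validation phase, any intents remaining in the
// read's span belong to the reader's own transaction, so the latest version
// on each key is either a committed value or the reader's own provisional
// write. The IntentHistory (and therefore an intentInterleavingIterator) is
// only needed if the reader must see an older intent of its own.
func dontInterleaveIntents(droppedLatchesEarly, needsIntentHistory bool) bool {
	return droppedLatchesEarly && !needsIntentHistory
}
```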

Release note (performance improvement): certain types of reads will now have a
far smaller contention footprint with conflicting concurrent writers.

Resolves cockroachdb#66485

Release justification: high benefit change to existing functionality, part of
22.2 roadmap
aayushshah15 committed Aug 23, 2022
1 parent 677ef2c commit a1ba3c6
Showing 16 changed files with 772 additions and 223 deletions.
15 changes: 8 additions & 7 deletions pkg/kv/kvserver/batcheval/cmd_get.go
@@ -53,13 +53,14 @@ func Get(
 	var intent *roachpb.Intent
 	var err error
 	val, intent, err = storage.MVCCGet(ctx, reader, args.Key, h.Timestamp, storage.MVCCGetOptions{
-		Inconsistent:     h.ReadConsistency != roachpb.CONSISTENT,
-		SkipLocked:       h.WaitPolicy == lock.WaitPolicy_SkipLocked,
-		Txn:              h.Txn,
-		FailOnMoreRecent: args.KeyLocking != lock.None,
-		Uncertainty:      cArgs.Uncertainty,
-		MemoryAccount:    cArgs.EvalCtx.GetResponseMemoryAccount(),
-		LockTable:        cArgs.Concurrency,
+		Inconsistent:          h.ReadConsistency != roachpb.CONSISTENT,
+		SkipLocked:            h.WaitPolicy == lock.WaitPolicy_SkipLocked,
+		Txn:                   h.Txn,
+		FailOnMoreRecent:      args.KeyLocking != lock.None,
+		Uncertainty:           cArgs.Uncertainty,
+		MemoryAccount:         cArgs.EvalCtx.GetResponseMemoryAccount(),
+		LockTable:             cArgs.Concurrency,
+		DontInterleaveIntents: cArgs.DontInterleaveIntents,
 	})
 	if err != nil {
 		return result.Result{}, err

25 changes: 13 additions & 12 deletions pkg/kv/kvserver/batcheval/cmd_reverse_scan.go
@@ -40,18 +40,19 @@ func ReverseScan(
 	var err error

 	opts := storage.MVCCScanOptions{
-		Inconsistent:     h.ReadConsistency != roachpb.CONSISTENT,
-		SkipLocked:       h.WaitPolicy == lock.WaitPolicy_SkipLocked,
-		Txn:              h.Txn,
-		MaxKeys:          h.MaxSpanRequestKeys,
-		MaxIntents:       storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV),
-		TargetBytes:      h.TargetBytes,
-		AllowEmpty:       h.AllowEmpty,
-		WholeRowsOfSize:  h.WholeRowsOfSize,
-		FailOnMoreRecent: args.KeyLocking != lock.None,
-		Reverse:          true,
-		MemoryAccount:    cArgs.EvalCtx.GetResponseMemoryAccount(),
-		LockTable:        cArgs.Concurrency,
+		Inconsistent:          h.ReadConsistency != roachpb.CONSISTENT,
+		SkipLocked:            h.WaitPolicy == lock.WaitPolicy_SkipLocked,
+		Txn:                   h.Txn,
+		MaxKeys:               h.MaxSpanRequestKeys,
+		MaxIntents:            storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV),
+		TargetBytes:           h.TargetBytes,
+		AllowEmpty:            h.AllowEmpty,
+		WholeRowsOfSize:       h.WholeRowsOfSize,
+		FailOnMoreRecent:      args.KeyLocking != lock.None,
+		Reverse:               true,
+		MemoryAccount:         cArgs.EvalCtx.GetResponseMemoryAccount(),
+		LockTable:             cArgs.Concurrency,
+		DontInterleaveIntents: cArgs.DontInterleaveIntents,
 	}

 	switch args.ScanFormat {

27 changes: 14 additions & 13 deletions pkg/kv/kvserver/batcheval/cmd_scan.go
@@ -40,19 +40,20 @@ func Scan(
 	var err error

 	opts := storage.MVCCScanOptions{
-		Inconsistent:     h.ReadConsistency != roachpb.CONSISTENT,
-		SkipLocked:       h.WaitPolicy == lock.WaitPolicy_SkipLocked,
-		Txn:              h.Txn,
-		Uncertainty:      cArgs.Uncertainty,
-		MaxKeys:          h.MaxSpanRequestKeys,
-		MaxIntents:       storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV),
-		TargetBytes:      h.TargetBytes,
-		AllowEmpty:       h.AllowEmpty,
-		WholeRowsOfSize:  h.WholeRowsOfSize,
-		FailOnMoreRecent: args.KeyLocking != lock.None,
-		Reverse:          false,
-		MemoryAccount:    cArgs.EvalCtx.GetResponseMemoryAccount(),
-		LockTable:        cArgs.Concurrency,
+		Inconsistent:          h.ReadConsistency != roachpb.CONSISTENT,
+		SkipLocked:            h.WaitPolicy == lock.WaitPolicy_SkipLocked,
+		Txn:                   h.Txn,
+		Uncertainty:           cArgs.Uncertainty,
+		MaxKeys:               h.MaxSpanRequestKeys,
+		MaxIntents:            storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV),
+		TargetBytes:           h.TargetBytes,
+		AllowEmpty:            h.AllowEmpty,
+		WholeRowsOfSize:       h.WholeRowsOfSize,
+		FailOnMoreRecent:      args.KeyLocking != lock.None,
+		Reverse:               false,
+		MemoryAccount:         cArgs.EvalCtx.GetResponseMemoryAccount(),
+		LockTable:             cArgs.Concurrency,
+		DontInterleaveIntents: cArgs.DontInterleaveIntents,
 	}

 	switch args.ScanFormat {

7 changes: 4 additions & 3 deletions pkg/kv/kvserver/batcheval/declare.go
@@ -121,7 +121,8 @@ type CommandArgs struct {
 	Args    roachpb.Request
 	Now     hlc.ClockTimestamp
 	// *Stats should be mutated to reflect any writes made by the command.
-	Stats       *enginepb.MVCCStats
-	Concurrency *concurrency.Guard
-	Uncertainty uncertainty.Interval
+	Stats                 *enginepb.MVCCStats
+	Concurrency           *concurrency.Guard
+	Uncertainty           uncertainty.Interval
+	DontInterleaveIntents bool
 }

46 changes: 22 additions & 24 deletions pkg/kv/kvserver/client_replica_test.go
@@ -252,17 +252,13 @@ func TestTxnPutOutOfOrder(t *testing.T) {
 		restartKey = "restart"
 	)
 	// Set up a filter to so that the get operation at Step 3 will return an error.
-	var numGets int32
+	var shouldFailGet atomic.Value

 	testingEvalFilter := func(filterArgs kvserverbase.FilterArgs) *roachpb.Error {
 		if _, ok := filterArgs.Req.(*roachpb.GetRequest); ok &&
 			filterArgs.Req.Header().Key.Equal(roachpb.Key(key)) &&
 			filterArgs.Hdr.Txn == nil {
-			// The Reader executes two get operations, each of which triggers two get requests
-			// (the first request fails and triggers txn push, and then the second request
-			// succeeds). Returns an error for the fourth get request to avoid timestamp cache
-			// update after the third get operation pushes the txn timestamp.
-			if atomic.AddInt32(&numGets, 1) == 4 {
+			if shouldFail := shouldFailGet.Load(); shouldFail != nil && shouldFail.(bool) {
 				return roachpb.NewErrorWithTxn(errors.Errorf("Test"), filterArgs.Hdr.Txn)
 			}
 		}

@@ -401,6 +397,7 @@ func TestTxnPutOutOfOrder(t *testing.T) {
 	manual.Increment(100)

 	h.Timestamp = s.Clock().Now()
+	shouldFailGet.Store(true)
 	if _, err := kv.SendWrappedWith(
 		context.Background(), store.TestSender(), h, &roachpb.GetRequest{RequestHeader: requestHeader},
 	); err == nil {

@@ -4493,20 +4490,6 @@ func TestDiscoverIntentAcrossLeaseTransferAwayAndBack(t *testing.T) {
 	var txn2ID atomic.Value
 	var txn2BBlockOnce sync.Once
 	txn2BlockedC := make(chan chan struct{})
-	postEvalFilter := func(args kvserverbase.FilterArgs) *roachpb.Error {
-		if txn := args.Hdr.Txn; txn != nil && txn.ID == txn2ID.Load() {
-			txn2BBlockOnce.Do(func() {
-				if !errors.HasType(args.Err, (*roachpb.WriteIntentError)(nil)) {
-					t.Errorf("expected WriteIntentError; got %v", args.Err)
-				}
-
-				unblockCh := make(chan struct{})
-				txn2BlockedC <- unblockCh
-				<-unblockCh
-			})
-		}
-		return nil
-	}

 	// Detect when txn4 discovers txn3's intent and begins to push.
 	var txn4ID atomic.Value

@@ -4527,10 +4510,20 @@ func TestDiscoverIntentAcrossLeaseTransferAwayAndBack(t *testing.T) {
 	tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
 		ServerArgs: base.TestServerArgs{Knobs: base.TestingKnobs{
 			Store: &kvserver.StoreTestingKnobs{
-				EvalKnobs: kvserverbase.BatchEvalTestingKnobs{
-					TestingPostEvalFilter: postEvalFilter,
-				},
 				TestingRequestFilter: requestFilter,
+				TestingConcurrencyRetryFilter: func(ctx context.Context, ba roachpb.BatchRequest, pErr *roachpb.Error) {
+					if txn := ba.Txn; txn != nil && txn.ID == txn2ID.Load() {
+						txn2BBlockOnce.Do(func() {
+							if !errors.HasType(pErr.GoError(), (*roachpb.WriteIntentError)(nil)) {
+								t.Errorf("expected WriteIntentError; got %v", pErr)
+							}
+
+							unblockCh := make(chan struct{})
+							txn2BlockedC <- unblockCh
+							<-unblockCh
+						})
+					}
+				},
 				// Required by TestCluster.MoveRangeLeaseNonCooperatively.
 				AllowLeaseRequestProposalsWhenNotLeader: true,
 			},

@@ -4563,7 +4556,12 @@ func TestDiscoverIntentAcrossLeaseTransferAwayAndBack(t *testing.T) {
 		_, err := txn2.Get(ctx, key)
 		err2C <- err
 	}()
-	txn2UnblockC := <-txn2BlockedC
+	var txn2UnblockC chan struct{}
+	select {
+	case txn2UnblockC = <-txn2BlockedC:
+	case <-time.After(10 * time.Second):
+		t.Fatal("timed out waiting for txn2 to block")
+	}

 	// Transfer the lease to Server 1. Do so non-cooperatively instead of using
 	// a lease transfer, because the cooperative lease transfer would get stuck
