Skip to content

Commit

Permalink
batcheval: add MVCC range tombstone support for DeleteRange
Browse files Browse the repository at this point in the history
This patch adds the parameter `UseExperimentalRangeTombstone` for
`DeleteRange`, which deletes the span using an MVCC range tombstone.
The new version gate `MVCCRangeTombstones` must be checked before using
it. `storage.ExperimentalMVCCDeleteRangeUsingTombstone()` is added to
carry out the actual deletion.

This is a bare-bones implementation to allow writing range keys via the
KV API for testing and development purposes. It has significant
shortcomings, and will be fleshed out at a later time.

Release note: None
  • Loading branch information
erikgrinaker committed Jun 4, 2022
1 parent 0fdac09 commit 8410a44
Show file tree
Hide file tree
Showing 16 changed files with 915 additions and 11 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -282,4 +282,4 @@ trace.jaeger.agent string the address of a Jaeger agent to receive traces using
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.
trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https://<ui>/#/debug/tracez
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.
version version 22.1-8 set the active cluster version in the format '<major>.<minor>'
version version 22.1-10 set the active cluster version in the format '<major>.<minor>'
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,6 @@
<tr><td><code>trace.opentelemetry.collector</code></td><td>string</td><td><code></code></td><td>address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.</td></tr>
<tr><td><code>trace.span_registry.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, ongoing traces can be seen at https://<ui>/#/debug/tracez</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>22.1-8</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>22.1-10</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
6 changes: 6 additions & 0 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,8 @@ const (
// version is guaranteed to reside in a cluster where all nodes support range
// keys at the Pebble layer.
EnablePebbleFormatVersionRangeKeys
// MVCCRangeTombstones enables the use of MVCC range tombstones.
MVCCRangeTombstones

// *************************************************
// Step (1): Add new versions here.
Expand Down Expand Up @@ -638,6 +640,10 @@ var versionsSingleton = keyedVersions{
Key: EnablePebbleFormatVersionRangeKeys,
Version: roachpb.Version{Major: 22, Minor: 1, Internal: 8},
},
{
Key: MVCCRangeTombstones,
Version: roachpb.Version{Major: 22, Minor: 1, Internal: 10},
},

// *************************************************
// Step (2): Add new versions here.
Expand Down
5 changes: 3 additions & 2 deletions pkg/clusterversion/key_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions pkg/kv/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,34 @@ func (b *Batch) DelRange(s, e interface{}, returnKeys bool) {
b.initResult(1, 0, notRaw, nil)
}

// ExperimentalDelRangeUsingTombstone appends a DeleteRange request to the batch
// that deletes the rows between s (inclusive) and e (exclusive) by way of an
// MVCC range tombstone. The caller is responsible for checking the
// MVCCRangeTombstones version gate before calling this.
//
// If either key fails to marshal, the error is recorded on the batch result and
// no request is appended.
//
// This method is EXPERIMENTAL: range tombstones are under active development
// and are severely limited. Among other things, they are ignored by all KV and
// MVCC APIs and are only stored in memory.
func (b *Batch) ExperimentalDelRangeUsingTombstone(s, e interface{}) {
	startKey, err := marshalKey(s)
	if err != nil {
		b.initResult(0, 0, notRaw, err)
		return
	}
	endKey, err := marshalKey(e)
	if err != nil {
		b.initResult(0, 0, notRaw, err)
		return
	}

	span := roachpb.RequestHeader{Key: startKey, EndKey: endKey}
	b.appendReqs(&roachpb.DeleteRangeRequest{
		RequestHeader:                 span,
		UseExperimentalRangeTombstone: true,
	})
	b.initResult(1, 0, notRaw, nil)
}

// adminMerge is only exported on DB. It is here for symmetry with the
// other operations.
func (b *Batch) adminMerge(key interface{}) {
Expand Down
16 changes: 16 additions & 0 deletions pkg/kv/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,22 @@ func (db *DB) DelRange(
return r.Keys, err
}

// ExperimentalDelRangeUsingTombstone deletes the rows between begin (inclusive)
// and end (exclusive) using an MVCC range tombstone. It does so by running a
// batch holding only that one DeleteRange request and returning the resulting
// error, if any. The caller is responsible for checking the
// MVCCRangeTombstones version gate before calling this.
//
// This method is EXPERIMENTAL: range tombstones are under active development
// and are severely limited. Among other things, they are ignored by all KV and
// MVCC APIs and are only stored in memory.
func (db *DB) ExperimentalDelRangeUsingTombstone(
	ctx context.Context, begin, end interface{},
) error {
	var batch Batch
	batch.ExperimentalDelRangeUsingTombstone(begin, end)
	runErr := db.Run(ctx, &batch)
	_, err := getOneResult(runErr, &batch)
	return err
}

// AdminMerge merges the range containing key and the subsequent range. After
// the merge operation is complete, the range containing key will contain all of
// the key/value pairs of the subsequent range and the subsequent range will no
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/batcheval/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ go_test(
srcs = [
"cmd_add_sstable_test.go",
"cmd_clear_range_test.go",
"cmd_delete_range_test.go",
"cmd_end_transaction_test.go",
"cmd_export_test.go",
"cmd_get_test.go",
Expand Down
19 changes: 19 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_delete_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/errors"
)

func init() {
Expand Down Expand Up @@ -49,6 +50,24 @@ func DeleteRange(
h := cArgs.Header
reply := resp.(*roachpb.DeleteRangeResponse)

// Use experimental MVCC range tombstone if requested.
if args.UseExperimentalRangeTombstone {
if cArgs.Header.Txn != nil {
return result.Result{}, ErrTransactionUnsupported
}
if args.Inline {
return result.Result{}, errors.AssertionFailedf("Inline can't be used with range tombstones")
}
if args.ReturnKeys {
return result.Result{}, errors.AssertionFailedf(
"ReturnKeys can't be used with range tombstones")
}
maxIntents := storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV)
err := storage.ExperimentalMVCCDeleteRangeUsingTombstone(
ctx, readWriter, cArgs.Stats, args.Key, args.EndKey, h.Timestamp, cArgs.Now, maxIntents)
return result.Result{}, err
}

var timestamp hlc.Timestamp
if !args.Inline {
timestamp = h.Timestamp
Expand Down
221 changes: 221 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_delete_range_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package batcheval_test

import (
"context"
"testing"

"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/require"
)

// TestDeleteRangeTombstone tests DeleteRange with
// UseExperimentalRangeTombstone set, evaluating the command directly against
// an in-memory storage engine.
//
// MVCC range tombstone logic is tested exhaustively in the MVCC history tests,
// this just tests the RPC plumbing: request validation, error propagation, and
// that a successful request leaves the expected range tombstone in the engine.
func TestDeleteRangeTombstone(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	// Initial data for each test. x is point tombstone, [] is intent,
	// o---o is range tombstone. Rows are wall-clock timestamps, columns are
	// keys.
	//
	// 5                                  [i5]
	// 4           c4
	// 3               x       o-------o
	// 2       b2      d2
	// 1
	//     a   b   c   d   e   f   g   h   i
	writeInitialData := func(t *testing.T, ctx context.Context, rw storage.ReadWriter) {
		t.Helper()
		var localTS hlc.ClockTimestamp
		wall := func(ts int64) hlc.Timestamp { return hlc.Timestamp{WallTime: ts} }
		// put writes a non-transactional point value at the given timestamp.
		put := func(key string, ts int64, value string) {
			t.Helper()
			require.NoError(t, storage.MVCCPut(
				ctx, rw, nil, roachpb.Key(key), wall(ts), localTS, roachpb.MakeValueFromString(value), nil))
		}

		put("b", 2, "b2")
		put("c", 4, "c4")
		put("d", 2, "d2")
		// Point tombstone above d2.
		require.NoError(t, storage.MVCCDelete(ctx, rw, nil, roachpb.Key("d"), wall(3), localTS, nil))
		// Transactional write, leaving an intent at i.
		txn := roachpb.MakeTransaction("test", nil /* baseKey */, roachpb.NormalUserPriority, wall(5), 0, 0)
		require.NoError(t, storage.MVCCPut(
			ctx, rw, nil, roachpb.Key("i"), wall(5), localTS, roachpb.MakeValueFromString("i5"), &txn))
		// Pre-existing range tombstone across [f, h).
		require.NoError(t, storage.ExperimentalMVCCDeleteRangeUsingTombstone(
			ctx, rw, nil, roachpb.Key("f"), roachpb.Key("h"), wall(3), localTS, 0))
	}

	now := hlc.ClockTimestamp{Logical: 9}

	testcases := map[string]struct {
		start      string
		end        string
		ts         int64
		txn        bool
		inline     bool
		returnKeys bool
		expectErr  interface{} // error type, substring, or true (any)
	}{
		"above points succeed": {
			start:     "a",
			end:       "f",
			ts:        10,
			expectErr: nil,
		},
		"above range tombstone succeed": {
			start:     "f",
			end:       "h",
			ts:        10,
			expectErr: nil,
		},
		"transaction errors": {
			start:     "a",
			end:       "f",
			ts:        10,
			txn:       true,
			expectErr: batcheval.ErrTransactionUnsupported,
		},
		"inline errors": {
			start:     "a",
			end:       "f",
			ts:        10,
			inline:    true,
			expectErr: "Inline can't be used with range tombstones",
		},
		"returnKeys errors": {
			start:      "a",
			end:        "f",
			ts:         10,
			returnKeys: true,
			expectErr:  "ReturnKeys can't be used with range tombstones",
		},
		"intent errors with WriteIntentError": {
			start:     "i",
			end:       "j",
			ts:        10,
			expectErr: &roachpb.WriteIntentError{},
		},
		"below point errors with WriteTooOldError": {
			start:     "a",
			end:       "d",
			ts:        1,
			expectErr: &roachpb.WriteTooOldError{},
		},
		"below range tombstone errors with WriteTooOldError": {
			start:     "f",
			end:       "h",
			ts:        1,
			expectErr: &roachpb.WriteTooOldError{},
		},
	}
	for name, tc := range testcases {
		t.Run(name, func(t *testing.T) {
			ctx := context.Background()
			st := cluster.MakeTestingClusterSettings()
			engine := storage.NewDefaultInMemForTesting()
			defer engine.Close()

			writeInitialData(t, ctx, engine)

			// The range key we expect the request to write on success.
			rangeKey := storage.MVCCRangeKey{
				StartKey:  roachpb.Key(tc.start),
				EndKey:    roachpb.Key(tc.end),
				Timestamp: hlc.Timestamp{WallTime: tc.ts},
			}

			var txn *roachpb.Transaction
			if tc.txn {
				tx := roachpb.MakeTransaction("txn", nil /* baseKey */, roachpb.NormalUserPriority, rangeKey.Timestamp, 0, 0)
				txn = &tx
			}

			// Run the request.
			var ms enginepb.MVCCStats
			resp := &roachpb.DeleteRangeResponse{}
			_, err := batcheval.DeleteRange(ctx, engine, batcheval.CommandArgs{
				EvalCtx: (&batcheval.MockEvalCtx{ClusterSettings: st}).EvalContext(),
				Stats:   &ms,
				Now:     now,
				Header: roachpb.Header{
					Timestamp: rangeKey.Timestamp,
					Txn:       txn,
				},
				Args: &roachpb.DeleteRangeRequest{
					RequestHeader: roachpb.RequestHeader{
						Key:    rangeKey.StartKey,
						EndKey: rangeKey.EndKey,
					},
					UseExperimentalRangeTombstone: true,
					Inline:                        tc.inline,
					ReturnKeys:                    tc.returnKeys,
				},
			}, resp)

			// Check the error. expectErr may be true (any error), a substring
			// of the error message, or an error whose type must match.
			if tc.expectErr != nil {
				require.Error(t, err)
				switch expect := tc.expectErr.(type) {
				case bool:
					// true accepts any error; false is not a valid expectation.
					if !expect {
						require.Fail(t, "invalid expectErr", "expectErr=%v", tc.expectErr)
					}
				case string:
					require.Contains(t, err.Error(), expect)
				case error:
					require.True(t, errors.HasType(err, expect), "expected %T, got %v", expect, err)
				default:
					require.Fail(t, "invalid expectErr", "expectErr=%v", tc.expectErr)
				}
				return
			}
			require.NoError(t, err)

			// Check that the range tombstone was written successfully.
			iter := engine.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{
				KeyTypes:   storage.IterKeyTypeRangesOnly,
				LowerBound: rangeKey.StartKey,
				UpperBound: rangeKey.EndKey,
			})
			defer iter.Close()

			// The iterator may surface the written range key as several
			// fragments. Stitch the fragments at the request timestamp back
			// together: the first one seeds seen, later ones extend its end key
			// and must carry the same value.
			var seen storage.MVCCRangeKeyValue
			for iter.SeekGE(storage.MVCCKey{Key: rangeKey.StartKey}); ; iter.Next() {
				ok, err := iter.Valid()
				require.NoError(t, err)
				if !ok {
					break
				}
				for _, rkv := range iter.RangeKeys() {
					if !rkv.RangeKey.Timestamp.Equal(rangeKey.Timestamp) {
						continue
					}
					if len(seen.RangeKey.StartKey) == 0 {
						seen = rkv.Clone()
					} else {
						seen.RangeKey.EndKey = rkv.RangeKey.EndKey.Clone()
						require.Equal(t, seen.Value, rkv.Value)
					}
					break
				}
			}
			require.Equal(t, rangeKey, seen.RangeKey)

			// The stored value must decode as a tombstone carrying the
			// request's Now as its local timestamp.
			value, err := storage.DecodeMVCCValue(seen.Value)
			require.NoError(t, err)
			require.True(t, value.IsTombstone())
			require.Equal(t, now, value.LocalTimestamp)

			// TODO(erikgrinaker): This should test MVCC stats when implemented.
		})
	}
}
Loading

0 comments on commit 8410a44

Please sign in to comment.