From 6b34e2a9ed06b4e8c23bc21e7b5c603fdfbe2c22 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 6 Feb 2022 22:22:00 +0000 Subject: [PATCH] storage: add experimental MVCC range tombstone primitives This patch adds initial experimental primitives for MVCC range tombstones and the range keys they build on, based on experimental Pebble range keys, * Data structures: * `MVCCRangeKey` * `MVCCRangeKeyValue` * `nil` value for range tombstones (as with point tombstones) * Engine support for reading, writing, and clearing range keys: * `Engine.ExperimentalClearMVCCRangeKey()` * `Engine.ExperimentalPutMVCCRangeKey()` * `SimpleMVCCIterator.HasPointAndRange()` * `SimpleMVCCIterator.RangeBounds()` * `SimpleMVCCIterator.RangeKeys()` * `MVCCRangeKeyIterator` * MVCC functions for reading and writing range tombstones: * `ExperimentalMVCCDeleteRangeUsingTombstone()` * `ScanMVCCTombstones()` Range tombstones do not have a distinct identity, and should instead be considered a tombstone continuum: they will merge with abutting tombstones, can be partially cleared, can split or merge along with ranges, and so on. Bounded scans will truncate them to the scan bounds. The generalized range keys that range tombstones build on are also exposed via the `Engine` API. This is primarily for internal MVCC use. Exposing this in terms of range key/value pairs rather than range tombstones allows for additional use-cases such as ranged intents. Range tombstones are not yet handled in the rest of the MVCC or KV API, nor are they persisted to disk. Subsequent pull requests will extend their functionality and integrate them with other components. Release note: None --- pkg/kv/kvserver/rangefeed/task_test.go | 15 + pkg/kv/kvserver/spanset/batch.go | 25 ++ pkg/storage/BUILD.bazel | 2 + pkg/storage/engine.go | 96 +++++- pkg/storage/intent_interleaving_iter.go | 15 + pkg/storage/multi_iterator.go | 16 + pkg/storage/mvcc.go | 65 ++++ pkg/storage/mvcc_history_test.go | 106 +++++-- pkg/storage/mvcc_incremental_iterator.go | 15 + pkg/storage/mvcc_key.go | 69 +++++ pkg/storage/mvcc_key_test.go | 91 ++++++ pkg/storage/mvcc_range_key_iterator.go | 269 ++++++++++++++++ pkg/storage/mvcc_range_key_iterator_test.go | 292 ++++++++++++++++++ pkg/storage/pebble.go | 35 +++ pkg/storage/pebble_batch.go | 25 ++ pkg/storage/pebble_iterator.go | 96 ++++++ pkg/storage/sst_iterator.go | 15 + pkg/storage/sst_writer.go | 10 + .../mvcc_histories/delete_range_tombstone | 111 +++++++ pkg/util/hlc/timestamp.go | 16 + pkg/util/hlc/timestamp_test.go | 33 ++ 21 files changed, 1397 insertions(+), 20 deletions(-) create mode 100644 pkg/storage/mvcc_range_key_iterator.go create mode 100644 pkg/storage/mvcc_range_key_iterator_test.go create mode 100644 pkg/storage/testdata/mvcc_histories/delete_range_tombstone diff --git a/pkg/kv/kvserver/rangefeed/task_test.go b/pkg/kv/kvserver/rangefeed/task_test.go index 87b695a7a228..edf833d47904 100644 --- a/pkg/kv/kvserver/rangefeed/task_test.go +++ b/pkg/kv/kvserver/rangefeed/task_test.go @@ -190,6 +190,21 @@ func (s *testIterator) curKV() storage.MVCCKeyValue { return s.kvs[s.cur] } +// HasPointAndRange implements SimpleMVCCIterator. +func (s *testIterator) HasPointAndRange() (bool, bool) { + panic("not implemented") +} + +// RangeBounds implements SimpleMVCCIterator. +func (s *testIterator) RangeBounds() (roachpb.Key, roachpb.Key) { + panic("not implemented") +} + +// RangeTombstones implements SimpleMVCCIterator. +func (s *testIterator) RangeKeys() []storage.MVCCRangeKeyValue { + panic("not implemented") +} + func TestInitResolvedTSScan(t *testing.T) { defer leaktest.AfterTest(t)() startKey := roachpb.RKey("d") diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index ae012cb87a87..b44c26d3dee4 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -176,6 +176,21 @@ func (i *MVCCIterator) UnsafeValue() []byte { return i.i.UnsafeValue() } +// HasPointAndRange implements SimpleMVCCIterator. +func (i *MVCCIterator) HasPointAndRange() (bool, bool) { + panic("not implemented") +} + +// RangeBounds implements SimpleMVCCIterator. +func (i *MVCCIterator) RangeBounds() (roachpb.Key, roachpb.Key) { + panic("not implemented") +} + +// RangeKeys implements SimpleMVCCIterator. +func (i *MVCCIterator) RangeKeys() []storage.MVCCRangeKeyValue { + panic("not implemented") +} + // ComputeStats is part of the storage.MVCCIterator interface. func (i *MVCCIterator) ComputeStats( start, end roachpb.Key, nowNanos int64, @@ -599,6 +614,16 @@ func (s spanSetWriter) ClearIterRange(iter storage.MVCCIterator, start, end roac return s.w.ClearIterRange(iter, start, end) } +func (s spanSetWriter) ExperimentalPutMVCCRangeKey( + rangeKey storage.MVCCRangeKey, value []byte, +) error { + panic("not implemented") +} + +func (s spanSetWriter) ExperimentalClearMVCCRangeKey(rangeKey storage.MVCCRangeKey) error { + panic("not implemented") +} + func (s spanSetWriter) Merge(key storage.MVCCKey, value []byte) error { if s.spansOnly { if err := s.spans.CheckAllowed(SpanReadWrite, roachpb.Span{Key: key.Key}); err != nil { diff --git a/pkg/storage/BUILD.bazel b/pkg/storage/BUILD.bazel index ea1d295af0fc..73b6017c8ace 100644 --- a/pkg/storage/BUILD.bazel +++ b/pkg/storage/BUILD.bazel @@ -21,6 +21,7 @@ go_library( "mvcc_incremental_iterator.go", "mvcc_key.go", "mvcc_logical_ops.go", + "mvcc_range_key_iterator.go", "open.go", "pebble.go", "pebble_batch.go", @@ -108,6 +109,7 @@ go_test( "mvcc_incremental_iterator_test.go", "mvcc_key_test.go", "mvcc_logical_ops_test.go", + "mvcc_range_key_iterator_test.go", "mvcc_stats_test.go", "mvcc_test.go", "pebble_file_registry_test.go", diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index 5198a553ab36..039ec06a222b 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -67,11 +67,56 @@ type SimpleMVCCIterator interface { // reverse iteration to forward iteration. NextKey() // UnsafeKey returns the same value as Key, but the memory is invalidated on - // the next call to {Next,NextKey,Prev,SeekGE,SeekLT,Close}. + // the next call to {Next,NextKey,Prev,SeekGE,SeekLT,Close}. If the iterator + // is on a range key only, this returns the start bound of the range key. UnsafeKey() MVCCKey // UnsafeValue returns the same value as Value, but the memory is // invalidated on the next call to {Next,NextKey,Prev,SeekGE,SeekLT,Close}. UnsafeValue() []byte + // HasPointAndRange returns whether the current iterator position has a point + // key and/or a range key. If Valid() returns true, one of these will be true. + // Range keys are only emitted when requested via IterOptions.KeyTypes. + HasPointAndRange() (bool, bool) + // RangeBounds returns the range bounds for the current range key fragment, if + // any. See RangeKeys() for more info on range key fragments. + // + // NB: It is possible for this to return overlapping range bounds in some + // corner cases, e.g. first [b-c\0) and then [c-d). These overlapping bounds + // will always have the same values, and will always be at the very start of + // the second bounds, i.e. the overlap is [start, start.Next()). + // + // TODO(erikgrinaker): Try to guarantee non-overlapping bounds, but see + // comments in pebbleIterator.RangeBounds(). + RangeBounds() (roachpb.Key, roachpb.Key) + // RangeKeys returns all range key fragments (at different timestamps) at the + // current key position. If we are at a point key, it will return all range + // keys that overlap that point key at any timestamp. + // + // For defragmented iteration, use MVCCRangeKeyIterator instead. Fragmented + // iteration is primarily useful in two cases: + // + // - To iterate over point keys while accessing overlapping range keys + // (e.g. to determine if it is hidden by a range tombstone). + // + // - For partial iteration with later resumption, e.g. Export requests with + // byte limits that have to return point and range key data for a partial + // key span and then resume from that point in a later request. + // + // Range keys are fragmented by Pebble such that all overlapping range keys + // between two fragment bounds form a "stack" of range key fragments at + // different timestamps. Fragmentation is desirable at the storage layer to + // store range keys across SSTs and CRDB ranges without incurring + // cross-SST/range access costs. Stacking is desirable to easily see all range + // keys that overlap with a given point, and to translate range keys from the + // 2D MVCC keyspan to the 1D Pebble keyspan. + // + // This fragmentation is non-deterministic, as it also depends on Pebble's + // internal SST structure (which changes with compactions) and the store's + // write history. They will also split and merge along with CRDB ranges, can + // be partially removed by GC, and may be truncated by iterator bounds. + // + // TODO(erikgrinaker): Write a tech note on range keys and link it here. + RangeKeys() []MVCCRangeKeyValue } // IteratorStats is returned from {MVCCIterator,EngineIterator}.Stats. @@ -309,8 +354,27 @@ type IterOptions struct { // use such an iterator is to use it in concert with an iterator without // timestamp hints, as done by MVCCIncrementalIterator. MinTimestampHint, MaxTimestampHint hlc.Timestamp + // KeyTypes specifies the types of keys to surface: point and/or range keys. + // Use HasPointAndRange() to determine which key type is present at a given + // iterator position, and RangeBounds() and RangeKeys() to access range keys. + // For more info on range keys, see RangeKeys(). + KeyTypes IterKeyType } +// IterKeyType configures which types of keys an iterator should surface. +// +// TODO(erikgrinaker): Combine this with MVCCIterKind somehow. +type IterKeyType = pebble.IterKeyType + +const ( + // IterKeyTypePointsOnly iterates over point keys only. + IterKeyTypePointsOnly = pebble.IterKeyTypePointsOnly + // IterKeyTypePointsAndRanges iterates over both point and range keys. + IterKeyTypePointsAndRanges = pebble.IterKeyTypePointsAndRanges + // IterKeyTypeRangesOnly iterates over only range keys. + IterKeyTypeRangesOnly = pebble.IterKeyTypeRangesOnly +) + // MVCCIterKind is used to inform Reader about the kind of iteration desired // by the caller. type MVCCIterKind int @@ -584,6 +648,36 @@ type Writer interface { // returns. ClearIterRange(iter MVCCIterator, start, end roachpb.Key) error + // ExperimentalClearMVCCRangeKey deletes an MVCC range key from start + // (inclusive) to end (exclusive) at the given timestamp. For any range key + // that straddles the start and end boundaries, only the segments within the + // boundaries will be cleared. Clears are idempotent. + // + // This method is primarily intended for MVCC garbage collection and similar + // internal use. It mutates MVCC history, and does not check for intents or + // other conflicts. + // + // This method is EXPERIMENTAL. Range keys are not supported throughout the + // MVCC API, and the on-disk format is unstable. + ExperimentalClearMVCCRangeKey(rangeKey MVCCRangeKey) error + + // ExperimentalPutMVCCRangeKey writes a value to an MVCC range key. It is + // currently only used for range tombstones, which have a value of nil. Range + // keys exist separately from point keys in Pebble, and must be accessed via + // specialized iterator options and methods -- see e.g. IterOptions.KeyTypes, + // SimpleMVCCIterator.RangeKeys(), and MVCCRangeKeyIterator. + // + // A range key does not have a distinct identity, but should be considered a + // key continuum. They can be non-deterministically fragmented by Pebble, + // split/merged along with CRDB ranges, partially removed with + // ExperimentalClearMVCCRangeKey, and truncated during bounded iteration. + // + // TODO(erikgrinaker): Write a tech note on range keys and link it here. + // + // This function is EXPERIMENTAL. Range keys are not handled throughout the + // MVCC API, and the on-disk format is unstable. + ExperimentalPutMVCCRangeKey(MVCCRangeKey, []byte) error + // Merge is a high-performance write operation used for values which are // accumulated over several writes. Multiple values can be merged // sequentially into a single key; a subsequent read will return a "merged" diff --git a/pkg/storage/intent_interleaving_iter.go b/pkg/storage/intent_interleaving_iter.go index bf31221fbe93..834280186722 100644 --- a/pkg/storage/intent_interleaving_iter.go +++ b/pkg/storage/intent_interleaving_iter.go @@ -715,6 +715,21 @@ func (i *intentInterleavingIter) Value() []byte { return i.iter.Value() } +// HasPointAndRange implements SimpleMVCCIterator. +func (i *intentInterleavingIter) HasPointAndRange() (bool, bool) { + panic("not implemented") +} + +// RangeBounds implements SimpleMVCCIterator. +func (i *intentInterleavingIter) RangeBounds() (roachpb.Key, roachpb.Key) { + panic("not implemented") +} + +// RangeKeys implements SimpleMVCCIterator. +func (i *intentInterleavingIter) RangeKeys() []MVCCRangeKeyValue { + panic("not implemented") +} + func (i *intentInterleavingIter) Close() { i.iter.Close() i.intentIter.Close() diff --git a/pkg/storage/multi_iterator.go b/pkg/storage/multi_iterator.go index 9838adf60ec7..6d2558e10c21 100644 --- a/pkg/storage/multi_iterator.go +++ b/pkg/storage/multi_iterator.go @@ -14,6 +14,7 @@ import ( "bytes" "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/roachpb" ) const invalidIdxSentinel = -1 @@ -92,6 +93,21 @@ func (f *multiIterator) UnsafeValue() []byte { return f.iters[f.currentIdx].UnsafeValue() } +// HasPointAndRange implements SimpleMVCCIterator. +func (f *multiIterator) HasPointAndRange() (bool, bool) { + panic("not implemented") +} + +// RangeBounds implements SimpleMVCCIterator. +func (f *multiIterator) RangeBounds() (roachpb.Key, roachpb.Key) { + panic("not implemented") +} + +// RangeKeys implements SimpleMVCCIterator. +func (f *multiIterator) RangeKeys() []MVCCRangeKeyValue { + panic("not implemented") +} + // Next advances the iterator to the next key/value in the iteration. After this // call, Valid() will be true if the iterator was not positioned at the last // key. diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 64b0d1abaef2..7fdf988a8a17 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -99,6 +99,12 @@ type MVCCKeyValue struct { Value []byte } +// MVCCRangeKeyValue represents a ranged key/value pair. +type MVCCRangeKeyValue struct { + Key MVCCRangeKey + Value []byte +} + // optionalValue represents an optional roachpb.Value. It is preferred // over a *roachpb.Value to avoid the forced heap allocation. type optionalValue struct { @@ -2202,6 +2208,33 @@ func MVCCDeleteRange( return keys, res.ResumeSpan, res.NumKeys, nil } +// ExperimentalMVCCDeleteRangeUsingTombstone deletes the given MVCC keyspan at +// the given timestamp using a range tombstone (rather than point tombstones). +// This operation is non-transactional, but will check for existing intents and +// return a WriteIntentError containing up to maxIntents intents. +// +// This function is EXPERIMENTAL. Range tombstones are not supported throughout +// the MVCC API, and the on-disk format is unstable. +// +// TODO(erikgrinaker): Needs conflict handling, e.g. WriteTooOldError. +// TODO(erikgrinaker): Needs MVCCStats handling. +func ExperimentalMVCCDeleteRangeUsingTombstone( + ctx context.Context, + rw ReadWriter, + ms *enginepb.MVCCStats, + startKey, endKey roachpb.Key, + timestamp hlc.Timestamp, + maxIntents int64, +) error { + if intents, err := ScanIntents(ctx, rw, startKey, endKey, maxIntents, 0); err != nil { + return err + } else if len(intents) > 0 { + return &roachpb.WriteIntentError{Intents: intents} + } + return rw.ExperimentalPutMVCCRangeKey(MVCCRangeKey{ + StartKey: startKey, EndKey: endKey, Timestamp: timestamp}, nil) +} + func recordIteratorStats(traceSpan *tracing.Span, iteratorStats IteratorStats) { stats := iteratorStats.Stats if traceSpan != nil { @@ -3929,3 +3962,35 @@ func ComputeStatsForRange( ms.LastUpdateNanos = nowNanos return ms, nil } + +// MVCCScanRangeTombstones returns a list of range tombstones across the given +// span at the given timestamp, in end,timestamp order rather than +// start,timestamp. Any tombstones that straddle the bounds will be truncated. +func MVCCScanRangeTombstones( + ctx context.Context, reader Reader, start, end roachpb.Key, ts hlc.Timestamp, +) ([]MVCCRangeKey, error) { + var tombstones []MVCCRangeKey + iter := NewMVCCRangeKeyIterator(reader, MVCCRangeKeyIterOptions{ + LowerBound: start, + UpperBound: end, + MaxTimestamp: ts, + }) + for { + ok, err := iter.Valid() + if err != nil { + return nil, err + } + if !ok { + break + } + if err := ctx.Err(); err != nil { + return nil, err + } + // Skip non-tombstone range keys (we don't expect these to exist currently). + if len(iter.Value()) == 0 { + tombstones = append(tombstones, iter.Key()) + } + iter.Next() + } + return tombstones, nil +} diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index cd3f571bf4e0..761c06320bec 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -55,17 +55,20 @@ import ( // resolve_intent t= k= [status=] // check_intent k= [none] // -// cput [t=] [ts=[,]] [resolve [status=]] k= v= [raw] [cond=] -// del [t=] [ts=[,]] [resolve [status=]] k= -// del_range [t=] [ts=[,]] [resolve [status=]] k= [end=] [max=] [returnKeys] -// get [t=] [ts=[,]] [resolve [status=]] k= [inconsistent] [tombstones] [failOnMoreRecent] [localUncertaintyLimit=[,]] [globalUncertaintyLimit=[,]] -// increment [t=] [ts=[,]] [resolve [status=]] k= [inc=] -// put [t=] [ts=[,]] [resolve [status=]] k= v= [raw] -// scan [t=] [ts=[,]] [resolve [status=]] k= [end=] [inconsistent] [tombstones] [reverse] [failOnMoreRecent] [localUncertaintyLimit=[,]] [globalUncertaintyLimit=[,]] [max=] [targetbytes=] [avoidExcess] [allowEmpty] +// cput [t=] [ts=[,]] [resolve [status=]] k= v= [raw] [cond=] +// del [t=] [ts=[,]] [resolve [status=]] k= +// del_range [t=] [ts=[,]] [resolve [status=]] k= [end=] [max=] [returnKeys] +// del_range_ts [ts=[,]] k= end= +// get [t=] [ts=[,]] [resolve [status=]] k= [inconsistent] [tombstones] [failOnMoreRecent] [localUncertaintyLimit=[,]] [globalUncertaintyLimit=[,]] +// increment [t=] [ts=[,]] [resolve [status=]] k= [inc=] +// put [t=] [ts=[,]] [resolve [status=]] k= v= [raw] +// scan [t=] [ts=[,]] [resolve [status=]] k= [end=] [inconsistent] [tombstones] [reverse] [failOnMoreRecent] [localUncertaintyLimit=[,]] [globalUncertaintyLimit=[,]] [max=] [targetbytes=] [avoidExcess] [allowEmpty] +// scan_range_keys k= end= [ts=[,]] // // merge [ts=[,]] k= v= [raw] // -// clear_range k= end= +// clear_range k= end= +// clear_range_key k= end= [ts=[,]] // // Where `` can be a simple string, or a string // prefixed by the following characters: @@ -112,8 +115,25 @@ func TestMVCCHistories(t *testing.T) { defer engine.Close() reportDataEntries := func(buf *redact.StringBuilder) error { - hasData := false - err := engine.MVCCIterate(span.Key, span.EndKey, MVCCKeyAndIntentsIterKind, func(r MVCCKeyValue) error { + var hasData bool + + iter := NewMVCCRangeKeyIterator(engine, MVCCRangeKeyIterOptions{ + LowerBound: span.Key, + UpperBound: span.EndKey, + }) + defer iter.Close() + for { + if ok, err := iter.Valid(); err != nil { + return err + } else if !ok { + break + } + hasData = true + buf.Printf("range key: %s -> %+v\n", iter.Key(), iter.Value()) + iter.Next() + } + + err = engine.MVCCIterate(span.Key, span.EndKey, MVCCKeyAndIntentsIterKind, func(r MVCCKeyValue) error { hasData = true if r.Key.Timestamp.IsEmpty() { // Meta is at timestamp zero. @@ -396,15 +416,18 @@ var commands = map[string]cmd{ // TODO(nvanbenschoten): test "resolve_intent_range". "check_intent": {typReadOnly, cmdCheckIntent}, - "clear_range": {typDataUpdate, cmdClearRange}, - "cput": {typDataUpdate, cmdCPut}, - "del": {typDataUpdate, cmdDelete}, - "del_range": {typDataUpdate, cmdDeleteRange}, - "get": {typReadOnly, cmdGet}, - "increment": {typDataUpdate, cmdIncrement}, - "merge": {typDataUpdate, cmdMerge}, - "put": {typDataUpdate, cmdPut}, - "scan": {typReadOnly, cmdScan}, + "clear_range": {typDataUpdate, cmdClearRange}, + "clear_range_key": {typDataUpdate, cmdClearRangeKey}, + "cput": {typDataUpdate, cmdCPut}, + "del": {typDataUpdate, cmdDelete}, + "del_range": {typDataUpdate, cmdDeleteRange}, + "del_range_ts": {typDataUpdate, cmdDeleteRangeTombstone}, + "get": {typReadOnly, cmdGet}, + "increment": {typDataUpdate, cmdIncrement}, + "merge": {typDataUpdate, cmdMerge}, + "put": {typDataUpdate, cmdPut}, + "scan": {typReadOnly, cmdScan}, + "scan_range_ts": {typReadOnly, cmdScanRangeTombstone}, } func cmdTxnAdvance(e *evalCtx) error { @@ -584,6 +607,16 @@ func cmdClearRange(e *evalCtx) error { return e.engine.ClearMVCCRangeAndIntents(key, endKey) } +func cmdClearRangeKey(e *evalCtx) error { + key, endKey := e.getKeyRange() + ts := e.getTs(nil) + return e.engine.ExperimentalClearMVCCRangeKey(MVCCRangeKey{ + StartKey: key, + EndKey: endKey, + Timestamp: ts, + }) +} + func cmdCPut(e *evalCtx) error { txn := e.getTxn(optional) ts := e.getTs(txn) @@ -660,6 +693,24 @@ func cmdDeleteRange(e *evalCtx) error { }) } +func cmdDeleteRangeTombstone(e *evalCtx) error { + key, endKey := e.getKeyRange() + ts := e.getTs(nil) + + return e.withWriter("del_range", func(rw ReadWriter) error { + err := ExperimentalMVCCDeleteRangeUsingTombstone(e.ctx, rw, nil, key, endKey, ts, 0) + if err != nil { + return err + } + e.results.buf.Printf("del_range_ts: %s\n", MVCCRangeKey{ + StartKey: key, + EndKey: endKey, + Timestamp: ts, + }) + return nil + }) +} + func cmdGet(e *evalCtx) error { txn := e.getTxn(optional) key := e.getKey() @@ -832,6 +883,23 @@ func cmdScan(e *evalCtx) error { return err } +func cmdScanRangeTombstone(e *evalCtx) error { + key, endKey := e.getKeyRange() + ts := e.getTs(nil) + + tombstones, err := MVCCScanRangeTombstones(e.ctx, e.engine, key, endKey, ts) + if err != nil { + return err + } + for _, tombstone := range tombstones { + e.results.buf.Printf("scan_range_ts: %s\n", tombstone) + } + if len(tombstones) == 0 { + e.results.buf.Printf("scan_range_ts: %v-%v -> \n", key, endKey) + } + return nil +} + // evalCtx stored the current state of the environment of a running // script. type evalCtx struct { diff --git a/pkg/storage/mvcc_incremental_iterator.go b/pkg/storage/mvcc_incremental_iterator.go index 0beb1b464ed5..a199c7329cd9 100644 --- a/pkg/storage/mvcc_incremental_iterator.go +++ b/pkg/storage/mvcc_incremental_iterator.go @@ -485,6 +485,21 @@ func (i *MVCCIncrementalIterator) UnsafeKey() MVCCKey { return i.iter.UnsafeKey() } +// HasPointAndRange implements SimpleMVCCIterator. +func (i *MVCCIncrementalIterator) HasPointAndRange() (bool, bool) { + panic("not implemented") +} + +// RangeBounds implements SimpleMVCCIterator. +func (i *MVCCIncrementalIterator) RangeBounds() (roachpb.Key, roachpb.Key) { + panic("not implemented") +} + +// RangeKeys implements SimpleMVCCIterator. +func (i *MVCCIncrementalIterator) RangeKeys() []MVCCRangeKeyValue { + panic("not implemented") +} + // UnsafeValue returns the same value as Value, but the memory is invalidated on // the next call to {Next,Reset,Close}. func (i *MVCCIncrementalIterator) UnsafeValue() []byte { diff --git a/pkg/storage/mvcc_key.go b/pkg/storage/mvcc_key.go index 02cb4016ba92..9c85f944b825 100644 --- a/pkg/storage/mvcc_key.go +++ b/pkg/storage/mvcc_key.go @@ -173,6 +173,12 @@ func encodeMVCCKeyToBuf(buf []byte, key MVCCKey, keyLen int) { } } +// encodeMVCCKeyPrefix encodes an MVCC user key (without timestamp) into its +// Pebble prefix representation. +func encodeMVCCKeyPrefix(key roachpb.Key) []byte { + return EncodeMVCCKey(MVCCKey{Key: key}) +} + // encodeMVCCTimestamp encodes an MVCC timestamp into its Pebble // representation, excluding length suffix and sentinel byte. func encodeMVCCTimestamp(ts hlc.Timestamp) []byte { @@ -287,3 +293,66 @@ func decodeMVCCTimestampSuffix(encodedTS []byte) (hlc.Timestamp, error) { } return decodeMVCCTimestamp(encodedTS[:encodedLen-1]) } + +// MVCCRangeKey is a versioned key span. +type MVCCRangeKey struct { + StartKey roachpb.Key + EndKey roachpb.Key + Timestamp hlc.Timestamp +} + +// Clone returns a copy of the range key. +func (k MVCCRangeKey) Clone() MVCCRangeKey { + // k is already a copy, but byte slices must be cloned. + k.StartKey = k.StartKey.Clone() + k.EndKey = k.EndKey.Clone() + return k +} + +// Compare returns -1 if this key is less than the given key, 0 if they're +// equal, or 1 if the given key is greater than this. Comparison is by +// start,timestamp,end, where larger timestamps sort before smaller ones except +// empty ones which sort first (like elsewhere in MVCC). +func (k MVCCRangeKey) Compare(o MVCCRangeKey) int { + if c := k.StartKey.Compare(o.StartKey); c != 0 { + return c + } + if k.Timestamp.IsEmpty() && !o.Timestamp.IsEmpty() { + return -1 + } else if !k.Timestamp.IsEmpty() && o.Timestamp.IsEmpty() { + return 1 + } else if c := k.Timestamp.Compare(o.Timestamp); c != 0 { + return -c // timestamps sort in reverse + } + return k.EndKey.Compare(o.EndKey) +} + +// String formats the range key. +func (k MVCCRangeKey) String() string { + s := roachpb.Span{Key: k.StartKey, EndKey: k.EndKey}.String() + if !k.Timestamp.IsEmpty() { + s += fmt.Sprintf("/%s", k.Timestamp) + } + return s +} + +// Validate returns an error if the range key is invalid. +func (k MVCCRangeKey) Validate() (err error) { + return errors.Wrapf(k.validate(), "invalid range key %s", k) +} + +func (k MVCCRangeKey) validate() error { + if k.StartKey == nil { + return errors.Errorf("no start key") + } + if k.EndKey == nil { + return errors.Errorf("no end key") + } + if k.Timestamp.IsEmpty() { + return errors.Errorf("no timestamp") + } + if k.StartKey.Compare(k.EndKey) > 0 { + return errors.Errorf("start key %s is after end key %s", k.StartKey, k.EndKey) + } + return nil +} diff --git a/pkg/storage/mvcc_key_test.go b/pkg/storage/mvcc_key_test.go index 63af9beda842..0b651f2838c7 100644 --- a/pkg/storage/mvcc_key_test.go +++ b/pkg/storage/mvcc_key_test.go @@ -238,3 +238,94 @@ func BenchmarkDecodeMVCCKey(b *testing.B) { } benchmarkDecodeMVCCKeyResult = mvccKey // avoid compiler optimizing away function call } + +func TestMVCCRangeKeyString(t *testing.T) { + defer leaktest.AfterTest(t)() + + testcases := map[string]struct { + rk MVCCRangeKey + expect string + }{ + "empty": {MVCCRangeKey{}, "/Min"}, + "only start": {MVCCRangeKey{StartKey: roachpb.Key("foo")}, "foo"}, + "only end": {MVCCRangeKey{EndKey: roachpb.Key("foo")}, "{/Min-foo}"}, + "only timestamp": {MVCCRangeKey{Timestamp: hlc.Timestamp{Logical: 1}}, "/Min/0,1"}, + "only span": {MVCCRangeKey{StartKey: roachpb.Key("a"), EndKey: roachpb.Key("z")}, "{a-z}"}, + "all": {MVCCRangeKey{StartKey: roachpb.Key("a"), EndKey: roachpb.Key("z"), Timestamp: hlc.Timestamp{Logical: 1}}, "{a-z}/0,1"}, + "all overlapping": {MVCCRangeKey{StartKey: roachpb.Key("ab"), EndKey: roachpb.Key("af"), Timestamp: hlc.Timestamp{Logical: 1}}, "a{b-f}/0,1"}, + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + require.Equal(t, tc.expect, tc.rk.String()) + }) + } +} + +func TestMVCCRangeKeyCompare(t *testing.T) { + defer leaktest.AfterTest(t)() + + ab1 := MVCCRangeKey{roachpb.Key("a"), roachpb.Key("b"), hlc.Timestamp{Logical: 1}} + ac1 := MVCCRangeKey{roachpb.Key("a"), roachpb.Key("c"), hlc.Timestamp{Logical: 1}} + ac2 := MVCCRangeKey{roachpb.Key("a"), roachpb.Key("c"), hlc.Timestamp{Logical: 2}} + bc0 := MVCCRangeKey{roachpb.Key("b"), roachpb.Key("c"), hlc.Timestamp{Logical: 0}} + bc1 := MVCCRangeKey{roachpb.Key("b"), roachpb.Key("c"), hlc.Timestamp{Logical: 1}} + bc3 := MVCCRangeKey{roachpb.Key("b"), roachpb.Key("c"), hlc.Timestamp{Logical: 3}} + bd4 := MVCCRangeKey{roachpb.Key("b"), roachpb.Key("d"), hlc.Timestamp{Logical: 4}} + + testcases := map[string]struct { + a MVCCRangeKey + b MVCCRangeKey + expect int + }{ + "equal": {ac1, ac1, 0}, + "start lt": {ac1, bc1, -1}, + "start gt": {bc1, ac1, 1}, + "end lt": {ab1, ac1, -1}, + "end gt": {ac1, ab1, 1}, + "time lt": {ac2, ac1, -1}, // MVCC timestamps sort in reverse order + "time gt": {ac1, ac2, 1}, // MVCC timestamps sort in reverse order + "empty time lt set": {bc0, bc1, -1}, // empty MVCC timestamps sort before non-empty + "set time gt empty": {bc1, bc0, 1}, // empty MVCC timestamps sort before non-empty + "start time precedence": {ac2, bc3, -1}, // a before b, but 3 before 2; key takes precedence + "time end precedence": {bd4, bc3, -1}, // c before d, but 4 before 3; time takes precedence + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + require.Equal(t, tc.expect, tc.a.Compare(tc.b)) + }) + } +} + +func TestMVCCRangeKeyValidate(t *testing.T) { + defer leaktest.AfterTest(t)() + + a := roachpb.Key("a") + b := roachpb.Key("b") + blank := roachpb.Key("") + ts1 := hlc.Timestamp{Logical: 1} + + testcases := map[string]struct { + rangeKey MVCCRangeKey + expectErr string // empty if no error + }{ + "valid": {MVCCRangeKey{StartKey: a, EndKey: b, Timestamp: ts1}, ""}, + "start at end": {MVCCRangeKey{StartKey: a, EndKey: a, Timestamp: ts1}, ""}, + "blank keys": {MVCCRangeKey{StartKey: blank, EndKey: blank, Timestamp: ts1}, ""}, // equivalent to MinKey + "no start": {MVCCRangeKey{EndKey: b, Timestamp: ts1}, "{/Min-b}/0,1: no start key"}, + "no end": {MVCCRangeKey{StartKey: a, Timestamp: ts1}, "a/0,1: no end key"}, + "no timestamp": {MVCCRangeKey{StartKey: a, EndKey: b}, "{a-b}: no timestamp"}, + "empty": {MVCCRangeKey{}, "/Min: no start key"}, + "end before start": {MVCCRangeKey{StartKey: b, EndKey: a, Timestamp: ts1}, `{b-a}/0,1: start key "b" is after end key "a"`}, + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + err := tc.rangeKey.Validate() + if tc.expectErr == "" { + require.NoError(t, err) + } else { + require.Error(t, err) + require.Contains(t, err.Error(), tc.expectErr) + } + }) + } +} diff --git a/pkg/storage/mvcc_range_key_iterator.go b/pkg/storage/mvcc_range_key_iterator.go new file mode 100644 index 000000000000..c7911ed3dd2a --- /dev/null +++ b/pkg/storage/mvcc_range_key_iterator.go @@ -0,0 +1,269 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package storage + +import ( + "bytes" + + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/errors" +) + +// MVCCRangeKeyIterOptions are options for an MVCCRangeKeyIterator. +type MVCCRangeKeyIterOptions struct { + // LowerBound sets the inclusive lower bound of the iterator. Range keys that + // straddle the bound will have their start key truncated to it. + // + // NB: It may be tempting to use an MVCCKey here and include a timestamp, but + // this would be pointless: giving e.g. a@4 would skip a range key starting at + // a@5, but the range key would logically exist at the adjacent a.Next()@5 so + // it would be emitted almost immediately anyway. + LowerBound roachpb.Key + // UpperBound sets the exclusive upper bound of the iterator. Range keys that + // straddle the upper bound will have their end key truncated to it. + UpperBound roachpb.Key + // MinTimestamp sets the inclusive lower timestamp bound for the iterator. + MinTimestamp hlc.Timestamp + // MaxTimestamp sets the inclusive upper timestamp bound for the iterator. + MaxTimestamp hlc.Timestamp + // Fragmented disables defragmentation, emitting non-deterministic fragments + // like SimpleMVCCIterator does. When enabled, this results in an iteration + // order of StartKey,Timestamp rather than EndKey,Timestamp. + Fragmented bool +} + +// MVCCRangeKeyIterator is an iterator over range keys in an engine. Unlike +// SimpleMVCCIterator, range keys are defragmented into contiguous deterministic +// range keys. It does not support seeking or backtracking, see +// MVCCRangeKeyIterOptions for lower/upper bounds and other options. +// +// Iteration is in EndKey,Timestamp order rather than StartKey,Timestamp. For +// example: [c-e)@2, [a-z)@3, [x-z)@1. This is a memory optimization when +// defragmenting, which allows emitting completed range keys as soon as +// possible, only buffering incomplete ones in memory. To emit in +// StartKey,Timestamp order, we would additionally need to buffer all complete +// range keys that start after the current incomplete ones -- in the worst case, +// a range key across the entire key span would require all other range keys to +// be buffered in memory. But see the Fragmented option to emit +// non-deterministic range key fragments in StartKey,Timestamp order. +type MVCCRangeKeyIterator struct { + iter MVCCIterator + opts MVCCRangeKeyIterOptions + incomplete []*MVCCRangeKeyValue // defragmentation buffer + complete []MVCCRangeKeyValue // queued for emission + completeIdx int // current Key() + err error +} + +// NewMVCCRangeKeyIterator sets up a new MVCCRangeKeyIterator and seeks to the +// first range key. The caller must call Close() when done. +func NewMVCCRangeKeyIterator(r Reader, opts MVCCRangeKeyIterOptions) *MVCCRangeKeyIterator { + iter := &MVCCRangeKeyIterator{ + iter: r.NewMVCCIterator(MVCCKeyIterKind, IterOptions{ + KeyTypes: IterKeyTypeRangesOnly, + LowerBound: opts.LowerBound, + UpperBound: opts.UpperBound, + // TODO(erikgrinaker): We do not set Min/MaxTimestampHint here, because + // both are required and it's apparently not always safe to use. + }), + opts: opts, + incomplete: make([]*MVCCRangeKeyValue, 0), + complete: make([]MVCCRangeKeyValue, 0), + } + + // Seek the iterator to the lower bound and iterate until we've collected + // the first complete range key (if any). + iter.iter.SeekGE(MVCCKey{Key: opts.LowerBound}) + iter.findCompleteRangeKeys() + + return iter +} + +// findCompleteRangeKeys defragments range keys at the current iterator position +// and any subsequent iterator positions until it completes one or more range +// keys, populating p.complete. Current p.complete is discarded. +func (p *MVCCRangeKeyIterator) findCompleteRangeKeys() { + p.complete = p.complete[:0] + p.completeIdx = 0 + p.updateRangeKeys() + + for len(p.complete) == 0 { + if ok, err := p.iter.Valid(); err != nil { + p.err = err + return + } else if !ok { + break + } + p.iter.Next() + // NB: We update range keys even if Next() invalidates the iterator, because + // there may be incomplete range keys that become complete when the iterator + // is exhausted. + p.updateRangeKeys() + } +} + +// updateRangeKeys inspects the range keys at the current Pebble iterator +// position, defragments them in p.incomplete, and moves any completed +// range keys into p.complete. +func (p *MVCCRangeKeyIterator) updateRangeKeys() { + var startKey, endKey roachpb.Key + var rangeKeys []MVCCRangeKeyValue + + // If the iterator is exhausted, we still want to complete any remaining + // incomplete range keys. + if ok, err := p.iter.Valid(); err != nil { + p.err = err + return + } else if ok { + startKey, endKey = p.iter.RangeBounds() + rangeKeys = p.iter.RangeKeys() + } + + // Both rangeKeys and p.incomplete are sorted in descending timestamp order, + // so we iterate over them in lockstep and insert/update/delete p.incomplete + // as appropriate. + var tsIdx, rkIdx int + + for rkIdx < len(rangeKeys) { + rangeKey := rangeKeys[rkIdx] + + // Filter rangekeys by timestamp. + // + // TODO(erikgrinaker): This can be optimized to skip unnecessary comparisons + // since rangeKeys is sorted by timestamp. Maybe later. + if !p.opts.MinTimestamp.IsEmpty() && rangeKey.Key.Timestamp.Less(p.opts.MinTimestamp) { + rkIdx++ + continue + } + if !p.opts.MaxTimestamp.IsEmpty() && p.opts.MaxTimestamp.Less(rangeKey.Key.Timestamp) { + rkIdx++ + continue + } + + // If we're at the end of p.incomplete, this range key must be new. + if tsIdx >= len(p.incomplete) { + p.incomplete = append(p.incomplete, &MVCCRangeKeyValue{ + Key: MVCCRangeKey{ + StartKey: append(make([]byte, 0, len(startKey)), startKey...), + EndKey: append(make([]byte, 0, len(endKey)), endKey...), + Timestamp: rangeKey.Key.Timestamp, + }, + Value: append(make([]byte, 0, len(rangeKey.Value)), rangeKey.Value...), + }) + rkIdx++ + tsIdx++ + continue + } + + incomplete := p.incomplete[tsIdx] + cmp := incomplete.Key.Timestamp.Compare(rangeKey.Key.Timestamp) + switch { + // If the timestamps match, the key spans are adjacent or overlapping, and + // the values match then this range key extends the incomplete one. + case cmp == 0 && bytes.Compare(startKey, incomplete.Key.EndKey) <= 0 && + bytes.Equal(rangeKey.Value, incomplete.Value): + incomplete.Key.EndKey = append(incomplete.Key.EndKey[:0], endKey...) + tsIdx++ + rkIdx++ + + // This is a different range key at the same timestamp: complete the + // existing one and start a new one. + case cmp == 0: + p.complete = append(p.complete, *incomplete) + // NB: can't reuse slices, as they were placed in the completed range key. + incomplete.Key.StartKey = append(make([]byte, 0, len(startKey)), startKey...) + incomplete.Key.EndKey = append(make([]byte, 0, len(endKey)), endKey...) + incomplete.Value = append(make([]byte, 0, len(rangeKey.Value)), rangeKey.Value...) + + // This incomplete range key is not present at this range key: complete it + // and remove it from the list, then try again. + case cmp == 1: + p.complete = append(p.complete, *incomplete) + p.incomplete = append(p.incomplete[:tsIdx], p.incomplete[tsIdx+1:]...) + + // This range key is a new incomplete range key: start defragmenting it. + case cmp == -1: + p.incomplete = append(p.incomplete[:tsIdx+1], p.incomplete[tsIdx:]...) + p.incomplete[tsIdx] = &MVCCRangeKeyValue{ + Key: MVCCRangeKey{ + StartKey: append(make(roachpb.Key, 0, len(startKey)), startKey...), + EndKey: append(make(roachpb.Key, 0, len(endKey)), endKey...), + Timestamp: rangeKey.Key.Timestamp, + }, + Value: append(make([]byte, 0, len(rangeKey.Value)), rangeKey.Value...), + } + tsIdx++ + rkIdx++ + + default: + p.err = errors.Errorf("unexpected comparison result %d", cmp) + return + } + } + + // If the caller has requested fragments, we complete all range keys we found + // this iteration by resetting tsIdx to 0. The loop below handles the rest. + if p.opts.Fragmented { + tsIdx = 0 + } + + // If there are any remaining incomplete range keys, they must be complete: + // make them so. + for _, ts := range p.incomplete[tsIdx:] { + p.complete = append(p.complete, *ts) + } + p.incomplete = p.incomplete[:tsIdx] +} + +// Close frees up resources held by the iterator. +func (p *MVCCRangeKeyIterator) Close() { + p.iter.Close() + p.complete = nil + p.completeIdx = 0 +} + +// Next iterates to the next defragmented range key. Note the unusual iteration +// order, see struct comment for details. +func (p *MVCCRangeKeyIterator) Next() { + p.completeIdx++ + if p.completeIdx >= len(p.complete) { + p.iter.Next() + // NB: Called even if Next() fails, because we may have incomplete + // range keys that become complete when the iterator is exhausted. + p.findCompleteRangeKeys() + } +} + +// Key returns the current range key. It will not be invalidated by the +// iterator, but it will be shared by all callers. +func (p *MVCCRangeKeyIterator) Key() MVCCRangeKey { + return p.complete[p.completeIdx].Key +} + +// Value returns the value of the current range key. It will not be invalidated +// by the iterator, but it will be shared by all callers. +func (p *MVCCRangeKeyIterator) Value() []byte { + return p.complete[p.completeIdx].Value +} + +// Valid returns (true, nil) if the iterator points to a valid key, (false, nil) +// if the iterator is exhausted, or (false, error) if an error occurred during +// iteration. +func (p *MVCCRangeKeyIterator) Valid() (bool, error) { + if p.err != nil { + return false, p.err + } + if _, err := p.iter.Valid(); err != nil { + return false, err + } + return p.completeIdx < len(p.complete), nil +} diff --git a/pkg/storage/mvcc_range_key_iterator_test.go b/pkg/storage/mvcc_range_key_iterator_test.go new file mode 100644 index 000000000000..329de972fb35 --- /dev/null +++ b/pkg/storage/mvcc_range_key_iterator_test.go @@ -0,0 +1,292 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package storage + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/stretchr/testify/require" +) + +func TestMVCCRangeKeyIterator(t *testing.T) { + defer leaktest.AfterTest(t)() + + eng := NewDefaultInMemForTesting() + defer eng.Close() + + rangeKeys := []MVCCRangeKeyValue{ + rangeKV("b", "c", 3, "bc3"), + rangeKV("e", "g", 3, "eg3"), + rangeKV("d", "f", 5, "df5"), + rangeKV("f", "g", 5, "fg5"), + rangeKV("d", "f", 2, "df2"), + rangeKV("a", "m", 4, "az4"), // same value as below so these should merge into one + rangeKV("m", "z", 4, "az4"), + } + for _, rk := range rangeKeys { + require.NoError(t, eng.ExperimentalPutMVCCRangeKey(rk.Key, rk.Value)) + } + + testcases := map[string]struct { + opts MVCCRangeKeyIterOptions + expect []MVCCRangeKeyValue + }{ + "all range keys": { + MVCCRangeKeyIterOptions{}, + []MVCCRangeKeyValue{ + rangeKV("b", "c", 3, "bc3"), + rangeKV("d", "f", 5, "df5"), + rangeKV("d", "f", 2, "df2"), + rangeKV("f", "g", 5, "fg5"), + rangeKV("e", "g", 3, "eg3"), + rangeKV("a", "z", 4, "az4"), + }}, + "truncated range keys": { + MVCCRangeKeyIterOptions{ + LowerBound: roachpb.Key("c"), + UpperBound: roachpb.Key("e"), + }, + []MVCCRangeKeyValue{ + rangeKV("d", "e", 5, "df5"), + rangeKV("c", "e", 4, "az4"), + rangeKV("d", "e", 2, "df2"), + }}, + "truncation between range key bounds": { + MVCCRangeKeyIterOptions{ + LowerBound: roachpb.Key("ccc"), + UpperBound: roachpb.Key("eee"), + }, + []MVCCRangeKeyValue{ + rangeKV("d", "eee", 5, "df5"), + rangeKV("ccc", "eee", 4, "az4"), + rangeKV("e", "eee", 3, "eg3"), + rangeKV("d", "eee", 2, "df2"), + }}, + "fragmented range keys": { + MVCCRangeKeyIterOptions{ + Fragmented: true, + }, + []MVCCRangeKeyValue{ + rangeKV("a", "b", 4, "az4"), + rangeKV("b", "c", 4, "az4"), + rangeKV("b", "c", 3, "bc3"), + rangeKV("c", "d", 4, "az4"), + rangeKV("d", "e", 5, "df5"), + rangeKV("d", "e", 4, "az4"), + rangeKV("d", "e", 2, "df2"), + rangeKV("e", "f", 5, "df5"), + rangeKV("e", "f", 4, "az4"), + rangeKV("e", "f", 3, "eg3"), + rangeKV("e", "f", 2, "df2"), + rangeKV("f", "g", 5, "fg5"), + rangeKV("f", "g", 4, "az4"), + rangeKV("f", "g", 3, "eg3"), + rangeKV("g", "m", 4, "az4"), + rangeKV("m", "z", 4, "az4"), + }}, + "empty interval": { + MVCCRangeKeyIterOptions{ + LowerBound: roachpb.Key("A"), + UpperBound: roachpb.Key("Z"), + }, + nil}, + "zero-length interval": { + MVCCRangeKeyIterOptions{ + LowerBound: roachpb.Key("c"), + UpperBound: roachpb.Key("c"), + }, + nil}, + "end after start": { + MVCCRangeKeyIterOptions{ + LowerBound: roachpb.Key("e"), + UpperBound: roachpb.Key("d"), + }, + nil}, + "min timestamp": { + MVCCRangeKeyIterOptions{ + MinTimestamp: hlc.Timestamp{Logical: 3}, + }, + []MVCCRangeKeyValue{ + rangeKV("b", "c", 3, "bc3"), + rangeKV("d", "f", 5, "df5"), + rangeKV("f", "g", 5, "fg5"), + rangeKV("e", "g", 3, "eg3"), + rangeKV("a", "z", 4, "az4"), + }}, + "max timestamp": { + MVCCRangeKeyIterOptions{ + MaxTimestamp: hlc.Timestamp{Logical: 3}, + }, + []MVCCRangeKeyValue{ + rangeKV("b", "c", 3, "bc3"), + rangeKV("d", "f", 2, "df2"), + rangeKV("e", "g", 3, "eg3"), + }}, + "both timestamps": { + MVCCRangeKeyIterOptions{ + MinTimestamp: hlc.Timestamp{Logical: 3}, + MaxTimestamp: hlc.Timestamp{Logical: 3}, + }, + []MVCCRangeKeyValue{ + rangeKV("b", "c", 3, "bc3"), + rangeKV("e", "g", 3, "eg3"), + }}, + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + opts := tc.opts + if opts.UpperBound == nil { + opts.UpperBound = keys.MaxKey // appease pebbleIterator + } + iter := NewMVCCRangeKeyIterator(eng, opts) + defer iter.Close() + + var rangeKVs []MVCCRangeKeyValue + for { + ok, err := iter.Valid() + require.NoError(t, err) + if !ok { + break + } + rangeKVs = append(rangeKVs, MVCCRangeKeyValue{ + Key: iter.Key(), + Value: iter.Value(), + }) + iter.Next() + } + require.Equal(t, tc.expect, rangeKVs) + }) + } +} + +// TestMVCCRangeKeyIteratorTimestampBounds tests that MVCCRangeKeyIterator +// returns appropriate range bounds, even in corner cases where Pebble may +// return bounds with timestamps (if the versions for a key are split across SST +// boundaries). +func TestMVCCIRangeKeyteratorTimestampBounds(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + eng := NewDefaultInMemForTesting() + defer eng.Close() + db := eng.(*Pebble).db + + // First, just set up some regular old range keys. + require.NoError(t, eng.ExperimentalPutMVCCRangeKey(rangeKey("a", "z", 1), []byte("az1"))) + require.NoError(t, eng.ExperimentalPutMVCCRangeKey(rangeKey("b", "d", 4), []byte("bd4"))) + require.NoError(t, eng.ExperimentalPutMVCCRangeKey(rangeKey("e", "g", 3), []byte("eg3"))) + + // Then, write a range key with suffix bounds. The range key will be + // [b-e)@5=be5, but we'll write it with artificial fragment bounds + // [b-b@5), [b@5-d@2), [d@2-e) + require.NoError(t, db.Experimental().RangeKeySet( // [b-b@5) + EncodeMVCCKey(pointKey("b", 0)), + EncodeMVCCKey(pointKey("b", 5)), + encodeMVCCTimestampSuffix(hlc.Timestamp{Logical: 5}), + []byte("be5"), + nil, + )) + require.NoError(t, db.Experimental().RangeKeySet( // [b@5-d@2) + EncodeMVCCKey(pointKey("b", 5)), + EncodeMVCCKey(pointKey("d", 2)), + encodeMVCCTimestampSuffix(hlc.Timestamp{Logical: 5}), + []byte("be5"), + nil, + )) + require.NoError(t, db.Experimental().RangeKeySet( // [d@2-e) + EncodeMVCCKey(pointKey("d", 2)), + EncodeMVCCKey(pointKey("e", 0)), + encodeMVCCTimestampSuffix(hlc.Timestamp{Logical: 5}), + []byte("be5"), + nil, + )) + + // Scan the fragmented range keys. + iter := NewMVCCRangeKeyIterator(eng, MVCCRangeKeyIterOptions{ + Fragmented: true, + UpperBound: keys.MaxKey, + }) + defer iter.Close() + + var actual []MVCCRangeKeyValue + for { + ok, err := iter.Valid() + require.NoError(t, err) + if !ok { + break + } + actual = append(actual, MVCCRangeKeyValue{Key: iter.Key(), Value: iter.Value()}) + iter.Next() + } + require.Equal(t, []MVCCRangeKeyValue{ + rangeKV("a", "b", 1, "az1"), + rangeKV("b", "b\x00", 5, "be5"), + rangeKV("b", "b\x00", 4, "bd4"), + rangeKV("b", "b\x00", 1, "az1"), + rangeKV("b", "d", 5, "be5"), + rangeKV("b", "d", 4, "bd4"), + rangeKV("b", "d", 1, "az1"), + rangeKV("d", "d\x00", 5, "be5"), + rangeKV("d", "d\x00", 1, "az1"), + rangeKV("d", "e", 5, "be5"), + rangeKV("d", "e", 1, "az1"), + rangeKV("e", "g", 3, "eg3"), + rangeKV("e", "g", 1, "az1"), + rangeKV("g", "z", 1, "az1"), + }, actual) + + // Scan the defragmented range keys. + iter = NewMVCCRangeKeyIterator(eng, MVCCRangeKeyIterOptions{ + UpperBound: keys.MaxKey, + }) + defer iter.Close() + + actual = nil + for { + ok, err := iter.Valid() + require.NoError(t, err) + if !ok { + break + } + actual = append(actual, MVCCRangeKeyValue{Key: iter.Key(), Value: iter.Value()}) + iter.Next() + } + require.Equal(t, []MVCCRangeKeyValue{ + rangeKV("b", "d", 4, "bd4"), + rangeKV("b", "e", 5, "be5"), + rangeKV("e", "g", 3, "eg3"), + rangeKV("a", "z", 1, "az1"), + }, actual) +} + +func rangeKey(start, end string, ts int) MVCCRangeKey { + return MVCCRangeKey{ + StartKey: roachpb.Key(start), + EndKey: roachpb.Key(end), + Timestamp: hlc.Timestamp{Logical: int32(ts)}, + } +} + +func rangeKV(start, end string, ts int, value string) MVCCRangeKeyValue { + return MVCCRangeKeyValue{ + Key: rangeKey(start, end, ts), + Value: []byte(value), + } +} + +func pointKey(key string, ts int) MVCCKey { + return MVCCKey{Key: roachpb.Key(key), Timestamp: hlc.Timestamp{Logical: int32(ts)}} +} diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index 02e0bbdc8cbb..850a976205ea 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -481,6 +481,8 @@ func DefaultPebbleOptions() *pebble.Options { TablePropertyCollectors: PebbleTablePropertyCollectors, BlockPropertyCollectors: PebbleBlockPropertyCollectors, } + // Used for experimental MVCC range tombstones. + opts.Experimental.RangeKeys = new(pebble.RangeKeysArena) // Automatically flush 10s after the first range tombstone is added to a // memtable. This ensures that we can reclaim space even when there's no // activity on the database generating flushes. @@ -1108,6 +1110,31 @@ func (p *Pebble) ClearIterRange(iter MVCCIterator, start, end roachpb.Key) error return batch.Commit(true) } +// ExperimentalClearMVCCRangeKey implements the Engine interface. +func (p *Pebble) ExperimentalClearMVCCRangeKey(rangeKey MVCCRangeKey) error { + if err := rangeKey.Validate(); err != nil { + return err + } + return p.db.Experimental().RangeKeyUnset( + encodeMVCCKeyPrefix(rangeKey.StartKey), + encodeMVCCKeyPrefix(rangeKey.EndKey), + encodeMVCCTimestampSuffix(rangeKey.Timestamp), + pebble.Sync) +} + +// ExperimentalPutMVCCRangeKey implements the Engine interface. +func (p *Pebble) ExperimentalPutMVCCRangeKey(rangeKey MVCCRangeKey, value []byte) error { + if err := rangeKey.Validate(); err != nil { + return err + } + return p.db.Experimental().RangeKeySet( + encodeMVCCKeyPrefix(rangeKey.StartKey), + encodeMVCCKeyPrefix(rangeKey.EndKey), + encodeMVCCTimestampSuffix(rangeKey.Timestamp), + value, + pebble.Sync) +} + // Merge implements the Engine interface. func (p *Pebble) Merge(key MVCCKey, value []byte) error { if len(key.Key) == 0 { @@ -1882,6 +1909,14 @@ func (p *pebbleReadOnly) ClearIterRange(iter MVCCIterator, start, end roachpb.Ke panic("not implemented") } +func (p *pebbleReadOnly) ExperimentalPutMVCCRangeKey(_ MVCCRangeKey, _ []byte) error { + panic("not implemented") +} + +func (p *pebbleReadOnly) ExperimentalClearMVCCRangeKey(_ MVCCRangeKey) error { + panic("not implemented") +} + func (p *pebbleReadOnly) Merge(key MVCCKey, value []byte) error { panic("not implemented") } diff --git a/pkg/storage/pebble_batch.go b/pkg/storage/pebble_batch.go index 117c2995cfa8..cce85cfaecc2 100644 --- a/pkg/storage/pebble_batch.go +++ b/pkg/storage/pebble_batch.go @@ -413,6 +413,31 @@ func (p *pebbleBatch) ClearIterRange(iter MVCCIterator, start, end roachpb.Key) return nil } +// ExperimentalClearMVCCRangeKey implements the Engine interface. +func (p *pebbleBatch) ExperimentalClearMVCCRangeKey(rangeKey MVCCRangeKey) error { + if err := rangeKey.Validate(); err != nil { + return err + } + return p.db.Experimental().RangeKeyUnset( + encodeMVCCKeyPrefix(rangeKey.StartKey), + encodeMVCCKeyPrefix(rangeKey.EndKey), + encodeMVCCTimestampSuffix(rangeKey.Timestamp), + nil) +} + +// ExperimentalPutMVCCRangeKey implements the Batch interface. +func (p *pebbleBatch) ExperimentalPutMVCCRangeKey(rangeKey MVCCRangeKey, value []byte) error { + if err := rangeKey.Validate(); err != nil { + return err + } + return p.db.Experimental().RangeKeySet( + encodeMVCCKeyPrefix(rangeKey.StartKey), + encodeMVCCKeyPrefix(rangeKey.EndKey), + encodeMVCCTimestampSuffix(rangeKey.Timestamp), + value, + nil) +} + // Merge implements the Batch interface. func (p *pebbleBatch) Merge(key MVCCKey, value []byte) error { if len(key.Key) == 0 { diff --git a/pkg/storage/pebble_iterator.go b/pkg/storage/pebble_iterator.go index cc4a5ebe502e..87a24a1c6114 100644 --- a/pkg/storage/pebble_iterator.go +++ b/pkg/storage/pebble_iterator.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" @@ -166,6 +167,8 @@ func (p *pebbleIterator) init(handle pebble.Reader, iterToClone cloneableIter, o panic("min timestamp hint set without max timestamp hint") } + p.options.KeyTypes = opts.KeyTypes + if doClone { var err error if p.iter, err = iterToClone.Clone(); err != nil { @@ -332,6 +335,17 @@ func (p *pebbleIterator) Valid() (bool, error) { // NB: A Pebble Iterator always returns Valid()==false when an error is // present. If Valid() is true, there is no error. if ok := p.iter.Valid(); ok { + // TODO(erikgrinaker): Pebble does not seem to respect bounds for range + // keys, so for now we invalidate the iterator here instead. + if hasPoint, hasRange := p.HasPointAndRange(); !hasPoint && hasRange { + start, end := p.iter.RangeBounds() + if len(p.options.UpperBound) > 0 && bytes.Compare(start, p.options.UpperBound) >= 0 { + return false, nil + } + if len(p.options.LowerBound) > 0 && bytes.Compare(end, p.options.LowerBound) <= 0 { + return false, nil + } + } // The MVCCIterator interface is broken in that it silently discards // the error when UnsafeKey(), Key() are unable to parse the key as // an MVCCKey. This is especially problematic if the caller is @@ -575,6 +589,88 @@ func (p *pebbleIterator) ValueProto(msg protoutil.Message) error { return protoutil.Unmarshal(value, msg) } +// HasPointAndRange implements the MVCCIterator interface. +func (p *pebbleIterator) HasPointAndRange() (bool, bool) { + return p.iter.HasPointAndRange() +} + +// RangeBounds implements the MVCCIterator interface. +func (p *pebbleIterator) RangeBounds() (roachpb.Key, roachpb.Key) { + start, end := p.iter.RangeBounds() + + // TODO(erikgrinaker): Pebble does not yet truncate range keys to the + // LowerBound or UpperBound, so we truncate them here. + if len(p.options.LowerBound) > 0 && bytes.Compare(start, p.options.LowerBound) < 0 { + start = p.options.LowerBound + } + if len(p.options.UpperBound) > 0 && bytes.Compare(end, p.options.UpperBound) > 0 { + end = p.options.UpperBound + } + + // TODO(erikgrinaker): We should surface this error somehow, but for now + // we follow UnsafeKey()'s example and silenty return empty bounds. + startKey, err := DecodeMVCCKey(start) + if err != nil { + return nil, nil + } + endKey, err := DecodeMVCCKey(end) + if err != nil { + return nil, nil + } + + // It's possible for a range key to straddle an SST boundary in between two + // versions/timestamps of a key, e.g. [b-c@5),[c@5-e). In this case, Pebble + // will return bounds with timestamps. Because we don't allow writing range + // keys with timestamp bounds, we know that all timestamps of the key (on both + // sides of the boundary) already have the complete set of range key values at + // all timestamps. + // + // We choose the simplest solution here, which is to allow overlapping range + // bounds such that at c@6 we claim [b-c\0) and at c@5 we claim [c-e). It + // would be possible to guarantee non-overlapping bounds with additional state + // tracking and seeks, but that appears non-trivial for every edge case and + // has a risk of leaving gaps, which seems more scary than dealing with + // overlaps. + // + // TODO(erikgrinaker): Consider coming up with a solution to guarantee + // non-overlapping bounds. + if !startKey.Timestamp.IsEmpty() { + startKey.Timestamp = hlc.Timestamp{} + } + if !endKey.Timestamp.IsEmpty() { + endKey.Key = endKey.Key.Next() + endKey.Timestamp = hlc.Timestamp{} + } + return startKey.Key, endKey.Key +} + +// RangeKeys implements the MVCCIterator interface. +// +// TODO(erikgrinaker): Add unit tests for the range key methods. +func (p *pebbleIterator) RangeKeys() []MVCCRangeKeyValue { + startKey, endKey := p.RangeBounds() + rangeKeys := p.iter.RangeKeys() + rangeValues := make([]MVCCRangeKeyValue, 0, len(rangeKeys)) + + for _, rangeKey := range rangeKeys { + timestamp, err := decodeMVCCTimestampSuffix(rangeKey.Suffix) + if err != nil { + // TODO(erikgrinaker): We should surface this error somehow, but for now + // we follow UnsafeKey()'s example and silenty return empty bounds. + continue + } + rangeValues = append(rangeValues, MVCCRangeKeyValue{ + Key: MVCCRangeKey{ + StartKey: startKey, + EndKey: endKey, + Timestamp: timestamp, + }, + Value: rangeKey.Value, + }) + } + return rangeValues +} + // ComputeStats implements the MVCCIterator interface. func (p *pebbleIterator) ComputeStats( start, end roachpb.Key, nowNanos int64, diff --git a/pkg/storage/sst_iterator.go b/pkg/storage/sst_iterator.go index 9bd8b49b1b39..c5c81b756c37 100644 --- a/pkg/storage/sst_iterator.go +++ b/pkg/storage/sst_iterator.go @@ -158,3 +158,18 @@ func (r *sstIterator) UnsafeKey() MVCCKey { func (r *sstIterator) UnsafeValue() []byte { return r.value } + +// HasPointAndRange implements SimpleMVCCIterator. +func (r *sstIterator) HasPointAndRange() (bool, bool) { + panic("not implemented") +} + +// RangeBounds implements SimpleMVCCIterator. +func (r *sstIterator) RangeBounds() (roachpb.Key, roachpb.Key) { + panic("not implemented") +} + +// RangeKeys implements SimpleMVCCIterator. +func (r *sstIterator) RangeKeys() []MVCCRangeKeyValue { + panic("not implemented") +} diff --git a/pkg/storage/sst_writer.go b/pkg/storage/sst_writer.go index 9b58c8afebc1..3643360bf4c3 100644 --- a/pkg/storage/sst_writer.go +++ b/pkg/storage/sst_writer.go @@ -118,6 +118,16 @@ func (fw *SSTWriter) ClearMVCCRange(start, end MVCCKey) error { return fw.clearRange(start, end) } +// ExperimentalPutMVCCRangeKey implements the Writer interface. +func (fw *SSTWriter) ExperimentalPutMVCCRangeKey(rangeKey MVCCRangeKey, value []byte) error { + panic("not implemented") +} + +// ExperimentalClearMVCCRangeKey implements the Writer interface. +func (fw *SSTWriter) ExperimentalClearMVCCRangeKey(rangeKey MVCCRangeKey) error { + panic("not implemented") +} + func (fw *SSTWriter) clearRange(start, end MVCCKey) error { if fw.fw == nil { return errors.New("cannot call ClearRange on a closed writer") diff --git a/pkg/storage/testdata/mvcc_histories/delete_range_tombstone b/pkg/storage/testdata/mvcc_histories/delete_range_tombstone new file mode 100644 index 000000000000..a69416ee7163 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/delete_range_tombstone @@ -0,0 +1,111 @@ +# TODO(erikgrinaker): The MVCC API does not respect range tombstones yet, so +# we don't test point keys because they remain unaffected. +# TODO(erikgrinaker): This needs conflict tests, implement later. + +# Write some range tombstones. Some will abut and merge. +run ok +del_range_ts k=b end=c ts=3 +del_range_ts k=e end=g ts=3 +del_range_ts k=d end=f ts=5 +del_range_ts k=d end=f ts=2 +del_range_ts k=m end=z ts=1 +del_range_ts k=a end=m ts=4 +del_range_ts k=m end=z ts=4 +---- +del_range_ts: {b-c}/3.000000000,0 +del_range_ts: {e-g}/3.000000000,0 +del_range_ts: {d-f}/5.000000000,0 +del_range_ts: {d-f}/2.000000000,0 +del_range_ts: {m-z}/1.000000000,0 +del_range_ts: {a-m}/4.000000000,0 +del_range_ts: {m-z}/4.000000000,0 +>> at end: +range key: {b-c}/3.000000000,0 -> [] +range key: {d-f}/5.000000000,0 -> [] +range key: {d-f}/2.000000000,0 -> [] +range key: {e-g}/3.000000000,0 -> [] +range key: {a-z}/4.000000000,0 -> [] +range key: {m-z}/1.000000000,0 -> [] + +# Scan all tombstones. +run ok +scan_range_ts k=a end=z +---- +scan_range_ts: {b-c}/3.000000000,0 +scan_range_ts: {d-f}/5.000000000,0 +scan_range_ts: {d-f}/2.000000000,0 +scan_range_ts: {e-g}/3.000000000,0 +scan_range_ts: {a-z}/4.000000000,0 +scan_range_ts: {m-z}/1.000000000,0 + +# Scan truncates tombstones to scan bounds. +run ok +scan_range_ts k=c end=e +---- +scan_range_ts: {d-e}/5.000000000,0 +scan_range_ts: {c-e}/4.000000000,0 +scan_range_ts: {d-e}/2.000000000,0 + +# Scan truncates tombstones to scan bounds when not on tombstone bounds. +run ok +scan_range_ts k=ccc end=eee +---- +scan_range_ts: {d-eee}/5.000000000,0 +scan_range_ts: {ccc-eee}/4.000000000,0 +scan_range_ts: e{-ee}/3.000000000,0 +scan_range_ts: {d-eee}/2.000000000,0 + +# Scan at a timestamp. +run ok +scan_range_ts k=a end=z ts=3 +---- +scan_range_ts: {b-c}/3.000000000,0 +scan_range_ts: {d-f}/2.000000000,0 +scan_range_ts: {e-g}/3.000000000,0 +scan_range_ts: {m-z}/1.000000000,0 + +# Empty scans. +run ok +scan_range_ts k=A end=Z +scan_range_ts k=c end=c +scan_range_ts k=z end=a +---- +scan_range_ts: "A"-"Z" -> +scan_range_ts: "c"-"c" -> +scan_range_ts: "z"-"a" -> + +# Remove some tombstones, both a non-existant one and a span across two +# tombstones. +run ok +clear_range_key k=a end=z ts=10 +clear_range_key k=b end=g ts=3 +---- +>> at end: +range key: {d-f}/5.000000000,0 -> [] +range key: {d-f}/2.000000000,0 -> [] +range key: {a-z}/4.000000000,0 -> [] +range key: {m-z}/1.000000000,0 -> [] + +# Remove the middle section of the [a-z)@4 tombstone. Do it twice, to +# make sure clears are idempotent. +run ok +clear_range_key k=k end=n ts=4 +clear_range_key k=k end=n ts=4 +---- +>> at end: +range key: {d-f}/5.000000000,0 -> [] +range key: {d-f}/2.000000000,0 -> [] +range key: {a-k}/4.000000000,0 -> [] +range key: {n-z}/4.000000000,0 -> [] +range key: {m-z}/1.000000000,0 -> [] + +# Remove portions of the [a-k)@4 and [n-z)@4 tombstones in one operation. +run ok +clear_range_key k=eee end=ttt ts=4 +---- +>> at end: +range key: {a-eee}/4.000000000,0 -> [] +range key: {d-f}/5.000000000,0 -> [] +range key: {d-f}/2.000000000,0 -> [] +range key: {ttt-z}/4.000000000,0 -> [] +range key: {m-z}/1.000000000,0 -> [] diff --git a/pkg/util/hlc/timestamp.go b/pkg/util/hlc/timestamp.go index 0263ac1d3946..b84b39207fa6 100644 --- a/pkg/util/hlc/timestamp.go +++ b/pkg/util/hlc/timestamp.go @@ -54,6 +54,22 @@ func (t Timestamp) LessEq(s Timestamp) bool { return t.WallTime < s.WallTime || (t.WallTime == s.WallTime && t.Logical <= s.Logical) } +// Compare returns -1 if this timestamp is lesser than the given timestamp, 1 if +// it is greater, and 0 if they are equal. +func (t Timestamp) Compare(s Timestamp) int { + if t.WallTime > s.WallTime { + return 1 + } else if t.WallTime < s.WallTime { + return -1 + } else if t.Logical > s.Logical { + return 1 + } else if t.Logical < s.Logical { + return -1 + } else { + return 0 + } +} + // String implements the fmt.Stringer interface. func (t Timestamp) String() string { // The following code was originally written as diff --git a/pkg/util/hlc/timestamp_test.go b/pkg/util/hlc/timestamp_test.go index 59e45dde2a87..9ba6ffe81764 100644 --- a/pkg/util/hlc/timestamp_test.go +++ b/pkg/util/hlc/timestamp_test.go @@ -15,6 +15,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func makeTS(walltime int64, logical int32) Timestamp { @@ -85,6 +86,38 @@ func TestLessEq(t *testing.T) { } } +func TestCompare(t *testing.T) { + w0l0 := Timestamp{} + w1l1 := Timestamp{WallTime: 1, Logical: 1} + w1l2 := Timestamp{WallTime: 1, Logical: 2} + w2l1 := Timestamp{WallTime: 2, Logical: 1} + w2l2 := Timestamp{WallTime: 2, Logical: 2} + + testcases := map[string]struct { + a Timestamp + b Timestamp + expect int + }{ + "empty eq empty": {w0l0, w0l0, 0}, + "empty lt set": {w0l0, w1l1, -1}, + "set gt empty": {w1l1, w0l0, 1}, + "set eq set": {w1l1, w1l1, 0}, + + "wall lt": {w1l1, w2l1, -1}, + "wall gt": {w2l1, w1l1, 1}, + "logical lt": {w1l1, w1l2, -1}, + "logical gt": {w1l2, w1l1, 1}, + "both lt": {w1l1, w2l2, -1}, + "both gt": {w2l2, w1l1, 1}, + "wall precedence": {w2l1, w1l2, 1}, + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + require.Equal(t, tc.expect, tc.a.Compare(tc.b)) + }) + } +} + func TestIsEmpty(t *testing.T) { a := makeTS(0, 0) assert.True(t, a.IsEmpty())