Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
86565: roachpb: add gc hint replica state field and let delete range set it r=tbg,erikgrinaker a=aliher1911

This commit adds a test that peforms range split and merge during
cluster upgrade. This is covering test cases where raft state gets
additional information in new versions and we must ensure that
replicas don't diverge in the process of upgrade.

Release justification: Commit adds extra tests only.
Release note: None

----

When cmd_delete_range deletes the whole range it can give a GC hint so
that mvcc gc could optimize large number ranges that would be collected
using gc clear range requests.
Processing those requests together in a quick succession would reduce
compaction load on pebble.

Release justification: This change is safe as it adds handling for the new
type of request that doesn't interact with any existing functionality.
Release note: None

----

This commit adds a version gate GCHintInReplicaState for GC Hint. This
is needed to avoid divergence of replicas when older followers don't yet 
update state with a new field value.

Release justification: This commit adds a version gate for a new feature
Release note: None

----

This commit adds a gc hint field to replica state that is backed by a
replicated range local key. This field could be set explicitly by client
to indicate that all data in the range is ripe for deletion at hint
timestamp. MVCC GC could use this information to optimize clear range
requests.

Release justification: this commit is safe because it adds a new request
type and a new status field that doesn't interfere with existing
functionality.
Release note: None

Co-authored-by: Oleg Afanasyev <[email protected]>
  • Loading branch information
craig[bot] and aliher1911 committed Aug 26, 2022
2 parents 82e2a30 + b87e290 commit 7507e53
Show file tree
Hide file tree
Showing 28 changed files with 525 additions and 24 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -294,4 +294,4 @@ trace.jaeger.agent string the address of a Jaeger agent to receive traces using
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.
trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https://<ui>/#/debug/tracez
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.
version version 1000022.1-62 set the active cluster version in the format '<major>.<minor>'
version version 1000022.1-64 set the active cluster version in the format '<major>.<minor>'
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,6 @@
<tr><td><code>trace.opentelemetry.collector</code></td><td>string</td><td><code></code></td><td>address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.</td></tr>
<tr><td><code>trace.span_registry.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, ongoing traces can be seen at https://<ui>/#/debug/tracez</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>1000022.1-62</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>1000022.1-64</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
8 changes: 8 additions & 0 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ const (
// schema changes to complete. After this point, no non-MVCC
// AddSSTable calls will be used outside of tenant streaming.
NoNonMVCCAddSSTable
// GCHintInReplicaState adds GC hint to replica state. When this version is
// enabled, replicas will populate GC hint and update them when necessary.
GCHintInReplicaState

// *************************************************
// Step (1): Add new versions here.
Expand Down Expand Up @@ -477,6 +480,11 @@ var rawVersionsSingleton = keyedVersions{
Key: NoNonMVCCAddSSTable,
Version: roachpb.Version{Major: 22, Minor: 1, Internal: 62},
},
{
Key: GCHintInReplicaState,
Version: roachpb.Version{Major: 22, Minor: 1, Internal: 64},
},

// *************************************************
// Step (2): Add new versions here.
// Do not add new versions to a patch release.
Expand Down
5 changes: 3 additions & 2 deletions pkg/clusterversion/key_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions pkg/cmd/roachtest/tests/acceptance.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ func registerAcceptance(r registry.Registry) {
registry.OwnerTestEng: {
{
name: "version-upgrade",
fn: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runVersionUpgrade(ctx, t, c)
},
fn: runVersionUpgrade,
// This test doesn't like running on old versions because it upgrades to
// the latest released version and then it tries to "head", where head is
// the cockroach binary built from the branch on which the test is
Expand Down
10 changes: 9 additions & 1 deletion pkg/cmd/roachtest/tests/versionupgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ CREATE DATABASE IF NOT EXISTS test;
CREATE TABLE test.t (x INT AS (3) STORED);
DROP TABLE test.t;
`),
stmtFeatureTest("Split and Merge Ranges", v202, `
create database if not EXISTS splitmerge;
create table splitmerge.t (k int primary key);
alter table splitmerge.t split at values (1), (2), (3);
alter table splitmerge.t unsplit at values (1), (2), (3);
drop table splitmerge.t;
`),
}

func runVersionUpgrade(ctx context.Context, t test.Test, c cluster.Cluster) {
Expand Down Expand Up @@ -138,8 +145,9 @@ func runVersionUpgrade(ctx context.Context, t test.Test, c cluster.Cluster) {
// and finalizing on the auto-upgrade path.
preventAutoUpgradeStep(1),
// Roll nodes forward.
binaryUpgradeStep(c.All(), ""),
binaryUpgradeStep(c.Node(1), ""),
testFeaturesStep,
binaryUpgradeStep(c.Range(2, c.Spec().NodeCount), ""),
// Run a quick schemachange workload in between each upgrade.
// The maxOps is 10 to keep the test runtime under 1-2 minutes.
// schemaChangeStep,
Expand Down
2 changes: 2 additions & 0 deletions pkg/keys/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ var (
// LocalRaftTruncatedStateSuffix for the corresponding unreplicated
// RaftTruncatedState.
_ = []byte("rftt")
// LocalRangeGCHintSuffix is the suffix for the GC hint struct.
LocalRangeGCHintSuffix = []byte("rgch")
// LocalRangeLeaseSuffix is the suffix for a range lease.
LocalRangeLeaseSuffix = []byte("rll-")
// LocalRangePriorReadSummarySuffix is the suffix for a range's prior read
Expand Down
11 changes: 11 additions & 0 deletions pkg/keys/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,12 @@ func RangeGCThresholdKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).RangeGCThresholdKey()
}

// RangeGCHintKey returns a system-local key for GC hint data. This data is used
// by GC queue to adjust how replicas are being queued for GC.
func RangeGCHintKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).RangeGCHintKey()
}

// MVCCRangeKeyGCKey returns a range local key protecting range
// tombstone mvcc stats calculations during range tombstone GC.
func MVCCRangeKeyGCKey(rangeID roachpb.RangeID) roachpb.Key {
Expand Down Expand Up @@ -1020,6 +1026,11 @@ func (b RangeIDPrefixBuf) RangeGCThresholdKey() roachpb.Key {
return append(b.replicatedPrefix(), LocalRangeGCThresholdSuffix...)
}

// RangeGCHintKey returns a range-local key for the GC hint data.
func (b RangeIDPrefixBuf) RangeGCHintKey() roachpb.Key {
return append(b.replicatedPrefix(), LocalRangeGCHintSuffix...)
}

// RangeVersionKey returns a system-local key for the range version.
func (b RangeIDPrefixBuf) RangeVersionKey() roachpb.Key {
return append(b.replicatedPrefix(), LocalRangeVersionSuffix...)
Expand Down
2 changes: 2 additions & 0 deletions pkg/kv/kvserver/batcheval/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ go_library(
deps = [
"//pkg/cloud",
"//pkg/cloud/cloudpb",
"//pkg/clusterversion",
"//pkg/keys",
"//pkg/kv/kvserver/abortspan",
"//pkg/kv/kvserver/batcheval/result",
Expand Down Expand Up @@ -103,6 +104,7 @@ go_test(
srcs = [
"cmd_add_sstable_test.go",
"cmd_clear_range_test.go",
"cmd_delete_range_gchint_test.go",
"cmd_delete_range_test.go",
"cmd_end_transaction_test.go",
"cmd_export_test.go",
Expand Down
58 changes: 55 additions & 3 deletions pkg/kv/kvserver/batcheval/cmd_delete_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ import (
"context"
"time"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
Expand Down Expand Up @@ -65,6 +67,13 @@ func declareKeysDeleteRange(
latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{
Key: keys.MVCCRangeKeyGCKey(rs.GetRangeID()),
})

if args.UpdateRangeDeleteGCHint {
// If we are updating GC hint, add it to the latch span.
latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
Key: keys.RangeGCHintKey(rs.GetRangeID()),
})
}
}
}

Expand All @@ -86,6 +95,13 @@ func DeleteRange(
"UseRangeTombstones must be passed with predicate based Delete Range")
}

if args.UpdateRangeDeleteGCHint && !args.UseRangeTombstone {
// Check for prerequisite for gc hint. If it doesn't hold, this is incorrect
// usage of hint.
return result.Result{}, errors.AssertionFailedf(
"GCRangeHint must only be used together with UseRangeTombstone")
}

// Use MVCC range tombstone if requested.
if args.UseRangeTombstone {
if cArgs.Header.Txn != nil {
Expand All @@ -98,7 +114,39 @@ func DeleteRange(
return result.Result{}, errors.AssertionFailedf(
"ReturnKeys can't be used with range tombstones")
}

desc := cArgs.EvalCtx.Desc()

maybeUpdateGCHint := func(res *result.Result) error {
if !args.UpdateRangeDeleteGCHint {
return nil
}
// If GCHint was provided, then we need to check if this request meets
// range gc criteria of removing all data. This is not an error as range
// might have merged since request was sent and we don't want to fail
// deletion.
if !args.Key.Equal(desc.StartKey.AsRawKey()) || !args.EndKey.Equal(desc.EndKey.AsRawKey()) {
return nil
}
sl := MakeStateLoader(cArgs.EvalCtx)
hint, err := sl.LoadGCHint(ctx, readWriter)
if err != nil {
return err
}
if !hint.ForwardLatestRangeDeleteTimestamp(h.Timestamp) {
return nil
}
canUseGCHint := cArgs.EvalCtx.ClusterSettings().Version.IsActive(ctx,
clusterversion.GCHintInReplicaState)
if updated, err := sl.SetGCHint(ctx, readWriter, cArgs.Stats, hint, canUseGCHint); err != nil || !updated {
return err
}
res.Replicated.State = &kvserverpb.ReplicaState{
GCHint: hint,
}
return nil
}

leftPeekBound, rightPeekBound := rangeTombstonePeekBounds(
args.Key, args.EndKey, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey())
maxIntents := storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV)
Expand All @@ -114,10 +162,14 @@ func DeleteRange(
s := cArgs.EvalCtx.GetMVCCStats()
statsCovered = &s
}
err := storage.MVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats,
if err := storage.MVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats,
args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound,
args.IdempotentTombstone, maxIntents, statsCovered)
return result.Result{}, err
args.IdempotentTombstone, maxIntents, statsCovered); err != nil {
return result.Result{}, err
}
var res result.Result
err := maybeUpdateGCHint(&res)
return res, err
}

if h.MaxSpanRequestKeys == 0 {
Expand Down
81 changes: 81 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_delete_range_gchint_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package batcheval_test

import (
"context"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/stretchr/testify/require"
)

func TestDeleteRangeTombstoneSetsGCHint(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()
serv, _, _ := serverutils.StartServer(t, base.TestServerArgs{
Knobs: base.TestingKnobs{
Store: &kvserver.StoreTestingKnobs{
DisableMergeQueue: true,
DisableSplitQueue: true,
},
},
})
s := serv.(*server.TestServer)
defer s.Stopper().Stop(ctx)

store, err := s.Stores().GetStore(s.GetFirstStoreID())
require.NoError(t, err)

key := roachpb.Key("b")
content := []byte("test")

repl := store.LookupReplica(roachpb.RKey(key))
gcHint := repl.GetGCHint()
require.True(t, gcHint.LatestRangeDeleteTimestamp.IsEmpty(), "gc hint should be empty by default")

pArgs := &roachpb.PutRequest{
RequestHeader: roachpb.RequestHeader{
Key: key,
},
Value: roachpb.MakeValueFromBytes(content),
}
if _, pErr := kv.SendWrapped(ctx, s.DistSender(), pArgs); pErr != nil {
t.Fatal(pErr)
}

r, err := s.LookupRange(key)
require.NoError(t, err, "failed to lookup range")

drArgs := &roachpb.DeleteRangeRequest{
UpdateRangeDeleteGCHint: true,
UseRangeTombstone: true,
RequestHeader: roachpb.RequestHeader{
Key: r.StartKey.AsRawKey(),
EndKey: r.EndKey.AsRawKey(),
},
}
if _, pErr := kv.SendWrapped(ctx, s.DistSender(), drArgs); pErr != nil {
t.Fatal(pErr)
}

gcHint = repl.GetGCHint()
require.True(t, !gcHint.LatestRangeDeleteTimestamp.IsEmpty(), "gc hint was not set by delete range")
}
44 changes: 43 additions & 1 deletion pkg/kv/kvserver/batcheval/cmd_end_transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,13 @@ func declareKeysEndTxn(
latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
Key: keys.RangePriorReadSummaryKey(mt.LeftDesc.RangeID),
})
// Merge will update GC hint if set, so we need to get a write latch
// on the left side and we already have a read latch on RHS for
// replicated keys.
latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
Key: keys.RangeGCHintKey(mt.LeftDesc.RangeID),
})

// Merges need to adjust MVCC stats for merged MVCC range tombstones
// that straddle the ranges, by peeking to the left and right of the RHS
// start key. Since Prevish() is imprecise, we must also ensure we don't
Expand Down Expand Up @@ -1075,6 +1082,13 @@ func splitTriggerHelper(
if gcThreshold.IsEmpty() {
log.VEventf(ctx, 1, "LHS's GCThreshold of split is not set")
}
gcHint := &roachpb.GCHint{}
if split.WriteGCHint {
gcHint, err = sl.LoadGCHint(ctx, batch)
if err != nil {
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to load GCHint")
}
}

// Writing the initial state is subtle since this also seeds the Raft
// group. It becomes more subtle due to proposer-evaluated Raft.
Expand Down Expand Up @@ -1125,7 +1139,8 @@ func splitTriggerHelper(
}
*h.AbsPostSplitRight(), err = stateloader.WriteInitialReplicaState(
ctx, batch, *h.AbsPostSplitRight(), split.RightDesc, rightLease,
*gcThreshold, replicaVersion, writeRaftAppliedIndexTerm,
*gcThreshold, *gcHint, replicaVersion, writeRaftAppliedIndexTerm,
split.WriteGCHint,
)
if err != nil {
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to write initial Replica state")
Expand Down Expand Up @@ -1228,6 +1243,33 @@ func mergeTrigger(
pd.Replicated.Merge = &kvserverpb.Merge{
MergeTrigger: *merge,
}

{
// If we have GC hints populated that means we are trying to perform
// optimized garbage removal in future.
// We will try to merge both hints if possible and set new hint on LHS.
lhsLoader := MakeStateLoader(rec)
lhsHint, err := lhsLoader.LoadGCHint(ctx, batch)
if err != nil {
return result.Result{}, err
}
rhsLoader := stateloader.Make(merge.RightDesc.RangeID)
rhsHint, err := rhsLoader.LoadGCHint(ctx, batch)
if err != nil {
return result.Result{}, err
}
if lhsHint.Merge(rhsHint) {
updated, err := lhsLoader.SetGCHint(ctx, batch, ms, lhsHint, merge.WriteGCHint)
if err != nil {
return result.Result{}, err
}
if updated {
pd.Replicated.State = &kvserverpb.ReplicaState{
GCHint: lhsHint,
}
}
}
}
return pd, nil
}

Expand Down
Loading

0 comments on commit 7507e53

Please sign in to comment.