migration,keys: fix incorrect range descriptor iteration
The long-running migrations infrastructure exposes an
`IterateRangeDescriptors` interface to paginate through all the ranges
in the system. It was previously doing this (erroneously) by scanning
through the meta2 range. It was thus possible to entirely miss
descriptors for the meta ranges themselves. This could only happen for
large enough clusters with splits in the meta range. This issue was
caught during the careful review of #66445.
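
Concretely (paraphrasing the `cluster.go` change further down; `txn.Iterate` is
the existing paginated scan helper, and `visit` stands in for the per-page
callback `func(rows []kv.KeyValue) error`):

```go
// Before: only meta2 records were visited, so descriptors addressed in meta1
// (those of the meta ranges themselves) could be skipped entirely.
if err := txn.Iterate(ctx, keys.Meta2Prefix, keys.MetaMax, blockSize, visit); err != nil {
	return err
}

// After: scan all of [MetaMin, MetaMax), covering both meta1 and meta2.
if err := txn.Iterate(ctx, keys.MetaMin, keys.MetaMax, blockSize, visit); err != nil {
	return err
}
```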

We remedy the situation by scanning over the entire `[MetaMin, MetaMax)`
span instead. One thing to take care of is the possibility of finding
the same range descriptor in both the `meta1` and `meta2` range. This is
possible when the first range in the system includes part of the user
keyspace (typically `[/Min, /System/NodeLiveness)`). We de-dup these
descriptors by keeping track of the range IDs already seen in meta1; since
the scan proceeds in sorted key order, it suffices to remember the last
range ID observed in meta1.
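
In sketch form (a simplified rendering of the `IterateRangeDescriptors` change
below; `descs` is assumed to hold descriptors decoded from a sorted scan over
`[MetaMin, MetaMax)`):

```go
// A descriptor whose addressing record lives in meta1 can show up again in
// meta2, so remember the most recent meta1 range ID and skip its duplicate.
var lastRangeIDInMeta1 roachpb.RangeID
var deduped []roachpb.RangeDescriptor
for _, desc := range descs {
	if desc.RangeID == lastRangeIDInMeta1 {
		continue // already seen via its meta1 record
	}
	deduped = append(deduped, desc)
	if keys.InMeta1(keys.RangeMetaKey(desc.StartKey)) {
		lastRangeIDInMeta1 = desc.RangeID
	}
}
```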

---

What does this mean for existing long-running migrations? Only one made use
of this descriptor iteration API: the truncated state migration. Fortunately
that migration was a below-raft one, so as part of it we purged all extant
replicas that hadn't seen the corresponding `Migrate` command (see
`postTruncatedStateMigration` and `PurgeOutdatedReplicas`; the purge step
reaches out to every store and ensures that every replica has either been
migrated or GC-ed).

Had the purge found unmigrated ranges (possible for the meta ranges as
described above, since the flawed iteration would never have issued them a
`Migrate` command), the migration would have stalled. We haven't seen this
happen with users running 21.1, which makes sense: the bug requires a cluster
with enough ranges to necessitate a split in the meta range, which is very
unlikely given our large default range size of 512 MiB.

Still, we'll backport this PR to 21.1. Should users run into this
stalled migration, we'll recommend upgrading to whatever patch release
this will be part of.

Release note: None
Release justification: bug fixes and low-risk updates to new functionality
irfansharif committed Jul 12, 2021
1 parent 64e834c commit 8885e3c

Showing 4 changed files with 89 additions and 49 deletions.
5 changes: 5 additions & 0 deletions pkg/keys/keys.go
@@ -657,6 +657,11 @@ func UserKey(key roachpb.RKey) roachpb.RKey {
return buf
}

// InMeta1 returns true iff a key is in the meta1 range (which includes RKeyMin).
func InMeta1(k roachpb.RKey) bool {
return k.Equal(roachpb.RKeyMin) || bytes.HasPrefix(k, MustAddr(Meta1Prefix))
}

// validateRangeMetaKey validates that the given key is a valid Range Metadata
// key. This checks only the constraints common to forward and backwards scans:
// correct prefix and not exceeding KeyMax.
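
As a rough usage sketch of the new helper (not part of this diff; it relies on
the existing `keys.RangeMetaKey` and `roachpb.RKey`/`RKeyMin` definitions, and
the key value is illustrative):

```go
// The addressing record for a range that starts at an ordinary user key is a
// meta2 key, so InMeta1 reports false for it.
_ = keys.InMeta1(keys.RangeMetaKey(roachpb.RKey("some-user-key"))) // false

// RangeMetaKey(/Min) is /Min itself, which InMeta1 treats as part of meta1
// (this is the r1 ([/Min, /System/NodeLiveness)) case from the commit message).
_ = keys.InMeta1(keys.RangeMetaKey(roachpb.RKeyMin)) // true
```
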
1 change: 1 addition & 0 deletions pkg/migration/migrationcluster/BUILD.bazel
@@ -36,6 +36,7 @@ go_test(
],
embed = [":migrationcluster"],
deps = [
"//pkg/keys",
"//pkg/kv/kvserver",
"//pkg/migration/nodelivenesstest",
"//pkg/roachpb",
77 changes: 48 additions & 29 deletions pkg/migration/migrationcluster/client_test.go
@@ -12,8 +12,10 @@ package migrationcluster_test

import (
"context"
"fmt"
"testing"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/migration/migrationcluster"
"github.com/cockroachdb/cockroach/pkg/migration/nodelivenesstest"
@@ -23,43 +25,60 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
)

func TestCluster_IterateRangeDescriptors(t *testing.T) {
func TestClusterIterateRangeDescriptors(t *testing.T) {
defer leaktest.AfterTest(t)()

ctx := context.Background()
const numNodes = 1

params, _ := tests.CreateTestServerParams()
server, _, kvDB := serverutils.StartServer(t, params)
defer server.Stopper().Stop(context.Background())
for _, splits := range [][]roachpb.Key{
{}, // no splits
{keys.Meta2Prefix}, // split between meta1 and meta2
{keys.SystemPrefix}, // split after the meta range
{keys.Meta2Prefix, keys.SystemPrefix}, // split before and after meta2
{keys.RangeMetaKey(roachpb.RKey("middle")).AsRawKey()}, // split within meta2
{keys.Meta2Prefix, keys.RangeMetaKey(roachpb.RKey("middle")).AsRawKey()}, // split at start of and within meta2
} {
t.Run(fmt.Sprintf("with-splits-at=%s", splits), func(t *testing.T) {
params, _ := tests.CreateTestServerParams()
server, _, kvDB := serverutils.StartServer(t, params)
defer server.Stopper().Stop(context.Background())

var numRanges int
if err := server.GetStores().(*kvserver.Stores).VisitStores(func(s *kvserver.Store) error {
numRanges = s.ReplicaCount()
return nil
}); err != nil {
t.Fatal(err)
}
for _, split := range splits {
if _, _, err := server.SplitRange(split); err != nil {
t.Fatal(err)
}
}

var numRanges int
if err := server.GetStores().(*kvserver.Stores).VisitStores(func(s *kvserver.Store) error {
numRanges = s.ReplicaCount()
return nil
}); err != nil {
t.Fatal(err)
}

c := nodelivenesstest.New(numNodes)
h := migrationcluster.New(migrationcluster.ClusterConfig{
NodeLiveness: c,
Dialer: migrationcluster.NoopDialer{},
DB: kvDB,
})
c := nodelivenesstest.New(numNodes)
h := migrationcluster.New(migrationcluster.ClusterConfig{
NodeLiveness: c,
Dialer: migrationcluster.NoopDialer{},
DB: kvDB,
})

for _, blockSize := range []int{1, 5, 10, 50} {
var numDescs int
init := func() { numDescs = 0 }
if err := h.IterateRangeDescriptors(ctx, blockSize, init, func(descriptors ...roachpb.RangeDescriptor) error {
numDescs += len(descriptors)
return nil
}); err != nil {
t.Fatal(err)
}
for _, blockSize := range []int{1, 5, 10, 50} {
var numDescs int
init := func() { numDescs = 0 }
if err := h.IterateRangeDescriptors(ctx, blockSize, init, func(descriptors ...roachpb.RangeDescriptor) error {
numDescs += len(descriptors)
return nil
}); err != nil {
t.Fatal(err)
}

if numDescs != numRanges {
t.Fatalf("expected to find %d ranges, found %d", numRanges+1, numDescs)
}
if numDescs != numRanges {
t.Fatalf("expected to find %d ranges, found %d", numRanges, numDescs)
}
}
})
}
}
55 changes: 35 additions & 20 deletions pkg/migration/migrationcluster/cluster.go
@@ -134,37 +134,52 @@ func (c *Cluster) ForEveryNode(
func (c *Cluster) IterateRangeDescriptors(
ctx context.Context, blockSize int, init func(), fn func(...roachpb.RangeDescriptor) error,
) error {
if err := c.DB().Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
return c.DB().Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
// Inform the caller that we're starting a fresh attempt to page in
// range descriptors.
init()

// Iterate through meta2 to pull out all the range descriptors.
return txn.Iterate(ctx, keys.Meta2Prefix, keys.MetaMax, blockSize,
// Iterate through meta{1,2} to pull out all the range descriptors.
var lastRangeIDInMeta1 roachpb.RangeID
return txn.Iterate(ctx, keys.MetaMin, keys.MetaMax, blockSize,
func(rows []kv.KeyValue) error {
descriptors := make([]roachpb.RangeDescriptor, len(rows))
for i, row := range rows {
if err := row.ValueProto(&descriptors[i]); err != nil {
return errors.Wrapf(err,
"unable to unmarshal range descriptor from %s",
row.Key,
)
descriptors := make([]roachpb.RangeDescriptor, 0, len(rows))
var desc roachpb.RangeDescriptor
for _, row := range rows {
err := row.ValueProto(&desc)
if err != nil {
return errors.Wrapf(err, "unable to unmarshal range descriptor from %s", row.Key)
}

// In small enough clusters it's possible for the same range
// descriptor to be stored in both meta1 and meta2. This
// happens when some range spans both the meta and the user
// keyspace. Consider when r1 is [/Min,
// /System/NodeLiveness); we'll store the range descriptor
// in both /Meta2/<r1.EndKey> and in /Meta1/KeyMax[1].
//
// As part of the iteration we'll de-duplicate this descriptor
// away by checking whether we've seen it before in meta1.
// Since we're scanning over the meta range in sorted
// order, it's enough to check against the last range
// descriptor we've seen in meta1.
//
// [1]: See kvserver.rangeAddressing.
if desc.RangeID == lastRangeIDInMeta1 {
continue
}

descriptors = append(descriptors, desc)
if keys.InMeta1(keys.RangeMetaKey(desc.StartKey)) {
lastRangeIDInMeta1 = desc.RangeID
}
}

// Invoke fn with the current chunk (of size ~blockSize) of
// range descriptors.
if err := fn(descriptors...); err != nil {
return err
}

return nil
return fn(descriptors...)
})
}); err != nil {
return err
}

return nil
})
}

// DB exposes the underlying *kv.DB instance.
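
For context, a rough caller-side sketch of this API (modeled on the test
above; `h` is assumed to be a `*migrationcluster.Cluster` and `ctx` a
`context.Context`):

```go
// Count every range descriptor in the system, paging through the meta
// records blockSize at a time. init resets the counter because the
// underlying transaction, and hence the whole iteration, may restart.
var numDescs int
init := func() { numDescs = 0 }
if err := h.IterateRangeDescriptors(ctx, 64 /* blockSize */, init,
	func(descriptors ...roachpb.RangeDescriptor) error {
		numDescs += len(descriptors)
		return nil
	},
); err != nil {
	return err
}
```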
