Merge pull request cockroachdb#14654 from petermattis/pmattis/avoid-u…

…nsplittable-ranges storage: avoid unsplittable range
andreimatei · Apr 6, 2017 · fe5c546 · fe5c546
2 parents c8e9c26 + dd5797e
commit fe5c546
Show file tree

Hide file tree

Showing 6 changed files with 144 additions and 62 deletions.
diff --git a/pkg/storage/client_split_test.go b/pkg/storage/client_split_test.go
@@ -1954,3 +1954,67 @@ func TestDistributedTxnCleanup(t *testing.T) {
 		}
 	}
 }
+
+func TestUnsplittableRange(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
+	stopper := stop.NewStopper()
+	defer stopper.Stop()
+
+	store := createTestStoreWithConfig(t, stopper, storage.TestStoreConfig(nil))
+	store.ForceSplitScanAndProcess()
+
+	// Add a single large row to /Table/14.
+	tableKey := keys.MakeTablePrefix(keys.UITableID)
+	row1Key := roachpb.Key(encoding.EncodeVarintAscending(append([]byte(nil), tableKey...), 1))
+	col1Key := keys.MakeFamilyKey(append([]byte(nil), row1Key...), 0)
+	value := bytes.Repeat([]byte("x"), 64<<10)
+	if err := store.DB().Put(context.Background(), col1Key, value); err != nil {
+		t.Fatal(err)
+	}
+
+	store.ForceSplitScanAndProcess()
+	testutils.SucceedsSoon(t, func() error {
+		repl := store.LookupReplica(tableKey, nil)
+		if repl.Desc().StartKey.Equal(tableKey) {
+			return nil
+		}
+		return errors.Errorf("waiting for split: %s", repl)
+	})
+
+	repl := store.LookupReplica(tableKey, nil)
+	origMaxBytes := repl.GetMaxBytes()
+	repl.SetMaxBytes(int64(len(value)))
+
+	// Wait for an attempt to split the range which will fail because it contains
+	// a single large value. The max-bytes for the range will be changed, but it
+	// should not have been reset to its original value.
+	store.ForceSplitScanAndProcess()
+	testutils.SucceedsSoon(t, func() error {
+		maxBytes := repl.GetMaxBytes()
+		if maxBytes != int64(len(value)) && maxBytes < origMaxBytes {
+			return nil
+		}
+		return errors.Errorf("expected max-bytes to be changed: %d", repl.GetMaxBytes())
+	})
+
+	// Add two more rows to the range.
+	for i := int64(2); i < 4; i++ {
+		rowKey := roachpb.Key(encoding.EncodeVarintAscending(append([]byte(nil), tableKey...), i))
+		colKey := keys.MakeFamilyKey(append([]byte(nil), rowKey...), 0)
+		if err := store.DB().Put(context.Background(), colKey, value); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Wait for the range to be split and verify that max-bytes was reset to the
+	// value in the zone config.
+	store.ForceSplitScanAndProcess()
+	testutils.SucceedsSoon(t, func() error {
+		if origMaxBytes == repl.GetMaxBytes() {
+			return nil
+		}
+		return errors.Errorf("expected max-bytes=%d, but got max-bytes=%d",
+			origMaxBytes, repl.GetMaxBytes())
+	})
+}
diff --git a/pkg/storage/engine/mvcc.go b/pkg/storage/engine/mvcc.go
@@ -2194,24 +2194,17 @@ func MVCCFindSplitKey(
 	key,
 	endKey roachpb.RKey,
 	targetSize int64,
-	debugFn func(msg string, args ...interface{}),
 ) (roachpb.Key, error) {
 	if key.Less(roachpb.RKey(keys.LocalMax)) {
 		key = roachpb.RKey(keys.LocalMax)
 	}
 
-	logf := func(msg string, args ...interface{}) {
-		if debugFn != nil {
-			debugFn(msg, args...)
-		} else if log.V(2) {
-			log.Infof(ctx, "FindSplitKey["+rangeID.String()+"] "+msg, args...)
-		}
-	}
-
 	encStartKey := MakeMVCCMetadataKey(key.AsRawKey())
 	encEndKey := MakeMVCCMetadataKey(endKey.AsRawKey())
 
-	logf("searching split key for %d [%s, %s)", rangeID, key, endKey)
+	if log.V(2) {
+		log.Infof(ctx, "searching split key for %d [%s, %s)", rangeID, key, endKey)
+	}
 
 	// Get range size from stats.
 	ms, err := MVCCGetRangeStats(ctx, engine, rangeID)
@@ -2220,32 +2213,40 @@ func MVCCFindSplitKey(
 	}
 
 	rangeSize := ms.KeyBytes + ms.ValBytes
-	logf("range size: %s, targetSize %s", humanize.IBytes(uint64(rangeSize)), humanize.IBytes(uint64(targetSize)))
+	if log.V(2) {
+		log.Infof(ctx, "range size: %s, targetSize %s",
+			humanize.IBytes(uint64(rangeSize)), humanize.IBytes(uint64(targetSize)))
+	}
 
 	sizeSoFar := int64(0)
 	bestSplitKey := encStartKey
 	bestSplitDiff := int64(math.MaxInt64)
 	var lastKey roachpb.Key
+	var n int
 
 	if err := engine.Iterate(encStartKey, encEndKey, func(kv MVCCKeyValue) (bool, error) {
-		// Is key within a legal key range?
-		valid := IsValidSplitKey(kv.Key.Key)
+		n++
+		// Is key within a legal key range? Note that we never choose the first key
+		// as the split key.
+		valid := n > 1 && IsValidSplitKey(kv.Key.Key)
 
 		// Determine if this key would make a better split than last "best" key.
 		diff := targetSize - sizeSoFar
 		if diff < 0 {
 			diff = -diff
 		}
 		if valid && diff < bestSplitDiff {
-			logf("better split: diff %d at %s", diff, kv.Key)
+			if log.V(2) {
+				log.Infof(ctx, "better split: diff %d at %s", diff, kv.Key)
+			}
 			bestSplitKey = kv.Key
 			bestSplitDiff = diff
 		}
 
 		// Determine whether we've found best key and can exit iteration.
 		done := !bestSplitKey.Key.Equal(encStartKey.Key) && diff > bestSplitDiff
-		if done {
-			logf("target size reached")
+		if done && log.V(2) {
+			log.Infof(ctx, "target size reached")
 		}
 
 		// Add this key/value to the size scanned so far.
@@ -2262,7 +2263,7 @@ func MVCCFindSplitKey(
 	}
 
 	if bestSplitKey.Key.Equal(encStartKey.Key) {
-		return nil, errors.Errorf("the range cannot be split; considered range %q-%q has no valid splits", key, endKey)
+		return nil, nil
 	}
 
 	// The key is an MVCC (versioned) key, so to avoid corrupting MVCC we only

diff --git a/pkg/storage/engine/mvcc_test.go b/pkg/storage/engine/mvcc_test.go
@@ -2985,13 +2985,17 @@ func TestFindSplitKey(t *testing.T) {
 	}
 
 	for i, td := range testData {
-		humanSplitKey, err := MVCCFindSplitKey(context.Background(), snap, rangeID, roachpb.RKeyMin, roachpb.RKeyMax, td.targetSize, nil)
+		humanSplitKey, err := MVCCFindSplitKey(
+			context.Background(), snap, rangeID, roachpb.RKeyMin, roachpb.RKeyMax, td.targetSize)
 		if err != nil {
 			t.Fatal(err)
 		}
 		ind, _ := strconv.Atoi(string(humanSplitKey))
+		if ind == 0 {
+			t.Fatalf("%d: should never select first key as split key", i)
+		}
 		if diff := td.splitInd - ind; diff > 1 || diff < -1 {
-			t.Fatalf("%d. wanted key #%d+-1, but got %d (diff %d)", i, td.splitInd, ind, diff)
+			t.Fatalf("%d: wanted key #%d+-1, but got %d (diff %d)", i, td.splitInd, ind, diff)
 		}
 	}
 }
@@ -3014,7 +3018,7 @@ func TestFindValidSplitKeys(t *testing.T) {
 				roachpb.Key("\x02\xff"),
 			},
 			expSplit: nil,
-			expError: true,
+			expError: false,
 		},
 		// All system span cannot be split.
 		{
@@ -3023,7 +3027,7 @@ func TestFindValidSplitKeys(t *testing.T) {
 				roachpb.Key(keys.MakeTablePrefix(keys.MaxSystemConfigDescID)),
 			},
 			expSplit: nil,
-			expError: true,
+			expError: false,
 		},
 		// Between meta1 and meta2, splits at meta2.
 		{
@@ -3081,7 +3085,7 @@ func TestFindValidSplitKeys(t *testing.T) {
 				roachpb.Key("a"),
 			},
 			expSplit: nil,
-			expError: true,
+			expError: false,
 		},
 	}
 
@@ -3113,7 +3117,8 @@ func TestFindValidSplitKeys(t *testing.T) {
 			t.Fatal(err)
 		}
 		targetSize := (ms.KeyBytes + ms.ValBytes) / 2
-		splitKey, err := MVCCFindSplitKey(context.Background(), snap, rangeID, rangeStartAddr, rangeEndAddr, targetSize, nil)
+		splitKey, err := MVCCFindSplitKey(
+			context.Background(), snap, rangeID, rangeStartAddr, rangeEndAddr, targetSize)
 		if test.expError {
 			if !testutils.IsError(err, "has no valid splits") {
 				t.Errorf("%d: unexpected error: %v", i, err)
@@ -3201,7 +3206,8 @@ func TestFindBalancedSplitKeys(t *testing.T) {
 		snap := engine.NewSnapshot()
 		defer snap.Close()
 		targetSize := (ms.KeyBytes + ms.ValBytes) / 2
-		splitKey, err := MVCCFindSplitKey(context.Background(), snap, rangeID, roachpb.RKey("\x02"), roachpb.RKeyMax, targetSize, nil)
+		splitKey, err := MVCCFindSplitKey(
+			context.Background(), snap, rangeID, roachpb.RKey("\x02"), roachpb.RKeyMax, targetSize)
 		if err != nil {
 			t.Errorf("unexpected error: %s", err)
 			continue

diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go
@@ -2601,7 +2601,7 @@ func (r *Replica) AdminSplit(
 		return roachpb.AdminSplitResponse{}, roachpb.NewErrorf("cannot split range with no key provided")
 	}
 	for retryable := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); retryable.Next(); {
-		reply, pErr := r.adminSplitWithDescriptor(ctx, args, r.Desc())
+		reply, _, pErr := r.adminSplitWithDescriptor(ctx, args, r.Desc())
 		// On seeing a ConditionFailedError, retry the command with the
 		// updated descriptor.
 		if _, ok := pErr.GetDetail().(*roachpb.ConditionFailedError); !ok {
@@ -2631,7 +2631,7 @@ func (r *Replica) AdminSplit(
 // See the comment on splitTrigger for details on the complexities.
 func (r *Replica) adminSplitWithDescriptor(
 	ctx context.Context, args roachpb.AdminSplitRequest, desc *roachpb.RangeDescriptor,
-) (roachpb.AdminSplitResponse, *roachpb.Error) {
+) (_ roachpb.AdminSplitResponse, validSplitKey bool, _ *roachpb.Error) {
 	var reply roachpb.AdminSplitResponse
 
 	// Determine split key if not provided with args. This scan is
@@ -2646,29 +2646,34 @@ func (r *Replica) adminSplitWithDescriptor(
 			defer snap.Close()
 			var err error
 			targetSize := r.GetMaxBytes() / 2
-			foundSplitKey, err = engine.MVCCFindSplitKey(ctx, snap, desc.RangeID, desc.StartKey, desc.EndKey, targetSize, nil /* logFn */)
+			foundSplitKey, err = engine.MVCCFindSplitKey(
+				ctx, snap, desc.RangeID, desc.StartKey, desc.EndKey, targetSize)
 			if err != nil {
-				return reply, roachpb.NewErrorf("unable to determine split key: %s", err)
+				return reply, false, roachpb.NewErrorf("unable to determine split key: %s", err)
 			}
 		} else if !r.ContainsKey(foundSplitKey) {
-			return reply, roachpb.NewError(roachpb.NewRangeKeyMismatchError(args.SplitKey, args.SplitKey, desc))
+			return reply, false,
+				roachpb.NewError(roachpb.NewRangeKeyMismatchError(args.SplitKey, args.SplitKey, desc))
+		}
+		if foundSplitKey == nil {
+			return reply, false, nil
 		}
 
 		foundSplitKey, err := keys.EnsureSafeSplitKey(foundSplitKey)
 		if err != nil {
-			return reply, roachpb.NewErrorf("cannot split range at key %s: %v",
+			return reply, false, roachpb.NewErrorf("cannot split range at key %s: %v",
 				args.SplitKey, err)
 		}
 
 		splitKey, err = keys.Addr(foundSplitKey)
 		if err != nil {
-			return reply, roachpb.NewError(err)
+			return reply, false, roachpb.NewError(err)
 		}
 		if !splitKey.Equal(foundSplitKey) {
-			return reply, roachpb.NewErrorf("cannot split range at range-local key %s", splitKey)
+			return reply, false, roachpb.NewErrorf("cannot split range at range-local key %s", splitKey)
 		}
 		if !engine.IsValidSplitKey(foundSplitKey) {
-			return reply, roachpb.NewErrorf("cannot split range at key %s", splitKey)
+			return reply, false, roachpb.NewErrorf("cannot split range at key %s", splitKey)
 		}
 	}
 
@@ -2677,14 +2682,15 @@ func (r *Replica) adminSplitWithDescriptor(
 	// otherwise it will cause infinite retry loop.
 	if desc.StartKey.Equal(splitKey) || desc.EndKey.Equal(splitKey) {
 		log.Event(ctx, "range already split")
-		return reply, nil
+		return reply, false, nil
 	}
 	log.Event(ctx, "found split key")
 
 	// Create right hand side range descriptor with the newly-allocated Range ID.
 	rightDesc, err := r.store.NewRangeDescriptor(splitKey, desc.EndKey, desc.Replicas)
 	if err != nil {
-		return reply, roachpb.NewErrorf("unable to allocate right hand side range descriptor: %s", err)
+		return reply, true,
+			roachpb.NewErrorf("unable to allocate right hand side range descriptor: %s", err)
 	}
 
 	// Init updated version of existing range descriptor.
@@ -2770,11 +2776,11 @@ func (r *Replica) adminSplitWithDescriptor(
 		// ConditionFailedError in the error detail so that the command can be
 		// retried.
 		if _, ok := err.(*roachpb.ConditionFailedError); ok {
-			return reply, roachpb.NewError(err)
+			return reply, true, roachpb.NewError(err)
 		}
-		return reply, roachpb.NewErrorf("split at key %s failed: %s", splitKey, err)
+		return reply, true, roachpb.NewErrorf("split at key %s failed: %s", splitKey, err)
 	}
-	return reply, nil
+	return reply, true, nil
 }
 
 // splitTrigger is called on a successful commit of a transaction

diff --git a/pkg/storage/split_queue.go b/pkg/storage/split_queue.go
@@ -77,13 +77,7 @@ func (sq *splitQueue) shouldQueue(
 
 	// Add priority based on the size of range compared to the max
 	// size for the zone it's in.
-	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
-	if err != nil {
-		log.Error(ctx, err)
-		return
-	}
-
-	if ratio := float64(repl.GetMVCCStats().Total()) / float64(zone.RangeMaxBytes); ratio > 1 {
+	if ratio := float64(repl.GetMVCCStats().Total()) / float64(repl.GetMaxBytes()); ratio > 1 {
 		priority += ratio
 		shouldQ = true
 	}
@@ -96,7 +90,7 @@ func (sq *splitQueue) process(ctx context.Context, r *Replica, sysCfg config.Sys
 	desc := r.Desc()
 	if splitKey := sysCfg.ComputeSplitKey(desc.StartKey, desc.EndKey); splitKey != nil {
 		log.Infof(ctx, "splitting at key %v", splitKey)
-		if _, pErr := r.adminSplitWithDescriptor(
+		if _, _, pErr := r.adminSplitWithDescriptor(
 			ctx,
 			roachpb.AdminSplitRequest{
 				SplitKey: splitKey.AsRawKey(),
@@ -109,19 +103,28 @@ func (sq *splitQueue) process(ctx context.Context, r *Replica, sysCfg config.Sys
 	}
 
 	// Next handle case of splitting due to size.
-	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
-	if err != nil {
-		return err
-	}
 	size := r.GetMVCCStats().Total()
-	if float64(size)/float64(zone.RangeMaxBytes) > 1 {
-		log.Infof(ctx, "splitting size=%d max=%d", size, zone.RangeMaxBytes)
-		if _, pErr := r.adminSplitWithDescriptor(
+	maxBytes := r.GetMaxBytes()
+	if float64(size)/float64(maxBytes) > 1 {
+		log.Infof(ctx, "splitting size=%d max=%d", size, maxBytes)
+		if _, validSplitKey, pErr := r.adminSplitWithDescriptor(
 			ctx,
 			roachpb.AdminSplitRequest{},
 			desc,
 		); pErr != nil {
 			return pErr.GoError()
+		} else if !validSplitKey {
+			// If we couldn't find a split key, set the max-bytes for the range to
+			// double its current size to prevent future attempts to split the range
+			// until it grows again.
+			r.SetMaxBytes(size * 2)
+		} else {
+			// We successfully split the range, reset max-bytes to the zone setting.
+			zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
+			if err != nil {
+				return err
+			}
+			r.SetMaxBytes(zone.RangeMaxBytes)
 		}
 	}
 	return nil