Skip to content

Commit

Permalink
kvstorage: move uninit replica creation to kvstorage
Browse files Browse the repository at this point in the history
This commit factors out the code that creates an uninitialized replica in
storage, into the kvstorage package.

Release note: none
Epic: none
  • Loading branch information
pav-kv committed Feb 7, 2023
1 parent 75eb88d commit 09a7998
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 50 deletions.
61 changes: 61 additions & 0 deletions pkg/kv/kvserver/kvstorage/replica_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ package kvstorage
import (
"context"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/errors"
"go.etcd.io/raft/v3/raftpb"
)
Expand Down Expand Up @@ -101,3 +103,62 @@ func (r LoadedReplicaState) check(storeID roachpb.StoreID) error {
}
return nil
}

// CreateUninitializedReplica creates an uninitialized replica in storage.
// Returns roachpb.RaftGroupDeletedError if this replica can not be created
// because it has been deleted.
func CreateUninitializedReplica(
ctx context.Context,
eng storage.Engine,
storeID roachpb.StoreID,
rangeID roachpb.RangeID,
replicaID roachpb.ReplicaID,
) error {
// Before creating the replica, see if there is a tombstone which would
// indicate that this replica has been removed.
tombstoneKey := keys.RangeTombstoneKey(rangeID)
var tombstone roachpb.RangeTombstone
if ok, err := storage.MVCCGetProto(
ctx, eng, tombstoneKey, hlc.Timestamp{}, &tombstone, storage.MVCCGetOptions{},
); err != nil {
return err
} else if ok && replicaID < tombstone.NextReplicaID {
return &roachpb.RaftGroupDeletedError{}
}

// Write the RaftReplicaID for this replica. This is the only place in the
// CockroachDB code that we are creating a new *uninitialized* replica.
// Note that it is possible that we have already created the HardState for
// an uninitialized replica, then crashed, and on recovery are receiving a
// raft message for the same or later replica.
// - Same replica: we are overwriting the RaftReplicaID with the same
// value, which is harmless.
// - Later replica: there may be an existing HardState for the older
// uninitialized replica with Commit=0 and non-zero Term and Vote. Using
// the Term and Vote values for that older replica in the context of
// this newer replica is harmless since it just limits the votes for
// this replica.
//
// Compatibility:
// - v21.2 and v22.1: v22.1 unilaterally introduces RaftReplicaID (an
// unreplicated range-id local key). If a v22.1 binary is rolled back at
// a node, the fact that RaftReplicaID was written is harmless to a
// v21.2 node since it does not read it. When a v21.2 drops an
// initialized range, the RaftReplicaID will also be deleted because the
// whole range-ID local key space is deleted.
// - v22.2: no changes: RaftReplicaID is written, but old Replicas may not
// have it yet.
// - v23.1: at startup, we remove any uninitialized replicas that have a
// HardState but no RaftReplicaID, see kvstorage.LoadAndReconcileReplicas.
// So after first call to this method we have the invariant that all replicas
// have a RaftReplicaID persisted.
sl := stateloader.Make(rangeID)
if err := sl.SetRaftReplicaID(ctx, eng, replicaID); err != nil {
return err
}

// Make sure that storage invariants for this uninitialized replica hold.
uninitDesc := roachpb.RangeDescriptor{RangeID: rangeID}
_, err := LoadReplicaState(ctx, eng, storeID, &uninitDesc, replicaID)
return err
}
52 changes: 2 additions & 50 deletions pkg/kv/kvserver/store_create_replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,8 @@ import (
"context"
"time"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvstorage"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/cockroachdb/errors"
Expand Down Expand Up @@ -211,54 +207,10 @@ func (s *Store) tryGetOrCreateReplica(
// be accessed by someone holding a reference to, or currently creating a
// Replica for this rangeID, and that's us.

// Before creating the replica, see if there is a tombstone which would
// indicate that this is a stale message.
tombstoneKey := keys.RangeTombstoneKey(rangeID)
var tombstone roachpb.RangeTombstone
if ok, err := storage.MVCCGetProto(
ctx, s.Engine(), tombstoneKey, hlc.Timestamp{}, &tombstone, storage.MVCCGetOptions{},
if err := kvstorage.CreateUninitializedReplica(
ctx, s.Engine(), s.StoreID(), rangeID, replicaID,
); err != nil {
return nil, false, err
} else if ok && replicaID < tombstone.NextReplicaID {
return nil, false, &roachpb.RaftGroupDeletedError{}
}

// Write the RaftReplicaID for this replica. This is the only place in the
// CockroachDB code that we are creating a new *uninitialized* replica.
// Note that it is possible that we have already created the HardState for
// an uninitialized replica, then crashed, and on recovery are receiving a
// raft message for the same or later replica.
// - Same replica: we are overwriting the RaftReplicaID with the same
// value, which is harmless.
// - Later replica: there may be an existing HardState for the older
// uninitialized replica with Commit=0 and non-zero Term and Vote. Using
// the Term and Vote values for that older replica in the context of
// this newer replica is harmless since it just limits the votes for
// this replica.
//
// Compatibility:
// - v21.2 and v22.1: v22.1 unilaterally introduces RaftReplicaID (an
// unreplicated range-id local key). If a v22.1 binary is rolled back at
// a node, the fact that RaftReplicaID was written is harmless to a
// v21.2 node since it does not read it. When a v21.2 drops an
// initialized range, the RaftReplicaID will also be deleted because the
// whole range-ID local key space is deleted.
// - v22.2: no changes: RaftReplicaID is written, but old Replicas may not
// have it yet.
// - v23.1: at startup, we remove any unitialized replicas that have a
// HardState but no RaftReplicaID, see kvstorage.LoadAndReconcileReplicas.
// So after first call to this method we have the invariant that all replicas
// have a RaftReplicaID persisted.
sl := stateloader.Make(rangeID)
if err := sl.SetRaftReplicaID(ctx, s.Engine(), replicaID); err != nil {
return nil, false, err
}

// Make sure that storage invariants for this uninitialized replica hold.
uninitDesc := roachpb.RangeDescriptor{RangeID: rangeID}
_, err := kvstorage.LoadReplicaState(ctx, s.Engine(), s.StoreID(), &uninitDesc, replicaID)
if err != nil {
return nil, false, err
}

// Create a new uninitialized replica and lock it for raft processing.
Expand Down

0 comments on commit 09a7998

Please sign in to comment.