storage: Don't swallow ErrEpochAlreadyIncremented
IncrementEpoch was failing to distinguish between the request that
caused the increment and another caller making the same increment.
This is incorrect since a successful increment tells you *when* the
node was confirmed dead, while relying on another node's increment
leaves this uncertain.

In rare cases involving a badly overloaded cluster, this could result
in inconsistencies (non-serializable transactions) due to incorrect
timestamp cache management.

Fixes cockroachdb#35986

Release note (bug fix): Fix a rare inconsistency that could occur on
badly overloaded clusters.
bdarnell committed Apr 18, 2019
1 parent b47269e commit 473f106
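
The contract change is easiest to see from the caller's side. Below is a minimal sketch, not part of this commit, showing how a caller can now distinguish the two outcomes via the exported storage.ErrEpochAlreadyIncremented; the confirmDead helper and its package are hypothetical, while the IncrementEpoch signature and error value come from the diff below.

package livenessexample

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/storagepb"
)

// confirmDead is a hypothetical helper illustrating the new contract: a nil
// return from IncrementEpoch means we performed the increment ourselves, so
// the node was confirmed dead as of our own liveness snapshot; an
// ErrEpochAlreadyIncremented return means another node incremented first,
// which proves the node is dead now but not that it was dead at our
// snapshot's timestamp.
func confirmDead(
	ctx context.Context, nl *storage.NodeLiveness, liveness *storagepb.Liveness,
) (bool, error) {
	switch err := nl.IncrementEpoch(ctx, liveness); err {
	case nil:
		return true, nil
	case storage.ErrEpochAlreadyIncremented:
		return false, nil
	default:
		return false, err
	}
}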
Showing 3 changed files with 34 additions and 10 deletions.
pkg/storage/node_liveness.go: 12 changes (6 additions, 6 deletions)
@@ -53,6 +53,11 @@ var (
 	// the underlying liveness record has had its epoch incremented.
 	ErrEpochIncremented = errors.New("heartbeat failed on epoch increment")
 
+	// ErrEpochAlreadyIncremented is returned by IncrementEpoch when
+	// someone else has already incremented the epoch to the desired
+	// value.
+	ErrEpochAlreadyIncremented = errors.New("epoch already incremented")
+
 	errLiveClockNotLive = errors.New("not live")
 )

@@ -685,8 +690,6 @@ func (nl *NodeLiveness) getLivenessLocked(nodeID roachpb.NodeID) (*storagepb.Liv
 	return nil, ErrNoLivenessRecord
 }
 
-var errEpochAlreadyIncremented = errors.New("epoch already incremented")
-
 // IncrementEpoch is called to increment the current liveness epoch,
 // thereby invalidating anything relying on the liveness of the
 // previous epoch. This method does a conditional put on the node
@@ -714,15 +717,12 @@ func (nl *NodeLiveness) IncrementEpoch(ctx context.Context, liveness *storagepb.
 	if err := nl.updateLiveness(ctx, update, liveness, func(actual storagepb.Liveness) error {
 		defer nl.maybeUpdate(actual)
 		if actual.Epoch > liveness.Epoch {
-			return errEpochAlreadyIncremented
+			return ErrEpochAlreadyIncremented
 		} else if actual.Epoch < liveness.Epoch {
 			return errors.Errorf("unexpected liveness epoch %d; expected >= %d", actual.Epoch, liveness.Epoch)
 		}
 		return errors.Errorf("mismatch incrementing epoch for %+v; actual is %+v", *liveness, actual)
 	}); err != nil {
-		if err == errEpochAlreadyIncremented {
-			return nil
-		}
 		return err
 	}
 
pkg/storage/node_liveness_test.go: 6 changes (3 additions, 3 deletions)
@@ -316,8 +316,8 @@ func TestNodeLivenessEpochIncrement(t *testing.T) {
 		t.Errorf("expected epoch increment == 1; got %d", c)
 	}
 
-	// Verify noop on incrementing an already-incremented epoch.
-	if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != nil {
+	// Verify error on incrementing an already-incremented epoch.
+	if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != storage.ErrEpochAlreadyIncremented {
 		t.Fatalf("unexpected error incrementing a non-live node: %s", err)
 	}
 
@@ -610,7 +610,7 @@ func TestNodeLivenessConcurrentIncrementEpochs(t *testing.T) {
 		}()
 	}
 	for i := 0; i < concurrency; i++ {
-		if err := <-errCh; err != nil {
+		if err := <-errCh; err != nil && err != storage.ErrEpochAlreadyIncremented {
 			t.Fatalf("concurrent increment epoch %d failed: %s", i, err)
 		}
 	}
pkg/storage/replica_range_lease.go: 26 changes (25 additions, 1 deletion)
@@ -303,7 +303,31 @@ func (p *pendingLeaseRequest) requestLeaseAsync(
 				log.Info(ctx, err)
 			}
 		} else if err = p.repl.store.cfg.NodeLiveness.IncrementEpoch(ctx, status.Liveness); err != nil {
-			log.Error(ctx, err)
+			// If we get ErrEpochAlreadyIncremented, someone else beat
+			// us to it. This proves that the target node is dead
+			// *now*, but it doesn't prove that it was dead at
+			// status.Timestamp (which we've encoded into our lease
+			// request). It's possible that the node died and revived
+			// without having its epoch incremented.
+			//
+			// ErrEpochAlreadyIncremented is not an unusual situation,
+			// so we don't log it, but it would also be incorrect to
+			// simply proceed to sending our lease request since we
+			// may have an invalid timestamp (which would allow us to
+			// request a lease whose start time is before the epoch
+			// increment of the previous leaseholder, and therefore
+			// before some reads it may have served). We could bump
+			// the timestamp in the lease request and proceed, but
+			// that just sets up another race between this node and
+			// the one that already incremented the epoch. They're
+			// probably going to beat us this time too, so just return
+			// the NotLeaseHolderError here instead of trying to fix
+			// up the timestamps and return.
+			//
+			// https://github.com/cockroachdb/cockroach/issues/35986
+			if err != ErrEpochAlreadyIncremented {
+				log.Error(ctx, err)
+			}
 		}
 	}
 	// Set error for propagation to all waiters below.
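
To make the timestamp hazard described in the new comment concrete, here is a toy, self-contained model; the names and numeric timestamps are purely illustrative and are not CockroachDB APIs.

package main

import "fmt"

// Toy model of the hazard: our lease request's start time is derived from
// status.Timestamp, the liveness snapshot taken before calling
// IncrementEpoch. If another node performed the actual epoch increment later
// than that snapshot, the old leaseholder may have kept serving reads in
// between, and a lease starting at our snapshot time would overlap them.
func main() {
	const (
		proposedLeaseStart = 100 // from our stale liveness snapshot (status.Timestamp)
		actualEpochBump    = 150 // when another node really incremented the epoch
		lastReadByOldOwner = 140 // a read the old leaseholder served before losing its epoch
	)
	if proposedLeaseStart < actualEpochBump && lastReadByOldOwner >= proposedLeaseStart {
		fmt.Println("hazard: lease would start before a read the old leaseholder already served")
	}
}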
