diff --git a/pkg/storage/node_liveness.go b/pkg/storage/node_liveness.go index 4c4231572a0a..51192c3aff5a 100644 --- a/pkg/storage/node_liveness.go +++ b/pkg/storage/node_liveness.go @@ -53,6 +53,11 @@ var ( // the underlying liveness record has had its epoch incremented. ErrEpochIncremented = errors.New("heartbeat failed on epoch increment") + // ErrEpochAlreadyIncremented is returned by IncrementEpoch when + // someone else has already incremented the epoch to the desired + // value. + ErrEpochAlreadyIncremented = errors.New("epoch already incremented") + errLiveClockNotLive = errors.New("not live") ) @@ -685,8 +690,6 @@ func (nl *NodeLiveness) getLivenessLocked(nodeID roachpb.NodeID) (*storagepb.Liv return nil, ErrNoLivenessRecord } -var errEpochAlreadyIncremented = errors.New("epoch already incremented") - // IncrementEpoch is called to increment the current liveness epoch, // thereby invalidating anything relying on the liveness of the // previous epoch. This method does a conditional put on the node @@ -714,15 +717,12 @@ func (nl *NodeLiveness) IncrementEpoch(ctx context.Context, liveness *storagepb. if err := nl.updateLiveness(ctx, update, liveness, func(actual storagepb.Liveness) error { defer nl.maybeUpdate(actual) if actual.Epoch > liveness.Epoch { - return errEpochAlreadyIncremented + return ErrEpochAlreadyIncremented } else if actual.Epoch < liveness.Epoch { return errors.Errorf("unexpected liveness epoch %d; expected >= %d", actual.Epoch, liveness.Epoch) } return errors.Errorf("mismatch incrementing epoch for %+v; actual is %+v", *liveness, actual) }); err != nil { - if err == errEpochAlreadyIncremented { - return nil - } return err } diff --git a/pkg/storage/node_liveness_test.go b/pkg/storage/node_liveness_test.go index 9edc87a230ad..94f221e10093 100644 --- a/pkg/storage/node_liveness_test.go +++ b/pkg/storage/node_liveness_test.go @@ -316,8 +316,8 @@ func TestNodeLivenessEpochIncrement(t *testing.T) { t.Errorf("expected epoch increment == 1; got %d", c) } - // Verify noop on incrementing an already-incremented epoch. - if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != nil { + // Verify error on incrementing an already-incremented epoch. + if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != storage.ErrEpochAlreadyIncremented { t.Fatalf("unexpected error incrementing a non-live node: %s", err) } @@ -610,7 +610,7 @@ func TestNodeLivenessConcurrentIncrementEpochs(t *testing.T) { }() } for i := 0; i < concurrency; i++ { - if err := <-errCh; err != nil { + if err := <-errCh; err != nil && err != storage.ErrEpochAlreadyIncremented { t.Fatalf("concurrent increment epoch %d failed: %s", i, err) } } diff --git a/pkg/storage/replica_range_lease.go b/pkg/storage/replica_range_lease.go index 4f78b6d3df2b..56e70fbfecc1 100644 --- a/pkg/storage/replica_range_lease.go +++ b/pkg/storage/replica_range_lease.go @@ -303,7 +303,31 @@ func (p *pendingLeaseRequest) requestLeaseAsync( log.Info(ctx, err) } } else if err = p.repl.store.cfg.NodeLiveness.IncrementEpoch(ctx, status.Liveness); err != nil { - log.Error(ctx, err) + // If we get ErrEpochAlreadyIncremented, someone else beat + // us to it. This proves that the target node is dead + // *now*, but it doesn't prove that it was dead at + // status.Timestamp (which we've encoded into our lease + // request). It's possible that the node died and revived + // without having its epoch incremented. + // + // ErrEpochAlreadyIncremented is not an unusual situation, + // so we don't log it, but it would also be incorrect to + // simply proceed to sending our lease request since we + // may have an invalid timestamp (which would allow us to + // request a lease whose start time is before the epoch + // increment of the previous leaseholder, and therefore + // before some reads it may have served). We could bump + // the timestamp in the lease request and proceed, but + // that just sets up another race between this node and + // the one that already incremented the epoch. They're + // probably going to beat us this time too, so just return + // the NotLeaseHolderError here instead of trying to fix + // up the timestamps and return. + // + // https://github.com/cockroachdb/cockroach/issues/35986 + if err != ErrEpochAlreadyIncremented { + log.Error(ctx, err) + } } } // Set error for propagation to all waiters below.