From 199ef0304931a22f2340f6b787eec4417bb76f12 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 5 Jul 2019 16:42:40 +0100 Subject: [PATCH] Skip PRRL renewal on UNASSIGNED_SEQ_NO Today when renewing PRRLs we assert that any invalid "backwards" renewals must be because we are recovering the shard. In fact it's also possible to have `checkpointState.globalCheckpoint == SequenceNumbers.UNASSIGNED_SEQ_NO` on a tracked shard copy if the primary was just promoted and hasn't yet received checkpoints from all of its peers. This commit weakens the assertion to match. Caught by a [failure of the full cluster restart tests](https://scans.gradle.com/s/5lllzgqtuegty/console-log#L8605) Relates #41536 --- .../org/elasticsearch/index/seqno/ReplicationTracker.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java b/server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java index 256fe571a24e5..a3a02de44ef31 100644 --- a/server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java +++ b/server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java @@ -508,10 +508,9 @@ public synchronized void renewPeerRecoveryRetentionLeases() { PEER_RECOVERY_RETENTION_LEASE_SOURCE); } else { // the retention lease is tied to the node, not the shard copy, so it's possible a copy was removed and now - // we are in the process of recovering it again. The recovery process will fix the lease before initiating - // tracking on this copy: - assert checkpointState.tracked == false - && checkpointState.globalCheckpoint == SequenceNumbers.UNASSIGNED_SEQ_NO : + // we are in the process of recovering it again, or maybe we were just promoted and have not yet received the + // global checkpoints from our peers. 
+ assert checkpointState.globalCheckpoint == SequenceNumbers.UNASSIGNED_SEQ_NO : "cannot renew " + retentionLease + " according to " + checkpointState + " for " + shardRouting; } }