From 95c6ed9d7f14c10119967b3d107fde04bd89e7fd Mon Sep 17 00:00:00 2001 From: Sachin Kale Date: Tue, 4 Apr 2023 21:35:33 +0530 Subject: [PATCH] Fix assertion failure in IndexShard.updateGlobalCheckpointOnReplica() when remote translog is enabled (#6975) Signed-off-by: Sachin Kale --- .../java/org/opensearch/remotestore/RemoteStoreIT.java | 4 ---- .../main/java/org/opensearch/index/shard/IndexShard.java | 6 ++++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 86e4e50a08a38..1644d1c3e63ba 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -282,22 +282,18 @@ public void testPeerRecoveryWithRemoteStoreNoRemoteTranslogRefresh() throws Exce testPeerRecovery(false, randomIntBetween(2, 5), false); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6193") public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogNoDataFlush() throws Exception { testPeerRecovery(true, 1, true); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6193") public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogFlush() throws Exception { testPeerRecovery(true, randomIntBetween(2, 5), true); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6193") public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogNoDataRefresh() throws Exception { testPeerRecovery(true, 1, false); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6193") public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogRefresh() throws Exception { testPeerRecovery(true, randomIntBetween(2, 5), false); } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 3be1816e1c4b8..0072d1594da0e 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -3078,9 +3078,11 @@ public void updateGlobalCheckpointOnReplica(final long globalCheckpoint, final S * calculations of the global checkpoint. However, we can not assert that we are in the translog stage of recovery here as * while the global checkpoint update may have emanated from the primary when we were in that state, we could subsequently move * to recovery finalization, or even finished recovery before the update arrives here. + * When remote translog is enabled for an index, replication operation is limited to primary term validation and does not + * update local checkpoint at replica, so the local checkpoint at replica can be less than globalCheckpoint. */ - assert state() != IndexShardState.POST_RECOVERY && state() != IndexShardState.STARTED - : "supposedly in-sync shard copy received a global checkpoint [" + assert (state() != IndexShardState.POST_RECOVERY && state() != IndexShardState.STARTED) + || indexSettings.isRemoteTranslogStoreEnabled() : "supposedly in-sync shard copy received a global checkpoint [" + globalCheckpoint + "] " + "that is higher than its local checkpoint ["