From 2f490f20f6762b714cfd58ec9146ba2a0260c85e Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Mon, 17 Oct 2022 14:51:56 -0700 Subject: [PATCH] Fix recovery path for searchable snapshots (#4813) Replicas are bootstrapped using the recovery path, as opposed to the restore path used for creating the primary shard. This has been broken in the initial implementation of searchable snapshots. The fix here is to put in the appropriate checks to avoid failing during recovery. I've also updated the integration test to ensure the recovery path is always exercised during testing. Signed-off-by: Andrew Ross (cherry picked from commit 6ac0c7cd2054da9a6492c76011d1dcd2e0f518b8) --- CHANGELOG.md | 4 +--- .../opensearch/snapshots/SearchableSnapshotIT.java | 13 +++++++++++-- .../indices/recovery/PeerRecoveryTargetService.java | 9 ++++++--- .../opensearch/indices/recovery/RecoveryTarget.java | 11 +++++++---- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae65eece9fb8f..b5ad36d9de9eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,9 +66,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - [Bug]: Alias filter lost after rollover ([#4499](https://github.com/opensearch-project/OpenSearch/pull/4499)) - Fixing Gradle warnings associated with publishPluginZipPublicationToXxx tasks ([#4696](https://github.com/opensearch-project/OpenSearch/pull/4696)) - Fixed randomly failing test ([4774](https://github.com/opensearch-project/OpenSearch/pull/4774)) -- Skip uppercase regex tests before 2.4.0 ([4869](https://github.com/opensearch-project/OpenSearch/pull/4869)) -- Compatibility issue with /_mget: RHLC 2.x connected to OpenSearch Cluster 1.x ([#4812](https://github.com/opensearch-project/OpenSearch/pull/4812)) - +- Fix recovery path for searchable snapshots ([4813](https://github.com/opensearch-project/OpenSearch/pull/4813)) ### Security - CVE-2022-25857 org.yaml:snakeyaml DOS vulnerability ([#4341](https://github.com/opensearch-project/OpenSearch/pull/4341)) diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java index 96fcf0053c9ab..0ab025bb575cc 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java @@ -49,15 +49,24 @@ protected boolean addMockInternalEngine() { } public void testCreateSearchableSnapshot() throws Exception { + final int numReplicasIndex1 = randomIntBetween(1, 4); + final int numReplicasIndex2 = randomIntBetween(0, 2); + internalCluster().ensureAtLeastNumDataNodes(Math.max(numReplicasIndex1, numReplicasIndex2) + 1); final Client client = client(); createRepository("test-repo", "fs"); createIndex( "test-idx-1", - Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "0").put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1").build() + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, Integer.toString(numReplicasIndex1)) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1") + .build() ); createIndex( "test-idx-2", - Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "0").put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1").build() + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, Integer.toString(numReplicasIndex2)) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1") + .build() ); ensureGreen(); indexRandomDocs("test-idx-1", 100); diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index b5702431ed4bf..556e4db3400e1 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -54,6 +54,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.CancellableThreads; import org.opensearch.common.util.concurrent.AbstractRunnable; +import org.opensearch.index.IndexModule; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.engine.RecoveryEngineException; import org.opensearch.index.mapper.MapperException; @@ -244,8 +245,10 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi assert recoveryTarget.sourceNode() != null : "can not do a recovery without a source node"; logger.trace("{} preparing shard for peer recovery", recoveryTarget.shardId()); indexShard.prepareForIndexRecovery(); - boolean remoteTranslogEnabled = recoveryTarget.state().getPrimary() == false && indexShard.isRemoteTranslogEnabled(); - final long startingSeqNo = indexShard.recoverLocallyAndFetchStartSeqNo(!remoteTranslogEnabled); + final boolean hasRemoteTranslog = recoveryTarget.state().getPrimary() == false && indexShard.isRemoteTranslogEnabled(); + final boolean hasNoTranslog = IndexModule.Type.REMOTE_SNAPSHOT.match(indexShard.indexSettings()); + final boolean verifyTranslog = (hasRemoteTranslog || hasNoTranslog) == false; + final long startingSeqNo = indexShard.recoverLocallyAndFetchStartSeqNo(!hasRemoteTranslog); assert startingSeqNo == UNASSIGNED_SEQ_NO || recoveryTarget.state().getStage() == RecoveryState.Stage.TRANSLOG : "unexpected recovery stage [" + recoveryTarget.state().getStage() + "] starting seqno [ " + startingSeqNo + "]"; startRequest = getStartRecoveryRequest( @@ -253,7 +256,7 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi clusterService.localNode(), recoveryTarget, startingSeqNo, - !remoteTranslogEnabled + verifyTranslog ); requestToSend = startRequest; actionName = PeerRecoverySourceService.Actions.START_RECOVERY; diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java index 652f3c9a55f53..c661ad3a461fe 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java @@ -45,6 +45,7 @@ import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.util.CancellableThreads; +import org.opensearch.index.IndexModule; import org.opensearch.index.engine.Engine; import org.opensearch.index.mapper.MapperException; import org.opensearch.index.seqno.ReplicationTracker; @@ -402,10 +403,12 @@ public void cleanFiles( try { store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetadata); - // If Segment Replication is enabled, we need to reuse the primary's translog UUID already stored in the index. - // With Segrep, replicas should never create their own commit points. This ensures the index and xlog share the same - // UUID without the extra step to associate the index with a new xlog. - if (indexShard.indexSettings().isSegRepEnabled()) { + // Replicas for segment replication or remote snapshot indices do not create + // their own commit points and therefore do not modify the commit user data + // in their store. In these cases, reuse the primary's translog UUID. + final boolean reuseTranslogUUID = indexShard.indexSettings().isSegRepEnabled() + || IndexModule.Type.REMOTE_SNAPSHOT.match(indexShard.indexSettings()); + if (reuseTranslogUUID) { final String translogUUID = store.getMetadata().getCommitUserData().get(TRANSLOG_UUID_KEY); Translog.createEmptyTranslog( indexShard.shardPath().resolveTranslog(),