From 8553d2fa6f0c7a90dbbc833848f5e9adcc2210fe Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Fri, 6 May 2022 10:48:37 -0700 Subject: [PATCH 01/16] Change the fastForwardProcessedSeqNo method in LocalCheckpointTracker to fast forward the persisted checkpoint. This change renames fastForwardProcessedSeqNo to fastForwardPersistedSeqNo and inverts its logic for use in Segment Replication. This allows a segment replication Engine to match the logic of InternalEngine, where the seqNo is incremented with each operation but only persisted in the tracker on a flush. With Segment Replication we bump the processed checkpoint as each operation (index/delete/noOp) is received, and invoke this method to bump the persisted seqNo when we receive a new set of segments. Signed-off-by: Marc Handalian --- .../index/seqno/LocalCheckpointTracker.java | 8 ++-- .../seqno/LocalCheckpointTrackerTests.java | 44 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/seqno/LocalCheckpointTracker.java b/server/src/main/java/org/opensearch/index/seqno/LocalCheckpointTracker.java index dbcc5e2190006..1dda1db0a6533 100644 --- a/server/src/main/java/org/opensearch/index/seqno/LocalCheckpointTracker.java +++ b/server/src/main/java/org/opensearch/index/seqno/LocalCheckpointTracker.java @@ -153,13 +153,13 @@ public synchronized void markSeqNoAsPersisted(final long seqNo) { * * @param seqNo the sequence number to mark as processed */ - public synchronized void fastForwardProcessedSeqNo(final long seqNo) { + public synchronized void fastForwardPersistedSeqNo(final long seqNo) { advanceMaxSeqNo(seqNo); - final long currentProcessedCheckpoint = processedCheckpoint.get(); - if (shouldUpdateSeqNo(seqNo, currentProcessedCheckpoint, persistedCheckpoint) == false) { + final long currentPersistedCheckpoint = persistedCheckpoint.get(); + if (shouldUpdateSeqNo(seqNo, currentPersistedCheckpoint, processedCheckpoint) == false) { return; } - processedCheckpoint.compareAndSet(currentProcessedCheckpoint, seqNo); + persistedCheckpoint.compareAndSet(currentPersistedCheckpoint, seqNo); } private void markSeqNo(final long seqNo, final AtomicLong checkPoint, final LongObjectHashMap bitSetMap) { diff --git a/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java b/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java index 237066e549b09..e311c4272275c 100644 --- a/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java @@ -332,57 +332,57 @@ public void testContains() { assertThat(tracker.hasProcessed(seqNo), equalTo(seqNo <= localCheckpoint || seqNos.contains(seqNo))); } - public void testFastForwardProcessedNoPersistentUpdate() { + public void testFastForwardPersistedNoProcessedUpdate() { // base case with no persistent checkpoint update long seqNo1; - assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); + assertThat(tracker.getPersistedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); seqNo1 = tracker.generateSeqNo(); assertThat(seqNo1, equalTo(0L)); - tracker.fastForwardProcessedSeqNo(seqNo1); - assertThat(tracker.getProcessedCheckpoint(), equalTo(-1L)); + tracker.fastForwardPersistedSeqNo(seqNo1); + assertThat(tracker.getPersistedCheckpoint(), equalTo(-1L)); } - public void testFastForwardProcessedPersistentUpdate() { - // base case with persistent checkpoint update + public void testFastForwardPersistedProcessedUpdate()
{ + // base case with processed checkpoint update long seqNo1; - assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); + assertThat(tracker.getPersistedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); seqNo1 = tracker.generateSeqNo(); assertThat(seqNo1, equalTo(0L)); - tracker.markSeqNoAsPersisted(seqNo1); - assertThat(tracker.getPersistedCheckpoint(), equalTo(0L)); - tracker.fastForwardProcessedSeqNo(seqNo1); + tracker.markSeqNoAsProcessed(seqNo1); assertThat(tracker.getProcessedCheckpoint(), equalTo(0L)); + tracker.fastForwardPersistedSeqNo(seqNo1); + assertThat(tracker.getPersistedCheckpoint(), equalTo(0L)); assertThat(tracker.hasProcessed(0L), equalTo(true)); assertThat(tracker.hasProcessed(atLeast(1)), equalTo(false)); // idempotent case - tracker.fastForwardProcessedSeqNo(seqNo1); - assertThat(tracker.getProcessedCheckpoint(), equalTo(0L)); + tracker.fastForwardPersistedSeqNo(seqNo1); + assertThat(tracker.getPersistedCheckpoint(), equalTo(0L)); assertThat(tracker.hasProcessed(0L), equalTo(true)); assertThat(tracker.hasProcessed(atLeast(1)), equalTo(false)); } - public void testFastForwardProcessedPersistentUpdate2() { + public void testFastForwardPersistedProcessedUpdate2() { long seqNo1, seqNo2; - assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); + assertThat(tracker.getPersistedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); seqNo1 = tracker.generateSeqNo(); seqNo2 = tracker.generateSeqNo(); assertThat(seqNo1, equalTo(0L)); assertThat(seqNo2, equalTo(1L)); - tracker.markSeqNoAsPersisted(seqNo1); - tracker.markSeqNoAsPersisted(seqNo2); - assertThat(tracker.getProcessedCheckpoint(), equalTo(-1L)); - assertThat(tracker.getPersistedCheckpoint(), equalTo(1L)); - - tracker.fastForwardProcessedSeqNo(seqNo2); + tracker.markSeqNoAsProcessed(seqNo1); + tracker.markSeqNoAsProcessed(seqNo2); + assertThat(tracker.getPersistedCheckpoint(), equalTo(-1L)); assertThat(tracker.getProcessedCheckpoint(), equalTo(1L)); + + tracker.fastForwardPersistedSeqNo(seqNo2); + assertThat(tracker.getPersistedCheckpoint(), equalTo(1L)); assertThat(tracker.hasProcessed(seqNo1), equalTo(true)); assertThat(tracker.hasProcessed(seqNo2), equalTo(true)); - tracker.fastForwardProcessedSeqNo(seqNo1); - assertThat(tracker.getProcessedCheckpoint(), equalTo(1L)); + tracker.fastForwardPersistedSeqNo(seqNo1); + assertThat(tracker.getPersistedCheckpoint(), equalTo(1L)); assertThat(tracker.hasProcessed(between(0, 1)), equalTo(true)); assertThat(tracker.hasProcessed(atLeast(2)), equalTo(false)); assertThat(tracker.getMaxSeqNo(), equalTo(1L)); From 5c54323c2c7c6fc5edf92be7ef85fe909e3a4801 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Fri, 6 May 2022 10:56:30 -0700 Subject: [PATCH 02/16] Extract Translog specific engine methods into an abstract class. This change extracts translog specific methods to an abstract engine class so that other engine implementations can reuse translog logic. 
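As an illustration, here is a minimal sketch of what a subclass now provides. The class and variable names are hypothetical, and the class is kept abstract to elide the many other Engine methods a real implementation must still supply; the two hooks shown (getLocalCheckpointTracker, deleteUnusedFiles) are the ones this patch introduces on TranslogAwareEngine:

    import org.opensearch.index.engine.EngineConfig;
    import org.opensearch.index.engine.TranslogAwareEngine;
    import org.opensearch.index.seqno.LocalCheckpointTracker;

    // Hypothetical engine that reuses the extracted translog plumbing.
    public abstract class MyTranslogBackedEngine extends TranslogAwareEngine {

        private final LocalCheckpointTracker tracker;

        protected MyTranslogBackedEngine(EngineConfig config, LocalCheckpointTracker tracker) {
            super(config); // the base constructor opens the translog and wires its deletion policy
            this.tracker = tracker;
        }

        @Override
        protected LocalCheckpointTracker getLocalCheckpointTracker() {
            // the base class marks seqNos as persisted on this tracker as the translog syncs
            return tracker;
        }

        @Override
        protected void deleteUnusedFiles() {
            // called after a translog sync; engines holding an IndexWriter delete
            // unreferenced commits here, other implementations can no-op
        }
    }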
Signed-off-by: Marc Handalian --- .../org/opensearch/index/engine/Engine.java | 11 + .../index/engine/InternalEngine.java | 219 +-------------- .../index/engine/TranslogAwareEngine.java | 254 ++++++++++++++++++ .../index/shard/IndexShardTests.java | 8 +- 4 files changed, 280 insertions(+), 212 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index 4a51145432638..6dd067a1158f5 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -176,6 +176,17 @@ public MergeStats getMergeStats() { /** returns the history uuid for the engine */ public abstract String getHistoryUUID(); + /** + * Reads the current stored history ID from commit data. + */ + String loadHistoryUUID(Map commitData) { + final String uuid = commitData.get(HISTORY_UUID_KEY); + if (uuid == null) { + throw new IllegalStateException("commit doesn't contain history uuid"); + } + return uuid; + } + /** Returns how many bytes we are currently moving from heap to disk */ public abstract long getWritingBytes(); diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index 45c6f875aab42..a111e27f0975f 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -104,12 +104,7 @@ import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.OpenSearchMergePolicy; import org.opensearch.index.shard.ShardId; -import org.opensearch.index.translog.DefaultTranslogDeletionPolicy; import org.opensearch.index.translog.Translog; -import org.opensearch.index.translog.TranslogConfig; -import org.opensearch.index.translog.TranslogCorruptedException; -import org.opensearch.index.translog.TranslogDeletionPolicy; -import org.opensearch.index.translog.TranslogStats; import org.opensearch.search.suggest.completion.CompletionStats; import org.opensearch.threadpool.ThreadPool; @@ -131,8 +126,6 @@ import java.util.concurrent.locks.ReentrantLock; import java.util.function.BiConsumer; import java.util.function.BiFunction; -import java.util.function.LongConsumer; -import java.util.function.LongSupplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -141,14 +134,12 @@ * * @opensearch.internal */ -public class InternalEngine extends Engine { +public class InternalEngine extends TranslogAwareEngine { /** * When we last pruned expired tombstones from versionMap.deletes: */ private volatile long lastDeleteVersionPruneTimeMSec; - - private final Translog translog; private final OpenSearchConcurrentMergeScheduler mergeScheduler; private final IndexWriter indexWriter; @@ -229,24 +220,8 @@ public InternalEngine(EngineConfig engineConfig) { if (engineConfig.isAutoGeneratedIDsOptimizationEnabled() == false) { updateAutoIdTimestamp(Long.MAX_VALUE, true); } - final TranslogDeletionPolicy translogDeletionPolicy; - TranslogDeletionPolicy customTranslogDeletionPolicy = null; - if (engineConfig.getCustomTranslogDeletionPolicyFactory() != null) { - customTranslogDeletionPolicy = engineConfig.getCustomTranslogDeletionPolicyFactory() - .create(engineConfig.getIndexSettings(), engineConfig.retentionLeasesSupplier()); - } - if (customTranslogDeletionPolicy != null) { - 
translogDeletionPolicy = customTranslogDeletionPolicy; - } else { - translogDeletionPolicy = new DefaultTranslogDeletionPolicy( - engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(), - engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis(), - engineConfig.getIndexSettings().getTranslogRetentionTotalFiles() - ); - } store.incRef(); IndexWriter writer = null; - Translog translog = null; ExternalReaderManager externalReaderManager = null; OpenSearchReaderManager internalReaderManager = null; EngineMergeScheduler scheduler = null; @@ -256,22 +231,12 @@ public InternalEngine(EngineConfig engineConfig) { mergeScheduler = scheduler = new EngineMergeScheduler(engineConfig.getShardId(), engineConfig.getIndexSettings()); throttle = new IndexThrottle(); try { - store.trimUnsafeCommits(engineConfig.getTranslogConfig().getTranslogPath()); - translog = openTranslog(engineConfig, translogDeletionPolicy, engineConfig.getGlobalCheckpointSupplier(), seqNo -> { - final LocalCheckpointTracker tracker = getLocalCheckpointTracker(); - assert tracker != null || getTranslog().isOpen() == false; - if (tracker != null) { - tracker.markSeqNoAsPersisted(seqNo); - } - }); - assert translog.getGeneration() != null; - this.translog = translog; this.softDeletesPolicy = newSoftDeletesPolicy(); this.combinedDeletionPolicy = new CombinedDeletionPolicy( logger, translogDeletionPolicy, softDeletesPolicy, - translog::getLastSyncedGlobalCheckpoint + this::getLastSyncedGlobalCheckpoint ); this.localCheckpointTracker = createLocalCheckpointTracker(localCheckpointTrackerSupplier); writer = createWriter(); @@ -280,7 +245,7 @@ public InternalEngine(EngineConfig engineConfig) { historyUUID = loadHistoryUUID(commitData); forceMergeUUID = commitData.get(FORCE_MERGE_UUID_KEY); indexWriter = writer; - } catch (IOException | TranslogCorruptedException e) { + } catch (IOException e) { throw new EngineCreationFailureException(shardId, "failed to create engine", e); } catch (AssertionError e) { // IndexWriter throws AssertionError on init, if asserts are enabled, if any files don't exist, but tests that @@ -357,7 +322,7 @@ private SoftDeletesPolicy newSoftDeletesPolicy() throws IOException { lastMinRetainedSeqNo = Long.parseLong(commitUserData.get(SequenceNumbers.MAX_SEQ_NO)) + 1; } return new SoftDeletesPolicy( - translog::getLastSyncedGlobalCheckpoint, + this::getLastSyncedGlobalCheckpoint, lastMinRetainedSeqNo, engineConfig.getIndexSettings().getSoftDeleteRetentionOperations(), engineConfig.retentionLeasesSupplier() @@ -454,17 +419,6 @@ final boolean assertSearcherIsWarmedUp(String source, SearcherScope scope) { return true; } - @Override - public int restoreLocalHistoryFromTranslog(TranslogRecoveryRunner translogRecoveryRunner) throws IOException { - try (ReleasableLock ignored = readLock.acquire()) { - ensureOpen(); - final long localCheckpoint = localCheckpointTracker.getProcessedCheckpoint(); - try (Translog.Snapshot snapshot = getTranslog().newSnapshot(localCheckpoint + 1, Long.MAX_VALUE)) { - return translogRecoveryRunner.run(this, snapshot); - } - } - } - @Override public int fillSeqNoGaps(long primaryTerm) throws IOException { try (ReleasableLock ignored = writeLock.acquire()) { @@ -559,74 +513,16 @@ private void recoverFromTranslogInternal(TranslogRecoveryRunner translogRecovery flush(false, true); translog.trimUnreferencedReaders(); } - - private Translog openTranslog( - EngineConfig engineConfig, - TranslogDeletionPolicy translogDeletionPolicy, - LongSupplier globalCheckpointSupplier, - 
LongConsumer persistedSequenceNumberConsumer - ) throws IOException { - - final TranslogConfig translogConfig = engineConfig.getTranslogConfig(); - final Map userData = store.readLastCommittedSegmentsInfo().getUserData(); - final String translogUUID = Objects.requireNonNull(userData.get(Translog.TRANSLOG_UUID_KEY)); - // We expect that this shard already exists, so it must already have an existing translog else something is badly wrong! - return new Translog( - translogConfig, - translogUUID, - translogDeletionPolicy, - globalCheckpointSupplier, - engineConfig.getPrimaryTermSupplier(), - persistedSequenceNumberConsumer - ); - } - - // Package private for testing purposes only - Translog getTranslog() { - ensureOpen(); - return translog; - } - - // Package private for testing purposes only + // Package private for testing purposes only boolean hasSnapshottedCommits() { return combinedDeletionPolicy.hasSnapshottedCommits(); } @Override - public boolean isTranslogSyncNeeded() { - return getTranslog().syncNeeded(); - } - - @Override - public boolean ensureTranslogSynced(Stream locations) throws IOException { - final boolean synced = translog.ensureSynced(locations); - if (synced) { - revisitIndexDeletionPolicyOnTranslogSynced(); - } - return synced; - } - - @Override - public void syncTranslog() throws IOException { - translog.sync(); - revisitIndexDeletionPolicyOnTranslogSynced(); - } - - @Override - public TranslogStats getTranslogStats() { - return getTranslog().stats(); - } - - @Override - public Translog.Location getTranslogLastWriteLocation() { - return getTranslog().getLastWriteLocation(); - } - - private void revisitIndexDeletionPolicyOnTranslogSynced() throws IOException { + protected void deleteUnusedFiles() throws IOException { if (combinedDeletionPolicy.hasUnreferencedCommits()) { indexWriter.deleteUnusedFiles(); } - translog.trimUnreferencedReaders(); } @Override @@ -646,16 +542,6 @@ public long getWritingBytes() { return indexWriter.getFlushingBytes() + versionMap.getRefreshingBytes(); } - /** - * Reads the current stored history ID from the IW commit data. - */ - private String loadHistoryUUID(Map commitData) { - final String uuid = commitData.get(HISTORY_UUID_KEY); - if (uuid == null) { - throw new IllegalStateException("commit doesn't contain history uuid"); - } - return uuid; - } private ExternalReaderManager createReaderManager(RefreshWarmerListener externalRefreshListener) throws EngineException { boolean success = false; @@ -1029,23 +915,7 @@ public IndexResult index(Index index) throws IOException { } } if (index.origin().isFromTranslog() == false) { - final Translog.Location location; - if (indexResult.getResultType() == Result.Type.SUCCESS) { - location = translog.add(new Translog.Index(index, indexResult)); - } else if (indexResult.getSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) { - // if we have document failure, record it as a no-op in the translog and Lucene with the generated seq_no - final NoOp noOp = new NoOp( - indexResult.getSeqNo(), - index.primaryTerm(), - index.origin(), - index.startTime(), - indexResult.getFailure().toString() - ); - location = innerNoOp(noOp).getTranslogLocation(); - } else { - location = null; - } - indexResult.setTranslogLocation(location); + addIndexOperationToTranslog(index, indexResult); } if (plan.indexIntoLucene && indexResult.getResultType() == Result.Type.SUCCESS) { final Translog.Location translogLocation = trackTranslogLocation.get() ? 
indexResult.getTranslogLocation() : null; @@ -1466,8 +1336,7 @@ public DeleteResult delete(Delete delete) throws IOException { } } if (delete.origin().isFromTranslog() == false && deleteResult.getResultType() == Result.Type.SUCCESS) { - final Translog.Location location = translog.add(new Translog.Delete(delete, deleteResult)); - deleteResult.setTranslogLocation(location); + addDeleteOperationToTranslog(delete, deleteResult); } localCheckpointTracker.markSeqNoAsProcessed(deleteResult.getSeqNo()); if (deleteResult.getTranslogLocation() == null) { @@ -2020,66 +1889,6 @@ private void refreshLastCommittedSegmentInfos() { } } - @Override - public void rollTranslogGeneration() throws EngineException { - try (ReleasableLock ignored = readLock.acquire()) { - ensureOpen(); - translog.rollGeneration(); - translog.trimUnreferencedReaders(); - } catch (AlreadyClosedException e) { - failOnTragicEvent(e); - throw e; - } catch (Exception e) { - try { - failEngine("translog trimming failed", e); - } catch (Exception inner) { - e.addSuppressed(inner); - } - throw new EngineException(shardId, "failed to roll translog", e); - } - } - - @Override - public void trimUnreferencedTranslogFiles() throws EngineException { - try (ReleasableLock lock = readLock.acquire()) { - ensureOpen(); - translog.trimUnreferencedReaders(); - } catch (AlreadyClosedException e) { - failOnTragicEvent(e); - throw e; - } catch (Exception e) { - try { - failEngine("translog trimming failed", e); - } catch (Exception inner) { - e.addSuppressed(inner); - } - throw new EngineException(shardId, "failed to trim translog", e); - } - } - - @Override - public boolean shouldRollTranslogGeneration() { - return getTranslog().shouldRollGeneration(); - } - - @Override - public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException { - try (ReleasableLock lock = readLock.acquire()) { - ensureOpen(); - translog.trimOperations(belowTerm, aboveSeqNo); - } catch (AlreadyClosedException e) { - failOnTragicEvent(e); - throw e; - } catch (Exception e) { - try { - failEngine("translog operations trimming failed", e); - } catch (Exception inner) { - e.addSuppressed(inner); - } - throw new EngineException(shardId, "failed to trim translog operations", e); - } - } - private void pruneDeletedTombstones() { /* * We need to deploy two different trimming strategies for GC deletes on primary and replicas. 
Delete operations on primary @@ -2682,9 +2491,7 @@ public void onSettingsChanged(TimeValue translogRetentionAge, ByteSizeValue tran // the setting will be re-interpreted if it's set to true updateAutoIdTimestamp(Long.MAX_VALUE, true); } - final TranslogDeletionPolicy translogDeletionPolicy = translog.getDeletionPolicy(); - translogDeletionPolicy.setRetentionAgeInMillis(translogRetentionAge.millis()); - translogDeletionPolicy.setRetentionSizeInBytes(translogRetentionSize.getBytes()); + refreshTranslogDeletionPolicy(translogRetentionAge, translogRetentionSize); softDeletesPolicy.setRetentionOperations(softDeletesRetentionOps); } @@ -2692,13 +2499,9 @@ public MergeStats getMergeStats() { return mergeScheduler.stats(); } - LocalCheckpointTracker getLocalCheckpointTracker() { - return localCheckpointTracker; - } - @Override - public long getLastSyncedGlobalCheckpoint() { - return getTranslog().getLastSyncedGlobalCheckpoint(); + protected LocalCheckpointTracker getLocalCheckpointTracker() { + return localCheckpointTracker; } public long getProcessedLocalCheckpoint() { diff --git a/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java b/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java new file mode 100644 index 0000000000000..9f3c5d2c13b08 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java @@ -0,0 +1,254 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.common.unit.ByteSizeValue; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.ReleasableLock; +import org.opensearch.index.seqno.LocalCheckpointTracker; +import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.index.translog.DefaultTranslogDeletionPolicy; +import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogConfig; +import org.opensearch.index.translog.TranslogCorruptedException; +import org.opensearch.index.translog.TranslogDeletionPolicy; +import org.opensearch.index.translog.TranslogStats; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; +import java.util.function.LongConsumer; +import java.util.function.LongSupplier; +import java.util.stream.Stream; + +/** + * Abstract Engine implementation that provides common Translog functionality. + * This class creates a closeable {@link Translog}, it is up to subclasses to close the translog reference. 
+ * + * @opensearch.internal + */ +public abstract class TranslogAwareEngine extends Engine { + + protected final Translog translog; + protected final TranslogDeletionPolicy translogDeletionPolicy; + + protected TranslogAwareEngine(EngineConfig engineConfig) { + super(engineConfig); + store.incRef(); + TranslogDeletionPolicy customTranslogDeletionPolicy = null; + if (engineConfig.getCustomTranslogDeletionPolicyFactory() != null) { + customTranslogDeletionPolicy = engineConfig.getCustomTranslogDeletionPolicyFactory() + .create(engineConfig.getIndexSettings(), engineConfig.retentionLeasesSupplier()); + } + translogDeletionPolicy = Objects.requireNonNullElseGet(customTranslogDeletionPolicy, () -> new DefaultTranslogDeletionPolicy( + engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(), + engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis(), + engineConfig.getIndexSettings().getTranslogRetentionTotalFiles() + )); + try { + store.trimUnsafeCommits(engineConfig.getTranslogConfig().getTranslogPath()); + translog = openTranslog(engineConfig, translogDeletionPolicy, engineConfig.getGlobalCheckpointSupplier(), seqNo -> { + final LocalCheckpointTracker tracker = getLocalCheckpointTracker(); + assert tracker != null || getTranslog().isOpen() == false; + if (tracker != null) { + tracker.markSeqNoAsPersisted(seqNo); + } + }); + assert translog.getGeneration() != null; + } catch (IOException | TranslogCorruptedException e) { + throw new EngineCreationFailureException(shardId, "failed to create engine", e); + } finally { + store.decRef(); + } + } + + @Override + public final int restoreLocalHistoryFromTranslog(TranslogRecoveryRunner translogRecoveryRunner) throws IOException { + try (ReleasableLock ignored = readLock.acquire()) { + ensureOpen(); + final long localCheckpoint = getLocalCheckpointTracker().getProcessedCheckpoint(); + try (Translog.Snapshot snapshot = getTranslog().newSnapshot(localCheckpoint + 1, Long.MAX_VALUE)) { + return translogRecoveryRunner.run(this, snapshot); + } + } + } + + @Override + public final boolean isTranslogSyncNeeded() { + return getTranslog().syncNeeded(); + } + + @Override + public final void syncTranslog() throws IOException { + translog.sync(); + deleteUnusedFiles(); + translog.trimUnreferencedReaders(); + } + + @Override + public final boolean ensureTranslogSynced(Stream locations) throws IOException { + boolean synced = translog.ensureSynced(locations); + if (synced) { + deleteUnusedFiles(); + translog.trimUnreferencedReaders(); + } + return synced; + } + + @Override + public final void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException { + try (ReleasableLock lock = readLock.acquire()) { + ensureOpen(); + translog.trimOperations(belowTerm, aboveSeqNo); + } catch (AlreadyClosedException e) { + maybeFailEngine("trimOperationsFromTranslog", e); + throw e; + } catch (Exception e) { + try { + failEngine("translog operations trimming failed", e); + } catch (Exception inner) { + e.addSuppressed(inner); + } + throw new EngineException(shardId, "failed to trim translog operations", e); + } + } + + @Override + public final TranslogStats getTranslogStats() { + return getTranslog().stats(); + } + + @Override + public final Translog.Location getTranslogLastWriteLocation() { + return getTranslog().getLastWriteLocation(); + } + + @Override + public final long getLastSyncedGlobalCheckpoint() { + return getTranslog().getLastSyncedGlobalCheckpoint(); + } + + @Override + public final void rollTranslogGeneration() throws 
EngineException { + try (ReleasableLock ignored = readLock.acquire()) { + ensureOpen(); + translog.rollGeneration(); + translog.trimUnreferencedReaders(); + } catch (AlreadyClosedException e) { + maybeFailEngine("rollTranslogGeneration", e); + throw e; + } catch (Exception e) { + try { + failEngine("translog trimming failed", e); + } catch (Exception inner) { + e.addSuppressed(inner); + } + throw new EngineException(shardId, "failed to roll translog", e); + } + } + + @Override + public final boolean shouldRollTranslogGeneration() { + return getTranslog().shouldRollGeneration(); + } + + @Override + public final void trimUnreferencedTranslogFiles() throws EngineException { + try (ReleasableLock lock = readLock.acquire()) { + ensureOpen(); + translog.trimUnreferencedReaders(); + } catch (AlreadyClosedException e) { + maybeFailEngine("trimUnreferencedTranslogFiles", e); + throw e; + } catch (Exception e) { + try { + failEngine("translog trimming failed", e); + } catch (Exception inner) { + e.addSuppressed(inner); + } + throw new EngineException(shardId, "failed to trim translog", e); + } + } + + protected final void refreshTranslogDeletionPolicy(TimeValue translogRetentionAge, ByteSizeValue translogRetentionSize) { + final TranslogDeletionPolicy translogDeletionPolicy = translog.getDeletionPolicy(); + translogDeletionPolicy.setRetentionAgeInMillis(translogRetentionAge.millis()); + translogDeletionPolicy.setRetentionSizeInBytes(translogRetentionSize.getBytes()); + } + + protected final Translog getTranslog() { + ensureOpen(); + return translog; + } + + protected final void addIndexOperationToTranslog(Index index, IndexResult indexResult) throws IOException { + Translog.Location location = null; + if (indexResult.getResultType() == Result.Type.SUCCESS) { + location = translog.add(new Translog.Index(index, indexResult)); + } else if (indexResult.getSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) { + // if we have document failure, record it as a no-op in the translog and Lucene with the generated seq_no + final NoOp noOp = new NoOp( + indexResult.getSeqNo(), + index.primaryTerm(), + index.origin(), + index.startTime(), + indexResult.getFailure().toString() + ); + location = noOp(noOp).getTranslogLocation(); + } + indexResult.setTranslogLocation(location); + } + + protected final void addDeleteOperationToTranslog(Delete delete, DeleteResult deleteResult) throws IOException { + if (deleteResult.getResultType() == Result.Type.SUCCESS) { + final Translog.Location location = translog.add(new Translog.Delete(delete, deleteResult)); + deleteResult.setTranslogLocation(location); + } + } + + private Translog openTranslog( + EngineConfig engineConfig, + TranslogDeletionPolicy translogDeletionPolicy, + LongSupplier globalCheckpointSupplier, + LongConsumer persistedSequenceNumberConsumer + ) throws IOException { + final TranslogConfig translogConfig = engineConfig.getTranslogConfig(); + store.incRef(); + try { + final Map userData = store.readLastCommittedSegmentsInfo().getUserData(); + final String translogUUID = Objects.requireNonNull(userData.get(Translog.TRANSLOG_UUID_KEY)); + // We expect that this shard already exists, so it must already have an existing translog else something is badly wrong! 
+ return new Translog( + translogConfig, + translogUUID, + translogDeletionPolicy, + globalCheckpointSupplier, + engineConfig.getPrimaryTermSupplier(), + persistedSequenceNumberConsumer + ); + } finally { + store.decRef(); + } + } + + /** + * Fetch a reference to this Engine's {@link LocalCheckpointTracker} + * + * @return {@link LocalCheckpointTracker} + */ + protected abstract LocalCheckpointTracker getLocalCheckpointTracker(); + + /** + * Allow implementations to delete unused index files after a translog sync. + * + * @throws IOException - When there is an IO error deleting unused files from the index. + */ + protected abstract void deleteUnusedFiles() throws IOException; +} diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index e54d30c626812..04e89daf2e375 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -4037,7 +4037,7 @@ public void testCloseShardWhileResettingEngine() throws Exception { CountDownLatch closeDoneLatch = new CountDownLatch(1); IndexShard shard = newStartedShard(false, Settings.EMPTY, config -> new InternalEngine(config) { @Override - public InternalEngine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) + public Engine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) throws IOException { readyToCloseLatch.countDown(); try { @@ -4096,16 +4096,16 @@ public void testSnapshotWhileResettingEngine() throws Exception { CountDownLatch snapshotDoneLatch = new CountDownLatch(1); IndexShard shard = newStartedShard(false, Settings.EMPTY, config -> new InternalEngine(config) { @Override - public InternalEngine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) + public Engine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) throws IOException { - InternalEngine internalEngine = super.recoverFromTranslog(translogRecoveryRunner, recoverUpToSeqNo); + Engine engine = super.recoverFromTranslog(translogRecoveryRunner, recoverUpToSeqNo); readyToSnapshotLatch.countDown(); try { snapshotDoneLatch.await(); } catch (InterruptedException e) { throw new AssertionError(e); } - return internalEngine; + return engine; } }); From e77b6164293a182bda8e73e8e94dd05373dfd1a6 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Sun, 8 May 2022 19:22:07 -0700 Subject: [PATCH 03/16] Add a separate Engine implementation for replicas with segment replication enabled. This change adds a new engine intended to be used on replicas with segment replication enabled. This engine does not wire up an IndexWriter, but still writes all operations to a translog. The engine uses a new ReaderManager that refreshes from an externally provided SegmentInfos. 
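A sketch of the intended flow, assuming test-style fixtures for the two engines (the variable names are hypothetical; updateSegments, getLatestSegmentInfos, and the checkpoint accessors are the APIs added in this series, mirrored by NRTReplicationEngineTest below):

    // Replica side: operations are recorded in the translog and advance the
    // processed checkpoint; no IndexWriter is involved.
    NRTReplicationEngine replica = new NRTReplicationEngine(replicaConfig);
    replica.index(indexOp);

    // When a new set of segments arrives from the primary, swap them into the
    // reader. If the infos carry a newer commit generation, the persisted
    // checkpoint is fast-forwarded via fastForwardPersistedSeqNo.
    replica.updateSegments(primary.getLatestSegmentInfos(), primary.getProcessedLocalCheckpoint());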
Signed-off-by: Marc Handalian --- .../org/opensearch/index/engine/Engine.java | 6 + .../opensearch/index/engine/EngineConfig.java | 21 +- .../index/engine/EngineConfigFactory.java | 6 +- .../index/engine/InternalEngine.java | 18 + .../index/engine/NRTReplicationEngine.java | 331 ++++++++++++++++++ .../engine/NRTReplicationEngineFactory.java | 19 + .../engine/NRTReplicationReaderManager.java | 90 +++++ .../index/engine/ReadOnlyEngine.java | 5 + .../opensearch/index/shard/IndexShard.java | 3 +- .../opensearch/indices/IndicesService.java | 4 + .../engine/EngineConfigFactoryTests.java | 6 +- .../index/engine/InternalEngineTests.java | 9 +- .../engine/NRTReplicationEngineTest.java | 132 +++++++ .../index/shard/IndexShardTests.java | 35 +- .../index/shard/RefreshListenersTests.java | 3 +- .../IndexingMemoryControllerTests.java | 3 +- .../index/engine/EngineTestCase.java | 37 +- 17 files changed, 696 insertions(+), 32 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java create mode 100644 server/src/main/java/org/opensearch/index/engine/NRTReplicationEngineFactory.java create mode 100644 server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java create mode 100644 server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTest.java diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index 6dd067a1158f5..7d59c70429046 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -169,6 +169,12 @@ public final EngineConfig config() { protected abstract SegmentInfos getLastCommittedSegmentInfos(); + /** + * Return the latest active SegmentInfos from the engine. + * @return {@link SegmentInfos} + */ + protected abstract SegmentInfos getLatestSegmentInfos(); + public MergeStats getMergeStats() { return new MergeStats(); } diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 52dff2826657b..3f6d4568ca592 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -97,6 +97,7 @@ public final class EngineConfig { private final CircuitBreakerService circuitBreakerService; private final LongSupplier globalCheckpointSupplier; private final Supplier retentionLeasesSupplier; + private boolean isReadOnlyReplica; /** * A supplier of the outstanding retention leases. 
This is used during merged operations to determine which operations that have been @@ -171,7 +172,8 @@ public EngineConfig( LongSupplier globalCheckpointSupplier, Supplier retentionLeasesSupplier, LongSupplier primaryTermSupplier, - TombstoneDocSupplier tombstoneDocSupplier + TombstoneDocSupplier tombstoneDocSupplier, + boolean isReadOnlyReplica ) { this( shardId, @@ -196,7 +198,8 @@ public EngineConfig( globalCheckpointSupplier, retentionLeasesSupplier, primaryTermSupplier, - tombstoneDocSupplier + tombstoneDocSupplier, + isReadOnlyReplica ); } @@ -226,7 +229,8 @@ public EngineConfig( LongSupplier globalCheckpointSupplier, Supplier retentionLeasesSupplier, LongSupplier primaryTermSupplier, - TombstoneDocSupplier tombstoneDocSupplier + TombstoneDocSupplier tombstoneDocSupplier, + boolean isReadOnlyReplica ) { this.shardId = shardId; this.indexSettings = indexSettings; @@ -266,6 +270,7 @@ public EngineConfig( this.retentionLeasesSupplier = Objects.requireNonNull(retentionLeasesSupplier); this.primaryTermSupplier = primaryTermSupplier; this.tombstoneDocSupplier = tombstoneDocSupplier; + this.isReadOnlyReplica = isReadOnlyReplica; } /** @@ -460,6 +465,16 @@ public LongSupplier getPrimaryTermSupplier() { return primaryTermSupplier; } + /** + * Returns if this replica should be wired as a read only. + * This is used for Segment Replication where the engine implementation used is dependent on + * if the shard is a primary/replica. + * @return true if this engine should be wired as read only. + */ + public boolean isReadOnlyReplica() { + return isReadOnlyReplica; + } + /** * A supplier supplies tombstone documents which will be used in soft-update methods. * The returned document consists only _uid, _seqno, _term and _version fields; other metadata fields are excluded. 
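The new flag is what lets a single engine factory pick an implementation per shard role. A condensed view of the IndexShard and NRTReplicationEngineFactory changes, both of which appear in full later in this series:

    // IndexShard: replicas of segment-replication-enabled indices are flagged read-only
    boolean isReadOnlyReplica = indexSettings.isSegRepEnabled() && shardRouting.primary() == false;

    // NRTReplicationEngineFactory: the flag selects the engine implementation
    public Engine newReadWriteEngine(EngineConfig config) {
        return config.isReadOnlyReplica() ? new NRTReplicationEngine(config) : new InternalEngine(config);
    }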
diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java b/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java index afab57905a9a7..c8aec3570f8b5 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java @@ -146,7 +146,8 @@ public EngineConfig newEngineConfig( LongSupplier globalCheckpointSupplier, Supplier retentionLeasesSupplier, LongSupplier primaryTermSupplier, - EngineConfig.TombstoneDocSupplier tombstoneDocSupplier + EngineConfig.TombstoneDocSupplier tombstoneDocSupplier, + boolean isReadOnlyReplica ) { CodecService codecServiceToUse = codecService; if (codecService == null && this.codecServiceFactory != null) { @@ -176,7 +177,8 @@ public EngineConfig newEngineConfig( globalCheckpointSupplier, retentionLeasesSupplier, primaryTermSupplier, - tombstoneDocSupplier + tombstoneDocSupplier, + isReadOnlyReplica ); } diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index a111e27f0975f..400d76c3955bb 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -49,6 +49,7 @@ import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.ShuffleForcedMergePolicy; import org.apache.lucene.index.SoftDeletesRetentionMergePolicy; +import org.apache.lucene.index.StandardDirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.sandbox.index.MergeOnFlushMergePolicy; import org.apache.lucene.search.BooleanClause; @@ -2095,6 +2096,23 @@ protected SegmentInfos getLastCommittedSegmentInfos() { return lastCommittedSegmentInfos; } + @Override + public SegmentInfos getLatestSegmentInfos() { + OpenSearchDirectoryReader reader = null; + try { + reader = externalReaderManager.internalReaderManager.acquire(); + return ((StandardDirectoryReader) reader.getDelegate()).getSegmentInfos(); + } catch (IOException e) { + throw new EngineException(shardId, e.getMessage(), e); + } finally { + try { + externalReaderManager.internalReaderManager.release(reader); + } catch (IOException e) { + throw new EngineException(shardId, e.getMessage(), e); + } + } + } + @Override protected final void writerSegmentStats(SegmentsStats stats) { stats.addVersionMapMemoryInBytes(versionMap.ramBytesUsed()); diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java new file mode 100644 index 0000000000000..f2ea501c634b9 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -0,0 +1,331 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.SoftDeletesDirectoryReaderWrapper; +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; +import org.opensearch.core.internal.io.IOUtils; +import org.opensearch.index.seqno.LocalCheckpointTracker; +import org.opensearch.index.seqno.SeqNoStats; +import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.index.translog.Translog; +import org.opensearch.search.suggest.completion.CompletionStats; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.function.BiFunction; + +/** + * This is an {@link Engine} implementation intended for replica shards when Segment Replication + * is enabled. This Engine does not create an IndexWriter, rather it refreshes a {@link NRTReplicationReaderManager} + * with new Segments when received from an external source. + * + * @opensearch.internal + */ +public class NRTReplicationEngine extends TranslogAwareEngine { + + private volatile SegmentInfos lastCommittedSegmentInfos; + private final NRTReplicationReaderManager readerManager; + private final CompletionStatsCache completionStatsCache; + private final LocalCheckpointTracker localCheckpointTracker; + + public NRTReplicationEngine(EngineConfig engineConfig) { + super(engineConfig); + store.incRef(); + try { + lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); + readerManager = new NRTReplicationReaderManager(OpenSearchDirectoryReader.wrap(getDirectoryReader(), shardId)); + final SequenceNumbers.CommitInfo commitInfo = SequenceNumbers.loadSeqNoInfoFromLuceneCommit(this.lastCommittedSegmentInfos.getUserData().entrySet()); + this.localCheckpointTracker = new LocalCheckpointTracker(commitInfo.maxSeqNo, commitInfo.localCheckpoint); + this.completionStatsCache = new CompletionStatsCache(() -> acquireSearcher("completion_stats")); + this.readerManager.addListener(completionStatsCache); + } catch (IOException e) { + IOUtils.closeWhileHandlingException(store::decRef, translog); + throw new EngineCreationFailureException(shardId, "failed to create engine", e); + } + } + + public synchronized void updateSegments(final SegmentInfos infos, long seqNo) + throws IOException { + try { + store.incRef(); + // Update the current infos reference on the Engine's reader. + readerManager.updateSegments(infos); + + // only update the persistedSeqNo and "lastCommitted" infos reference if the incoming segments have a higher + // generation. We can still refresh with incoming SegmentInfos that are not part of a commit point. + if (infos.getGeneration() > lastCommittedSegmentInfos.getGeneration()) { + this.lastCommittedSegmentInfos = infos; + localCheckpointTracker.fastForwardPersistedSeqNo(seqNo); + } + readerManager.maybeRefresh(); + } finally { + store.decRef(); + } + } + + @Override + public String getHistoryUUID() { + return loadHistoryUUID(lastCommittedSegmentInfos.userData); + } + + @Override + public long getWritingBytes() { + return 0; + } + + @Override + public CompletionStats completionStats(String... 
fieldNamePatterns) { + return completionStatsCache.get(fieldNamePatterns); + } + + @Override + public long getIndexThrottleTimeInMillis() { + return 0; + } + + @Override + public boolean isThrottled() { + return false; + } + + @Override + public IndexResult index(Index index) throws IOException { + IndexResult indexResult = new IndexResult( + index.version(), + index.primaryTerm(), + index.seqNo(), + false + ); + addIndexOperationToTranslog(index, indexResult); + indexResult.setTook(System.nanoTime() - index.startTime()); + indexResult.freeze(); + localCheckpointTracker.markSeqNoAsProcessed(indexResult.getSeqNo()); + return indexResult; + } + + @Override + public DeleteResult delete(Delete delete) throws IOException { + DeleteResult deleteResult = new DeleteResult( + delete.version(), + delete.primaryTerm(), + delete.seqNo(), + true + ); + addDeleteOperationToTranslog(delete, deleteResult); + deleteResult.setTook(System.nanoTime() - delete.startTime()); + deleteResult.freeze(); + localCheckpointTracker.markSeqNoAsProcessed(deleteResult.getSeqNo()); + return deleteResult; + } + + @Override + public NoOpResult noOp(NoOp noOp) throws IOException { + NoOpResult noOpResult = new NoOpResult(noOp.primaryTerm(), noOp.seqNo()); + final Translog.Location location = translog.add(new Translog.NoOp(noOp.seqNo(), noOp.primaryTerm(), noOp.reason())); + noOpResult.setTranslogLocation(location); + noOpResult.setTook(System.nanoTime() - noOp.startTime()); + noOpResult.freeze(); + localCheckpointTracker.markSeqNoAsProcessed(noOpResult.getSeqNo()); + return noOpResult; + } + + @Override + public GetResult get(Get get, BiFunction searcherFactory) throws EngineException { + return getFromSearcher(get, searcherFactory, SearcherScope.EXTERNAL); + } + + @Override + protected ReferenceManager getReferenceManager(SearcherScope scope) { + return readerManager; + } + + @Override + public Closeable acquireHistoryRetentionLock() { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public Translog.Snapshot newChangesSnapshot(String source, long fromSeqNo, long toSeqNo, boolean requiredFullRange, boolean accurateCount) throws IOException { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNumber) throws IOException { + return 0; + } + + + @Override + public boolean hasCompleteOperationHistory(String reason, long startingSeqNo) { + return false; + } + + @Override + public long getMinRetainedSeqNo() { + return localCheckpointTracker.getProcessedCheckpoint(); + } + + @Override + public long getPersistedLocalCheckpoint() { + return localCheckpointTracker.getPersistedCheckpoint(); + } + + public long getProcessedLocalCheckpoint() { + return localCheckpointTracker.getProcessedCheckpoint(); + } + + @Override + public SeqNoStats getSeqNoStats(long globalCheckpoint) { + return localCheckpointTracker.getStats(globalCheckpoint); + } + + @Override + protected LocalCheckpointTracker getLocalCheckpointTracker() { + return localCheckpointTracker; + } + + @Override + public long getIndexBufferRAMBytesUsed() { + return 0; + } + + @Override + public List segments(boolean verbose) { + return Arrays.asList(getSegmentInfo(lastCommittedSegmentInfos, verbose)); + } + + @Override + public void refresh(String source) throws EngineException {} + + @Override + public boolean maybeRefresh(String source) throws EngineException { + return false; + } + + @Override + public void writeIndexingBuffer() throws 
EngineException {} + + @Override + public boolean shouldPeriodicallyFlush() { + return false; + } + + @Override + public void flush(boolean force, boolean waitIfOngoing) throws EngineException {} + + + @Override + public void forceMerge(boolean flush, int maxNumSegments, boolean onlyExpungeDeletes, boolean upgrade, boolean upgradeOnlyAncientSegments, String forceMergeUUID) throws EngineException, IOException {} + + @Override + public GatedCloseable acquireLastIndexCommit(boolean flushFirst) throws EngineException { + store.incRef(); + try { + final IndexCommit indexCommit = Lucene.getIndexCommit(lastCommittedSegmentInfos, store.directory()); + return new GatedCloseable<>(indexCommit, store::decRef); + } catch (IOException e) { + throw new EngineException(shardId, "Unable to build latest IndexCommit", e); + } + } + + @Override + public GatedCloseable acquireSafeIndexCommit() throws EngineException { + return acquireLastIndexCommit(false); + } + + @Override + public SafeCommitInfo getSafeCommitInfo() { + return new SafeCommitInfo(localCheckpointTracker.getProcessedCheckpoint(), lastCommittedSegmentInfos.totalMaxDoc()); + } + + @Override + protected final void closeNoLock(String reason, CountDownLatch closedLatch) { + if (isClosed.compareAndSet(false, true)) { + assert rwl.isWriteLockedByCurrentThread() || failEngineLock.isHeldByCurrentThread() + : "Either the write lock must be held or the engine must be currently be failing itself"; + try { + IOUtils.close(readerManager, translog, store::decRef); + } catch (Exception e) { + logger.warn("failed to close engine", e); + } finally { + try { + logger.debug("engine closed [{}]", reason); + } finally { + closedLatch.countDown(); + } + } + } + } + + @Override + protected void deleteUnusedFiles() {} + + @Override + public void activateThrottling() {} + + @Override + public void deactivateThrottling() {} + + @Override + public int fillSeqNoGaps(long primaryTerm) throws IOException { + return 0; + } + + @Override + public Engine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) throws IOException { + throw new UnsupportedOperationException("Read only replicas do not have an IndexWriter and cannot recover from a translog."); + } + + @Override + public void skipTranslogRecovery() { + // Do nothing. + } + + @Override + public void maybePruneDeletes() {} + + @Override + public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) {} + + @Override + public long getMaxSeqNoOfUpdatesOrDeletes() { + return localCheckpointTracker.getMaxSeqNo(); + } + + @Override + public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) {} + + @Override + protected SegmentInfos getLastCommittedSegmentInfos() { + return lastCommittedSegmentInfos; + } + + @Override + protected SegmentInfos getLatestSegmentInfos() { + return readerManager.getSegmentInfos(); + } + + private DirectoryReader getDirectoryReader() throws IOException { + // for segment replication: replicas should create the reader from store, we don't want an open IW on replicas. 
+ return new SoftDeletesDirectoryReaderWrapper( + DirectoryReader.open(store.directory()), + Lucene.SOFT_DELETES_FIELD + ); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngineFactory.java new file mode 100644 index 0000000000000..ccbd04129b349 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngineFactory.java @@ -0,0 +1,19 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +public class NRTReplicationEngineFactory implements EngineFactory { + @Override + public Engine newReadWriteEngine(EngineConfig config) { + if (config.isReadOnlyReplica()) { + return new NRTReplicationEngine(config); + } + return new InternalEngine(config); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java new file mode 100644 index 0000000000000..a97f3b191d53c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java @@ -0,0 +1,90 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.SoftDeletesDirectoryReaderWrapper; +import org.apache.lucene.index.StandardDirectoryReader; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; +import org.opensearch.index.shard.ShardId; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * This is an extension of {@link OpenSearchReaderManager} for use with {@link NRTReplicationEngine}. + * The manager holds a reference to the latest {@link SegmentInfos} object that is used to refresh a reader. + * + * @opensearch.internal + */ +public class NRTReplicationReaderManager extends OpenSearchReaderManager { + + protected static Logger logger = LogManager.getLogger(NRTReplicationReaderManager.class); + private volatile SegmentInfos currentInfos; + + /** + * Creates and returns a new SegmentReplicationReaderManager from the given + * already-opened {@link OpenSearchDirectoryReader}, stealing + * the incoming reference. 
+ * + * @param reader the SegmentReplicationReaderManager to use for future reopens + */ + NRTReplicationReaderManager(OpenSearchDirectoryReader reader) { + super(reader); + currentInfos = unwrapStandardReader(reader).getSegmentInfos(); + } + + @Override + protected OpenSearchDirectoryReader refreshIfNeeded(OpenSearchDirectoryReader referenceToRefresh) throws IOException { + Objects.requireNonNull(referenceToRefresh); + final List subs = new ArrayList<>(); + final StandardDirectoryReader standardDirectoryReader = unwrapStandardReader(referenceToRefresh); + for (LeafReaderContext ctx : standardDirectoryReader.leaves()) { + subs.add(ctx.reader()); + } + DirectoryReader innerReader = StandardDirectoryReader.open(referenceToRefresh.directory(), currentInfos, subs, null); + final DirectoryReader softDeletesDirectoryReaderWrapper = new SoftDeletesDirectoryReaderWrapper( + innerReader, + Lucene.SOFT_DELETES_FIELD + ); + logger.trace("updated to SegmentInfosVersion=" + currentInfos.getVersion() + " reader=" + innerReader); + return OpenSearchDirectoryReader.wrap(softDeletesDirectoryReaderWrapper, referenceToRefresh.shardId()); + } + + /** + * Update this reader's segments and refresh. + * + * @param infos {@link SegmentInfos} infos + * @throws IOException - When Refresh fails with an IOException. + */ + public synchronized void updateSegments(SegmentInfos infos) throws IOException { + currentInfos = infos; + maybeRefresh(); + } + + public SegmentInfos getSegmentInfos() { + return currentInfos; + } + + private StandardDirectoryReader unwrapStandardReader(OpenSearchDirectoryReader reader) { + final DirectoryReader delegate = reader.getDelegate(); + if (delegate instanceof SoftDeletesDirectoryReaderWrapper) { + return (StandardDirectoryReader) ((SoftDeletesDirectoryReaderWrapper) delegate).getDelegate(); + } + return (StandardDirectoryReader) delegate; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java b/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java index 2e3155a4d173e..23a86d8da5599 100644 --- a/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java @@ -270,6 +270,11 @@ protected SegmentInfos getLastCommittedSegmentInfos() { return lastCommittedSegmentInfos; } + @Override + protected SegmentInfos getLatestSegmentInfos() { + return lastCommittedSegmentInfos; + } + @Override public String getHistoryUUID() { return lastCommittedSegmentInfos.userData.get(Engine.HISTORY_UUID_KEY); diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index d8fa560a4a812..64355b2f2b064 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -3122,7 +3122,8 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { globalCheckpointSupplier, replicationTracker::getRetentionLeases, () -> getOperationPrimaryTerm(), - tombstoneDocSupplier() + tombstoneDocSupplier(), + indexSettings.isSegRepEnabled() && shardRouting.primary() == false ); } diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 87cd63119f1d4..48bf27126924a 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -109,6 +109,7 @@ 
import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.InternalEngineFactory; +import org.opensearch.index.engine.NRTReplicationEngineFactory; import org.opensearch.index.engine.NoOpEngine; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.flush.FlushStats; @@ -762,6 +763,9 @@ private EngineFactory getEngineFactory(final IndexSettings idxSettings) { .filter(maybe -> Objects.requireNonNull(maybe).isPresent()) .collect(Collectors.toList()); if (engineFactories.isEmpty()) { + if (idxSettings.isSegRepEnabled()) { + return new NRTReplicationEngineFactory(); + } return new InternalEngineFactory(); } else if (engineFactories.size() == 1) { assert engineFactories.get(0).isPresent(); diff --git a/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java b/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java index 8030619500278..7ddd92ea7b36e 100644 --- a/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java +++ b/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java @@ -65,7 +65,8 @@ public void testCreateEngineConfigFromFactory() { null, () -> new RetentionLeases(0, 0, Collections.emptyList()), null, - null + null, + false ); assertNotNull(config.getCodec()); @@ -141,7 +142,8 @@ public void testCreateCodecServiceFromFactory() { null, () -> new RetentionLeases(0, 0, Collections.emptyList()), null, - null + null, + false ); assertNotNull(config.getCodec()); } diff --git a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java index cbae55a047a1e..597bdeb7d69d1 100644 --- a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java @@ -3608,7 +3608,8 @@ public void testRecoverFromForeignTranslog() throws IOException { () -> UNASSIGNED_SEQ_NO, () -> RetentionLeases.EMPTY, primaryTerm::get, - tombstoneDocSupplier() + tombstoneDocSupplier(), + false ); expectThrows(EngineCreationFailureException.class, () -> new InternalEngine(brokenConfig)); @@ -3652,7 +3653,8 @@ public CustomTranslogDeletionPolicy(IndexSettings indexSettings, Supplier operations = generateHistoryOnReplica(between(1, 500), randomBoolean(), randomBoolean(), randomBoolean()); + for (Engine.Operation op : operations) { + applyOperation(engine, op); + applyOperation(nrtEngine, op); + } + + assertEquals(nrtEngine.getProcessedLocalCheckpoint(), engine.getProcessedLocalCheckpoint()); + + // we don't index into nrtEngine, so get the doc ids from the regular engine. + final List docs = getDocIds(engine, true); + + // recover a new engine from the nrtEngine's xlog. + nrtEngine.syncTranslog(); + try (InternalEngine engine = new InternalEngine(nrtEngine.config())) { + engine.recoverFromTranslog(translogHandler, Long.MAX_VALUE); + assertEquals(getDocIds(engine, true), docs); + } + assertEngineCleanedUp(nrtEngine, nrtEngine.getTranslog()); + } + } + + public void testUpdateSegments() throws Exception { + final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); + + try (final Store nrtEngineStore = createStore(); + final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore);) { + // add docs to the primary engine. 
+ List operations = generateHistoryOnReplica(between(1, 500), randomBoolean(), randomBoolean(), randomBoolean()).stream().filter(op -> op.operationType().equals(Engine.Operation.TYPE.INDEX)).collect(Collectors.toList()); + for (Engine.Operation op : operations) { + applyOperation(engine, op); + applyOperation(nrtEngine, op); + } + assertEquals(nrtEngine.getProcessedLocalCheckpoint(), engine.getProcessedLocalCheckpoint()); + + engine.refresh("test"); + + nrtEngine.updateSegments(engine.getLatestSegmentInfos(), engine.getProcessedLocalCheckpoint()); + assertMatchingSegmentsAndCheckpoints(nrtEngine); + + // Flush the primary and update the NRTEngine with the latest committed infos. + engine.flush(); + nrtEngine.updateSegments(engine.getLastCommittedSegmentInfos(), engine.getPersistedLocalCheckpoint()); + assertMatchingSegmentsAndCheckpoints(nrtEngine); + + // Ensure the same hit count between engines. + int expectedDocCount; + try (final Engine.Searcher test = engine.acquireSearcher("test")) { + expectedDocCount = test.count(Queries.newMatchAllQuery()); + assertSearcherHits(nrtEngine, expectedDocCount); + } + assertEngineCleanedUp(nrtEngine, nrtEngine.getTranslog()); + } + } + + private void assertMatchingSegmentsAndCheckpoints(NRTReplicationEngine nrtEngine) { + assertEquals(engine.getPersistedLocalCheckpoint(), nrtEngine.getPersistedLocalCheckpoint()); + assertEquals(engine.getProcessedLocalCheckpoint(), nrtEngine.getProcessedLocalCheckpoint()); + assertEquals(engine.getLocalCheckpointTracker().getMaxSeqNo(), nrtEngine.getLocalCheckpointTracker().getMaxSeqNo()); + assertEquals(engine.segments(true), nrtEngine.segments(true)); + } + + private void assertSearcherHits(Engine engine, int hits) { + try (final Engine.Searcher test = engine.acquireSearcher("test")) { + MatcherAssert.assertThat(test, EngineSearcherTotalHitsMatcher.engineSearcherTotalHits(hits)); + } + } + + private NRTReplicationEngine buildNrtReplicaEngine(AtomicLong globalCheckpoint, Store store) throws IOException { + Lucene.cleanLuceneIndex(store.directory()); + final Path translogDir = createTempDir(); + final EngineConfig replicaConfig = config(defaultSettings, store, translogDir, NoMergePolicy.INSTANCE, null, null, globalCheckpoint::get); + if (Lucene.indexExists(store.directory()) == false) { + store.createEmpty(replicaConfig.getIndexSettings().getIndexVersionCreated().luceneVersion); + final String translogUuid = Translog.createEmptyTranslog( + replicaConfig.getTranslogConfig().getTranslogPath(), + SequenceNumbers.NO_OPS_PERFORMED, + shardId, + primaryTerm.get() + ); + store.associateIndexWithNewTranslog(translogUuid); + } + return new NRTReplicationEngine(replicaConfig); + } +} diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index 04e89daf2e375..b06c8dfae5828 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -100,6 +100,8 @@ import org.opensearch.index.engine.EngineTestCase; import org.opensearch.index.engine.InternalEngine; import org.opensearch.index.engine.InternalEngineFactory; +import org.opensearch.index.engine.NRTReplicationEngineFactory; +import org.opensearch.index.engine.NRTReplicationEngine; import org.opensearch.index.engine.ReadOnlyEngine; import org.opensearch.index.fielddata.FieldDataStats; import org.opensearch.index.fielddata.IndexFieldData; @@ -134,6 +136,7 @@ import 
org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.repositories.IndexId; import org.opensearch.snapshots.Snapshot; import org.opensearch.snapshots.SnapshotId; @@ -4037,7 +4040,7 @@ public void testCloseShardWhileResettingEngine() throws Exception { CountDownLatch closeDoneLatch = new CountDownLatch(1); IndexShard shard = newStartedShard(false, Settings.EMPTY, config -> new InternalEngine(config) { @Override - public Engine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) + public InternalEngine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) throws IOException { readyToCloseLatch.countDown(); try { @@ -4096,9 +4099,9 @@ public void testSnapshotWhileResettingEngine() throws Exception { CountDownLatch snapshotDoneLatch = new CountDownLatch(1); IndexShard shard = newStartedShard(false, Settings.EMPTY, config -> new InternalEngine(config) { @Override - public Engine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) + public InternalEngine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) throws IOException { - Engine engine = super.recoverFromTranslog(translogRecoveryRunner, recoverUpToSeqNo); + InternalEngine engine = super.recoverFromTranslog(translogRecoveryRunner, recoverUpToSeqNo); readyToSnapshotLatch.countDown(); try { snapshotDoneLatch.await(); @@ -4378,6 +4381,29 @@ protected void ensureMaxSeqNoEqualsToGlobalCheckpoint(SeqNoStats seqNoStats) { closeShards(readonlyShard); } + public void testReadOnlyReplicaEngineConfig() throws IOException { + Settings primarySettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + final IndexShard primaryShard = newStartedShard(false, primarySettings, new NRTReplicationEngineFactory()); + assertFalse(primaryShard.getEngine().config().isReadOnlyReplica()); + assertEquals(primaryShard.getEngine().getClass(), InternalEngine.class); + + Settings replicaSettings = Settings.builder() + .put(primarySettings) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + final IndexShard replicaShard = newStartedShard(false, replicaSettings, new NRTReplicationEngineFactory()); + assertTrue(replicaShard.getEngine().config().isReadOnlyReplica()); + assertEquals(replicaShard.getEngine().getClass(), NRTReplicationEngine.class); + + + closeShards(primaryShard, replicaShard); + } + + public void testCloseShardWhileEngineIsWarming() throws Exception { CountDownLatch warmerStarted = new CountDownLatch(1); CountDownLatch warmerBlocking = new CountDownLatch(1); @@ -4413,7 +4439,8 @@ public void testCloseShardWhileEngineIsWarming() throws Exception { config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier() + config.getTombstoneDocSupplier(), + false ); return new InternalEngine(configWithWarmer); }); diff --git a/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java b/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java index eea316d9a9370..13c5a3bdf0465 100644 --- 
a/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java @@ -170,7 +170,8 @@ public void onFailedEngine(String reason, @Nullable Exception e) { () -> SequenceNumbers.NO_OPS_PERFORMED, () -> RetentionLeases.EMPTY, () -> primaryTerm, - EngineTestCase.tombstoneDocSupplier() + EngineTestCase.tombstoneDocSupplier(), + false ); engine = new InternalEngine(config); engine.recoverFromTranslog((e, s) -> 0, Long.MAX_VALUE); diff --git a/server/src/test/java/org/opensearch/indices/IndexingMemoryControllerTests.java b/server/src/test/java/org/opensearch/indices/IndexingMemoryControllerTests.java index c68ad7eaba82e..b44707035a2f6 100644 --- a/server/src/test/java/org/opensearch/indices/IndexingMemoryControllerTests.java +++ b/server/src/test/java/org/opensearch/indices/IndexingMemoryControllerTests.java @@ -422,7 +422,8 @@ EngineConfig configWithRefreshListener(EngineConfig config, ReferenceManager.Ref config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier() + config.getTombstoneDocSupplier(), + false ); } diff --git a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java index 2bce5a7c81794..10988ea1afe5b 100644 --- a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java @@ -264,7 +264,8 @@ public EngineConfig copy(EngineConfig config, LongSupplier globalCheckpointSuppl globalCheckpointSupplier, config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - tombstoneDocSupplier() + tombstoneDocSupplier(), + false ); } @@ -291,7 +292,8 @@ public EngineConfig copy(EngineConfig config, Analyzer analyzer) { config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier() + config.getTombstoneDocSupplier(), + false ); } @@ -318,7 +320,8 @@ public EngineConfig copy(EngineConfig config, MergePolicy mergePolicy) { config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier() + config.getTombstoneDocSupplier(), + false ); } @@ -328,24 +331,26 @@ public void tearDown() throws Exception { super.tearDown(); try { if (engine != null && engine.isClosed.get() == false) { - engine.getTranslog().getDeletionPolicy().assertNoOpenTranslogRefs(); - assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine); - assertNoInFlightDocuments(engine); - assertMaxSeqNoInCommitUserData(engine); - assertAtMostOneLuceneDocumentPerSequenceNumber(engine); + assertEngineCleanedUp(engine, engine.getTranslog()); } if (replicaEngine != null && replicaEngine.isClosed.get() == false) { - replicaEngine.getTranslog().getDeletionPolicy().assertNoOpenTranslogRefs(); - assertConsistentHistoryBetweenTranslogAndLuceneIndex(replicaEngine); - assertNoInFlightDocuments(replicaEngine); - assertMaxSeqNoInCommitUserData(replicaEngine); - assertAtMostOneLuceneDocumentPerSequenceNumber(replicaEngine); + assertEngineCleanedUp(replicaEngine, replicaEngine.getTranslog()); } } finally { IOUtils.close(replicaEngine, storeReplica, engine, store, () -> terminate(threadPool)); } } + protected void assertEngineCleanedUp(Engine engine, Translog translog) throws Exception { + if (engine.isClosed.get() == false) { + 
translog.getDeletionPolicy().assertNoOpenTranslogRefs(); + assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine); + assertNoInFlightDocuments(engine); + assertMaxSeqNoInCommitUserData(engine); + assertAtMostOneLuceneDocumentPerSequenceNumber(engine); + } + } + protected static ParseContext.Document testDocumentWithTextField() { return testDocumentWithTextField("test"); } @@ -871,7 +876,8 @@ public EngineConfig config( globalCheckpointSupplier, retentionLeasesSupplier, primaryTerm, - tombstoneDocSupplier() + tombstoneDocSupplier(), + false ); } @@ -911,7 +917,8 @@ protected EngineConfig config( config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - tombstoneDocSupplier + tombstoneDocSupplier, + false ); } From 22e7dde724f0d30c57d35796cd9af0d4d97bb085 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Sun, 8 May 2022 21:46:58 -0700 Subject: [PATCH 04/16] Fix spotless checks. Signed-off-by: Marc Handalian --- .../index/engine/InternalEngine.java | 4 +- .../index/engine/NRTReplicationEngine.java | 49 +++++++++---------- .../engine/NRTReplicationReaderManager.java | 1 - .../index/engine/TranslogAwareEngine.java | 13 +++-- .../engine/NRTReplicationEngineTest.java | 42 +++++++++++----- .../index/shard/IndexShardTests.java | 2 - 6 files changed, 65 insertions(+), 46 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index 400d76c3955bb..d49aa3efa1852 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -514,7 +514,8 @@ private void recoverFromTranslogInternal(TranslogRecoveryRunner translogRecovery flush(false, true); translog.trimUnreferencedReaders(); } - // Package private for testing purposes only + + // Package private for testing purposes only boolean hasSnapshottedCommits() { return combinedDeletionPolicy.hasSnapshottedCommits(); } @@ -543,7 +544,6 @@ public long getWritingBytes() { return indexWriter.getFlushingBytes() + versionMap.getRefreshingBytes(); } - private ExternalReaderManager createReaderManager(RefreshWarmerListener externalRefreshListener) throws EngineException { boolean success = false; OpenSearchReaderManager internalReaderManager = null; diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index f2ea501c634b9..b8c06ccd9e7e6 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -50,25 +50,26 @@ public NRTReplicationEngine(EngineConfig engineConfig) { try { lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); readerManager = new NRTReplicationReaderManager(OpenSearchDirectoryReader.wrap(getDirectoryReader(), shardId)); - final SequenceNumbers.CommitInfo commitInfo = SequenceNumbers.loadSeqNoInfoFromLuceneCommit(this.lastCommittedSegmentInfos.getUserData().entrySet()); + final SequenceNumbers.CommitInfo commitInfo = SequenceNumbers.loadSeqNoInfoFromLuceneCommit( + this.lastCommittedSegmentInfos.getUserData().entrySet() + ); this.localCheckpointTracker = new LocalCheckpointTracker(commitInfo.maxSeqNo, commitInfo.localCheckpoint); this.completionStatsCache = new CompletionStatsCache(() -> acquireSearcher("completion_stats")); 
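        // The completion-stats cache registers as a refresh listener so its cached stats are invalidated whenever the reader refreshes onto new segments.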
this.readerManager.addListener(completionStatsCache); } catch (IOException e) { - IOUtils.closeWhileHandlingException(store::decRef, translog); + IOUtils.closeWhileHandlingException(store::decRef, translog); throw new EngineCreationFailureException(shardId, "failed to create engine", e); } } - public synchronized void updateSegments(final SegmentInfos infos, long seqNo) - throws IOException { + public synchronized void updateSegments(final SegmentInfos infos, long seqNo) throws IOException { try { store.incRef(); // Update the current infos reference on the Engine's reader. readerManager.updateSegments(infos); // only update the persistedSeqNo and "lastCommitted" infos reference if the incoming segments have a higher - // generation. We can still refresh with incoming SegmentInfos that are not part of a commit point. + // generation. We can still refresh with incoming SegmentInfos that are not part of a commit point. if (infos.getGeneration() > lastCommittedSegmentInfos.getGeneration()) { this.lastCommittedSegmentInfos = infos; localCheckpointTracker.fastForwardPersistedSeqNo(seqNo); @@ -106,12 +107,7 @@ public boolean isThrottled() { @Override public IndexResult index(Index index) throws IOException { - IndexResult indexResult = new IndexResult( - index.version(), - index.primaryTerm(), - index.seqNo(), - false - ); + IndexResult indexResult = new IndexResult(index.version(), index.primaryTerm(), index.seqNo(), false); addIndexOperationToTranslog(index, indexResult); indexResult.setTook(System.nanoTime() - index.startTime()); indexResult.freeze(); @@ -121,12 +117,7 @@ public IndexResult index(Index index) throws IOException { @Override public DeleteResult delete(Delete delete) throws IOException { - DeleteResult deleteResult = new DeleteResult( - delete.version(), - delete.primaryTerm(), - delete.seqNo(), - true - ); + DeleteResult deleteResult = new DeleteResult(delete.version(), delete.primaryTerm(), delete.seqNo(), true); addDeleteOperationToTranslog(delete, deleteResult); deleteResult.setTook(System.nanoTime() - delete.startTime()); deleteResult.freeze(); @@ -161,7 +152,13 @@ public Closeable acquireHistoryRetentionLock() { } @Override - public Translog.Snapshot newChangesSnapshot(String source, long fromSeqNo, long toSeqNo, boolean requiredFullRange, boolean accurateCount) throws IOException { + public Translog.Snapshot newChangesSnapshot( + String source, + long fromSeqNo, + long toSeqNo, + boolean requiredFullRange, + boolean accurateCount + ) throws IOException { throw new UnsupportedOperationException("Not implemented"); } @@ -170,7 +167,6 @@ public int countNumberOfHistoryOperations(String source, long fromSeqNo, long to return 0; } - @Override public boolean hasCompleteOperationHistory(String reason, long startingSeqNo) { return false; @@ -229,9 +225,15 @@ public boolean shouldPeriodicallyFlush() { @Override public void flush(boolean force, boolean waitIfOngoing) throws EngineException {} - @Override - public void forceMerge(boolean flush, int maxNumSegments, boolean onlyExpungeDeletes, boolean upgrade, boolean upgradeOnlyAncientSegments, String forceMergeUUID) throws EngineException, IOException {} + public void forceMerge( + boolean flush, + int maxNumSegments, + boolean onlyExpungeDeletes, + boolean upgrade, + boolean upgradeOnlyAncientSegments, + String forceMergeUUID + ) throws EngineException, IOException {} @Override public GatedCloseable acquireLastIndexCommit(boolean flushFirst) throws EngineException { @@ -323,9 +325,6 @@ protected SegmentInfos getLatestSegmentInfos() 
{ private DirectoryReader getDirectoryReader() throws IOException { // for segment replication: replicas should create the reader from store, we don't want an open IW on replicas. - return new SoftDeletesDirectoryReaderWrapper( - DirectoryReader.open(store.directory()), - Lucene.SOFT_DELETES_FIELD - ); + return new SoftDeletesDirectoryReaderWrapper(DirectoryReader.open(store.directory()), Lucene.SOFT_DELETES_FIELD); } } diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java index a97f3b191d53c..e8bbc28cf077b 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java @@ -18,7 +18,6 @@ import org.apache.lucene.index.StandardDirectoryReader; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; -import org.opensearch.index.shard.ShardId; import java.io.IOException; import java.util.ArrayList; diff --git a/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java b/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java index 9f3c5d2c13b08..4de0185b0e217 100644 --- a/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java @@ -47,11 +47,14 @@ protected TranslogAwareEngine(EngineConfig engineConfig) { customTranslogDeletionPolicy = engineConfig.getCustomTranslogDeletionPolicyFactory() .create(engineConfig.getIndexSettings(), engineConfig.retentionLeasesSupplier()); } - translogDeletionPolicy = Objects.requireNonNullElseGet(customTranslogDeletionPolicy, () -> new DefaultTranslogDeletionPolicy( - engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(), - engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis(), - engineConfig.getIndexSettings().getTranslogRetentionTotalFiles() - )); + translogDeletionPolicy = Objects.requireNonNullElseGet( + customTranslogDeletionPolicy, + () -> new DefaultTranslogDeletionPolicy( + engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(), + engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis(), + engineConfig.getIndexSettings().getTranslogRetentionTotalFiles() + ) + ); try { store.trimUnsafeCommits(engineConfig.getTranslogConfig().getTranslogPath()); translog = openTranslog(engineConfig, translogDeletionPolicy, engineConfig.getGlobalCheckpointSupplier(), seqNo -> { diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTest.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTest.java index 3e74faa84249a..df9ca935658a0 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTest.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTest.java @@ -10,11 +10,9 @@ import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.SegmentInfos; -import org.apache.lucene.tests.store.BaseDirectoryWrapper; import org.hamcrest.MatcherAssert; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.search.Queries; -import org.opensearch.core.internal.io.IOUtils; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.store.Store; import org.opensearch.index.translog.Translog; @@ -29,8 +27,10 @@ public class NRTReplicationEngineTest 
extends EngineTestCase { public void testCreateEngine() throws IOException { final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); - try (final Store nrtEngineStore = createStore(); - final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore);) { + try ( + final Store nrtEngineStore = createStore(); + final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore); + ) { final SegmentInfos latestSegmentInfos = nrtEngine.getLatestSegmentInfos(); final SegmentInfos lastCommittedSegmentInfos = nrtEngine.getLastCommittedSegmentInfos(); assertEquals(latestSegmentInfos.version, lastCommittedSegmentInfos.version); @@ -44,9 +44,16 @@ public void testCreateEngine() throws IOException { public void testEngineWritesOpsToTranslog() throws Exception { final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); - try (final Store nrtEngineStore = createStore(); - final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore);) { - List operations = generateHistoryOnReplica(between(1, 500), randomBoolean(), randomBoolean(), randomBoolean()); + try ( + final Store nrtEngineStore = createStore(); + final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore); + ) { + List operations = generateHistoryOnReplica( + between(1, 500), + randomBoolean(), + randomBoolean(), + randomBoolean() + ); for (Engine.Operation op : operations) { applyOperation(engine, op); applyOperation(nrtEngine, op); @@ -70,10 +77,15 @@ public void testEngineWritesOpsToTranslog() throws Exception { public void testUpdateSegments() throws Exception { final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); - try (final Store nrtEngineStore = createStore(); - final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore);) { + try ( + final Store nrtEngineStore = createStore(); + final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore); + ) { // add docs to the primary engine. 
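For orientation, the round trip that testUpdateSegments drives can be condensed to a short sketch; engine is the primary InternalEngine, nrtEngine the replica, and only methods introduced in this patch series appear:

    // Primary refresh: new in-memory SegmentInfos, commit generation unchanged.
    engine.refresh("test");
    nrtEngine.updateSegments(engine.getLatestSegmentInfos(), engine.getProcessedLocalCheckpoint());

    // Primary flush: committed SegmentInfos with a higher generation.
    engine.flush();
    nrtEngine.updateSegments(engine.getLastCommittedSegmentInfos(), engine.getPersistedLocalCheckpoint());

Because updateSegments treats only incoming infos with a higher generation as a new commit point, just the post-flush call replaces lastCommittedSegmentInfos and fast-forwards the persisted checkpoint on the replica's tracker.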
- List operations = generateHistoryOnReplica(between(1, 500), randomBoolean(), randomBoolean(), randomBoolean()).stream().filter(op -> op.operationType().equals(Engine.Operation.TYPE.INDEX)).collect(Collectors.toList()); + List operations = generateHistoryOnReplica(between(1, 500), randomBoolean(), randomBoolean(), randomBoolean()) + .stream() + .filter(op -> op.operationType().equals(Engine.Operation.TYPE.INDEX)) + .collect(Collectors.toList()); for (Engine.Operation op : operations) { applyOperation(engine, op); applyOperation(nrtEngine, op); @@ -116,7 +128,15 @@ private void assertSearcherHits(Engine engine, int hits) { private NRTReplicationEngine buildNrtReplicaEngine(AtomicLong globalCheckpoint, Store store) throws IOException { Lucene.cleanLuceneIndex(store.directory()); final Path translogDir = createTempDir(); - final EngineConfig replicaConfig = config(defaultSettings, store, translogDir, NoMergePolicy.INSTANCE, null, null, globalCheckpoint::get); + final EngineConfig replicaConfig = config( + defaultSettings, + store, + translogDir, + NoMergePolicy.INSTANCE, + null, + null, + globalCheckpoint::get + ); if (Lucene.indexExists(store.directory()) == false) { store.createEmpty(replicaConfig.getIndexSettings().getIndexVersionCreated().luceneVersion); final String translogUuid = Translog.createEmptyTranslog( diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index b06c8dfae5828..50f303fda5a64 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -4399,11 +4399,9 @@ public void testReadOnlyReplicaEngineConfig() throws IOException { assertTrue(replicaShard.getEngine().config().isReadOnlyReplica()); assertEquals(replicaShard.getEngine().getClass(), NRTReplicationEngine.class); - closeShards(primaryShard, replicaShard); } - public void testCloseShardWhileEngineIsWarming() throws Exception { CountDownLatch warmerStarted = new CountDownLatch(1); CountDownLatch warmerBlocking = new CountDownLatch(1); From daacef4db12dbf27fe054435beaabbf63642960a Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Sun, 8 May 2022 21:56:19 -0700 Subject: [PATCH 05/16] Fix :server:compileInternalClusterTestJava compilation. Signed-off-by: Marc Handalian --- .../org/opensearch/indices/IndexingMemoryControllerIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndexingMemoryControllerIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndexingMemoryControllerIT.java index 0d3c685ab0327..54912d5308797 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/IndexingMemoryControllerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndexingMemoryControllerIT.java @@ -103,7 +103,8 @@ EngineConfig engineConfigWithLargerIndexingMemory(EngineConfig config) { config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier() + config.getTombstoneDocSupplier(), + config.isReadOnlyReplica() ); } From 3d3cf48a427e5e3c15ab5bd0e92699d309f5dae3 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Sun, 8 May 2022 22:05:06 -0700 Subject: [PATCH 06/16] Fix failing test naming convention check. 
Signed-off-by: Marc Handalian --- ...eplicationEngineTest.java => NRTReplicationEngineTests.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename server/src/test/java/org/opensearch/index/engine/{NRTReplicationEngineTest.java => NRTReplicationEngineTests.java} (99%) diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTest.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java similarity index 99% rename from server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTest.java rename to server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index df9ca935658a0..5823286cdac32 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTest.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; -public class NRTReplicationEngineTest extends EngineTestCase { +public class NRTReplicationEngineTests extends EngineTestCase { public void testCreateEngine() throws IOException { final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); From 8fddec8403ec0bd8f5f7086f6f7fbc4954376ff8 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Tue, 10 May 2022 17:34:21 -0700 Subject: [PATCH 07/16] PR feedback. - Removed isReadOnlyReplica from overloaded constructor and added feature flag checks. - Updated log msg in NRTReplicationReaderManager - cleaned up store ref counting in NRTReplicationEngine. Signed-off-by: Marc Handalian --- .../indices/IndexingMemoryControllerIT.java | 3 +- .../opensearch/index/engine/EngineConfig.java | 67 +++++++++++++++++-- .../index/engine/NRTReplicationEngine.java | 57 +++++++++------- .../engine/NRTReplicationReaderManager.java | 3 +- .../index/engine/InternalEngineTests.java | 9 +-- .../engine/NRTReplicationEngineTests.java | 9 +++ .../index/shard/IndexShardTests.java | 3 +- .../index/shard/RefreshListenersTests.java | 3 +- .../IndexingMemoryControllerTests.java | 3 +- .../index/engine/EngineTestCase.java | 15 ++--- 10 files changed, 115 insertions(+), 57 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndexingMemoryControllerIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndexingMemoryControllerIT.java index 54912d5308797..0d3c685ab0327 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/IndexingMemoryControllerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndexingMemoryControllerIT.java @@ -103,8 +103,7 @@ EngineConfig engineConfigWithLargerIndexingMemory(EngineConfig config) { config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier(), - config.isReadOnlyReplica() + config.getTombstoneDocSupplier() ); } diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 3f6d4568ca592..3ec7cc790f50b 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -97,7 +97,7 @@ public final class EngineConfig { private final CircuitBreakerService circuitBreakerService; private final LongSupplier globalCheckpointSupplier; private final Supplier retentionLeasesSupplier; - private boolean isReadOnlyReplica; + private final boolean 
isReadOnlyReplica; /** * A supplier of the outstanding retention leases. This is used during merged operations to determine which operations that have been @@ -172,8 +172,7 @@ public EngineConfig( LongSupplier globalCheckpointSupplier, Supplier retentionLeasesSupplier, LongSupplier primaryTermSupplier, - TombstoneDocSupplier tombstoneDocSupplier, - boolean isReadOnlyReplica + TombstoneDocSupplier tombstoneDocSupplier ) { this( shardId, @@ -198,11 +197,64 @@ public EngineConfig( globalCheckpointSupplier, retentionLeasesSupplier, primaryTermSupplier, - tombstoneDocSupplier, - isReadOnlyReplica + tombstoneDocSupplier ); } + /** + * Creates a new {@link org.opensearch.index.engine.EngineConfig} + */ + EngineConfig( + ShardId shardId, + ThreadPool threadPool, + IndexSettings indexSettings, + Engine.Warmer warmer, + Store store, + MergePolicy mergePolicy, + Analyzer analyzer, + Similarity similarity, + CodecService codecService, + Engine.EventListener eventListener, + QueryCache queryCache, + QueryCachingPolicy queryCachingPolicy, + TranslogConfig translogConfig, + TranslogDeletionPolicyFactory translogDeletionPolicyFactory, + TimeValue flushMergesAfter, + List externalRefreshListener, + List internalRefreshListener, + Sort indexSort, + CircuitBreakerService circuitBreakerService, + LongSupplier globalCheckpointSupplier, + Supplier retentionLeasesSupplier, + LongSupplier primaryTermSupplier, + TombstoneDocSupplier tombstoneDocSupplier + ) { + this(shardId, + threadPool, + indexSettings, + warmer, + store, + mergePolicy, + analyzer, + similarity, + codecService, + eventListener, + queryCache, + queryCachingPolicy, + translogConfig, + translogDeletionPolicyFactory, + flushMergesAfter, + externalRefreshListener, + internalRefreshListener, + indexSort, + circuitBreakerService, + globalCheckpointSupplier, + retentionLeasesSupplier, + primaryTermSupplier, + tombstoneDocSupplier, + false); + } + /** * Creates a new {@link org.opensearch.index.engine.EngineConfig} */ @@ -232,6 +284,9 @@ public EngineConfig( TombstoneDocSupplier tombstoneDocSupplier, boolean isReadOnlyReplica ) { + if (isReadOnlyReplica && indexSettings.isSegRepEnabled() == false) { + throw new IllegalArgumentException("Shard can only be wired as a read only replica with Segment Replication enabled"); + } this.shardId = shardId; this.indexSettings = indexSettings; this.threadPool = threadPool; @@ -472,7 +527,7 @@ public LongSupplier getPrimaryTermSupplier() { * @return true if this engine should be wired as read only. */ public boolean isReadOnlyReplica() { - return isReadOnlyReplica; + return indexSettings.isSegRepEnabled() && isReadOnlyReplica; } /** diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index b8c06ccd9e7e6..1a0df68756a4e 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -63,21 +63,16 @@ public NRTReplicationEngine(EngineConfig engineConfig) { } public synchronized void updateSegments(final SegmentInfos infos, long seqNo) throws IOException { - try { - store.incRef(); - // Update the current infos reference on the Engine's reader. - readerManager.updateSegments(infos); - - // only update the persistedSeqNo and "lastCommitted" infos reference if the incoming segments have a higher - // generation. We can still refresh with incoming SegmentInfos that are not part of a commit point. 
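Note on this hunk: the store.incRef()/decRef() pair that bracketed updateSegments is dropped, which is the "cleaned up store ref counting" item from this commit's message; the engine appears to hold a store reference for its whole lifetime anyway (see the store::decRef in the constructor's failure handling), so an extra pin around the infos swap is redundant.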
- if (infos.getGeneration() > lastCommittedSegmentInfos.getGeneration()) { - this.lastCommittedSegmentInfos = infos; - localCheckpointTracker.fastForwardPersistedSeqNo(seqNo); - } - readerManager.maybeRefresh(); - } finally { - store.decRef(); + // Update the current infos reference on the Engine's reader. + readerManager.updateSegments(infos); + + // only update the persistedSeqNo and "lastCommitted" infos reference if the incoming segments have a higher + // generation. We can still refresh with incoming SegmentInfos that are not part of a commit point. + if (infos.getGeneration() > lastCommittedSegmentInfos.getGeneration()) { + this.lastCommittedSegmentInfos = infos; + localCheckpointTracker.fastForwardPersistedSeqNo(seqNo); } + readerManager.maybeRefresh(); } @Override @@ -207,7 +202,8 @@ public List segments(boolean verbose) { } @Override - public void refresh(String source) throws EngineException {} + public void refresh(String source) throws EngineException { + } @Override public boolean maybeRefresh(String source) throws EngineException { @@ -215,7 +211,8 @@ public boolean maybeRefresh(String source) throws EngineException { } @Override - public void writeIndexingBuffer() throws EngineException {} + public void writeIndexingBuffer() throws EngineException { + } @Override public boolean shouldPeriodicallyFlush() { @@ -223,7 +220,8 @@ public boolean shouldPeriodicallyFlush() { } @Override - public void flush(boolean force, boolean waitIfOngoing) throws EngineException {} + public void flush(boolean force, boolean waitIfOngoing) throws EngineException { + } @Override public void forceMerge( @@ -233,14 +231,15 @@ public void forceMerge( boolean upgrade, boolean upgradeOnlyAncientSegments, String forceMergeUUID - ) throws EngineException, IOException {} + ) throws EngineException, IOException { + } @Override public GatedCloseable acquireLastIndexCommit(boolean flushFirst) throws EngineException { - store.incRef(); try { final IndexCommit indexCommit = Lucene.getIndexCommit(lastCommittedSegmentInfos, store.directory()); - return new GatedCloseable<>(indexCommit, store::decRef); + return new GatedCloseable<>(indexCommit, () -> { + }); } catch (IOException e) { throw new EngineException(shardId, "Unable to build latest IndexCommit", e); } @@ -276,13 +275,16 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) { } @Override - protected void deleteUnusedFiles() {} + protected void deleteUnusedFiles() { + } @Override - public void activateThrottling() {} + public void activateThrottling() { + } @Override - public void deactivateThrottling() {} + public void deactivateThrottling() { + } @Override public int fillSeqNoGaps(long primaryTerm) throws IOException { @@ -300,10 +302,12 @@ public void skipTranslogRecovery() { } @Override - public void maybePruneDeletes() {} + public void maybePruneDeletes() { + } @Override - public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) {} + public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) { + } @Override public long getMaxSeqNoOfUpdatesOrDeletes() { @@ -311,7 +315,8 @@ public long getMaxSeqNoOfUpdatesOrDeletes() { } @Override - public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) {} + public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) { + } @Override protected SegmentInfos getLastCommittedSegmentInfos() { diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java 
b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java index e8bbc28cf077b..d1c7d43d3487a 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -60,7 +61,7 @@ protected OpenSearchDirectoryReader refreshIfNeeded(OpenSearchDirectoryReader re innerReader, Lucene.SOFT_DELETES_FIELD ); - logger.trace("updated to SegmentInfosVersion=" + currentInfos.getVersion() + " reader=" + innerReader); + logger.trace(() -> new ParameterizedMessage("updated to SegmentInfosVersion=" + currentInfos.getVersion() + " reader=" + innerReader)); return OpenSearchDirectoryReader.wrap(softDeletesDirectoryReaderWrapper, referenceToRefresh.shardId()); } diff --git a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java index 597bdeb7d69d1..cbae55a047a1e 100644 --- a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java @@ -3608,8 +3608,7 @@ public void testRecoverFromForeignTranslog() throws IOException { () -> UNASSIGNED_SEQ_NO, () -> RetentionLeases.EMPTY, primaryTerm::get, - tombstoneDocSupplier(), - false + tombstoneDocSupplier() ); expectThrows(EngineCreationFailureException.class, () -> new InternalEngine(brokenConfig)); @@ -3653,8 +3652,7 @@ public CustomTranslogDeletionPolicy(IndexSettings indexSettings, Supplier indexCommitGatedCloseable = nrtEngine.acquireLastIndexCommit(false)) { + final IndexCommit indexCommit = indexCommitGatedCloseable.get(); + assertEquals(indexCommit.getUserData(), lastCommittedSegmentInfos.getUserData()); + assertTrue(indexCommit.getFileNames().containsAll(lastCommittedSegmentInfos.files(true))); + } } } diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index 50f303fda5a64..47a37e5edaa0c 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -4437,8 +4437,7 @@ public void testCloseShardWhileEngineIsWarming() throws Exception { config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier(), - false + config.getTombstoneDocSupplier() ); return new InternalEngine(configWithWarmer); }); diff --git a/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java b/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java index 13c5a3bdf0465..eea316d9a9370 100644 --- a/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java @@ -170,8 +170,7 @@ public void onFailedEngine(String reason, @Nullable Exception e) { () -> SequenceNumbers.NO_OPS_PERFORMED, () -> RetentionLeases.EMPTY, () -> primaryTerm, - EngineTestCase.tombstoneDocSupplier(), - false + EngineTestCase.tombstoneDocSupplier() ); engine = new InternalEngine(config); 
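        // Recovering from the freshly created, empty translog (the (e, s) -> 0 runner applies no operations) finishes engine initialization before the refresh-listener tests run.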
engine.recoverFromTranslog((e, s) -> 0, Long.MAX_VALUE); diff --git a/server/src/test/java/org/opensearch/indices/IndexingMemoryControllerTests.java b/server/src/test/java/org/opensearch/indices/IndexingMemoryControllerTests.java index b44707035a2f6..c68ad7eaba82e 100644 --- a/server/src/test/java/org/opensearch/indices/IndexingMemoryControllerTests.java +++ b/server/src/test/java/org/opensearch/indices/IndexingMemoryControllerTests.java @@ -422,8 +422,7 @@ EngineConfig configWithRefreshListener(EngineConfig config, ReferenceManager.Ref config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier(), - false + config.getTombstoneDocSupplier() ); } diff --git a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java index 10988ea1afe5b..66c697d83510b 100644 --- a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java @@ -264,8 +264,7 @@ public EngineConfig copy(EngineConfig config, LongSupplier globalCheckpointSuppl globalCheckpointSupplier, config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - tombstoneDocSupplier(), - false + tombstoneDocSupplier() ); } @@ -292,8 +291,7 @@ public EngineConfig copy(EngineConfig config, Analyzer analyzer) { config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier(), - false + config.getTombstoneDocSupplier() ); } @@ -320,8 +318,7 @@ public EngineConfig copy(EngineConfig config, MergePolicy mergePolicy) { config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - config.getTombstoneDocSupplier(), - false + config.getTombstoneDocSupplier() ); } @@ -876,8 +873,7 @@ public EngineConfig config( globalCheckpointSupplier, retentionLeasesSupplier, primaryTerm, - tombstoneDocSupplier(), - false + tombstoneDocSupplier() ); } @@ -917,8 +913,7 @@ protected EngineConfig config( config.getGlobalCheckpointSupplier(), config.retentionLeasesSupplier(), config.getPrimaryTermSupplier(), - tombstoneDocSupplier, - false + tombstoneDocSupplier ); } From 576f61dd6cc2922a9cab13bf7550b584869415cb Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Tue, 10 May 2022 17:39:58 -0700 Subject: [PATCH 08/16] Fix spotless check. 
Signed-off-by: Marc Handalian --- .../opensearch/index/engine/EngineConfig.java | 8 +++-- .../index/engine/NRTReplicationEngine.java | 33 +++++++------------ .../engine/NRTReplicationReaderManager.java | 4 ++- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 3ec7cc790f50b..c7719d265de38 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -228,8 +228,9 @@ public EngineConfig( Supplier retentionLeasesSupplier, LongSupplier primaryTermSupplier, TombstoneDocSupplier tombstoneDocSupplier - ) { - this(shardId, + ) { + this( + shardId, threadPool, indexSettings, warmer, @@ -252,7 +253,8 @@ public EngineConfig( retentionLeasesSupplier, primaryTermSupplier, tombstoneDocSupplier, - false); + false + ); } /** diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 1a0df68756a4e..6b430ef4a3612 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -202,8 +202,7 @@ public List segments(boolean verbose) { } @Override - public void refresh(String source) throws EngineException { - } + public void refresh(String source) throws EngineException {} @Override public boolean maybeRefresh(String source) throws EngineException { @@ -211,8 +210,7 @@ public boolean maybeRefresh(String source) throws EngineException { } @Override - public void writeIndexingBuffer() throws EngineException { - } + public void writeIndexingBuffer() throws EngineException {} @Override public boolean shouldPeriodicallyFlush() { @@ -220,8 +218,7 @@ public boolean shouldPeriodicallyFlush() { } @Override - public void flush(boolean force, boolean waitIfOngoing) throws EngineException { - } + public void flush(boolean force, boolean waitIfOngoing) throws EngineException {} @Override public void forceMerge( @@ -231,15 +228,13 @@ public void forceMerge( boolean upgrade, boolean upgradeOnlyAncientSegments, String forceMergeUUID - ) throws EngineException, IOException { - } + ) throws EngineException, IOException {} @Override public GatedCloseable acquireLastIndexCommit(boolean flushFirst) throws EngineException { try { final IndexCommit indexCommit = Lucene.getIndexCommit(lastCommittedSegmentInfos, store.directory()); - return new GatedCloseable<>(indexCommit, () -> { - }); + return new GatedCloseable<>(indexCommit, () -> {}); } catch (IOException e) { throw new EngineException(shardId, "Unable to build latest IndexCommit", e); } @@ -275,16 +270,13 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) { } @Override - protected void deleteUnusedFiles() { - } + protected void deleteUnusedFiles() {} @Override - public void activateThrottling() { - } + public void activateThrottling() {} @Override - public void deactivateThrottling() { - } + public void deactivateThrottling() {} @Override public int fillSeqNoGaps(long primaryTerm) throws IOException { @@ -302,12 +294,10 @@ public void skipTranslogRecovery() { } @Override - public void maybePruneDeletes() { - } + public void maybePruneDeletes() {} @Override - public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) { - } + public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) 
{} @Override public long getMaxSeqNoOfUpdatesOrDeletes() { @@ -315,8 +305,7 @@ public long getMaxSeqNoOfUpdatesOrDeletes() { } @Override - public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) { - } + public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) {} @Override protected SegmentInfos getLastCommittedSegmentInfos() { diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java index d1c7d43d3487a..9c78e763bee43 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java @@ -61,7 +61,9 @@ protected OpenSearchDirectoryReader refreshIfNeeded(OpenSearchDirectoryReader re innerReader, Lucene.SOFT_DELETES_FIELD ); - logger.trace(() -> new ParameterizedMessage("updated to SegmentInfosVersion=" + currentInfos.getVersion() + " reader=" + innerReader)); + logger.trace( + () -> new ParameterizedMessage("updated to SegmentInfosVersion=" + currentInfos.getVersion() + " reader=" + innerReader) + ); return OpenSearchDirectoryReader.wrap(softDeletesDirectoryReaderWrapper, referenceToRefresh.shardId()); } From e0d97423facd70e79ec32ba81ff584b2cb7cc12d Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Thu, 12 May 2022 09:56:53 -0700 Subject: [PATCH 09/16] Remove TranslogAwareEngine and build translog in NRTReplicationEngine. Signed-off-by: Marc Handalian --- .../index/engine/InternalEngine.java | 358 +++++++++++++----- .../index/engine/NRTReplicationEngine.java | 183 +++++++-- .../index/engine/TranslogAwareEngine.java | 257 ------------- .../index/seqno/LocalCheckpointTracker.java | 8 +- .../engine/NRTReplicationEngineTests.java | 12 +- .../seqno/LocalCheckpointTrackerTests.java | 44 +-- 6 files changed, 466 insertions(+), 396 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index d49aa3efa1852..ed0a959b48fbc 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -105,7 +105,12 @@ import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.OpenSearchMergePolicy; import org.opensearch.index.shard.ShardId; +import org.opensearch.index.translog.DefaultTranslogDeletionPolicy; import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogConfig; +import org.opensearch.index.translog.TranslogCorruptedException; +import org.opensearch.index.translog.TranslogDeletionPolicy; +import org.opensearch.index.translog.TranslogStats; import org.opensearch.search.suggest.completion.CompletionStats; import org.opensearch.threadpool.ThreadPool; @@ -127,6 +132,8 @@ import java.util.concurrent.locks.ReentrantLock; import java.util.function.BiConsumer; import java.util.function.BiFunction; +import java.util.function.LongConsumer; +import java.util.function.LongSupplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -135,12 +142,14 @@ * * @opensearch.internal */ -public class InternalEngine extends TranslogAwareEngine { +public class InternalEngine extends Engine { /** * When we last pruned expired tombstones from versionMap.deletes: */ private 
volatile long lastDeleteVersionPruneTimeMSec; + + private final Translog translog; private final OpenSearchConcurrentMergeScheduler mergeScheduler; private final IndexWriter indexWriter; @@ -221,8 +230,24 @@ public InternalEngine(EngineConfig engineConfig) { if (engineConfig.isAutoGeneratedIDsOptimizationEnabled() == false) { updateAutoIdTimestamp(Long.MAX_VALUE, true); } + final TranslogDeletionPolicy translogDeletionPolicy; + TranslogDeletionPolicy customTranslogDeletionPolicy = null; + if (engineConfig.getCustomTranslogDeletionPolicyFactory() != null) { + customTranslogDeletionPolicy = engineConfig.getCustomTranslogDeletionPolicyFactory() + .create(engineConfig.getIndexSettings(), engineConfig.retentionLeasesSupplier()); + } + if (customTranslogDeletionPolicy != null) { + translogDeletionPolicy = customTranslogDeletionPolicy; + } else { + translogDeletionPolicy = new DefaultTranslogDeletionPolicy( + engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(), + engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis(), + engineConfig.getIndexSettings().getTranslogRetentionTotalFiles() + ); + } store.incRef(); IndexWriter writer = null; + Translog translog = null; ExternalReaderManager externalReaderManager = null; OpenSearchReaderManager internalReaderManager = null; EngineMergeScheduler scheduler = null; @@ -232,12 +257,22 @@ public InternalEngine(EngineConfig engineConfig) { mergeScheduler = scheduler = new EngineMergeScheduler(engineConfig.getShardId(), engineConfig.getIndexSettings()); throttle = new IndexThrottle(); try { + store.trimUnsafeCommits(engineConfig.getTranslogConfig().getTranslogPath()); + translog = openTranslog(engineConfig, translogDeletionPolicy, engineConfig.getGlobalCheckpointSupplier(), seqNo -> { + final LocalCheckpointTracker tracker = getLocalCheckpointTracker(); + assert tracker != null || getTranslog().isOpen() == false; + if (tracker != null) { + tracker.markSeqNoAsPersisted(seqNo); + } + }); + assert translog.getGeneration() != null; + this.translog = translog; this.softDeletesPolicy = newSoftDeletesPolicy(); this.combinedDeletionPolicy = new CombinedDeletionPolicy( logger, translogDeletionPolicy, softDeletesPolicy, - this::getLastSyncedGlobalCheckpoint + translog::getLastSyncedGlobalCheckpoint ); this.localCheckpointTracker = createLocalCheckpointTracker(localCheckpointTrackerSupplier); writer = createWriter(); @@ -246,7 +281,7 @@ public InternalEngine(EngineConfig engineConfig) { historyUUID = loadHistoryUUID(commitData); forceMergeUUID = commitData.get(FORCE_MERGE_UUID_KEY); indexWriter = writer; - } catch (IOException e) { + } catch (IOException | TranslogCorruptedException e) { throw new EngineCreationFailureException(shardId, "failed to create engine", e); } catch (AssertionError e) { // IndexWriter throws AssertionError on init, if asserts are enabled, if any files don't exist, but tests that @@ -323,7 +358,7 @@ private SoftDeletesPolicy newSoftDeletesPolicy() throws IOException { lastMinRetainedSeqNo = Long.parseLong(commitUserData.get(SequenceNumbers.MAX_SEQ_NO)) + 1; } return new SoftDeletesPolicy( - this::getLastSyncedGlobalCheckpoint, + translog::getLastSyncedGlobalCheckpoint, lastMinRetainedSeqNo, engineConfig.getIndexSettings().getSoftDeleteRetentionOperations(), engineConfig.retentionLeasesSupplier() @@ -420,6 +455,17 @@ final boolean assertSearcherIsWarmedUp(String source, SearcherScope scope) { return true; } + @Override + public int restoreLocalHistoryFromTranslog(TranslogRecoveryRunner translogRecoveryRunner) throws 
IOException { + try (ReleasableLock ignored = readLock.acquire()) { + ensureOpen(); + final long localCheckpoint = localCheckpointTracker.getProcessedCheckpoint(); + try (Translog.Snapshot snapshot = getTranslog().newSnapshot(localCheckpoint + 1, Long.MAX_VALUE)) { + return translogRecoveryRunner.run(this, snapshot); + } + } + } + @Override public int fillSeqNoGaps(long primaryTerm) throws IOException { try (ReleasableLock ignored = writeLock.acquire()) { @@ -441,10 +487,10 @@ public int fillSeqNoGaps(long primaryTerm) throws IOException { syncTranslog(); // to persist noops associated with the advancement of the local checkpoint assert localCheckpointTracker.getPersistedCheckpoint() == maxSeqNo : "persisted local checkpoint did not advance to max seq no; is [" - + localCheckpointTracker.getPersistedCheckpoint() - + "], max seq no [" - + maxSeqNo - + "]"; + + localCheckpointTracker.getPersistedCheckpoint() + + "], max seq no [" + + maxSeqNo + + "]"; return numNoOpsAdded; } } @@ -515,16 +561,73 @@ private void recoverFromTranslogInternal(TranslogRecoveryRunner translogRecovery translog.trimUnreferencedReaders(); } + private Translog openTranslog( + EngineConfig engineConfig, + TranslogDeletionPolicy translogDeletionPolicy, + LongSupplier globalCheckpointSupplier, + LongConsumer persistedSequenceNumberConsumer + ) throws IOException { + + final TranslogConfig translogConfig = engineConfig.getTranslogConfig(); + final Map userData = store.readLastCommittedSegmentsInfo().getUserData(); + final String translogUUID = Objects.requireNonNull(userData.get(Translog.TRANSLOG_UUID_KEY)); + // We expect that this shard already exists, so it must already have an existing translog else something is badly wrong! + return new Translog( + translogConfig, + translogUUID, + translogDeletionPolicy, + globalCheckpointSupplier, + engineConfig.getPrimaryTermSupplier(), + persistedSequenceNumberConsumer + ); + } + + // Package private for testing purposes only + Translog getTranslog() { + ensureOpen(); + return translog; + } + // Package private for testing purposes only boolean hasSnapshottedCommits() { return combinedDeletionPolicy.hasSnapshottedCommits(); } @Override - protected void deleteUnusedFiles() throws IOException { + public boolean isTranslogSyncNeeded() { + return getTranslog().syncNeeded(); + } + + @Override + public boolean ensureTranslogSynced(Stream locations) throws IOException { + final boolean synced = translog.ensureSynced(locations); + if (synced) { + revisitIndexDeletionPolicyOnTranslogSynced(); + } + return synced; + } + + @Override + public void syncTranslog() throws IOException { + translog.sync(); + revisitIndexDeletionPolicyOnTranslogSynced(); + } + + @Override + public TranslogStats getTranslogStats() { + return getTranslog().stats(); + } + + @Override + public Translog.Location getTranslogLastWriteLocation() { + return getTranslog().getLastWriteLocation(); + } + + private void revisitIndexDeletionPolicyOnTranslogSynced() throws IOException { if (combinedDeletionPolicy.hasUnreferencedCommits()) { indexWriter.deleteUnusedFiles(); } + translog.trimUnreferencedReaders(); } @Override @@ -738,8 +841,8 @@ private VersionValue resolveDocVersion(final Operation op, boolean loadSeqNo) th } else if (engineConfig.isEnableGcDeletes() && versionValue.isDelete() && (engineConfig.getThreadPool().relativeTimeInMillis() - ((DeleteVersionValue) versionValue).time) > getGcDeletesInMillis()) { - versionValue = null; - } + versionValue = null; + } return versionValue; } @@ -916,7 +1019,23 @@ public 
IndexResult index(Index index) throws IOException { } } if (index.origin().isFromTranslog() == false) { - addIndexOperationToTranslog(index, indexResult); + final Translog.Location location; + if (indexResult.getResultType() == Result.Type.SUCCESS) { + location = translog.add(new Translog.Index(index, indexResult)); + } else if (indexResult.getSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) { + // if we have document failure, record it as a no-op in the translog and Lucene with the generated seq_no + final NoOp noOp = new NoOp( + indexResult.getSeqNo(), + index.primaryTerm(), + index.origin(), + index.startTime(), + indexResult.getFailure().toString() + ); + location = innerNoOp(noOp).getTranslogLocation(); + } else { + location = null; + } + indexResult.setTranslogLocation(location); } if (plan.indexIntoLucene && indexResult.getResultType() == Result.Type.SUCCESS) { final Translog.Location translogLocation = trackTranslogLocation.get() ? indexResult.getTranslogLocation() : null; @@ -1034,35 +1153,35 @@ private IndexingStrategy planIndexingAsPrimary(Index index) throws IOException { plan = IndexingStrategy.skipDueToVersionConflict(e, true, currentVersion); } else if (index.getIfSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO && (versionValue.seqNo != index.getIfSeqNo() || versionValue.term != index.getIfPrimaryTerm())) { - final VersionConflictEngineException e = new VersionConflictEngineException( - shardId, - index.id(), - index.getIfSeqNo(), - index.getIfPrimaryTerm(), - versionValue.seqNo, - versionValue.term - ); - plan = IndexingStrategy.skipDueToVersionConflict(e, currentNotFoundOrDeleted, currentVersion); - } else if (index.versionType().isVersionConflictForWrites(currentVersion, index.version(), currentNotFoundOrDeleted)) { - final VersionConflictEngineException e = new VersionConflictEngineException( - shardId, - index, - currentVersion, - currentNotFoundOrDeleted - ); - plan = IndexingStrategy.skipDueToVersionConflict(e, currentNotFoundOrDeleted, currentVersion); + final VersionConflictEngineException e = new VersionConflictEngineException( + shardId, + index.id(), + index.getIfSeqNo(), + index.getIfPrimaryTerm(), + versionValue.seqNo, + versionValue.term + ); + plan = IndexingStrategy.skipDueToVersionConflict(e, currentNotFoundOrDeleted, currentVersion); + } else if (index.versionType().isVersionConflictForWrites(currentVersion, index.version(), currentNotFoundOrDeleted)) { + final VersionConflictEngineException e = new VersionConflictEngineException( + shardId, + index, + currentVersion, + currentNotFoundOrDeleted + ); + plan = IndexingStrategy.skipDueToVersionConflict(e, currentNotFoundOrDeleted, currentVersion); + } else { + final Exception reserveError = tryAcquireInFlightDocs(index, reservingDocs); + if (reserveError != null) { + plan = IndexingStrategy.failAsTooManyDocs(reserveError); } else { - final Exception reserveError = tryAcquireInFlightDocs(index, reservingDocs); - if (reserveError != null) { - plan = IndexingStrategy.failAsTooManyDocs(reserveError); - } else { - plan = IndexingStrategy.processNormally( - currentNotFoundOrDeleted, - canOptimizeAddDocument ? 1L : index.versionType().updateVersion(currentVersion, index.version()), - reservingDocs - ); - } + plan = IndexingStrategy.processNormally( + currentNotFoundOrDeleted, + canOptimizeAddDocument ? 
1L : index.versionType().updateVersion(currentVersion, index.version()), + reservingDocs + ); } + } } return plan; } @@ -1188,10 +1307,10 @@ private IndexingStrategy( : "use lucene update is set to true, but we're not indexing into lucene"; assert (indexIntoLucene && earlyResultOnPreFlightError != null) == false : "can only index into lucene or have a preflight result but not both." - + "indexIntoLucene: " - + indexIntoLucene - + " earlyResultOnPreFlightError:" - + earlyResultOnPreFlightError; + + "indexIntoLucene: " + + indexIntoLucene + + " earlyResultOnPreFlightError:" + + earlyResultOnPreFlightError; assert reservedDocs == 0 || indexIntoLucene || addStaleOpToLucene : reservedDocs; this.currentNotFoundOrDeleted = currentNotFoundOrDeleted; this.useLuceneUpdateDocument = useLuceneUpdateDocument; @@ -1337,7 +1456,8 @@ public DeleteResult delete(Delete delete) throws IOException { } } if (delete.origin().isFromTranslog() == false && deleteResult.getResultType() == Result.Type.SUCCESS) { - addDeleteOperationToTranslog(delete, deleteResult); + final Translog.Location location = translog.add(new Translog.Delete(delete, deleteResult)); + deleteResult.setTranslogLocation(location); } localCheckpointTracker.markSeqNoAsProcessed(deleteResult.getSeqNo()); if (deleteResult.getTranslogLocation() == null) { @@ -1448,32 +1568,32 @@ private DeletionStrategy planDeletionAsPrimary(Delete delete) throws IOException plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, true); } else if (delete.getIfSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO && (versionValue.seqNo != delete.getIfSeqNo() || versionValue.term != delete.getIfPrimaryTerm())) { - final VersionConflictEngineException e = new VersionConflictEngineException( - shardId, - delete.id(), - delete.getIfSeqNo(), - delete.getIfPrimaryTerm(), - versionValue.seqNo, - versionValue.term - ); - plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, currentlyDeleted); - } else if (delete.versionType().isVersionConflictForWrites(currentVersion, delete.version(), currentlyDeleted)) { - final VersionConflictEngineException e = new VersionConflictEngineException( - shardId, - delete, - currentVersion, - currentlyDeleted - ); - plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, currentlyDeleted); + final VersionConflictEngineException e = new VersionConflictEngineException( + shardId, + delete.id(), + delete.getIfSeqNo(), + delete.getIfPrimaryTerm(), + versionValue.seqNo, + versionValue.term + ); + plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, currentlyDeleted); + } else if (delete.versionType().isVersionConflictForWrites(currentVersion, delete.version(), currentlyDeleted)) { + final VersionConflictEngineException e = new VersionConflictEngineException( + shardId, + delete, + currentVersion, + currentlyDeleted + ); + plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, currentlyDeleted); + } else { + final Exception reserveError = tryAcquireInFlightDocs(delete, 1); + if (reserveError != null) { + plan = DeletionStrategy.failAsTooManyDocs(reserveError); } else { - final Exception reserveError = tryAcquireInFlightDocs(delete, 1); - if (reserveError != null) { - plan = DeletionStrategy.failAsTooManyDocs(reserveError); - } else { - final long versionOfDeletion = delete.versionType().updateVersion(currentVersion, delete.version()); - plan = DeletionStrategy.processNormally(currentlyDeleted, versionOfDeletion, 1); - } + final long versionOfDeletion = 
delete.versionType().updateVersion(currentVersion, delete.version()); + plan = DeletionStrategy.processNormally(currentlyDeleted, versionOfDeletion, 1); } + } return plan; } @@ -1533,10 +1653,10 @@ private DeletionStrategy( ) { assert (deleteFromLucene && earlyResultOnPreflightError != null) == false : "can only delete from lucene or have a preflight result but not both." - + "deleteFromLucene: " - + deleteFromLucene - + " earlyResultOnPreFlightError:" - + earlyResultOnPreflightError; + + "deleteFromLucene: " + + deleteFromLucene + + " earlyResultOnPreFlightError:" + + earlyResultOnPreflightError; this.deleteFromLucene = deleteFromLucene; this.addStaleOpToLucene = addStaleOpToLucene; this.currentlyDeleted = currentlyDeleted; @@ -1820,8 +1940,8 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { || force || shouldPeriodicallyFlush || getProcessedLocalCheckpoint() > Long.parseLong( - lastCommittedSegmentInfos.userData.get(SequenceNumbers.LOCAL_CHECKPOINT_KEY) - )) { + lastCommittedSegmentInfos.userData.get(SequenceNumbers.LOCAL_CHECKPOINT_KEY) + )) { ensureCanFlush(); try { translog.rollGeneration(); @@ -1890,6 +2010,66 @@ private void refreshLastCommittedSegmentInfos() { } } + @Override + public void rollTranslogGeneration() throws EngineException { + try (ReleasableLock ignored = readLock.acquire()) { + ensureOpen(); + translog.rollGeneration(); + translog.trimUnreferencedReaders(); + } catch (AlreadyClosedException e) { + failOnTragicEvent(e); + throw e; + } catch (Exception e) { + try { + failEngine("translog trimming failed", e); + } catch (Exception inner) { + e.addSuppressed(inner); + } + throw new EngineException(shardId, "failed to roll translog", e); + } + } + + @Override + public void trimUnreferencedTranslogFiles() throws EngineException { + try (ReleasableLock lock = readLock.acquire()) { + ensureOpen(); + translog.trimUnreferencedReaders(); + } catch (AlreadyClosedException e) { + failOnTragicEvent(e); + throw e; + } catch (Exception e) { + try { + failEngine("translog trimming failed", e); + } catch (Exception inner) { + e.addSuppressed(inner); + } + throw new EngineException(shardId, "failed to trim translog", e); + } + } + + @Override + public boolean shouldRollTranslogGeneration() { + return getTranslog().shouldRollGeneration(); + } + + @Override + public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException { + try (ReleasableLock lock = readLock.acquire()) { + ensureOpen(); + translog.trimOperations(belowTerm, aboveSeqNo); + } catch (AlreadyClosedException e) { + failOnTragicEvent(e); + throw e; + } catch (Exception e) { + try { + failEngine("translog operations trimming failed", e); + } catch (Exception inner) { + e.addSuppressed(inner); + } + throw new EngineException(shardId, "failed to trim translog operations", e); + } + } + private void pruneDeletedTombstones() { /* * We need to deploy two different trimming strategies for GC deletes on primary and replicas. 
Delete operations on primary @@ -2082,12 +2262,12 @@ protected boolean maybeFailEngine(String source, Exception e) { return failOnTragicEvent((AlreadyClosedException) e); } else if (e != null && ((indexWriter.isOpen() == false && indexWriter.getTragicException() == e) - || (translog.isOpen() == false && translog.getTragicException() == e))) { - // this spot on - we are handling the tragic event exception here so we have to fail the engine - // right away - failEngine(source, e); - return true; - } + || (translog.isOpen() == false && translog.getTragicException() == e))) { + // this spot on - we are handling the tragic event exception here so we have to fail the engine + // right away + failEngine(source, e); + return true; + } return false; } @@ -2509,7 +2689,9 @@ public void onSettingsChanged(TimeValue translogRetentionAge, ByteSizeValue tran // the setting will be re-interpreted if it's set to true updateAutoIdTimestamp(Long.MAX_VALUE, true); } - refreshTranslogDeletionPolicy(translogRetentionAge, translogRetentionSize); + final TranslogDeletionPolicy translogDeletionPolicy = translog.getDeletionPolicy(); + translogDeletionPolicy.setRetentionAgeInMillis(translogRetentionAge.millis()); + translogDeletionPolicy.setRetentionSizeInBytes(translogRetentionSize.getBytes()); softDeletesPolicy.setRetentionOperations(softDeletesRetentionOps); } @@ -2517,11 +2699,15 @@ public MergeStats getMergeStats() { return mergeScheduler.stats(); } - @Override - protected LocalCheckpointTracker getLocalCheckpointTracker() { + LocalCheckpointTracker getLocalCheckpointTracker() { return localCheckpointTracker; } + @Override + public long getLastSyncedGlobalCheckpoint() { + return getTranslog().getLastSyncedGlobalCheckpoint(); + } + public long getProcessedLocalCheckpoint() { return localCheckpointTracker.getProcessedCheckpoint(); } @@ -2816,9 +3002,9 @@ private void restoreVersionMapAndCheckpointTracker(DirectoryReader directoryRead final IndexSearcher searcher = new IndexSearcher(directoryReader); searcher.setQueryCache(null); final Query query = new BooleanQuery.Builder().add( - LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, getPersistedLocalCheckpoint() + 1, Long.MAX_VALUE), - BooleanClause.Occur.MUST - ) + LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, getPersistedLocalCheckpoint() + 1, Long.MAX_VALUE), + BooleanClause.Occur.MUST + ) // exclude non-root nested documents .add(Queries.newNonNestedFilter(), BooleanClause.Occur.MUST) .build(); diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 6b430ef4a3612..4d751a6dda57b 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -20,15 +20,24 @@ import org.opensearch.index.seqno.LocalCheckpointTracker; import org.opensearch.index.seqno.SeqNoStats; import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.index.translog.DefaultTranslogDeletionPolicy; import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogConfig; +import org.opensearch.index.translog.TranslogDeletionPolicy; +import org.opensearch.index.translog.TranslogStats; import org.opensearch.search.suggest.completion.CompletionStats; import java.io.Closeable; import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.concurrent.CountDownLatch; 
import java.util.function.BiFunction; +import java.util.function.LongConsumer; +import java.util.function.LongSupplier; +import java.util.stream.Stream; /** * This is an {@link Engine} implementation intended for replica shards when Segment Replication @@ -37,16 +46,18 @@ * * @opensearch.internal */ -public class NRTReplicationEngine extends TranslogAwareEngine { +public class NRTReplicationEngine extends Engine { private volatile SegmentInfos lastCommittedSegmentInfos; private final NRTReplicationReaderManager readerManager; private final CompletionStatsCache completionStatsCache; private final LocalCheckpointTracker localCheckpointTracker; + private final Translog translog; public NRTReplicationEngine(EngineConfig engineConfig) { super(engineConfig); store.incRef(); + NRTReplicationReaderManager readerManager = null; try { lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); readerManager = new NRTReplicationReaderManager(OpenSearchDirectoryReader.wrap(getDirectoryReader(), shardId)); @@ -55,9 +66,14 @@ public NRTReplicationEngine(EngineConfig engineConfig) { ); this.localCheckpointTracker = new LocalCheckpointTracker(commitInfo.maxSeqNo, commitInfo.localCheckpoint); this.completionStatsCache = new CompletionStatsCache(() -> acquireSearcher("completion_stats")); + this.readerManager = readerManager; this.readerManager.addListener(completionStatsCache); + this.translog = openTranslog(engineConfig, + getTranslogDeletionPolicy(engineConfig), + engineConfig.getGlobalCheckpointSupplier(), + localCheckpointTracker::markSeqNoAsPersisted); } catch (IOException e) { - IOUtils.closeWhileHandlingException(store::decRef, translog); + IOUtils.closeWhileHandlingException(store::decRef, readerManager); throw new EngineCreationFailureException(shardId, "failed to create engine", e); } } @@ -70,8 +86,8 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) th // generation. We can still refresh with incoming SegmentInfos that are not part of a commit point. 
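For context before the hunk continues: the fastForwardProcessedSeqNo call introduced here is simplified later in this series; after that change it is a plain monotonic advance of the processed checkpoint. Condensed from the LocalCheckpointTracker diff further down:

    // Tracker method the replica engine calls when a new set of segments arrives.
    public synchronized void fastForwardProcessedSeqNo(final long seqNo) {
        advanceMaxSeqNo(seqNo);
        final long currentProcessedCheckpoint = processedCheckpoint.get();
        if (seqNo <= currentProcessedCheckpoint) {
            return; // the processed checkpoint only ever moves forward
        }
        processedCheckpoint.compareAndSet(currentProcessedCheckpoint, seqNo);
    }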
if (infos.getGeneration() > lastCommittedSegmentInfos.getGeneration()) { this.lastCommittedSegmentInfos = infos; - localCheckpointTracker.fastForwardPersistedSeqNo(seqNo); } + localCheckpointTracker.fastForwardProcessedSeqNo(seqNo); readerManager.maybeRefresh(); } @@ -100,23 +116,35 @@ public boolean isThrottled() { return false; } + @Override + public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException { + ensureOpen(); + try { + translog.trimOperations(belowTerm, aboveSeqNo); + } catch (IOException e) { + throw new EngineException(shardId, "failed to trim translog operations", e); + } + } + @Override public IndexResult index(Index index) throws IOException { IndexResult indexResult = new IndexResult(index.version(), index.primaryTerm(), index.seqNo(), false); - addIndexOperationToTranslog(index, indexResult); + final Translog.Location location = translog.add(new Translog.Index(index, indexResult)); + indexResult.setTranslogLocation(location); indexResult.setTook(System.nanoTime() - index.startTime()); indexResult.freeze(); - localCheckpointTracker.markSeqNoAsProcessed(indexResult.getSeqNo()); + localCheckpointTracker.advanceMaxSeqNo(index.seqNo()); return indexResult; } @Override public DeleteResult delete(Delete delete) throws IOException { DeleteResult deleteResult = new DeleteResult(delete.version(), delete.primaryTerm(), delete.seqNo(), true); - addDeleteOperationToTranslog(delete, deleteResult); + final Translog.Location location = translog.add(new Translog.Delete(delete, deleteResult)); + deleteResult.setTranslogLocation(location); deleteResult.setTook(System.nanoTime() - delete.startTime()); deleteResult.freeze(); - localCheckpointTracker.markSeqNoAsProcessed(deleteResult.getSeqNo()); + localCheckpointTracker.advanceMaxSeqNo(delete.seqNo()); return deleteResult; } @@ -127,7 +155,7 @@ public NoOpResult noOp(NoOp noOp) throws IOException { noOpResult.setTranslogLocation(location); noOpResult.setTook(System.nanoTime() - noOp.startTime()); noOpResult.freeze(); - localCheckpointTracker.markSeqNoAsProcessed(noOpResult.getSeqNo()); + localCheckpointTracker.advanceMaxSeqNo(noOp.seqNo()); return noOpResult; } @@ -141,6 +169,26 @@ protected ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(Search return readerManager; } + @Override + public boolean isTranslogSyncNeeded() { + return false; + } + + @Override + public boolean ensureTranslogSynced(Stream<Translog.Location> locations) throws IOException { + boolean synced = translog.ensureSynced(locations); + if (synced) { + translog.trimUnreferencedReaders(); + } + return synced; + } + + @Override + public void syncTranslog() throws IOException { + translog.sync(); + translog.trimUnreferencedReaders(); + } + @Override public Closeable acquireHistoryRetentionLock() { throw new UnsupportedOperationException("Not implemented"); } @@ -172,6 +220,16 @@ public long getMinRetainedSeqNo() { return localCheckpointTracker.getProcessedCheckpoint(); } + @Override + public TranslogStats getTranslogStats() { + return null; + } + + @Override + public Translog.Location getTranslogLastWriteLocation() { + return null; + } + @Override public long getPersistedLocalCheckpoint() { return localCheckpointTracker.getPersistedCheckpoint(); } @@ -187,8 +245,8 @@ public SeqNoStats getSeqNoStats(long globalCheckpoint) { } @Override - protected LocalCheckpointTracker getLocalCheckpointTracker() { - return localCheckpointTracker; + public long getLastSyncedGlobalCheckpoint() { + return 0; } @Override @@ -198,11 +256,12 @@ public long getIndexBufferRAMBytesUsed() { @Override
public List segments(boolean verbose) { - return Arrays.asList(getSegmentInfo(lastCommittedSegmentInfos, verbose)); + return Arrays.asList(getSegmentInfo(getLatestSegmentInfos(), verbose)); } @Override - public void refresh(String source) throws EngineException {} + public void refresh(String source) throws EngineException { + } @Override public boolean maybeRefresh(String source) throws EngineException { @@ -210,7 +269,8 @@ public boolean maybeRefresh(String source) throws EngineException { } @Override - public void writeIndexingBuffer() throws EngineException {} + public void writeIndexingBuffer() throws EngineException { + } @Override public boolean shouldPeriodicallyFlush() { @@ -218,7 +278,33 @@ public boolean shouldPeriodicallyFlush() { } @Override - public void flush(boolean force, boolean waitIfOngoing) throws EngineException {} + public void flush(boolean force, boolean waitIfOngoing) throws EngineException { + } + + @Override + public void trimUnreferencedTranslogFiles() throws EngineException { + ensureOpen(); + try { + translog.trimUnreferencedReaders(); + } catch (IOException e) { + throw new EngineException(shardId, "failed to trim translog", e); + } + } + + @Override + public boolean shouldRollTranslogGeneration() { + return translog.shouldRollGeneration(); + } + + @Override + public void rollTranslogGeneration() throws EngineException { + try { + translog.rollGeneration(); + translog.trimUnreferencedReaders(); + } catch (IOException e) { + throw new EngineException(shardId, "failed to roll translog", e); + } + } @Override public void forceMerge( @@ -228,13 +314,15 @@ public void forceMerge( boolean upgrade, boolean upgradeOnlyAncientSegments, String forceMergeUUID - ) throws EngineException, IOException {} + ) throws EngineException, IOException { + } @Override public GatedCloseable acquireLastIndexCommit(boolean flushFirst) throws EngineException { try { final IndexCommit indexCommit = Lucene.getIndexCommit(lastCommittedSegmentInfos, store.directory()); - return new GatedCloseable<>(indexCommit, () -> {}); + return new GatedCloseable<>(indexCommit, () -> { + }); } catch (IOException e) { throw new EngineException(shardId, "Unable to build latest IndexCommit", e); } @@ -270,13 +358,17 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) { } @Override - protected void deleteUnusedFiles() {} + public void activateThrottling() { + } @Override - public void activateThrottling() {} + public void deactivateThrottling() { + } @Override - public void deactivateThrottling() {} + public int restoreLocalHistoryFromTranslog(TranslogRecoveryRunner translogRecoveryRunner) throws IOException { + return 0; + } @Override public int fillSeqNoGaps(long primaryTerm) throws IOException { @@ -294,10 +386,12 @@ public void skipTranslogRecovery() { } @Override - public void maybePruneDeletes() {} + public void maybePruneDeletes() { + } @Override - public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) {} + public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) { + } @Override public long getMaxSeqNoOfUpdatesOrDeletes() { @@ -305,7 +399,8 @@ public long getMaxSeqNoOfUpdatesOrDeletes() { } @Override - public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) {} + public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) { + } @Override protected SegmentInfos getLastCommittedSegmentInfos() { @@ -321,4 +416,48 @@ private DirectoryReader getDirectoryReader() throws IOException { // for segment replication: replicas should 
create the reader from store, we don't want an open IW on replicas. return new SoftDeletesDirectoryReaderWrapper(DirectoryReader.open(store.directory()), Lucene.SOFT_DELETES_FIELD); } + + private Translog openTranslog( + EngineConfig engineConfig, + TranslogDeletionPolicy translogDeletionPolicy, + LongSupplier globalCheckpointSupplier, + LongConsumer persistedSequenceNumberConsumer + ) throws IOException { + final TranslogConfig translogConfig = engineConfig.getTranslogConfig(); + final Map userData = store.readLastCommittedSegmentsInfo().getUserData(); + final String translogUUID = Objects.requireNonNull(userData.get(Translog.TRANSLOG_UUID_KEY)); + // We expect that this shard already exists, so it must already have an existing translog else something is badly wrong! + return new Translog( + translogConfig, + translogUUID, + translogDeletionPolicy, + globalCheckpointSupplier, + engineConfig.getPrimaryTermSupplier(), + persistedSequenceNumberConsumer + ); + } + + private TranslogDeletionPolicy getTranslogDeletionPolicy(EngineConfig engineConfig) { + TranslogDeletionPolicy customTranslogDeletionPolicy = null; + if (engineConfig.getCustomTranslogDeletionPolicyFactory() != null) { + customTranslogDeletionPolicy = engineConfig.getCustomTranslogDeletionPolicyFactory() + .create(engineConfig.getIndexSettings(), engineConfig.retentionLeasesSupplier()); + } + return Objects.requireNonNullElseGet( + customTranslogDeletionPolicy, + () -> new DefaultTranslogDeletionPolicy( + engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(), + engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis(), + engineConfig.getIndexSettings().getTranslogRetentionTotalFiles() + ) + ); + } + + public Translog getTranslog() { + return translog; + } + + protected LocalCheckpointTracker getLocalCheckpointTracker() { + return localCheckpointTracker; + } } diff --git a/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java b/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java deleted file mode 100644 index 4de0185b0e217..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/TranslogAwareEngine.java +++ /dev/null @@ -1,257 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.engine; - -import org.apache.lucene.store.AlreadyClosedException; -import org.opensearch.common.unit.ByteSizeValue; -import org.opensearch.common.unit.TimeValue; -import org.opensearch.common.util.concurrent.ReleasableLock; -import org.opensearch.index.seqno.LocalCheckpointTracker; -import org.opensearch.index.seqno.SequenceNumbers; -import org.opensearch.index.translog.DefaultTranslogDeletionPolicy; -import org.opensearch.index.translog.Translog; -import org.opensearch.index.translog.TranslogConfig; -import org.opensearch.index.translog.TranslogCorruptedException; -import org.opensearch.index.translog.TranslogDeletionPolicy; -import org.opensearch.index.translog.TranslogStats; - -import java.io.IOException; -import java.util.Map; -import java.util.Objects; -import java.util.function.LongConsumer; -import java.util.function.LongSupplier; -import java.util.stream.Stream; - -/** - * Abstract Engine implementation that provides common Translog functionality. - * This class creates a closeable {@link Translog}, it is up to subclasses to close the translog reference. 
- * - * @opensearch.internal - */ -public abstract class TranslogAwareEngine extends Engine { - - protected final Translog translog; - protected final TranslogDeletionPolicy translogDeletionPolicy; - - protected TranslogAwareEngine(EngineConfig engineConfig) { - super(engineConfig); - store.incRef(); - TranslogDeletionPolicy customTranslogDeletionPolicy = null; - if (engineConfig.getCustomTranslogDeletionPolicyFactory() != null) { - customTranslogDeletionPolicy = engineConfig.getCustomTranslogDeletionPolicyFactory() - .create(engineConfig.getIndexSettings(), engineConfig.retentionLeasesSupplier()); - } - translogDeletionPolicy = Objects.requireNonNullElseGet( - customTranslogDeletionPolicy, - () -> new DefaultTranslogDeletionPolicy( - engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(), - engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis(), - engineConfig.getIndexSettings().getTranslogRetentionTotalFiles() - ) - ); - try { - store.trimUnsafeCommits(engineConfig.getTranslogConfig().getTranslogPath()); - translog = openTranslog(engineConfig, translogDeletionPolicy, engineConfig.getGlobalCheckpointSupplier(), seqNo -> { - final LocalCheckpointTracker tracker = getLocalCheckpointTracker(); - assert tracker != null || getTranslog().isOpen() == false; - if (tracker != null) { - tracker.markSeqNoAsPersisted(seqNo); - } - }); - assert translog.getGeneration() != null; - } catch (IOException | TranslogCorruptedException e) { - throw new EngineCreationFailureException(shardId, "failed to create engine", e); - } finally { - store.decRef(); - } - } - - @Override - public final int restoreLocalHistoryFromTranslog(TranslogRecoveryRunner translogRecoveryRunner) throws IOException { - try (ReleasableLock ignored = readLock.acquire()) { - ensureOpen(); - final long localCheckpoint = getLocalCheckpointTracker().getProcessedCheckpoint(); - try (Translog.Snapshot snapshot = getTranslog().newSnapshot(localCheckpoint + 1, Long.MAX_VALUE)) { - return translogRecoveryRunner.run(this, snapshot); - } - } - } - - @Override - public final boolean isTranslogSyncNeeded() { - return getTranslog().syncNeeded(); - } - - @Override - public final void syncTranslog() throws IOException { - translog.sync(); - deleteUnusedFiles(); - translog.trimUnreferencedReaders(); - } - - @Override - public final boolean ensureTranslogSynced(Stream locations) throws IOException { - boolean synced = translog.ensureSynced(locations); - if (synced) { - deleteUnusedFiles(); - translog.trimUnreferencedReaders(); - } - return synced; - } - - @Override - public final void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException { - try (ReleasableLock lock = readLock.acquire()) { - ensureOpen(); - translog.trimOperations(belowTerm, aboveSeqNo); - } catch (AlreadyClosedException e) { - maybeFailEngine("trimOperationsFromTranslog", e); - throw e; - } catch (Exception e) { - try { - failEngine("translog operations trimming failed", e); - } catch (Exception inner) { - e.addSuppressed(inner); - } - throw new EngineException(shardId, "failed to trim translog operations", e); - } - } - - @Override - public final TranslogStats getTranslogStats() { - return getTranslog().stats(); - } - - @Override - public final Translog.Location getTranslogLastWriteLocation() { - return getTranslog().getLastWriteLocation(); - } - - @Override - public final long getLastSyncedGlobalCheckpoint() { - return getTranslog().getLastSyncedGlobalCheckpoint(); - } - - @Override - public final void 
rollTranslogGeneration() throws EngineException { - try (ReleasableLock ignored = readLock.acquire()) { - ensureOpen(); - translog.rollGeneration(); - translog.trimUnreferencedReaders(); - } catch (AlreadyClosedException e) { - maybeFailEngine("rollTranslogGeneration", e); - throw e; - } catch (Exception e) { - try { - failEngine("translog trimming failed", e); - } catch (Exception inner) { - e.addSuppressed(inner); - } - throw new EngineException(shardId, "failed to roll translog", e); - } - } - - @Override - public final boolean shouldRollTranslogGeneration() { - return getTranslog().shouldRollGeneration(); - } - - @Override - public final void trimUnreferencedTranslogFiles() throws EngineException { - try (ReleasableLock lock = readLock.acquire()) { - ensureOpen(); - translog.trimUnreferencedReaders(); - } catch (AlreadyClosedException e) { - maybeFailEngine("trimUnreferencedTranslogFiles", e); - throw e; - } catch (Exception e) { - try { - failEngine("translog trimming failed", e); - } catch (Exception inner) { - e.addSuppressed(inner); - } - throw new EngineException(shardId, "failed to trim translog", e); - } - } - - protected final void refreshTranslogDeletionPolicy(TimeValue translogRetentionAge, ByteSizeValue translogRetentionSize) { - final TranslogDeletionPolicy translogDeletionPolicy = translog.getDeletionPolicy(); - translogDeletionPolicy.setRetentionAgeInMillis(translogRetentionAge.millis()); - translogDeletionPolicy.setRetentionSizeInBytes(translogRetentionSize.getBytes()); - } - - protected final Translog getTranslog() { - ensureOpen(); - return translog; - } - - protected final void addIndexOperationToTranslog(Index index, IndexResult indexResult) throws IOException { - Translog.Location location = null; - if (indexResult.getResultType() == Result.Type.SUCCESS) { - location = translog.add(new Translog.Index(index, indexResult)); - } else if (indexResult.getSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) { - // if we have document failure, record it as a no-op in the translog and Lucene with the generated seq_no - final NoOp noOp = new NoOp( - indexResult.getSeqNo(), - index.primaryTerm(), - index.origin(), - index.startTime(), - indexResult.getFailure().toString() - ); - location = noOp(noOp).getTranslogLocation(); - } - indexResult.setTranslogLocation(location); - } - - protected final void addDeleteOperationToTranslog(Delete delete, DeleteResult deleteResult) throws IOException { - if (deleteResult.getResultType() == Result.Type.SUCCESS) { - final Translog.Location location = translog.add(new Translog.Delete(delete, deleteResult)); - deleteResult.setTranslogLocation(location); - } - } - - private Translog openTranslog( - EngineConfig engineConfig, - TranslogDeletionPolicy translogDeletionPolicy, - LongSupplier globalCheckpointSupplier, - LongConsumer persistedSequenceNumberConsumer - ) throws IOException { - final TranslogConfig translogConfig = engineConfig.getTranslogConfig(); - store.incRef(); - try { - final Map userData = store.readLastCommittedSegmentsInfo().getUserData(); - final String translogUUID = Objects.requireNonNull(userData.get(Translog.TRANSLOG_UUID_KEY)); - // We expect that this shard already exists, so it must already have an existing translog else something is badly wrong! 
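A side effect of deleting this class: the abstract deleteUnusedFiles() hook it invoked after every translog sync is gone. InternalEngine folds that cleanup into a private helper instead; condensed from its diff earlier in this patch:

    // Runs after each translog sync: drops index files referenced only by
    // stale commits, then releases translog generations no longer needed.
    private void revisitIndexDeletionPolicyOnTranslogSynced() throws IOException {
        if (combinedDeletionPolicy.hasUnreferencedCommits()) {
            indexWriter.deleteUnusedFiles();
        }
        translog.trimUnreferencedReaders();
    }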
- return new Translog( - translogConfig, - translogUUID, - translogDeletionPolicy, - globalCheckpointSupplier, - engineConfig.getPrimaryTermSupplier(), - persistedSequenceNumberConsumer - ); - } finally { - store.decRef(); - } - } - - /** - * Fetch a reference to this Engine's {@link LocalCheckpointTracker} - * - * @return {@link LocalCheckpointTracker} - */ - protected abstract LocalCheckpointTracker getLocalCheckpointTracker(); - - /** - * Allow implementations to delete unused index files after a translog sync. - * - * @throws IOException - When there is an IO error deleting unused files from the index. - */ - protected abstract void deleteUnusedFiles() throws IOException; -} diff --git a/server/src/main/java/org/opensearch/index/seqno/LocalCheckpointTracker.java b/server/src/main/java/org/opensearch/index/seqno/LocalCheckpointTracker.java index 1dda1db0a6533..d75893080c0d7 100644 --- a/server/src/main/java/org/opensearch/index/seqno/LocalCheckpointTracker.java +++ b/server/src/main/java/org/opensearch/index/seqno/LocalCheckpointTracker.java @@ -153,13 +153,13 @@ public synchronized void markSeqNoAsPersisted(final long seqNo) { * * @param seqNo the sequence number to mark as processed */ - public synchronized void fastForwardPersistedSeqNo(final long seqNo) { + public synchronized void fastForwardProcessedSeqNo(final long seqNo) { advanceMaxSeqNo(seqNo); - final long currentPersistedCheckpoint = persistedCheckpoint.get(); - if (shouldUpdateSeqNo(seqNo, currentPersistedCheckpoint, processedCheckpoint) == false) { + final long currentProcessedCheckpoint = processedCheckpoint.get(); + if (seqNo <= currentProcessedCheckpoint) { return; } - persistedCheckpoint.compareAndSet(currentPersistedCheckpoint, seqNo); + processedCheckpoint.compareAndSet(currentProcessedCheckpoint, seqNo); } private void markSeqNo(final long seqNo, final AtomicLong checkPoint, final LongObjectHashMap<CountedBitSet> bitSetMap) { diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index 5f9df348c36d2..7f64745ece97a 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -68,8 +68,6 @@ public void testEngineWritesOpsToTranslog() throws Exception { applyOperation(nrtEngine, op); } - assertEquals(nrtEngine.getProcessedLocalCheckpoint(), engine.getProcessedLocalCheckpoint()); - // we don't index into nrtEngine, so get the doc ids from the regular engine. final List<DocIdSeqNoAndSource> docs = getDocIds(engine, true); @@ -99,7 +97,6 @@ public void testUpdateSegments() throws Exception { applyOperation(engine, op); applyOperation(nrtEngine, op); } - assertEquals(nrtEngine.getProcessedLocalCheckpoint(), engine.getProcessedLocalCheckpoint()); engine.refresh("test"); @@ -108,7 +105,9 @@ public void testUpdateSegments() throws Exception { // Flush the primary and update the NRTEngine with the latest committed infos. engine.flush(); - nrtEngine.updateSegments(engine.getLastCommittedSegmentInfos(), engine.getPersistedLocalCheckpoint()); + nrtEngine.syncTranslog(); // to advance persisted checkpoint + + nrtEngine.updateSegments(engine.getLastCommittedSegmentInfos(), engine.getProcessedLocalCheckpoint()); assertMatchingSegmentsAndCheckpoints(nrtEngine); // Ensure the same hit count between engines.
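The reworked test now mirrors the intended replica lifecycle; the same flow, condensed (setup and doc-count assertions elided):

    // Replica lifecycle under segment replication, as exercised above.
    for (Engine.Operation op : operations) {
        applyOperation(engine, op);    // primary indexes the operation
        applyOperation(nrtEngine, op); // replica only records it in its translog
    }
    engine.flush();                    // primary commits a new segment generation
    nrtEngine.syncTranslog();          // replica fsyncs, advancing its persisted checkpoint
    nrtEngine.updateSegments(engine.getLastCommittedSegmentInfos(), engine.getProcessedLocalCheckpoint());
    assertMatchingSegmentsAndCheckpoints(nrtEngine);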
@@ -121,10 +120,13 @@ public void testUpdateSegments() throws Exception { } } - private void assertMatchingSegmentsAndCheckpoints(NRTReplicationEngine nrtEngine) { + private void assertMatchingSegmentsAndCheckpoints(NRTReplicationEngine nrtEngine) throws IOException { assertEquals(engine.getPersistedLocalCheckpoint(), nrtEngine.getPersistedLocalCheckpoint()); assertEquals(engine.getProcessedLocalCheckpoint(), nrtEngine.getProcessedLocalCheckpoint()); assertEquals(engine.getLocalCheckpointTracker().getMaxSeqNo(), nrtEngine.getLocalCheckpointTracker().getMaxSeqNo()); + assertEquals(engine.getLatestSegmentInfos().files(true), nrtEngine.getLatestSegmentInfos().files(true)); + assertEquals(engine.getLatestSegmentInfos().getUserData(), nrtEngine.getLatestSegmentInfos().getUserData()); + assertEquals(engine.getLatestSegmentInfos().getVersion(), nrtEngine.getLatestSegmentInfos().getVersion()); assertEquals(engine.segments(true), nrtEngine.segments(true)); } diff --git a/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java b/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java index e311c4272275c..237066e549b09 100644 --- a/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java @@ -332,57 +332,57 @@ public void testContains() { assertThat(tracker.hasProcessed(seqNo), equalTo(seqNo <= localCheckpoint || seqNos.contains(seqNo))); } - public void testFastForwardPersistedNoProcessedUpdate() { + public void testFastForwardProcessedNoPersistentUpdate() { // base case with no persistent checkpoint update long seqNo1; - assertThat(tracker.getPersistedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); + assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); seqNo1 = tracker.generateSeqNo(); assertThat(seqNo1, equalTo(0L)); - tracker.fastForwardPersistedSeqNo(seqNo1); - assertThat(tracker.getPersistedCheckpoint(), equalTo(-1L)); + tracker.fastForwardProcessedSeqNo(seqNo1); + assertThat(tracker.getProcessedCheckpoint(), equalTo(-1L)); } - public void testFastForwardPersistedProcessedUpdate() { - // base case with processed checkpoint update + public void testFastForwardProcessedPersistentUpdate() { + // base case with persistent checkpoint update long seqNo1; - assertThat(tracker.getPersistedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); + assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); seqNo1 = tracker.generateSeqNo(); assertThat(seqNo1, equalTo(0L)); - tracker.markSeqNoAsProcessed(seqNo1); - assertThat(tracker.getProcessedCheckpoint(), equalTo(0L)); - tracker.fastForwardPersistedSeqNo(seqNo1); + tracker.markSeqNoAsPersisted(seqNo1); assertThat(tracker.getPersistedCheckpoint(), equalTo(0L)); + tracker.fastForwardProcessedSeqNo(seqNo1); + assertThat(tracker.getProcessedCheckpoint(), equalTo(0L)); assertThat(tracker.hasProcessed(0L), equalTo(true)); assertThat(tracker.hasProcessed(atLeast(1)), equalTo(false)); // idempotent case - tracker.fastForwardPersistedSeqNo(seqNo1); - assertThat(tracker.getPersistedCheckpoint(), equalTo(0L)); + tracker.fastForwardProcessedSeqNo(seqNo1); + assertThat(tracker.getProcessedCheckpoint(), equalTo(0L)); assertThat(tracker.hasProcessed(0L), equalTo(true)); assertThat(tracker.hasProcessed(atLeast(1)), equalTo(false)); } - public void testFastForwardPersistedProcessedUpdate2() { + public void 
testFastForwardProcessedPersistentUpdate2() { long seqNo1, seqNo2; - assertThat(tracker.getPersistedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); + assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); seqNo1 = tracker.generateSeqNo(); seqNo2 = tracker.generateSeqNo(); assertThat(seqNo1, equalTo(0L)); assertThat(seqNo2, equalTo(1L)); - tracker.markSeqNoAsProcessed(seqNo1); - tracker.markSeqNoAsProcessed(seqNo2); - assertThat(tracker.getPersistedCheckpoint(), equalTo(-1L)); - assertThat(tracker.getProcessedCheckpoint(), equalTo(1L)); - - tracker.fastForwardPersistedSeqNo(seqNo2); + tracker.markSeqNoAsPersisted(seqNo1); + tracker.markSeqNoAsPersisted(seqNo2); + assertThat(tracker.getProcessedCheckpoint(), equalTo(-1L)); assertThat(tracker.getPersistedCheckpoint(), equalTo(1L)); + + tracker.fastForwardProcessedSeqNo(seqNo2); + assertThat(tracker.getProcessedCheckpoint(), equalTo(1L)); assertThat(tracker.hasProcessed(seqNo1), equalTo(true)); assertThat(tracker.hasProcessed(seqNo2), equalTo(true)); - tracker.fastForwardPersistedSeqNo(seqNo1); - assertThat(tracker.getPersistedCheckpoint(), equalTo(1L)); + tracker.fastForwardProcessedSeqNo(seqNo1); + assertThat(tracker.getProcessedCheckpoint(), equalTo(1L)); assertThat(tracker.hasProcessed(between(0, 1)), equalTo(true)); assertThat(tracker.hasProcessed(atLeast(2)), equalTo(false)); assertThat(tracker.getMaxSeqNo(), equalTo(1L)); From 41715d5b3cf1881374c4528afe1883ea348d468b Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Thu, 12 May 2022 10:01:58 -0700 Subject: [PATCH 10/16] Fix formatting Signed-off-by: Marc Handalian --- .../index/engine/InternalEngine.java | 152 +++++++++--------- .../index/engine/NRTReplicationEngine.java | 36 ++--- 2 files changed, 90 insertions(+), 98 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index ed0a959b48fbc..2321224b85ece 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -487,10 +487,10 @@ public int fillSeqNoGaps(long primaryTerm) throws IOException { syncTranslog(); // to persist noops associated with the advancement of the local checkpoint assert localCheckpointTracker.getPersistedCheckpoint() == maxSeqNo : "persisted local checkpoint did not advance to max seq no; is [" - + localCheckpointTracker.getPersistedCheckpoint() - + "], max seq no [" - + maxSeqNo - + "]"; + + localCheckpointTracker.getPersistedCheckpoint() + + "], max seq no [" + + maxSeqNo + + "]"; return numNoOpsAdded; } } @@ -841,8 +841,8 @@ private VersionValue resolveDocVersion(final Operation op, boolean loadSeqNo) th } else if (engineConfig.isEnableGcDeletes() && versionValue.isDelete() && (engineConfig.getThreadPool().relativeTimeInMillis() - ((DeleteVersionValue) versionValue).time) > getGcDeletesInMillis()) { - versionValue = null; - } + versionValue = null; + } return versionValue; } @@ -1153,35 +1153,35 @@ private IndexingStrategy planIndexingAsPrimary(Index index) throws IOException { plan = IndexingStrategy.skipDueToVersionConflict(e, true, currentVersion); } else if (index.getIfSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO && (versionValue.seqNo != index.getIfSeqNo() || versionValue.term != index.getIfPrimaryTerm())) { - final VersionConflictEngineException e = new VersionConflictEngineException( - shardId, - index.id(), - index.getIfSeqNo(), - 
index.getIfPrimaryTerm(), - versionValue.seqNo, - versionValue.term - ); - plan = IndexingStrategy.skipDueToVersionConflict(e, currentNotFoundOrDeleted, currentVersion); - } else if (index.versionType().isVersionConflictForWrites(currentVersion, index.version(), currentNotFoundOrDeleted)) { - final VersionConflictEngineException e = new VersionConflictEngineException( - shardId, - index, - currentVersion, - currentNotFoundOrDeleted - ); - plan = IndexingStrategy.skipDueToVersionConflict(e, currentNotFoundOrDeleted, currentVersion); - } else { - final Exception reserveError = tryAcquireInFlightDocs(index, reservingDocs); - if (reserveError != null) { - plan = IndexingStrategy.failAsTooManyDocs(reserveError); - } else { - plan = IndexingStrategy.processNormally( - currentNotFoundOrDeleted, - canOptimizeAddDocument ? 1L : index.versionType().updateVersion(currentVersion, index.version()), - reservingDocs + final VersionConflictEngineException e = new VersionConflictEngineException( + shardId, + index.id(), + index.getIfSeqNo(), + index.getIfPrimaryTerm(), + versionValue.seqNo, + versionValue.term ); + plan = IndexingStrategy.skipDueToVersionConflict(e, currentNotFoundOrDeleted, currentVersion); + } else if (index.versionType().isVersionConflictForWrites(currentVersion, index.version(), currentNotFoundOrDeleted)) { + final VersionConflictEngineException e = new VersionConflictEngineException( + shardId, + index, + currentVersion, + currentNotFoundOrDeleted + ); + plan = IndexingStrategy.skipDueToVersionConflict(e, currentNotFoundOrDeleted, currentVersion); + } else { + final Exception reserveError = tryAcquireInFlightDocs(index, reservingDocs); + if (reserveError != null) { + plan = IndexingStrategy.failAsTooManyDocs(reserveError); + } else { + plan = IndexingStrategy.processNormally( + currentNotFoundOrDeleted, + canOptimizeAddDocument ? 1L : index.versionType().updateVersion(currentVersion, index.version()), + reservingDocs + ); + } } - } } return plan; } @@ -1307,10 +1307,10 @@ private IndexingStrategy( : "use lucene update is set to true, but we're not indexing into lucene"; assert (indexIntoLucene && earlyResultOnPreFlightError != null) == false : "can only index into lucene or have a preflight result but not both." 
- + "indexIntoLucene: " - + indexIntoLucene - + " earlyResultOnPreFlightError:" - + earlyResultOnPreFlightError; + + "indexIntoLucene: " + + indexIntoLucene + + " earlyResultOnPreFlightError:" + + earlyResultOnPreFlightError; assert reservedDocs == 0 || indexIntoLucene || addStaleOpToLucene : reservedDocs; this.currentNotFoundOrDeleted = currentNotFoundOrDeleted; this.useLuceneUpdateDocument = useLuceneUpdateDocument; @@ -1568,32 +1568,32 @@ private DeletionStrategy planDeletionAsPrimary(Delete delete) throws IOException plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, true); } else if (delete.getIfSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO && (versionValue.seqNo != delete.getIfSeqNo() || versionValue.term != delete.getIfPrimaryTerm())) { - final VersionConflictEngineException e = new VersionConflictEngineException( - shardId, - delete.id(), - delete.getIfSeqNo(), - delete.getIfPrimaryTerm(), - versionValue.seqNo, - versionValue.term - ); - plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, currentlyDeleted); - } else if (delete.versionType().isVersionConflictForWrites(currentVersion, delete.version(), currentlyDeleted)) { - final VersionConflictEngineException e = new VersionConflictEngineException( - shardId, - delete, - currentVersion, - currentlyDeleted - ); - plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, currentlyDeleted); - } else { - final Exception reserveError = tryAcquireInFlightDocs(delete, 1); - if (reserveError != null) { - plan = DeletionStrategy.failAsTooManyDocs(reserveError); + final VersionConflictEngineException e = new VersionConflictEngineException( + shardId, + delete.id(), + delete.getIfSeqNo(), + delete.getIfPrimaryTerm(), + versionValue.seqNo, + versionValue.term + ); + plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, currentlyDeleted); + } else if (delete.versionType().isVersionConflictForWrites(currentVersion, delete.version(), currentlyDeleted)) { + final VersionConflictEngineException e = new VersionConflictEngineException( + shardId, + delete, + currentVersion, + currentlyDeleted + ); + plan = DeletionStrategy.skipDueToVersionConflict(e, currentVersion, currentlyDeleted); } else { - final long versionOfDeletion = delete.versionType().updateVersion(currentVersion, delete.version()); - plan = DeletionStrategy.processNormally(currentlyDeleted, versionOfDeletion, 1); + final Exception reserveError = tryAcquireInFlightDocs(delete, 1); + if (reserveError != null) { + plan = DeletionStrategy.failAsTooManyDocs(reserveError); + } else { + final long versionOfDeletion = delete.versionType().updateVersion(currentVersion, delete.version()); + plan = DeletionStrategy.processNormally(currentlyDeleted, versionOfDeletion, 1); + } } - } return plan; } @@ -1653,10 +1653,10 @@ private DeletionStrategy( ) { assert (deleteFromLucene && earlyResultOnPreflightError != null) == false : "can only delete from lucene or have a preflight result but not both." 
- + "deleteFromLucene: " - + deleteFromLucene - + " earlyResultOnPreFlightError:" - + earlyResultOnPreflightError; + + "deleteFromLucene: " + + deleteFromLucene + + " earlyResultOnPreFlightError:" + + earlyResultOnPreflightError; this.deleteFromLucene = deleteFromLucene; this.addStaleOpToLucene = addStaleOpToLucene; this.currentlyDeleted = currentlyDeleted; @@ -1940,8 +1940,8 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { || force || shouldPeriodicallyFlush || getProcessedLocalCheckpoint() > Long.parseLong( - lastCommittedSegmentInfos.userData.get(SequenceNumbers.LOCAL_CHECKPOINT_KEY) - )) { + lastCommittedSegmentInfos.userData.get(SequenceNumbers.LOCAL_CHECKPOINT_KEY) + )) { ensureCanFlush(); try { translog.rollGeneration(); @@ -2262,12 +2262,12 @@ protected boolean maybeFailEngine(String source, Exception e) { return failOnTragicEvent((AlreadyClosedException) e); } else if (e != null && ((indexWriter.isOpen() == false && indexWriter.getTragicException() == e) - || (translog.isOpen() == false && translog.getTragicException() == e))) { - // this spot on - we are handling the tragic event exception here so we have to fail the engine - // right away - failEngine(source, e); - return true; - } + || (translog.isOpen() == false && translog.getTragicException() == e))) { + // this spot on - we are handling the tragic event exception here so we have to fail the engine + // right away + failEngine(source, e); + return true; + } return false; } @@ -3002,9 +3002,9 @@ private void restoreVersionMapAndCheckpointTracker(DirectoryReader directoryRead final IndexSearcher searcher = new IndexSearcher(directoryReader); searcher.setQueryCache(null); final Query query = new BooleanQuery.Builder().add( - LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, getPersistedLocalCheckpoint() + 1, Long.MAX_VALUE), - BooleanClause.Occur.MUST - ) + LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, getPersistedLocalCheckpoint() + 1, Long.MAX_VALUE), + BooleanClause.Occur.MUST + ) // exclude non-root nested documents .add(Queries.newNonNestedFilter(), BooleanClause.Occur.MUST) .build(); diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 4d751a6dda57b..181c7a9afc129 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -68,10 +68,12 @@ public NRTReplicationEngine(EngineConfig engineConfig) { this.completionStatsCache = new CompletionStatsCache(() -> acquireSearcher("completion_stats")); this.readerManager = readerManager; this.readerManager.addListener(completionStatsCache); - this.translog = openTranslog(engineConfig, + this.translog = openTranslog( + engineConfig, getTranslogDeletionPolicy(engineConfig), engineConfig.getGlobalCheckpointSupplier(), - localCheckpointTracker::markSeqNoAsPersisted); + localCheckpointTracker::markSeqNoAsPersisted + ); } catch (IOException e) { IOUtils.closeWhileHandlingException(store::decRef, readerManager); throw new EngineCreationFailureException(shardId, "failed to create engine", e); @@ -260,8 +262,7 @@ public List segments(boolean verbose) { } @Override - public void refresh(String source) throws EngineException { - } + public void refresh(String source) throws EngineException {} @Override public boolean maybeRefresh(String source) throws EngineException { @@ -269,8 +270,7 @@ public boolean maybeRefresh(String source) 
throws EngineException { } @Override - public void writeIndexingBuffer() throws EngineException { - } + public void writeIndexingBuffer() throws EngineException {} @Override public boolean shouldPeriodicallyFlush() { @@ -278,8 +278,7 @@ public boolean shouldPeriodicallyFlush() { } @Override - public void flush(boolean force, boolean waitIfOngoing) throws EngineException { - } + public void flush(boolean force, boolean waitIfOngoing) throws EngineException {} @Override public void trimUnreferencedTranslogFiles() throws EngineException { @@ -314,15 +313,13 @@ public void forceMerge( boolean upgrade, boolean upgradeOnlyAncientSegments, String forceMergeUUID - ) throws EngineException, IOException { - } + ) throws EngineException, IOException {} @Override public GatedCloseable acquireLastIndexCommit(boolean flushFirst) throws EngineException { try { final IndexCommit indexCommit = Lucene.getIndexCommit(lastCommittedSegmentInfos, store.directory()); - return new GatedCloseable<>(indexCommit, () -> { - }); + return new GatedCloseable<>(indexCommit, () -> {}); } catch (IOException e) { throw new EngineException(shardId, "Unable to build latest IndexCommit", e); } @@ -358,12 +355,10 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) { } @Override - public void activateThrottling() { - } + public void activateThrottling() {} @Override - public void deactivateThrottling() { - } + public void deactivateThrottling() {} @Override public int restoreLocalHistoryFromTranslog(TranslogRecoveryRunner translogRecoveryRunner) throws IOException { @@ -386,12 +381,10 @@ public void skipTranslogRecovery() { } @Override - public void maybePruneDeletes() { - } + public void maybePruneDeletes() {} @Override - public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) { - } + public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) {} @Override public long getMaxSeqNoOfUpdatesOrDeletes() { @@ -399,8 +392,7 @@ public long getMaxSeqNoOfUpdatesOrDeletes() { } @Override - public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) { - } + public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) {} @Override protected SegmentInfos getLastCommittedSegmentInfos() { From 4201475f14d4f6c52fc8d8c51cc081aefa9c7127 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Thu, 12 May 2022 10:11:55 -0700 Subject: [PATCH 11/16] Add missing translog methods to NRTEngine. 
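The NRT engine previously stubbed several translog accessors (returning false, null, or 0). This patch delegates them to the engine's own translog and adds an onSettingsChanged override so retention settings reach the translog deletion policy. The delegation pattern, condensed from the diff below:

    @Override
    public boolean isTranslogSyncNeeded() {
        return translog.syncNeeded(); // previously hard-coded to false
    }

    @Override
    public TranslogStats getTranslogStats() {
        return translog.stats(); // previously returned null
    }

    @Override
    public long getLastSyncedGlobalCheckpoint() {
        return translog.getLastSyncedGlobalCheckpoint(); // previously returned 0
    }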
Signed-off-by: Marc Handalian --- .../index/engine/NRTReplicationEngine.java | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 181c7a9afc129..58944bbf1d3e7 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -16,6 +16,8 @@ import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; +import org.opensearch.common.unit.ByteSizeValue; +import org.opensearch.common.unit.TimeValue; import org.opensearch.core.internal.io.IOUtils; import org.opensearch.index.seqno.LocalCheckpointTracker; import org.opensearch.index.seqno.SeqNoStats; @@ -173,7 +175,7 @@ protected ReferenceManager getReferenceManager(Search @Override public boolean isTranslogSyncNeeded() { - return false; + return translog.syncNeeded(); } @Override @@ -224,12 +226,12 @@ public long getMinRetainedSeqNo() { @Override public TranslogStats getTranslogStats() { - return null; + return translog.stats(); } @Override public Translog.Location getTranslogLastWriteLocation() { - return null; + return translog.getLastWriteLocation(); } @Override @@ -248,7 +250,7 @@ public SeqNoStats getSeqNoStats(long globalCheckpoint) { @Override public long getLastSyncedGlobalCheckpoint() { - return 0; + return translog.getLastSyncedGlobalCheckpoint(); } @Override @@ -394,6 +396,17 @@ public long getMaxSeqNoOfUpdatesOrDeletes() { @Override public void advanceMaxSeqNoOfUpdatesOrDeletes(long maxSeqNoOfUpdatesOnPrimary) {} + public Translog getTranslog() { + return translog; + } + + @Override + public void onSettingsChanged(TimeValue translogRetentionAge, ByteSizeValue translogRetentionSize, long softDeletesRetentionOps) { + final TranslogDeletionPolicy translogDeletionPolicy = translog.getDeletionPolicy(); + translogDeletionPolicy.setRetentionAgeInMillis(translogRetentionAge.millis()); + translogDeletionPolicy.setRetentionSizeInBytes(translogRetentionSize.getBytes()); + } + @Override protected SegmentInfos getLastCommittedSegmentInfos() { return lastCommittedSegmentInfos; @@ -404,6 +417,10 @@ protected SegmentInfos getLatestSegmentInfos() { return readerManager.getSegmentInfos(); } + protected LocalCheckpointTracker getLocalCheckpointTracker() { + return localCheckpointTracker; + } + private DirectoryReader getDirectoryReader() throws IOException { // for segment replication: replicas should create the reader from store, we don't want an open IW on replicas. return new SoftDeletesDirectoryReaderWrapper(DirectoryReader.open(store.directory()), Lucene.SOFT_DELETES_FIELD); @@ -445,11 +462,4 @@ private TranslogDeletionPolicy getTranslogDeletionPolicy(EngineConfig engineConf ); } - public Translog getTranslog() { - return translog; - } - - protected LocalCheckpointTracker getLocalCheckpointTracker() { - return localCheckpointTracker; - } } From 675dc1179c5a5eda02da86b39fdb5044d647bb10 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Thu, 12 May 2022 10:37:57 -0700 Subject: [PATCH 12/16] Remove persistent seqNo check from fastForwardProcessedSeqNo. 
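With the persisted-checkpoint guard removed, fastForwardProcessedSeqNo only refuses to move the processed checkpoint backwards, so the three persisted/processed test permutations collapse into a single test. The simplified contract, as the consolidated test exercises it:

    long seqNo1 = tracker.generateSeqNo();      // 0
    tracker.fastForwardProcessedSeqNo(seqNo1);  // processed checkpoint -> 0
    tracker.fastForwardProcessedSeqNo(seqNo1);  // idempotent: stays at 0
    tracker.fastForwardProcessedSeqNo(-1);      // lower values are ignored
    assertThat(tracker.getProcessedCheckpoint(), equalTo(0L));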
From 675dc1179c5a5eda02da86b39fdb5044d647bb10 Mon Sep 17 00:00:00 2001
From: Marc Handalian
Date: Thu, 12 May 2022 10:37:57 -0700
Subject: [PATCH 12/16] Remove persistent seqNo check from
 fastForwardProcessedSeqNo.

Signed-off-by: Marc Handalian
---
 .../seqno/LocalCheckpointTrackerTests.java    | 45 ++-----------------
 1 file changed, 4 insertions(+), 41 deletions(-)

diff --git a/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java b/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java
index 237066e549b09..3a450e1f72a8d 100644
--- a/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java
+++ b/server/src/test/java/org/opensearch/index/seqno/LocalCheckpointTrackerTests.java
@@ -332,59 +332,22 @@ public void testContains() {
         assertThat(tracker.hasProcessed(seqNo), equalTo(seqNo <= localCheckpoint || seqNos.contains(seqNo)));
     }
 
-    public void testFastForwardProcessedNoPersistentUpdate() {
+    public void testFastForwardProcessedSeqNo() {
         // base case with no persistent checkpoint update
         long seqNo1;
         assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED));
         seqNo1 = tracker.generateSeqNo();
         assertThat(seqNo1, equalTo(0L));
         tracker.fastForwardProcessedSeqNo(seqNo1);
-        assertThat(tracker.getProcessedCheckpoint(), equalTo(-1L));
-    }
+        assertThat(tracker.getProcessedCheckpoint(), equalTo(seqNo1));
 
-    public void testFastForwardProcessedPersistentUpdate() {
-        // base case with persistent checkpoint update
-        long seqNo1;
-        assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED));
-        seqNo1 = tracker.generateSeqNo();
-        assertThat(seqNo1, equalTo(0L));
-
-        tracker.markSeqNoAsPersisted(seqNo1);
-        assertThat(tracker.getPersistedCheckpoint(), equalTo(0L));
+        // idempotent case
         tracker.fastForwardProcessedSeqNo(seqNo1);
         assertThat(tracker.getProcessedCheckpoint(), equalTo(0L));
         assertThat(tracker.hasProcessed(0L), equalTo(true));
-        assertThat(tracker.hasProcessed(atLeast(1)), equalTo(false));
 
-        // idempotent case
-        tracker.fastForwardProcessedSeqNo(seqNo1);
+        tracker.fastForwardProcessedSeqNo(-1);
         assertThat(tracker.getProcessedCheckpoint(), equalTo(0L));
         assertThat(tracker.hasProcessed(0L), equalTo(true));
-        assertThat(tracker.hasProcessed(atLeast(1)), equalTo(false));
-
-    }
-
-    public void testFastForwardProcessedPersistentUpdate2() {
-        long seqNo1, seqNo2;
-        assertThat(tracker.getProcessedCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED));
-        seqNo1 = tracker.generateSeqNo();
-        seqNo2 = tracker.generateSeqNo();
-        assertThat(seqNo1, equalTo(0L));
-        assertThat(seqNo2, equalTo(1L));
-        tracker.markSeqNoAsPersisted(seqNo1);
-        tracker.markSeqNoAsPersisted(seqNo2);
-        assertThat(tracker.getProcessedCheckpoint(), equalTo(-1L));
-        assertThat(tracker.getPersistedCheckpoint(), equalTo(1L));
-
-        tracker.fastForwardProcessedSeqNo(seqNo2);
-        assertThat(tracker.getProcessedCheckpoint(), equalTo(1L));
-        assertThat(tracker.hasProcessed(seqNo1), equalTo(true));
-        assertThat(tracker.hasProcessed(seqNo2), equalTo(true));
-
-        tracker.fastForwardProcessedSeqNo(seqNo1);
-        assertThat(tracker.getProcessedCheckpoint(), equalTo(1L));
-        assertThat(tracker.hasProcessed(between(0, 1)), equalTo(true));
-        assertThat(tracker.hasProcessed(atLeast(2)), equalTo(false));
-        assertThat(tracker.getMaxSeqNo(), equalTo(1L));
     }
 }
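
[Editor's note] The rewritten test pins down the contract of fastForwardProcessedSeqNo: the processed checkpoint only moves forward, and replaying an equal or lower seqNo is a no-op. A runnable stand-in (not the LocalCheckpointTracker implementation) showing the CAS loop that yields those semantics:

    import java.util.concurrent.atomic.AtomicLong;

    final class CheckpointSketch {
        private final AtomicLong processed = new AtomicLong(-1); // NO_OPS_PERFORMED

        void fastForward(long seqNo) {
            long current;
            do {
                current = processed.get();
                if (seqNo <= current) {
                    return; // idempotent / stale call, as the test asserts
                }
            } while (processed.compareAndSet(current, seqNo) == false);
        }

        public static void main(String[] args) {
            CheckpointSketch cp = new CheckpointSketch();
            cp.fastForward(0);  // advances -1 -> 0
            cp.fastForward(0);  // idempotent, stays 0
            cp.fastForward(-1); // stale, ignored
            System.out.println(cp.processed.get()); // prints 0
        }
    }
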
From 5ea0e7b8f250171125d7117d32b09e0028282e46 Mon Sep 17 00:00:00 2001
From: Marc Handalian
Date: Thu, 12 May 2022 14:33:25 -0700
Subject: [PATCH 13/16] PR feedback.

Signed-off-by: Marc Handalian
---
 .../java/org/opensearch/index/engine/InternalEngine.java     | 4 ++--
 .../org/opensearch/index/engine/NRTReplicationEngine.java    | 5 ++++-
 .../opensearch/index/engine/NRTReplicationReaderManager.java | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java
index 2321224b85ece..2b8c0fe28cc4e 100644
--- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java
+++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java
@@ -2280,13 +2280,13 @@ protected SegmentInfos getLastCommittedSegmentInfos() {
     public SegmentInfos getLatestSegmentInfos() {
         OpenSearchDirectoryReader reader = null;
         try {
-            reader = externalReaderManager.internalReaderManager.acquire();
+            reader = internalReaderManager.acquire();
             return ((StandardDirectoryReader) reader.getDelegate()).getSegmentInfos();
         } catch (IOException e) {
             throw new EngineException(shardId, e.getMessage(), e);
         } finally {
             try {
-                externalReaderManager.internalReaderManager.release(reader);
+                internalReaderManager.release(reader);
             } catch (IOException e) {
                 throw new EngineException(shardId, e.getMessage(), e);
             }
diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
index 58944bbf1d3e7..093df048a2797 100644
--- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
+++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
@@ -92,7 +92,6 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) throws IOException {
             this.lastCommittedSegmentInfos = infos;
         }
         localCheckpointTracker.fastForwardProcessedSeqNo(seqNo);
-        readerManager.maybeRefresh();
     }
 
     @Override
@@ -132,6 +131,7 @@ public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException {
 
     @Override
     public IndexResult index(Index index) throws IOException {
+        ensureOpen();
         IndexResult indexResult = new IndexResult(index.version(), index.primaryTerm(), index.seqNo(), false);
         final Translog.Location location = translog.add(new Translog.Index(index, indexResult));
         indexResult.setTranslogLocation(location);
@@ -143,6 +143,7 @@ public IndexResult index(Index index) throws IOException {
 
     @Override
     public DeleteResult delete(Delete delete) throws IOException {
+        ensureOpen();
         DeleteResult deleteResult = new DeleteResult(delete.version(), delete.primaryTerm(), delete.seqNo(), true);
         final Translog.Location location = translog.add(new Translog.Delete(delete, deleteResult));
         deleteResult.setTranslogLocation(location);
@@ -154,6 +155,7 @@ public DeleteResult delete(Delete delete) throws IOException {
 
     @Override
     public NoOpResult noOp(NoOp noOp) throws IOException {
+        ensureOpen();
         NoOpResult noOpResult = new NoOpResult(noOp.primaryTerm(), noOp.seqNo());
         final Translog.Location location = translog.add(new Translog.NoOp(noOp.seqNo(), noOp.primaryTerm(), noOp.reason()));
         noOpResult.setTranslogLocation(location);
@@ -299,6 +301,7 @@ public boolean shouldRollTranslogGeneration() {
 
     @Override
     public void rollTranslogGeneration() throws EngineException {
+        ensureOpen();
         try {
             translog.rollGeneration();
             translog.trimUnreferencedReaders();
diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java
index 9c78e763bee43..16e615672a26f 100644
--- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java
+++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java
@@ -33,7 +33,7 @@
  */
 public class NRTReplicationReaderManager extends OpenSearchReaderManager {
 
-    protected static Logger logger = LogManager.getLogger(NRTReplicationReaderManager.class);
+    private final static Logger logger = LogManager.getLogger(NRTReplicationReaderManager.class);
     private volatile SegmentInfos currentInfos;
 
     /**
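
[Editor's note] The ensureOpen() calls added above follow the standard guard pattern: every externally reachable mutating method verifies the engine is still open before touching the translog. A self-contained sketch (hypothetical class; the real Engine#ensureOpen throws Lucene's AlreadyClosedException):

    import java.util.concurrent.atomic.AtomicBoolean;

    final class GuardedEngineSketch {
        private final AtomicBoolean closed = new AtomicBoolean(false);

        private void ensureOpen() {
            if (closed.get()) {
                throw new IllegalStateException("engine is closed");
            }
        }

        void index(String doc) {
            ensureOpen(); // fail fast instead of appending to a closed translog
            // ... append to translog, advance local checkpoint ...
        }

        void close() {
            closed.set(true);
        }
    }
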
From 5bccdd857b7356dce6b3f4b5c8249d6165527641 Mon Sep 17 00:00:00 2001
From: Marc Handalian
Date: Thu, 12 May 2022 15:14:05 -0700
Subject: [PATCH 14/16] Add test specific to translog trimming.

Signed-off-by: Marc Handalian
---
 .../index/engine/NRTReplicationEngine.java    |  2 +-
 .../engine/NRTReplicationEngineTests.java     | 53 +++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
index 093df048a2797..a048b858ca139 100644
--- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
+++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
@@ -436,7 +436,7 @@ private Translog openTranslog(
         LongConsumer persistedSequenceNumberConsumer
     ) throws IOException {
         final TranslogConfig translogConfig = engineConfig.getTranslogConfig();
-        final Map<String, String> userData = store.readLastCommittedSegmentsInfo().getUserData();
+        final Map<String, String> userData = lastCommittedSegmentInfos.getUserData();
         final String translogUUID = Objects.requireNonNull(userData.get(Translog.TRANSLOG_UUID_KEY));
         // We expect that this shard already exists, so it must already have an existing translog else something is badly wrong!
         return new Translog(
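
[Editor's note] The openTranslog change above switches the translog UUID lookup from a fresh on-disk read (store.readLastCommittedSegmentsInfo()) to the SegmentInfos the engine already holds in memory. The shape of the lookup, using the identifiers from the hunk (simplified context):

    // lastCommittedSegmentInfos is the engine's in-memory commit point
    Map<String, String> userData = lastCommittedSegmentInfos.getUserData();
    String translogUUID = Objects.requireNonNull(userData.get(Translog.TRANSLOG_UUID_KEY));

Keeping a single in-memory source of truth avoids a disk read that could observe a different commit than the one the engine was opened with.
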
diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java
index 7f64745ece97a..133eb1199bbcb 100644
--- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java
+++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java
@@ -15,16 +15,23 @@
 import org.opensearch.common.concurrent.GatedCloseable;
 import org.opensearch.common.lucene.Lucene;
 import org.opensearch.common.lucene.search.Queries;
+import org.opensearch.index.mapper.ParsedDocument;
 import org.opensearch.index.seqno.SequenceNumbers;
 import org.opensearch.index.store.Store;
+import org.opensearch.index.translog.TestTranslog;
 import org.opensearch.index.translog.Translog;
 
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.stream.Collectors;
 
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED;
+
 public class NRTReplicationEngineTests extends EngineTestCase {
 
     public void testCreateEngine() throws IOException {
@@ -68,6 +75,9 @@ public void testEngineWritesOpsToTranslog() throws Exception {
             applyOperation(nrtEngine, op);
         }
 
+        assertEquals(nrtEngine.getTranslogLastWriteLocation(), engine.getTranslogLastWriteLocation());
+        assertEquals(nrtEngine.getLastSyncedGlobalCheckpoint(), engine.getLastSyncedGlobalCheckpoint());
+
         // we don't index into nrtEngine, so get the doc ids from the regular engine.
         final List<DocIdSeqNoAndSource> docs = getDocIds(engine, true);
@@ -103,6 +113,18 @@ public void testUpdateSegments() throws Exception {
             nrtEngine.updateSegments(engine.getLatestSegmentInfos(), engine.getProcessedLocalCheckpoint());
             assertMatchingSegmentsAndCheckpoints(nrtEngine);
 
+            // assert a doc from the operations exists.
+            final ParsedDocument parsedDoc = createParsedDoc(operations.stream().findFirst().get().id(), null);
+            try (Engine.GetResult getResult = engine.get(newGet(true, parsedDoc), engine::acquireSearcher)) {
+                assertThat(getResult.exists(), equalTo(true));
+                assertThat(getResult.docIdAndVersion(), notNullValue());
+            }
+
+            try (Engine.GetResult getResult = nrtEngine.get(newGet(true, parsedDoc), nrtEngine::acquireSearcher)) {
+                assertThat(getResult.exists(), equalTo(true));
+                assertThat(getResult.docIdAndVersion(), notNullValue());
+            }
+
             // Flush the primary and update the NRTEngine with the latest committed infos.
             engine.flush();
             nrtEngine.syncTranslog(); // to advance persisted checkpoint
@@ -120,6 +142,37 @@ public void testUpdateSegments() throws Exception {
         }
     }
 
+    public void testTrimTranslogOps() throws Exception {
+        final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED);
+
+        try (
+            final Store nrtEngineStore = createStore();
+            final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore);
+        ) {
+            List<Engine.Operation> operations = generateHistoryOnReplica(
+                between(1, 100),
+                randomBoolean(),
+                randomBoolean(),
+                randomBoolean()
+            );
+            applyOperations(nrtEngine, operations);
+            Set<Long> seqNos = operations.stream().map(Engine.Operation::seqNo).collect(Collectors.toSet());
+            try (Translog.Snapshot snapshot = nrtEngine.getTranslog().newSnapshot()) {
+                assertThat(snapshot.totalOperations(), equalTo(operations.size()));
+                assertThat(
+                    TestTranslog.drainSnapshot(snapshot, false).stream().map(Translog.Operation::seqNo).collect(Collectors.toSet()),
+                    equalTo(seqNos)
+                );
+            }
+            nrtEngine.rollTranslogGeneration();
+            nrtEngine.trimOperationsFromTranslog(primaryTerm.get(), NO_OPS_PERFORMED);
+            try (Translog.Snapshot snapshot = getTranslog(engine).newSnapshot()) {
+                assertThat(snapshot.totalOperations(), equalTo(0));
+                assertNull(snapshot.next());
+            }
+        }
+    }
+
     private void assertMatchingSegmentsAndCheckpoints(NRTReplicationEngine nrtEngine) throws IOException {
         assertEquals(engine.getPersistedLocalCheckpoint(), nrtEngine.getPersistedLocalCheckpoint());
         assertEquals(engine.getProcessedLocalCheckpoint(), nrtEngine.getProcessedLocalCheckpoint());
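
[Editor's note] The new test exercises Translog#trimOperations(belowTerm, aboveSeqNo), whose contract is roughly: operations with a sequence number above aboveSeqNo that belong to a primary term below belowTerm become unreadable. A simplified, self-contained model of that predicate (the real translog trims whole generation files, not individual entries):

    import java.util.ArrayDeque;
    import java.util.Deque;

    final class OpBufferSketch {
        record Op(long term, long seqNo) {}

        private final Deque<Op> ops = new ArrayDeque<>();

        void add(long term, long seqNo) {
            ops.add(new Op(term, seqNo));
        }

        void trim(long belowTerm, long aboveSeqNo) {
            // drop ops above the seqNo watermark that come from older terms
            ops.removeIf(op -> op.term() < belowTerm && op.seqNo() > aboveSeqNo);
        }

        int size() {
            return ops.size();
        }
    }
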
From 5b380d2a8adbf9abfdc14a3d4ae11122972282ed Mon Sep 17 00:00:00 2001
From: Marc Handalian
Date: Thu, 12 May 2022 16:01:09 -0700
Subject: [PATCH 15/16] Javadoc check.

Signed-off-by: Marc Handalian
---
 .../index/engine/NRTReplicationEngineFactory.java | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngineFactory.java
index ccbd04129b349..45fe3086ac3f6 100644
--- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngineFactory.java
+++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngineFactory.java
@@ -8,6 +8,12 @@
 
 package org.opensearch.index.engine;
 
+/**
+ * Engine Factory implementation used with Segment Replication that wires up replica shards with an {@link NRTReplicationEngine}
+ * and primaries with an {@link InternalEngine}.
+ *
+ * @opensearch.internal
+ */
 public class NRTReplicationEngineFactory implements EngineFactory {
     @Override
     public Engine newReadWriteEngine(EngineConfig config) {
From 06b0d98bbbf8adbebeb1c5a2202c52b929255bf1 Mon Sep 17 00:00:00 2001
From: Marc Handalian
Date: Wed, 18 May 2022 16:33:17 -0700
Subject: [PATCH 16/16] Add failEngine calls to translog methods in
 NRTReplicationEngine. Roll translog generation on replica when a new commit
 point is received.

Signed-off-by: Marc Handalian
---
 .../index/engine/NRTReplicationEngine.java    | 42 ++++++++++++-------
 .../engine/NRTReplicationEngineTests.java     | 23 ++++++++++
 2 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
index a048b858ca139..106643198cc3b 100644
--- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
+++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
@@ -18,6 +18,7 @@
 import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
 import org.opensearch.common.unit.ByteSizeValue;
 import org.opensearch.common.unit.TimeValue;
+import org.opensearch.common.util.concurrent.ReleasableLock;
 import org.opensearch.core.internal.io.IOUtils;
 import org.opensearch.index.seqno.LocalCheckpointTracker;
 import org.opensearch.index.seqno.SeqNoStats;
@@ -90,6 +91,7 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) throws IOException {
         // generation. We can still refresh with incoming SegmentInfos that are not part of a commit point.
         if (infos.getGeneration() > lastCommittedSegmentInfos.getGeneration()) {
             this.lastCommittedSegmentInfos = infos;
+            rollTranslogGeneration();
         }
         localCheckpointTracker.fastForwardProcessedSeqNo(seqNo);
     }
@@ -121,10 +123,15 @@ public boolean isThrottled() {
 
     @Override
     public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException {
-        ensureOpen();
-        try {
+        try (ReleasableLock lock = readLock.acquire()) {
+            ensureOpen();
             translog.trimOperations(belowTerm, aboveSeqNo);
-        } catch (IOException e) {
+        } catch (Exception e) {
+            try {
+                failEngine("translog operations trimming failed", e);
+            } catch (Exception inner) {
+                e.addSuppressed(inner);
+            }
             throw new EngineException(shardId, "failed to trim translog operations", e);
         }
     }
@@ -286,10 +293,15 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException {
 
     @Override
     public void trimUnreferencedTranslogFiles() throws EngineException {
-        ensureOpen();
-        try {
+        try (ReleasableLock lock = readLock.acquire()) {
+            ensureOpen();
             translog.trimUnreferencedReaders();
-        } catch (IOException e) {
+        } catch (Exception e) {
+            try {
+                failEngine("translog trimming failed", e);
+            } catch (Exception inner) {
+                e.addSuppressed(inner);
+            }
             throw new EngineException(shardId, "failed to trim translog", e);
         }
     }
@@ -301,11 +313,16 @@ public boolean shouldRollTranslogGeneration() {
 
     @Override
     public void rollTranslogGeneration() throws EngineException {
-        ensureOpen();
-        try {
+        try (ReleasableLock ignored = readLock.acquire()) {
+            ensureOpen();
             translog.rollGeneration();
             translog.trimUnreferencedReaders();
-        } catch (IOException e) {
+        } catch (Exception e) {
+            try {
+                failEngine("translog trimming failed", e);
+            } catch (Exception inner) {
+                e.addSuppressed(inner);
+            }
             throw new EngineException(shardId, "failed to roll translog", e);
         }
     }
@@ -350,11 +367,8 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) {
             } catch (Exception e) {
                 logger.warn("failed to close engine", e);
             } finally {
-                try {
-                    logger.debug("engine closed [{}]", reason);
-                } finally {
-                    closedLatch.countDown();
-                }
+                logger.debug("engine closed [{}]", reason);
+                closedLatch.countDown();
             }
         }
     }
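
[Editor's note] The try/catch blocks above adopt the fail-then-rethrow pattern used elsewhere in the engines: if a translog operation throws, the engine is failed first (so the shard can be rebuilt) and the original exception is rethrown, with any secondary failure recorded via addSuppressed rather than masking the root cause. A self-contained sketch of the control flow (hypothetical types):

    final class FailFastSketch {
        static void failEngine(String reason, Exception cause) {
            // the real engine notifies listeners and closes itself here
            throw new IllegalStateException("engine failed: " + reason, cause);
        }

        static void trimTranslog(Runnable trim) {
            try {
                trim.run();
            } catch (Exception e) {
                try {
                    failEngine("translog trimming failed", e);
                } catch (Exception inner) {
                    e.addSuppressed(inner); // never hide the original failure
                }
                throw new RuntimeException("failed to trim translog", e);
            }
        }
    }
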
diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java
index 133eb1199bbcb..6aa00bb9312dd 100644
--- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java
+++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java
@@ -129,9 +129,32 @@ public void testUpdateSegments() throws Exception {
             engine.flush();
             nrtEngine.syncTranslog(); // to advance persisted checkpoint
 
+            Set<Long> seqNos = operations.stream().map(Engine.Operation::seqNo).collect(Collectors.toSet());
+
+            try (Translog.Snapshot snapshot = nrtEngine.getTranslog().newSnapshot()) {
+                assertThat(snapshot.totalOperations(), equalTo(operations.size()));
+                assertThat(
+                    TestTranslog.drainSnapshot(snapshot, false).stream().map(Translog.Operation::seqNo).collect(Collectors.toSet()),
+                    equalTo(seqNos)
+                );
+            }
+
             nrtEngine.updateSegments(engine.getLastCommittedSegmentInfos(), engine.getProcessedLocalCheckpoint());
             assertMatchingSegmentsAndCheckpoints(nrtEngine);
 
+            assertEquals(
+                nrtEngine.getTranslog().getGeneration().translogFileGeneration,
+                engine.getTranslog().getGeneration().translogFileGeneration
+            );
+
+            try (Translog.Snapshot snapshot = nrtEngine.getTranslog().newSnapshot()) {
+                assertThat(snapshot.totalOperations(), equalTo(operations.size()));
+                assertThat(
+                    TestTranslog.drainSnapshot(snapshot, false).stream().map(Translog.Operation::seqNo).collect(Collectors.toSet()),
+                    equalTo(seqNos)
+                );
+            }
+
             // Ensure the same hit count between engines.
             int expectedDocCount;
             try (final Engine.Searcher test = engine.acquireSearcher("test")) {