diff --git a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java index efc522a1f9741..2491a6b9f29b2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java @@ -84,6 +84,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.breaker.CircuitBreakerService; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.plugins.Plugin; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.test.DummyShardLock; @@ -673,7 +674,8 @@ public static final IndexShard newIndexShard( Arrays.asList(listeners), () -> {}, RetentionLeaseSyncer.EMPTY, - cbs + cbs, + SegmentReplicationCheckpointPublisher.EMPTY ); } diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 0739e5afdffcd..0a6d1501f2bea 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -94,6 +94,7 @@ import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache; import org.opensearch.indices.mapper.MapperRegistry; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.plugins.IndexStorePlugin; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; @@ -428,7 +429,8 @@ private long getAvgShardSizeInBytes() throws IOException { public synchronized IndexShard createShard( final ShardRouting routing, final Consumer globalCheckpointSyncer, - final RetentionLeaseSyncer retentionLeaseSyncer + final RetentionLeaseSyncer retentionLeaseSyncer, + final SegmentReplicationCheckpointPublisher checkpointPublisher ) throws IOException { Objects.requireNonNull(retentionLeaseSyncer); /* @@ -530,7 +532,8 @@ public synchronized IndexShard createShard( indexingOperationListeners, () -> globalCheckpointSyncer.accept(shardId), retentionLeaseSyncer, - circuitBreakerService + circuitBreakerService, + this.indexSettings.isSegRepEnabled() && routing.primary() ? checkpointPublisher : null ); eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created"); eventListener.afterIndexShardCreated(indexShard); diff --git a/server/src/main/java/org/opensearch/index/shard/CheckpointRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/CheckpointRefreshListener.java new file mode 100644 index 0000000000000..ac6754bf6a74a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/shard/CheckpointRefreshListener.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.shard; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; + +import java.io.IOException; + +/** + * A {@link ReferenceManager.RefreshListener} that publishes a checkpoint to be consumed by replicas. + * This class is only used with Segment Replication enabled. + * + * @opensearch.internal + */ +public class CheckpointRefreshListener implements ReferenceManager.RefreshListener { + + protected static Logger logger = LogManager.getLogger(CheckpointRefreshListener.class); + + private final IndexShard shard; + private final SegmentReplicationCheckpointPublisher publisher; + + public CheckpointRefreshListener(IndexShard shard, SegmentReplicationCheckpointPublisher publisher) { + this.shard = shard; + this.publisher = publisher; + } + + @Override + public void beforeRefresh() throws IOException { + // Do nothing + } + + @Override + public void afterRefresh(boolean didRefresh) throws IOException { + if (didRefresh) { + publisher.publish(shard); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index ef99b0a12af5f..605d4c84c968c 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -160,6 +160,9 @@ import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; +import org.opensearch.indices.replication.checkpoint.PublishCheckpointRequest; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; +import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.rest.RestStatus; @@ -299,6 +302,7 @@ Runnable getGlobalCheckpointSyncer() { private final AtomicReference pendingRefreshLocation = new AtomicReference<>(); private final RefreshPendingLocationListener refreshPendingLocationListener; private volatile boolean useRetentionLeasesInPeerRecovery; + private final ReferenceManager.RefreshListener checkpointRefreshListener; public IndexShard( final ShardRouting shardRouting, @@ -320,7 +324,8 @@ public IndexShard( final List listeners, final Runnable globalCheckpointSyncer, final RetentionLeaseSyncer retentionLeaseSyncer, - final CircuitBreakerService circuitBreakerService + final CircuitBreakerService circuitBreakerService, + @Nullable final SegmentReplicationCheckpointPublisher checkpointPublisher ) throws IOException { super(shardRouting.shardId(), indexSettings); assert shardRouting.initializing(); @@ -403,6 +408,11 @@ public boolean shouldCache(Query query) { persistMetadata(path, indexSettings, shardRouting, null, logger); this.useRetentionLeasesInPeerRecovery = replicationTracker.hasAllPeerRecoveryRetentionLeases(); this.refreshPendingLocationListener = new RefreshPendingLocationListener(); + if (checkpointPublisher != null) { + this.checkpointRefreshListener = new CheckpointRefreshListener(this, checkpointPublisher); + } else { + this.checkpointRefreshListener = null; + } } public ThreadPool getThreadPool() { @@ -1363,6 +1373,21 @@ public GatedCloseable acquireSafeIndexCommit() throws EngineExcepti } } + /** + * Returns the lastest Replication Checkpoint that shard received + */ + public ReplicationCheckpoint getLatestReplicationCheckpoint() { + return new ReplicationCheckpoint(shardId, 0, 0, 0, 0); + } + + /** + * Invoked when a new checkpoint is received from a primary shard. Starts the copy process. + */ + public synchronized void onNewCheckpoint(final PublishCheckpointRequest request) { + assert shardRouting.primary() == false; + // TODO + } + /** * gets a {@link Store.MetadataSnapshot} for the current directory. This method is safe to call in all lifecycle of the index shard, * without having to worry about the current state of the engine and concurrent flushes. @@ -3118,6 +3143,13 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { } }; + final List internalRefreshListener; + if (this.checkpointRefreshListener != null) { + internalRefreshListener = Arrays.asList(new RefreshMetricUpdater(refreshMetric), checkpointRefreshListener); + } else { + internalRefreshListener = Collections.singletonList(new RefreshMetricUpdater(refreshMetric)); + } + return this.engineConfigFactory.newEngineConfig( shardId, threadPool, @@ -3134,7 +3166,7 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { translogConfig, IndexingMemoryController.SHARD_INACTIVE_TIME_SETTING.get(indexSettings.getSettings()), Arrays.asList(refreshListeners, refreshPendingLocationListener), - Collections.singletonList(new RefreshMetricUpdater(refreshMetric)), + internalRefreshListener, indexSort, circuitBreakerService, globalCheckpointSupplier, diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java index 150f003e1a766..c728952a9dcbf 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesModule.java +++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java @@ -41,6 +41,7 @@ import org.opensearch.common.inject.AbstractModule; import org.opensearch.common.io.stream.NamedWriteableRegistry; import org.opensearch.common.io.stream.NamedWriteableRegistry.Entry; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.NamedXContentRegistry; import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.BooleanFieldMapper; @@ -74,6 +75,7 @@ import org.opensearch.index.shard.PrimaryReplicaSyncer; import org.opensearch.indices.cluster.IndicesClusterStateService; import org.opensearch.indices.mapper.MapperRegistry; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.indices.store.IndicesStore; import org.opensearch.indices.store.TransportNodesListShardStoreMetadata; import org.opensearch.plugins.MapperPlugin; @@ -280,6 +282,9 @@ protected void configure() { bind(RetentionLeaseSyncAction.class).asEagerSingleton(); bind(RetentionLeaseBackgroundSyncAction.class).asEagerSingleton(); bind(RetentionLeaseSyncer.class).asEagerSingleton(); + if (FeatureFlags.isEnabled(FeatureFlags.REPLICATION_TYPE)) { + bind(SegmentReplicationCheckpointPublisher.class).asEagerSingleton(); + } } /** diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 74870f70d76bd..79fd2893fb78c 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -139,6 +139,7 @@ import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.node.Node; import org.opensearch.plugins.IndexStorePlugin; import org.opensearch.plugins.PluginsService; @@ -843,6 +844,7 @@ public synchronized void verifyIndexMetadata(IndexMetadata metadata, IndexMetada @Override public IndexShard createShard( final ShardRouting shardRouting, + final SegmentReplicationCheckpointPublisher checkpointPublisher, final PeerRecoveryTargetService recoveryTargetService, final RecoveryListener recoveryListener, final RepositoriesService repositoriesService, @@ -857,7 +859,7 @@ public IndexShard createShard( IndexService indexService = indexService(shardRouting.index()); assert indexService != null; RecoveryState recoveryState = indexService.createRecoveryState(shardRouting, targetNode, sourceNode); - IndexShard indexShard = indexService.createShard(shardRouting, globalCheckpointSyncer, retentionLeaseSyncer); + IndexShard indexShard = indexService.createShard(shardRouting, globalCheckpointSyncer, retentionLeaseSyncer, checkpointPublisher); indexShard.addShardFailureCallback(onShardFailure); indexShard.startRecovery(recoveryState, recoveryTargetService, recoveryListener, repositoriesService, mapping -> { assert recoveryState.getRecoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS diff --git a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java index e230d9ca708f0..a600581384f31 100644 --- a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java +++ b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java @@ -80,6 +80,7 @@ import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.repositories.RepositoriesService; import org.opensearch.search.SearchService; @@ -138,6 +139,8 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent imple private final Consumer globalCheckpointSyncer; private final RetentionLeaseSyncer retentionLeaseSyncer; + private final SegmentReplicationCheckpointPublisher checkpointPublisher; + @Inject public IndicesClusterStateService( final Settings settings, @@ -153,13 +156,15 @@ public IndicesClusterStateService( final SnapshotShardsService snapshotShardsService, final PrimaryReplicaSyncer primaryReplicaSyncer, final GlobalCheckpointSyncAction globalCheckpointSyncAction, - final RetentionLeaseSyncer retentionLeaseSyncer + final RetentionLeaseSyncer retentionLeaseSyncer, + final SegmentReplicationCheckpointPublisher checkpointPublisher ) { this( settings, indicesService, clusterService, threadPool, + checkpointPublisher, recoveryTargetService, shardStateAction, nodeMappingRefreshAction, @@ -179,6 +184,7 @@ public IndicesClusterStateService( final AllocatedIndices> indicesService, final ClusterService clusterService, final ThreadPool threadPool, + final SegmentReplicationCheckpointPublisher checkpointPublisher, final PeerRecoveryTargetService recoveryTargetService, final ShardStateAction shardStateAction, final NodeMappingRefreshAction nodeMappingRefreshAction, @@ -191,6 +197,7 @@ public IndicesClusterStateService( final RetentionLeaseSyncer retentionLeaseSyncer ) { this.settings = settings; + this.checkpointPublisher = checkpointPublisher; this.buildInIndexListener = Arrays.asList(peerRecoverySourceService, recoveryTargetService, searchService, snapshotShardsService); this.indicesService = indicesService; this.clusterService = clusterService; @@ -624,6 +631,7 @@ private void createShard(DiscoveryNodes nodes, RoutingTable routingTable, ShardR logger.debug("{} creating shard with primary term [{}]", shardRouting.shardId(), primaryTerm); indicesService.createShard( shardRouting, + checkpointPublisher, recoveryTargetService, new RecoveryListener(shardRouting, primaryTerm, this), repositoriesService, @@ -981,6 +989,7 @@ U createIndex(IndexMetadata indexMetadata, List builtInIndex */ T createShard( ShardRouting shardRouting, + SegmentReplicationCheckpointPublisher checkpointPublisher, PeerRecoveryTargetService recoveryTargetService, RecoveryListener recoveryListener, RepositoriesService repositoriesService, diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java new file mode 100644 index 0000000000000..b74a69971ebd5 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java @@ -0,0 +1,173 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication.checkpoint; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.ExceptionsHelper; +import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.replication.ReplicationResponse; +import org.opensearch.action.support.replication.ReplicationTask; +import org.opensearch.action.support.replication.TransportReplicationAction; +import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.index.IndexNotFoundException; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.IndexShardClosedException; +import org.opensearch.indices.IndicesService; +import org.opensearch.node.NodeClosedException; +import org.opensearch.tasks.Task; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportResponseHandler; +import org.opensearch.transport.TransportService; + +import java.io.IOException; +import java.util.Objects; + +/** + * Replication action responsible for publishing checkpoint to a replica shard. + * + * @opensearch.internal + */ + +public class PublishCheckpointAction extends TransportReplicationAction< + PublishCheckpointRequest, + PublishCheckpointRequest, + ReplicationResponse> { + + public static final String ACTION_NAME = "indices:admin/publishCheckpoint"; + protected static Logger logger = LogManager.getLogger(PublishCheckpointAction.class); + + @Inject + public PublishCheckpointAction( + Settings settings, + TransportService transportService, + ClusterService clusterService, + IndicesService indicesService, + ThreadPool threadPool, + ShardStateAction shardStateAction, + ActionFilters actionFilters + ) { + super( + settings, + ACTION_NAME, + transportService, + clusterService, + indicesService, + threadPool, + shardStateAction, + actionFilters, + PublishCheckpointRequest::new, + PublishCheckpointRequest::new, + ThreadPool.Names.REFRESH + ); + } + + @Override + protected ReplicationResponse newResponseInstance(StreamInput in) throws IOException { + return new ReplicationResponse(in); + } + + @Override + protected void doExecute(Task task, PublishCheckpointRequest request, ActionListener listener) { + assert false : "use PublishCheckpointAction#publish"; + } + + /** + * Publish checkpoint request to shard + */ + final void publish(IndexShard indexShard) { + String primaryAllocationId = indexShard.routingEntry().allocationId().getId(); + long primaryTerm = indexShard.getPendingPrimaryTerm(); + final ThreadContext threadContext = threadPool.getThreadContext(); + try (ThreadContext.StoredContext ignore = threadContext.stashContext()) { + // we have to execute under the system context so that if security is enabled the sync is authorized + threadContext.markAsSystemContext(); + PublishCheckpointRequest request = new PublishCheckpointRequest(indexShard.getLatestReplicationCheckpoint()); + final ReplicationTask task = (ReplicationTask) taskManager.register("transport", "segrep_publish_checkpoint", request); + transportService.sendChildRequest( + clusterService.localNode(), + transportPrimaryAction, + new ConcreteShardRequest<>(request, primaryAllocationId, primaryTerm), + task, + transportOptions, + new TransportResponseHandler() { + @Override + public ReplicationResponse read(StreamInput in) throws IOException { + return newResponseInstance(in); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public void handleResponse(ReplicationResponse response) { + task.setPhase("finished"); + taskManager.unregister(task); + } + + @Override + public void handleException(TransportException e) { + task.setPhase("finished"); + taskManager.unregister(task); + if (ExceptionsHelper.unwrap(e, NodeClosedException.class) != null) { + // node shutting down + return; + } + if (ExceptionsHelper.unwrap( + e, + IndexNotFoundException.class, + AlreadyClosedException.class, + IndexShardClosedException.class + ) != null) { + // the index was deleted or the shard is closed + return; + } + logger.warn( + new ParameterizedMessage("{} segment replication checkpoint publishing failed", indexShard.shardId()), + e + ); + } + } + ); + } + } + + @Override + protected void shardOperationOnPrimary( + PublishCheckpointRequest request, + IndexShard primary, + ActionListener> listener + ) { + ActionListener.completeWith(listener, () -> new PrimaryResult<>(request, new ReplicationResponse())); + } + + @Override + protected void shardOperationOnReplica(PublishCheckpointRequest request, IndexShard replica, ActionListener listener) { + Objects.requireNonNull(request); + Objects.requireNonNull(replica); + ActionListener.completeWith(listener, () -> { + logger.trace("Checkpoint received on replica {}", request); + if (request.getCheckpoint().getShardId().equals(replica.shardId())) { + replica.onNewCheckpoint(request); + } + return new ReplicaResult(); + }); + } +} diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointRequest.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointRequest.java new file mode 100644 index 0000000000000..740fd3bccb7c4 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointRequest.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication.checkpoint; + +import org.opensearch.action.support.replication.ReplicationRequest; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Replication request responsible for publishing checkpoint request to a replica shard. + * + * @opensearch.internal + */ +public class PublishCheckpointRequest extends ReplicationRequest { + + private final ReplicationCheckpoint checkpoint; + + public PublishCheckpointRequest(ReplicationCheckpoint checkpoint) { + super(checkpoint.getShardId()); + this.checkpoint = checkpoint; + } + + public PublishCheckpointRequest(StreamInput in) throws IOException { + super(in); + this.checkpoint = new ReplicationCheckpoint(in); + } + + /** + * Returns Replication Checkpoint + */ + public ReplicationCheckpoint getCheckpoint() { + return checkpoint; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + checkpoint.writeTo(out); + } + + @Override + public String toString() { + return "PublishCheckpointRequest{" + "checkpoint=" + checkpoint + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/ReplicationCheckpoint.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/ReplicationCheckpoint.java new file mode 100644 index 0000000000000..98ab9cc4c1708 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/ReplicationCheckpoint.java @@ -0,0 +1,136 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication.checkpoint; + +import org.opensearch.common.Nullable; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.index.shard.ShardId; + +import java.io.IOException; +import java.util.Objects; + +/** + * Represents a Replication Checkpoint which is sent to a replica shard. + * + * @opensearch.internal + */ +public class ReplicationCheckpoint implements Writeable { + + private final ShardId shardId; + private final long primaryTerm; + private final long segmentsGen; + private final long seqNo; + private final long segmentInfosVersion; + + public ReplicationCheckpoint(ShardId shardId, long primaryTerm, long segmentsGen, long seqNo, long segmentInfosVersion) { + this.shardId = shardId; + this.primaryTerm = primaryTerm; + this.segmentsGen = segmentsGen; + this.seqNo = seqNo; + this.segmentInfosVersion = segmentInfosVersion; + } + + public ReplicationCheckpoint(StreamInput in) throws IOException { + shardId = new ShardId(in); + primaryTerm = in.readLong(); + segmentsGen = in.readLong(); + seqNo = in.readLong(); + segmentInfosVersion = in.readLong(); + } + + /** + * The primary term of this Replication Checkpoint. + * + * @return the primary term + */ + public long getPrimaryTerm() { + return primaryTerm; + } + + /** + * @return the Segments Gen number + */ + public long getSegmentsGen() { + return segmentsGen; + } + + /** + * @return the Segment Info version + */ + public long getSegmentInfosVersion() { + return segmentInfosVersion; + } + + /** + * @return the Seq number + */ + public long getSeqNo() { + return seqNo; + } + + /** + * Shard Id of primary shard. + * + * @return the Shard Id + */ + public ShardId getShardId() { + return shardId; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + shardId.writeTo(out); + out.writeLong(primaryTerm); + out.writeLong(segmentsGen); + out.writeLong(seqNo); + out.writeLong(segmentInfosVersion); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ReplicationCheckpoint that = (ReplicationCheckpoint) o; + return primaryTerm == that.primaryTerm + && segmentsGen == that.segmentsGen + && seqNo == that.seqNo + && segmentInfosVersion == that.segmentInfosVersion + && Objects.equals(shardId, that.shardId); + } + + @Override + public int hashCode() { + return Objects.hash(shardId, primaryTerm, segmentsGen, seqNo); + } + + /** + * Checks if other is aheadof current replication point by comparing segmentInfosVersion. Returns true for null + */ + public boolean isAheadOf(@Nullable ReplicationCheckpoint other) { + return other == null || segmentInfosVersion > other.getSegmentInfosVersion(); + } + + @Override + public String toString() { + return "ReplicationCheckpoint{" + + "shardId=" + + shardId + + ", primaryTerm=" + + primaryTerm + + ", segmentsGen=" + + segmentsGen + + ", seqNo=" + + seqNo + + ", version=" + + segmentInfosVersion + + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/SegmentReplicationCheckpointPublisher.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/SegmentReplicationCheckpointPublisher.java new file mode 100644 index 0000000000000..2b09901a947fe --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/SegmentReplicationCheckpointPublisher.java @@ -0,0 +1,49 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication.checkpoint; + +import org.opensearch.common.inject.Inject; +import org.opensearch.index.shard.IndexShard; + +import java.util.Objects; + +/** + * Publish Segment Replication Checkpoint. + * + * @opensearch.internal + */ +public class SegmentReplicationCheckpointPublisher { + + private final PublishAction publishAction; + + @Inject + public SegmentReplicationCheckpointPublisher(PublishCheckpointAction publishAction) { + this(publishAction::publish); + } + + public SegmentReplicationCheckpointPublisher(PublishAction publishAction) { + this.publishAction = Objects.requireNonNull(publishAction); + } + + public void publish(IndexShard indexShard) { + publishAction.publish(indexShard); + } + + /** + * Represents an action that is invoked to publish segment replication checkpoint to replica shard + */ + public interface PublishAction { + void publish(IndexShard indexShard); + } + + /** + * NoOp Checkpoint publisher + */ + public static final SegmentReplicationCheckpointPublisher EMPTY = new SegmentReplicationCheckpointPublisher(indexShard -> {}); +} diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/package-info.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/package-info.java new file mode 100644 index 0000000000000..a30154ea9206a --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Package containing classes to implement a replication checkpoint */ +package org.opensearch.indices.replication.checkpoint; diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index 47a37e5edaa0c..49d0c089f072b 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; +import org.apache.lucene.search.ReferenceManager; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.AlreadyClosedException; @@ -135,6 +136,7 @@ import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.repositories.IndexId; @@ -201,6 +203,7 @@ import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.oneOf; import static org.hamcrest.Matchers.sameInstance; +import static org.mockito.Mockito.mock; import static org.opensearch.cluster.routing.TestShardRouting.newShardRouting; import static org.opensearch.common.lucene.Lucene.cleanLuceneIndex; import static org.opensearch.common.xcontent.ToXContent.EMPTY_PARAMS; @@ -3428,6 +3431,72 @@ public void testReadSnapshotConcurrently() throws IOException, InterruptedExcept closeShards(newShard); } + /** + * here we are mocking a SegmentReplicationcheckpointPublisher and testing on index shard if CheckpointRefreshListener is added to the InternalrefreshListerners List + */ + public void testCheckpointRefreshListener() throws IOException { + final SegmentReplicationCheckpointPublisher mock = mock(SegmentReplicationCheckpointPublisher.class); + IndexShard shard = newStartedShard(p -> newShard(mock), true); + List refreshListeners = shard.getEngine().config().getInternalRefreshListener(); + assertTrue(refreshListeners.stream().anyMatch(e -> e instanceof CheckpointRefreshListener)); + closeShards(shard); + } + + /** + * here we are passing null in place of SegmentReplicationCheckpointPublisher and testing on index shard if CheckpointRefreshListener is not added to the InternalrefreshListerners List + */ + public void testCheckpointRefreshListenerWithNull() throws IOException { + IndexShard shard = newStartedShard(p -> newShard(null), true); + List refreshListeners = shard.getEngine().config().getInternalRefreshListener(); + assertFalse(refreshListeners.stream().anyMatch(e -> e instanceof CheckpointRefreshListener)); + closeShards(shard); + } + + /** + * creates a new initializing shard. The shard will will be put in its proper path under the + * current node id the shard is assigned to. + * @param checkpointPublisher Segment Replication Checkpoint Publisher to publish checkpoint + */ + private IndexShard newShard(SegmentReplicationCheckpointPublisher checkpointPublisher) throws IOException { + final ShardId shardId = new ShardId("index", "_na_", 0); + final ShardRouting shardRouting = TestShardRouting.newShardRouting( + shardId, + randomAlphaOfLength(10), + true, + ShardRoutingState.INITIALIZING, + RecoverySource.EmptyStoreRecoverySource.INSTANCE + ); + final NodeEnvironment.NodePath nodePath = new NodeEnvironment.NodePath(createTempDir()); + ShardPath shardPath = new ShardPath(false, nodePath.resolve(shardId), nodePath.resolve(shardId), shardId); + + Settings indexSettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, "SEGMENT") + .put(IndexSettings.INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING.getKey(), between(0, 1000)) + .put(Settings.EMPTY) + .build(); + IndexMetadata metadata = IndexMetadata.builder(shardRouting.getIndexName()) + .settings(indexSettings) + .primaryTerm(0, primaryTerm) + .putMapping("{ \"properties\": {} }") + .build(); + return newShard( + shardRouting, + shardPath, + metadata, + null, + null, + new InternalEngineFactory(), + new EngineConfigFactory(new IndexSettings(metadata, metadata.getSettings())), + () -> {}, + RetentionLeaseSyncer.EMPTY, + EMPTY_EVENT_LISTENER, + checkpointPublisher + ); + } + public void testIndexCheckOnStartup() throws Exception { final IndexShard indexShard = newStartedShard(true); diff --git a/server/src/test/java/org/opensearch/indices/IndicesLifecycleListenerSingleNodeTests.java b/server/src/test/java/org/opensearch/indices/IndicesLifecycleListenerSingleNodeTests.java index 5f3d03f85f324..0989bf869f18e 100644 --- a/server/src/test/java/org/opensearch/indices/IndicesLifecycleListenerSingleNodeTests.java +++ b/server/src/test/java/org/opensearch/indices/IndicesLifecycleListenerSingleNodeTests.java @@ -49,6 +49,7 @@ import org.opensearch.index.shard.ShardId; import org.opensearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.test.OpenSearchSingleNodeTestCase; import java.util.Arrays; @@ -148,7 +149,12 @@ public void afterIndexRemoved(Index index, IndexSettings indexSettings, IndexRem newRouting = newRouting.moveToUnassigned(unassignedInfo) .updateUnassigned(unassignedInfo, RecoverySource.EmptyStoreRecoverySource.INSTANCE); newRouting = ShardRoutingHelper.initialize(newRouting, nodeId); - IndexShard shard = index.createShard(newRouting, s -> {}, RetentionLeaseSyncer.EMPTY); + IndexShard shard = index.createShard( + newRouting, + s -> {}, + RetentionLeaseSyncer.EMPTY, + SegmentReplicationCheckpointPublisher.EMPTY + ); IndexShardTestCase.updateRoutingEntry(shard, newRouting); assertEquals(5, counter.get()); final DiscoveryNode localNode = new DiscoveryNode( diff --git a/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java b/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java index 97cb1dc341b13..0619e3e3f62a2 100644 --- a/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java +++ b/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java @@ -59,6 +59,7 @@ import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.repositories.RepositoriesService; import org.opensearch.test.OpenSearchTestCase; @@ -253,6 +254,7 @@ public MockIndexService indexService(Index index) { @Override public MockIndexShard createShard( final ShardRouting shardRouting, + final SegmentReplicationCheckpointPublisher checkpointPublisher, final PeerRecoveryTargetService recoveryTargetService, final RecoveryListener recoveryListener, final RepositoriesService repositoriesService, diff --git a/server/src/test/java/org/opensearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java b/server/src/test/java/org/opensearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java index 67d69a5dabde1..5bac40ab64d11 100644 --- a/server/src/test/java/org/opensearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java +++ b/server/src/test/java/org/opensearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java @@ -66,6 +66,7 @@ import org.opensearch.index.shard.PrimaryReplicaSyncer; import org.opensearch.index.shard.ShardId; import org.opensearch.indices.recovery.PeerRecoveryTargetService; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.repositories.RepositoriesService; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -568,6 +569,7 @@ private IndicesClusterStateService createIndicesClusterStateService( indicesService, clusterService, threadPool, + SegmentReplicationCheckpointPublisher.EMPTY, recoveryTargetService, shardStateAction, null, diff --git a/server/src/test/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointActionTests.java b/server/src/test/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointActionTests.java new file mode 100644 index 0000000000000..b54237a130431 --- /dev/null +++ b/server/src/test/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointActionTests.java @@ -0,0 +1,159 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication.checkpoint; + +import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.ActionTestUtils; +import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.action.support.replication.TransportReplicationAction; +import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.internal.io.IOUtils; +import org.opensearch.index.Index; +import org.opensearch.index.IndexService; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.ShardId; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.recovery.RecoverySettings; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.CapturingTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.Collections; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.hamcrest.Matchers.sameInstance; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; + +public class PublishCheckpointActionTests extends OpenSearchTestCase { + + private ThreadPool threadPool; + private CapturingTransport transport; + private ClusterService clusterService; + private TransportService transportService; + private ShardStateAction shardStateAction; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool(getClass().getName()); + transport = new CapturingTransport(); + clusterService = createClusterService(threadPool); + transportService = transport.createTransportService( + clusterService.getSettings(), + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundAddress -> clusterService.localNode(), + null, + Collections.emptySet() + ); + transportService.start(); + transportService.acceptIncomingRequests(); + shardStateAction = new ShardStateAction(clusterService, transportService, null, null, threadPool); + } + + @Override + public void tearDown() throws Exception { + try { + IOUtils.close(transportService, clusterService, transport); + } finally { + terminate(threadPool); + } + super.tearDown(); + } + + public void testPublishCheckpointActionOnPrimary() throws InterruptedException { + final IndicesService indicesService = mock(IndicesService.class); + + final Index index = new Index("index", "uuid"); + final IndexService indexService = mock(IndexService.class); + when(indicesService.indexServiceSafe(index)).thenReturn(indexService); + + final int id = randomIntBetween(0, 4); + final IndexShard indexShard = mock(IndexShard.class); + when(indexService.getShard(id)).thenReturn(indexShard); + + final ShardId shardId = new ShardId(index, id); + when(indexShard.shardId()).thenReturn(shardId); + + final RecoverySettings recoverySettings = new RecoverySettings(Settings.EMPTY, clusterService.getClusterSettings()); + + final PublishCheckpointAction action = new PublishCheckpointAction( + Settings.EMPTY, + transportService, + clusterService, + indicesService, + threadPool, + shardStateAction, + new ActionFilters(Collections.emptySet()) + ); + + final ReplicationCheckpoint checkpoint = new ReplicationCheckpoint(indexShard.shardId(), 1111, 111, 11, 1); + + final PublishCheckpointRequest request = new PublishCheckpointRequest(checkpoint); + + action.shardOperationOnPrimary(request, indexShard, ActionTestUtils.assertNoFailureListener(result -> { + // we should forward the request containing the current publish checkpoint to the replica + assertThat(result.replicaRequest(), sameInstance(request)); + })); + + } + + public void testPublishCheckpointActionOnReplica() { + final IndicesService indicesService = mock(IndicesService.class); + + final Index index = new Index("index", "uuid"); + final IndexService indexService = mock(IndexService.class); + when(indicesService.indexServiceSafe(index)).thenReturn(indexService); + + final int id = randomIntBetween(0, 4); + final IndexShard indexShard = mock(IndexShard.class); + when(indexService.getShard(id)).thenReturn(indexShard); + + final ShardId shardId = new ShardId(index, id); + when(indexShard.shardId()).thenReturn(shardId); + + final RecoverySettings recoverySettings = new RecoverySettings(Settings.EMPTY, clusterService.getClusterSettings()); + + final PublishCheckpointAction action = new PublishCheckpointAction( + Settings.EMPTY, + transportService, + clusterService, + indicesService, + threadPool, + shardStateAction, + new ActionFilters(Collections.emptySet()) + ); + + final ReplicationCheckpoint checkpoint = new ReplicationCheckpoint(indexShard.shardId(), 1111, 111, 11, 1); + + final PublishCheckpointRequest request = new PublishCheckpointRequest(checkpoint); + + final PlainActionFuture listener = PlainActionFuture.newFuture(); + action.shardOperationOnReplica(request, indexShard, listener); + final TransportReplicationAction.ReplicaResult result = listener.actionGet(); + + // onNewCheckpoint should be called on shard with checkpoint request + verify(indexShard).onNewCheckpoint(request); + + // the result should indicate success + final AtomicBoolean success = new AtomicBoolean(); + result.runPostReplicaActions(ActionListener.wrap(r -> success.set(true), e -> fail(e.toString()))); + assertTrue(success.get()); + + } + +} diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 7732d5c868c36..d24016922651c 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -182,6 +182,7 @@ import org.opensearch.indices.recovery.PeerRecoverySourceService; import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoverySettings; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.ingest.IngestService; import org.opensearch.monitor.StatusInfo; import org.opensearch.node.ResponseCollectorService; @@ -1871,7 +1872,8 @@ public void onFailure(final Exception e) { shardStateAction, actionFilters ), - RetentionLeaseSyncer.EMPTY + RetentionLeaseSyncer.EMPTY, + SegmentReplicationCheckpointPublisher.EMPTY ); Map actions = new HashMap<>(); final SystemIndices systemIndices = new SystemIndices(emptyMap()); diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 298fdcaea6465..371fa6d102304 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -94,6 +94,7 @@ import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; import org.opensearch.indices.recovery.StartRecoveryRequest; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.repositories.IndexId; @@ -411,6 +412,47 @@ protected IndexShard newShard( ); } + /** + * creates a new initializing shard. The shard will will be put in its proper path under the + * current node id the shard is assigned to. + * @param routing shard routing to use + * @param shardPath path to use for shard data + * @param indexMetadata indexMetadata for the shard, including any mapping + * @param storeProvider an optional custom store provider to use. If null a default file based store will be created + * @param indexReaderWrapper an optional wrapper to be used during search + * @param globalCheckpointSyncer callback for syncing global checkpoints + * @param indexEventListener index event listener + * @param listeners an optional set of listeners to add to the shard + */ + protected IndexShard newShard( + ShardRouting routing, + ShardPath shardPath, + IndexMetadata indexMetadata, + @Nullable CheckedFunction storeProvider, + @Nullable CheckedFunction indexReaderWrapper, + @Nullable EngineFactory engineFactory, + @Nullable EngineConfigFactory engineConfigFactory, + Runnable globalCheckpointSyncer, + RetentionLeaseSyncer retentionLeaseSyncer, + IndexEventListener indexEventListener, + IndexingOperationListener... listeners + ) throws IOException { + return newShard( + routing, + shardPath, + indexMetadata, + storeProvider, + indexReaderWrapper, + engineFactory, + engineConfigFactory, + globalCheckpointSyncer, + retentionLeaseSyncer, + indexEventListener, + SegmentReplicationCheckpointPublisher.EMPTY, + listeners + ); + } + /** * creates a new initializing shard. * @param routing shard routing to use @@ -420,6 +462,7 @@ protected IndexShard newShard( * @param indexReaderWrapper an optional wrapper to be used during search * @param globalCheckpointSyncer callback for syncing global checkpoints * @param indexEventListener index event listener + * @param checkpointPublisher segment Replication Checkpoint Publisher to publish checkpoint * @param listeners an optional set of listeners to add to the shard */ protected IndexShard newShard( @@ -433,6 +476,7 @@ protected IndexShard newShard( Runnable globalCheckpointSyncer, RetentionLeaseSyncer retentionLeaseSyncer, IndexEventListener indexEventListener, + SegmentReplicationCheckpointPublisher checkpointPublisher, IndexingOperationListener... listeners ) throws IOException { final Settings nodeSettings = Settings.builder().put("node.name", routing.currentNodeId()).build(); @@ -480,7 +524,8 @@ protected IndexShard newShard( Arrays.asList(listeners), globalCheckpointSyncer, retentionLeaseSyncer, - breakerService + breakerService, + checkpointPublisher ); indexShard.addShardFailureCallback(DEFAULT_SHARD_FAILURE_HANDLER); success = true;