[Segment Replication] Rolling upgrade support for default codecs #7698

Closed · wants to merge 4 commits
CHANGELOG.md (3 changes: 2 additions & 1 deletion)
@@ -101,6 +101,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Added @dbwiddis as an OpenSearch maintainer ([#7665](https://github.com/opensearch-project/OpenSearch/pull/7665))
- [Extensions] Add ExtensionAwarePlugin extension point to add custom settings for extensions ([#7526](https://github.com/opensearch-project/OpenSearch/pull/7526))
- Add new cluster setting to set default index replication type ([#7420](https://github.com/opensearch-project/OpenSearch/pull/7420))
- [Segment Replication] Rolling upgrade support for default codecs ([#7698](https://github.com/opensearch-project/OpenSearch/pull/7698))

### Dependencies
- Bump `com.netflix.nebula:gradle-info-plugin` from 12.0.0 to 12.1.3 (#7564)
@@ -155,4 +156,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Security

[Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD
[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.7...2.x
[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.7...2.x
ClusterChangedEvent.java
@@ -207,6 +207,18 @@ public boolean blocksChanged() {
return state.blocks() != previousState.blocks();
}

/**
 * Returns <code>true</code> if a version upgrade has taken place in the cluster
 */
public boolean clusterUpgraded() {
    // previous state was a mixed-version cluster and the current state is not,
    // which indicates the upgrade has completed
    if ((previousState.nodes().getMinNodeVersion() != previousState.nodes().getMaxNodeVersion())
        && (state.nodes().getMinNodeVersion() == state.nodes().getMaxNodeVersion())) {
        return true;
    }
    return false;
}

Review thread on clusterUpgraded():

Collaborator: Rename to something better, maybe hasMixedVersionNodes.

Contributor (author): We're using this method to check that a cluster upgrade has been completed: it checks that the cluster used to have mixed-version nodes and the current state does not. hasMixedVersionNodes might be misleading in this case.

Collaborator: clusterUpgraded is equivalent to NOT hasMixedVersionNodes.
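A minimal sketch of the helper suggested in the thread, assuming the same state/previousState fields used above; hypothetical, not part of this PR:

    // Hypothetical inverse helper: the current state is "mixed" when its min
    // and max node versions differ. clusterUpgraded() then reads as:
    // previousState was mixed and the current state is not.
    public boolean hasMixedVersionNodes() {
        return state.nodes().getMinNodeVersion() != state.nodes().getMaxNodeVersion();
    }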

/**
 * Returns <code>true</code> iff the local node is the master node of the cluster.
*/
CodecService.java
@@ -36,10 +36,13 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
import org.apache.lucene.codecs.lucene95.Lucene95Codec.Mode;
import org.opensearch.Version;
import org.opensearch.common.Nullable;
import org.opensearch.common.collect.MapBuilder;
import org.opensearch.index.mapper.MapperService;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
@@ -58,8 +61,11 @@ public class CodecService {
public static final String BEST_COMPRESSION_CODEC = "best_compression";
/** the raw unfiltered lucene default. useful for testing */
public static final String LUCENE_DEFAULT_CODEC = "lucene_default";
static Map<Version, String> versionStringMap = new HashMap<>();

Review thread on versionStringMap:

Member: nit: This variable declaration can go inside loadMap() as it is only used to init opensearchVersionToLuceneCodec. It doesn't need to be static.


public static Map<Version, String> opensearchVersionToLuceneCodec;

Review thread on opensearchVersionToLuceneCodec:

Member: nit: opensearchVersionToLuceneCodec -> versionToCodecMap. There are integrations which override Lucene codecs.

Member: This variable can be scoped protected, which still allows integrations overriding CodecService to provide their own mapping.



public CodecService(@Nullable MapperService mapperService, Logger logger) {
loadMap();
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
if (mapperService == null) {
codecs.put(DEFAULT_CODEC, new Lucene95Codec());
@@ -75,6 +81,14 @@ public CodecService(@Nullable MapperService mapperService, Logger logger) {
this.codecs = codecs.immutableMap();
}

public void loadMap() {
    versionStringMap.put(Version.fromString("3.0.0"), "Lucene95");
    versionStringMap.put(Version.fromString("2.8.0"), "Lucene95");
    versionStringMap.put(Version.fromString("2.7.1"), "Lucene95");
    versionStringMap.put(Version.fromString("2.7.0"), "Lucene95");
    opensearchVersionToLuceneCodec = Collections.unmodifiableMap(new HashMap<>(versionStringMap));
}

Review thread on lines +85 to +88:

Member:
1. nit: Rather than having a specific call, we can statically initialize this map, since it is only invoked from the class ctor; I don't see an advantage to lazy loading:

       public static final Map<Version, String> opensearchVersionToLuceneCodec;
       static {
           Map<Version, String> versionStringMap = new HashMap<>();
           versionStringMap.put(Version.fromString("3.0.0"), "Lucene95");
           ...
           opensearchVersionToLuceneCodec = Collections.unmodifiableMap(new HashMap<>(versionStringMap));
       }

2. Can we build this map by reading Version.java, as this info is present there? That would prevent future maintenance of the version <-> lucene codec map. I know this is not straightforward, as Lucene version bumps don't necessarily mean codec bumps. We can take this in a follow-up PR.
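A quick illustration of the lookup this map supports; the entries are exactly those seeded in loadMap() above:

    // Illustrative only: resolve the Lucene codec name written by a given
    // OpenSearch version (every entry seeded above maps to "Lucene95").
    String codec = CodecService.opensearchVersionToLuceneCodec.get(Version.fromString("2.7.0"));
    assert "Lucene95".equals(codec);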

public Codec codec(String name) {
Codec codec = codecs.get(name);
if (codec == null) {
EngineConfig.java
@@ -40,6 +40,7 @@
import org.apache.lucene.search.ReferenceManager;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.similarities.Similarity;
import org.opensearch.Version;
import org.opensearch.common.Nullable;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Setting.Property;
@@ -81,7 +82,7 @@ public final class EngineConfig {
private final TranslogDeletionPolicyFactory translogDeletionPolicyFactory;
private volatile boolean enableGcDeletes = true;
private final TimeValue flushMergesAfter;
private final String codecName;
private String codecName;
private final ThreadPool threadPool;
private final Engine.Warmer warmer;
private final Store store;
@@ -105,6 +106,7 @@ public final class EngineConfig {
private final boolean isReadOnlyReplica;
private final BooleanSupplier primaryModeSupplier;
private final Comparator<LeafReader> leafSorter;
private Version clusterMinVersion;

/**
* A supplier of the outstanding retention leases. This is used during merged operations to determine which operations that have been
@@ -176,7 +178,8 @@ private EngineConfig(Builder builder) {
this.similarity = builder.similarity;
this.codecService = builder.codecService;
this.eventListener = builder.eventListener;
codecName = builder.indexSettings.getValue(INDEX_CODEC_SETTING);
this.codecName = builder.indexSettings.getValue(INDEX_CODEC_SETTING);
this.clusterMinVersion = Version.CURRENT;
// We need to make the indexing buffer for this shard at least as large
// as the amount of memory that is available for all engines on the
// local node so that decisions to flush segments to disk are made by
@@ -251,6 +254,44 @@ public Codec getCodec() {
return codecService.codec(codecName);
}

/**
* Returns the codec name of the lucene codec used for writing new segments
*/
public String getCodecName() {
return getCodec().getName();
}

/**
* Sets the codec name of the lucene codec used for writing new segments
*/
public void setCodecName(String name) {
this.codecName = name;
}

/**
 * Returns the minimum opensearch version among all nodes of the cluster when an upgrade is in progress
 * and segment replication is enabled; null when no upgrade is in progress.
*/
public Version getClusterMinVersion() {
return clusterMinVersion;
}

/**
 * Sets the minimum opensearch version among all nodes of the cluster when an upgrade is in progress
 * and segment replication is enabled.
*/
public void setClusterMinVersion(Version clusterMinVersion) {
this.clusterMinVersion = clusterMinVersion;
}

/**
 * Returns the BWC {@link Codec} to be used by the engine's {@link org.apache.lucene.index.IndexWriter}
 * during a rolling upgrade, when the cluster is in a mixed-version state and segment replication is enabled
 */
public Codec getBWCCodec(String codecName) {
    return codecService.codec(codecName);
}
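For orientation, a hedged sketch of how these new hooks combine during a rolling upgrade; it mirrors the NRTReplicationEngineFactory change below rather than defining new behavior:

    // Sketch: pin the engine to the codec of the oldest node in the cluster so
    // replicas still on the old version can read the segments it replicates.
    DiscoveryNodes nodes = clusterService.state().nodes();
    config.setClusterMinVersion(nodes.getMinNodeVersion());
    String bwcName = CodecService.opensearchVersionToLuceneCodec.get(nodes.getMinNodeVersion());
    config.setCodecName(config.getBWCCodec(bwcName).getName());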

/**
* Returns a thread-pool mainly used to get estimated time stamps from
* {@link org.opensearch.threadpool.ThreadPool#relativeTimeInMillis()} and to schedule
NRTReplicationEngineFactory.java
@@ -8,18 +8,34 @@

package org.opensearch.index.engine;

import org.opensearch.cluster.node.DiscoveryNodes;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.index.codec.CodecService;

/**
* Engine Factory implementation used with Segment Replication that wires up replica shards with an ${@link NRTReplicationEngine}
* and primary with an ${@link InternalEngine}
*
* @opensearch.internal
*/
public class NRTReplicationEngineFactory implements EngineFactory {

private final ClusterService clusterService;

public NRTReplicationEngineFactory(ClusterService clusterService) {
this.clusterService = clusterService;
}

@Override
public Engine newReadWriteEngine(EngineConfig config) {
if (config.isReadOnlyReplica()) {
return new NRTReplicationEngine(config);
}
if (clusterService != null) {
DiscoveryNodes nodes = this.clusterService.state().nodes();
config.setClusterMinVersion(nodes.getMinNodeVersion());
config.setCodecName(config.getBWCCodec(CodecService.opensearchVersionToLuceneCodec.get(nodes.getMinNodeVersion())).getName());
}
return new InternalEngine(config);
}
}
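A brief usage sketch; construction matches the IndicesService change below, and engineConfig stands in for an existing EngineConfig:

    // Sketch: segrep-enabled indices now construct the factory with the
    // ClusterService so node versions can be consulted at engine creation.
    EngineFactory factory = new NRTReplicationEngineFactory(clusterService);
    Engine engine = factory.newReadWriteEngine(engineConfig);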
IndexShard.java
@@ -1553,7 +1553,8 @@ public Tuple<GatedCloseable<SegmentInfos>, ReplicationCheckpoint> getLatestSegme
shardRouting.primary()
? store.getSegmentMetadataMap(segmentInfos).values().stream().mapToLong(StoreFileMetadata::length).sum()
: store.stats(StoreStats.UNKNOWN_RESERVED_BYTES).getSizeInBytes(),
getEngine().config().getCodec().getName()
getEngine().config().getCodecName(),
getEngine().config().getClusterMinVersion()
)
);
} catch (IOException e) {
@@ -1787,7 +1788,7 @@ static Engine.Searcher wrapSearcher(
}

/**
* Used with segment replication during relocation handoff, this method updates current read only engine to global
* Used with segment replication during relocation handoff and rolling upgrades, this method updates current read only engine to global
* checkpoint followed by changing to writeable engine
*
* @throws IOException if communication failed
Expand All @@ -1796,7 +1797,7 @@ static Engine.Searcher wrapSearcher(
*
* @opensearch.internal
*/
public void resetToWriteableEngine() throws IOException, InterruptedException, TimeoutException {
public void resetEngine() throws IOException, InterruptedException, TimeoutException {
indexShardOperationPermits.blockOperations(30, TimeUnit.MINUTES, () -> { resetEngineToGlobalCheckpoint(); });
}

@@ -3277,7 +3278,7 @@ private void doCheckIndex() throws IOException {
recoveryState.getVerifyIndex().checkIndexTime(Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - timeNS)));
}

Engine getEngine() {
public Engine getEngine() {
Engine engine = getEngineOrNull();
if (engine == null) {
throw new AlreadyClosedException("engine is closed");
IndicesService.java
@@ -851,7 +851,7 @@ private EngineFactory getEngineFactory(final IndexSettings idxSettings) {
return config -> new ReadOnlyEngine(config, new SeqNoStats(0, 0, 0), new TranslogStats(), true, Function.identity(), false);
}
if (idxSettings.isSegRepEnabled()) {
return new NRTReplicationEngineFactory();
return new NRTReplicationEngineFactory(clusterService);
}
return new InternalEngineFactory();
} else if (engineFactories.size() == 1) {
SegmentReplicationSourceService.java
@@ -11,11 +11,13 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.store.AlreadyClosedException;
import org.opensearch.action.support.ChannelActionListener;
import org.opensearch.cluster.ClusterChangedEvent;
import org.opensearch.cluster.ClusterStateListener;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.cluster.node.DiscoveryNodes;
import org.opensearch.cluster.routing.ShardRouting;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.Nullable;
Expand All @@ -37,7 +39,9 @@
import org.opensearch.transport.TransportService;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

@@ -170,6 +174,33 @@ public void clusterChanged(ClusterChangedEvent event) {
}
}
}
    if (event.clusterUpgraded()) {
        List<IndexShard> indexShardList = new ArrayList<>();
        DiscoveryNodes nodes = event.state().nodes();
        for (IndexService indexService : indicesService) {
            if (indexService.getIndexSettings().isSegRepEnabled() && (indexService.getIndexSettings().getNumberOfReplicas() > 0)) {
                for (IndexShard indexShard : indexService) {
                    try {
                        if (indexShard.routingEntry().primary()
                            && (indexShard.getEngine().config().getClusterMinVersion() != nodes.getMaxNodeVersion())) {
                            indexShardList.add(indexShard);
                        }
                    } catch (AlreadyClosedException e) {
                        logger.warn("Index shard [{}] engine is already closed.", indexShard.shardId());
                    }
                }
            }
        }
        try {
            if (indexShardList.isEmpty() == false) {
                for (IndexShard indexShard : indexShardList) {
                    indexShard.resetEngine();
                }
            }
        } catch (Exception e) {
            logger.error("Received unexpected exception: [{}]", e.getMessage());
        }
    }
}

Review thread on "List<IndexShard> indexShardList = new ArrayList<>();":

Member: nit: final?

Review thread on the getClusterMinVersion() check:

Member (@dreamer-89, Jun 6, 2023):
1. For large clusters (100s of nodes), it is not uncommon to have a few nodes running an older OS version, which means running the primary shard in bwc mode for an extended period, in the worst case forever. I am not sure about the end result of that state. As an improvement, can this switch be performed once the nodes containing all shard copies are upgraded?
2. Performing this engine switch gradually also makes more sense versus doing it all at once. The user may see indexing requests getting piled up when the upgrade completes.
3. Need tests.

Review thread on "indexShard.resetEngine();":

Member (@dreamer-89, Jun 7, 2023): Engine reset is not required when there is no codec change. This change will unnecessarily impact end users post upgrade (delayed operations) when it is not really needed.
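A hypothetical guard reflecting that last comment; getCodecName() and opensearchVersionToLuceneCodec come from this PR, but the guard itself is only a sketch:

    // Hypothetical: skip the engine reset when the codec the engine currently
    // writes with already matches the upgraded cluster's codec.
    String currentCodec = indexShard.getEngine().config().getCodecName();
    String targetCodec = CodecService.opensearchVersionToLuceneCodec.get(nodes.getMaxNodeVersion());
    if (currentCodec.equals(targetCodec) == false) {
        indexShard.resetEngine();
    }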

@Override
SegmentReplicationTargetService.java
@@ -12,6 +12,7 @@
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.opensearch.BaseExceptionsHelper;
import org.opensearch.Version;
import org.opensearch.action.ActionListener;
import org.opensearch.cluster.routing.ShardRouting;
import org.opensearch.common.Nullable;
@@ -227,6 +228,19 @@ public synchronized void onNewCheckpoint(final ReplicationCheckpoint receivedChe
}
}
final Thread thread = Thread.currentThread();
Version localNodeVersion = Version.CURRENT;
// if replica's OS version is not on or after primary version, then can ignore checkpoint
if (localNodeVersion.onOrAfter(receivedCheckpoint.getMinVersion()) == false) {
    logger.trace(
        () -> new ParameterizedMessage(
            "Ignoring checkpoint, shard not started {} {}\n Shard does not support the received lucene codec version {}",
            receivedCheckpoint,
            replicaShard.state(),
            receivedCheckpoint.getCodec()
        )
    );
    return;
}

Review thread on lines +235 to +239:

Member: Suggested change:

    () -> new ParameterizedMessage(
        "Ignoring checkpoint {} as shard does not support the received lucene codec version {}",
        receivedCheckpoint,
        receivedCheckpoint.getCodec()
    )

Review thread on lines +231 to +243:

Member: This check should go inside shouldProcessCheckpoint, which contains the other validations around processing checkpoints.
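A hypothetical relocation of the check into IndexShard#shouldProcessCheckpoint per that comment; the parameter name requestCheckpoint is assumed:

    // Hypothetical placement inside IndexShard#shouldProcessCheckpoint,
    // alongside the existing shard-state and checkpoint-comparison checks.
    if (Version.CURRENT.onOrAfter(requestCheckpoint.getMinVersion()) == false) {
        logger.trace(() -> new ParameterizedMessage(
            "Ignoring checkpoint {} as shard does not support the received lucene codec version {}",
            requestCheckpoint, requestCheckpoint.getCodec()));
        return false;
    }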

if (replicaShard.shouldProcessCheckpoint(receivedCheckpoint)) {
startReplication(replicaShard, new SegmentReplicationListener() {
@Override
@@ -435,7 +449,7 @@ public void onReplicationDone(SegmentReplicationState state) {
try {
// Promote engine type for primary target
if (indexShard.recoveryState().getPrimary() == true) {
indexShard.resetToWriteableEngine();
indexShard.resetEngine();
}
channel.sendResponse(TransportResponse.Empty.INSTANCE);
} catch (InterruptedException | TimeoutException | IOException e) {