From 90310c319336820732a8eeb6d81776fbfa6e6d48 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Wed, 6 Sep 2023 13:05:53 -0700 Subject: [PATCH] Introducing ZStd compression codec plugin (#9658) (#9832) * introducing zstd compression codec plugin * Moving zstd compression codec as a plugin * introducing zstd compression codec plugin * Adding checks to EngineConfig and fixing tests * incorporating review comments * fixing tests * introducing zstd compression codec plugin * addressing review comments * nit fixes * implementing codec aliases * addressing review comments * review comments * moving codec aliases to custom codec * adding zstd default codec for backward compatibility * renaming to deprecated codec * incorporating review comments * nit fixes --------- (cherry picked from commit 76f1b52647cf19bd19e3ce9d0759953b5d376593) Signed-off-by: Sarthak Aggarwal Signed-off-by: Prabhakar Sithanandam Signed-off-by: Andrew Ross Co-authored-by: Sarthak Aggarwal Co-authored-by: Prabhakar Sithanandam --- CHANGELOG.md | 1 + modules/reindex/build.gradle | 2 + .../index/codec/MultiCodecReindexIT.java | 10 + plugins/custom-codecs/build.gradle | 27 ++ .../index/codec/CodecCompressionLevelIT.java | 16 +- .../index/codec/MultiCodecMergeIT.java | 9 + .../codec/customcodecs/CustomCodecPlugin.java | 47 ++++ .../customcodecs/CustomCodecService.java | 72 +++++ .../CustomCodecServiceFactory.java | 27 ++ .../customcodecs/Lucene95CustomCodec.java | 57 +++- .../Lucene95CustomStoredFieldsFormat.java | 4 + .../index/codec/customcodecs/ZstdCodec.java | 17 +- .../customcodecs/ZstdCompressionMode.java | 0 .../customcodecs/ZstdDeprecatedCodec.java | 61 +++++ .../codec/customcodecs/ZstdNoDictCodec.java | 17 +- .../ZstdNoDictCompressionMode.java | 0 .../codec/customcodecs/package-info.java | 0 .../plugin-metadata/plugin-security.policy | 11 + .../services/org.apache.lucene.codecs.Codec | 1 + .../customcodecs/AbstractCompressorTests.java | 0 .../codec/customcodecs/CustomCodecTests.java | 250 ++++++++++++++++++ ...Lucene95CustomStoredFieldsFormatTests.java | 0 .../customcodecs/ZstdCompressorTests.java | 0 .../ZstdNoDictCompressorTests.java | 0 .../index/codec/ZstdNotEnabledIT.java | 41 +++ .../opensearch/index/codec/CodecAliases.java | 32 +++ .../opensearch/index/codec/CodecService.java | 19 -- .../opensearch/index/engine/EngineConfig.java | 41 ++- .../opensearch/index/codec/CodecTests.java | 105 -------- .../test/OpenSearchIntegTestCase.java | 4 +- 30 files changed, 724 insertions(+), 147 deletions(-) create mode 100644 plugins/custom-codecs/build.gradle rename {server => plugins/custom-codecs}/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java (89%) rename {server => plugins/custom-codecs}/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java (95%) create mode 100644 plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java create mode 100644 plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java create mode 100644 plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecServiceFactory.java rename {server => plugins/custom-codecs}/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java (54%) rename {server => plugins/custom-codecs}/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java (98%) rename {server => plugins/custom-codecs}/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java (79%) rename {server => plugins/custom-codecs}/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java (100%) create mode 100644 plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdDeprecatedCodec.java rename {server => plugins/custom-codecs}/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java (78%) rename {server => plugins/custom-codecs}/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java (100%) rename {server => plugins/custom-codecs}/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java (100%) create mode 100644 plugins/custom-codecs/src/main/plugin-metadata/plugin-security.policy rename {server => plugins/custom-codecs}/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec (63%) rename {server => plugins/custom-codecs}/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java (100%) create mode 100644 plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java rename {server => plugins/custom-codecs}/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java (100%) rename {server => plugins/custom-codecs}/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java (100%) rename {server => plugins/custom-codecs}/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java (100%) create mode 100644 server/src/internalClusterTest/java/org/opensearch/index/codec/ZstdNotEnabledIT.java create mode 100644 server/src/main/java/org/opensearch/index/codec/CodecAliases.java diff --git a/CHANGELOG.md b/CHANGELOG.md index b0e0b35cb90d4..6ec26906f3f5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -117,6 +117,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add support for Remote Translog Store stats in `_remotestore/stats/` API ([#9263](https://github.com/opensearch-project/OpenSearch/pull/9263)) - Removing the vec file extension from INDEX_STORE_HYBRID_NIO_EXTENSIONS, to ensure the no performance degradation for vector search via Lucene Engine.([#9528](https://github.com/opensearch-project/OpenSearch/pull/9528))) - Cleanup Unreferenced file on segment merge failure ([#9503](https://github.com/opensearch-project/OpenSearch/pull/9503)) +- Move ZStd to a plugin ([#9658](https://github.com/opensearch-project/OpenSearch/pull/9658)) ### Deprecated diff --git a/modules/reindex/build.gradle b/modules/reindex/build.gradle index 37526a924da73..bb04fffc4f03a 100644 --- a/modules/reindex/build.gradle +++ b/modules/reindex/build.gradle @@ -69,6 +69,7 @@ dependencies { testImplementation project(':modules:transport-netty4') // for parent/child testing testImplementation project(':modules:parent-join') + testImplementation project(':plugins:custom-codecs') } restResources { @@ -95,4 +96,5 @@ forbiddenPatterns { tasks.named("bundlePlugin").configure { dependsOn("copyParentJoinMetadata") dependsOn("copyTransportNetty4Metadata") + dependsOn("copyCustomCodecsMetadata") } diff --git a/modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java b/modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java index 7c2fe8d99c330..e1deb5f2457d9 100644 --- a/modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java +++ b/modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java @@ -15,14 +15,19 @@ import org.opensearch.action.support.ActiveShardCount; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.customcodecs.CustomCodecPlugin; import org.opensearch.index.engine.Segment; import org.opensearch.index.reindex.BulkByScrollResponse; import org.opensearch.index.reindex.ReindexAction; +import org.opensearch.index.reindex.ReindexPlugin; import org.opensearch.index.reindex.ReindexRequestBuilder; import org.opensearch.index.reindex.ReindexTestCase; +import org.opensearch.plugins.Plugin; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.UUID; import java.util.concurrent.ExecutionException; @@ -40,6 +45,11 @@ public class MultiCodecReindexIT extends ReindexTestCase { + @Override + protected Collection> nodePlugins() { + return List.of(CustomCodecPlugin.class, ReindexPlugin.class); + } + public void testReindexingMultipleCodecs() throws InterruptedException, ExecutionException { internalCluster().ensureAtLeastNumDataNodes(1); Map codecMap = Map.of( diff --git a/plugins/custom-codecs/build.gradle b/plugins/custom-codecs/build.gradle new file mode 100644 index 0000000000000..253822e88b817 --- /dev/null +++ b/plugins/custom-codecs/build.gradle @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +apply plugin: 'opensearch.opensearchplugin' +apply plugin: 'opensearch.internal-cluster-test' + +opensearchplugin { + name 'custom-codecs' + description 'A plugin that implements custom compression codecs.' + classname 'org.opensearch.index.codec.customcodecs.CustomCodecPlugin' + licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt') + noticeFile rootProject.file('NOTICE.txt') +} + +dependencies { + api "com.github.luben:zstd-jni:1.5.5-5" +} + +testingConventions.enabled = false; diff --git a/server/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java b/plugins/custom-codecs/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java similarity index 89% rename from server/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java rename to plugins/custom-codecs/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java index 5f3e53f1454fc..9bef3e2d3d235 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java +++ b/plugins/custom-codecs/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java @@ -12,14 +12,24 @@ import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.customcodecs.CustomCodecPlugin; +import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; +import java.util.Collection; +import java.util.Collections; import java.util.concurrent.ExecutionException; +import static org.opensearch.index.codec.customcodecs.CustomCodecService.ZSTD_CODEC; +import static org.opensearch.index.codec.customcodecs.CustomCodecService.ZSTD_NO_DICT_CODEC; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST) public class CodecCompressionLevelIT extends OpenSearchIntegTestCase { + @Override + protected Collection> nodePlugins() { + return Collections.singletonList(CustomCodecPlugin.class); + } public void testLuceneCodecsCreateIndexWithCompressionLevel() { @@ -62,7 +72,7 @@ public void testZStandardCodecsCreateIndexWithCompressionLevel() { Settings.builder() .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) - .put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC)) + .put("index.codec", randomFrom(ZSTD_CODEC, ZSTD_NO_DICT_CODEC)) .put("index.codec.compression_level", randomIntBetween(1, 6)) .build() ); @@ -81,7 +91,7 @@ public void testZStandardToLuceneCodecsWithCompressionLevel() throws ExecutionEx Settings.builder() .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) - .put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC)) + .put("index.codec", randomFrom(ZSTD_CODEC, ZSTD_NO_DICT_CODEC)) .put("index.codec.compression_level", randomIntBetween(1, 6)) .build() ); @@ -164,7 +174,7 @@ public void testLuceneToZStandardCodecsWithCompressionLevel() throws ExecutionEx .updateSettings( new UpdateSettingsRequest(index).settings( Settings.builder() - .put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC)) + .put("index.codec", randomFrom(ZSTD_CODEC, ZSTD_NO_DICT_CODEC)) .put("index.codec.compression_level", randomIntBetween(1, 6)) ) ) diff --git a/server/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java b/plugins/custom-codecs/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java similarity index 95% rename from server/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java rename to plugins/custom-codecs/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java index 23d5cc3b35486..82633513a2f50 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java +++ b/plugins/custom-codecs/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java @@ -15,11 +15,15 @@ import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.customcodecs.CustomCodecPlugin; import org.opensearch.index.engine.Segment; +import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.UUID; @@ -40,6 +44,11 @@ @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST) public class MultiCodecMergeIT extends OpenSearchIntegTestCase { + @Override + protected Collection> nodePlugins() { + return Collections.singletonList(CustomCodecPlugin.class); + } + public void testForceMergeMultipleCodecs() throws ExecutionException, InterruptedException { Map codecMap = Map.of( diff --git a/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java new file mode 100644 index 0000000000000..91a13a1d924a2 --- /dev/null +++ b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodecs; + +import org.opensearch.index.IndexSettings; +import org.opensearch.index.codec.CodecServiceFactory; +import org.opensearch.index.engine.EngineConfig; +import org.opensearch.plugins.EnginePlugin; +import org.opensearch.plugins.Plugin; + +import java.util.Optional; + +/** + * A plugin that implements custom codecs. Supports these codecs: + *
    + *
  • ZSTD + *
  • ZSTDNODICT + *
+ * + * @opensearch.internal + */ +public final class CustomCodecPlugin extends Plugin implements EnginePlugin { + + /** + * Creates a new instance + */ + public CustomCodecPlugin() {} + + /** + * @param indexSettings is the default indexSettings + * @return the engine factory + */ + @Override + public Optional getCustomCodecServiceFactory(final IndexSettings indexSettings) { + String codecName = indexSettings.getValue(EngineConfig.INDEX_CODEC_SETTING); + if (codecName.equals(CustomCodecService.ZSTD_NO_DICT_CODEC) || codecName.equals(CustomCodecService.ZSTD_CODEC)) { + return Optional.of(new CustomCodecServiceFactory()); + } + return Optional.empty(); + } +} diff --git a/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java new file mode 100644 index 0000000000000..de0eb2b3286d3 --- /dev/null +++ b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodecs; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.opensearch.common.collect.MapBuilder; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.codec.CodecService; +import org.opensearch.index.mapper.MapperService; + +import java.util.Arrays; +import java.util.Map; +import java.util.stream.Stream; + +import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING; + +/** + * CustomCodecService provides ZSTD and ZSTD_NO_DICT compression codecs. + */ +public class CustomCodecService extends CodecService { + private final Map codecs; + /** + * ZStandard codec + */ + public static final String ZSTD_CODEC = "zstd"; + /** + * ZStandard without dictionary codec + */ + public static final String ZSTD_NO_DICT_CODEC = "zstd_no_dict"; + + /** + * Creates a new CustomCodecService. + * + * @param mapperService The mapper service. + * @param indexSettings The index settings. + * @param logger The logger. + */ + public CustomCodecService(MapperService mapperService, IndexSettings indexSettings, Logger logger) { + super(mapperService, indexSettings, logger); + int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING); + final MapBuilder codecs = MapBuilder.newMapBuilder(); + if (mapperService == null) { + codecs.put(ZSTD_CODEC, new ZstdCodec(compressionLevel)); + codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(compressionLevel)); + } else { + codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger, compressionLevel)); + codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger, compressionLevel)); + } + this.codecs = codecs.immutableMap(); + } + + @Override + public Codec codec(String name) { + Codec codec = codecs.get(name); + if (codec == null) { + return super.codec(name); + } + return codec; + } + + @Override + public String[] availableCodecs() { + return Stream.concat(Arrays.stream(super.availableCodecs()), codecs.keySet().stream()).toArray(String[]::new); + } +} diff --git a/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecServiceFactory.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecServiceFactory.java new file mode 100644 index 0000000000000..d634616162684 --- /dev/null +++ b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecServiceFactory.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodecs; + +import org.opensearch.index.codec.CodecService; +import org.opensearch.index.codec.CodecServiceConfig; +import org.opensearch.index.codec.CodecServiceFactory; + +/** + * A factory for creating new {@link CodecService} instance + */ +public class CustomCodecServiceFactory implements CodecServiceFactory { + + /** Creates a new instance. */ + public CustomCodecServiceFactory() {} + + @Override + public CodecService createCodecService(CodecServiceConfig config) { + return new CustomCodecService(config.getMapperService(), config.getIndexSettings(), config.getLogger()); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java similarity index 54% rename from server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java rename to plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java index 8aa422a47a073..89acc980abd2f 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java +++ b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java @@ -15,6 +15,9 @@ import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec; import org.opensearch.index.mapper.MapperService; +import java.util.Collections; +import java.util.Set; + /** * * Extends {@link FilterCodec} to reuse the functionality of Lucene Codec. @@ -23,12 +26,48 @@ * @opensearch.internal */ public abstract class Lucene95CustomCodec extends FilterCodec { + + /** Default compression level used for compression */ public static final int DEFAULT_COMPRESSION_LEVEL = 3; /** Each mode represents a compression algorithm. */ public enum Mode { - ZSTD, - ZSTD_NO_DICT + /** + * ZStandard mode with dictionary + */ + ZSTD("ZSTD", Set.of("zstd")), + /** + * ZStandard mode without dictionary + */ + ZSTD_NO_DICT("ZSTDNODICT", Set.of("zstd_no_dict")), + /** + * Deprecated ZStandard mode, added for backward compatibility to support indices created in 2.9.0 where + * both ZSTD and ZSTD_NO_DICT used Lucene95CustomCodec underneath. This should not be used to + * create new indices. + */ + ZSTD_DEPRECATED("Lucene95CustomCodec", Collections.emptySet()); + + private final String codec; + private final Set aliases; + + Mode(String codec, Set aliases) { + this.codec = codec; + this.aliases = aliases; + } + + /** + * Returns the Codec that is registered with Lucene + */ + public String getCodec() { + return codec; + } + + /** + * Returns the aliases of the Codec + */ + public Set getAliases() { + return aliases; + } } private final StoredFieldsFormat storedFieldsFormat; @@ -51,12 +90,22 @@ public Lucene95CustomCodec(Mode mode) { * @param compressionLevel The compression level. */ public Lucene95CustomCodec(Mode mode, int compressionLevel) { - super("Lucene95CustomCodec", new Lucene95Codec()); + super(mode.getCodec(), new Lucene95Codec()); this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); } + /** + * Creates a new compression codec with the given compression level. We use + * lowercase letters when registering the codec so that we remain consistent with + * the other compression codecs: default, lucene_default, and best_compression. + * + * @param mode The compression codec (ZSTD or ZSTDNODICT). + * @param compressionLevel The compression level. + * @param mapperService The mapper service. + * @param logger The logger. + */ public Lucene95CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) { - super("Lucene95CustomCodec", new PerFieldMappingPostingFormatCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, logger)); + super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, logger)); this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); } diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java similarity index 98% rename from server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java rename to plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java index 2816e2907a5f6..79d97035089ab 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java +++ b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java @@ -101,6 +101,7 @@ public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOCo StoredFieldsFormat impl(Lucene95CustomCodec.Mode mode) { switch (mode) { case ZSTD: + case ZSTD_DEPRECATED: return new Lucene90CompressingStoredFieldsFormat( "CustomStoredFieldsZstd", zstdCompressionMode, @@ -125,6 +126,9 @@ Lucene95CustomCodec.Mode getMode() { return mode; } + /** + * Returns the compression level. + */ public int getCompressionLevel() { return compressionLevel; } diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java similarity index 79% rename from server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java rename to plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java index 042f7eaa29e53..a3e3a34a5d258 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java +++ b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java @@ -10,14 +10,17 @@ import org.apache.logging.log4j.Logger; import org.opensearch.common.settings.Setting; +import org.opensearch.index.codec.CodecAliases; import org.opensearch.index.codec.CodecSettings; import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperService; +import java.util.Set; + /** * ZstdCodec provides ZSTD compressor using the zstd-jni library. */ -public class ZstdCodec extends Lucene95CustomCodec implements CodecSettings { +public class ZstdCodec extends Lucene95CustomCodec implements CodecSettings, CodecAliases { /** * Creates a new ZstdCodec instance with the default compression level. @@ -35,6 +38,13 @@ public ZstdCodec(int compressionLevel) { super(Mode.ZSTD, compressionLevel); } + /** + * Creates a new ZstdCodec instance. + * + * @param mapperService The mapper service. + * @param logger The logger. + * @param compressionLevel The compression level. + */ public ZstdCodec(MapperService mapperService, Logger logger, int compressionLevel) { super(Mode.ZSTD, compressionLevel, mapperService, logger); } @@ -49,4 +59,9 @@ public String toString() { public boolean supports(Setting setting) { return setting.equals(EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING); } + + @Override + public Set aliases() { + return Mode.ZSTD.getAliases(); + } } diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java similarity index 100% rename from server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java rename to plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java diff --git a/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdDeprecatedCodec.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdDeprecatedCodec.java new file mode 100644 index 0000000000000..02fa386db97b3 --- /dev/null +++ b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdDeprecatedCodec.java @@ -0,0 +1,61 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodecs; + +import org.apache.logging.log4j.Logger; +import org.opensearch.common.settings.Setting; +import org.opensearch.index.codec.CodecSettings; +import org.opensearch.index.engine.EngineConfig; +import org.opensearch.index.mapper.MapperService; + +/** + * ZstdDeprecatedCodec provides ZSTD compressor using the zstd-jni library. + * Added to support backward compatibility for indices created with Lucene95CustomCodec as codec name. + */ +@Deprecated(since = "2.10") +public class ZstdDeprecatedCodec extends Lucene95CustomCodec implements CodecSettings { + + /** + * Creates a new ZstdDefaultCodec instance with the default compression level. + */ + public ZstdDeprecatedCodec() { + this(DEFAULT_COMPRESSION_LEVEL); + } + + /** + * Creates a new ZstdDefaultCodec instance. + * + * @param compressionLevel The compression level. + */ + public ZstdDeprecatedCodec(int compressionLevel) { + super(Mode.ZSTD_DEPRECATED, compressionLevel); + } + + /** + * Creates a new ZstdDefaultCodec instance. + * + * @param mapperService The mapper service. + * @param logger The logger. + * @param compressionLevel The compression level. + */ + public ZstdDeprecatedCodec(MapperService mapperService, Logger logger, int compressionLevel) { + super(Mode.ZSTD_DEPRECATED, compressionLevel, mapperService, logger); + } + + /** The name for this codec. */ + @Override + public String toString() { + return getClass().getSimpleName(); + } + + @Override + public boolean supports(Setting setting) { + return setting.equals(EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java similarity index 78% rename from server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java rename to plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java index a7e8e0e42ee68..ea7351f755361 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java +++ b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java @@ -10,14 +10,17 @@ import org.apache.logging.log4j.Logger; import org.opensearch.common.settings.Setting; +import org.opensearch.index.codec.CodecAliases; import org.opensearch.index.codec.CodecSettings; import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperService; +import java.util.Set; + /** * ZstdNoDictCodec provides ZSTD compressor without a dictionary support. */ -public class ZstdNoDictCodec extends Lucene95CustomCodec implements CodecSettings { +public class ZstdNoDictCodec extends Lucene95CustomCodec implements CodecSettings, CodecAliases { /** * Creates a new ZstdNoDictCodec instance with the default compression level. @@ -35,6 +38,13 @@ public ZstdNoDictCodec(int compressionLevel) { super(Mode.ZSTD_NO_DICT, compressionLevel); } + /** + * Creates a new ZstdNoDictCodec instance. + * + * @param mapperService The mapper service. + * @param logger The logger. + * @param compressionLevel The compression level. + */ public ZstdNoDictCodec(MapperService mapperService, Logger logger, int compressionLevel) { super(Mode.ZSTD_NO_DICT, compressionLevel, mapperService, logger); } @@ -49,4 +59,9 @@ public String toString() { public boolean supports(Setting setting) { return setting.equals(EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING); } + + @Override + public Set aliases() { + return Mode.ZSTD_NO_DICT.getAliases(); + } } diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java similarity index 100% rename from server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java rename to plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java b/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java similarity index 100% rename from server/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java rename to plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java diff --git a/plugins/custom-codecs/src/main/plugin-metadata/plugin-security.policy b/plugins/custom-codecs/src/main/plugin-metadata/plugin-security.policy new file mode 100644 index 0000000000000..8161010cfa897 --- /dev/null +++ b/plugins/custom-codecs/src/main/plugin-metadata/plugin-security.policy @@ -0,0 +1,11 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +grant codeBase "${codebase.zstd-jni}" { + permission java.lang.RuntimePermission "loadLibrary.*"; +}; diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/plugins/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec similarity index 63% rename from server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec rename to plugins/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index 8b37d91cd8bc4..ba5054055d00a 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/plugins/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -1,2 +1,3 @@ org.opensearch.index.codec.customcodecs.ZstdCodec org.opensearch.index.codec.customcodecs.ZstdNoDictCodec +org.opensearch.index.codec.customcodecs.ZstdDeprecatedCodec diff --git a/server/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java b/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java similarity index 100% rename from server/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java rename to plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java diff --git a/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java b/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java new file mode 100644 index 0000000000000..5365b9e222d9a --- /dev/null +++ b/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/CustomCodecTests.java @@ -0,0 +1,250 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.index.codec.customcodecs; + +import org.apache.logging.log4j.LogManager; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat; +import org.apache.lucene.codecs.lucene95.Lucene95Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.LuceneTestCase.SuppressCodecs; +import org.opensearch.common.settings.IndexScopedSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.env.Environment; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.codec.CodecService; +import org.opensearch.index.codec.CodecServiceConfig; +import org.opensearch.index.codec.CodecServiceFactory; +import org.opensearch.index.codec.CodecSettings; +import org.opensearch.index.engine.EngineConfig; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.similarity.SimilarityService; +import org.opensearch.indices.mapper.MapperRegistry; +import org.opensearch.plugins.MapperPlugin; +import org.opensearch.test.IndexSettingsModule; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.Collections; +import java.util.Optional; + +import static org.opensearch.index.codec.customcodecs.CustomCodecService.ZSTD_CODEC; +import static org.opensearch.index.codec.customcodecs.CustomCodecService.ZSTD_NO_DICT_CODEC; +import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING; + +@SuppressCodecs("*") // we test against default codec so never get a random one here! +public class CustomCodecTests extends OpenSearchTestCase { + + private CustomCodecPlugin plugin; + + @Before + public void setup() { + plugin = new CustomCodecPlugin(); + } + + public void testZstd() throws Exception { + Codec codec = createCodecService(false).codec("zstd"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); + } + + public void testZstdNoDict() throws Exception { + Codec codec = createCodecService(false).codec("zstd_no_dict"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); + } + + public void testZstdDeprecatedCodec() { + final IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> createCodecService(false).codec("ZSTD_DEPRECATED") + ); + assertTrue(e.getMessage().startsWith("failed to find codec")); + } + + public void testZstdWithCompressionLevel() throws Exception { + int randomCompressionLevel = randomIntBetween(1, 6); + Codec codec = createCodecService(randomCompressionLevel, "zstd").codec("zstd"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel()); + } + + public void testZstdNoDictWithCompressionLevel() throws Exception { + int randomCompressionLevel = randomIntBetween(1, 6); + Codec codec = createCodecService(randomCompressionLevel, "zstd_no_dict").codec("zstd_no_dict"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel()); + } + + public void testBestCompressionWithCompressionLevel() { + final Settings zstdSettings = Settings.builder() + .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) + .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), randomFrom(ZSTD_CODEC, ZSTD_NO_DICT_CODEC)) + .build(); + + // able to validate zstd + final IndexScopedSettings zstdIndexScopedSettings = new IndexScopedSettings( + zstdSettings, + IndexScopedSettings.BUILT_IN_INDEX_SETTINGS + ); + zstdIndexScopedSettings.validate(zstdSettings, true); + } + + public void testLuceneCodecsWithCompressionLevel() { + final Settings customCodecSettings = Settings.builder() + .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) + .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), randomFrom("zstd", "zstd_no_dict")) + .build(); + + final IndexScopedSettings customCodecIndexScopedSettings = new IndexScopedSettings( + customCodecSettings, + IndexScopedSettings.BUILT_IN_INDEX_SETTINGS + ); + customCodecIndexScopedSettings.validate(customCodecSettings, true); + } + + public void testZstandardCompressionLevelSupport() throws Exception { + CodecService codecService = createCodecService(false); + CodecSettings zstdCodec = (CodecSettings) codecService.codec("zstd"); + CodecSettings zstdNoDictCodec = (CodecSettings) codecService.codec("zstd_no_dict"); + assertTrue(zstdCodec.supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING)); + assertTrue(zstdNoDictCodec.supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING)); + } + + public void testDefaultMapperServiceNull() throws Exception { + Codec codec = createCodecService(true).codec("default"); + assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_SPEED, codec); + } + + public void testBestCompressionMapperServiceNull() throws Exception { + Codec codec = createCodecService(true).codec("best_compression"); + assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_COMPRESSION, codec); + } + + public void testZstdMapperServiceNull() throws Exception { + Codec codec = createCodecService(true).codec("zstd"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); + } + + public void testZstdNoDictMapperServiceNull() throws Exception { + Codec codec = createCodecService(true).codec("zstd_no_dict"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); + } + + // write some docs with it, inspect .si to see this was the used compression + private void assertStoredFieldsCompressionEquals(Lucene95Codec.Mode expected, Codec actual) throws Exception { + SegmentReader sr = getSegmentReader(actual); + String v = sr.getSegmentInfo().info.getAttribute(Lucene90StoredFieldsFormat.MODE_KEY); + assertNotNull(v); + assertEquals(expected, Lucene95Codec.Mode.valueOf(v)); + } + + private void assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode expected, Codec actual) throws Exception { + SegmentReader sr = getSegmentReader(actual); + String v = sr.getSegmentInfo().info.getAttribute(Lucene95CustomStoredFieldsFormat.MODE_KEY); + assertNotNull(v); + assertEquals(expected, Lucene95CustomCodec.Mode.valueOf(v)); + } + + private CodecService createCodecService(boolean isMapperServiceNull) throws IOException { + Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); + if (isMapperServiceNull) { + return new CustomCodecService(null, IndexSettingsModule.newIndexSettings("_na", nodeSettings), LogManager.getLogger("test")); + } + return buildCodecService(nodeSettings); + } + + private CodecService createCodecService(int randomCompressionLevel, String codec) throws IOException { + Settings nodeSettings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put("index.codec", codec) + .put("index.codec.compression_level", randomCompressionLevel) + .build(); + return buildCodecService(nodeSettings); + } + + private CodecService buildCodecService(Settings nodeSettings) throws IOException { + + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); + SimilarityService similarityService = new SimilarityService(indexSettings, null, Collections.emptyMap()); + IndexAnalyzers indexAnalyzers = createTestAnalysis(indexSettings, nodeSettings).indexAnalyzers; + MapperRegistry mapperRegistry = new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER); + MapperService service = new MapperService( + indexSettings, + indexAnalyzers, + xContentRegistry(), + similarityService, + mapperRegistry, + () -> null, + () -> false, + null + ); + + Optional customCodecServiceFactory = plugin.getCustomCodecServiceFactory(indexSettings); + if (customCodecServiceFactory.isPresent()) { + return customCodecServiceFactory.get().createCodecService(new CodecServiceConfig(indexSettings, service, logger)); + } + return new CustomCodecService(service, indexSettings, LogManager.getLogger("test")); + } + + private SegmentReader getSegmentReader(Codec codec) throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(null); + iwc.setCodec(codec); + IndexWriter iw = new IndexWriter(dir, iwc); + iw.addDocument(new Document()); + iw.commit(); + iw.close(); + DirectoryReader ir = DirectoryReader.open(dir); + SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); + ir.close(); + dir.close(); + return sr; + } + +} diff --git a/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java b/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java similarity index 100% rename from server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java rename to plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java diff --git a/server/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java b/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java similarity index 100% rename from server/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java rename to plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java diff --git a/server/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java b/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java similarity index 100% rename from server/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java rename to plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/index/codec/ZstdNotEnabledIT.java b/server/src/internalClusterTest/java/org/opensearch/index/codec/ZstdNotEnabledIT.java new file mode 100644 index 0000000000000..9b1fa77fc9a5a --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/index/codec/ZstdNotEnabledIT.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.List; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST) +public class ZstdNotEnabledIT extends OpenSearchIntegTestCase { + + public void testZStdCodecsWithoutPluginInstalled() { + + internalCluster().startNode(); + final String index = "test-index"; + + // creating index with zstd and zstd_no_dict should fail if custom-codecs plugin is not installed + for (String codec : List.of("zstd", "zstd_no_dict")) { + assertThrows( + IllegalArgumentException.class, + () -> createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", codec) + .build() + ) + ); + } + } + +} diff --git a/server/src/main/java/org/opensearch/index/codec/CodecAliases.java b/server/src/main/java/org/opensearch/index/codec/CodecAliases.java new file mode 100644 index 0000000000000..066c092e86db8 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/CodecAliases.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec; + +import org.apache.lucene.codecs.Codec; +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Set; + +/** + * This {@link CodecAliases} to provide aliases for the {@link Codec}. + * + * @opensearch.internal + */ +@ExperimentalApi +public interface CodecAliases { + + /** + * Retrieves a set of aliases for an codec. + * + * @return A non-null set of alias strings. If no aliases are available, an empty set should be returned. + */ + default Set aliases() { + return Set.of(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/CodecService.java b/server/src/main/java/org/opensearch/index/codec/CodecService.java index 54feb446fdb40..9b57fe64cbeab 100644 --- a/server/src/main/java/org/opensearch/index/codec/CodecService.java +++ b/server/src/main/java/org/opensearch/index/codec/CodecService.java @@ -39,15 +39,10 @@ import org.opensearch.common.Nullable; import org.opensearch.common.collect.MapBuilder; import org.opensearch.index.IndexSettings; -import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec; -import org.opensearch.index.codec.customcodecs.ZstdCodec; -import org.opensearch.index.codec.customcodecs.ZstdNoDictCodec; import org.opensearch.index.mapper.MapperService; import java.util.Map; -import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING; - /** * Since Lucene 4.0 low level index segments are read and written through a * codec layer that allows to use use-case specific file formats & @@ -68,27 +63,20 @@ public class CodecService { * the raw unfiltered lucene default. useful for testing */ public static final String LUCENE_DEFAULT_CODEC = "lucene_default"; - public static final String ZSTD_CODEC = "zstd"; - public static final String ZSTD_NO_DICT_CODEC = "zstd_no_dict"; public CodecService(@Nullable MapperService mapperService, IndexSettings indexSettings, Logger logger) { final MapBuilder codecs = MapBuilder.newMapBuilder(); assert null != indexSettings; - int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING); if (mapperService == null) { codecs.put(DEFAULT_CODEC, new Lucene95Codec()); codecs.put(LZ4, new Lucene95Codec()); codecs.put(BEST_COMPRESSION_CODEC, new Lucene95Codec(Mode.BEST_COMPRESSION)); codecs.put(ZLIB, new Lucene95Codec(Mode.BEST_COMPRESSION)); - codecs.put(ZSTD_CODEC, new ZstdCodec(compressionLevel)); - codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(compressionLevel)); } else { codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); codecs.put(LZ4, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); codecs.put(ZLIB, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); - codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger, compressionLevel)); - codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger, compressionLevel)); } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { @@ -103,17 +91,10 @@ public CodecService(@Nullable MapperService mapperService, Logger logger) { if (mapperService == null) { codecs.put(DEFAULT_CODEC, new Lucene95Codec()); codecs.put(BEST_COMPRESSION_CODEC, new Lucene95Codec(Mode.BEST_COMPRESSION)); - codecs.put(ZSTD_CODEC, new ZstdCodec()); - codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec()); } else { IndexSettings indexSettings = mapperService.getIndexSettings(); - int compressionLevel = indexSettings == null - ? Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL - : indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING); codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); - codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger, compressionLevel)); - codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger, compressionLevel)); } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 3351931a6b068..00f8ff6d3cd40 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -49,6 +49,7 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.indices.breaker.CircuitBreakerService; import org.opensearch.index.IndexSettings; +import org.opensearch.index.codec.CodecAliases; import org.opensearch.index.codec.CodecService; import org.opensearch.index.codec.CodecSettings; import org.opensearch.index.mapper.ParsedDocument; @@ -133,18 +134,26 @@ public Supplier retentionLeasesSupplier() { case "lz4": case "best_compression": case "zlib": - case "zstd": - case "zstd_no_dict": case "lucene_default": return s; default: - if (Codec.availableCodecs().contains(s) == false) { // we don't error message the not officially supported ones - throw new IllegalArgumentException( - "unknown value for [index.codec] must be one of [default, lz4, best_compression, zlib, zstd, zstd_no_dict] but was: " - + s - ); + if (Codec.availableCodecs().contains(s)) { + return s; } - return s; + + for (String codecName : Codec.availableCodecs()) { + Codec codec = Codec.forName(codecName); + if (codec instanceof CodecAliases) { + CodecAliases codecWithAlias = (CodecAliases) codec; + if (codecWithAlias.aliases().contains(s)) { + return s; + } + } + } + + throw new IllegalArgumentException( + "unknown value for [index.codec] must be one of [default, lz4, best_compression, zlib] but was: " + s + ); } }, Property.IndexScope, Property.NodeScope); @@ -181,9 +190,6 @@ public void validate(String key, Object value, Object dependency) { private static void doValidateCodecSettings(final String codec) { switch (codec) { - case "zstd": - case "zstd_no_dict": - return; case "best_compression": case "zlib": case "lucene_default": @@ -198,6 +204,18 @@ private static void doValidateCodecSettings(final String codec) { return; } } + for (String codecName : Codec.availableCodecs()) { + Codec availableCodec = Codec.forName(codecName); + if (availableCodec instanceof CodecAliases) { + CodecAliases availableCodecWithAlias = (CodecAliases) availableCodec; + if (availableCodecWithAlias.aliases().contains(codec)) { + if (availableCodec instanceof CodecSettings + && ((CodecSettings) availableCodec).supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING)) { + return; + } + } + } + } } throw new IllegalArgumentException("Compression level cannot be set for the " + codec + " codec."); } @@ -238,6 +256,7 @@ private EngineConfig(Builder builder) { this.codecService = builder.codecService; this.eventListener = builder.eventListener; codecName = builder.indexSettings.getValue(INDEX_CODEC_SETTING); + // We need to make the indexing buffer for this shard at least as large // as the amount of memory that is available for all engines on the // local node so that decisions to flush segments to disk are made by diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index 939245c36fb11..5fa8aff8c3a6c 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -48,8 +48,6 @@ import org.opensearch.env.Environment; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.IndexAnalyzers; -import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec; -import org.opensearch.index.codec.customcodecs.Lucene95CustomStoredFieldsFormat; import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.similarity.SimilarityService; @@ -95,49 +93,7 @@ public void testZlib() throws Exception { assert codec instanceof PerFieldMappingPostingFormatCodec; } - public void testZstd() throws Exception { - Codec codec = createCodecService(false).codec("zstd"); - assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); - Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); - assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); - } - - public void testZstdNoDict() throws Exception { - Codec codec = createCodecService(false).codec("zstd_no_dict"); - assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); - Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); - assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); - } - - public void testZstdWithCompressionLevel() throws Exception { - int randomCompressionLevel = randomIntBetween(1, 6); - Codec codec = createCodecService(randomCompressionLevel, "zstd").codec("zstd"); - assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); - Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); - assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel()); - } - - public void testZstdNoDictWithCompressionLevel() throws Exception { - int randomCompressionLevel = randomIntBetween(1, 6); - Codec codec = createCodecService(randomCompressionLevel, "zstd_no_dict").codec("zstd_no_dict"); - assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); - Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); - assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel()); - } - public void testBestCompressionWithCompressionLevel() { - final Settings zstdSettings = Settings.builder() - .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) - .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC)) - .build(); - - // able to validate zstd - final IndexScopedSettings zstdIndexScopedSettings = new IndexScopedSettings( - zstdSettings, - IndexScopedSettings.BUILT_IN_INDEX_SETTINGS - ); - zstdIndexScopedSettings.validate(zstdSettings, true); - final Settings settings = Settings.builder() .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC)) @@ -152,17 +108,6 @@ public void testLuceneCodecsWithCompressionLevel() { String codecName = randomFrom(Codec.availableCodecs()); Codec codec = Codec.forName(codecName); - final Settings customCodecSettings = Settings.builder() - .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) - .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), "Lucene95CustomCodec") - .build(); - - final IndexScopedSettings customCodecIndexScopedSettings = new IndexScopedSettings( - customCodecSettings, - IndexScopedSettings.BUILT_IN_INDEX_SETTINGS - ); - customCodecIndexScopedSettings.validate(customCodecSettings, true); - final Settings settings = Settings.builder() .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), codecName) @@ -178,14 +123,6 @@ public void testLuceneCodecsWithCompressionLevel() { } } - public void testZstandardCompressionLevelSupport() throws Exception { - CodecService codecService = createCodecService(false); - CodecSettings zstdCodec = (CodecSettings) codecService.codec("zstd"); - CodecSettings zstdNoDictCodec = (CodecSettings) codecService.codec("zstd_no_dict"); - assertTrue(zstdCodec.supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING)); - assertTrue(zstdNoDictCodec.supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING)); - } - public void testDefaultMapperServiceNull() throws Exception { Codec codec = createCodecService(true).codec("default"); assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_SPEED, codec); @@ -196,20 +133,6 @@ public void testBestCompressionMapperServiceNull() throws Exception { assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_COMPRESSION, codec); } - public void testZstdMapperServiceNull() throws Exception { - Codec codec = createCodecService(true).codec("zstd"); - assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); - Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); - assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); - } - - public void testZstdNoDictMapperServiceNull() throws Exception { - Codec codec = createCodecService(true).codec("zstd_no_dict"); - assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); - Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); - assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); - } - public void testExceptionCodecNull() { assertThrows(IllegalArgumentException.class, () -> createCodecService(true).codec(null)); } @@ -224,12 +147,6 @@ public void testCodecServiceWithNullMapperService() { CodecService codecService = new CodecService(null, indexSettings, LogManager.getLogger("test")); assert codecService.codec("default") instanceof Lucene95Codec; assert codecService.codec("best_compression") instanceof Lucene95Codec; - Lucene95CustomStoredFieldsFormat zstdStoredFieldsFormat = (Lucene95CustomStoredFieldsFormat) codecService.codec("zstd") - .storedFieldsFormat(); - Lucene95CustomStoredFieldsFormat zstdNoDictStoredFieldsFormat = (Lucene95CustomStoredFieldsFormat) codecService.codec("zstd") - .storedFieldsFormat(); - assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, zstdStoredFieldsFormat.getCompressionLevel()); - assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, zstdNoDictStoredFieldsFormat.getCompressionLevel()); } public void testCodecServiceWithOnlyMapperService() throws IOException { @@ -246,12 +163,6 @@ public void testCodecServiceWithOnlyMapperService() throws IOException { ); assert codecService.codec("default") instanceof PerFieldMappingPostingFormatCodec; assert codecService.codec("best_compression") instanceof PerFieldMappingPostingFormatCodec; - Lucene95CustomStoredFieldsFormat zstdStoredFieldsFormat = (Lucene95CustomStoredFieldsFormat) codecService.codec("zstd") - .storedFieldsFormat(); - Lucene95CustomStoredFieldsFormat zstdNoDictStoredFieldsFormat = (Lucene95CustomStoredFieldsFormat) codecService.codec("zstd") - .storedFieldsFormat(); - assertEquals(randomCompressionLevel, zstdStoredFieldsFormat.getCompressionLevel()); - assertEquals(randomCompressionLevel, zstdNoDictStoredFieldsFormat.getCompressionLevel()); } // write some docs with it, inspect .si to see this was the used compression @@ -262,13 +173,6 @@ private void assertStoredFieldsCompressionEquals(Lucene95Codec.Mode expected, Co assertEquals(expected, Lucene95Codec.Mode.valueOf(v)); } - private void assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode expected, Codec actual) throws Exception { - SegmentReader sr = getSegmentReader(actual); - String v = sr.getSegmentInfo().info.getAttribute(Lucene95CustomStoredFieldsFormat.MODE_KEY); - assertNotNull(v); - assertEquals(expected, Lucene95CustomCodec.Mode.valueOf(v)); - } - private CodecService createCodecService(boolean isMapperServiceNull) throws IOException { Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); if (isMapperServiceNull) { @@ -277,15 +181,6 @@ private CodecService createCodecService(boolean isMapperServiceNull) throws IOEx return buildCodecService(nodeSettings); } - private CodecService createCodecService(int randomCompressionLevel, String codec) throws IOException { - Settings nodeSettings = Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put("index.codec", codec) - .put("index.codec.compression_level", randomCompressionLevel) - .build(); - return buildCodecService(nodeSettings); - } - private CodecService buildCodecService(Settings nodeSettings) throws IOException { IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index c75a153b39b85..96e0dc51ab3fd 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -384,9 +384,7 @@ public abstract class OpenSearchIntegTestCase extends OpenSearchTestCase { CodecService.DEFAULT_CODEC, CodecService.LZ4, CodecService.BEST_COMPRESSION_CODEC, - CodecService.ZLIB, - CodecService.ZSTD_CODEC, - CodecService.ZSTD_NO_DICT_CODEC + CodecService.ZLIB ); @BeforeClass