diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index f9d8cb28f6c61..a71e9a1af0a3f 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -118,9 +118,11 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory; @@ -238,7 +240,17 @@ public Map> getTokenFilters() { filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new)); filters.put("dutch_stem", DutchStemTokenFilterFactory::new); filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new); - filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new); + filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> { + if (indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_7_6_0)) { + throw new IllegalArgumentException("The [edgeNGram] token filter name was deprecated in 6.4 and " + + "cannot be used in new indices. Please change the filter name to [edge_ngram] instead."); + } else { + deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation", + "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead."); + } + return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings); + }); filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new)); filters.put("fingerprint", FingerprintTokenFilterFactory::new); filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new); @@ -258,7 +270,17 @@ public Map> getTokenFilters() { filters.put("min_hash", MinHashTokenFilterFactory::new); filters.put("multiplexer", MultiplexerTokenFilterFactory::new); filters.put("ngram", NGramTokenFilterFactory::new); - filters.put("nGram", NGramTokenFilterFactory::new); + filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> { + if (indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_7_6_0)) { + throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [ngram] instead."); + } else { + deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation", + "The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead."); + } + return new NGramTokenFilterFactory(indexSettings, environment, name, settings); + }); filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new)); filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new)); filters.put("persian_normalization", PersianNormalizationFilterFactory::new); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java new file mode 100644 index 0000000000000..f4da0c422f843 --- /dev/null +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -0,0 +1,240 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.analysis.common; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.Tokenizer; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Map; + +public class CommonAnalysisPluginTests extends ESTestCase { + + /** + * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.0.0 + */ + public void testNGramDeprecationWarning() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0))) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + assertWarnings( + "The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead."); + } + } + + /** + * Check that the deprecated name "nGram" throws an error since 7.0.0 + */ + public void testNGramDeprecationError() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + assertEquals( + "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" + + " name to [ngram] instead.", + ex.getMessage()); + } + } + + /** + * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.0.0 + */ + public void testEdgeNGramDeprecationWarning() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, VersionUtils.getPreviousVersion(Version.V_7_0_0))) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + assertWarnings( + "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead."); + } + } + + /** + * Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0 + */ + public void testEdgeNGramDeprecationError() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + assertEquals( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" + + " name to [edge_ngram] instead.", + ex.getMessage()); + } + } + + /** + * Check that the deprecated "nGram" filter throws exception for indices created since 7.0.0 and + * logs a warning for earlier indices when the filter is used as a custom filter + */ + public void testnGramFilterInCustomAnalyzerDeprecationError() throws IOException { + final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_7_6_0, Version.CURRENT)) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") + .put("index.analysis.filter.my_ngram.type", "nGram") + .build(); + + final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin(); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, + () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)); + assertEquals("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [ngram] instead.", ex.getMessage()); + + final Settings settingsPre7 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_7_5_2)) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") + .put("index.analysis.filter.my_ngram.type", "nGram") + .build(); + + createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin); + assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead."); + } + + /** + * Check that the deprecated "edgeNGram" filter throws exception for indices created since 7.0.0 and + * logs a warning for earlier indices when the filter is used as a custom filter + */ + public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException { + final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_7_6_0, Version.CURRENT)) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") + .put("index.analysis.filter.my_ngram.type", "edgeNGram") + .build(); + + final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin(); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, + () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)); + assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [edge_ngram] instead.", ex.getMessage()); + + final Settings settingsPre7 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_7_5_2)) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") + .put("index.analysis.filter.my_ngram.type", "edgeNGram") + .build(); + + createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin); + assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead."); + } + + /** + * Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0 + */ + public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT)) + .put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip") + .putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b") + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin(); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, + () -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin)); + assertEquals("[standard_html_strip] analyzer is not supported for new indices, " + + "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter", ex.getMessage()); + } + + /** + * Check that the deprecated analyzer name "standard_html_strip" issues a deprecation warning for indices created since 6.5.0 until 7 + */ + public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, + VersionUtils.getPreviousVersion(Version.V_7_0_0))) + .put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip") + .putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b") + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + IndexAnalyzers analyzers = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).indexAnalyzers; + Analyzer analyzer = analyzers.get("custom_analyzer"); + assertNotNull(((NamedAnalyzer) analyzer).analyzer()); + assertWarnings( + "Deprecated analyzer [standard_html_strip] used, " + + "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"); + } + } +}