Skip to content

Commit

Permalink
Throw Error on deprecated nGram and edgeNGram custom filters (#50376)
Browse files Browse the repository at this point in the history
The camel-case `nGram` and `edgeNGram` filter names were deprecated in 6.4. We
currently throw errors on new indices when they are used. However, these errors
are currently only thrown for pre-configured filters; adding them as custom
filters triggers neither the warning nor the error. This change adds the appropriate
exceptions for `nGram` and `edgeNGram` respectively.

Closes #50360
  • Loading branch information
Christoph Büscher authored Dec 20, 2019
1 parent 64b0719 commit c6f7166
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,11 @@
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
Expand Down Expand Up @@ -238,7 +240,24 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
// Deprecated camel-case spelling of "edge_ngram". The factory is still constructed, but every call to
// create() either rejects the name (index created on or after 8.0.0) or emits a deprecation warning.
filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) ->
    new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings) {
        @Override
        public TokenStream create(TokenStream tokenStream) {
            final boolean nameRejected = indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_8_0_0);
            if (nameRejected) {
                throw new IllegalArgumentException(
                    "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
                        + "Please change the filter name to [edge_ngram] instead.");
            }
            // Older indices keep working; only a deprecation warning is logged before delegating.
            deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation",
                "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
                    + "Please change the filter name to [edge_ngram] instead.");
            return super.create(tokenStream);
        }
    });
filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
filters.put("fingerprint", FingerprintTokenFilterFactory::new);
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
Expand All @@ -258,7 +277,24 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
filters.put("min_hash", MinHashTokenFilterFactory::new);
filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
filters.put("ngram", NGramTokenFilterFactory::new);
filters.put("nGram", NGramTokenFilterFactory::new);
// Deprecated camel-case spelling of "ngram". The factory is still constructed, but every call to
// create() either rejects the name (index created on or after 8.0.0) or emits a deprecation warning.
filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) ->
    new NGramTokenFilterFactory(indexSettings, environment, name, settings) {
        @Override
        public TokenStream create(TokenStream tokenStream) {
            final boolean nameRejected = indexSettings.getIndexVersionCreated().onOrAfter(org.elasticsearch.Version.V_8_0_0);
            if (nameRejected) {
                throw new IllegalArgumentException(
                    "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
                        + "Please change the filter name to [ngram] instead.");
            }
            // Older indices keep working; only a deprecation warning is logged before delegating.
            deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation",
                "The [nGram] token filter name is deprecated and will be removed in a future version. "
                    + "Please change the filter name to [ngram] instead.");
            return super.create(tokenStream);
        }
    });
filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;

import java.io.IOException;
import java.io.StringReader;
import java.util.Map;

public class CommonAnalysisPluginTests extends ESTestCase {

    /**
     * Check that the deprecated "nGram" filter throws an exception for indices created on or after 8.0.0 and
     * logs a deprecation warning for indices created between 7.0.0 and 7.6.0 when the filter is used as a custom filter
     */
    public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
        assertDeprecatedFilterName("nGram", "ngram");
    }

    /**
     * Check that the deprecated "edgeNGram" filter throws an exception for indices created on or after 8.0.0 and
     * logs a deprecation warning for indices created between 7.0.0 and 7.6.0 when the filter is used as a custom filter
     */
    public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
        assertDeprecatedFilterName("edgeNGram", "edge_ngram");
    }

    /**
     * Shared scenario for both deprecated camel-case filter names: first verifies the hard error on
     * indices created on or after 8.0.0, then the deprecation warning on indices created between 7.0.0 and 7.6.0.
     *
     * @param deprecatedName  the deprecated filter type under test, e.g. "nGram"
     * @param replacementName the filter type the messages tell users to switch to, e.g. "ngram"
     * @throws IOException if building the test analysis or closing the plugin fails
     */
    private void assertDeprecatedFilterName(String deprecatedName, String replacementName) throws IOException {
        final String replacementHint = "Please change the filter name to [" + replacementName + "] instead.";

        final Settings settings = customFilterSettings(deprecatedName, Version.V_8_0_0, Version.CURRENT);
        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
            Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings),
                    settings, commonAnalysisPlugin).tokenFilter;
            // NOTE(review): the factory is looked up under the deprecated type name, not under the configured
            // custom filter name "my_ngram" -- confirm this exercises the custom-filter code path as intended.
            TokenFilterFactory tokenFilterFactory = tokenFilters.get(deprecatedName);
            Tokenizer tokenizer = newTokenizer();

            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
            assertEquals("The [" + deprecatedName + "] token filter name was deprecated in 6.4 and cannot be used in new indices. "
                    + replacementHint, ex.getMessage());
        }

        final Settings settingsPre8 = customFilterSettings(deprecatedName, Version.V_7_0_0, Version.V_7_6_0);
        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
            Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre8),
                    settingsPre8, commonAnalysisPlugin).tokenFilter;
            TokenFilterFactory tokenFilterFactory = tokenFilters.get(deprecatedName);
            Tokenizer tokenizer = newTokenizer();
            assertNotNull(tokenFilterFactory.create(tokenizer));
            assertWarnings("The [" + deprecatedName + "] token filter name is deprecated and will be removed in a future version. "
                    + replacementHint);
        }
    }

    /**
     * Builds index settings declaring a custom analyzer whose only filter, "my_ngram", has the given type,
     * on an index whose creation version is picked at random from the given inclusive range.
     *
     * @param filterType the value stored under {@code index.analysis.filter.my_ngram.type}
     * @param minVersion lower bound passed to {@code VersionUtils.randomVersionBetween}
     * @param maxVersion upper bound passed to {@code VersionUtils.randomVersionBetween}
     * @return the assembled settings
     */
    private Settings customFilterSettings(String filterType, Version minVersion, Version maxVersion) {
        // The temp dir is created before the version is drawn, matching the order in which the settings are put.
        return Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), minVersion, maxVersion))
                .put("index.analysis.analyzer.custom_analyzer.type", "custom")
                .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
                .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
                .put("index.analysis.filter.my_ngram.type", filterType)
                .build();
    }

    /**
     * Creates a fresh {@link MockTokenizer} reading the fixed input "foo bar".
     */
    private static Tokenizer newTokenizer() {
        Tokenizer tokenizer = new MockTokenizer();
        tokenizer.setReader(new StringReader("foo bar"));
        return tokenizer;
    }
}

0 comments on commit c6f7166

Please sign in to comment.