Skip to content

Commit

Permalink
Deprecate nGram and edgeNGram names for ngram filters (#30209)
Browse files Browse the repository at this point in the history
The camel case name `nGram` should be removed in favour of `ngram` and
similar for `edgeNGram` and `edge_ngram`. Before removal, we need to
deprecate the camel case names first. This change adds deprecation
warnings for indices with versions 6.4.0 and higher and logs deprecation
warnings.
  • Loading branch information
Christoph Büscher committed May 17, 2018
1 parent e7debce commit 1d329d8
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,14 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer())));
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, input ->
new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE)));
// TODO deprecate edgeNGram
filters.add(PreConfiguredTokenFilter.singleton("edgeNGram", false, input ->
new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE)));
filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> {
if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
DEPRECATION_LOGGER.deprecatedAndMaybeLog("edgeNGram_deprecation",
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead.");
}
return new EdgeNGramTokenFilter(reader, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
}));
filters.add(PreConfiguredTokenFilter.singleton("elision", true,
input -> new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES)));
filters.add(PreConfiguredTokenFilter.singleton("french_stem", false, input -> new SnowballFilter(input, new FrenchStemmer())));
Expand All @@ -256,8 +261,14 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT,
LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
filters.add(PreConfiguredTokenFilter.singleton("ngram", false, NGramTokenFilter::new));
// TODO deprecate nGram
filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, (reader, version) -> {
if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
DEPRECATION_LOGGER.deprecatedAndMaybeLog("nGram_deprecation",
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead.");
}
return new NGramTokenFilter(reader);
}));
filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;

import java.io.IOException;
import java.io.StringReader;
import java.util.Map;

public class CommonAnalysisPluginTests extends ESTestCase {

/**
* Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0
*/
public void testNGramDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
.build();

IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
assertNotNull(tokenFilterFactory.create(tokenizer));
assertWarnings(
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead.");
}
}

/**
* Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0
*/
public void testNGramNoDeprecationWarningPre6_4() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_3_0))
.build();

IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
assertNotNull(tokenFilterFactory.create(tokenizer));
}
}

/**
* Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0
*/
public void testEdgeNGramDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
.build();

IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
assertNotNull(tokenFilterFactory.create(tokenizer));
assertWarnings(
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead.");
}
}

/**
* Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0
*/
public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_3_0))
.build();

IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
assertNotNull(tokenFilterFactory.create(tokenizer));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,11 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;

import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween;
import static org.hamcrest.Matchers.instanceOf;
Expand Down Expand Up @@ -128,7 +124,7 @@ public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
for (int i = 0; i < iters; i++) {
final Index index = new Index("test", "_na_");
final String name = "ngr";
Version v = randomVersion(random());
Version v = VersionUtils.randomVersion(random());
Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3);
boolean reverse = random().nextBoolean();
if (reverse) {
Expand All @@ -148,17 +144,4 @@ public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
}
}
}


private Version randomVersion(Random random) throws IllegalArgumentException, IllegalAccessException {
Field[] declaredFields = Version.class.getFields();
List<Field> versionFields = new ArrayList<>();
for (Field field : declaredFields) {
if ((field.getModifiers() & Modifier.STATIC) != 0 && field.getName().startsWith("V_") && field.getType() == Version.class) {
versionFields.add(field);
}
}
return (Version) versionFields.get(random.nextInt(versionFields.size())).get(Version.class);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ public static PreConfiguredTokenFilter singleton(String name, boolean useFilterF
(tokenStream, version) -> create.apply(tokenStream));
}

/**
* Create a pre-configured token filter that may not vary at all.
*/
public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
BiFunction<TokenStream, Version, TokenStream> create) {
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, CachingStrategy.ONE,
(tokenStream, version) -> create.apply(tokenStream, version));
}

/**
* Create a pre-configured token filter that may vary based on the Lucene version.
*/
Expand Down

0 comments on commit 1d329d8

Please sign in to comment.