forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Error on deprecated nGram and edgeNGram custom filters
The camel-case `nGram` and `edgeNGram` filter names were deprecated in 6.4. We currently throw errors on new indices when they are used. However, these errors are currently only thrown for pre-configured filters; adding them as custom filters doesn't trigger the warning or the error. This change adds the appropriate deprecation warnings and exceptions for `nGram` and `edgeNGram` when they are used as custom filter types. Closes elastic#50360
- Loading branch information
Christoph Büscher
committed
Dec 20, 2019
1 parent
9e6e4bb
commit 65c87d3
Showing
2 changed files
with
264 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
240 changes: 240 additions & 0 deletions
240
...sis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,240 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.analysis.common; | ||
|
||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.analysis.MockTokenizer; | ||
import org.apache.lucene.analysis.Tokenizer; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.cluster.metadata.IndexMetaData; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.env.Environment; | ||
import org.elasticsearch.index.IndexSettings; | ||
import org.elasticsearch.index.analysis.IndexAnalyzers; | ||
import org.elasticsearch.index.analysis.NamedAnalyzer; | ||
import org.elasticsearch.index.analysis.TokenFilterFactory; | ||
import org.elasticsearch.test.ESTestCase; | ||
import org.elasticsearch.test.IndexSettingsModule; | ||
import org.elasticsearch.test.VersionUtils; | ||
|
||
import java.io.IOException; | ||
import java.io.StringReader; | ||
import java.util.Map; | ||
|
||
public class CommonAnalysisPluginTests extends ESTestCase { | ||
|
||
/** | ||
* Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.0.0 | ||
*/ | ||
public void testNGramDeprecationWarning() throws IOException { | ||
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, | ||
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0))) | ||
.build(); | ||
|
||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); | ||
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { | ||
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; | ||
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); | ||
Tokenizer tokenizer = new MockTokenizer(); | ||
tokenizer.setReader(new StringReader("foo bar")); | ||
assertNotNull(tokenFilterFactory.create(tokenizer)); | ||
assertWarnings( | ||
"The [nGram] token filter name is deprecated and will be removed in a future version. " | ||
+ "Please change the filter name to [ngram] instead."); | ||
} | ||
} | ||
|
||
/** | ||
* Check that the deprecated name "nGram" throws an error since 7.0.0 | ||
*/ | ||
public void testNGramDeprecationError() throws IOException { | ||
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) | ||
.build(); | ||
|
||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); | ||
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { | ||
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; | ||
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); | ||
Tokenizer tokenizer = new MockTokenizer(); | ||
tokenizer.setReader(new StringReader("foo bar")); | ||
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); | ||
assertEquals( | ||
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" | ||
+ " name to [ngram] instead.", | ||
ex.getMessage()); | ||
} | ||
} | ||
|
||
/** | ||
* Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.0.0 | ||
*/ | ||
public void testEdgeNGramDeprecationWarning() throws IOException { | ||
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, | ||
VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, VersionUtils.getPreviousVersion(Version.V_7_0_0))) | ||
.build(); | ||
|
||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); | ||
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { | ||
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; | ||
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); | ||
Tokenizer tokenizer = new MockTokenizer(); | ||
tokenizer.setReader(new StringReader("foo bar")); | ||
assertNotNull(tokenFilterFactory.create(tokenizer)); | ||
assertWarnings( | ||
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. " | ||
+ "Please change the filter name to [edge_ngram] instead."); | ||
} | ||
} | ||
|
||
/** | ||
* Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0 | ||
*/ | ||
public void testEdgeNGramDeprecationError() throws IOException { | ||
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) | ||
.build(); | ||
|
||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); | ||
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { | ||
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; | ||
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); | ||
Tokenizer tokenizer = new MockTokenizer(); | ||
tokenizer.setReader(new StringReader("foo bar")); | ||
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); | ||
assertEquals( | ||
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" | ||
+ " name to [edge_ngram] instead.", | ||
ex.getMessage()); | ||
} | ||
} | ||
|
||
/** | ||
* Check that the deprecated "nGram" filter throws exception for indices created since 7.0.0 and | ||
* logs a warning for earlier indices when the filter is used as a custom filter | ||
*/ | ||
public void testnGramFilterInCustomAnalyzerDeprecationError() throws IOException { | ||
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, | ||
VersionUtils.randomVersionBetween(random(), Version.V_7_6_0, Version.CURRENT)) | ||
.put("index.analysis.analyzer.custom_analyzer.type", "custom") | ||
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") | ||
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") | ||
.put("index.analysis.filter.my_ngram.type", "nGram") | ||
.build(); | ||
|
||
final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin(); | ||
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, | ||
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)); | ||
assertEquals("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " | ||
+ "Please change the filter name to [ngram] instead.", ex.getMessage()); | ||
|
||
final Settings settingsPre7 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, | ||
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_7_5_2)) | ||
.put("index.analysis.analyzer.custom_analyzer.type", "custom") | ||
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") | ||
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") | ||
.put("index.analysis.filter.my_ngram.type", "nGram") | ||
.build(); | ||
|
||
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin); | ||
assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. " | ||
+ "Please change the filter name to [ngram] instead."); | ||
} | ||
|
||
/** | ||
* Check that the deprecated "edgeNGram" filter throws exception for indices created since 7.0.0 and | ||
* logs a warning for earlier indices when the filter is used as a custom filter | ||
*/ | ||
public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException { | ||
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, | ||
VersionUtils.randomVersionBetween(random(), Version.V_7_6_0, Version.CURRENT)) | ||
.put("index.analysis.analyzer.custom_analyzer.type", "custom") | ||
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") | ||
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") | ||
.put("index.analysis.filter.my_ngram.type", "edgeNGram") | ||
.build(); | ||
|
||
final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin(); | ||
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, | ||
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)); | ||
assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " | ||
+ "Please change the filter name to [edge_ngram] instead.", ex.getMessage()); | ||
|
||
final Settings settingsPre7 = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, | ||
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_7_5_2)) | ||
.put("index.analysis.analyzer.custom_analyzer.type", "custom") | ||
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") | ||
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") | ||
.put("index.analysis.filter.my_ngram.type", "edgeNGram") | ||
.build(); | ||
|
||
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin); | ||
assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. " | ||
+ "Please change the filter name to [edge_ngram] instead."); | ||
} | ||
|
||
/** | ||
* Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0 | ||
*/ | ||
public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException { | ||
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, | ||
VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT)) | ||
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip") | ||
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b") | ||
.build(); | ||
|
||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); | ||
CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin(); | ||
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, | ||
() -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin)); | ||
assertEquals("[standard_html_strip] analyzer is not supported for new indices, " + | ||
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter", ex.getMessage()); | ||
} | ||
|
||
/** | ||
* Check that the deprecated analyzer name "standard_html_strip" issues a deprecation warning for indices created since 6.5.0 until 7 | ||
*/ | ||
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException { | ||
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, | ||
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, | ||
VersionUtils.getPreviousVersion(Version.V_7_0_0))) | ||
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip") | ||
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b") | ||
.build(); | ||
|
||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); | ||
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { | ||
IndexAnalyzers analyzers = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).indexAnalyzers; | ||
Analyzer analyzer = analyzers.get("custom_analyzer"); | ||
assertNotNull(((NamedAnalyzer) analyzer).analyzer()); | ||
assertWarnings( | ||
"Deprecated analyzer [standard_html_strip] used, " + | ||
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"); | ||
} | ||
} | ||
} |