diff --git a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc index be37d24f7dd7c..e460725523cf6 100644 --- a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc @@ -1,9 +1,9 @@ [[analysis-edgengram-tokenfilter]] === Edge NGram Token Filter -A token filter of type `edgeNGram`. +A token filter of type `edge_ngram`. -The following are settings that can be set for a `edgeNGram` token +The following are settings that can be set for a `edge_ngram` token filter type: [cols="<,<",options="header",] diff --git a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc index acc178a2741fa..53bda23d12bf9 100644 --- a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc @@ -1,9 +1,9 @@ [[analysis-ngram-tokenfilter]] === NGram Token Filter -A token filter of type `nGram`. +A token filter of type `ngram`. -The following are settings that can be set for a `nGram` token filter +The following are settings that can be set for a `ngram` token filter type: [cols="<,<",options="header",] diff --git a/docs/reference/migration/migrate_7_0/analysis.asciidoc b/docs/reference/migration/migrate_7_0/analysis.asciidoc index 36ad41be09aa1..ebcbfe020dd66 100644 --- a/docs/reference/migration/migrate_7_0/analysis.asciidoc +++ b/docs/reference/migration/migrate_7_0/analysis.asciidoc @@ -38,4 +38,12 @@ The `standard` token filter has been removed because it doesn't change anything The `standard_html_strip` analyzer has been deprecated, and should be replaced with a combination of the `standard` tokenizer and `html_strip` char_filter. Indexes created using this analyzer will still be readable in elasticsearch 7.0, -but it will not be possible to create new indexes using it. \ No newline at end of file +but it will not be possible to create new indexes using it. + +[float] +==== The deprecated `nGram` and `edgeNGram` token filter cannot be used on new indices + +The `nGram` and `edgeNGram` token filter names have been deprecated since version 6.4. +Indexes created using these token filters will still be readable in elasticsearch 7.0. +Using them on new indices from version 7.0.0 on however will throw an error when indexing +or analyzing documents. Both names should be replaces by `ngram` or `edge_ngram` respectively. diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index ac439f4bae227..ed667bbe9a676 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -415,7 +415,11 @@ public List getPreConfiguredTokenFilters() { filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input -> new EdgeNGramTokenFilter(input, 1))); filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, false, (reader, version) -> { - if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) { + throw new IllegalArgumentException( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [edge_ngram] instead."); + } else if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation", "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + "Please change the filter name to [edge_ngram] instead."); @@ -439,7 +443,10 @@ public List getPreConfiguredTokenFilters() { LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS))); filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false))); filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, false, (reader, version) -> { - if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) { + throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [ngram] instead."); + } else if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation", "The [nGram] token filter name is deprecated and will be removed in a future version. " + "Please change the filter name to [ngram] instead."); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java index c52c78ffe27e3..3257117528e79 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -45,7 +45,7 @@ public class CommonAnalysisPluginTests extends ESTestCase { */ public void testNGramDeprecationWarning() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.V_6_7_0)) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -61,6 +61,28 @@ public void testNGramDeprecationWarning() throws IOException { } } + /** + * Check that the deprecated name "nGram" throws an error since 7.0.0 + */ + public void testNGramDeprecationError() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + assertEquals( + "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" + + " name to [ngram] instead.", + ex.getMessage()); + } + } + /** * Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0 */ @@ -80,12 +102,13 @@ public void testNGramNoDeprecationWarningPre6_4() throws IOException { } } + /** * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0 */ public void testEdgeNGramDeprecationWarning() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.V_6_7_0)) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -101,6 +124,28 @@ public void testEdgeNGramDeprecationWarning() throws IOException { } } + /** + * Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0 + */ + public void testEdgeNGramDeprecationError() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + assertEquals( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" + + " name to [edge_ngram] instead.", + ex.getMessage()); + } + } + /** * Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0 */ diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java index e96243efc4254..8f58a074cf102 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java @@ -81,7 +81,7 @@ public void testNgramHighlightingWithBrokenPositions() throws IOException { .put("analysis.tokenizer.autocomplete.max_gram", 20) .put("analysis.tokenizer.autocomplete.min_gram", 1) .put("analysis.tokenizer.autocomplete.token_chars", "letter,digit") - .put("analysis.tokenizer.autocomplete.type", "nGram") + .put("analysis.tokenizer.autocomplete.type", "ngram") .put("analysis.filter.wordDelimiter.type", "word_delimiter") .putList("analysis.filter.wordDelimiter.type_table", "& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM", diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml index 9a7c158fc4734..e6e7d65242560 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml @@ -23,14 +23,14 @@ - match: { detail.tokenizer.tokens.0.token: Foo Bar! } --- -"nGram": +"ngram": - do: indices.analyze: body: text: good explain: true tokenizer: - type: nGram + type: ngram min_gram: 2 max_gram: 2 - length: { detail.tokenizer.tokens: 3 } @@ -40,7 +40,7 @@ - match: { detail.tokenizer.tokens.2.token: od } --- -"nGram_exception": +"ngram_exception": - skip: version: " - 6.99.99" reason: only starting from version 7.x this throws an error @@ -51,7 +51,7 @@ text: good explain: true tokenizer: - type: nGram + type: ngram min_gram: 2 max_gram: 4 --- @@ -133,7 +133,7 @@ text: "foobar" explain: true tokenizer: - type: nGram + type: ngram min_gram: 3 max_gram: 3 - length: { detail.tokenizer.tokens: 4 } @@ -162,9 +162,9 @@ body: text: "foo" explain: true - tokenizer: nGram + tokenizer: ngram - length: { detail.tokenizer.tokens: 5 } - - match: { detail.tokenizer.name: nGram } + - match: { detail.tokenizer.name: ngram } - match: { detail.tokenizer.tokens.0.token: f } - match: { detail.tokenizer.tokens.1.token: fo } - match: { detail.tokenizer.tokens.2.token: o } @@ -194,7 +194,7 @@ text: "foo" explain: true tokenizer: - type: edgeNGram + type: edge_ngram min_gram: 1 max_gram: 3 - length: { detail.tokenizer.tokens: 3 } @@ -219,9 +219,9 @@ body: text: "foo" explain: true - tokenizer: edgeNGram + tokenizer: edge_ngram - length: { detail.tokenizer.tokens: 2 } - - match: { detail.tokenizer.name: edgeNGram } + - match: { detail.tokenizer.name: edge_ngram } - match: { detail.tokenizer.tokens.0.token: f } - match: { detail.tokenizer.tokens.1.token: fo } diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml index ec00b6d41f1c5..56bbed7044e14 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml @@ -76,7 +76,7 @@ analysis: tokenizer: trigram: - type: nGram + type: ngram min_gram: 3 max_gram: 3 filter: