Moved more token filters to analysis-common module.
The following token filters were moved: `edge_ngram`, `ngram`, `uppercase`, `lowercase`, `length`, `flatten_graph` and `unique`.

Relates to elastic#23658
martijnvg committed Jun 15, 2017
Commit 428e707 (1 parent: 2a78b0a)
Showing 24 changed files with 470 additions and 196 deletions.
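
The pattern that makes the move possible: instead of hard-coded registrations in core, a module implements AnalysisPlugin and returns its token filter factories from getTokenFilters(), as the CommonAnalysisPlugin hunks near the end of this diff show. Below is a minimal sketch of that extension point, assuming the 5.x-era API visible in those hunks; MyAnalysisPlugin and the "my_lowercase" filter name are illustrative and not part of the commit.

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

// Illustrative plugin; CommonAnalysisPlugin in this commit follows the same shape.
public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {
    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        Map<String, AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
        // Maps the name used in index settings to a provider, just like the
        // factory constructor references (e.g. NGramTokenFilterFactory::new) below.
        filters.put("my_lowercase", (indexSettings, environment, name, settings) ->
                new TokenFilterFactory() {
                    @Override
                    public String name() {
                        return name;
                    }

                    @Override
                    public TokenStream create(TokenStream tokenStream) {
                        return new LowerCaseFilter(tokenStream);
                    }
                });
        return filters;
    }
}
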
buildSrc/src/main/resources/checkstyle_suppressions.xml (1 change: 0 additions, 1 deletion)
@@ -571,7 +571,6 @@
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]IndexingSlowLogTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicySettingsTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLogTests.java" checks="LineLength" />
-<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]NGramTokenizerFactoryTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PatternCaptureTokenFilterTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PreBuiltAnalyzerTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]commongrams[/\\]CommonGramsTokenFilterFactoryTests.java" checks="LineLength" />

core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
@@ -54,14 +54,12 @@
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
-import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
-import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@@ -83,14 +81,11 @@
import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
-import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
-import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
-import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
@@ -133,8 +128,6 @@
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
-import org.elasticsearch.index.analysis.UniqueTokenFilterFactory;
-import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
@@ -209,25 +202,16 @@ private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(Li
NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
tokenFilters.register("stop", StopTokenFilterFactory::new);
tokenFilters.register("reverse", ReverseTokenFilterFactory::new);
tokenFilters.register("length", LengthTokenFilterFactory::new);
tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
tokenFilters.register("kstem", KStemTokenFilterFactory::new);
tokenFilters.register("standard", StandardTokenFilterFactory::new);
tokenFilters.register("nGram", NGramTokenFilterFactory::new);
tokenFilters.register("ngram", NGramTokenFilterFactory::new);
tokenFilters.register("edgeNGram", EdgeNGramTokenFilterFactory::new);
tokenFilters.register("edge_ngram", EdgeNGramTokenFilterFactory::new);
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
tokenFilters.register("unique", UniqueTokenFilterFactory::new);
tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
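Note: the nine registrations removed from setupTokenFilters above (length, lowercase, uppercase, nGram, ngram, edgeNGram, edge_ngram, unique, flatten_graph) reappear as filters.put(...) calls in CommonAnalysisPlugin#getTokenFilters near the end of this diff, so the set of filter names available to index settings is unchanged.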

core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
@@ -19,7 +19,6 @@
package org.elasticsearch.search.fetch.subphase.highlight;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
@@ -214,54 +213,6 @@ public void testHighTermFrequencyDoc() throws IOException {
assertHighlight(search, 0, "name", 0, startsWith("<em>abc</em> <em>abc</em> <em>abc</em> <em>abc</em>"));
}
-
-public void testNgramHighlighting() throws IOException {
-assertAcked(prepareCreate("test")
-.addMapping("test",
-"name", "type=text,analyzer=name_index_analyzer,search_analyzer=name_search_analyzer,"
-+ "term_vector=with_positions_offsets",
-"name2", "type=text,analyzer=name2_index_analyzer,search_analyzer=name_search_analyzer,"
-+ "term_vector=with_positions_offsets")
-.setSettings(Settings.builder()
-.put(indexSettings())
-.put("analysis.filter.my_ngram.max_gram", 20)
-.put("analysis.filter.my_ngram.min_gram", 1)
-.put("analysis.filter.my_ngram.type", "ngram")
-.put("analysis.tokenizer.my_ngramt.max_gram", 20)
-.put("analysis.tokenizer.my_ngramt.min_gram", 1)
-.put("analysis.tokenizer.my_ngramt.token_chars", "letter,digit")
-.put("analysis.tokenizer.my_ngramt.type", "ngram")
-.put("analysis.analyzer.name_index_analyzer.tokenizer", "my_ngramt")
-.put("analysis.analyzer.name2_index_analyzer.tokenizer", "whitespace")
-.put("analysis.analyzer.name2_index_analyzer.filter", "my_ngram")
-.put("analysis.analyzer.name_search_analyzer.tokenizer", "whitespace")));
-client().prepareIndex("test", "test", "1")
-.setSource("name", "logicacmg ehemals avinci - the know how company",
-"name2", "logicacmg ehemals avinci - the know how company").get();
-refresh();
-ensureGreen();
-SearchResponse search = client().prepareSearch().setQuery(matchQuery("name", "logica m"))
-.highlighter(new HighlightBuilder().field("name")).get();
-assertHighlight(search, 0, "name", 0,
-equalTo("<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"));
-
-search = client().prepareSearch().setQuery(matchQuery("name", "logica ma")).highlighter(new HighlightBuilder().field("name")).get();
-assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"));
-
-search = client().prepareSearch().setQuery(matchQuery("name", "logica")).highlighter(new HighlightBuilder().field("name")).get();
-assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehemals avinci - the know how company"));
-
-search = client().prepareSearch().setQuery(matchQuery("name2", "logica m")).highlighter(new HighlightBuilder().field("name2"))
-.get();
-assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"));
-
-search = client().prepareSearch().setQuery(matchQuery("name2", "logica ma")).highlighter(new HighlightBuilder().field("name2"))
-.get();
-assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"));
-
-search = client().prepareSearch().setQuery(matchQuery("name2", "logica")).highlighter(new HighlightBuilder().field("name2")).get();
-assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> ehemals avinci - the know how company"));
-}

public void testEnsureNoNegativeOffsets() throws Exception {
assertAcked(prepareCreate("test")
.addMapping("type1",
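The deleted testNgramHighlighting exercised highlighting over ngram-analyzed fields. With the ngram and edge_ngram filters moved out of core, core integration tests can no longer configure them, so this coverage presumably moves into the analysis-common module's own tests.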

core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java
@@ -19,16 +19,6 @@

package org.elasticsearch.search.query;

-import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
-import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
-
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
@@ -56,6 +46,16 @@
import java.util.List;
import java.util.Set;

+import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
+import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
public class QueryStringIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
@@ -91,10 +91,6 @@ public void testBasicAllQuery() throws Exception {
resp = client().prepareSearch("test").setQuery(queryStringQuery("Bar")).get();
assertHitCount(resp, 3L);
assertHits(resp.getHits(), "1", "2", "3");
-
-resp = client().prepareSearch("test").setQuery(queryStringQuery("foa")).get();
-assertHitCount(resp, 1L);
-assertHits(resp.getHits(), "3");
}

public void testWithDate() throws Exception {
@@ -161,8 +157,6 @@ public void testDocWithAllTypes() throws Exception {
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(queryStringQuery("Baz")).get();
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(queryStringQuery("sbaz")).get();
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(queryStringQuery("19")).get();
assertHits(resp.getHits(), "1");
// nested doesn't match because it's hidden
@@ -223,11 +217,11 @@ public void testExplicitAllFieldsRequested() throws Exception {
indexRandom(true, false, reqs);

SearchResponse resp = client().prepareSearch("test2").setQuery(
queryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
queryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
assertHitCount(resp, 0L);

resp = client().prepareSearch("test2").setQuery(
queryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
queryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
assertHits(resp.getHits(), "1");
assertHitCount(resp, 1L);


core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java
@@ -398,10 +398,6 @@ public void testBasicAllQuery() throws Exception {
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")).get();
assertHitCount(resp, 3L);
assertHits(resp.getHits(), "1", "2", "3");
-
-resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("foa")).get();
-assertHitCount(resp, 1L);
-assertHits(resp.getHits(), "3");
}

public void testWithDate() throws Exception {
@@ -480,8 +476,6 @@ public void testDocWithAllTypes() throws Exception {
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Baz")).get();
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("sbaz")).get();
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("19")).get();
assertHits(resp.getHits(), "1");
// nested doesn't match because it's hidden
@@ -547,11 +541,11 @@ public void testExplicitAllFieldsRequested() throws Exception {
indexRandom(true, false, reqs);

SearchResponse resp = client().prepareSearch("test").setQuery(
simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
assertHitCount(resp, 0L);

resp = client().prepareSearch("test").setQuery(
simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
assertHits(resp.getHits(), "1");
assertHitCount(resp, 1L);


(JSON index fixture; file name not shown)
@@ -6,22 +6,7 @@
"version": {
"created": "5000099"
},
"analysis": {
"analyzer": {
"my_ngrams": {
"type": "custom",
"tokenizer": "standard",
"filter": ["my_ngrams"]
}
},
"filter": {
"my_ngrams": {
"type": "ngram",
"min_gram": 2,
"max_gram": 2
}
}
}
"query.default_field": "f1"
}
},
"mappings": {
@@ -31,7 +16,7 @@
},
"properties": {
"f1": {"type": "text"},
"f2": {"type": "text", "analyzer": "my_ngrams"}
"f2": {"type": "text"}
}
}
}

(another JSON index fixture; file name not shown)
@@ -2,31 +2,15 @@
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"analyzer": {
"my_ngrams": {
"type": "custom",
"tokenizer": "standard",
"filter": ["my_ngrams"]
}
},
"filter": {
"my_ngrams": {
"type": "ngram",
"min_gram": 2,
"max_gram": 2
}
}
}
"number_of_replicas": 0
}
},
"mappings": {
"doc": {
"properties": {
"f1": {"type": "text"},
"f2": {"type": "keyword"},
"f3": {"type": "text", "analyzer": "my_ngrams"},
"f3": {"type": "text"},
"f4": {
"type": "text",
"index_options": "docs"
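Both fixtures drop their custom my_ngrams analyzer, which is consistent with the move: core test fixtures can no longer declare a filter that now lives in the analysis-common module. The deleted "foa" and "sbaz" queries and the corrected "eggplent" spellings in the tests above line up with this; near-miss strings could previously match through the 2-gram analyzer, and without it the tests use exact spellings.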

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -52,7 +52,6 @@
import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
-import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@@ -98,6 +97,15 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
filters.put("trim", TrimTokenFilterFactory::new);
filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
filters.put("unique", UniqueTokenFilterFactory::new);
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
filters.put("length", LengthTokenFilterFactory::new);
filters.put("lowercase", LowerCaseTokenFilterFactory::new);
filters.put("uppercase", UpperCaseTokenFilterFactory::new);
filters.put("nGram", NGramTokenFilterFactory::new);
filters.put("ngram", NGramTokenFilterFactory::new);
filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
return filters;
}

@@ -172,7 +180,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("reverse", false, input -> new ReverseStringFilter(input)));
filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("russian_stem", false, input -> new SnowballFilter(input, "Russian")));
filters.add(PreConfiguredTokenFilter.singleton("scandinavian_folding", true, ScandinavianFoldingFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("scandinavian_normalization", true, ScandinavianNormalizationFilter::new));
@@ -185,7 +193,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
filters.add(PreConfiguredTokenFilter.singleton("trim", false, TrimFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("truncate", false, input -> new TruncateTokenFilter(input, 10)));
filters.add(PreConfiguredTokenFilter.singleton("type_as_payload", false, TypeAsPayloadTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("unique", false, input -> new UniqueTokenFilter(input)));
filters.add(PreConfiguredTokenFilter.singleton("unique", false, UniqueTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("uppercase", true, UpperCaseFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("word_delimiter", false, input ->
new WordDelimiterFilter(input,
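The last two hunks are cosmetic cleanups: the lambdas input -> new ReverseStringFilter(input) and input -> new UniqueTokenFilter(input) become constructor references with identical behavior. For context, here is a self-contained sketch of declaring one pre-configured filter with the same singleton(name, useFilterForMultitermQueries, create) shape used above; the wrapper class is illustrative, not part of the commit.

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.core.UpperCaseFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;

public class PreConfiguredFiltersSketch {
    public static List<PreConfiguredTokenFilter> tokenFilters() {
        List<PreConfiguredTokenFilter> filters = new ArrayList<>();
        // name, whether the filter may also run on multi-term queries,
        // and a function wrapping the incoming TokenStream.
        filters.add(PreConfiguredTokenFilter.singleton("uppercase", true, UpperCaseFilter::new));
        return filters;
    }
}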
(diff truncated: the remaining files of the 24 changed are not shown)
