Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Lucene SuggestStopFilter #3863

Merged
merged 1 commit into from
Oct 15, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ encoded.

|`ignore_case` |Set to `true` to lower case all words first. Defaults to
`false`.

|`remove_trailing` |Set to `false` to keep the last term of a search even
if it is a stop word. This is very useful for the completion suggester, as
a query like `green a` can then be extended to `green apple` even though
stop words are otherwise removed. Defaults to `true`.
|=======================================================================

stopwords allow for custom language specific expansion of default
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.suggest.analyzing.SuggestStopFilter;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
Expand All @@ -45,11 +46,13 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean ignoreCase;

private final boolean enablePositionIncrements;
private final boolean removeTrailing;

@Inject
public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version, ignoreCase);
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", true);
if (!enablePositionIncrements && version.onOrAfter(Version.LUCENE_44)) {
Expand All @@ -60,9 +63,13 @@ public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings

/**
 * Wraps the given token stream in the stop-word filter selected by the
 * {@code remove_trailing} setting.
 *
 * <p>When {@code remove_trailing} is {@code true} (the default) a regular
 * {@link StopFilter} is built and the configured
 * {@code enable_position_increments} value is applied to it. When it is
 * {@code false} a {@link SuggestStopFilter} is returned instead; that filter
 * is constructed from the stream and the stop word set only, so the
 * {@code enable_position_increments} value is not applied in that branch.
 *
 * @param tokenStream the stream to filter
 * @return the stream wrapped in the selected stop filter
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (removeTrailing) {
        StopFilter filter = new StopFilter(version, tokenStream, stopWords);
        filter.setEnablePositionIncrements(enablePositionIncrements);
        return filter;
    } else {
        return new SuggestStopFilter(tokenStream, stopWords);
    }
}

public Set<?> stopWords() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.search.suggest.analyzing.SuggestStopFilter;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.inject.ProvisionException;
import org.elasticsearch.common.settings.ImmutableSettings;
Expand Down Expand Up @@ -66,7 +67,6 @@ public void testCorrectPositionIncrementSetting() throws IOException {
TokenStream create = tokenFilter.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("foo bar")));
assertThat(create, instanceOf(StopFilter.class));
assertThat(((StopFilter)create).getEnablePositionIncrements(), equalTo(true));

}

@Test
Expand All @@ -80,7 +80,18 @@ public void testDeprecatedPositionIncrementSettingWithVerions() throws IOExcepti
TokenStream create = tokenFilter.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("foo bar")));
assertThat(create, instanceOf(StopFilter.class));
assertThat(((StopFilter)create).getEnablePositionIncrements(), equalTo(false));

}

/**
 * A {@code stop} token filter configured with {@code remove_trailing: false}
 * must still be served by {@link StopTokenFilterFactory}, but the stream it
 * creates has to be a {@link SuggestStopFilter}.
 */
@Test
public void testThatSuggestStopFilterWorks() throws Exception {
    ImmutableSettings.Builder builder = ImmutableSettings.settingsBuilder();
    builder.put("index.analysis.filter.my_stop.type", "stop");
    builder.put("index.analysis.filter.my_stop.remove_trailing", false);
    Settings filterSettings = builder.build();

    // Resolve the configured filter factory by name.
    TokenFilterFactory stopFactory =
            AnalysisTestsHelper.createAnalysisServiceFromSettings(filterSettings).tokenFilter("my_stop");
    assertThat(stopFactory, instanceOf(StopTokenFilterFactory.class));

    // The input ends with a stop word ("an").
    StringReader input = new StringReader("foo an");
    TokenStream filtered = stopFactory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, input));
    assertThat(filtered, instanceOf(SuggestStopFilter.class));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,35 @@ public void testThatSortingOnCompletionFieldReturnsUsefulException() throws Exce
}
}

/**
 * Exercises the completion suggester with an analyzer whose {@code stop}
 * filter has {@code remove_trailing} set to {@code false}: two documents are
 * indexed and a series of progressively longer prefixes is checked against
 * the suggestions that are expected for each of them.
 */
@Test
public void testThatSuggestStopFilterWorks() throws Exception {
    // Analyzer "stoptest": standard tokenizer + standard filter + stop filter with remove_trailing=false.
    ImmutableSettings.Builder analysisSettings = settingsBuilder();
    analysisSettings.put("index.analysis.analyzer.stoptest.tokenizer", "standard");
    analysisSettings.putArray("index.analysis.analyzer.stoptest.filter", "standard", "suggest_stop_filter");
    analysisSettings.put("index.analysis.filter.suggest_stop_filter.type", "stop");
    analysisSettings.put("index.analysis.filter.suggest_stop_filter.remove_trailing", false);

    createIndexAndMappingAndSettings(analysisSettings, "simple", "stoptest", true, true, true);

    // One document without and one with the stop word "the".
    client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder().startObject().field(FIELD, "Feed trolls").endObject())
            .get();
    client().prepareIndex(INDEX, TYPE, "2")
            .setSource(jsonBuilder().startObject().field(FIELD, "Feed the trolls").endObject())
            .get();

    refresh();

    assertSuggestions("feed t", "Feed the trolls", "Feed trolls");
    assertSuggestions("feed th", "Feed the trolls");
    assertSuggestions("feed the", "Feed the trolls");
    // The stop word is complete here, so it is ignored at query time and the query becomes just "feed".
    assertSuggestions("feed the ", "Feed the trolls", "Feed trolls");
    // The stop word is removed, but the position increment kicks in, which does not work for the
    // prefix suggester, so no suggestions are expected.
    assertSuggestions("feed the t");
}

@Test(expected = MapperParsingException.class)
public void testThatIndexingInvalidFieldsInCompletionFieldResultsInException() throws Exception {
createIndexAndMapping();
Expand Down