Refactor percolator's QueryAnalyzer to use QueryVisitors (#49238)
Lucene now allows us to explore the structure of a query using QueryVisitors,
delegating the knowledge of how to recurse through and collect terms to the
query implementations themselves. The percolator currently has a home-grown
external version of this API to construct sets of matching terms that must be
present in a document in order for it to possibly match the query.
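
As a rough illustration of the visitor pattern (not code from this commit; the
class and method names below are invented for the example), collecting the terms
of an arbitrary query no longer needs a hand-rolled per-query-type dispatch —
the query recurses into its own clauses for you:

    import java.util.HashSet;
    import java.util.Set;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.QueryVisitor;

    class TermCollectionSketch {
        // Gathers the terms of every positive (non-MUST_NOT) leaf clause.
        // QueryVisitor.termCollector is Lucene's stock collecting visitor;
        // QueryAnalyzer builds richer Result objects on top of the same hook.
        static Set<Term> collectTerms(Query query) {
            Set<Term> terms = new HashSet<>();
            query.visit(QueryVisitor.termCollector(terms));
            return terms;
        }
    }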

This commit removes the home-grown implementation in favour of one using
QueryVisitor. This has the added benefit of making interval queries available
for percolator pre-filtering. Due to a bug in multi-term intervals (LUCENE-9050),
it also includes a clone of some of the Lucene intervals logic, which can be
removed once the upstream fix is available.
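
For context, a minimal sketch of the kind of interval query that can now feed
the percolator's pre-filter (the field name and terms are invented for the
example; multi-term sources such as wildcard and prefix are routed through the
XIntervals clone mentioned above until the LUCENE-9050 fix is available):

    import org.apache.lucene.queries.intervals.IntervalQuery;
    import org.apache.lucene.queries.intervals.Intervals;
    import org.apache.lucene.queries.intervals.IntervalsSource;
    import org.apache.lucene.search.Query;

    class IntervalQuerySketch {
        static Query orderedPair() {
            // "quick" must be followed by "fox", so both terms are required
            // and can be extracted for pre-filtering candidate documents.
            IntervalsSource source = Intervals.ordered(
                Intervals.term("quick"),
                Intervals.term("fox"));
            return new IntervalQuery("body", source);
        }
    }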

Closes #45639
romseygeek authored Nov 18, 2019
1 parent 0b89508 commit 0f6ffc2
Showing 9 changed files with 1,106 additions and 399 deletions.
@@ -434,10 +434,9 @@ void processQuery(Query query, ParseContext context) {
ParseContext.Document doc = context.doc();
FieldType pft = (FieldType) this.fieldType();
QueryAnalyzer.Result result;
try {
Version indexVersion = context.mapperService().getIndexSettings().getIndexVersionCreated();
result = QueryAnalyzer.analyze(query, indexVersion);
} catch (QueryAnalyzer.UnsupportedQueryException e) {
Version indexVersion = context.mapperService().getIndexSettings().getIndexVersionCreated();
result = QueryAnalyzer.analyze(query, indexVersion);
if (result == QueryAnalyzer.Result.UNKNOWN) {
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
return;
}

Large diffs are not rendered by default.

@@ -872,7 +872,7 @@ public void testDuplicatedClauses() throws Exception {
assertThat(values.get(1), equalTo("field\0value2"));
assertThat(values.get(2), equalTo("field\0value3"));
int msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
assertThat(msm, equalTo(2));
assertThat(msm, equalTo(3));

qb = boolQuery()
.must(boolQuery().must(termQuery("field", "value1")).must(termQuery("field", "value2")))
@@ -896,7 +896,7 @@ public void testDuplicatedClauses() throws Exception {
assertThat(values.get(3), equalTo("field\0value4"));
assertThat(values.get(4), equalTo("field\0value5"));
msm = doc.rootDoc().getFields(fieldType.minimumShouldMatchField.name())[0].numericValue().intValue();
assertThat(msm, equalTo(2));
assertThat(msm, equalTo(4));

qb = boolQuery()
.minimumShouldMatch(3)
@@ -28,6 +28,10 @@
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.XIntervals;
import org.apache.lucene.queries.intervals.IntervalQuery;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
@@ -71,11 +75,9 @@
import java.util.function.Consumer;
import java.util.stream.Collectors;

import static org.elasticsearch.percolator.QueryAnalyzer.UnsupportedQueryException;
import static org.elasticsearch.percolator.QueryAnalyzer.analyze;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;

public class QueryAnalyzerTests extends ESTestCase {

@@ -374,7 +376,7 @@ public void testExactMatch_booleanQuery() {
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
builder.add(termQuery3, BooleanClause.Occur.SHOULD);
result = analyze(builder.build(), Version.CURRENT);
assertThat("Minimum match has not impact on whether the result is verified", result.verified, is(true));
assertThat("Minimum match has no impact on whether the result is verified", result.verified, is(true));
assertThat("msm is at least two so result.minimumShouldMatch should 2 too", result.minimumShouldMatch, equalTo(msm));

builder = new BooleanQuery.Builder();
@@ -722,18 +724,14 @@ public void testExtractQueryMetadata_matchAllDocsQuery() {

public void testExtractQueryMetadata_unsupportedQuery() {
TermRangeQuery termRangeQuery = new TermRangeQuery("_field", null, null, true, false);
UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class,
() -> analyze(termRangeQuery, Version.CURRENT));
assertThat(e.getUnsupportedQuery(), sameInstance(termRangeQuery));
assertEquals(Result.UNKNOWN, analyze(termRangeQuery, Version.CURRENT));

TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
builder.add(termRangeQuery, BooleanClause.Occur.SHOULD);
BooleanQuery bq = builder.build();

e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq, Version.CURRENT));
assertThat(e.getUnsupportedQuery(), sameInstance(termRangeQuery));
assertEquals(Result.UNKNOWN, analyze(bq, Version.CURRENT));
}

public void testExtractQueryMetadata_unsupportedQueryInBoolQueryWithMustClauses() {
@@ -765,8 +763,7 @@ testExtractQueryMetadata_unsupportedQueryInBoolQueryWithMustClauses(
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
BooleanQuery bq2 = builder.build();
UnsupportedQueryException e = expectThrows(UnsupportedQueryException.class, () -> analyze(bq2, Version.CURRENT));
assertThat(e.getUnsupportedQuery(), sameInstance(unsupportedQuery));
assertEquals(Result.UNKNOWN, analyze(bq2, Version.CURRENT));
}

public void testExtractQueryMetadata_disjunctionMaxQuery() {
@@ -936,10 +933,10 @@ public void testPointRangeQuery() {
public void testTooManyPointDimensions() {
// For now no extraction support for geo queries:
Query query1 = LatLonPoint.newBoxQuery("_field", 0, 1, 0, 1);
expectThrows(UnsupportedQueryException.class, () -> analyze(query1, Version.CURRENT));
assertEquals(Result.UNKNOWN, analyze(query1, Version.CURRENT));

Query query2 = LongPoint.newRangeQuery("_field", new long[]{0, 0, 0}, new long[]{1, 1, 1});
expectThrows(UnsupportedQueryException.class, () -> analyze(query2, Version.CURRENT));
assertEquals(Result.UNKNOWN, analyze(query2, Version.CURRENT));
}

public void testPointRangeQuery_lowerUpperReversed() {
@@ -1054,7 +1051,7 @@ public void testExtractQueryMetadata_duplicatedClauses() {
Result result = analyze(builder.build(), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(2));
assertThat(result.minimumShouldMatch, equalTo(4));
assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),
new Term("field", "value3"), new Term("field", "value4"));

@@ -1091,16 +1088,10 @@ public void testExtractQueryMetadata_duplicatedClauses() {
public void testEmptyQueries() {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Result result = analyze(builder.build(), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
assertEquals(result, Result.MATCH_NONE);

result = analyze(new DisjunctionMaxQuery(Collections.emptyList(), 0f), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
assertEquals(result, Result.MATCH_NONE);
}

private static void assertDimension(byte[] expected, Consumer<byte[]> consumer) {
@@ -1113,4 +1104,108 @@ private static void assertTermsEqual(Set<QueryExtraction> actual, Term... expect
assertEquals(Arrays.stream(expected).map(QueryExtraction::new).collect(Collectors.toSet()), actual);
}

public void testIntervalQueries() {
IntervalsSource source = Intervals.or(Intervals.term("term1"), Intervals.term("term2"));
Result result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "term1"), new Term("field", "term2"));

source = Intervals.ordered(Intervals.term("term1"), Intervals.term("term2"),
Intervals.or(Intervals.term("term3"), Intervals.term("term4")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(3));
assertTermsEqual(result.extractions, new Term("field", "term1"), new Term("field", "term2"),
new Term("field", "term3"), new Term("field", "term4"));

source = Intervals.ordered(Intervals.term("term1"), XIntervals.wildcard(new BytesRef("a*")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "term1"));

source = Intervals.ordered(XIntervals.wildcard(new BytesRef("a*")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertEquals(Result.UNKNOWN, result);

source = Intervals.or(Intervals.term("b"), XIntervals.wildcard(new BytesRef("a*")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertEquals(Result.UNKNOWN, result);

source = Intervals.ordered(Intervals.term("term1"), XIntervals.prefix(new BytesRef("a")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "term1"));

source = Intervals.ordered(XIntervals.prefix(new BytesRef("a")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertEquals(Result.UNKNOWN, result);

source = Intervals.or(Intervals.term("b"), XIntervals.prefix(new BytesRef("a")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertEquals(Result.UNKNOWN, result);

source = Intervals.containedBy(Intervals.term("a"), Intervals.ordered(Intervals.term("b"), Intervals.term("c")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(3));
assertTermsEqual(result.extractions, new Term("field", "a"), new Term("field", "b"), new Term("field", "c"));

source = Intervals.containing(Intervals.term("a"), Intervals.ordered(Intervals.term("b"), Intervals.term("c")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(3));
assertTermsEqual(result.extractions, new Term("field", "a"), new Term("field", "b"), new Term("field", "c"));

source = Intervals.overlapping(Intervals.term("a"), Intervals.ordered(Intervals.term("b"), Intervals.term("c")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(3));
assertTermsEqual(result.extractions, new Term("field", "a"), new Term("field", "b"), new Term("field", "c"));

source = Intervals.within(Intervals.term("a"), 2, Intervals.ordered(Intervals.term("b"), Intervals.term("c")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(3));
assertTermsEqual(result.extractions, new Term("field", "a"), new Term("field", "b"), new Term("field", "c"));

source = Intervals.notContainedBy(Intervals.term("a"), Intervals.ordered(Intervals.term("b"), Intervals.term("c")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "a"));

source = Intervals.notContaining(Intervals.term("a"), Intervals.ordered(Intervals.term("b"), Intervals.term("c")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "a"));

source = Intervals.nonOverlapping(Intervals.term("a"), Intervals.ordered(Intervals.term("b"), Intervals.term("c")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "a"));

source = Intervals.notWithin(Intervals.term("a"), 2, Intervals.ordered(Intervals.term("b"), Intervals.term("c")));
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "a"));
}

}
@@ -24,9 +24,11 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.InPlaceMergeSorter;
@@ -36,6 +38,8 @@
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

/**
* BlendedTermQuery can be used to unify term statistics across
@@ -242,6 +246,17 @@ public String toString(String field) {
return builder.toString();
}

@Override
public void visit(QueryVisitor visitor) {
Set<String> fields = Arrays.stream(terms).map(Term::field).collect(Collectors.toUnmodifiableSet());
for (String field : fields) {
if (visitor.acceptField(field) == false) {
return;
}
}
visitor.getSubVisitor(BooleanClause.Occur.SHOULD, this).consumeTerms(this, terms);
}

private class TermAndBoost implements Comparable<TermAndBoost> {
protected final Term term;
protected float boost;