Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow doc-values only search on boolean fields #82925

Merged
merged 2 commits into from
Jan 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/reference/mapping/params/doc-values.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ makes this data access pattern possible. They store the same values as the
sorting and aggregations. Doc values are supported on almost all field types,
with the __notable exception of `text` and `annotated_text` fields__.

<<number,Numeric types>>, <<date,date types>>, and the <<keyword, keyword type>>
<<number,Numeric types>>, <<date,date types>>, the <<boolean,boolean type>>
and the <<keyword,keyword type>>
can also be queried using term or range-based queries
when they are not <<mapping-index,indexed>> but only have doc values enabled.
Query performance on doc values is much slower than on index structures, but
Expand Down
5 changes: 4 additions & 1 deletion docs/reference/mapping/types/boolean.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,10 @@ The following parameters are accepted by `boolean` fields:

<<mapping-index,`index`>>::

Should the field be searchable? Accepts `true` (default) and `false`.
Should the field be quickly searchable? Accepts `true` (default) and
`false`. Fields that only have <<doc-values,`doc_values`>>
enabled can still be queried using term or range-based queries,
albeit slower.

<<null-value,`null_value`>>::

Expand Down
2 changes: 1 addition & 1 deletion docs/reference/query-dsl.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ the stability of the cluster. Those queries can be categorised as follows:

* Queries that need to do linear scans to identify matches:
** <<query-dsl-script-query,`script` queries>>
** queries on <<number,numeric>>, <<date,date>>, or <<keyword,keyword>> fields that are not indexed
** queries on <<number,numeric>>, <<date,date>>, <<boolean,boolean>>, or <<keyword,keyword>> fields that are not indexed
but have <<doc-values,doc values>> enabled

* Queries that have a high up-front cost:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ setup:
non_indexed_keyword:
type: keyword
index: false
non_indexed_boolean:
type: boolean
index: false
geo:
type: keyword
object:
Expand Down Expand Up @@ -240,6 +243,18 @@ setup:

- match: {fields.non_indexed_keyword.keyword.searchable: true}

---
"Field caps for boolean field with only doc values":
- skip:
version: " - 8.0.99"
reason: "doc values search was added in 8.1.0"
- do:
field_caps:
index: 'test1,test2,test3'
fields: non_indexed_boolean

- match: {fields.non_indexed_boolean.boolean.searchable: true}

---
"Get object and nested field caps":

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ setup:
keyword:
type: keyword
index: false
boolean:
type: boolean
index: false

- do:
index:
Expand All @@ -54,6 +57,7 @@ setup:
short: 1
date: "2017/01/01"
keyword: "key1"
boolean: "false"

- do:
index:
Expand All @@ -69,6 +73,7 @@ setup:
short: 2
date: "2017/01/02"
keyword: "key2"
boolean: "true"

- do:
indices.refresh: {}
Expand Down Expand Up @@ -252,3 +257,30 @@ setup:
index: test
body: { query: { range: { keyword: { gte: "key1" } } } }
- length: { hits.hits: 2 }

---
"Test match query on boolean field where only doc values are enabled":

- do:
search:
index: test
body: { query: { match: { boolean: { query: "false" } } } }
- length: { hits.hits: 1 }

---
"Test terms query on boolean field where only doc values are enabled":

- do:
search:
index: test
body: { query: { terms: { boolean: [ "false", "true" ] } } }
- length: { hits.hits: 2 }

---
"Test range query on boolean field where only doc values are enabled":

- do:
search:
index: test
body: { query: { range: { boolean: { gte: "false" } } } }
- length: { hits.hits: 2 }
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRef;
Expand All @@ -37,6 +41,7 @@

import java.io.IOException;
import java.time.ZoneId;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -155,18 +160,27 @@ public BooleanFieldType(
}

public BooleanFieldType(String name) {
this(name, true, false, true, false, null, Collections.emptyMap());
this(name, true);
}

public BooleanFieldType(String name, boolean searchable) {
this(name, searchable, false, true, false, null, Collections.emptyMap());
public BooleanFieldType(String name, boolean isIndexed) {
this(name, isIndexed, true);
}

/**
 * Convenience constructor that delegates to the full constructor, forwarding {@code isIndexed}
 * as both the second and third arguments, {@code hasDocValues} as the fourth, and fixed values
 * ({@code false}, {@code null}, an empty map) for the remaining ones.
 *
 * NOTE(review): the third positional argument receives {@code isIndexed}; the parameter list of
 * the target constructor is not visible here, so confirm that tying it to {@code isIndexed}
 * (rather than passing a constant {@code false}) is intended.
 *
 * @param name         the field name
 * @param isIndexed    whether the field is indexed
 * @param hasDocValues whether the field has doc values
 */
public BooleanFieldType(String name, boolean isIndexed, boolean hasDocValues) {
this(name, isIndexed, isIndexed, hasDocValues, false, null, Collections.emptyMap());
}

@Override
public String typeName() {
return CONTENT_TYPE;
}

/**
 * Reports whether this field can be searched: it can when it is indexed or,
 * failing that, when it has doc values.
 *
 * @return {@code true} if the field is indexed or has doc values
 */
@Override
public boolean isSearchable() {
    if (isIndexed()) {
        return true;
    }
    return hasDocValues();
}

@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
if (format != null) {
Expand Down Expand Up @@ -209,6 +223,15 @@ public BytesRef indexedValueForSearch(Object value) {
};
}

/**
 * Converts a search value into the numeric form used for doc-values queries.
 *
 * @param value the value supplied to the query
 * @return {@code 1} when {@link #indexedValueForSearch(Object)} yields {@code Values.TRUE},
 *         {@code 0} for anything else
 */
private long docValueForSearch(Object value) {
    final BytesRef encoded = indexedValueForSearch(value);
    return Values.TRUE.equals(encoded) ? 1 : 0;
}

@Override
public Boolean valueForDisplay(Object value) {
if (value == null) {
Expand All @@ -234,6 +257,30 @@ public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) {
return DocValueFormat.BOOLEAN;
}

/**
 * Builds a query matching documents whose value for this field equals {@code value}.
 * An indexed field uses the inherited term query; otherwise an exact-match query over
 * the field's numeric doc values is returned.
 *
 * @param value   the value to match
 * @param context the search execution context, passed to the precondition check
 * @return the term query for this field
 */
@Override
public Query termQuery(Object value, SearchExecutionContext context) {
    failIfNotIndexedNorDocValuesFallback(context);
    if (isIndexed() == false) {
        // Not indexed: fall back to a (slow) doc-values lookup of the 0/1 encoded value.
        return SortedNumericDocValuesField.newSlowExactQuery(name(), docValueForSearch(value));
    }
    return super.termQuery(value, context);
}

/**
 * Builds a query matching documents whose value for this field equals any of {@code values}.
 * An indexed field uses the inherited terms query; otherwise each value becomes an optional
 * clause (built via {@link #termQuery(Object, SearchExecutionContext)}) of a boolean query
 * wrapped in a constant-score query.
 *
 * @param values  the candidate values
 * @param context the search execution context
 * @return the terms query for this field
 */
@Override
public Query termsQuery(Collection<?> values, SearchExecutionContext context) {
    failIfNotIndexedNorDocValuesFallback(context);
    if (isIndexed()) {
        return super.termsQuery(values, context);
    }
    final BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
    values.forEach(candidate -> disjunction.add(termQuery(candidate, context), BooleanClause.Occur.SHOULD));
    return new ConstantScoreQuery(disjunction.build());
}

@Override
public Query rangeQuery(
Object lowerTerm,
Expand All @@ -242,14 +289,35 @@ public Query rangeQuery(
boolean includeUpper,
SearchExecutionContext context
) {
failIfNotIndexed();
return new TermRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper
);
failIfNotIndexedNorDocValuesFallback(context);
if (isIndexed()) {
return new TermRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper
);
} else {
long l = 0;
long u = 1;
if (lowerTerm != null) {
l = docValueForSearch(lowerTerm);
if (includeLower == false) {
l = Math.max(1, l + 1);
}
}
if (upperTerm != null) {
u = docValueForSearch(upperTerm);
if (includeUpper == false) {
l = Math.min(0, l - 1);
}
}
if (l > u) {
return new MatchNoDocsQuery();
}
return SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.sandbox.search.DocValuesTermsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
Expand Down Expand Up @@ -50,6 +51,7 @@

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
Expand Down Expand Up @@ -339,13 +341,51 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) {
}

@Override
protected boolean allowDocValueBasedQueries() {
return true;
public boolean isSearchable() {
return isIndexed() || hasDocValues();
}

@Override
public boolean isSearchable() {
return isIndexed() || hasDocValues();
/**
 * Builds a query matching documents whose value for this field equals {@code value}.
 * An indexed field uses the inherited term query; otherwise an exact-match query over
 * the field's sorted-set doc values is returned.
 *
 * @param value   the value to match
 * @param context the search execution context, passed to the precondition check
 * @return the term query for this field
 */
public Query termQuery(Object value, SearchExecutionContext context) {
    failIfNotIndexedNorDocValuesFallback(context);
    if (isIndexed() == false) {
        // Not indexed: fall back to a (slow) doc-values lookup of the encoded term.
        return SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value));
    }
    return super.termQuery(value, context);
}

/**
 * Builds a query matching documents whose value for this field equals any of {@code values}.
 * An indexed field uses the inherited terms query; otherwise every value is encoded with
 * {@link #indexedValueForSearch(Object)} and handed to a {@link DocValuesTermsQuery}.
 *
 * @param values  the candidate values
 * @param context the search execution context, passed to the precondition check
 * @return the terms query for this field
 */
@Override
public Query termsQuery(Collection<?> values, SearchExecutionContext context) {
    failIfNotIndexedNorDocValuesFallback(context);
    if (isIndexed()) {
        return super.termsQuery(values, context);
    }
    // Encode the values in iteration order.
    final BytesRef[] encodedTerms = new BytesRef[values.size()];
    int next = 0;
    for (Object candidate : values) {
        encodedTerms[next++] = indexedValueForSearch(candidate);
    }
    return new DocValuesTermsQuery(name(), encodedTerms);
}

/**
 * Builds a range query over this keyword field.
 * An indexed field uses the inherited range query; otherwise a range query over the field's
 * sorted-set doc values is returned, with each non-null bound encoded via
 * {@link #indexedValueForSearch(Object)}.
 *
 * @param lowerTerm    lower bound, or {@code null} for an open lower bound
 * @param upperTerm    upper bound, or {@code null} for an open upper bound
 * @param includeLower whether the lower bound is inclusive
 * @param includeUpper whether the upper bound is inclusive
 * @param context      the search execution context
 * @return the range query for this field
 */
@Override
public Query rangeQuery(
    Object lowerTerm,
    Object upperTerm,
    boolean includeLower,
    boolean includeUpper,
    SearchExecutionContext context
) {
    failIfNotIndexedNorDocValuesFallback(context);
    if (isIndexed()) {
        return super.rangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, context);
    }
    // Evaluate in the same order as the call arguments: field name, lower bound, upper bound.
    final String field = name();
    final BytesRef lowerBound = (lowerTerm != null) ? indexedValueForSearch(lowerTerm) : null;
    final BytesRef upperBound = (upperTerm != null) ? indexedValueForSearch(upperTerm) : null;
    return SortedSetDocValuesField.newSlowRangeQuery(field, lowerBound, upperBound, includeLower, includeUpper);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.FuzzyQuery;
Expand Down Expand Up @@ -211,27 +210,13 @@ public Query rangeQuery(
+ "' is set to false."
);
}
if (allowDocValueBasedQueries()) {
failIfNotIndexedNorDocValuesFallback(context);
} else {
failIfNotIndexed();
}
if (isIndexed()) {
return new TermRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper
);
} else {
return SortedSetDocValuesField.newSlowRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper
);
}
failIfNotIndexed();
return new TermRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper
);
}
}
Loading