From 3038efd02351faa23fc6d4a46c3dbb34c20ea189 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Mon, 24 Jan 2022 10:22:40 +0100 Subject: [PATCH 1/2] Allow doc-values only search on boolean fields --- .../mapping/params/doc-values.asciidoc | 3 +- docs/reference/mapping/types/boolean.asciidoc | 5 +- docs/reference/query-dsl.asciidoc | 2 +- .../test/field_caps/10_basic.yml | 15 ++++ .../test/search/390_doc_values_search.yml | 32 +++++++ .../index/mapper/BooleanFieldMapper.java | 90 ++++++++++++++++--- .../index/mapper/KeywordFieldMapper.java | 48 +++++++++- .../index/mapper/StringFieldType.java | 31 ++----- .../index/mapper/TermBasedFieldType.java | 30 +------ .../index/mapper/BooleanFieldTypeTests.java | 34 +++++-- 10 files changed, 218 insertions(+), 72 deletions(-) diff --git a/docs/reference/mapping/params/doc-values.asciidoc b/docs/reference/mapping/params/doc-values.asciidoc index 7d3b4170fc9da..122d555036cea 100644 --- a/docs/reference/mapping/params/doc-values.asciidoc +++ b/docs/reference/mapping/params/doc-values.asciidoc @@ -17,7 +17,8 @@ makes this data access pattern possible. They store the same values as the sorting and aggregations. Doc values are supported on almost all field types, with the __notable exception of `text` and `annotated_text` fields__. -<>, <>, and the <> +<>, <>, the <> +and the <> can also be queried using term or range-based queries when they are not <> but only have doc values enabled. Query performance on doc values is much slower than on index structures, but diff --git a/docs/reference/mapping/types/boolean.asciidoc b/docs/reference/mapping/types/boolean.asciidoc index 9a9dafa37a2c1..81055a0d2df5f 100644 --- a/docs/reference/mapping/types/boolean.asciidoc +++ b/docs/reference/mapping/types/boolean.asciidoc @@ -174,7 +174,10 @@ The following parameters are accepted by `boolean` fields: <>:: - Should the field be searchable? Accepts `true` (default) and `false`. + Should the field be quickly searchable? Accepts `true` (default) and + `false`. Fields that only have <> + enabled can still be queried using term or range-based queries, + albeit slower. <>:: diff --git a/docs/reference/query-dsl.asciidoc b/docs/reference/query-dsl.asciidoc index 39d60c0116f95..c0fa107ab1468 100644 --- a/docs/reference/query-dsl.asciidoc +++ b/docs/reference/query-dsl.asciidoc @@ -33,7 +33,7 @@ the stability of the cluster. Those queries can be categorised as follows: * Queries that need to do linear scans to identify matches: ** <> -** queries on <>, <>, or <> fields that are not indexed +** queries on <>, <>, <>, or <> fields that are not indexed but have <> enabled * Queries that have a high up-front cost: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/field_caps/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/field_caps/10_basic.yml index ff6643ad72f7f..06067c6f4c62d 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/field_caps/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/field_caps/10_basic.yml @@ -89,6 +89,9 @@ setup: non_indexed_keyword: type: keyword index: false + non_indexed_boolean: + type: boolean + index: false geo: type: keyword object: @@ -240,6 +243,18 @@ setup: - match: {fields.non_indexed_keyword.keyword.searchable: true} +--- +"Field caps for boolean field with only doc values": + - skip: + version: " - 8.0.99" + reason: "doc values search was added in 8.1.0" + - do: + field_caps: + index: 'test1,test2,test3' + fields: non_indexed_boolean + + - match: {fields.non_indexed_boolean.boolean.searchable: true} + --- "Get object and nested field caps": diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml index 0e8aec31e85d1..b6cd34767951b 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml @@ -39,6 +39,9 @@ setup: keyword: type: keyword index: false + boolean: + type: boolean + index: false - do: index: @@ -54,6 +57,7 @@ setup: short: 1 date: "2017/01/01" keyword: "key1" + boolean: "false" - do: index: @@ -69,6 +73,7 @@ setup: short: 2 date: "2017/01/02" keyword: "key2" + boolean: "true" - do: indices.refresh: {} @@ -252,3 +257,30 @@ setup: index: test body: { query: { range: { keyword: { gte: "key1" } } } } - length: { hits.hits: 2 } + +--- +"Test match query on boolean field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { match: { boolean: { query: "false" } } } } + - length: { hits.hits: 1 } + +--- +"Test terms query on boolean field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { terms: { keyword: [ "false", "true" ] } } } + - length: { hits.hits: 2 } + +--- +"Test range query on boolean field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { range: { keyword: { gte: "false" } } } } + - length: { hits.hits: 2 } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java index d18a5be255dff..9af63cd7d17bc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java @@ -14,6 +14,10 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; @@ -37,6 +41,7 @@ import java.io.IOException; import java.time.ZoneId; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; @@ -155,11 +160,15 @@ public BooleanFieldType( } public BooleanFieldType(String name) { - this(name, true, false, true, false, null, Collections.emptyMap()); + this(name, true); } - public BooleanFieldType(String name, boolean searchable) { - this(name, searchable, false, true, false, null, Collections.emptyMap()); + public BooleanFieldType(String name, boolean isIndexed) { + this(name, isIndexed, true); + } + + public BooleanFieldType(String name, boolean isIndexed, boolean hasDocValues) { + this(name, isIndexed, isIndexed, hasDocValues, false, null, Collections.emptyMap()); } @Override @@ -167,6 +176,11 @@ public String typeName() { return CONTENT_TYPE; } + @Override + public boolean isSearchable() { + return isIndexed() || hasDocValues(); + } + @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { if (format != null) { @@ -209,6 +223,15 @@ public BytesRef indexedValueForSearch(Object value) { }; } + private long docValueForSearch(Object value) { + BytesRef ref = indexedValueForSearch(value); + if (Values.TRUE.equals(ref)) { + return 1; + } else { + return 0; + } + } + @Override public Boolean valueForDisplay(Object value) { if (value == null) { @@ -234,6 +257,30 @@ public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) { return DocValueFormat.BOOLEAN; } + @Override + public Query termQuery(Object value, SearchExecutionContext context) { + failIfNotIndexedNorDocValuesFallback(context); + if (isIndexed()) { + return super.termQuery(value, context); + } else { + return SortedNumericDocValuesField.newSlowExactQuery(name(), docValueForSearch(value)); + } + } + + @Override + public Query termsQuery(Collection values, SearchExecutionContext context) { + failIfNotIndexedNorDocValuesFallback(context); + if (isIndexed()) { + return super.termsQuery(values, context); + } else { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Object value : values) { + builder.add(termQuery(value, context), BooleanClause.Occur.SHOULD); + } + return new ConstantScoreQuery(builder.build()); + } + } + @Override public Query rangeQuery( Object lowerTerm, @@ -242,14 +289,35 @@ public Query rangeQuery( boolean includeUpper, SearchExecutionContext context ) { - failIfNotIndexed(); - return new TermRangeQuery( - name(), - lowerTerm == null ? null : indexedValueForSearch(lowerTerm), - upperTerm == null ? null : indexedValueForSearch(upperTerm), - includeLower, - includeUpper - ); + failIfNotIndexedNorDocValuesFallback(context); + if (isIndexed()) { + return new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper + ); + } else { + long l = 0; + long u = 1; + if (lowerTerm != null) { + l = docValueForSearch(lowerTerm); + if (includeLower == false) { + l = Math.max(1, l + 1); + } + } + if (upperTerm != null) { + u = docValueForSearch(upperTerm); + if (includeUpper == false) { + l = Math.min(0, l - 1); + } + } + if (l > u) { + return new MatchNoDocsQuery(); + } + return SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); + } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 13ca733666a52..756bc4206ac9a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -20,6 +20,7 @@ import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.sandbox.search.DocValuesTermsQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -50,6 +51,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Locale; @@ -339,13 +341,51 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) { } @Override - protected boolean allowDocValueBasedQueries() { - return true; + public boolean isSearchable() { + return isIndexed() || hasDocValues(); } @Override - public boolean isSearchable() { - return isIndexed() || hasDocValues(); + public Query termQuery(Object value, SearchExecutionContext context) { + failIfNotIndexedNorDocValuesFallback(context); + if (isIndexed()) { + return super.termQuery(value, context); + } else { + return SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value)); + } + } + + @Override + public Query termsQuery(Collection values, SearchExecutionContext context) { + failIfNotIndexedNorDocValuesFallback(context); + if (isIndexed()) { + return super.termsQuery(values, context); + } else { + BytesRef[] bytesRefs = values.stream().map(this::indexedValueForSearch).toArray(BytesRef[]::new); + return new DocValuesTermsQuery(name(), bytesRefs); + } + } + + @Override + public Query rangeQuery( + Object lowerTerm, + Object upperTerm, + boolean includeLower, + boolean includeUpper, + SearchExecutionContext context + ) { + failIfNotIndexedNorDocValuesFallback(context); + if (isIndexed()) { + return super.rangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, context); + } else { + return SortedSetDocValuesField.newSlowRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper + ); + } } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java index 3979ebf218dd4..791a394e832aa 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.FuzzyQuery; @@ -211,27 +210,13 @@ public Query rangeQuery( + "' is set to false." ); } - if (allowDocValueBasedQueries()) { - failIfNotIndexedNorDocValuesFallback(context); - } else { - failIfNotIndexed(); - } - if (isIndexed()) { - return new TermRangeQuery( - name(), - lowerTerm == null ? null : indexedValueForSearch(lowerTerm), - upperTerm == null ? null : indexedValueForSearch(upperTerm), - includeLower, - includeUpper - ); - } else { - return SortedSetDocValuesField.newSlowRangeQuery( - name(), - lowerTerm == null ? null : indexedValueForSearch(lowerTerm), - upperTerm == null ? null : indexedValueForSearch(upperTerm), - includeLower, - includeUpper - ); - } + failIfNotIndexed(); + return new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper + ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java index 02db08072039f..80e6d04d967d5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java @@ -8,9 +8,7 @@ package org.elasticsearch.index.mapper; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.Term; -import org.apache.lucene.sandbox.search.DocValuesTermsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; @@ -37,10 +35,6 @@ public TermBasedFieldType( super(name, isIndexed, isStored, hasDocValues, textSearchInfo, meta); } - protected boolean allowDocValueBasedQueries() { - return false; - } - /** Returns the indexed value used to construct search "values". * This method is used for the default implementations of most * query factory methods such as {@link #termQuery}. */ @@ -61,31 +55,15 @@ public boolean mayExistInIndex(SearchExecutionContext context) { @Override public Query termQuery(Object value, SearchExecutionContext context) { - if (allowDocValueBasedQueries()) { - failIfNotIndexedNorDocValuesFallback(context); - } else { - failIfNotIndexed(); - } - if (isIndexed()) { - return new TermQuery(new Term(name(), indexedValueForSearch(value))); - } else { - return SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value)); - } + failIfNotIndexed(); + return new TermQuery(new Term(name(), indexedValueForSearch(value))); } @Override public Query termsQuery(Collection values, SearchExecutionContext context) { - if (allowDocValueBasedQueries()) { - failIfNotIndexedNorDocValuesFallback(context); - } else { - failIfNotIndexed(); - } + failIfNotIndexed(); BytesRef[] bytesRefs = values.stream().map(this::indexedValueForSearch).toArray(BytesRef[]::new); - if (isIndexed()) { - return new TermInSetQuery(name(), bytesRefs); - } else { - return new DocValuesTermsQuery(name(), bytesRefs); - } + return new TermInSetQuery(name(), bytesRefs); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldTypeTests.java index 248bac44be083..55f076e985c4d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldTypeTests.java @@ -7,8 +7,11 @@ */ package org.elasticsearch.index.mapper; +import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; import java.io.IOException; import java.util.Collections; @@ -33,12 +36,33 @@ public void testValueForSearch() { public void testTermQuery() { MappedFieldType ft = new BooleanFieldMapper.BooleanFieldType("field"); - assertEquals(new TermQuery(new Term("field", "T")), ft.termQuery("true", null)); - assertEquals(new TermQuery(new Term("field", "F")), ft.termQuery("false", null)); + assertEquals(new TermQuery(new Term("field", "T")), ft.termQuery("true", MOCK_CONTEXT)); + assertEquals(new TermQuery(new Term("field", "F")), ft.termQuery("false", MOCK_CONTEXT)); - MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("true", null)); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + MappedFieldType ft2 = new BooleanFieldMapper.BooleanFieldType("field", false); + assertEquals(SortedNumericDocValuesField.newSlowExactQuery("field", 1), ft2.termQuery("true", MOCK_CONTEXT)); + assertEquals(SortedNumericDocValuesField.newSlowExactQuery("field", 0), ft2.termQuery("false", MOCK_CONTEXT)); + + MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false, false); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("true", MOCK_CONTEXT)); + assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage()); + } + + public void testRangeQuery() { + MappedFieldType ft = new BooleanFieldMapper.BooleanFieldType("field"); + Query expected = new TermRangeQuery("field", BooleanFieldMapper.Values.FALSE, BooleanFieldMapper.Values.TRUE, true, true); + assertEquals(expected, ft.rangeQuery("false", "true", true, true, null, null, null, MOCK_CONTEXT)); + + ft = new BooleanFieldMapper.BooleanFieldType("field", false); + expected = SortedNumericDocValuesField.newSlowRangeQuery("field", 0, 1); + assertEquals(expected, ft.rangeQuery("false", "true", true, true, null, null, null, MOCK_CONTEXT)); + + MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false, false); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.rangeQuery("false", "true", true, true, null, null, null, MOCK_CONTEXT) + ); + assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage()); } public void testFetchSourceValue() throws IOException { From 6e9ef347283a283811803438a6ce1282fcc91386 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Mon, 24 Jan 2022 10:54:59 +0100 Subject: [PATCH 2/2] copy paste error --- .../rest-api-spec/test/search/390_doc_values_search.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml index b6cd34767951b..f39f89c876485 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml @@ -273,7 +273,7 @@ setup: - do: search: index: test - body: { query: { terms: { keyword: [ "false", "true" ] } } } + body: { query: { terms: { boolean: [ "false", "true" ] } } } - length: { hits.hits: 2 } --- @@ -282,5 +282,5 @@ setup: - do: search: index: test - body: { query: { range: { keyword: { gte: "false" } } } } + body: { query: { range: { boolean: { gte: "false" } } } } - length: { hits.hits: 2 }