From 4b5fab3be4ec815452caab1db9c9122e1cbc39c3 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 28 Apr 2020 15:50:55 +0300 Subject: [PATCH 01/39] Initial version of doc_count field mapper --- .../index/mapper/DocCountFieldMapper.java | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java new file mode 100644 index 0000000000000..8b985db44341a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -0,0 +1,150 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.mapper.ParseContext.Document; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.QueryShardException; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** Mapper for the _doc_count field. */ +public class DocCountFieldMapper extends MetadataFieldMapper { + + public static final String NAME = "_doc_count"; + public static final String CONTENT_TYPE = "_doc_count"; + + public static class Defaults { + + public static final String NAME = DocCountFieldMapper.NAME; + public static final MappedFieldType FIELD_TYPE = new DocCountFieldType(); + + static { + FIELD_TYPE.setName(NAME); + FIELD_TYPE.setDocValuesType(DocValuesType.NUMERIC); + FIELD_TYPE.setIndexOptions(IndexOptions.NONE); + FIELD_TYPE.setHasDocValues(true); + FIELD_TYPE.freeze(); + } + } + + public static class TypeParser implements MetadataFieldMapper.TypeParser { + @Override + public Builder parse(String name, Map node, + ParserContext parserContext) throws MapperParsingException { + throw new MapperParsingException(NAME + " is not configurable"); + } + + @Override + public MetadataFieldMapper getDefault(ParserContext context) { + final Settings indexSettings = context.mapperService().getIndexSettings().getSettings(); + return new DocCountFieldMapper(indexSettings); + } + } + + static final class DocCountFieldType extends MappedFieldType { + + DocCountFieldType() { + } + + protected DocCountFieldType(DocCountFieldType ref) { + super(ref); + } + + @Override + public MappedFieldType clone() { + return new DocCountFieldType(this); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public Query existsQuery(QueryShardContext context) { + return new DocValuesFieldExistsQuery(name()); + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + throw new QueryShardException(context, "The _doc_count field is not searchable"); + } + } + + private DocCountFieldMapper(Settings indexSettings) { + super(NAME, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE, indexSettings); + } + + @Override + public void preParse(ParseContext context) throws IOException { + super.parse(context); + } + + @Override + protected void parseCreateField(ParseContext context, List fields) throws IOException { + // see InternalEngine.updateVersion to see where the real version value is set + final Field docCount = new NumericDocValuesField(NAME, -1L); + context.version(docCount); + fields.add(docCount); + } + + @Override + public void parse(ParseContext context) throws IOException { + // _version added in preparse + } + + @Override + public void postParse(ParseContext context) throws IOException { + // In the case of nested docs, let's fill nested docs with version=1 so that Lucene doesn't write a Bitset for documents + // that don't have the field. This is consistent with the default value for efficiency. + Field version = context.version(); + assert version != null; + for (Document doc : context.nonRootDocuments()) { + doc.add(version); + } + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return builder; + } + + @Override + protected void doMerge(Mapper mergeWith) { + // nothing to do + } +} From cd515b30ef33ca8871e78d881acd77af897303b8 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 12 May 2020 14:55:33 +0300 Subject: [PATCH 02/39] added tests --- .../elasticsearch/indices/IndicesModule.java | 2 + .../mapper/DocCountFieldMapperTests.java | 136 ++++++++++++++++++ .../index/mapper/DocCountFieldTypeTests.java | 28 ++++ .../AggregateMetricFeatureSetUsage.java | 5 + 4 files changed, 171 insertions(+) create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index 0d951a43a1953..84d82fa148411 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -34,6 +34,7 @@ import org.elasticsearch.index.mapper.BooleanFieldMapper; import org.elasticsearch.index.mapper.CompletionFieldMapper; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DocCountFieldMapper; import org.elasticsearch.index.mapper.FieldAliasMapper; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.GeoPointFieldMapper; @@ -128,6 +129,7 @@ public static Map getMappers(List mappe mappers.put(CompletionFieldMapper.CONTENT_TYPE, new CompletionFieldMapper.TypeParser()); mappers.put(FieldAliasMapper.CONTENT_TYPE, new FieldAliasMapper.TypeParser()); mappers.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser()); + mappers.put(DocCountFieldMapper.CONTENT_TYPE, new DocCountFieldMapper.TypeParser()); for (MapperPlugin mapperPlugin : mapperPlugins) { for (Map.Entry entry : mapperPlugin.getMappers().entrySet()) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java new file mode 100644 index 0000000000000..eb130238b6b9c --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -0,0 +1,136 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.mapper; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.InternalSettingsPlugin; +import org.junit.Before; + +import java.util.Collection; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.core.IsInstanceOf.instanceOf; + +public class DocCountFieldMapperTests extends ESSingleNodeTestCase { + + private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE; + private static final String FIELD_NAME = "doc_count"; + + IndexService indexService; + DocumentMapperParser parser; + + @Override + protected Collection> getPlugins() { + return pluginList(InternalSettingsPlugin.class); + } + + @Before + public void setup() { + indexService = createIndex("test"); + parser = indexService.mapperService().documentMapperParser(); + } + + /** + * Test parsing field mapping and adding simple field + */ + public void testParseValue() throws Exception { + ensureGreen(); + + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject(FIELD_NAME) + .field("type", CONTENT_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + ); + + DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); + + Mapper fieldMapper = defaultMapper.mappers().getMapper(FIELD_NAME); + assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); + + ParsedDocument doc = defaultMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes( + XContentFactory.jsonBuilder() + .startObject() + .field(FIELD_NAME, 10) + .endObject() + ), + XContentType.JSON + ) + ); + + assertThat(doc.rootDoc().getField(FIELD_NAME), notNullValue()); + } + + /** + * Test parsing a doc_count field that contains no values + */ + public void testParseEmptyValue() throws Exception { + ensureGreen(); + + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject(FIELD_NAME) + .field("type", CONTENT_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + ); + + DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); + + Mapper fieldMapper = defaultMapper.mappers().getMapper(FIELD_NAME); + assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); + + Exception e = expectThrows( + MapperParsingException.class, + () -> defaultMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes(XContentFactory.jsonBuilder().startObject().startObject(FIELD_NAME).endObject().endObject()), + XContentType.JSON + ) + ) + ); + assertThat(e.getCause().getMessage(), containsString("Aggregate metric field [metric] must contain all metrics")); + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java new file mode 100644 index 0000000000000..110625d14aea5 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java @@ -0,0 +1,28 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.mapper; + +public class DocCountFieldTypeTests extends FieldTypeTestCase { + + @Override + protected MappedFieldType createDefaultFieldType() { + DocCountFieldMapper.DocCountFieldType fieldType = new DocCountFieldMapper.DocCountFieldType(); + return fieldType; + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java index de5afebeec253..a28148e0c8640 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java @@ -45,4 +45,9 @@ public boolean equals(Object obj) { public int hashCode() { return Objects.hash(available, enabled); } + + @Override + public Version getMinimalSupportedVersion() { + return Version.V_8_0_0; + } } From 655e112caf7d407a14b48c77566792d80f940f59 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 14 May 2020 17:13:22 +0300 Subject: [PATCH 03/39] Build fixes --- .../elasticsearch/index/mapper/DocCountFieldMapper.java | 9 +++------ .../aggregatemetric/AggregateMetricFeatureSetUsage.java | 8 ++------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 8b985db44341a..afd5dd7c4ec53 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -23,7 +23,6 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.settings.Settings; @@ -33,7 +32,6 @@ import org.elasticsearch.index.query.QueryShardException; import java.io.IOException; -import java.util.List; import java.util.Map; /** Mapper for the _doc_count field. */ @@ -96,7 +94,7 @@ public Query existsQuery(QueryShardContext context) { @Override public Query termQuery(Object value, QueryShardContext context) { - throw new QueryShardException(context, "The _doc_count field is not searchable"); + throw new QueryShardException(context, "Field of type [" + CONTENT_TYPE + " ] is not searchable"); } } @@ -110,11 +108,10 @@ public void preParse(ParseContext context) throws IOException { } @Override - protected void parseCreateField(ParseContext context, List fields) throws IOException { - // see InternalEngine.updateVersion to see where the real version value is set + protected void parseCreateField(ParseContext context) throws IOException { final Field docCount = new NumericDocValuesField(NAME, -1L); context.version(docCount); - fields.add(docCount); + context.doc().add(docCount); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java index a28148e0c8640..e163f71e187df 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java @@ -24,7 +24,8 @@ public AggregateMetricFeatureSetUsage(boolean available, boolean enabled) { super(XPackField.AGGREGATE_METRIC, available, enabled); } - @Override public Version getMinimalSupportedVersion() { + @Override + public Version getMinimalSupportedVersion() { return Version.V_8_0_0; } @@ -45,9 +46,4 @@ public boolean equals(Object obj) { public int hashCode() { return Objects.hash(available, enabled); } - - @Override - public Version getMinimalSupportedVersion() { - return Version.V_8_0_0; - } } From 191d7937209bc9630da247cb492075d29d530c07 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 14 May 2020 18:03:46 +0300 Subject: [PATCH 04/39] Added tests for doc_count fieldmapper --- .../index/mapper/DocCountFieldMapper.java | 79 ++++++++++--------- .../mapper/DocCountFieldMapperTests.java | 40 +--------- 2 files changed, 41 insertions(+), 78 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index afd5dd7c4ec53..0b8fa7c67802e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. */ - package org.elasticsearch.index.mapper; import org.apache.lucene.document.Field; @@ -27,26 +26,24 @@ import org.apache.lucene.search.Query; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.index.mapper.ParseContext.Document; +import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; import java.io.IOException; import java.util.Map; -/** Mapper for the _doc_count field. */ -public class DocCountFieldMapper extends MetadataFieldMapper { +import static org.elasticsearch.index.mapper.TypeParsers.parseField; - public static final String NAME = "_doc_count"; - public static final String CONTENT_TYPE = "_doc_count"; +/** Mapper for the doc_count field. */ +public class DocCountFieldMapper extends FieldMapper { - public static class Defaults { + public static final String CONTENT_TYPE = "doc_count"; - public static final String NAME = DocCountFieldMapper.NAME; + public static class Defaults { public static final MappedFieldType FIELD_TYPE = new DocCountFieldType(); static { - FIELD_TYPE.setName(NAME); FIELD_TYPE.setDocValuesType(DocValuesType.NUMERIC); FIELD_TYPE.setIndexOptions(IndexOptions.NONE); FIELD_TYPE.setHasDocValues(true); @@ -54,17 +51,27 @@ public static class Defaults { } } - public static class TypeParser implements MetadataFieldMapper.TypeParser { + public static class Builder extends FieldMapper.Builder { + + public Builder(String name) { + super(name, DocCountFieldMapper.Defaults.FIELD_TYPE, DocCountFieldMapper.Defaults.FIELD_TYPE); + builder = this; + } + @Override - public Builder parse(String name, Map node, - ParserContext parserContext) throws MapperParsingException { - throw new MapperParsingException(NAME + " is not configurable"); + public DocCountFieldMapper build(BuilderContext context) { + setupFieldType(context); + return new DocCountFieldMapper(name, fieldType, defaultFieldType, context.indexSettings()); } + } + public static class TypeParser implements Mapper.TypeParser { @Override - public MetadataFieldMapper getDefault(ParserContext context) { - final Settings indexSettings = context.mapperService().getIndexSettings().getSettings(); - return new DocCountFieldMapper(indexSettings); + public DocCountFieldMapper.Builder parse(String name, Map node, ParserContext parserContext) + throws MapperParsingException { + DocCountFieldMapper.Builder builder = new DocCountFieldMapper.Builder(name); + parseField(builder, name, node, parserContext); + return builder; } } @@ -94,39 +101,33 @@ public Query existsQuery(QueryShardContext context) { @Override public Query termQuery(Object value, QueryShardContext context) { - throw new QueryShardException(context, "Field of type [" + CONTENT_TYPE + " ] is not searchable"); + throw new QueryShardException(context, "Field [" + name() + " ]of type [" + CONTENT_TYPE + "] is not searchable"); + } } - private DocCountFieldMapper(Settings indexSettings) { - super(NAME, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE, indexSettings); - } + protected DocCountFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings) { + super(simpleName, fieldType, defaultFieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); - @Override - public void preParse(ParseContext context) throws IOException { - super.parse(context); } @Override protected void parseCreateField(ParseContext context) throws IOException { - final Field docCount = new NumericDocValuesField(NAME, -1L); - context.version(docCount); - context.doc().add(docCount); - } + Number value; + if (context.parser().currentToken() == XContentParser.Token.VALUE_NUMBER) { + value = context.parser().numberValue().floatValue(); + } else { + return; + } - @Override - public void parse(ParseContext context) throws IOException { - // _version added in preparse - } + if (value != null) { + if (value.longValue() < 0 || value.floatValue() != value.longValue()) { + throw new IllegalArgumentException( + "Field [" + fieldType.name() + "] must always be a positive integer"); + } - @Override - public void postParse(ParseContext context) throws IOException { - // In the case of nested docs, let's fill nested docs with version=1 so that Lucene doesn't write a Bitset for documents - // that don't have the field. This is consistent with the default value for efficiency. - Field version = context.version(); - assert version != null; - for (Document doc : context.nonRootDocuments()) { - doc.add(version); + final Field docCount = new NumericDocValuesField(name(), value.longValue()); + context.doc().add(docCount); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index eb130238b6b9c..5dbeb243b5862 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -91,46 +91,8 @@ public void testParseValue() throws Exception { XContentType.JSON ) ); - - assertThat(doc.rootDoc().getField(FIELD_NAME), notNullValue()); + assertEquals(10L, doc.rootDoc().getField(FIELD_NAME).numericValue()); } - /** - * Test parsing a doc_count field that contains no values - */ - public void testParseEmptyValue() throws Exception { - ensureGreen(); - - String mapping = Strings.toString( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", CONTENT_TYPE) - .endObject() - .endObject() - .endObject() - .endObject() - ); - - DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); - - Mapper fieldMapper = defaultMapper.mappers().getMapper(FIELD_NAME); - assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); - - Exception e = expectThrows( - MapperParsingException.class, - () -> defaultMapper.parse( - new SourceToParse( - "test", - "1", - BytesReference.bytes(XContentFactory.jsonBuilder().startObject().startObject(FIELD_NAME).endObject().endObject()), - XContentType.JSON - ) - ) - ); - assertThat(e.getCause().getMessage(), containsString("Aggregate metric field [metric] must contain all metrics")); - } } From 5f81beeb062c99343512d9a1b50a8c33d65e7e6c Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Wed, 17 Jun 2020 18:24:47 +0300 Subject: [PATCH 05/39] doc count tests --- .../mapper/DocCountFieldMapperTests.java | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index 5dbeb243b5862..14ed8953b369e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -31,8 +31,6 @@ import java.util.Collection; -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.core.IsInstanceOf.instanceOf; public class DocCountFieldMapperTests extends ESSingleNodeTestCase { @@ -94,5 +92,42 @@ public void testParseValue() throws Exception { assertEquals(10L, doc.rootDoc().getField(FIELD_NAME).numericValue()); } + public void testReadDocCounts() throws Exception { + ensureGreen(); + + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject(FIELD_NAME) + .field("type", CONTENT_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + ); + + DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); + + Mapper fieldMapper = defaultMapper.mappers().getMapper(FIELD_NAME); + assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); + + ParsedDocument doc = defaultMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes( + XContentFactory.jsonBuilder() + .startObject() + .field(FIELD_NAME, 10) + .field("t", "5") + .endObject() + ), + XContentType.JSON + ) + ); + assertEquals(10L, doc.rootDoc().getField(FIELD_NAME).numericValue()); + } } From 520ac9afdc0494a785d1c77adef53271eba06b16 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 30 Jun 2020 15:15:34 +0300 Subject: [PATCH 06/39] Resolve conflicts after merge from master --- .../index/mapper/DocCountFieldMapper.java | 43 +++++++++++++------ .../index/mapper/DocCountFieldTypeTests.java | 8 ++-- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 0b8fa7c67802e..ab10f78766343 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -19,18 +19,20 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; import java.io.IOException; +import java.util.Collections; +import java.util.List; import java.util.Map; import static org.elasticsearch.index.mapper.TypeParsers.parseField; @@ -41,27 +43,28 @@ public class DocCountFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "doc_count"; public static class Defaults { - public static final MappedFieldType FIELD_TYPE = new DocCountFieldType(); + public static final FieldType FIELD_TYPE = new FieldType(); static { FIELD_TYPE.setDocValuesType(DocValuesType.NUMERIC); FIELD_TYPE.setIndexOptions(IndexOptions.NONE); - FIELD_TYPE.setHasDocValues(true); +// FIELD_TYPE.setHasDocValues(true); FIELD_TYPE.freeze(); } } - public static class Builder extends FieldMapper.Builder { + public static class Builder extends FieldMapper.Builder { public Builder(String name) { - super(name, DocCountFieldMapper.Defaults.FIELD_TYPE, DocCountFieldMapper.Defaults.FIELD_TYPE); + super(name, Defaults.FIELD_TYPE); builder = this; } @Override public DocCountFieldMapper build(BuilderContext context) { - setupFieldType(context); - return new DocCountFieldMapper(name, fieldType, defaultFieldType, context.indexSettings()); + DocCountFieldType defaultFieldType = new DocCountFieldType(buildFullName(context), hasDocValues, meta); + + return new DocCountFieldMapper(name, fieldType, defaultFieldType); } } @@ -75,9 +78,14 @@ public DocCountFieldMapper.Builder parse(String name, Map node, } } - static final class DocCountFieldType extends MappedFieldType { + public static final class DocCountFieldType extends MappedFieldType { + + public DocCountFieldType(String name) { + this(name, true, Collections.emptyMap()); + } - DocCountFieldType() { + public DocCountFieldType(String name, boolean hasDocValues, Map meta) { + super(name, false, hasDocValues, TextSearchInfo.NONE, meta); } protected DocCountFieldType(DocCountFieldType ref) { @@ -106,9 +114,11 @@ public Query termQuery(Object value, QueryShardContext context) { } } - protected DocCountFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings) { - super(simpleName, fieldType, defaultFieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); - + protected DocCountFieldMapper( + String simpleName, + FieldType fieldType, + MappedFieldType defaultFieldType) { + super(simpleName, fieldType, defaultFieldType, MultiFields.empty(), CopyTo.empty()); } @Override @@ -123,7 +133,7 @@ protected void parseCreateField(ParseContext context) throws IOException { if (value != null) { if (value.longValue() < 0 || value.floatValue() != value.longValue()) { throw new IllegalArgumentException( - "Field [" + fieldType.name() + "] must always be a positive integer"); + "Field [" + fieldType().name() + "] must always be a positive integer"); } final Field docCount = new NumericDocValuesField(name(), value.longValue()); @@ -131,6 +141,11 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + public DocCountFieldType fieldType() { + return (DocCountFieldType) super.fieldType(); + } + @Override protected String contentType() { return CONTENT_TYPE; @@ -142,7 +157,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - protected void doMerge(Mapper mergeWith) { + protected void mergeOptions(FieldMapper mergeWith, List conflicts) { // nothing to do } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java index 110625d14aea5..6df91a239df36 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java @@ -18,11 +18,13 @@ */ package org.elasticsearch.index.mapper; -public class DocCountFieldTypeTests extends FieldTypeTestCase { +import java.util.Map; + +public class DocCountFieldTypeTests extends FieldTypeTestCase { @Override - protected MappedFieldType createDefaultFieldType() { - DocCountFieldMapper.DocCountFieldType fieldType = new DocCountFieldMapper.DocCountFieldType(); + protected DocCountFieldMapper.DocCountFieldType createDefaultFieldType(String name, Map meta) { + DocCountFieldMapper.DocCountFieldType fieldType = new DocCountFieldMapper.DocCountFieldType(name, true, meta); return fieldType; } } From 676ffc63133db42e8e92246c0d5f9ddc453ac944 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 30 Jun 2020 15:58:24 +0300 Subject: [PATCH 07/39] Added yaml test for doc_count field type --- .../360_doc_count_field.yml | 43 +++++++++++++++++++ .../index/mapper/DocCountFieldMapper.java | 3 +- 2 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml new file mode 100644 index 0000000000000..56e85a5fcef7a --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml @@ -0,0 +1,43 @@ +setup: + - do: + indices.create: + index: test_1 + body: + settings: + number_of_replicas: 0 + mappings: + properties: + doc_count: + type: doc_count + str: + type: keyword + number: + type: integer + + - do: + bulk: + index: test_1 + refresh: true + body: + - '{"index": {}}' + - '{"doc_count": 10, "str": "abc", "number" : 500 }' + - '{"index": {}}' + - '{"doc_count": 5, "str": "xyz", "number" : 1500 }' + - '{"index": {}}' + - '{"doc_count": 7, "str": "foo", "number" : 100 }' + - '{"index": {}}' + - '{"doc_count": 1, "str": "foo", "number" : 30 }' + - '{"index": {}}' + - '{"str": "abc", "number" : 500 }' +--- +"Test terms agg with doc_count": + + - do: + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str" } } } } + + - match: { hits.total: 5 } + - length: { aggregations.str_terms.buckets: 3 } + - match: { aggregations.str_terms.buckets.0.key: 'abc' } + - match: { aggregations.str_terms.buckets.0.doc_count: 11 } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index ab10f78766343..f5f1bef5eeb3f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -48,7 +48,6 @@ public static class Defaults { static { FIELD_TYPE.setDocValuesType(DocValuesType.NUMERIC); FIELD_TYPE.setIndexOptions(IndexOptions.NONE); -// FIELD_TYPE.setHasDocValues(true); FIELD_TYPE.freeze(); } } @@ -85,7 +84,7 @@ public DocCountFieldType(String name) { } public DocCountFieldType(String name, boolean hasDocValues, Map meta) { - super(name, false, hasDocValues, TextSearchInfo.NONE, meta); + super(name, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); } protected DocCountFieldType(DocCountFieldType ref) { From d3b9c45788315b4e1347c069f668fc5ec97a64e3 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 30 Jun 2020 20:05:19 +0300 Subject: [PATCH 08/39] Minor changes to test --- .../search.aggregation/360_doc_count_field.yml | 13 +++++++------ .../index/mapper/DocCountFieldMapper.java | 15 +++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml index 56e85a5fcef7a..95599709112d5 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml @@ -7,12 +7,12 @@ setup: number_of_replicas: 0 mappings: properties: - doc_count: - type: doc_count str: type: keyword number: type: integer + dc: + type: doc_count - do: bulk: @@ -20,15 +20,16 @@ setup: refresh: true body: - '{"index": {}}' - - '{"doc_count": 10, "str": "abc", "number" : 500 }' + - '{"dc": 10, "str": "abc", "number" : 500 }' - '{"index": {}}' - - '{"doc_count": 5, "str": "xyz", "number" : 1500 }' + - '{"dc": 5, "str": "xyz", "number" : 1500 }' - '{"index": {}}' - - '{"doc_count": 7, "str": "foo", "number" : 100 }' + - '{"dc": 7, "str": "foo", "number" : 100 }' - '{"index": {}}' - - '{"doc_count": 1, "str": "foo", "number" : 30 }' + - '{"dc": 1, "str": "foo", "number" : 30 }' - '{"index": {}}' - '{"str": "abc", "number" : 500 }' + --- "Test terms agg with doc_count": diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index f5f1bef5eeb3f..9056c3a3dcf7b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -62,12 +62,15 @@ public Builder(String name) { @Override public DocCountFieldMapper build(BuilderContext context) { DocCountFieldType defaultFieldType = new DocCountFieldType(buildFullName(context), hasDocValues, meta); - return new DocCountFieldMapper(name, fieldType, defaultFieldType); } } public static class TypeParser implements Mapper.TypeParser { + + public TypeParser() { + } + @Override public DocCountFieldMapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { @@ -79,15 +82,15 @@ public DocCountFieldMapper.Builder parse(String name, Map node, public static final class DocCountFieldType extends MappedFieldType { - public DocCountFieldType(String name) { - this(name, true, Collections.emptyMap()); - } - public DocCountFieldType(String name, boolean hasDocValues, Map meta) { super(name, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); } - protected DocCountFieldType(DocCountFieldType ref) { + public DocCountFieldType(String name) { + this(name, true, Collections.emptyMap()); + } + + DocCountFieldType(DocCountFieldType ref) { super(ref); } From c36ecacadf05dad4ae0569ac3bb311069e66c4c0 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 2 Jul 2020 11:03:45 +0300 Subject: [PATCH 09/39] Fix issue with not-registering field mapper --- .../org/elasticsearch/index/mapper/DocCountFieldMapper.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 9056c3a3dcf7b..384de37ac6bb4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -153,11 +153,6 @@ protected String contentType() { return CONTENT_TYPE; } - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - return builder; - } - @Override protected void mergeOptions(FieldMapper mergeWith, List conflicts) { // nothing to do From 4dca391c27d91f7edb956b22343b148ccc8c4984 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 2 Jul 2020 11:04:03 +0300 Subject: [PATCH 10/39] Simplify terms agg test --- .../test/search.aggregation/360_doc_count_field.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml index 95599709112d5..aae296906e2bb 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml @@ -22,11 +22,11 @@ setup: - '{"index": {}}' - '{"dc": 10, "str": "abc", "number" : 500 }' - '{"index": {}}' - - '{"dc": 5, "str": "xyz", "number" : 1500 }' + - '{"dc": 5, "str": "xyz", "number" : 100 }' - '{"index": {}}' - '{"dc": 7, "str": "foo", "number" : 100 }' - '{"index": {}}' - - '{"dc": 1, "str": "foo", "number" : 30 }' + - '{"dc": 1, "str": "foo", "number" : 200 }' - '{"index": {}}' - '{"str": "abc", "number" : 500 }' @@ -36,9 +36,9 @@ setup: - do: search: rest_total_hits_as_int: true - body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str" } } } } + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "number" } } } } - match: { hits.total: 5 } - length: { aggregations.str_terms.buckets: 3 } - - match: { aggregations.str_terms.buckets.0.key: 'abc' } - - match: { aggregations.str_terms.buckets.0.doc_count: 11 } + - match: { aggregations.str_terms.buckets.0.key: 100 } + - match: { aggregations.str_terms.buckets.0.doc_count: 8 } From 912d9434ba319e5f8b76b5b5f3d0ef6e818683be Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 9 Jul 2020 19:26:57 +0300 Subject: [PATCH 11/39] Add doc_count provider in the buckets aggregator --- .../360_doc_count_field.yml | 30 +++++++++-- .../index/mapper/DocCountFieldMapper.java | 1 - .../search/aggregations/AggregatorBase.java | 4 +- .../bucket/BucketsAggregator.java | 24 ++++++++- .../aggregations/bucket/DocCountProvider.java | 37 +++++++++++++ .../bucket/FieldDocCountProvider.java | 52 +++++++++++++++++++ .../bucket/nested/NestedAggregator.java | 3 +- 7 files changed, 142 insertions(+), 9 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java create mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldDocCountProvider.java diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml index aae296906e2bb..c669b8bb19c51 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml @@ -31,14 +31,36 @@ setup: - '{"str": "abc", "number" : 500 }' --- -"Test terms agg with doc_count": +"Test numeric terms agg with doc_count": - do: search: rest_total_hits_as_int: true - body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "number" } } } } + body: { "size" : 0, "aggs" : { "num_terms" : { "terms" : { "field" : "number" } } } } + + - match: { hits.total: 5 } + - length: { aggregations.num_terms.buckets: 3 } + - match: { aggregations.num_terms.buckets.0.key: 100 } + - match: { aggregations.num_terms.buckets.0.doc_count: 12 } + - match: { aggregations.num_terms.buckets.1.key: 500 } + - match: { aggregations.num_terms.buckets.1.doc_count: 11 } + - match: { aggregations.num_terms.buckets.2.key: 200 } + - match: { aggregations.num_terms.buckets.2.doc_count: 1 } + + +--- +"Test string terms agg with doc_count": + + - do: + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str" } } } } - match: { hits.total: 5 } - length: { aggregations.str_terms.buckets: 3 } - - match: { aggregations.str_terms.buckets.0.key: 100 } - - match: { aggregations.str_terms.buckets.0.doc_count: 8 } + - match: { aggregations.str_terms.buckets.0.key: "abc" } + - match: { aggregations.str_terms.buckets.0.doc_count: 11 } + - match: { aggregations.str_terms.buckets.1.key: "foo" } + - match: { aggregations.str_terms.buckets.1.doc_count: 8 } + - match: { aggregations.str_terms.buckets.2.key: "xyz" } + - match: { aggregations.str_terms.buckets.2.doc_count: 5 } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 384de37ac6bb4..f71bc96ea9328 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; -import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java index 18170b8883e9f..423d8262a08f1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java @@ -157,7 +157,7 @@ public Map metadata() { @Override public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { - preGetSubLeafCollectors(); + preGetSubLeafCollectors(ctx); final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx); return getLeafCollector(ctx, sub); } @@ -166,7 +166,7 @@ public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws * Can be overridden by aggregator implementations that like the perform an operation before the leaf collectors * of children aggregators are instantiated for the next segment. */ - protected void preGetSubLeafCollectors() throws IOException { + protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { } /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 3c50fb0f64171..de42b72f2dc92 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -18,9 +18,12 @@ */ package org.elasticsearch.search.aggregations.bucket; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.index.mapper.DocCountFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorBase; @@ -51,6 +54,7 @@ public abstract class BucketsAggregator extends AggregatorBase { private final BigArrays bigArrays; private final IntConsumer multiBucketConsumer; private IntArray docCounts; + private DocCountProvider fieldDocCountProvider = DocCountProvider.DEFAULT_PROVIDER; public BucketsAggregator(String name, AggregatorFactories factories, SearchContext context, Aggregator parent, Map metadata) throws IOException { @@ -90,7 +94,8 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long * Same as {@link #collectBucket(LeafBucketCollector, int, long)}, but doesn't check if the docCounts needs to be re-sized. */ public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException { - if (docCounts.increment(bucketOrd, 1) == 1) { + int docCount = fieldDocCountProvider.getDocCount(doc); + if (docCounts.increment(bucketOrd, docCount) == 1) { // We calculate the final number of buckets only during the reduce phase. But we still need to // trigger bucket consumer from time to time in order to give it a chance to check available memory and break // the execution if we are running out. To achieve that we are passing 0 as a bucket count. @@ -391,4 +396,21 @@ public static boolean descendsFromGlobalAggregator(Aggregator parent) { return false; } + @Override + protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { + super.preGetSubLeafCollectors(ctx); + // Check index mappings to find a field of type doc_count. If one is found + // use that one to retrieve the doc count for the bucket + + // In agg tests fieldTypes is null. TODO: Fix test fixtures so that fieldTypes is not null + if (context.getQueryShardContext().getMapperService().fieldTypes() != null) { + for (MappedFieldType fieldType : context.getQueryShardContext().getMapperService().fieldTypes()) { + if (DocCountFieldMapper.CONTENT_TYPE.equals(fieldType.typeName())) { + // If a field of type doc_count has been found, use it to provide the bucket doc_count values + fieldDocCountProvider = new FieldDocCountProvider(ctx, fieldType.name()); + break; + } + } + } + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java new file mode 100644 index 0000000000000..91aa3407bcfc9 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -0,0 +1,37 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import java.io.IOException; + +/** + * Interface that allows adding custom implementations for providing + * doc_count values for a document. + */ +public interface DocCountProvider { + + /** + * Default implementation of a doc_count provider returns for every doc collected. + */ + DocCountProvider DEFAULT_PROVIDER = doc -> 1; + + /** Return the number of doc_count for a specific document */ + int getDocCount(int doc) throws IOException; +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldDocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldDocCountProvider.java new file mode 100644 index 0000000000000..ac8c5483014b8 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldDocCountProvider.java @@ -0,0 +1,52 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; + +import java.io.IOException; + +/** + * An implementation of a doc_count provider that reads the doc_count value + * in a doc value field in the document + */ +public class FieldDocCountProvider implements DocCountProvider { + + private NumericDocValues docCountValues; + + public FieldDocCountProvider(LeafReaderContext ctx, String docCountFieldName) { + try { + docCountValues = DocValues.getNumeric(ctx.reader(), docCountFieldName); + } catch (IOException e) { + docCountValues = null; + } + } + + @Override + public int getDocCount(int doc) throws IOException { + if (docCountValues != null && docCountValues.advanceExact(doc)) { + return (int) docCountValues.longValue(); + } else { + return 1; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 07390e43a667d..2f7b6cfb9fc75 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -106,7 +106,8 @@ public void collect(int parentDoc, long bucket) throws IOException { } @Override - protected void preGetSubLeafCollectors() throws IOException { + protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { + super.preGetSubLeafCollectors(ctx); processBufferedDocs(); } From c0f23ae439df00d16a62a001fd4dd32a46a6fcd2 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 14 Jul 2020 18:23:33 +0300 Subject: [PATCH 12/39] Initialize doc_count provider once doc_count provider is initialized once in the buckets agg constructor. --- .../bucket/BucketsAggregator.java | 28 ++++++++++--------- .../aggregations/bucket/DocCountProvider.java | 4 +++ ...r.java => FieldBasedDocCountProvider.java} | 24 ++++++++++------ 3 files changed, 35 insertions(+), 21 deletions(-) rename server/src/main/java/org/elasticsearch/search/aggregations/bucket/{FieldDocCountProvider.java => FieldBasedDocCountProvider.java} (77%) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 9b7280c77b045..fa5e68c7bfb49 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -67,6 +67,19 @@ public BucketsAggregator(String name, AggregatorFactories factories, SearchConte multiBucketConsumer = (count) -> {}; } docCounts = bigArrays.newIntArray(1, true); + + // Check index mappings to find a field of type doc_count. If one is found + // use that one to retrieve the doc count for the bucket + // In agg tests fieldTypes is null. TODO: Fix test fixtures so that fieldTypes is not null + if (context.getQueryShardContext().getMapperService().fieldTypes() != null) { + for (MappedFieldType fieldType : context.getQueryShardContext().getMapperService().fieldTypes()) { + if (DocCountFieldMapper.CONTENT_TYPE.equals(fieldType.typeName())) { + // If a field of type doc_count has been found, use it to provide the bucket doc_count values + fieldDocCountProvider = new FieldBasedDocCountProvider(fieldType.name()); + break; + } + } + } } /** @@ -400,18 +413,7 @@ public static boolean descendsFromGlobalAggregator(Aggregator parent) { @Override protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { super.preGetSubLeafCollectors(ctx); - // Check index mappings to find a field of type doc_count. If one is found - // use that one to retrieve the doc count for the bucket - - // In agg tests fieldTypes is null. TODO: Fix test fixtures so that fieldTypes is not null - if (context.getQueryShardContext().getMapperService().fieldTypes() != null) { - for (MappedFieldType fieldType : context.getQueryShardContext().getMapperService().fieldTypes()) { - if (DocCountFieldMapper.CONTENT_TYPE.equals(fieldType.typeName())) { - // If a field of type doc_count has been found, use it to provide the bucket doc_count values - fieldDocCountProvider = new FieldDocCountProvider(ctx, fieldType.name()); - break; - } - } - } + // Set LeafReaderContext to the field based doc_count provider + fieldDocCountProvider.setLeafReaderContext(ctx); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java index 91aa3407bcfc9..2aad03180071b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -19,6 +19,8 @@ package org.elasticsearch.search.aggregations.bucket; +import org.apache.lucene.index.LeafReaderContext; + import java.io.IOException; /** @@ -34,4 +36,6 @@ public interface DocCountProvider { /** Return the number of doc_count for a specific document */ int getDocCount(int doc) throws IOException; + + default void setLeafReaderContext(LeafReaderContext ctx) {} } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldDocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java similarity index 77% rename from server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldDocCountProvider.java rename to server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java index ac8c5483014b8..3087d0667a096 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldDocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java @@ -27,18 +27,16 @@ /** * An implementation of a doc_count provider that reads the doc_count value - * in a doc value field in the document + * in a doc value field in the document. If a document has no doc_count field + * the implementation will return 1 as the default value. */ -public class FieldDocCountProvider implements DocCountProvider { +public class FieldBasedDocCountProvider implements DocCountProvider { + private final String docCountFieldName; private NumericDocValues docCountValues; - public FieldDocCountProvider(LeafReaderContext ctx, String docCountFieldName) { - try { - docCountValues = DocValues.getNumeric(ctx.reader(), docCountFieldName); - } catch (IOException e) { - docCountValues = null; - } + public FieldBasedDocCountProvider(String docCountFieldName) { + this.docCountFieldName = docCountFieldName; } @Override @@ -49,4 +47,14 @@ public int getDocCount(int doc) throws IOException { return 1; } } + + @Override + public void setLeafReaderContext(LeafReaderContext ctx) { + try { + docCountValues = DocValues.getNumeric(ctx.reader(), docCountFieldName); + } catch (IOException e) { + docCountValues = null; + } + } + } From f7b43c11bd92d5353f3c534a1f4dca8f3b0a03f2 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Wed, 15 Jul 2020 23:03:39 +0300 Subject: [PATCH 13/39] Added tests for FieldBasedDocCountProvider --- .../bucket/BucketsAggregator.java | 13 +-- .../FieldBasedDocCountProviderTests.java | 108 ++++++++++++++++++ .../aggregations/AggregatorTestCase.java | 1 + 3 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index fa5e68c7bfb49..01d600dc3d49a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -70,14 +70,11 @@ public BucketsAggregator(String name, AggregatorFactories factories, SearchConte // Check index mappings to find a field of type doc_count. If one is found // use that one to retrieve the doc count for the bucket - // In agg tests fieldTypes is null. TODO: Fix test fixtures so that fieldTypes is not null - if (context.getQueryShardContext().getMapperService().fieldTypes() != null) { - for (MappedFieldType fieldType : context.getQueryShardContext().getMapperService().fieldTypes()) { - if (DocCountFieldMapper.CONTENT_TYPE.equals(fieldType.typeName())) { - // If a field of type doc_count has been found, use it to provide the bucket doc_count values - fieldDocCountProvider = new FieldBasedDocCountProvider(fieldType.name()); - break; - } + for (MappedFieldType fieldType : context.getQueryShardContext().getMapperService().fieldTypes()) { + if (DocCountFieldMapper.CONTENT_TYPE.equals(fieldType.typeName())) { + // If a field of type doc_count has been found, use it to provide the bucket doc_count values + fieldDocCountProvider = new FieldBasedDocCountProvider(fieldType.name()); + break; } } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java new file mode 100644 index 0000000000000..a36ab35f2b294 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java @@ -0,0 +1,108 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.index.mapper.DocCountFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal; + +import java.io.IOException; +import java.util.List; +import java.util.function.Consumer; + +import static java.util.Collections.singleton; + + +public class FieldBasedDocCountProviderTests extends AggregatorTestCase { + + private static final String DOC_COUNT_FIELD = "doc_count"; + private static final String NUMBER_FIELD = "number"; + + public void testDocsWithDocCount() throws IOException { + testAggregation(new MatchAllDocsQuery(), iw -> { + iw.addDocument(List.of( + new NumericDocValuesField(DOC_COUNT_FIELD, 4), + new SortedNumericDocValuesField(NUMBER_FIELD, 1) + )); + iw.addDocument(List.of( + new NumericDocValuesField(DOC_COUNT_FIELD, 5), + new SortedNumericDocValuesField(NUMBER_FIELD, 7) + )); + iw.addDocument(List.of( + // Intentionally omit doc_count field + new SortedNumericDocValuesField(NUMBER_FIELD, 1) + )); + }, global -> { + assertEquals(10, global.getDocCount()); + }); + } + + public void testDocsWithoutDocCount() throws IOException { + testAggregation(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 1))); + iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 7))); + iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 1))); + }, global -> { + assertEquals(3, global.getDocCount()); + }); + } + + public void testQueryFiltering() throws IOException { + testAggregation(IntPoint.newRangeQuery(NUMBER_FIELD, 4, 5), iw -> { + iw.addDocument(List.of( + new NumericDocValuesField(DOC_COUNT_FIELD, 4), + new IntPoint(NUMBER_FIELD, 6) + )); + iw.addDocument(List.of( + new NumericDocValuesField(DOC_COUNT_FIELD, 2), + new IntPoint(NUMBER_FIELD, 5) + )); + iw.addDocument(List.of( + // Intentionally omit doc_count field + new IntPoint(NUMBER_FIELD, 1) + )); + iw.addDocument(List.of( + // Intentionally omit doc_count field + new IntPoint(NUMBER_FIELD, 5) + )); + }, global -> { + assertEquals(3, global.getDocCount()); + }); + } + + private void testAggregation(Query query, + CheckedConsumer indexer, + Consumer verify) throws IOException { + GlobalAggregationBuilder aggregationBuilder = new GlobalAggregationBuilder("_name"); + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD, NumberFieldMapper.NumberType.LONG); + MappedFieldType docCountFieldType = new DocCountFieldMapper.DocCountFieldType(DOC_COUNT_FIELD); + testCase(aggregationBuilder, query, indexer, verify, fieldType, docCountFieldType); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index be8197058c6fa..29455f5f62f64 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -306,6 +306,7 @@ public boolean shouldCache(Query query) { // TODO: now just needed for top_hits, this will need to be revised for other agg unit tests: MapperService mapperService = mapperServiceMock(); + when(mapperService.fieldTypes()).thenReturn(List.of(fieldTypes)); when(mapperService.getIndexSettings()).thenReturn(indexSettings); when(mapperService.hasNested()).thenReturn(false); when(searchContext.mapperService()).thenReturn(mapperService); From 5e1b96a524df89f3897500e236f024822c3dc90a Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 16 Jul 2020 15:56:49 +0300 Subject: [PATCH 14/39] Added more tests to DocCountFieldMapper --- .../index/mapper/DocCountFieldMapper.java | 4 +- .../mapper/DocCountFieldMapperTests.java | 158 ++++++++++++++++-- 2 files changed, 150 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index f71bc96ea9328..029f0061ca8e1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -132,9 +132,9 @@ protected void parseCreateField(ParseContext context) throws IOException { } if (value != null) { - if (value.longValue() < 0 || value.floatValue() != value.longValue()) { + if (value.longValue() <= 0 || value.floatValue() != value.longValue()) { throw new IllegalArgumentException( - "Field [" + fieldType().name() + "] must always be a positive integer"); + "Field [" + fieldType().name() + "] must be a positive integer"); } final Field docCount = new NumericDocValuesField(name(), value.longValue()); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index 14ed8953b369e..30d216c0da7c6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -31,12 +31,13 @@ import java.util.Collection; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.core.IsInstanceOf.instanceOf; public class DocCountFieldMapperTests extends ESSingleNodeTestCase { private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE; - private static final String FIELD_NAME = "doc_count"; + private static final String DOC_COUNT_FIELD = "doc_count"; IndexService indexService; DocumentMapperParser parser; @@ -63,7 +64,7 @@ public void testParseValue() throws Exception { .startObject() .startObject("_doc") .startObject("properties") - .startObject(FIELD_NAME) + .startObject(DOC_COUNT_FIELD) .field("type", CONTENT_TYPE) .endObject() .endObject() @@ -73,7 +74,7 @@ public void testParseValue() throws Exception { DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); - Mapper fieldMapper = defaultMapper.mappers().getMapper(FIELD_NAME); + Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); ParsedDocument doc = defaultMapper.parse( @@ -83,13 +84,13 @@ public void testParseValue() throws Exception { BytesReference.bytes( XContentFactory.jsonBuilder() .startObject() - .field(FIELD_NAME, 10) + .field(DOC_COUNT_FIELD, 10) .endObject() ), XContentType.JSON ) ); - assertEquals(10L, doc.rootDoc().getField(FIELD_NAME).numericValue()); + assertEquals(10L, doc.rootDoc().getField(DOC_COUNT_FIELD).numericValue()); } public void testReadDocCounts() throws Exception { @@ -100,7 +101,7 @@ public void testReadDocCounts() throws Exception { .startObject() .startObject("_doc") .startObject("properties") - .startObject(FIELD_NAME) + .startObject(DOC_COUNT_FIELD) .field("type", CONTENT_TYPE) .endObject() .endObject() @@ -109,8 +110,7 @@ public void testReadDocCounts() throws Exception { ); DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); - - Mapper fieldMapper = defaultMapper.mappers().getMapper(FIELD_NAME); + Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); ParsedDocument doc = defaultMapper.parse( @@ -120,14 +120,152 @@ public void testReadDocCounts() throws Exception { BytesReference.bytes( XContentFactory.jsonBuilder() .startObject() - .field(FIELD_NAME, 10) + .field(DOC_COUNT_FIELD, 10) .field("t", "5") .endObject() ), XContentType.JSON ) ); - assertEquals(10L, doc.rootDoc().getField(FIELD_NAME).numericValue()); + assertEquals(10L, doc.rootDoc().getField(DOC_COUNT_FIELD).numericValue()); + } + + /** + * Test that invalid field mapping containing more than one doc_count fields + */ + @AwaitsFix(bugUrl = "") + public void testInvalidMappingWithMultipleDocCounts() throws Exception { + ensureGreen(); + + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject(DOC_COUNT_FIELD) + .field("type", CONTENT_TYPE) + .endObject() + .startObject("another_doc_count") + .field("type", CONTENT_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + ); + + Exception e = expectThrows( + IllegalArgumentException.class, + () -> parser.parse("_doc", new CompressedXContent(mapping)) + ); + assertThat(e.getMessage(), containsString("Property [metrics] must be set for field [metric].")); } + public void testInvalidDocument_NegativeDocCount() throws Exception { + ensureGreen(); + + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject(DOC_COUNT_FIELD) + .field("type", CONTENT_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + ); + + DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); + Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); + assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); + + Exception e = expectThrows( + MapperParsingException.class, + () -> defaultMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes( + XContentFactory.jsonBuilder() + .startObject() + .field(DOC_COUNT_FIELD, -5) + .field("t", "5") + .endObject() + ), XContentType.JSON))); + assertThat(e.getCause().getMessage(), containsString("Field [doc_count] must be a positive integer")); + } + + + public void testInvalidDocument_ZeroDocCount() throws Exception { + ensureGreen(); + + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject(DOC_COUNT_FIELD) + .field("type", CONTENT_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + ); + + DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); + Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); + assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); + + Exception e = expectThrows( + MapperParsingException.class, + () -> defaultMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes( + XContentFactory.jsonBuilder() + .startObject() + .field(DOC_COUNT_FIELD, 0) + .field("t", "5") + .endObject() + ), XContentType.JSON))); + assertThat(e.getCause().getMessage(), containsString("Field [doc_count] must be a positive integer")); + } + + public void testInvalidDocument_FractionalDocCount() throws Exception { + ensureGreen(); + + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject(DOC_COUNT_FIELD) + .field("type", CONTENT_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + ); + + DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); + Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); + assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); + + Exception e = expectThrows( + MapperParsingException.class, + () -> defaultMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes( + XContentFactory.jsonBuilder() + .startObject() + .field(DOC_COUNT_FIELD, 100.23) + .field("t", "5") + .endObject() + ), XContentType.JSON))); + assertThat(e.getCause().getMessage(), containsString("Field [doc_count] must be a positive integer")); + } } From 80d832b71de217e49225dd20659e127919a772db Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 16 Jul 2020 18:05:38 +0300 Subject: [PATCH 15/39] Fixed NPE at AggregatorTestCase --- .../search/aggregations/AggregatorTestCase.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 29455f5f62f64..3e65eafd5911c 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -304,9 +304,13 @@ public boolean shouldCache(Query query) { BigArrays bigArrays = new MockBigArrays(new MockPageCacheRecycler(Settings.EMPTY), circuitBreakerService).withCircuitBreaking(); when(searchContext.bigArrays()).thenReturn(bigArrays); - // TODO: now just needed for top_hits, this will need to be revised for other agg unit tests: MapperService mapperService = mapperServiceMock(); - when(mapperService.fieldTypes()).thenReturn(List.of(fieldTypes)); + Map fieldNameToType = new HashMap<>(); + fieldNameToType.putAll(Arrays.stream(fieldTypes) + .filter(Objects::nonNull) + .collect(Collectors.toMap(MappedFieldType::name, Function.identity()))); + fieldNameToType.putAll(getFieldAliases(fieldTypes)); + when(mapperService.fieldTypes()).thenReturn(fieldNameToType.values()); when(mapperService.getIndexSettings()).thenReturn(indexSettings); when(mapperService.hasNested()).thenReturn(false); when(searchContext.mapperService()).thenReturn(mapperService); @@ -330,14 +334,8 @@ public boolean shouldCache(Query query) { } return null; }); - Map fieldNameToType = new HashMap<>(); - fieldNameToType.putAll(Arrays.stream(fieldTypes) - .filter(Objects::nonNull) - .collect(Collectors.toMap(MappedFieldType::name, Function.identity()))); - fieldNameToType.putAll(getFieldAliases(fieldTypes)); - registerFieldTypes(searchContext, mapperService, - fieldNameToType); + registerFieldTypes(searchContext, mapperService, fieldNameToType); doAnswer(invocation -> { /* Store the release-ables so we can release them at the end of the test case. This is important because aggregations don't * close their sub-aggregations. This is fairly similar to what the production code does. */ From e24d68007e0c7d5b213e1432c92187f745fbcbe2 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Fri, 17 Jul 2020 11:43:00 +0300 Subject: [PATCH 16/39] Updated branch to fix build after merge --- .../index/mapper/DocCountFieldMapper.java | 10 ------- .../index/mapper/DocCountFieldTypeTests.java | 30 ------------------- 2 files changed, 40 deletions(-) delete mode 100644 server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 029f0061ca8e1..680c54a047d6d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -89,15 +89,6 @@ public DocCountFieldType(String name) { this(name, true, Collections.emptyMap()); } - DocCountFieldType(DocCountFieldType ref) { - super(ref); - } - - @Override - public MappedFieldType clone() { - return new DocCountFieldType(this); - } - @Override public String typeName() { return CONTENT_TYPE; @@ -111,7 +102,6 @@ public Query existsQuery(QueryShardContext context) { @Override public Query termQuery(Object value, QueryShardContext context) { throw new QueryShardException(context, "Field [" + name() + " ]of type [" + CONTENT_TYPE + "] is not searchable"); - } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java deleted file mode 100644 index 6df91a239df36..0000000000000 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.mapper; - -import java.util.Map; - -public class DocCountFieldTypeTests extends FieldTypeTestCase { - - @Override - protected DocCountFieldMapper.DocCountFieldType createDefaultFieldType(String name, Map meta) { - DocCountFieldMapper.DocCountFieldType fieldType = new DocCountFieldMapper.DocCountFieldType(name, true, meta); - return fieldType; - } -} From 74c727b39af8532ec3a47e948737b1397855117f Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Fri, 17 Jul 2020 14:36:39 +0300 Subject: [PATCH 17/39] Added validation for single doc_count field Added validation to ensure only a single doc_count field exists in the mapping --- .../index/mapper/DocCountFieldMapper.java | 13 ++++++++++++- .../org/elasticsearch/index/mapper/FieldMapper.java | 8 ++++++++ .../index/mapper/MapperMergeValidator.java | 8 ++++---- .../index/mapper/MetadataFieldMapper.java | 8 -------- .../index/mapper/DocCountFieldMapperTests.java | 12 +++++++----- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 680c54a047d6d..e5838f29863e2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -124,7 +124,7 @@ protected void parseCreateField(ParseContext context) throws IOException { if (value != null) { if (value.longValue() <= 0 || value.floatValue() != value.longValue()) { throw new IllegalArgumentException( - "Field [" + fieldType().name() + "] must be a positive integer"); + "Field [" + fieldType().name() + "] must be a positive integer."); } final Field docCount = new NumericDocValuesField(name(), value.longValue()); @@ -146,4 +146,15 @@ protected String contentType() { protected void mergeOptions(FieldMapper mergeWith, List conflicts) { // nothing to do } + + public void validate(DocumentFieldMappers lookup) { + // Make sure that only one doc_count field exists in the mapping + for (Mapper mapper : lookup) { + if (typeName().equals(mapper.typeName()) && name().equals(mapper.name()) == false) { + throw new IllegalArgumentException( + "Field [" + name() + "] conflicts with field [" + mapper.name() + + "]. Only one field of type [" + typeName() + "] is allowed."); + } + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 30f38ce16d8a3..89487544522a2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -368,6 +368,14 @@ private void mergeSharedOptions(FieldMapper mergeWith, List conflicts) { } } + /** + * Called when mapping gets merged. Provides the opportunity to validate other fields a field mapper + * is supposed to work with before a mapping update is completed. + */ + public void validate(DocumentFieldMappers lookup) { + // noop by default + } + /** * Merge type-specific options and check for incompatible settings in mappings to be merged */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperMergeValidator.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperMergeValidator.java index 992f9345bb7b5..932bdd6c02ff8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperMergeValidator.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperMergeValidator.java @@ -119,7 +119,7 @@ public static void validateFieldReferences(List fieldMappers, DocumentMapper newMapper) { validateCopyTo(fieldMappers, fullPathObjectMappers, fieldTypes); validateFieldAliasTargets(fieldAliasMappers, fullPathObjectMappers); - validateMetadataFieldMappers(metadataMappers, newMapper); + validateFieldMappers(fieldMappers, newMapper); } private static void validateCopyTo(List fieldMappers, @@ -174,9 +174,9 @@ private static void validateFieldAliasTargets(List fieldAliasM } } - private static void validateMetadataFieldMappers(MetadataFieldMapper[] metadataMappers, DocumentMapper newMapper) { - for (MetadataFieldMapper metadataFieldMapper : metadataMappers) { - metadataFieldMapper.validate(newMapper.mappers()); + private static void validateFieldMappers(List fieldMappers, DocumentMapper newMapper) { + for (FieldMapper fieldMapper : fieldMappers) { + fieldMapper.validate(newMapper.mappers()); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MetadataFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/MetadataFieldMapper.java index 05b18099e7b1e..54b9aaf57027b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MetadataFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MetadataFieldMapper.java @@ -66,14 +66,6 @@ protected MetadataFieldMapper(FieldType fieldType, MappedFieldType mappedFieldTy super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); } - /** - * Called when mapping gets merged. Provides the opportunity to validate other fields a metadata field mapper - * is supposed to work with before a mapping update is completed. - */ - public void validate(DocumentFieldMappers lookup) { - // noop by default - } - /** * Called before {@link FieldMapper#parse(ParseContext)} on the {@link RootObjectMapper}. */ diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index 30d216c0da7c6..57949677b3e25 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -32,6 +32,7 @@ import java.util.Collection; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.core.IsInstanceOf.instanceOf; public class DocCountFieldMapperTests extends ESSingleNodeTestCase { @@ -133,10 +134,8 @@ public void testReadDocCounts() throws Exception { /** * Test that invalid field mapping containing more than one doc_count fields */ - @AwaitsFix(bugUrl = "") public void testInvalidMappingWithMultipleDocCounts() throws Exception { ensureGreen(); - String mapping = Strings.toString( XContentFactory.jsonBuilder() .startObject() @@ -152,12 +151,15 @@ public void testInvalidMappingWithMultipleDocCounts() throws Exception { .endObject() .endObject() ); - Exception e = expectThrows( IllegalArgumentException.class, - () -> parser.parse("_doc", new CompressedXContent(mapping)) + () -> indexService.mapperService() + .merge("_doc", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE) + ); + assertThat( + e.getMessage(), + equalTo("Field [doc_count] conflicts with field [another_doc_count]. Only one field of type [doc_count] is allowed.") ); - assertThat(e.getMessage(), containsString("Property [metrics] must be set for field [metric].")); } public void testInvalidDocument_NegativeDocCount() throws Exception { From cd2c84de057f4ca4f1acc76d2a9fa583f9480099 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Fri, 17 Jul 2020 18:22:01 +0300 Subject: [PATCH 18/39] Added version skips to fix broken tests --- .../test/search.aggregation/360_doc_count_field.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml index c669b8bb19c51..958158c68f644 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml @@ -1,4 +1,7 @@ setup: + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" - do: indices.create: index: test_1 @@ -32,6 +35,9 @@ setup: --- "Test numeric terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" - do: search: @@ -50,7 +56,9 @@ setup: --- "Test string terms agg with doc_count": - + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" - do: search: rest_total_hits_as_int: true From 91246eb97b3725cd5aa281f36d1b4cf6063e8809 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Fri, 17 Jul 2020 19:11:23 +0300 Subject: [PATCH 19/39] Added documentation for doc_count --- .../mapping/types/doc-count.asciidoc | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 docs/reference/mapping/types/doc-count.asciidoc diff --git a/docs/reference/mapping/types/doc-count.asciidoc b/docs/reference/mapping/types/doc-count.asciidoc new file mode 100644 index 0000000000000..3cc2ae1a5d2d4 --- /dev/null +++ b/docs/reference/mapping/types/doc-count.asciidoc @@ -0,0 +1,123 @@ +[[doc-count]] +=== Document count data type +++++ +Doc count +++++ + +Bucket aggregations always return a field named `doc_count` showing the number of documents that were aggregated and partitioned +in each bucket. Computation of the value of `doc_count` is very simple. `doc_count` is incremented by 1 for every document collected +in each bucket. + +While this simple approach has been effective when computing aggregations over individual documents, it fails to accurately represent +documents that store pre-aggregated data (such as `histogram` or `aggregate_metric_double` fields), because one summary field represents +more than one documents. + +To allow for correct computation of the number of documents when working with pre-aggregated data, we have introduced a +field type named `doc_count`. A `doc_count` must always be a positive integer representing the number of documents aggregated in a single +summary field. + +When a field of type `doc_count` is added to any index, all bucket aggregations will respect its value and increment the bucket `doc_count` +by the value of the field. If a document does not contain any `doc_count` fields, `doc_count = 1` is implied by default. + +[IMPORTANT] +======== +* Only a single `doc_count` field is allowed per index. More than one fields of type `doc_count` are not allowed. +* A `doc_count` field can only store a single positive integer per document. Nested arrays are not allowed. +* If a document contains no `doc_count` fields, aggregators will increment by 1, which is the default behavior. +======== + +[[doc-count-example]] +==== Example + +The following <> API request creates a new index with the following field mappings: + +* `my_histogram`, a `histogram` field used to store percentile data +* `my_text`, a `keyword` field used to store a title for the histogram +* `my_doc_count`, a `doc_count` field used to store how many documents have been aggregated to produce the histogram + +[source,console] +-------------------------------------------------- +PUT my_index +{ + "mappings" : { + "properties" : { + "my_histogram" : { + "type" : "histogram" + }, + "my_doc_count" : { + "type" : "doc_count" + }, + "my_text" : { + "type" : "keyword" + } + } + } +} +-------------------------------------------------- + +The following <> API requests store pre-aggregated data for +two histograms: `histogram_1` and `histogram_2`. + +[source,console] +-------------------------------------------------- +PUT my_index/_doc/1 +{ + "my_text" : "histogram_1", + "my_histogram" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [3, 7, 23, 12, 6] + }, + "my_doc_count": 45 <1> +} + +PUT my_index/_doc/2 +{ + "my_text" : "histogram_2", + "my_histogram" : { + "values" : [0.1, 0.25, 0.35, 0.4, 0.45, 0.5], + "counts" : [8, 17, 8, 7, 6, 2] + }, + "my_doc_count": 62 <1> +} +-------------------------------------------------- +<1> Field `my_doc_count` is a positive integer storing the number of documents aggregated to produce each histogram. + +If we run the following <> on `my_index`: + +[source,console] +-------------------------------------------------- +GET /_search +{ + "aggs" : { + "histogram_title" : { + "terms" : { "field" : "my_text" } + } + } +} +-------------------------------------------------- + +We will get the following response: + +[source,console-result] +-------------------------------------------------- +{ + ... + "aggregations" : { + "histogram_title" : { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets" : [ + { + "key" : "histogram_2", + "doc_count" : 62 + }, + { + "key" : "histogram_1", + "doc_count" : 45 + } + ] + } + } +} +-------------------------------------------------- +// TESTRESPONSE[skip:test not setup] From 77aa346b114ed6b121beeb21dc028cf7c8671270 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 20 Jul 2020 18:06:47 +0300 Subject: [PATCH 20/39] Changes to address review comments: - Minor doc change - Added yml test that merges template with multiple doc_count - Changed DocCountFieldType indexing to TextSearchInfo.NONE --- .../mapping/types/doc-count.asciidoc | 2 +- .../360_doc_count_field.yml | 34 +++++++++++++++++++ .../index/mapper/DocCountFieldMapper.java | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/reference/mapping/types/doc-count.asciidoc b/docs/reference/mapping/types/doc-count.asciidoc index 3cc2ae1a5d2d4..0893729c8ada7 100644 --- a/docs/reference/mapping/types/doc-count.asciidoc +++ b/docs/reference/mapping/types/doc-count.asciidoc @@ -10,7 +10,7 @@ in each bucket. While this simple approach has been effective when computing aggregations over individual documents, it fails to accurately represent documents that store pre-aggregated data (such as `histogram` or `aggregate_metric_double` fields), because one summary field represents -more than one documents. +multiple documents. To allow for correct computation of the number of documents when working with pre-aggregated data, we have introduced a field type named `doc_count`. A `doc_count` must always be a positive integer representing the number of documents aggregated in a single diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml index 958158c68f644..62938eb9c167e 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml @@ -72,3 +72,37 @@ setup: - match: { aggregations.str_terms.buckets.1.doc_count: 8 } - match: { aggregations.str_terms.buckets.2.key: "xyz" } - match: { aggregations.str_terms.buckets.2.doc_count: 5 } + + +--- +"Multiple doc_count fields from template": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + + - do: + indices.put_template: + name: test + body: + index_patterns: test-* + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + str: + type: keyword + doc_count: + type: doc_count + + - do: + catch: /.*Only one field of type \[doc_count\] is allowed.*/ + indices.create: + index: test-1 + body: + mappings: + properties: + number: + type: integer + another_doc_count: + type: doc_count diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index e5838f29863e2..eb026adca6aa2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -82,7 +82,7 @@ public DocCountFieldMapper.Builder parse(String name, Map node, public static final class DocCountFieldType extends MappedFieldType { public DocCountFieldType(String name, boolean hasDocValues, Map meta) { - super(name, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); + super(name, false, hasDocValues, TextSearchInfo.NONE, meta); } public DocCountFieldType(String name) { From 39c43a083d945758f126870cf591074a31b03aba Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 20 Jul 2020 22:54:19 +0300 Subject: [PATCH 21/39] Use _doc_count as Lucene field for doc count --- .../index/mapper/DocCountFieldMapper.java | 7 +++++-- .../aggregations/bucket/BucketsAggregator.java | 17 +++-------------- .../bucket/FieldBasedDocCountProvider.java | 8 +++++--- .../index/mapper/DocCountFieldMapperTests.java | 4 ++-- .../bucket/FieldBasedDocCountProviderTests.java | 2 +- 5 files changed, 16 insertions(+), 22 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index eb026adca6aa2..168ace35f67f4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -40,6 +40,7 @@ public class DocCountFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "doc_count"; + public static final String CANONICAL_NAME = "_doc_count"; public static class Defaults { public static final FieldType FIELD_TYPE = new FieldType(); @@ -96,7 +97,7 @@ public String typeName() { @Override public Query existsQuery(QueryShardContext context) { - return new DocValuesFieldExistsQuery(name()); + return new DocValuesFieldExistsQuery(CANONICAL_NAME); } @Override @@ -127,7 +128,9 @@ protected void parseCreateField(ParseContext context) throws IOException { "Field [" + fieldType().name() + "] must be a positive integer."); } - final Field docCount = new NumericDocValuesField(name(), value.longValue()); + // Since we allow a single doc_count field per mapping, we can use a + // a canonical name for the Lucene field. + final Field docCount = new NumericDocValuesField(CANONICAL_NAME, value.longValue()); context.doc().add(docCount); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 01d600dc3d49a..9ad0279f6a14a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -22,8 +22,6 @@ import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.IntArray; -import org.elasticsearch.index.mapper.DocCountFieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorBase; @@ -55,7 +53,7 @@ public abstract class BucketsAggregator extends AggregatorBase { private final BigArrays bigArrays; private final IntConsumer multiBucketConsumer; private IntArray docCounts; - private DocCountProvider fieldDocCountProvider = DocCountProvider.DEFAULT_PROVIDER; + private DocCountProvider fieldDocCountProvider; public BucketsAggregator(String name, AggregatorFactories factories, SearchContext context, Aggregator parent, CardinalityUpperBound bucketCardinality, Map metadata) throws IOException { @@ -67,16 +65,7 @@ public BucketsAggregator(String name, AggregatorFactories factories, SearchConte multiBucketConsumer = (count) -> {}; } docCounts = bigArrays.newIntArray(1, true); - - // Check index mappings to find a field of type doc_count. If one is found - // use that one to retrieve the doc count for the bucket - for (MappedFieldType fieldType : context.getQueryShardContext().getMapperService().fieldTypes()) { - if (DocCountFieldMapper.CONTENT_TYPE.equals(fieldType.typeName())) { - // If a field of type doc_count has been found, use it to provide the bucket doc_count values - fieldDocCountProvider = new FieldBasedDocCountProvider(fieldType.name()); - break; - } - } + fieldDocCountProvider = new FieldBasedDocCountProvider(); } /** @@ -106,7 +95,7 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long */ public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException { int docCount = fieldDocCountProvider.getDocCount(doc); - if (docCounts.increment(bucketOrd, docCount) == 1) { + if (docCounts.increment(bucketOrd, docCount) == docCount) { // We calculate the final number of buckets only during the reduce phase. But we still need to // trigger bucket consumer from time to time in order to give it a chance to check available memory and break // the execution if we are running out. To achieve that we are passing 0 as a bucket count. diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java index 3087d0667a096..39589009d9472 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; +import org.elasticsearch.index.mapper.DocCountFieldMapper; import java.io.IOException; @@ -35,8 +36,10 @@ public class FieldBasedDocCountProvider implements DocCountProvider { private final String docCountFieldName; private NumericDocValues docCountValues; - public FieldBasedDocCountProvider(String docCountFieldName) { - this.docCountFieldName = docCountFieldName; + public FieldBasedDocCountProvider() { + // Since we allow a single doc_count field per mapping, we use a constant + // canonical name for the Lucene field. + this.docCountFieldName = DocCountFieldMapper.CANONICAL_NAME; } @Override @@ -56,5 +59,4 @@ public void setLeafReaderContext(LeafReaderContext ctx) { docCountValues = null; } } - } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index 57949677b3e25..754d4d950a40f 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -91,7 +91,7 @@ public void testParseValue() throws Exception { XContentType.JSON ) ); - assertEquals(10L, doc.rootDoc().getField(DOC_COUNT_FIELD).numericValue()); + assertEquals(10L, doc.rootDoc().getField(DocCountFieldMapper.CANONICAL_NAME).numericValue()); } public void testReadDocCounts() throws Exception { @@ -128,7 +128,7 @@ public void testReadDocCounts() throws Exception { XContentType.JSON ) ); - assertEquals(10L, doc.rootDoc().getField(DOC_COUNT_FIELD).numericValue()); + assertEquals(10L, doc.rootDoc().getField(DocCountFieldMapper.CANONICAL_NAME).numericValue()); } /** diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java index a36ab35f2b294..8eb66edcdbdaf 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java @@ -42,7 +42,7 @@ public class FieldBasedDocCountProviderTests extends AggregatorTestCase { - private static final String DOC_COUNT_FIELD = "doc_count"; + private static final String DOC_COUNT_FIELD = DocCountFieldMapper.CANONICAL_NAME; private static final String NUMBER_FIELD = "number"; public void testDocsWithDocCount() throws IOException { From 8ca3fbc684c3996112a0428362b84fa53074ba0a Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 20 Jul 2020 23:46:50 +0300 Subject: [PATCH 22/39] Minor change: field rename --- .../search/aggregations/bucket/BucketsAggregator.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 9ad0279f6a14a..9382d38fd0617 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -53,7 +53,7 @@ public abstract class BucketsAggregator extends AggregatorBase { private final BigArrays bigArrays; private final IntConsumer multiBucketConsumer; private IntArray docCounts; - private DocCountProvider fieldDocCountProvider; + private DocCountProvider docCountProvider; public BucketsAggregator(String name, AggregatorFactories factories, SearchContext context, Aggregator parent, CardinalityUpperBound bucketCardinality, Map metadata) throws IOException { @@ -65,7 +65,7 @@ public BucketsAggregator(String name, AggregatorFactories factories, SearchConte multiBucketConsumer = (count) -> {}; } docCounts = bigArrays.newIntArray(1, true); - fieldDocCountProvider = new FieldBasedDocCountProvider(); + docCountProvider = new FieldBasedDocCountProvider(); } /** @@ -94,7 +94,7 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long * Same as {@link #collectBucket(LeafBucketCollector, int, long)}, but doesn't check if the docCounts needs to be re-sized. */ public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException { - int docCount = fieldDocCountProvider.getDocCount(doc); + int docCount = docCountProvider.getDocCount(doc); if (docCounts.increment(bucketOrd, docCount) == docCount) { // We calculate the final number of buckets only during the reduce phase. But we still need to // trigger bucket consumer from time to time in order to give it a chance to check available memory and break @@ -400,6 +400,6 @@ public static boolean descendsFromGlobalAggregator(Aggregator parent) { protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { super.preGetSubLeafCollectors(ctx); // Set LeafReaderContext to the field based doc_count provider - fieldDocCountProvider.setLeafReaderContext(ctx); + docCountProvider.setLeafReaderContext(ctx); } } From 83929cb6b67494ac039e08f16387bce20761691d Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 27 Jul 2020 15:23:31 +0300 Subject: [PATCH 23/39] Minor change to yml test. --- .../test/search.aggregation/360_doc_count_field.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml index 62938eb9c167e..7e2d1ece009dd 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml @@ -1,7 +1,4 @@ setup: - - skip: - version: " - 7.99.99" - reason: "Doc count fields are only implemented in 8.0" - do: indices.create: index: test_1 From 0a1731d1eb66bff5628bb9a4320f93757f8899bb Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Wed, 2 Sep 2020 16:21:59 +0300 Subject: [PATCH 24/39] Fix errors from merge --- .../index/mapper/DocCountFieldMapper.java | 23 +++++++++++-------- .../index/mapper/FieldMapper.java | 8 ------- .../aggregations/AggregatorTestCase.java | 9 +------- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 168ace35f67f4..3767f2f6f95da 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -135,6 +135,19 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + @Override + public ValueFetcher valueFetcher(MapperService mapperService, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + return new SourceValueFetcher(name(), mapperService, parsesArrayValue()) { + @Override + protected Object parseSourceValue(Object value) { + return value; + } + }; + } + @Override public DocCountFieldType fieldType() { return (DocCountFieldType) super.fieldType(); @@ -150,14 +163,4 @@ protected void mergeOptions(FieldMapper mergeWith, List conflicts) { // nothing to do } - public void validate(DocumentFieldMappers lookup) { - // Make sure that only one doc_count field exists in the mapping - for (Mapper mapper : lookup) { - if (typeName().equals(mapper.typeName()) && name().equals(mapper.name()) == false) { - throw new IllegalArgumentException( - "Field [" + name() + "] conflicts with field [" + mapper.name() - + "]. Only one field of type [" + typeName() + "] is allowed."); - } - } - } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 759b75d28f98b..d5e1bf5386fca 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -422,14 +422,6 @@ private void mergeSharedOptions(FieldMapper mergeWith, List conflicts) { } } - /** - * Called when mapping gets merged. Provides the opportunity to validate other fields a field mapper - * is supposed to work with before a mapping update is completed. - */ - public void validate(DocumentFieldMappers lookup) { - // noop by default - } - /** * Merge type-specific options and check for incompatible settings in mappings to be merged */ diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 0137c3acc73cd..bed4f73fdc2fa 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -333,14 +333,7 @@ public boolean shouldCache(Query query) { return null; }); - Map fieldNameToType = new HashMap<>(); - fieldNameToType.putAll(Arrays.stream(fieldTypes) - .filter(Objects::nonNull) - .collect(Collectors.toMap(MappedFieldType::name, Function.identity()))); - fieldNameToType.putAll(getFieldAliases(fieldTypes)); - - registerFieldTypes(searchContext, mapperService, - fieldNameToType); + registerFieldTypes(searchContext, mapperService, fieldNameToType); doAnswer(invocation -> { /* Store the release-ables so we can release them at the end of the test case. This is important because aggregations don't * close their sub-aggregations. This is fairly similar to what the production code does. */ From 82f092aa92c01a69231c9f1923cfe6fa45bcdd19 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Wed, 2 Sep 2020 18:53:31 +0300 Subject: [PATCH 25/39] Converted _doc_count to metadata field type --- .../index/mapper/DocCountFieldMapper.java | 99 +++++++------------ .../elasticsearch/indices/IndicesModule.java | 2 +- .../bucket/FieldBasedDocCountProvider.java | 2 +- .../mapper/DocCountFieldMapperTests.java | 10 +- .../FieldBasedDocCountProviderTests.java | 4 +- 5 files changed, 45 insertions(+), 72 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 3767f2f6f95da..2c4dcf546d394 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -19,10 +19,7 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.xcontent.XContentParser; @@ -32,62 +29,45 @@ import java.io.IOException; import java.util.Collections; import java.util.List; -import java.util.Map; - -import static org.elasticsearch.index.mapper.TypeParsers.parseField; /** Mapper for the doc_count field. */ -public class DocCountFieldMapper extends FieldMapper { +public class DocCountFieldMapper extends MetadataFieldMapper { - public static final String CONTENT_TYPE = "doc_count"; - public static final String CANONICAL_NAME = "_doc_count"; + public static final String NAME = "_doc_count"; + public static final String CONTENT_TYPE = "_doc_count"; - public static class Defaults { - public static final FieldType FIELD_TYPE = new FieldType(); + public static final TypeParser PARSER = new ConfigurableTypeParser( + c -> new DocCountFieldMapper(), + c -> new DocCountFieldMapper.Builder()) { - static { - FIELD_TYPE.setDocValuesType(DocValuesType.NUMERIC); - FIELD_TYPE.setIndexOptions(IndexOptions.NONE); - FIELD_TYPE.freeze(); + public boolean isAllowedInSource() { + return true; } - } + }; - public static class Builder extends FieldMapper.Builder { + static class Builder extends MetadataFieldMapper.Builder { - public Builder(String name) { - super(name, Defaults.FIELD_TYPE); - builder = this; + public Builder() { + super(NAME); } @Override - public DocCountFieldMapper build(BuilderContext context) { - DocCountFieldType defaultFieldType = new DocCountFieldType(buildFullName(context), hasDocValues, meta); - return new DocCountFieldMapper(name, fieldType, defaultFieldType); - } - } - - public static class TypeParser implements Mapper.TypeParser { - - public TypeParser() { + protected List> getParameters() { + return Collections.emptyList(); } @Override - public DocCountFieldMapper.Builder parse(String name, Map node, ParserContext parserContext) - throws MapperParsingException { - DocCountFieldMapper.Builder builder = new DocCountFieldMapper.Builder(name); - parseField(builder, name, node, parserContext); - return builder; + public DocCountFieldMapper build(BuilderContext context) { + return new DocCountFieldMapper(); } } public static final class DocCountFieldType extends MappedFieldType { - public DocCountFieldType(String name, boolean hasDocValues, Map meta) { - super(name, false, hasDocValues, TextSearchInfo.NONE, meta); - } + public static final DocCountFieldType INSTANCE = new DocCountFieldType(); - public DocCountFieldType(String name) { - this(name, true, Collections.emptyMap()); + public DocCountFieldType() { + super(NAME, false, true, TextSearchInfo.NONE, Collections.emptyMap()); } @Override @@ -97,7 +77,7 @@ public String typeName() { @Override public Query existsQuery(QueryShardContext context) { - return new DocValuesFieldExistsQuery(CANONICAL_NAME); + return new DocValuesFieldExistsQuery(NAME); } @Override @@ -106,11 +86,8 @@ public Query termQuery(Object value, QueryShardContext context) { } } - protected DocCountFieldMapper( - String simpleName, - FieldType fieldType, - MappedFieldType defaultFieldType) { - super(simpleName, fieldType, defaultFieldType, MultiFields.empty(), CopyTo.empty()); + private DocCountFieldMapper() { + super(DocCountFieldType.INSTANCE); } @Override @@ -130,23 +107,26 @@ protected void parseCreateField(ParseContext context) throws IOException { // Since we allow a single doc_count field per mapping, we can use a // a canonical name for the Lucene field. - final Field docCount = new NumericDocValuesField(CANONICAL_NAME, value.longValue()); + final Field docCount = new NumericDocValuesField(NAME, value.longValue()); context.doc().add(docCount); } } @Override - public ValueFetcher valueFetcher(MapperService mapperService, String format) { - if (format != null) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); - } - return new SourceValueFetcher(name(), mapperService, parsesArrayValue()) { - @Override - protected Object parseSourceValue(Object value) { - return value; - } - }; - } + public void preParse(ParseContext context) { } + +// @Override +// public ValueFetcher valueFetcher(MapperService mapperService, String format) { +// if (format != null) { +// throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); +// } +// return new SourceValueFetcher(name(), mapperService, parsesArrayValue()) { +// @Override +// protected Object parseSourceValue(Object value) { +// return value; +// } +// }; +// } @Override public DocCountFieldType fieldType() { @@ -158,9 +138,4 @@ protected String contentType() { return CONTENT_TYPE; } - @Override - protected void mergeOptions(FieldMapper mergeWith, List conflicts) { - // nothing to do - } - } diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index fa054fadfe3ca..61c04e5a26f5c 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -125,7 +125,6 @@ public static Map getMappers(List mappe mappers.put(CompletionFieldMapper.CONTENT_TYPE, CompletionFieldMapper.PARSER); mappers.put(FieldAliasMapper.CONTENT_TYPE, new FieldAliasMapper.TypeParser()); mappers.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser()); - mappers.put(DocCountFieldMapper.CONTENT_TYPE, new DocCountFieldMapper.TypeParser()); for (MapperPlugin mapperPlugin : mapperPlugins) { for (Map.Entry entry : mapperPlugin.getMappers().entrySet()) { @@ -157,6 +156,7 @@ private static Map initBuiltInMetadataMa builtInMetadataMappers.put(NestedPathFieldMapper.NAME, NestedPathFieldMapper.PARSER); builtInMetadataMappers.put(VersionFieldMapper.NAME, VersionFieldMapper.PARSER); builtInMetadataMappers.put(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PARSER); + builtInMetadataMappers.put(DocCountFieldMapper.NAME, DocCountFieldMapper.PARSER); //_field_names must be added last so that it has a chance to see all the other mappers builtInMetadataMappers.put(FieldNamesFieldMapper.NAME, FieldNamesFieldMapper.PARSER); return Collections.unmodifiableMap(builtInMetadataMappers); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java index 39589009d9472..8f1b946c2b724 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java @@ -39,7 +39,7 @@ public class FieldBasedDocCountProvider implements DocCountProvider { public FieldBasedDocCountProvider() { // Since we allow a single doc_count field per mapping, we use a constant // canonical name for the Lucene field. - this.docCountFieldName = DocCountFieldMapper.CANONICAL_NAME; + this.docCountFieldName = DocCountFieldMapper.NAME; } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index 754d4d950a40f..b1aa01b8f0850 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -38,7 +38,7 @@ public class DocCountFieldMapperTests extends ESSingleNodeTestCase { private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE; - private static final String DOC_COUNT_FIELD = "doc_count"; + private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME; IndexService indexService; DocumentMapperParser parser; @@ -65,9 +65,6 @@ public void testParseValue() throws Exception { .startObject() .startObject("_doc") .startObject("properties") - .startObject(DOC_COUNT_FIELD) - .field("type", CONTENT_TYPE) - .endObject() .endObject() .endObject() .endObject() @@ -85,13 +82,14 @@ public void testParseValue() throws Exception { BytesReference.bytes( XContentFactory.jsonBuilder() .startObject() + .field("foo", 500) .field(DOC_COUNT_FIELD, 10) .endObject() ), XContentType.JSON ) ); - assertEquals(10L, doc.rootDoc().getField(DocCountFieldMapper.CANONICAL_NAME).numericValue()); + assertEquals(10L, doc.rootDoc().getField(DocCountFieldMapper.NAME).numericValue()); } public void testReadDocCounts() throws Exception { @@ -128,7 +126,7 @@ public void testReadDocCounts() throws Exception { XContentType.JSON ) ); - assertEquals(10L, doc.rootDoc().getField(DocCountFieldMapper.CANONICAL_NAME).numericValue()); + assertEquals(10L, doc.rootDoc().getField(DocCountFieldMapper.NAME).numericValue()); } /** diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java index 8eb66edcdbdaf..1016a092fa291 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java @@ -42,7 +42,7 @@ public class FieldBasedDocCountProviderTests extends AggregatorTestCase { - private static final String DOC_COUNT_FIELD = DocCountFieldMapper.CANONICAL_NAME; + private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME; private static final String NUMBER_FIELD = "number"; public void testDocsWithDocCount() throws IOException { @@ -102,7 +102,7 @@ private void testAggregation(Query query, Consumer verify) throws IOException { GlobalAggregationBuilder aggregationBuilder = new GlobalAggregationBuilder("_name"); MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD, NumberFieldMapper.NumberType.LONG); - MappedFieldType docCountFieldType = new DocCountFieldMapper.DocCountFieldType(DOC_COUNT_FIELD); + MappedFieldType docCountFieldType = new DocCountFieldMapper.DocCountFieldType(); testCase(aggregationBuilder, query, indexer, verify, fieldType, docCountFieldType); } } From ba923596935d33177cdab550fb77d8995335fb15 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 10 Sep 2020 17:42:21 +0300 Subject: [PATCH 26/39] Throw an error if parsed value is not a number --- .../org/elasticsearch/index/mapper/DocCountFieldMapper.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 2c4dcf546d394..d54709798bedf 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -96,7 +96,8 @@ protected void parseCreateField(ParseContext context) throws IOException { if (context.parser().currentToken() == XContentParser.Token.VALUE_NUMBER) { value = context.parser().numberValue().floatValue(); } else { - return; + throw new IllegalArgumentException( + "Field [" + fieldType().name() + "] must be a positive integer."); } if (value != null) { From 522c38503debd618f7fcb6dd05c3526246b91f16 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 21 Sep 2020 17:53:10 +0300 Subject: [PATCH 27/39] Make _doc_count field a metadata field --- ...ount_field.yml => 370_doc_count_field.yml} | 44 +-- .../index/mapper/DocCountFieldMapper.java | 57 ++-- .../mapper/DocCountFieldMapperTests.java | 251 ++---------------- 3 files changed, 50 insertions(+), 302 deletions(-) rename rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/{360_doc_count_field.yml => 370_doc_count_field.yml} (65%) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml similarity index 65% rename from rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml rename to rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml index 7e2d1ece009dd..b52af963041ec 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml @@ -11,8 +11,6 @@ setup: type: keyword number: type: integer - dc: - type: doc_count - do: bulk: @@ -20,13 +18,13 @@ setup: refresh: true body: - '{"index": {}}' - - '{"dc": 10, "str": "abc", "number" : 500 }' + - '{"_doc_count": 10, "str": "abc", "number" : 500 }' - '{"index": {}}' - - '{"dc": 5, "str": "xyz", "number" : 100 }' + - '{"_doc_count": 5, "str": "xyz", "number" : 100 }' - '{"index": {}}' - - '{"dc": 7, "str": "foo", "number" : 100 }' + - '{"_doc_count": 7, "str": "foo", "number" : 100 }' - '{"index": {}}' - - '{"dc": 1, "str": "foo", "number" : 200 }' + - '{"_doc_count": 1, "str": "foo", "number" : 200 }' - '{"index": {}}' - '{"str": "abc", "number" : 500 }' @@ -69,37 +67,3 @@ setup: - match: { aggregations.str_terms.buckets.1.doc_count: 8 } - match: { aggregations.str_terms.buckets.2.key: "xyz" } - match: { aggregations.str_terms.buckets.2.doc_count: 5 } - - ---- -"Multiple doc_count fields from template": - - skip: - version: " - 7.99.99" - reason: "Doc count fields are only implemented in 8.0" - - - do: - indices.put_template: - name: test - body: - index_patterns: test-* - settings: - number_of_shards: 1 - number_of_replicas: 0 - mappings: - properties: - str: - type: keyword - doc_count: - type: doc_count - - - do: - catch: /.*Only one field of type \[doc_count\] is allowed.*/ - indices.create: - index: test-1 - body: - mappings: - properties: - number: - type: integer - another_doc_count: - type: doc_count diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index d54709798bedf..78c7dfaed27bd 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; import java.util.Collections; @@ -38,12 +39,7 @@ public class DocCountFieldMapper extends MetadataFieldMapper { public static final TypeParser PARSER = new ConfigurableTypeParser( c -> new DocCountFieldMapper(), - c -> new DocCountFieldMapper.Builder()) { - - public boolean isAllowedInSource() { - return true; - } - }; + c -> new DocCountFieldMapper.Builder()); static class Builder extends MetadataFieldMapper.Builder { @@ -92,42 +88,37 @@ private DocCountFieldMapper() { @Override protected void parseCreateField(ParseContext context) throws IOException { - Number value; if (context.parser().currentToken() == XContentParser.Token.VALUE_NUMBER) { - value = context.parser().numberValue().floatValue(); - } else { - throw new IllegalArgumentException( - "Field [" + fieldType().name() + "] must be a positive integer."); - } + Number value = context.parser().numberValue().floatValue(); - if (value != null) { - if (value.longValue() <= 0 || value.floatValue() != value.longValue()) { - throw new IllegalArgumentException( - "Field [" + fieldType().name() + "] must be a positive integer."); - } + if (value != null) { + if (value.longValue() <= 0 || value.floatValue() != value.longValue()) { + throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer."); + } - // Since we allow a single doc_count field per mapping, we can use a - // a canonical name for the Lucene field. - final Field docCount = new NumericDocValuesField(NAME, value.longValue()); - context.doc().add(docCount); + final Field docCount = new NumericDocValuesField(NAME, value.longValue()); + context.doc().add(docCount); + } + } else { + throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer."); } } @Override public void preParse(ParseContext context) { } -// @Override -// public ValueFetcher valueFetcher(MapperService mapperService, String format) { -// if (format != null) { -// throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); -// } -// return new SourceValueFetcher(name(), mapperService, parsesArrayValue()) { -// @Override -// protected Object parseSourceValue(Object value) { -// return value; -// } -// }; -// } + @Override + public ValueFetcher valueFetcher(MapperService mapperService, SearchLookup lookup, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + return new SourceValueFetcher(name(), mapperService, parsesArrayValue()) { + @Override + protected Object parseSourceValue(Object value) { + return value; + } + }; + } @Override public DocCountFieldType fieldType() { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index b1aa01b8f0850..b69b0592e3084 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -18,254 +18,47 @@ */ package org.elasticsearch.index.mapper; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.compress.CompressedXContent; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.IndexService; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.test.ESSingleNodeTestCase; -import org.elasticsearch.test.InternalSettingsPlugin; -import org.junit.Before; - -import java.util.Collection; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexableField; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.core.IsInstanceOf.instanceOf; -public class DocCountFieldMapperTests extends ESSingleNodeTestCase { +public class DocCountFieldMapperTests extends MapperServiceTestCase { private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE; private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME; - IndexService indexService; - DocumentMapperParser parser; - - @Override - protected Collection> getPlugins() { - return pluginList(InternalSettingsPlugin.class); - } - - @Before - public void setup() { - indexService = createIndex("test"); - parser = indexService.mapperService().documentMapperParser(); - } - /** * Test parsing field mapping and adding simple field */ public void testParseValue() throws Exception { - ensureGreen(); - - String mapping = Strings.toString( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .endObject() - .endObject() - .endObject() - ); - - DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); - - Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); - assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); - - ParsedDocument doc = defaultMapper.parse( - new SourceToParse( - "test", - "1", - BytesReference.bytes( - XContentFactory.jsonBuilder() - .startObject() - .field("foo", 500) - .field(DOC_COUNT_FIELD, 10) - .endObject() - ), - XContentType.JSON - ) - ); - assertEquals(10L, doc.rootDoc().getField(DocCountFieldMapper.NAME).numericValue()); - } - - public void testReadDocCounts() throws Exception { - ensureGreen(); - - String mapping = Strings.toString( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject(DOC_COUNT_FIELD) - .field("type", CONTENT_TYPE) - .endObject() - .endObject() - .endObject() - .endObject() - ); - - DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); - Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); - assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); - - ParsedDocument doc = defaultMapper.parse( - new SourceToParse( - "test", - "1", - BytesReference.bytes( - XContentFactory.jsonBuilder() - .startObject() - .field(DOC_COUNT_FIELD, 10) - .field("t", "5") - .endObject() - ), - XContentType.JSON - ) - ); - assertEquals(10L, doc.rootDoc().getField(DocCountFieldMapper.NAME).numericValue()); - } - - /** - * Test that invalid field mapping containing more than one doc_count fields - */ - public void testInvalidMappingWithMultipleDocCounts() throws Exception { - ensureGreen(); - String mapping = Strings.toString( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject(DOC_COUNT_FIELD) - .field("type", CONTENT_TYPE) - .endObject() - .startObject("another_doc_count") - .field("type", CONTENT_TYPE) - .endObject() - .endObject() - .endObject() - .endObject() - ); - Exception e = expectThrows( - IllegalArgumentException.class, - () -> indexService.mapperService() - .merge("_doc", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE) - ); - assertThat( - e.getMessage(), - equalTo("Field [doc_count] conflicts with field [another_doc_count]. Only one field of type [doc_count] is allowed.") - ); + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + ParsedDocument doc = mapper.parse(source(b -> + b.field("foo", 500) + .field(CONTENT_TYPE, 100) + )); + + IndexableField field = doc.rootDoc().getField(DOC_COUNT_FIELD); + assertEquals(100L, field.numericValue()); + assertEquals(DocValuesType.NUMERIC, field.fieldType().docValuesType()); + assertEquals(1, doc.rootDoc().getFields(DOC_COUNT_FIELD).length); } public void testInvalidDocument_NegativeDocCount() throws Exception { - ensureGreen(); - - String mapping = Strings.toString( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject(DOC_COUNT_FIELD) - .field("type", CONTENT_TYPE) - .endObject() - .endObject() - .endObject() - .endObject() - ); - - DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); - Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); - assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); - - Exception e = expectThrows( - MapperParsingException.class, - () -> defaultMapper.parse( - new SourceToParse( - "test", - "1", - BytesReference.bytes( - XContentFactory.jsonBuilder() - .startObject() - .field(DOC_COUNT_FIELD, -5) - .field("t", "5") - .endObject() - ), XContentType.JSON))); - assertThat(e.getCause().getMessage(), containsString("Field [doc_count] must be a positive integer")); + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, -100)))); + assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer")); } - public void testInvalidDocument_ZeroDocCount() throws Exception { - ensureGreen(); - - String mapping = Strings.toString( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject(DOC_COUNT_FIELD) - .field("type", CONTENT_TYPE) - .endObject() - .endObject() - .endObject() - .endObject() - ); - - DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); - Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); - assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); - - Exception e = expectThrows( - MapperParsingException.class, - () -> defaultMapper.parse( - new SourceToParse( - "test", - "1", - BytesReference.bytes( - XContentFactory.jsonBuilder() - .startObject() - .field(DOC_COUNT_FIELD, 0) - .field("t", "5") - .endObject() - ), XContentType.JSON))); - assertThat(e.getCause().getMessage(), containsString("Field [doc_count] must be a positive integer")); + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 0)))); + assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer")); } public void testInvalidDocument_FractionalDocCount() throws Exception { - ensureGreen(); - - String mapping = Strings.toString( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject(DOC_COUNT_FIELD) - .field("type", CONTENT_TYPE) - .endObject() - .endObject() - .endObject() - .endObject() - ); - - DocumentMapper defaultMapper = parser.parse("_doc", new CompressedXContent(mapping)); - Mapper fieldMapper = defaultMapper.mappers().getMapper(DOC_COUNT_FIELD); - assertThat(fieldMapper, instanceOf(DocCountFieldMapper.class)); - - Exception e = expectThrows( - MapperParsingException.class, - () -> defaultMapper.parse( - new SourceToParse( - "test", - "1", - BytesReference.bytes( - XContentFactory.jsonBuilder() - .startObject() - .field(DOC_COUNT_FIELD, 100.23) - .field("t", "5") - .endObject() - ), XContentType.JSON))); - assertThat(e.getCause().getMessage(), containsString("Field [doc_count] must be a positive integer")); + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23)))); + assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer")); } } From df2a2ebb75b7ca92e862f3f15e180192cf991d13 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 21 Sep 2020 18:26:35 +0300 Subject: [PATCH 28/39] Fixed broken tests --- .../org/elasticsearch/index/mapper/DocCountFieldMapper.java | 2 +- .../java/org/elasticsearch/indices/IndicesModuleTests.java | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 78c7dfaed27bd..bd327f35d74f4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -43,7 +43,7 @@ public class DocCountFieldMapper extends MetadataFieldMapper { static class Builder extends MetadataFieldMapper.Builder { - public Builder() { + Builder() { super(NAME); } diff --git a/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java b/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java index 5753ac7d5cee0..fb23e1247c265 100644 --- a/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java +++ b/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.indices; import org.elasticsearch.Version; +import org.elasticsearch.index.mapper.DocCountFieldMapper; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.IgnoredFieldMapper; @@ -77,7 +78,8 @@ public Map getMetadataMappers() { private static final String[] EXPECTED_METADATA_FIELDS = new String[]{ IgnoredFieldMapper.NAME, IdFieldMapper.NAME, RoutingFieldMapper.NAME, IndexFieldMapper.NAME, SourceFieldMapper.NAME, TypeFieldMapper.NAME, - NestedPathFieldMapper.NAME, VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, FieldNamesFieldMapper.NAME }; + NestedPathFieldMapper.NAME, VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, DocCountFieldMapper.NAME, + FieldNamesFieldMapper.NAME }; public void testBuiltinMappers() { IndicesModule module = new IndicesModule(Collections.emptyList()); From 838436f85d884feaffa48657bdb973e8011d64b1 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 22 Sep 2020 18:11:30 +0300 Subject: [PATCH 29/39] Fix bug in low cardinality ordinal terms aggs --- .../370_doc_count_field.yml | 38 ++++++++++++++++--- .../bucket/BucketsAggregator.java | 2 +- .../GlobalOrdinalsStringTermsAggregator.java | 7 +++- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml index b52af963041ec..f4a69de1c2e7b 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml @@ -18,15 +18,15 @@ setup: refresh: true body: - '{"index": {}}' - - '{"_doc_count": 10, "str": "abc", "number" : 500 }' + - '{"_doc_count": 10, "str": "abc", "number" : 500, "unmapped": "abc" }' - '{"index": {}}' - - '{"_doc_count": 5, "str": "xyz", "number" : 100 }' + - '{"_doc_count": 5, "str": "xyz", "number" : 100, "unmapped": "xyz" }' - '{"index": {}}' - - '{"_doc_count": 7, "str": "foo", "number" : 100 }' + - '{"_doc_count": 7, "str": "foo", "number" : 100, "unmapped": "foo" }' - '{"index": {}}' - - '{"_doc_count": 1, "str": "foo", "number" : 200 }' + - '{"_doc_count": 1, "str": "foo", "number" : 200, "unmapped": "foo" }' - '{"index": {}}' - - '{"str": "abc", "number" : 500 }' + - '{"str": "abc", "number" : 500, "unmapped": "abc" }' --- "Test numeric terms agg with doc_count": @@ -50,7 +50,7 @@ setup: --- -"Test string terms agg with doc_count": +"Test keyword terms agg with doc_count": - skip: version: " - 7.99.99" reason: "Doc count fields are only implemented in 8.0" @@ -67,3 +67,29 @@ setup: - match: { aggregations.str_terms.buckets.1.doc_count: 8 } - match: { aggregations.str_terms.buckets.2.key: "xyz" } - match: { aggregations.str_terms.buckets.2.doc_count: 5 } + +--- + +"Test unmapped string terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + - do: + bulk: + index: test_2 + refresh: true + body: + - '{"index": {}}' + - '{"_doc_count": 10, "str": "abc" }' + - '{"index": {}}' + - '{"str": "abc" }' + - do: + search: + index: test_2 + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str.keyword" } } } } + + - match: { hits.total: 2 } + - length: { aggregations.str_terms.buckets: 1 } + - match: { aggregations.str_terms.buckets.0.key: "abc" } + - match: { aggregations.str_terms.buckets.0.doc_count: 11 } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 3898028a02471..adffcb96f92af 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -54,7 +54,7 @@ public abstract class BucketsAggregator extends AggregatorBase { private final BigArrays bigArrays; private final IntConsumer multiBucketConsumer; private IntArray docCounts; - private DocCountProvider docCountProvider; + protected final DocCountProvider docCountProvider; public BucketsAggregator(String name, AggregatorFactories factories, SearchContext context, Aggregator parent, CardinalityUpperBound bucketCardinality, Map metadata) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 42c549f0ba839..eeddf137a60d1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -305,6 +305,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol assert sub == LeafBucketCollector.NO_OP_COLLECTOR; final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); mapping = valuesSource.globalOrdinalsMapping(ctx); + docCountProvider.setLeafReaderContext(ctx); // Dense mode doesn't support include/exclude so we don't have to check it here. if (singleValues != null) { segmentsWithSingleValuedOrds++; @@ -316,7 +317,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } int ord = singleValues.ordValue(); - segmentDocCounts.increment(ord + 1, 1); + int docCount = docCountProvider.getDocCount(doc); + segmentDocCounts.increment(ord + 1, docCount); } }); } @@ -329,7 +331,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) { - segmentDocCounts.increment(segmentOrd + 1, 1); + int docCount = docCountProvider.getDocCount(doc); + segmentDocCounts.increment(segmentOrd + 1, docCount); } } }); From 4a92c8054909eea07983298a2fa085c9ab909c62 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 22 Sep 2020 18:43:44 +0300 Subject: [PATCH 30/39] Update docs that _doc_count is a metadata field --- docs/reference/mapping/fields.asciidoc | 6 +++ .../doc-count-field.asciidoc} | 41 ++++++++----------- 2 files changed, 24 insertions(+), 23 deletions(-) rename docs/reference/mapping/{types/doc-count.asciidoc => fields/doc-count-field.asciidoc} (63%) diff --git a/docs/reference/mapping/fields.asciidoc b/docs/reference/mapping/fields.asciidoc index df9dbb376b5eb..3559483132dfe 100644 --- a/docs/reference/mapping/fields.asciidoc +++ b/docs/reference/mapping/fields.asciidoc @@ -59,6 +59,11 @@ some of these metadata fields can be customized when a mapping type is created. Application specific metadata. +=== Doc count metadata field + +<>:: + + A custom field used for storing doc counts when a document represents pre-aggregated data. include::fields/field-names-field.asciidoc[] @@ -76,3 +81,4 @@ include::fields/source-field.asciidoc[] include::fields/type-field.asciidoc[] +include::fields/doc-count-field.asciidoc[] diff --git a/docs/reference/mapping/types/doc-count.asciidoc b/docs/reference/mapping/fields/doc-count-field.asciidoc similarity index 63% rename from docs/reference/mapping/types/doc-count.asciidoc rename to docs/reference/mapping/fields/doc-count-field.asciidoc index 0893729c8ada7..52eb8ea52a71f 100644 --- a/docs/reference/mapping/types/doc-count.asciidoc +++ b/docs/reference/mapping/fields/doc-count-field.asciidoc @@ -1,39 +1,37 @@ -[[doc-count]] -=== Document count data type +[[mapping-doc-count-field]] +=== `_doc_count` data type ++++ -Doc count +_doc_count ++++ Bucket aggregations always return a field named `doc_count` showing the number of documents that were aggregated and partitioned in each bucket. Computation of the value of `doc_count` is very simple. `doc_count` is incremented by 1 for every document collected in each bucket. -While this simple approach has been effective when computing aggregations over individual documents, it fails to accurately represent -documents that store pre-aggregated data (such as `histogram` or `aggregate_metric_double` fields), because one summary field represents -multiple documents. +While this simple approach is effective when computing aggregations over individual documents, it fails to accurately represent +documents that store pre-aggregated data (such as `histogram` or `aggregate_metric_double` fields), because one summary field may +represent multiple documents. To allow for correct computation of the number of documents when working with pre-aggregated data, we have introduced a -field type named `doc_count`. A `doc_count` must always be a positive integer representing the number of documents aggregated in a single -summary field. +metadata field type named `_doc_count`. `_doc_count` must always be a positive integer representing the number of documents +aggregated in a single summary field. -When a field of type `doc_count` is added to any index, all bucket aggregations will respect its value and increment the bucket `doc_count` -by the value of the field. If a document does not contain any `doc_count` fields, `doc_count = 1` is implied by default. +When field `_doc_count` is added to a document, all bucket aggregations will respect its value and increment the bucket `doc_count` +by the value of the field. If a document does not contain any `_doc_count` field, `_doc_count = 1` is implied by default. [IMPORTANT] ======== -* Only a single `doc_count` field is allowed per index. More than one fields of type `doc_count` are not allowed. -* A `doc_count` field can only store a single positive integer per document. Nested arrays are not allowed. -* If a document contains no `doc_count` fields, aggregators will increment by 1, which is the default behavior. +* A `_doc_count` field can only store a single positive integer per document. Nested arrays are not allowed. +* If a document contains no `_doc_count` fields, aggregators will increment by 1, which is the default behavior. ======== -[[doc-count-example]] +[[mapping-doc-count-field-example]] ==== Example The following <> API request creates a new index with the following field mappings: * `my_histogram`, a `histogram` field used to store percentile data * `my_text`, a `keyword` field used to store a title for the histogram -* `my_doc_count`, a `doc_count` field used to store how many documents have been aggregated to produce the histogram [source,console] -------------------------------------------------- @@ -44,9 +42,6 @@ PUT my_index "my_histogram" : { "type" : "histogram" }, - "my_doc_count" : { - "type" : "doc_count" - }, "my_text" : { "type" : "keyword" } @@ -67,7 +62,7 @@ PUT my_index/_doc/1 "values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 6] }, - "my_doc_count": 45 <1> + "_doc_count": 45 <1> } PUT my_index/_doc/2 @@ -77,10 +72,10 @@ PUT my_index/_doc/2 "values" : [0.1, 0.25, 0.35, 0.4, 0.45, 0.5], "counts" : [8, 17, 8, 7, 6, 2] }, - "my_doc_count": 62 <1> + "_doc_count_": 62 <1> } -------------------------------------------------- -<1> Field `my_doc_count` is a positive integer storing the number of documents aggregated to produce each histogram. +<1> Field `_doc_count` must be a positive integer storing the number of documents aggregated to produce each histogram. If we run the following <> on `my_index`: @@ -89,7 +84,7 @@ If we run the following < Date: Wed, 23 Sep 2020 13:04:55 +0300 Subject: [PATCH 31/39] Fix broken ML tests --- .../xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java index 4f8731f72ad65..4fcfeed4804d3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java @@ -59,7 +59,7 @@ public class ExtractedFieldsDetector { */ private static final List IGNORE_FIELDS = Arrays.asList("_id", "_field_names", "_index", "_parent", "_routing", "_seq_no", "_source", "_type", "_uid", "_version", "_feature", "_ignored", "_nested_path", DestinationIndex.INCREMENTAL_ID, - "_data_stream_timestamp"); + "_data_stream_timestamp", "_doc_count"); private final DataFrameAnalyticsConfig config; private final int docValueFieldsLimit; From 23e4b30d3da5cbb902f3767b918464cb16bf5b02 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Wed, 23 Sep 2020 16:31:47 +0300 Subject: [PATCH 32/39] Fix errors after merge --- .../org/elasticsearch/index/mapper/DocCountFieldMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index bd327f35d74f4..74188d396bd1b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -63,7 +63,7 @@ public static final class DocCountFieldType extends MappedFieldType { public static final DocCountFieldType INSTANCE = new DocCountFieldType(); public DocCountFieldType() { - super(NAME, false, true, TextSearchInfo.NONE, Collections.emptyMap()); + super(NAME, false, false, true, TextSearchInfo.NONE, Collections.emptyMap()); } @Override From b25865318495e96740bfe228eee7040aa21b91d2 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Fri, 2 Oct 2020 22:12:49 +0300 Subject: [PATCH 33/39] Addressed review comments --- .../index/mapper/DocCountFieldMapper.java | 5 +-- .../bucket/BucketsAggregator.java | 2 +- .../aggregations/bucket/DocCountProvider.java | 41 ------------------- .../bucket/FieldBasedDocCountProvider.java | 4 +- .../GlobalOrdinalsStringTermsAggregator.java | 2 +- .../mapper/DocCountFieldMapperTests.java | 2 +- 6 files changed, 6 insertions(+), 50 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 74188d396bd1b..a0ad35b286329 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -89,13 +89,12 @@ private DocCountFieldMapper() { @Override protected void parseCreateField(ParseContext context) throws IOException { if (context.parser().currentToken() == XContentParser.Token.VALUE_NUMBER) { - Number value = context.parser().numberValue().floatValue(); + Long value = context.parser().longValue(false); if (value != null) { - if (value.longValue() <= 0 || value.floatValue() != value.longValue()) { + if (value.longValue() <= 0) { throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer."); } - final Field docCount = new NumericDocValuesField(NAME, value.longValue()); context.doc().add(docCount); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index adffcb96f92af..16aeede42027c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -54,7 +54,7 @@ public abstract class BucketsAggregator extends AggregatorBase { private final BigArrays bigArrays; private final IntConsumer multiBucketConsumer; private IntArray docCounts; - protected final DocCountProvider docCountProvider; + protected final FieldBasedDocCountProvider docCountProvider; public BucketsAggregator(String name, AggregatorFactories factories, SearchContext context, Aggregator parent, CardinalityUpperBound bucketCardinality, Map metadata) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java deleted file mode 100644 index 2aad03180071b..0000000000000 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.search.aggregations.bucket; - -import org.apache.lucene.index.LeafReaderContext; - -import java.io.IOException; - -/** - * Interface that allows adding custom implementations for providing - * doc_count values for a document. - */ -public interface DocCountProvider { - - /** - * Default implementation of a doc_count provider returns for every doc collected. - */ - DocCountProvider DEFAULT_PROVIDER = doc -> 1; - - /** Return the number of doc_count for a specific document */ - int getDocCount(int doc) throws IOException; - - default void setLeafReaderContext(LeafReaderContext ctx) {} -} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java index 8f1b946c2b724..aeab1ab1758df 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java @@ -31,7 +31,7 @@ * in a doc value field in the document. If a document has no doc_count field * the implementation will return 1 as the default value. */ -public class FieldBasedDocCountProvider implements DocCountProvider { +public class FieldBasedDocCountProvider { private final String docCountFieldName; private NumericDocValues docCountValues; @@ -42,7 +42,6 @@ public FieldBasedDocCountProvider() { this.docCountFieldName = DocCountFieldMapper.NAME; } - @Override public int getDocCount(int doc) throws IOException { if (docCountValues != null && docCountValues.advanceExact(doc)) { return (int) docCountValues.longValue(); @@ -51,7 +50,6 @@ public int getDocCount(int doc) throws IOException { } } - @Override public void setLeafReaderContext(LeafReaderContext ctx) { try { docCountValues = DocValues.getNumeric(ctx.reader(), docCountFieldName); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index eeddf137a60d1..af0ef9b29e7e7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -305,7 +305,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol assert sub == LeafBucketCollector.NO_OP_COLLECTOR; final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); mapping = valuesSource.globalOrdinalsMapping(ctx); - docCountProvider.setLeafReaderContext(ctx); +// docCountProvider.setLeafReaderContext(ctx); // Dense mode doesn't support include/exclude so we don't have to check it here. if (singleValues != null) { segmentsWithSingleValuedOrds++; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index b69b0592e3084..62844ee93c546 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -59,6 +59,6 @@ public void testInvalidDocument_ZeroDocCount() throws Exception { public void testInvalidDocument_FractionalDocCount() throws Exception { DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23)))); - assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer")); + assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Long without data loss")); } } From f5ed1df579c55b65bb63317934e1d6877a28f1ba Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 19 Oct 2020 16:10:04 +0300 Subject: [PATCH 34/39] Addressed reviewer comments --- docs/reference/mapping/fields.asciidoc | 4 +-- .../index/mapper/DocCountFieldMapper.java | 19 ++++++-------- .../bucket/BucketsAggregator.java | 26 +++++++++---------- .../bucket/FieldBasedDocCountProvider.java | 6 ++--- .../adjacency/AdjacencyMatrixAggregator.java | 4 +-- .../GlobalOrdinalsStringTermsAggregator.java | 12 ++++----- .../mapper/DocCountFieldMapperTests.java | 7 +++++ 7 files changed, 40 insertions(+), 38 deletions(-) diff --git a/docs/reference/mapping/fields.asciidoc b/docs/reference/mapping/fields.asciidoc index 3559483132dfe..22ad75eb7e5d5 100644 --- a/docs/reference/mapping/fields.asciidoc +++ b/docs/reference/mapping/fields.asciidoc @@ -65,6 +65,8 @@ some of these metadata fields can be customized when a mapping type is created. A custom field used for storing doc counts when a document represents pre-aggregated data. +include::fields/doc-count-field.asciidoc[] + include::fields/field-names-field.asciidoc[] include::fields/ignored-field.asciidoc[] @@ -80,5 +82,3 @@ include::fields/routing-field.asciidoc[] include::fields/source-field.asciidoc[] include::fields/type-field.asciidoc[] - -include::fields/doc-count-field.asciidoc[] diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index a0ad35b286329..838d3d07a5b83 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParserUtils; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; import org.elasticsearch.search.lookup.SearchLookup; @@ -88,19 +89,15 @@ private DocCountFieldMapper() { @Override protected void parseCreateField(ParseContext context) throws IOException { - if (context.parser().currentToken() == XContentParser.Token.VALUE_NUMBER) { - Long value = context.parser().longValue(false); - - if (value != null) { - if (value.longValue() <= 0) { - throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer."); - } - final Field docCount = new NumericDocValuesField(NAME, value.longValue()); - context.doc().add(docCount); - } - } else { + XContentParser parser = context.parser(); + XContentParserUtils.ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser); + + long value = parser.longValue(false); + if (value <= 0) { throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer."); } + final Field docCount = new NumericDocValuesField(NAME, value); + context.doc().add(docCount); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 16aeede42027c..1b7a877a30d16 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -21,7 +21,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorBase; @@ -53,7 +53,7 @@ public abstract class BucketsAggregator extends AggregatorBase { private final BigArrays bigArrays; private final IntConsumer multiBucketConsumer; - private IntArray docCounts; + private LongArray docCounts; protected final FieldBasedDocCountProvider docCountProvider; public BucketsAggregator(String name, AggregatorFactories factories, SearchContext context, Aggregator parent, @@ -65,7 +65,7 @@ public BucketsAggregator(String name, AggregatorFactories factories, SearchConte } else { multiBucketConsumer = (count) -> {}; } - docCounts = bigArrays.newIntArray(1, true); + docCounts = bigArrays.newLongArray(1, true); docCountProvider = new FieldBasedDocCountProvider(); } @@ -95,7 +95,7 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long * Same as {@link #collectBucket(LeafBucketCollector, int, long)}, but doesn't check if the docCounts needs to be re-sized. */ public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException { - int docCount = docCountProvider.getDocCount(doc); + long docCount = docCountProvider.getDocCount(doc); if (docCounts.increment(bucketOrd, docCount) == docCount) { // We calculate the final number of buckets only during the reduce phase. But we still need to // trigger bucket consumer from time to time in order to give it a chance to check available memory and break @@ -127,11 +127,11 @@ public final void mergeBuckets(long[] mergeMap, long newNumBuckets) { * merge the actual ordinals and doc ID deltas. */ public final void mergeBuckets(long newNumBuckets, LongUnaryOperator mergeMap){ - try (IntArray oldDocCounts = docCounts) { - docCounts = bigArrays.newIntArray(newNumBuckets, true); + try (LongArray oldDocCounts = docCounts) { + docCounts = bigArrays.newLongArray(newNumBuckets, true); docCounts.fill(0, newNumBuckets, 0); for (long i = 0; i < oldDocCounts.size(); i++) { - int docCount = oldDocCounts.get(i); + long docCount = oldDocCounts.get(i); if(docCount == 0) continue; @@ -144,14 +144,14 @@ public final void mergeBuckets(long newNumBuckets, LongUnaryOperator mergeMap){ } } - public IntArray getDocCounts() { + public LongArray getDocCounts() { return docCounts; } /** * Utility method to increment the doc counts of the given bucket (identified by the bucket ordinal) */ - public final void incrementBucketDocCount(long bucketOrd, int inc) { + public final void incrementBucketDocCount(long bucketOrd, long inc) { docCounts = bigArrays.grow(docCounts, bucketOrd + 1); docCounts.increment(bucketOrd, inc); } @@ -159,7 +159,7 @@ public final void incrementBucketDocCount(long bucketOrd, int inc) { /** * Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal) */ - public final int bucketDocCount(long bucketOrd) { + public final long bucketDocCount(long bucketOrd) { if (bucketOrd >= docCounts.size()) { // This may happen eg. if no document in the highest buckets is accepted by a sub aggregator. // For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value @@ -299,7 +299,7 @@ protected final InternalAggregation[] buildAggregationsForFixedBucketCount(l } @FunctionalInterface protected interface BucketBuilderForFixedCount { - B build(int offsetInOwningOrd, int docCount, InternalAggregations subAggregationResults); + B build(int offsetInOwningOrd, long docCount, InternalAggregations subAggregationResults); } /** @@ -370,7 +370,7 @@ protected final InternalAggregation[] buildAggregationsForVariableBuckets(lo } @FunctionalInterface protected interface BucketBuilderForVariable { - B build(long bucketValue, int docCount, InternalAggregations subAggregationResults); + B build(long bucketValue, long docCount, InternalAggregations subAggregationResults); } @FunctionalInterface protected interface ResultBuilderForVariable { @@ -398,7 +398,7 @@ public BucketComparator bucketComparator(String key, SortOrder order) { return super.bucketComparator(key, order); } if (key == null || "doc_count".equals(key)) { - return (lhs, rhs) -> order.reverseMul() * Integer.compare(bucketDocCount(lhs), bucketDocCount(rhs)); + return (lhs, rhs) -> order.reverseMul() * Long.compare(bucketDocCount(lhs), bucketDocCount(rhs)); } throw new IllegalArgumentException("Ordering on a single-bucket aggregation can only be done on its doc_count. " + "Either drop the key (a la \"" + name() + "\") or change it to \"doc_count\" (a la \"" + name() + diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java index aeab1ab1758df..3e91a3cc3a254 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java @@ -42,11 +42,11 @@ public FieldBasedDocCountProvider() { this.docCountFieldName = DocCountFieldMapper.NAME; } - public int getDocCount(int doc) throws IOException { + public long getDocCount(int doc) throws IOException { if (docCountValues != null && docCountValues.advanceExact(doc)) { - return (int) docCountValues.longValue(); + return docCountValues.longValue(); } else { - return 1; + return 1L; } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java index afc1bea27c2f3..c12a6f002b418 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java @@ -200,7 +200,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I List buckets = new ArrayList<>(filters.length); for (int i = 0; i < keys.length; i++) { long bucketOrd = bucketOrd(owningBucketOrds[owningBucketOrdIdx], i); - int docCount = bucketDocCount(bucketOrd); + long docCount = bucketDocCount(bucketOrd); // Empty buckets are not returned because this aggregation will commonly be used under a // a date-histogram where we will look for transactions over time and can expect many // empty buckets. @@ -214,7 +214,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I for (int i = 0; i < keys.length; i++) { for (int j = i + 1; j < keys.length; j++) { long bucketOrd = bucketOrd(owningBucketOrds[owningBucketOrdIdx], pos); - int docCount = bucketDocCount(bucketOrd); + long docCount = bucketDocCount(bucketOrd); // Empty buckets are not returned due to potential for very sparse matrices if (docCount > 0) { String intersectKey = keys[i] + separator + keys[j]; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index af0ef9b29e7e7..bfd1ea455cef5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -30,7 +30,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; -import org.elasticsearch.common.util.IntArray; import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.LongHash; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -272,7 +271,7 @@ protected void doClose() { static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { private LongUnaryOperator mapping; - private IntArray segmentDocCounts; + private LongArray segmentDocCounts; LowCardinality( String name, @@ -292,7 +291,7 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { super(name, factories, resultStrategy, valuesSource, order, format, bucketCountThresholds, null, context, parent, remapGlobalOrds, collectionMode, showTermDocCountError, CardinalityUpperBound.ONE, metadata); assert factories == null || factories.countAggregators() == 0; - this.segmentDocCounts = context.bigArrays().newIntArray(1, true); + this.segmentDocCounts = context.bigArrays().newLongArray(1, true); } @Override @@ -305,7 +304,6 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol assert sub == LeafBucketCollector.NO_OP_COLLECTOR; final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); mapping = valuesSource.globalOrdinalsMapping(ctx); -// docCountProvider.setLeafReaderContext(ctx); // Dense mode doesn't support include/exclude so we don't have to check it here. if (singleValues != null) { segmentsWithSingleValuedOrds++; @@ -317,7 +315,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } int ord = singleValues.ordValue(); - int docCount = docCountProvider.getDocCount(doc); + long docCount = docCountProvider.getDocCount(doc); segmentDocCounts.increment(ord + 1, docCount); } }); @@ -331,7 +329,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) { - int docCount = docCountProvider.getDocCount(doc); + long docCount = docCountProvider.getDocCount(doc); segmentDocCounts.increment(segmentOrd + 1, docCount); } } @@ -355,7 +353,7 @@ private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IO for (long i = 1; i < segmentDocCounts.size(); i++) { // We use set(...) here, because we need to reset the slow to 0. // segmentDocCounts get reused over the segments and otherwise counts would be too high. - int inc = segmentDocCounts.set(i, 0); + long inc = segmentDocCounts.set(i, 0); if (inc == 0) { continue; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index 62844ee93c546..25cf8c064a847 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -56,6 +56,13 @@ public void testInvalidDocument_ZeroDocCount() throws Exception { assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer")); } + public void testInvalidDocument_NonNumericDocCount() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, "foo")))); + assertThat(e.getCause().getMessage(), + containsString("Failed to parse object: expecting token of type [VALUE_NUMBER] but found [VALUE_STRING]")); + } + public void testInvalidDocument_FractionalDocCount() throws Exception { DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23)))); From 4138d16bcd0027ae4742b930d33fec193a239e65 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Wed, 21 Oct 2020 09:49:31 +0300 Subject: [PATCH 35/39] Added DocCountFieldTypeTests --- .../index/mapper/DocCountFieldMapper.java | 35 +++++++----- .../index/mapper/DocCountFieldTypeTests.java | 55 +++++++++++++++++++ 2 files changed, 76 insertions(+), 14 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 838d3d07a5b83..a2eca1ad1eff8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -63,6 +63,8 @@ public static final class DocCountFieldType extends MappedFieldType { public static final DocCountFieldType INSTANCE = new DocCountFieldType(); + private static final Long defaultValue = 1L; + public DocCountFieldType() { super(NAME, false, false, true, TextSearchInfo.NONE, Collections.emptyMap()); } @@ -79,7 +81,25 @@ public Query existsQuery(QueryShardContext context) { @Override public Query termQuery(Object value, QueryShardContext context) { - throw new QueryShardException(context, "Field [" + name() + " ]of type [" + CONTENT_TYPE + "] is not searchable"); + throw new QueryShardException(context, "Field [" + name() + "] of type [" + typeName() + "] is not searchable"); + } + + @Override + public ValueFetcher valueFetcher(MapperService mapperService, SearchLookup searchLookup, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + + return new SourceValueFetcher(name(), mapperService, defaultValue) { + @Override + protected Object parseSourceValue(Object value) { + if ("".equals(value)) { + return defaultValue; + } else { + return NumberFieldMapper.NumberType.objectToLong(value, false); + } + } + }; } } @@ -103,19 +123,6 @@ protected void parseCreateField(ParseContext context) throws IOException { @Override public void preParse(ParseContext context) { } - @Override - public ValueFetcher valueFetcher(MapperService mapperService, SearchLookup lookup, String format) { - if (format != null) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); - } - return new SourceValueFetcher(name(), mapperService, parsesArrayValue()) { - @Override - protected Object parseSourceValue(Object value) { - return value; - } - }; - } - @Override public DocCountFieldType fieldType() { return (DocCountFieldType) super.fieldType(); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java new file mode 100644 index 0000000000000..f8f5d3c2e810c --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java @@ -0,0 +1,55 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.elasticsearch.index.query.QueryShardException; + +import java.io.IOException; +import java.util.Arrays; + +public class DocCountFieldTypeTests extends FieldTypeTestCase { + + public void testTermQuery() { + MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType(); + QueryShardException e = expectThrows(QueryShardException.class, + () -> ft.termQuery(10L, randomMockShardContext())); + assertEquals("Field [_doc_count] of type [_doc_count] is not searchable", e.getMessage()); + } + + public void testRangeQuery() { + MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType(); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> ft.rangeQuery(null, null, randomBoolean(), randomBoolean(), null, null, null, null)); + assertEquals("Field [_doc_count] of type [_doc_count] does not support range queries", e.getMessage()); + } + + public void testExistsQuery() { + MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType(); + assertTrue(ft.existsQuery(randomMockShardContext()) instanceof DocValuesFieldExistsQuery); + } + + public void testFetchSourceValue() throws IOException { + MappedFieldType fieldType = new DocCountFieldMapper.DocCountFieldType(); + assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, 14)); + assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, "14")); + assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, "")); + assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, null)); + } +} From 654847e421be6e59ac2642febfcb628551b8f9f6 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 27 Oct 2020 14:42:51 +0200 Subject: [PATCH 36/39] Fix errors after merge --- .../java/org/elasticsearch/indices/IndicesModuleTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java b/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java index 7476727b698bb..7c58d977154af 100644 --- a/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java +++ b/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java @@ -77,8 +77,8 @@ public Map getMetadataMappers() { private static final String[] EXPECTED_METADATA_FIELDS = new String[]{ IgnoredFieldMapper.NAME, IdFieldMapper.NAME, RoutingFieldMapper.NAME, IndexFieldMapper.NAME, SourceFieldMapper.NAME, - NestedPathFieldMapper.NAME, VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, FieldNamesFieldMapper.NAME, - DocCountFieldMapper.NAME }; + NestedPathFieldMapper.NAME, VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, DocCountFieldMapper.NAME, + FieldNamesFieldMapper.NAME }; public void testBuiltinMappers() { IndicesModule module = new IndicesModule(Collections.emptyList()); From 7b7ca43d6aa9bac621f3977c519a407122e10623 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 27 Oct 2020 17:20:55 +0200 Subject: [PATCH 37/39] Make composite agg respect _doc_count field Cleaned up/simplified DocCountProvider class --- .../370_doc_count_field.yml | 55 +++++++++++++++++++ .../bucket/BucketsAggregator.java | 6 +- ...untProvider.java => DocCountProvider.java} | 17 ++---- .../bucket/composite/CompositeAggregator.java | 5 +- .../CompositeValuesCollectorQueue.java | 22 ++++---- .../bucket/composite/SortedDocsProducer.java | 6 +- ...rTests.java => DocCountProviderTests.java} | 2 +- .../CompositeValuesCollectorQueueTests.java | 2 +- 8 files changed, 84 insertions(+), 31 deletions(-) rename server/src/main/java/org/elasticsearch/search/aggregations/bucket/{FieldBasedDocCountProvider.java => DocCountProvider.java} (74%) rename server/src/test/java/org/elasticsearch/search/aggregations/bucket/{FieldBasedDocCountProviderTests.java => DocCountProviderTests.java} (98%) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml index f4a69de1c2e7b..8c19a418f0a8f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml @@ -93,3 +93,58 @@ setup: - length: { aggregations.str_terms.buckets: 1 } - match: { aggregations.str_terms.buckets.0.key: "abc" } - match: { aggregations.str_terms.buckets.0.doc_count: 11 } + +--- +"Test composite str_terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + - do: + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : + { "composite_agg" : { "composite" : + { + "sources": ["str_terms": { "terms": { "field": "str" } }] + } + } + } + } + + - match: { hits.total: 5 } + - length: { aggregations.composite_agg.buckets: 3 } + - match: { aggregations.composite_agg.buckets.0.key.str_terms: "abc" } + - match: { aggregations.composite_agg.buckets.0.doc_count: 11 } + - match: { aggregations.composite_agg.buckets.1.key.str_terms: "foo" } + - match: { aggregations.composite_agg.buckets.1.doc_count: 8 } + - match: { aggregations.composite_agg.buckets.2.key.str_terms: "xyz" } + - match: { aggregations.composite_agg.buckets.2.doc_count: 5 } + + +--- +"Test composite num_terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + - do: + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : + { "composite_agg" : + { "composite" : + { + "sources": ["num_terms" : { "terms" : { "field" : "number" } }] + } + } + } + } + + - match: { hits.total: 5 } + - length: { aggregations.composite_agg.buckets: 3 } + - match: { aggregations.composite_agg.buckets.0.key.num_terms: 100 } + - match: { aggregations.composite_agg.buckets.0.doc_count: 12 } + - match: { aggregations.composite_agg.buckets.1.key.num_terms: 200 } + - match: { aggregations.composite_agg.buckets.1.doc_count: 1 } + - match: { aggregations.composite_agg.buckets.2.key.num_terms: 500 } + - match: { aggregations.composite_agg.buckets.2.doc_count: 11 } + diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 1b7a877a30d16..427c084d0e1a5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -54,7 +54,7 @@ public abstract class BucketsAggregator extends AggregatorBase { private final BigArrays bigArrays; private final IntConsumer multiBucketConsumer; private LongArray docCounts; - protected final FieldBasedDocCountProvider docCountProvider; + protected final DocCountProvider docCountProvider; public BucketsAggregator(String name, AggregatorFactories factories, SearchContext context, Aggregator parent, CardinalityUpperBound bucketCardinality, Map metadata) throws IOException { @@ -66,7 +66,7 @@ public BucketsAggregator(String name, AggregatorFactories factories, SearchConte multiBucketConsumer = (count) -> {}; } docCounts = bigArrays.newLongArray(1, true); - docCountProvider = new FieldBasedDocCountProvider(); + docCountProvider = new DocCountProvider(); } /** @@ -418,7 +418,7 @@ public static boolean descendsFromGlobalAggregator(Aggregator parent) { @Override protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { super.preGetSubLeafCollectors(ctx); - // Set LeafReaderContext to the field based doc_count provider + // Set LeafReaderContext to the doc_count provider docCountProvider.setLeafReaderContext(ctx); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java similarity index 74% rename from server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java rename to server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java index 3e91a3cc3a254..a3cb5f50a15cb 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -27,21 +27,14 @@ import java.io.IOException; /** - * An implementation of a doc_count provider that reads the doc_count value - * in a doc value field in the document. If a document has no doc_count field - * the implementation will return 1 as the default value. + * An implementation of a doc_count provider that reads the value + * of the _doc_count field in the document. If a document does not have a + * _doc_count field the implementation will return 1 as the default value. */ -public class FieldBasedDocCountProvider { +public class DocCountProvider { - private final String docCountFieldName; private NumericDocValues docCountValues; - public FieldBasedDocCountProvider() { - // Since we allow a single doc_count field per mapping, we use a constant - // canonical name for the Lucene field. - this.docCountFieldName = DocCountFieldMapper.NAME; - } - public long getDocCount(int doc) throws IOException { if (docCountValues != null && docCountValues.advanceExact(doc)) { return docCountValues.longValue(); @@ -52,7 +45,7 @@ public long getDocCount(int doc) throws IOException { public void setLeafReaderContext(LeafReaderContext ctx) { try { - docCountValues = DocValues.getNumeric(ctx.reader(), docCountFieldName); + docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); } catch (IOException e) { docCountValues = null; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java index 2791e1edd72c7..8093d7bfa7ed9 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -160,7 +160,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I int slot = queue.pop(); CompositeKey key = queue.toCompositeKey(slot); InternalAggregations aggs = subAggsForBuckets[slot]; - int docCount = queue.getDocCount(slot); + long docCount = queue.getDocCount(slot); buckets[queue.size()] = new InternalComposite.InternalBucket(sourceNames, formats, key, reverseMuls, docCount, aggs); } CompositeKey lastBucket = num > 0 ? buckets[num-1].getRawKey() : null; @@ -435,7 +435,8 @@ private LeafBucketCollector getFirstPassCollector(RoaringDocIdSet.Builder builde @Override public void collect(int doc, long bucket) throws IOException { try { - if (queue.addIfCompetitive(indexSortPrefix)) { + long docCount = docCountProvider.getDocCount(doc); + if (queue.addIfCompetitive(indexSortPrefix, docCount)) { if (builder != null && lastDoc != doc) { builder.add(doc); lastDoc = doc; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java index 70887e0724c8a..35caefb495da4 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java @@ -25,7 +25,7 @@ import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.LeafBucketCollector; import java.io.IOException; @@ -65,7 +65,7 @@ public int hashCode() { private final Map map; private final SingleDimensionValuesSource[] arrays; - private IntArray docCounts; + private LongArray docCounts; private boolean afterKeyIsSet = false; /** @@ -88,7 +88,7 @@ public int hashCode() { sources[i].setAfter(afterKey.get(i)); } } - this.docCounts = bigArrays.newIntArray(1, false); + this.docCounts = bigArrays.newLongArray(1, false); } @Override @@ -127,19 +127,19 @@ Comparable getUpperValueLeadSource() throws IOException { /** * Returns the document count in slot. */ - int getDocCount(int slot) { + long getDocCount(int slot) { return docCounts.get(slot); } /** * Copies the current value in slot. */ - private void copyCurrent(int slot) { + private void copyCurrent(int slot, long value) { for (int i = 0; i < arrays.length; i++) { arrays[i].copyCurrent(slot); } docCounts = bigArrays.grow(docCounts, slot+1); - docCounts.set(slot, 1); + docCounts.set(slot, value); } /** @@ -248,8 +248,8 @@ LeafBucketCollector getLeafCollector(Comparable forceLeadSourceValue, * Check if the current candidate should be added in the queue. * @return true if the candidate is competitive (added or already in the queue). */ - boolean addIfCompetitive() { - return addIfCompetitive(0); + boolean addIfCompetitive(long inc) { + return addIfCompetitive(0, inc); } @@ -263,12 +263,12 @@ boolean addIfCompetitive() { * * @throws CollectionTerminatedException if the current collection can be terminated early due to index sorting. */ - boolean addIfCompetitive(int indexSortSourcePrefix) { + boolean addIfCompetitive(int indexSortSourcePrefix, long inc) { // checks if the candidate key is competitive Integer topSlot = compareCurrent(); if (topSlot != null) { // this key is already in the top N, skip it - docCounts.increment(topSlot, 1); + docCounts.increment(topSlot, inc); return true; } if (afterKeyIsSet) { @@ -309,7 +309,7 @@ boolean addIfCompetitive(int indexSortSourcePrefix) { newSlot = size(); } // move the candidate key to its new slot - copyCurrent(newSlot); + copyCurrent(newSlot, inc); map.put(new Slot(newSlot), newSlot); add(newSlot); return true; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java index 01e2c0a3ae5b0..7a5cef87b6731 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java @@ -27,6 +27,7 @@ import org.apache.lucene.util.DocIdSetBuilder; import org.elasticsearch.common.Nullable; import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.bucket.DocCountProvider; import java.io.IOException; @@ -54,6 +55,8 @@ protected boolean processBucket(CompositeValuesCollectorQueue queue, LeafReaderC Comparable leadSourceBucket, @Nullable DocIdSetBuilder builder) throws IOException { final int[] topCompositeCollected = new int[1]; final boolean[] hasCollected = new boolean[1]; + final DocCountProvider docCountProvider = new DocCountProvider(); + docCountProvider.setLeafReaderContext(context); final LeafBucketCollector queueCollector = new LeafBucketCollector() { int lastDoc = -1; @@ -66,7 +69,8 @@ protected boolean processBucket(CompositeValuesCollectorQueue queue, LeafReaderC @Override public void collect(int doc, long bucket) throws IOException { hasCollected[0] = true; - if (queue.addIfCompetitive()) { + long docCount = docCountProvider.getDocCount(doc); + if (queue.addIfCompetitive(docCount)) { topCompositeCollected[0]++; if (adder != null && doc != lastDoc) { if (remainingBits == 0) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java rename to server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java index 1016a092fa291..f7ba8db8a66f2 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/FieldBasedDocCountProviderTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java @@ -40,7 +40,7 @@ import static java.util.Collections.singleton; -public class FieldBasedDocCountProviderTests extends AggregatorTestCase { +public class DocCountProviderTests extends AggregatorTestCase { private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME; private static final String NUMBER_FIELD = "number"; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java index 8cb35b6b861b4..57f04d5192529 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java @@ -309,7 +309,7 @@ private void testRandomCase(boolean forceMerge, final LeafBucketCollector leafCollector = new LeafBucketCollector() { @Override public void collect(int doc, long bucket) throws IOException { - queue.addIfCompetitive(indexSortSourcePrefix); + queue.addIfCompetitive(indexSortSourcePrefix, 1); } }; final LeafBucketCollector queueCollector = queue.getLeafCollector(leafReaderContext, leafCollector); From 1d969a1d832f3c22a755baee86c54e14548a949f Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Tue, 27 Oct 2020 21:27:03 +0200 Subject: [PATCH 38/39] DocCountProvider rethrows IOException instead of swallowing it --- .../search/aggregations/bucket/DocCountProvider.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java index a3cb5f50a15cb..9cf25e098cb0f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -43,11 +43,7 @@ public long getDocCount(int doc) throws IOException { } } - public void setLeafReaderContext(LeafReaderContext ctx) { - try { - docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); - } catch (IOException e) { - docCountValues = null; - } + public void setLeafReaderContext(LeafReaderContext ctx) throws IOException { + docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); } } From cb05034370d6b5d586c44b4f5259f2c99e22ce3d Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 2 Nov 2020 17:42:35 +0200 Subject: [PATCH 39/39] Set familyTypeName of _doc_count to integer --- .../org/elasticsearch/index/mapper/DocCountFieldMapper.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index a2eca1ad1eff8..611bab88f66fe 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -74,6 +74,11 @@ public String typeName() { return CONTENT_TYPE; } + @Override + public String familyTypeName() { + return NumberFieldMapper.NumberType.INTEGER.typeName(); + } + @Override public Query existsQuery(QueryShardContext context) { return new DocValuesFieldExistsQuery(NAME);