diff --git a/docs/changelog/118802.yaml b/docs/changelog/118802.yaml new file mode 100644 index 0000000000000..600c4b6a1e203 --- /dev/null +++ b/docs/changelog/118802.yaml @@ -0,0 +1,5 @@ +pr: 118802 +summary: ST_EXTENT_AGG optimize envelope extraction from doc-values for cartesian_shape +area: "ES|QL" +type: enhancement +issues: [] diff --git a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java index 1616d2727bf8a..b0634f0f1332f 100644 --- a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java +++ b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java @@ -46,6 +46,7 @@ import org.elasticsearch.legacygeo.builders.ShapeBuilder; import org.elasticsearch.legacygeo.parsers.ShapeParser; import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import org.locationtech.spatial4j.shape.Point; @@ -530,6 +531,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) { protected Function>, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. + return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } private final IndexVersion indexCreatedVersion; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java index 02a3ae11524e3..4b0542f7f7b03 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java @@ -8,9 +8,18 @@ */ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.lucene.spatial.GeometryDocValueReader; +import java.io.IOException; +import java.nio.ByteOrder; import java.util.Map; import java.util.function.Function; @@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) { // we don't support null value fors shapes return nullValue; } + + @Override + public BlockLoader blockLoader(BlockLoaderContext blContext) { + return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported() + ? new BoundsBlockLoader(name(), coordinateEncoder()) + : blockLoaderFromSource(blContext); + } + + protected abstract boolean isBoundsExtractionSupported(); + + protected abstract CoordinateEncoder coordinateEncoder(); + + // Visible for testing + static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader { + private final String fieldName; + private final CoordinateEncoder encoder; + + BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) { + this.fieldName = fieldName; + this.encoder = encoder; + } + + @Override + public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException { + return new BlockLoader.AllReader() { + @Override + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + try (var builder = factory.bytesRefs(docs.count())) { + for (int i = 0; i < docs.count(); i++) { + read(binaryDocValues, docs.get(i), reader, builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + read(binaryDocValues, docId, reader, (BytesRefBuilder) builder); + } + + private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder) + throws IOException { + binaryDocValues.advanceExact(doc); + reader.reset(binaryDocValues.binaryValue()); + var extent = reader.getExtent(); + // This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to + // preserve its properties as a WKB shape, only to convert it back to ints when we compute the + // aggregation. An obvious optimization would be to avoid this back-and-forth conversion. + var rectangle = new Rectangle( + encoder.decodeX(extent.minX()), + encoder.decodeX(extent.maxX()), + encoder.decodeY(extent.maxY()), + encoder.decodeY(extent.minY()) + ); + builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN))); + } + + @Override + public boolean canReuse(int startingDocID) { + return true; + } + }; + } + + @Override + public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + } } protected Explicit coerce; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 92c9422fcbc9d..c85c86783de63 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -704,10 +704,15 @@ public enum FieldExtractPreference { * Load the field from doc-values into a BlockLoader supporting doc-values. */ DOC_VALUES, + /** + * Loads the field by extracting the extent from the binary encoded representation + */ + EXTRACT_SPATIAL_BOUNDS, /** * No preference. Leave the choice of where to load the field from up to the FieldType. */ - NONE + NONE; + } /** diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java new file mode 100644 index 0000000000000..bd58f4d443d34 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -0,0 +1,95 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geo.GeometryTestUtils; +import org.elasticsearch.geo.ShapeTestUtils; +import org.elasticsearch.geometry.Geometry; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; +import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField; +import org.elasticsearch.lucene.spatial.CartesianShapeIndexer; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; + +import java.io.IOException; +import java.util.Optional; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.IntStream; + +public class AbstractShapeGeometryFieldMapperTests extends ESTestCase { + public void testCartesianBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.CARTESIAN, + () -> ShapeTestUtils.randomGeometryWithoutCircle(0, false), + CartesianShapeIndexer::new, + SpatialEnvelopeVisitor::visitCartesian + ); + } + + // TODO when we turn this optimization on for geo, this test should pass. + public void ignoreTestGeoBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.GEO, + () -> GeometryTestUtils.randomGeometryWithoutCircle(0, false), + field -> new GeoShapeIndexer(Orientation.RIGHT, field), + g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP) + ); + } + + private void testBoundsBlockLoaderAux( + CoordinateEncoder encoder, + Supplier generator, + Function indexerFactory, + Function> visitor + ) throws IOException { + var geometries = IntStream.range(0, 20).mapToObj(i -> generator.get()).toList(); + var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder); + try (Directory directory = newDirectory()) { + try (var iw = new RandomIndexWriter(random(), directory)) { + for (Geometry geometry : geometries) { + var shape = new BinaryShapeDocValuesField("field", encoder); + shape.add(indexerFactory.apply("field").indexShape(geometry), geometry); + var doc = new Document(); + doc.add(shape); + iw.addDocument(doc); + } + } + var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray(); + try (DirectoryReader reader = DirectoryReader.open(directory)) { + LeafReaderContext ctx = reader.leaves().get(0); + TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices)); + for (int i = 0; i < indices.length; i++) { + var idx = indices[i]; + var geometry = geometries.get(idx); + var geoString = geometry.toString(); + var geometryString = geoString.length() > 200 ? geoString.substring(0, 200) + "..." : geoString; + Rectangle r = visitor.apply(geometry).get(); + assertThat( + Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometryString), + (BytesRef) block.get(i), + WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder)) + ); + } + } + } + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index fcd3ebb63dc81..ec6b45bff8300 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.fielddata.LeafFieldData; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.index.translog.Translog; @@ -87,8 +88,6 @@ import java.util.stream.IntStream; import static java.util.stream.Collectors.toList; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE; import static org.elasticsearch.test.MapMatcher.assertMap; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -1436,7 +1435,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str this(columnAtATimeReader, true, mapper, loaderFieldName); } - private BlockLoader getBlockLoader(boolean columnReader) { + private BlockLoader getBlockLoader(FieldExtractPreference fieldExtractPreference) { SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null); return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override @@ -1450,8 +1449,8 @@ public IndexSettings indexSettings() { } @Override - public MappedFieldType.FieldExtractPreference fieldExtractPreference() { - return columnReader ? DOC_VALUES : NONE; + public FieldExtractPreference fieldExtractPreference() { + return fieldExtractPreference; } @Override @@ -1509,7 +1508,9 @@ protected final void testBlockLoader( BlockReaderSupport blockReaderSupport, SourceLoader sourceLoader ) throws IOException { - BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader); + // EXTRACT_SPATIAL_BOUNDS is not currently supported in this test path. + var fieldExtractPreference = columnReader ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; + BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference); Function valuesConvert = loadBlockExpected(blockReaderSupport, columnReader); if (valuesConvert == null) { assertNull(loader); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java similarity index 60% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java rename to test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java index 48fbc9c8e0378..2d55b439bd1b7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java @@ -1,14 +1,16 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; -import org.elasticsearch.compute.aggregation.spatial.PointType; import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.hamcrest.Description; import org.hamcrest.Matchers; import org.hamcrest.TypeSafeMatcher; @@ -19,23 +21,31 @@ */ public class RectangleMatcher extends TypeSafeMatcher { private final Rectangle r; - private final PointType pointType; + private final CoordinateEncoder coordinateEncoder; private final double error; - public static TypeSafeMatcher closeTo(Rectangle r, double error, PointType pointType) { - return new RectangleMatcher(r, error, pointType); + public static TypeSafeMatcher closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) { + return new RectangleMatcher(r, error, coordinateEncoder); } - private RectangleMatcher(Rectangle r, double error, PointType pointType) { + private RectangleMatcher(Rectangle r, double error, CoordinateEncoder coordinateEncoder) { this.r = r; - this.pointType = pointType; + this.coordinateEncoder = coordinateEncoder; this.error = error; } + /** + * Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints. + */ + public static TypeSafeMatcher closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) { + var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY()); + return closeTo(normalized, v, encoder); + } + @Override protected boolean matchesSafely(Rectangle other) { // For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe. - boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX(); + boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX(); boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX()) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX())) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java similarity index 69% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java rename to test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java index 535bb820458cd..809f2862c208c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java @@ -1,11 +1,13 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; import org.apache.lucene.util.BytesRef; import org.elasticsearch.geometry.Geometry; @@ -23,6 +25,10 @@ public WellKnownBinaryBytesRefMatcher(Matcher matcher) { this.matcher = matcher; } + public static Matcher encodes(TypeSafeMatcher matcher) { + return new WellKnownBinaryBytesRefMatcher(matcher); + } + @Override public boolean matchesSafely(BytesRef bytesRef) { return matcher.matches(fromBytesRef(bytesRef)); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java index 5395ca0b85163..fb45f869c4133 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java @@ -7,12 +7,11 @@ package org.elasticsearch.compute.aggregation.spatial; -import org.apache.lucene.geo.GeoEncodingUtils; -import org.apache.lucene.geo.XYEncodingUtils; import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.util.Optional; @@ -23,26 +22,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitGeo(geo, WrapLongitude.WRAP); } - @Override - public double decodeX(int encoded) { - return GeoEncodingUtils.decodeLongitude(encoded); - } - - @Override - public double decodeY(int encoded) { - return GeoEncodingUtils.decodeLatitude(encoded); - } - - @Override - public int encodeX(double decoded) { - return GeoEncodingUtils.encodeLongitude(decoded); - } - - @Override - public int encodeY(double decoded) { - return GeoEncodingUtils.encodeLatitude(decoded); - } - // Geo encodes the longitude in the lower 32 bits and the latitude in the upper 32 bits. @Override public int extractX(long encoded) { @@ -53,6 +32,11 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.GEO; + } }, CARTESIAN { @Override @@ -60,26 +44,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitCartesian(geo); } - @Override - public double decodeX(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public double decodeY(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public int encodeX(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - - @Override - public int encodeY(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - @Override public int extractX(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); @@ -89,19 +53,18 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractSecond(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.CARTESIAN; + } }; public abstract Optional computeEnvelope(Geometry geo); - public abstract double decodeX(int encoded); - - public abstract double decodeY(int encoded); - - public abstract int encodeX(double decoded); - - public abstract int encodeY(double decoded); - public abstract int extractX(long encoded); public abstract int extractY(long encoded); + + public abstract CoordinateEncoder encoder(); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java index 9ce0ccdda0ff5..cb765e4d6757e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java @@ -72,10 +72,10 @@ public void add(int groupId, Geometry geometry) { .ifPresent( r -> add( groupId, - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -122,10 +122,10 @@ public Block toBlock(IntVector selected, DriverContext driverContext) { new BytesRef( WellKnownBinary.toWKB( new Rectangle( - pointType.decodeX(minXs.get(si)), - pointType.decodeX(maxXs.get(si)), - pointType.decodeY(maxYs.get(si)), - pointType.decodeY(minYs.get(si)) + pointType.encoder().decodeX(minXs.get(si)), + pointType.encoder().decodeX(maxXs.get(si)), + pointType.encoder().decodeY(maxYs.get(si)), + pointType.encoder().decodeY(minYs.get(si)) ), ByteOrder.LITTLE_ENDIAN ) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java index 3dd7a6d4acde2..41bc50abcf6bc 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java @@ -91,8 +91,8 @@ public void add(int groupId, Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java index 0eea9b79f73ea..3dc150d1702a2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java @@ -14,6 +14,7 @@ import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.nio.ByteOrder; @@ -46,10 +47,10 @@ public void add(Geometry geo) { pointType.computeEnvelope(geo) .ifPresent( r -> add( - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -74,8 +75,9 @@ public Block toBlock(DriverContext driverContext) { } private byte[] toWKB() { + CoordinateEncoder encoder = pointType.encoder(); return WellKnownBinary.toWKB( - new Rectangle(pointType.decodeX(minX), pointType.decodeX(maxX), pointType.decodeY(maxY), pointType.decodeY(minY)), + new Rectangle(encoder.decodeX(minX), encoder.decodeX(maxX), encoder.decodeY(maxY), encoder.decodeY(minY)), ByteOrder.LITTLE_ENDIAN ); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java index 99200d2ed99f5..0d6163636fcde 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java @@ -53,8 +53,8 @@ public void add(Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 0a0618d94d434..5ecbf15001244 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -104,6 +104,8 @@ public class CsvTestsDataLoader { private static final TestsDataset COUNTRIES_BBOX_WEB = new TestsDataset("countries_bbox_web"); private static final TestsDataset AIRPORT_CITY_BOUNDARIES = new TestsDataset("airport_city_boundaries"); private static final TestsDataset CARTESIAN_MULTIPOLYGONS = new TestsDataset("cartesian_multipolygons"); + private static final TestsDataset CARTESIAN_MULTIPOLYGONS_NO_DOC_VALUES = new TestsDataset("cartesian_multipolygons_no_doc_values") + .withData("cartesian_multipolygons.csv"); private static final TestsDataset MULTIVALUE_GEOMETRIES = new TestsDataset("multivalue_geometries"); private static final TestsDataset MULTIVALUE_POINTS = new TestsDataset("multivalue_points"); private static final TestsDataset DISTANCES = new TestsDataset("distances"); @@ -148,6 +150,7 @@ public class CsvTestsDataLoader { Map.entry(COUNTRIES_BBOX_WEB.indexName, COUNTRIES_BBOX_WEB), Map.entry(AIRPORT_CITY_BOUNDARIES.indexName, AIRPORT_CITY_BOUNDARIES), Map.entry(CARTESIAN_MULTIPOLYGONS.indexName, CARTESIAN_MULTIPOLYGONS), + Map.entry(CARTESIAN_MULTIPOLYGONS_NO_DOC_VALUES.indexName, CARTESIAN_MULTIPOLYGONS_NO_DOC_VALUES), Map.entry(MULTIVALUE_GEOMETRIES.indexName, MULTIVALUE_GEOMETRIES), Map.entry(MULTIVALUE_POINTS.indexName, MULTIVALUE_POINTS), Map.entry(DATE_NANOS.indexName, DATE_NANOS), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-cartesian_multipolygons_no_doc_values.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-cartesian_multipolygons_no_doc_values.json new file mode 100644 index 0000000000000..fb271c4975462 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-cartesian_multipolygons_no_doc_values.json @@ -0,0 +1,15 @@ +{ + "properties": { + "id": { + "type": "long" + }, + "name": { + "type": "keyword" + }, + "shape": { + "type": "shape", + "index": true, + "doc_values": false + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index bb70d48775fa6..5e23917222345 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -1798,6 +1798,47 @@ extent:cartesian_shape BBOX (0.0, 3.0, 3.0, 0.0) ; +stExtentCartesianShapesGrouping +required_capability: st_extent_agg + +FROM cartesian_multipolygons +| EVAL key = SUBSTRING(name,1,3) +| STATS extent = ST_EXTENT_AGG(shape), count = COUNT() BY key +| KEEP count, key, extent +| SORT count DESC, key ASC +; + +count:long | key:keyword | extent:cartesian_shape +8 | Bot | BBOX (0.0, 3.0, 1.0, 0.0) +8 | Top | BBOX (0.0, 3.0, 3.0, 2.0) +4 | Fou | BBOX (0.0, 3.0, 3.0, 0.0) +; + +stExtentCartesianShapesNoDocValues +required_capability: st_extent_agg +FROM cartesian_multipolygons_no_doc_values | STATS extent = ST_EXTENT_AGG(shape) +; + +extent:cartesian_shape +BBOX (0.0, 3.0, 3.0, 0.0) +; + +stExtentCartesianShapesGroupingNoDocValues +required_capability: st_extent_agg + +FROM cartesian_multipolygons_no_doc_values +| EVAL key = SUBSTRING(name,1,3) +| STATS extent = ST_EXTENT_AGG(shape), count = COUNT() BY key +| KEEP count, key, extent +| SORT count DESC, key ASC +; + +count:long | key:keyword | extent:cartesian_shape +8 | Bot | BBOX (0.0, 3.0, 1.0, 0.0) +8 | Top | BBOX (0.0, 3.0, 3.0, 2.0) +4 | Fou | BBOX (0.0, 3.0, 3.0, 0.0) +; + ############################################### # Tests for ST_INTERSECTS on CARTESIAN_POINT type diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java index c6e57ad222f4e..ab0eb52cbe060 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java @@ -103,11 +103,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (type) { case GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; default -> throw EsqlIllegalArgumentException.illegalDataType(type); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java index d7b4861cdc03f..a9922eef36746 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java @@ -104,11 +104,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (field().dataType()) { case GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; // Shapes don't differentiate between source and doc values. case GEO_SHAPE -> new SpatialExtentGeoShapeAggregatorFunctionSupplier(inputChannels); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index 1eaade043658b..a865f784137ad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushTopNToSource; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.ReplaceSourceAttributes; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialDocValuesExtraction; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialShapeBoundsExtraction; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; import org.elasticsearch.xpack.esql.rule.Rule; @@ -73,7 +74,13 @@ protected List> rules(boolean optimizeForEsSource) { var pushdown = new Batch("Push to ES", esSourceRules.toArray(Rule[]::new)); // add the field extraction in just one pass // add it at the end after all the other rules have ran - var fieldExtraction = new Batch<>("Field extraction", Limiter.ONCE, new InsertFieldExtraction(), new SpatialDocValuesExtraction()); + var fieldExtraction = new Batch<>( + "Field extraction", + Limiter.ONCE, + new InsertFieldExtraction(), + new SpatialDocValuesExtraction(), + new SpatialShapeBoundsExtraction() + ); return asList(pushdown, fieldExtraction); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java index 0f1c32e94f867..f66ed5c8e4ec1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialAggregateFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.BinarySpatialFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialRelatesFunction; @@ -156,6 +157,9 @@ private boolean foundField(Expression expression, Set foundAttri /** * This function disallows the use of more than one field for doc-values extraction in the same spatial relation function. * This is because comparing two doc-values fields is not supported in the current implementation. + * This also rejects fields that do not have doc-values in the field mapping, as well as rejecting geo_shape and cartesian_shape + * because we do not yet support full doc-values extraction for non-point geometries. We do have aggregations that support + * shapes, and to prevent them triggering this rule on non-point geometries we have to explicitly disallow them here. */ private boolean allowedForDocValues( FieldAttribute fieldAttribute, @@ -166,6 +170,9 @@ private boolean allowedForDocValues( if (stats.hasDocValues(fieldAttribute.fieldName()) == false) { return false; } + if (fieldAttribute.dataType() == DataType.GEO_SHAPE || fieldAttribute.dataType() == DataType.CARTESIAN_SHAPE) { + return false; + } var candidateDocValuesAttributes = new HashSet<>(foundAttributes); candidateDocValuesAttributes.add(fieldAttribute); var spatialRelatesAttributes = new HashSet(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java new file mode 100644 index 0000000000000..f6f087064a02f --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundsExtraction.java @@ -0,0 +1,108 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; + +import org.elasticsearch.lucene.spatial.GeometryDocValueWriter; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialExtent; +import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; +import org.elasticsearch.xpack.esql.optimizer.PhysicalOptimizerRules.ParameterizedOptimizerRule; +import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; +import org.elasticsearch.xpack.esql.plan.physical.EvalExec; +import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; +import org.elasticsearch.xpack.esql.plan.physical.FilterExec; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; + +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * This rule is responsible for marking spatial shape fields whose extent can be extracted from the binary representation encoded by + * {@link GeometryDocValueWriter}. + * This is a very specific optimization that is only used in the context of ST_EXTENT_AGG aggregations. + * Normally spatial fields are extracted from source values because this maintains original precision, but is very slow. + * Simply extracting the spatial bounds from the binary encoding loses both precision and geometry topological information for shapes. + * For this reason we only consider extract the extent under very specific conditions: + *
    + *
  • The spatial data is of type GEO_SHAPE or CARTESIAN_SHAPE.
  • + *
  • The spatial data is consumed directly by an ST_EXTENT_AGG.
  • + *
  • The spatial data is not consumed by any other operation. While is this is stricter than necessary, + * it is a good enough approximation for now. For example, an aggregation like {@code count} shouldn't stop this optimization, + * not a check like {@code isNotNull}.
  • + *
+ */ +public class SpatialShapeBoundsExtraction extends ParameterizedOptimizerRule { + @Override + protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerContext ctx) { + var foundAttributes = new HashSet(); + + return aggregate.transformDown(UnaryExec.class, exec -> { + switch (exec) { + case AggregateExec agg -> { + List aggregateFunctions = agg.aggregates() + .stream() + .flatMap(e -> SpatialShapeBoundsExtraction.extractAggregateFunction(e).stream()) + .toList(); + List spatialExtents = aggregateFunctions.stream() + .filter(SpatialExtent.class::isInstance) + .map(SpatialExtent.class::cast) + .toList(); + List nonSpatialExtents = aggregateFunctions.stream() + .filter(a -> a instanceof SpatialExtent == false) + .toList(); + // While we currently do not have any non-extent aggregations which apply to shapes, we might have them in the future. + Set fieldsAppearingInNonSpatialExtents = nonSpatialExtents.stream() + .flatMap(af -> af.references().stream()) + .filter(FieldAttribute.class::isInstance) + .map(f -> ((FieldAttribute) f).field()) + .collect(Collectors.toSet()); + spatialExtents.stream() + .map(SpatialExtent::field) + .filter(FieldAttribute.class::isInstance) + .map(FieldAttribute.class::cast) + .filter( + f -> isShape(f.field().getDataType()) + && fieldsAppearingInNonSpatialExtents.contains(f.field()) == false + && ctx.searchStats().hasDocValues(f.fieldName()) + ) + .forEach(foundAttributes::add); + } + case EvalExec evalExec -> foundAttributes.removeAll(evalExec.references()); + case FilterExec filterExec -> foundAttributes.removeAll(filterExec.condition().references()); + case FieldExtractExec fieldExtractExec -> { + var boundsAttributes = new HashSet<>(foundAttributes); + boundsAttributes.retainAll(fieldExtractExec.attributesToExtract()); + if (boundsAttributes.isEmpty() == false) { + exec = fieldExtractExec.withBoundsAttributes(boundsAttributes); + } + } + default -> { // Do nothing + } + } + return exec; + }); + } + + private static boolean isShape(DataType dataType) { + return dataType == DataType.GEO_SHAPE || dataType == DataType.CARTESIAN_SHAPE; + } + + private static Optional extractAggregateFunction(NamedExpression expr) { + return expr instanceof Alias as && as.child() instanceof AggregateFunction af ? Optional.of(af) : Optional.empty(); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java index ec996c5c84064..e9783a241f0b9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java @@ -7,9 +7,12 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -31,9 +34,10 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { ); private final List attributesToExtract; - private final Attribute sourceAttribute; + private final @Nullable Attribute sourceAttribute; + /** - * Attributes that many be extracted as doc values even if that makes them + * Attributes that may be extracted as doc values even if that makes them * less accurate. This is mostly used for geo fields which lose a lot of * precision in their doc values, but in some cases doc values provides * enough precision to do the job. @@ -43,17 +47,32 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { */ private final Set docValuesAttributes; + /** + * Attributes of a shape whose extent can be extracted directly from the doc-values encoded geometry. + *

+ * This is never serialized between nodes and only used locally. + *

+ */ + private final Set boundsAttributes; + private List lazyOutput; public FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract) { - this(source, child, attributesToExtract, Set.of()); + this(source, child, attributesToExtract, Set.of(), Set.of()); } - private FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract, Set docValuesAttributes) { + private FieldExtractExec( + Source source, + PhysicalPlan child, + List attributesToExtract, + Set docValuesAttributes, + Set boundsAttributes + ) { super(source, child); this.attributesToExtract = attributesToExtract; this.sourceAttribute = extractSourceAttributesFrom(child); this.docValuesAttributes = docValuesAttributes; + this.boundsAttributes = boundsAttributes; } private FieldExtractExec(StreamInput in) throws IOException { @@ -62,7 +81,7 @@ private FieldExtractExec(StreamInput in) throws IOException { in.readNamedWriteable(PhysicalPlan.class), in.readNamedWriteableCollectionAsList(Attribute.class) ); - // docValueAttributes are only used on the data node and never serialized. + // docValueAttributes and boundsAttributes are only used on the data node and never serialized. } @Override @@ -70,7 +89,7 @@ public void writeTo(StreamOutput out) throws IOException { Source.EMPTY.writeTo(out); out.writeNamedWriteable(child()); out.writeNamedWriteableCollection(attributesToExtract()); - // docValueAttributes are only used on the data node and never serialized. + // docValueAttributes and boundsAttributes are only used on the data node and never serialized. } @Override @@ -78,7 +97,7 @@ public String getWriteableName() { return ENTRY.name; } - public static Attribute extractSourceAttributesFrom(PhysicalPlan plan) { + public static @Nullable Attribute extractSourceAttributesFrom(PhysicalPlan plan) { for (Attribute attribute : plan.outputSet()) { if (EsQueryExec.isSourceAttribute(attribute)) { return attribute; @@ -99,18 +118,22 @@ protected NodeInfo info() { @Override public UnaryExec replaceChild(PhysicalPlan newChild) { - return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes, boundsAttributes); } public FieldExtractExec withDocValuesAttributes(Set docValuesAttributes) { - return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundsAttributes); + } + + public FieldExtractExec withBoundsAttributes(Set boundsAttributes) { + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundsAttributes); } public List attributesToExtract() { return attributesToExtract; } - public Attribute sourceAttribute() { + public @Nullable Attribute sourceAttribute() { return sourceAttribute; } @@ -118,8 +141,8 @@ public Set docValuesAttributes() { return docValuesAttributes; } - public boolean hasDocValuesAttribute(Attribute attr) { - return docValuesAttributes.contains(attr); + public Set boundsAttributes() { + return boundsAttributes; } @Override @@ -142,7 +165,7 @@ public PhysicalPlan estimateRowSize(State state) { @Override public int hashCode() { - return Objects.hash(attributesToExtract, docValuesAttributes, child()); + return Objects.hash(attributesToExtract, docValuesAttributes, boundsAttributes, child()); } @Override @@ -158,12 +181,27 @@ public boolean equals(Object obj) { FieldExtractExec other = (FieldExtractExec) obj; return Objects.equals(attributesToExtract, other.attributesToExtract) && Objects.equals(docValuesAttributes, other.docValuesAttributes) + && Objects.equals(boundsAttributes, other.boundsAttributes) && Objects.equals(child(), other.child()); } @Override public String nodeString() { - return nodeName() + NodeUtils.limitedToString(attributesToExtract) + "<" + NodeUtils.limitedToString(docValuesAttributes) + ">"; + return Strings.format( + "%s<%s,%s>", + nodeName() + NodeUtils.limitedToString(attributesToExtract), + docValuesAttributes, + boundsAttributes + ); } + public MappedFieldType.FieldExtractPreference fieldExtractPreference(Attribute attr) { + if (boundsAttributes.contains(attr)) { + return MappedFieldType.FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS; + } + if (docValuesAttributes.contains(attr)) { + return MappedFieldType.FieldExtractPreference.DOC_VALUES; + } + return MappedFieldType.FieldExtractPreference.NONE; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 39e2a3bc1d5af..a9118e6b8cbd4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -114,12 +114,11 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi .toList(); List fields = new ArrayList<>(); int docChannel = source.layout.get(sourceAttr.id()).channel(); - var docValuesAttrs = fieldExtractExec.docValuesAttributes(); for (Attribute attr : fieldExtractExec.attributesToExtract()) { layout.append(attr); var unionTypes = findUnionTypes(attr); DataType dataType = attr.dataType(); - MappedFieldType.FieldExtractPreference fieldExtractPreference = PlannerUtils.extractPreference(docValuesAttrs.contains(attr)); + MappedFieldType.FieldExtractPreference fieldExtractPreference = fieldExtractExec.fieldExtractPreference(attr); ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference); // Do not use the field attribute name, this can deviate from the field name for union types. String fieldName = attr instanceof FieldAttribute fa ? fa.fieldName() : attr.name(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java index 99342218fb674..403c94f32bea5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java @@ -17,11 +17,11 @@ import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.RectangleMatcher; -import org.elasticsearch.xpack.esql.expression.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; import org.elasticsearch.xpack.esql.expression.function.FunctionName; import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; @@ -82,20 +82,7 @@ private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier List.of(fieldTypedData), "SpatialExtent[field=Attribute[channel=0]]", expectedType, - new WellKnownBinaryBytesRefMatcher<>( - RectangleMatcher.closeTo( - new Rectangle( - // Since we use integers locally which are later decoded to doubles, all computation is effectively done using - // floats, not doubles. - (float) result.getMinX(), - (float) result.getMaxX(), - (float) result.getMaxY(), - (float) result.getMinY() - ), - 1e-3, - pointType - ) - ) + new WellKnownBinaryBytesRefMatcher<>(RectangleMatcher.closeToFloat(result, 1e-3, pointType.encoder())) ); }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 3518beda7dc48..6ea99e4767060 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -171,13 +171,16 @@ import static org.elasticsearch.xpack.esql.core.expression.Expressions.names; import static org.elasticsearch.xpack.esql.core.expression.function.scalar.FunctionTestUtils.l; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; import static org.elasticsearch.xpack.esql.parser.ExpressionBuilder.MAX_EXPRESSION_DEPTH; import static org.elasticsearch.xpack.esql.parser.LogicalPlanBuilder.MAX_QUERY_DEPTH; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasItem; @@ -211,6 +214,9 @@ public class PhysicalPlanOptimizerTests extends ESTestCase { private TestDataSource airportsNotIndexed; // Test when spatial field has doc values but is not indexed private TestDataSource airportsNotIndexedNorDocValues; // Test when spatial field is neither indexed nor has doc-values private TestDataSource airportsWeb; // Cartesian point field tests + private TestDataSource airportsCityBoundaries; + private TestDataSource cartesianMultipolygons; // cartesian_shape field tests + private TestDataSource cartesianMultipolygonsNoDocValues; // cartesian_shape field tests but has no doc values private TestDataSource countriesBbox; // geo_shape field tests private TestDataSource countriesBboxWeb; // cartesian_shape field tests @@ -280,6 +286,25 @@ public void init() { new TestConfigurableSearchStats().exclude(Config.INDEXED, "location").exclude(Config.DOC_VALUES, "location") ); this.airportsWeb = makeTestDataSource("airports_web", "mapping-airports_web.json", functionRegistry, enrichResolution); + this.airportsCityBoundaries = makeTestDataSource( + "airports_city_boundaries", + "mapping-airport_city_boundaries.json", + functionRegistry, + enrichResolution + ); + this.cartesianMultipolygons = makeTestDataSource( + "cartesian_multipolygons", + "mapping-cartesian_multipolygons.json", + functionRegistry, + enrichResolution + ); + this.cartesianMultipolygonsNoDocValues = makeTestDataSource( + "cartesian_multipolygons_no_doc_values", + "mapping-cartesian_multipolygons_no_doc_values.json", + functionRegistry, + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.DOC_VALUES, "shape") + ); this.countriesBbox = makeTestDataSource("countriesBbox", "mapping-countries_bbox.json", functionRegistry, enrichResolution); this.countriesBboxWeb = makeTestDataSource( "countriesBboxWeb", @@ -2823,12 +2848,13 @@ public void testSpatialTypesAndStatsCentroidUseDocValues() { "from airports | eval location = to_geopoint(location) | stats centroid = st_centroid_agg(location)" }) { for (boolean withDocValues : new boolean[] { false, true }) { var testData = withDocValues ? airports : airportsNoDocValues; + var fieldExtractPreference = withDocValues ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -2840,12 +2866,12 @@ public void testSpatialTypesAndStatsCentroidUseDocValues() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, withDocValues); - assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), withDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, fieldExtractPreference); + assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), fieldExtractPreference); } } } @@ -2884,13 +2910,14 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { "from airports | stats extent = st_extent_agg(to_geopoint(location))", "from airports | eval location = to_geopoint(location) | stats extent = st_extent_agg(location)" }) { for (boolean withDocValues : new boolean[] { false, true }) { + var fieldExtractPreference = withDocValues ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var testData = withDocValues ? airports : airportsNoDocValues; var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -2902,12 +2929,12 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, withDocValues); - assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), withDocValues); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, fieldExtractPreference); + assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), fieldExtractPreference); } } } @@ -2916,24 +2943,25 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ - * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] - * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#70,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#70,true[BOOLEAN]) AS cen + * troid],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ + * Aggregate[STANDARD,[],[SPATIALEXTENT(location{f}#70,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#70,true[BOOLEAN] + * ) AS centroid]] + * \_EsRelation[airports][abbrev{f}#66, city{f}#72, city_location{f}#73, coun..]]] * * After local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ - * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] - * \_FieldExtractExec[location{f}#48][location{f}#48] - * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ - * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#70,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#70,true[BOOLEAN]) AS cen + * troid],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#70,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#70,true[BOOLEAN]) AS cen + * troid],INITIAL,[...]] + * \_FieldExtractExec[location{f}#70][location{f}#70],[] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#117], limit[], sort[] estimatedRowSize[25] * * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] *

@@ -2945,13 +2973,14 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { "from airports | stats extent = st_extent_agg(location), centroid = st_centroid_agg(location)", "from airports | stats extent = st_extent_agg(location), centroid = st_centroid_agg(city_location)", }) { for (boolean withDocValues : new boolean[] { false, true }) { + var fieldExtractPreference = withDocValues ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var testData = withDocValues ? airports : airportsNoDocValues; var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -2963,16 +2992,190 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, false); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, withDocValues); - assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), withDocValues); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_POINT, fieldExtractPreference); + assertChildIsGeoPointExtract(withDocValues ? agg : as(agg.child(), FilterExec.class), fieldExtractPreference); } } } + /** + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],FINAL,[ + * $$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, + * $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],200] + * \_ExchangeExec[[ + * $$extent$minNegX{r}#11, $$extent$minPosX{r}#12, $$extent$maxNegX{r}#13, + * $$extent$maxPosX{r}#14, $$extent$maxY{r}#15, $$extent$minY{r}#16],true] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#10,true[BOOLEAN]) AS extent],INITIAL,[ + * $$extent$minNegX{r}#30, $$extent$minPosX{r}#31, $$extent$maxNegX{r}#32, + * $$extent$maxPosX{r}#33, $$extent$maxY{r}#34, $$extent$minY{r}#35],200] + * \_FieldExtractExec[city_boundary{f}#10][],[city_boundary{f}#10] + * \_EsQueryExec[airports_city_boundaries], indexMode[standard], query[ + * {"exists":{"field":"city_boundary","boost":1.0}} + * ][_doc{f}#36], limit[], sort[] estimatedRowSize[204] + * + */ + public void testSpatialTypesAndStatsExtentOfGeoShapeDoesNotUseBinaryExtraction() { + // TODO: When we get geo_shape working with bounds extraction from doc-values, change the name of this test + var query = "FROM airports_city_boundaries | STATS extent = ST_EXTENT_AGG(city_boundary)"; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use extent extraction + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses extent extraction + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not using doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is using a specific + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + assertChildIsExtractedAs(agg, FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS, GEO_SHAPE); + } + + /** + * This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. + * TODO: Currently this tests nothing, because geo_shape is not supported anyway for bounds extraction, + * but it should be updated when it is supported. + */ + public void testSpatialTypesAndStatsExtentOfShapesNegativeCases() { + for (String query : new String[] { """ + FROM airports_city_boundaries + | EVAL prefix = SUBSTRING(TO_STRING(city_boundary), 5) + | STATS extent = ST_EXTENT_AGG(city_boundary) BY prefix""", """ + FROM airports_city_boundaries + | WHERE STARTS_WITH(TO_STRING(city_boundary), "MULTIPOLYGON") + | STATS extent = ST_EXTENT_AGG(city_boundary)""" }) { + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + var exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); + assertChildIsExtractedAs(exec, FieldExtractPreference.NONE, GEO_SHAPE); + } + } + + /** + * Test cartesian_shape bounds extraction occurs when the shape has doc-values and not otherwise. + */ + public void testSpatialTypesAndStatsExtentOfCartesianShapesWithAndWithoutDocValues() { + for (boolean hasDocValues : new boolean[] { true, false }) { + var query = """ + FROM cartesian_multipolygons \ + | STATS extent = ST_EXTENT_AGG(shape)"""; + var testData = hasDocValues ? cartesianMultipolygons : cartesianMultipolygonsNoDocValues; + var fieldExtractPreference = hasDocValues ? FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS : FieldExtractPreference.NONE; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); + + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // For cartesian_shape extraction, we extract bounds from doc-values directly into a BBOX encoded as BytesRef, + // so the aggregation does not need to know about it. + assertAggregation(agg, "extent", SpatialExtent.class, CARTESIAN_SHAPE, FieldExtractPreference.NONE); + var exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertAggregation( + agg, + "extent", + "hasDocValues:" + hasDocValues, + SpatialExtent.class, + CARTESIAN_SHAPE, + FieldExtractPreference.NONE + ); + var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); + // For cartesian_shape, the bounds extraction is done in the FieldExtractExec, so it does need to know about this + assertChildIsExtractedAs(exec, fieldExtractPreference, CARTESIAN_SHAPE); + } + } + + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#13,true[BOOLEAN]) AS extent, SPATIALCENTROID(city_location{f}#12,true[BOOLEA + * N]) AS centroid],...] + * \_ExchangeExec[[..]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[...]] + * \_EsRelation[airports_city_boundaries][abbrev{f}#8, airport{f}#9, city{f}#11, city_boundar..] + * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + */ + public void testMixedSpatialBoundsAndPointsExtracted() { + var query = """ + FROM airports_city_boundaries \ + | STATS extent = ST_EXTENT_AGG(city_boundary), centroid = ST_CENTROID_AGG(city_location)"""; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses both doc-values and bounds extraction + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not field-optimized. + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is field optimized. + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, FieldExtractPreference.NONE); + var fieldExtractExec = as(agg.child(), FieldExtractExec.class); + assertThat(fieldExtractExec.boundsAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_boundary"))); + assertThat(fieldExtractExec.docValuesAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_location"))); + } + /** * This test does not have real index fields, and therefor asserts that doc-values field extraction does NOT occur. * Before local optimizations: @@ -3004,11 +3207,11 @@ public void testSpatialTypesAndStatsUseDocValuesNestedLiteral() { var agg = as(limit.child(), AggregateExec.class); assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var eval = as(agg.child(), EvalExec.class); as(eval.child(), LocalSourceExec.class); @@ -3018,11 +3221,11 @@ public void testSpatialTypesAndStatsUseDocValuesNestedLiteral() { agg = as(limit.child(), AggregateExec.class); assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); eval = as(agg.child(), EvalExec.class); as(eval.child(), LocalSourceExec.class); } @@ -3059,7 +3262,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregations() { assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3072,14 +3275,14 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregations() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); - assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); } /** @@ -3120,8 +3323,8 @@ public void testSpatialTypesAndStatsUseDocValuesMultiSpatialAggregations() { assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3134,16 +3337,16 @@ public void testSpatialTypesAndStatsUseDocValuesMultiSpatialAggregations() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, true); - assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, true); - assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "airports", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertAggregation(agg, "cities", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); } /** @@ -3181,7 +3384,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsFiltered() { assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3196,14 +3399,14 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsFiltered() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); - var source = assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + var source = assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); var qb = as(source.query(), SingleValueQuery.Builder.class); assertThat("Expected predicate to be passed to Lucene query", qb.source().text(), equalTo("scalerank == 9")); } @@ -3235,6 +3438,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsFiltered() { public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { for (boolean useDocValues : new boolean[] { false }) { var testData = useDocValues ? airports : airportsNoDocValues; + var fieldExtractPreference = useDocValues ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var plan = this.physicalPlan(""" FROM airports | STATS centroid=ST_CENTROID_AGG(location), count=COUNT() BY scalerank @@ -3247,7 +3451,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { assertThat(att.name(), equalTo("scalerank")); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3262,7 +3466,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { assertThat(att.name(), equalTo("scalerank")); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); @@ -3270,8 +3474,8 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { assertThat(att.name(), equalTo("scalerank")); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, useDocValues); - assertChildIsGeoPointExtract(agg, useDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, fieldExtractPreference); + assertChildIsGeoPointExtract(agg, fieldExtractPreference); } } @@ -3312,19 +3516,19 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGroupedAggregat assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); assertAggregation(agg, "count", Sum.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); assertAggregation(agg, "count", Sum.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("One grouping in aggregation", agg.groupings().size(), equalTo(1)); var att = as(agg.groupings().get(0), Attribute.class); assertThat(att.name(), equalTo("scalerank")); assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3338,19 +3542,19 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGroupedAggregat assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); assertAggregation(agg, "count", Sum.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); assertAggregation(agg, "count", Sum.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); agg = as(agg.child(), AggregateExec.class); assertThat("Aggregation is FINAL", agg.getMode(), equalTo(FINAL)); assertThat("One grouping in aggregation", agg.groupings().size(), equalTo(1)); att = as(agg.groupings().get(0), Attribute.class); assertThat(att.name(), equalTo("scalerank")); assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("One grouping in aggregation", agg.groupings().size(), equalTo(1)); @@ -3359,8 +3563,8 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGroupedAggregat // below the exchange (in data node) the aggregation is using doc-values assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); - assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); } /** @@ -3400,7 +3604,7 @@ public void testEnrichBeforeSpatialAggregationSupportsDocValues() { var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3416,16 +3620,16 @@ public void testEnrichBeforeSpatialAggregationSupportsDocValues() { limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); var enrichExec = as(agg.child(), EnrichExec.class); assertThat(enrichExec.mode(), equalTo(Enrich.Mode.ANY)); assertThat(enrichExec.concreteIndices(), equalTo(Map.of("", "airport_city_boundaries"))); assertThat(enrichExec.enrichFields().size(), equalTo(3)); - assertChildIsGeoPointExtract(enrichExec, true); + assertChildIsGeoPointExtract(enrichExec, FieldExtractPreference.DOC_VALUES); } /** @@ -3725,7 +3929,7 @@ public void testPushDownSpatialRelatesStringToSourceAndUseDocValuesForCentroid() assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), false); + assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); var fAgg = as(fragment.fragment(), Aggregate.class); @@ -3738,15 +3942,15 @@ public void testPushDownSpatialRelatesStringToSourceAndUseDocValuesForCentroid() agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), false); + assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), true); + assertAggregation(agg, "centroid", SpatialCentroid.class, test.locationType(), FieldExtractPreference.DOC_VALUES); if (test.canPushToSource) { - var source = assertChildIsExtractedAsDocValues(agg, true, test.locationType()); + var source = assertChildIsExtractedAs(agg, FieldExtractPreference.DOC_VALUES, test.locationType()); var condition = as(source.query(), SpatialRelatesQuery.ShapeQueryBuilder.class); assertThat("Geometry field name: " + test.predicate(), condition.fieldName(), equalTo("location")); assertThat("Spatial relationship: " + test.predicate(), condition.relation(), equalTo(test.relationship())); @@ -3812,6 +4016,7 @@ public void testPushSpatialIntersectsStringToSourceAndUseDocValuesForCentroid() for (boolean isIndexed : new boolean[] { true, false }) { for (boolean useDocValues : new boolean[] { true, false }) { + var fieldExtractPreference = useDocValues ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE; var testData = useDocValues ? (isIndexed ? airports : airportsNotIndexed) : (isIndexed ? airportsNoDocValues : airportsNotIndexedNorDocValues); @@ -3821,7 +4026,7 @@ public void testPushSpatialIntersectsStringToSourceAndUseDocValuesForCentroid() assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3835,15 +4040,15 @@ public void testPushSpatialIntersectsStringToSourceAndUseDocValuesForCentroid() agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, useDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, fieldExtractPreference); if (isIndexed) { - var source = assertChildIsGeoPointExtract(agg, useDocValues); + var source = assertChildIsGeoPointExtract(agg, fieldExtractPreference); // Query is pushed to lucene if field is indexed (and does not require doc-values or isAggregatable) var condition = as(source.query(), SpatialRelatesQuery.ShapeQueryBuilder.class); assertThat("Geometry field name", condition.fieldName(), equalTo("location")); @@ -3934,7 +4139,7 @@ AND ST_INTERSECTS(TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))"), l assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -3950,14 +4155,14 @@ AND ST_INTERSECTS(TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))"), l agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, true); - var source = assertChildIsGeoPointExtract(agg, true); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + var source = assertChildIsGeoPointExtract(agg, FieldExtractPreference.DOC_VALUES); var booleanQuery = as(source.query(), BoolQueryBuilder.class); assertThat("Expected boolean query of three predicates", booleanQuery.must().size(), equalTo(3)); var condition = as(booleanQuery.must().get(1), SpatialRelatesQuery.ShapeQueryBuilder.class); @@ -4008,8 +4213,8 @@ public void testIntersectsOnTwoPointFieldAndBothCentroidUsesDocValues() { assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -4023,15 +4228,15 @@ public void testIntersectsOnTwoPointFieldAndBothCentroidUsesDocValues() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, true); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var filterExec = as(agg.child(), FilterExec.class); var extract = as(filterExec.child(), FieldExtractExec.class); assertFieldExtractionWithDocValues(extract, GEO_POINT, "location"); @@ -4056,7 +4261,7 @@ public void testIntersectsOnTwoPointFieldAndOneCentroidUsesDocValues() { // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); var aggFieldName = findSingleAggregation(agg, "location", "city_location"); - assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -4070,13 +4275,13 @@ public void testIntersectsOnTwoPointFieldAndOneCentroidUsesDocValues() { agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, true); + assertAggregation(agg, aggFieldName, SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); var filterExec = as(agg.child(), FilterExec.class); var extract = as(filterExec.child(), FieldExtractExec.class); assertFieldExtractionWithDocValues(extract, GEO_POINT, aggFieldName); @@ -4098,8 +4303,8 @@ AND ST_INTERSECTS(city_location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 1 assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); // Before optimization the aggregation does not use doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); var exchange = as(agg.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); @@ -4115,15 +4320,15 @@ AND ST_INTERSECTS(city_location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 1 agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, false); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, false); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.NONE); exchange = as(agg.child(), ExchangeExec.class); agg = as(exchange.child(), AggregateExec.class); assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); // below the exchange (in data node) the aggregation is using doc-values assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, true); - assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, true); + assertAggregation(agg, "location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); + assertAggregation(agg, "city_location", SpatialCentroid.class, GEO_POINT, FieldExtractPreference.DOC_VALUES); var extract = as(agg.child(), FieldExtractExec.class); assertFieldExtractionWithDocValues(extract, GEO_POINT, "location", "city_location"); var source = source(extract.child()); @@ -4687,7 +4892,7 @@ public void testPushSpatialDistanceEvalWithSimpleStatsToSource() { var evalExec = as(aggExec2.child(), EvalExec.class); var stDistance = as(evalExec.fields().get(0).child(), StDistance.class); assertThat("Expect distance function to expect doc-values", stDistance.leftDocValues(), is(false)); - var source = assertChildIsGeoPointExtract(evalExec, false); + var source = assertChildIsGeoPointExtract(evalExec, FieldExtractPreference.NONE); // No sort is pushed down assertThat(source.limit(), nullValue()); @@ -4802,7 +5007,7 @@ public void testPushSpatialDistanceEvalWithStatsToSource() { var evalExec = as(aggExec2.child(), EvalExec.class); var stDistance = as(evalExec.fields().get(0).child(), StDistance.class); assertThat("Expect distance function to expect doc-values", stDistance.leftDocValues(), is(true)); - var source = assertChildIsGeoPointExtract(evalExec, true); + var source = assertChildIsGeoPointExtract(evalExec, FieldExtractPreference.DOC_VALUES); // No sort is pushed down assertThat(source.limit(), nullValue()); @@ -7111,17 +7316,35 @@ private static void assertFilterCondition( assertThat("Expected filter value", value.value(), equalTo(expected)); } - private EsQueryExec assertChildIsGeoPointExtract(UnaryExec parent, boolean useDocValues) { - return assertChildIsExtractedAsDocValues(parent, useDocValues, GEO_POINT); + private EsQueryExec assertChildIsGeoPointExtract(UnaryExec parent, FieldExtractPreference fieldExtractPreference) { + return assertChildIsExtractedAs(parent, fieldExtractPreference, GEO_POINT); } - private EsQueryExec assertChildIsExtractedAsDocValues(UnaryExec parent, boolean useDocValues, DataType dataType) { + private static EsQueryExec assertChildIsExtractedAs( + UnaryExec parent, + FieldExtractPreference fieldExtractPreference, + DataType dataType + ) { var extract = as(parent.child(), FieldExtractExec.class); + switch (fieldExtractPreference) { + case NONE -> { + assertThat(extract.docValuesAttributes(), is(empty())); + assertThat(extract.boundsAttributes(), is(empty())); + } + case DOC_VALUES -> { + assertThat(extract.docValuesAttributes(), is(not(empty()))); + assertThat(extract.boundsAttributes(), is(empty())); + } + case EXTRACT_SPATIAL_BOUNDS -> { + assertThat(extract.docValuesAttributes(), is(empty())); + assertThat(extract.boundsAttributes(), is(not(empty()))); + } + } assertTrue( - "Expect field attribute to be extracted as " + (useDocValues ? "doc-values" : "source"), + "Expect field attribute to be extracted as " + fieldExtractPreference, extract.attributesToExtract() .stream() - .allMatch(attr -> extract.hasDocValuesAttribute(attr) == useDocValues && attr.dataType() == dataType) + .allMatch(attr -> extract.fieldExtractPreference(attr) == fieldExtractPreference && attr.dataType() == dataType) ); return source(extract.child()); } @@ -7131,17 +7354,24 @@ private static void assertAggregation( String aliasName, Class aggClass, DataType fieldType, - boolean useDocValues + FieldExtractPreference fieldExtractPreference + ) { + assertAggregation(plan, aliasName, "Aggregation with fieldExtractPreference", aggClass, fieldType, fieldExtractPreference); + } + + private static void assertAggregation( + PhysicalPlan plan, + String aliasName, + String reason, + Class aggClass, + DataType fieldType, + FieldExtractPreference fieldExtractPreference ) { var aggFunc = assertAggregation(plan, aliasName, aggClass); var aggField = as(aggFunc.field(), Attribute.class); var spatialAgg = as(aggFunc, SpatialAggregateFunction.class); - assertThat( - "Expected spatial aggregation to use doc-values", - spatialAgg.fieldExtractPreference(), - equalTo(useDocValues ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE) - ); - assertThat("", aggField.dataType(), equalTo(fieldType)); + assertThat(spatialAgg.fieldExtractPreference(), equalTo(fieldExtractPreference)); + assertThat(reason, aggField.dataType(), equalTo(fieldType)); } private static AggregateFunction assertAggregation(PhysicalPlan plan, String aliasName, Class aggClass) { @@ -7182,13 +7412,14 @@ private static QueryBuilder findQueryBuilder(BoolQueryBuilder booleanQuery, Stri } private void assertFieldExtractionWithDocValues(FieldExtractExec extract, DataType dataType, String... fieldNames) { + var docValuesAttributes = extract.docValuesAttributes(); extract.attributesToExtract().forEach(attr -> { String name = attr.name(); if (asList(fieldNames).contains(name)) { - assertThat("Expected field '" + name + "' to use doc-values", extract.hasDocValuesAttribute(attr), equalTo(true)); + assertThat("Expected field '" + name + "' to use doc-values", docValuesAttributes.contains(attr), equalTo(true)); assertThat("Expected field '" + name + "' to have data type " + dataType, attr.dataType(), equalTo(dataType)); } else { - assertThat("Expected field '" + name + "' to NOT use doc-values", extract.hasDocValuesAttribute(attr), equalTo(false)); + assertThat("Expected field '" + name + "' to NOT use doc-values", docValuesAttributes.contains(attr), equalTo(false)); } }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index e91fc6e49312d..78512636b57e9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -86,7 +86,10 @@ public PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fieldExt for (Attribute attr : fieldExtractExec.attributesToExtract()) { layout.append(attr); op = op.with( - new TestFieldExtractOperatorFactory(attr, PlannerUtils.extractPreference(fieldExtractExec.hasDocValuesAttribute(attr))), + new TestFieldExtractOperatorFactory( + attr, + PlannerUtils.extractPreference(fieldExtractExec.docValuesAttributes().contains(attr)) + ), layout.build() ); } diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java index 23505eda493af..67d25556a2aa7 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java @@ -298,6 +298,17 @@ public List parseStoredValues(List storedValues) { protected Function, List> getFormatter(String format) { return geoFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. + return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } public static class TypeParser implements Mapper.TypeParser { diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java index e5d5354327f5a..2d586ac8eb86a 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java @@ -184,6 +184,16 @@ public String typeName() { protected Function, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + return true; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.CARTESIAN; + } } private final Builder builder;