diff --git a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java index 1616d2727bf8a..506918b12fe96 100644 --- a/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java +++ b/modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java @@ -46,6 +46,7 @@ import org.elasticsearch.legacygeo.builders.ShapeBuilder; import org.elasticsearch.legacygeo.parsers.ShapeParser; import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import org.locationtech.spatial4j.shape.Point; @@ -401,7 +402,6 @@ public void parse( } public static final class GeoShapeFieldType extends AbstractShapeGeometryFieldType> implements GeoShapeQueryable { - private String tree = Defaults.TREE; private SpatialStrategy strategy = Defaults.STRATEGY; private boolean pointsOnly = Defaults.POINTS_ONLY; @@ -530,6 +530,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) { protected Function>, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. + return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } private final IndexVersion indexCreatedVersion; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java index 02a3ae11524e3..4b0542f7f7b03 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java @@ -8,9 +8,18 @@ */ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.lucene.spatial.GeometryDocValueReader; +import java.io.IOException; +import java.nio.ByteOrder; import java.util.Map; import java.util.function.Function; @@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) { // we don't support null value fors shapes return nullValue; } + + @Override + public BlockLoader blockLoader(BlockLoaderContext blContext) { + return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported() + ? new BoundsBlockLoader(name(), coordinateEncoder()) + : blockLoaderFromSource(blContext); + } + + protected abstract boolean isBoundsExtractionSupported(); + + protected abstract CoordinateEncoder coordinateEncoder(); + + // Visible for testing + static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader { + private final String fieldName; + private final CoordinateEncoder encoder; + + BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) { + this.fieldName = fieldName; + this.encoder = encoder; + } + + @Override + public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException { + return new BlockLoader.AllReader() { + @Override + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + try (var builder = factory.bytesRefs(docs.count())) { + for (int i = 0; i < docs.count(); i++) { + read(binaryDocValues, docs.get(i), reader, builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + var binaryDocValues = context.reader().getBinaryDocValues(fieldName); + var reader = new GeometryDocValueReader(); + read(binaryDocValues, docId, reader, (BytesRefBuilder) builder); + } + + private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder) + throws IOException { + binaryDocValues.advanceExact(doc); + reader.reset(binaryDocValues.binaryValue()); + var extent = reader.getExtent(); + // This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to + // preserve its properties as a WKB shape, only to convert it back to ints when we compute the + // aggregation. An obvious optimization would be to avoid this back-and-forth conversion. + var rectangle = new Rectangle( + encoder.decodeX(extent.minX()), + encoder.decodeX(extent.maxX()), + encoder.decodeY(extent.maxY()), + encoder.decodeY(extent.minY()) + ); + builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN))); + } + + @Override + public boolean canReuse(int startingDocID) { + return true; + } + }; + } + + @Override + public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + } } protected Explicit coerce; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 35722be20b9be..20d23ab97ac26 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -676,11 +676,27 @@ public enum FieldExtractPreference { /** * Load the field from doc-values into a BlockLoader supporting doc-values. */ - DOC_VALUES, + DOC_VALUES(true), + /** Loads the field by extracting the extent from the binary encoded representation */ + EXTRACT_SPATIAL_BOUNDS(false), /** * No preference. Leave the choice of where to load the field from up to the FieldType. */ - NONE + NONE(false); + + private final boolean isColumnReader; + + FieldExtractPreference(boolean isColumnReader) { + this.isColumnReader = isColumnReader; + } + + public static FieldExtractPreference forColumnReader(boolean columnReader) { + return columnReader ? DOC_VALUES : NONE; + } + + public boolean isColumnReader() { + return isColumnReader; + } } /** diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java new file mode 100644 index 0000000000000..8ca412405a062 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapperTests.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.geo.Orientation; +import org.elasticsearch.geo.GeometryTestUtils; +import org.elasticsearch.geo.ShapeTestUtils; +import org.elasticsearch.geometry.Geometry; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; +import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField; +import org.elasticsearch.lucene.spatial.CartesianShapeIndexer; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; + +import java.io.IOException; +import java.util.Optional; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.IntStream; + +public class AbstractShapeGeometryFieldMapperTests extends ESTestCase { + // TODO handle geo as well, this is actually bugged, since extracting the result ignores minneg etc. + public void testCartesianBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.CARTESIAN, + () -> ShapeTestUtils.randomGeometryWithoutCircle(0, false), + field -> new CartesianShapeIndexer(field), + SpatialEnvelopeVisitor::visitCartesian + ); + } + + // TODO when we turn this optimization on for geo, handle this as well. + public void ignoreTestGeoBoundsBlockLoader() throws IOException { + testBoundsBlockLoaderAux( + CoordinateEncoder.GEO, + () -> GeometryTestUtils.randomGeometryWithoutCircle(0, false), + field -> new GeoShapeIndexer(Orientation.RIGHT, field), + g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP) + ); + } + + private void testBoundsBlockLoaderAux( + CoordinateEncoder encoder, + Supplier generator, + Function indexerFactory, + Function> visitor + ) throws IOException { + var geometries = IntStream.range(0, 20).mapToObj(i -> ShapeTestUtils.randomGeometryWithoutCircle(0, false)).toList(); + var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder); + try (Directory directory = newDirectory()) { + try (var iw = new RandomIndexWriter(random(), directory)) { + for (Geometry geometry : geometries) { + var shape = new BinaryShapeDocValuesField("field", encoder); + shape.add(indexerFactory.apply("field").indexShape(geometry), geometry); + var doc = new Document(); + doc.add(shape); + iw.addDocument(doc); + } + } + var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray(); + try (DirectoryReader reader = DirectoryReader.open(directory)) { + LeafReaderContext ctx = reader.leaves().get(0); + TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices)); + for (int i = 0; i < indices.length; i++) { + var idx = indices[i]; + Rectangle r = visitor.apply(geometries.get(idx)).get(); + assertThat( + Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometries.get(idx)), + (BytesRef) block.get(i), + WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder)) + ); + } + } + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 32cbcfc2441a1..9675638b2b394 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -1355,6 +1355,6 @@ private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSo MapperService mapper = syntheticSource ? createSytheticSourceMapperService(mapping) : createMapperService(mapping); BlockReaderSupport blockReaderSupport = getSupportedReaders(mapper, "field.sub"); var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); + testBlockLoader(MappedFieldType.FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index 2da2c5a08c177..f6b2420ce0b03 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.fielddata.LeafFieldData; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.index.translog.Translog; @@ -87,8 +88,6 @@ import java.util.stream.IntStream; import static java.util.stream.Collectors.toList; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES; -import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE; import static org.elasticsearch.test.MapMatcher.assertMap; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -1420,7 +1419,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str this(columnAtATimeReader, true, mapper, loaderFieldName); } - private BlockLoader getBlockLoader(boolean columnReader) { + private BlockLoader getBlockLoader(FieldExtractPreference fieldExtractPreference) { SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null); return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override @@ -1434,8 +1433,8 @@ public IndexSettings indexSettings() { } @Override - public MappedFieldType.FieldExtractPreference fieldExtractPreference() { - return columnReader ? DOC_VALUES : NONE; + public FieldExtractPreference fieldExtractPreference() { + return fieldExtractPreference; } @Override @@ -1484,16 +1483,20 @@ private void testBlockLoader(boolean syntheticSource, boolean columnReader) thro ); } var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP); - testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader); + testBlockLoader(FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader); } protected final void testBlockLoader( - boolean columnReader, + FieldExtractPreference fieldExtractPreference, SyntheticSourceExample example, BlockReaderSupport blockReaderSupport, SourceLoader sourceLoader ) throws IOException { - BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader); + var columnReader = switch (fieldExtractPreference) { + case DOC_VALUES -> true; + case NONE, EXTRACT_SPATIAL_BOUNDS -> false; + }; + BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference); Function valuesConvert = loadBlockExpected(blockReaderSupport, columnReader); if (valuesConvert == null) { assertNull(loader); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java similarity index 69% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java rename to test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java index 48fbc9c8e0378..dd3fb1e8924c5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/RectangleMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/RectangleMatcher.java @@ -5,10 +5,10 @@ * 2.0. */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; -import org.elasticsearch.compute.aggregation.spatial.PointType; import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import org.hamcrest.Description; import org.hamcrest.Matchers; import org.hamcrest.TypeSafeMatcher; @@ -19,23 +19,31 @@ */ public class RectangleMatcher extends TypeSafeMatcher { private final Rectangle r; - private final PointType pointType; + private final CoordinateEncoder coordinateEncoder; private final double error; - public static TypeSafeMatcher closeTo(Rectangle r, double error, PointType pointType) { - return new RectangleMatcher(r, error, pointType); + public static TypeSafeMatcher closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) { + return new RectangleMatcher(r, error, coordinateEncoder); } - private RectangleMatcher(Rectangle r, double error, PointType pointType) { + private RectangleMatcher(Rectangle r, double error, CoordinateEncoder coordinateEncoder) { this.r = r; - this.pointType = pointType; + this.coordinateEncoder = coordinateEncoder; this.error = error; } + /** + * Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints. + */ + public static TypeSafeMatcher closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) { + var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY()); + return closeTo(normalized, v, encoder); + } + @Override protected boolean matchesSafely(Rectangle other) { // For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe. - boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX(); + boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX(); boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX()) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX())) || (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java similarity index 69% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java rename to test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java index 535bb820458cd..8fd7fbcfdd725 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/WellKnownBinaryBytesRefMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/WellKnownBinaryBytesRefMatcher.java @@ -1,3 +1,12 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License @@ -5,7 +14,7 @@ * 2.0. */ -package org.elasticsearch.xpack.esql.expression; +package org.elasticsearch.test.hamcrest; import org.apache.lucene.util.BytesRef; import org.elasticsearch.geometry.Geometry; @@ -23,6 +32,10 @@ public WellKnownBinaryBytesRefMatcher(Matcher matcher) { this.matcher = matcher; } + public static Matcher encodes(TypeSafeMatcher matcher) { + return new WellKnownBinaryBytesRefMatcher(matcher); + } + @Override public boolean matchesSafely(BytesRef bytesRef) { return matcher.matches(fromBytesRef(bytesRef)); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java index 5395ca0b85163..fb45f869c4133 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/PointType.java @@ -7,12 +7,11 @@ package org.elasticsearch.compute.aggregation.spatial; -import org.apache.lucene.geo.GeoEncodingUtils; -import org.apache.lucene.geo.XYEncodingUtils; import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.util.Optional; @@ -23,26 +22,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitGeo(geo, WrapLongitude.WRAP); } - @Override - public double decodeX(int encoded) { - return GeoEncodingUtils.decodeLongitude(encoded); - } - - @Override - public double decodeY(int encoded) { - return GeoEncodingUtils.decodeLatitude(encoded); - } - - @Override - public int encodeX(double decoded) { - return GeoEncodingUtils.encodeLongitude(decoded); - } - - @Override - public int encodeY(double decoded) { - return GeoEncodingUtils.encodeLatitude(decoded); - } - // Geo encodes the longitude in the lower 32 bits and the latitude in the upper 32 bits. @Override public int extractX(long encoded) { @@ -53,6 +32,11 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.GEO; + } }, CARTESIAN { @Override @@ -60,26 +44,6 @@ public Optional computeEnvelope(Geometry geo) { return SpatialEnvelopeVisitor.visitCartesian(geo); } - @Override - public double decodeX(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public double decodeY(int encoded) { - return XYEncodingUtils.decode(encoded); - } - - @Override - public int encodeX(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - - @Override - public int encodeY(double decoded) { - return XYEncodingUtils.encode((float) decoded); - } - @Override public int extractX(long encoded) { return SpatialAggregationUtils.extractFirst(encoded); @@ -89,19 +53,18 @@ public int extractX(long encoded) { public int extractY(long encoded) { return SpatialAggregationUtils.extractSecond(encoded); } + + @Override + public CoordinateEncoder encoder() { + return CoordinateEncoder.CARTESIAN; + } }; public abstract Optional computeEnvelope(Geometry geo); - public abstract double decodeX(int encoded); - - public abstract double decodeY(int encoded); - - public abstract int encodeX(double decoded); - - public abstract int encodeY(double decoded); - public abstract int extractX(long encoded); public abstract int extractY(long encoded); + + public abstract CoordinateEncoder encoder(); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java index 9ce0ccdda0ff5..cb765e4d6757e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingState.java @@ -72,10 +72,10 @@ public void add(int groupId, Geometry geometry) { .ifPresent( r -> add( groupId, - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -122,10 +122,10 @@ public Block toBlock(IntVector selected, DriverContext driverContext) { new BytesRef( WellKnownBinary.toWKB( new Rectangle( - pointType.decodeX(minXs.get(si)), - pointType.decodeX(maxXs.get(si)), - pointType.decodeY(maxYs.get(si)), - pointType.decodeY(minYs.get(si)) + pointType.encoder().decodeX(minXs.get(si)), + pointType.encoder().decodeX(maxXs.get(si)), + pointType.encoder().decodeY(maxYs.get(si)), + pointType.encoder().decodeY(minYs.get(si)) ), ByteOrder.LITTLE_ENDIAN ) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java index 3dd7a6d4acde2..41bc50abcf6bc 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentGroupingStateWrappedLongitudeState.java @@ -91,8 +91,8 @@ public void add(int groupId, Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java index 0eea9b79f73ea..3dc150d1702a2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentState.java @@ -14,6 +14,7 @@ import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.lucene.spatial.CoordinateEncoder; import java.nio.ByteOrder; @@ -46,10 +47,10 @@ public void add(Geometry geo) { pointType.computeEnvelope(geo) .ifPresent( r -> add( - pointType.encodeX(r.getMinX()), - pointType.encodeX(r.getMaxX()), - pointType.encodeY(r.getMaxY()), - pointType.encodeY(r.getMinY()) + pointType.encoder().encodeX(r.getMinX()), + pointType.encoder().encodeX(r.getMaxX()), + pointType.encoder().encodeY(r.getMaxY()), + pointType.encoder().encodeY(r.getMinY()) ) ); } @@ -74,8 +75,9 @@ public Block toBlock(DriverContext driverContext) { } private byte[] toWKB() { + CoordinateEncoder encoder = pointType.encoder(); return WellKnownBinary.toWKB( - new Rectangle(pointType.decodeX(minX), pointType.decodeX(maxX), pointType.decodeY(maxY), pointType.decodeY(minY)), + new Rectangle(encoder.decodeX(minX), encoder.decodeX(maxX), encoder.decodeY(maxY), encoder.decodeY(minY)), ByteOrder.LITTLE_ENDIAN ); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java index 99200d2ed99f5..0d6163636fcde 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/spatial/SpatialExtentStateWrappedLongitudeState.java @@ -53,8 +53,8 @@ public void add(Geometry geo) { SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMinPosX()), SpatialAggregationUtils.encodeNegativeLongitude(geoPointVisitor.getMaxNegX()), SpatialAggregationUtils.encodePositiveLongitude(geoPointVisitor.getMaxPosX()), - POINT_TYPE.encodeY(geoPointVisitor.getMaxY()), - POINT_TYPE.encodeY(geoPointVisitor.getMinY()) + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMaxY()), + POINT_TYPE.encoder().encodeY(geoPointVisitor.getMinY()) ); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 18ce9d7e3e057..77dde5e875080 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -390,7 +390,7 @@ public static LogicalPlan localSource(BlockFactory blockFactory, List } public static T as(Object node, Class type) { - Assert.assertThat(node, instanceOf(type)); + Assert.assertThat("Unexpected type: " + node.getClass(), node, instanceOf(type)); return type.cast(node); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java index 84915d024ea82..54c05cf1bad52 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java @@ -103,11 +103,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (type) { case DataType.GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialCentroidCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; default -> throw EsqlIllegalArgumentException.illegalDataType(type); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java index 5cc1701faf13a..34e5c9d68fc86 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtent.java @@ -104,11 +104,11 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { return switch (field().dataType()) { case DataType.GEO_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentGeoPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) { case DOC_VALUES -> new SpatialExtentCartesianPointDocValuesAggregatorFunctionSupplier(inputChannels); - case NONE -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); + case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier(inputChannels); }; // Shapes don't differentiate between source and doc values. case DataType.GEO_SHAPE -> new SpatialExtentGeoShapeAggregatorFunctionSupplier(inputChannels); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index 1eaade043658b..eb148952e0a26 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushTopNToSource; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.ReplaceSourceAttributes; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialDocValuesExtraction; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialShapeBoundExtraction; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; import org.elasticsearch.xpack.esql.rule.Rule; @@ -73,7 +74,13 @@ protected List> rules(boolean optimizeForEsSource) { var pushdown = new Batch("Push to ES", esSourceRules.toArray(Rule[]::new)); // add the field extraction in just one pass // add it at the end after all the other rules have ran - var fieldExtraction = new Batch<>("Field extraction", Limiter.ONCE, new InsertFieldExtraction(), new SpatialDocValuesExtraction()); + var fieldExtraction = new Batch<>( + "Field extraction", + Limiter.ONCE, + new InsertFieldExtraction(), + new SpatialDocValuesExtraction(), + new SpatialShapeBoundExtraction() + ); return asList(pushdown, fieldExtraction); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java new file mode 100644 index 0000000000000..6949602aed0a4 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialShapeBoundExtraction.java @@ -0,0 +1,100 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; + +import org.elasticsearch.lucene.spatial.GeometryDocValueWriter; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialExtent; +import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; +import org.elasticsearch.xpack.esql.optimizer.PhysicalOptimizerRules.ParameterizedOptimizerRule; +import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; +import org.elasticsearch.xpack.esql.plan.physical.EvalExec; +import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; +import org.elasticsearch.xpack.esql.plan.physical.FilterExec; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; + +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * This rule is responsible for marking spatial shape fields whose extent can be extracted from the binary representation encoded by + * {@link GeometryDocValueWriter}. + * This is a very specific optimization that is only used in the context of ST_EXTENT_AGG aggregations. + * Normally spatial fields are extracted from source values because this maintains original precision, but is very slow. + * Simply extracting the spatial bounds from the binary encoding loses both precision and geometry topological information for shapes. + * For this reason we only consider extract the extent under very specific conditions: + *
    + *
  • The spatial data is of type GEO_SHAPE or CARTESIAN_SHAPE.
  • + *
  • The spatial data is consumed directly by an ST_EXTENT_AGG.
  • + *
  • The spatial is not consumed by any other operation. While is this is stricter than necessary, + * it is a good enough approximation for now.
  • + *
+ */ +public class SpatialShapeBoundExtraction extends ParameterizedOptimizerRule { + @Override + protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerContext ctx) { + var foundAttributes = new HashSet(); + + return aggregate.transformDown(UnaryExec.class, exec -> { + switch (exec) { + case AggregateExec agg -> { + List aggregateFunctions = agg.aggregates() + .stream() + .flatMap(e -> SpatialShapeBoundExtraction.extractAggregateFunction(e).stream()) + .toList(); + List spatialExtents = aggregateFunctions.stream() + .filter(SpatialExtent.class::isInstance) + .map(SpatialExtent.class::cast) + .toList(); + List nonSpatialExtents = aggregateFunctions.stream() + .filter(a -> a instanceof SpatialExtent == false) + .toList(); + // While we currently do not have any non-extent aggregations which apply to shapes, we might have them in the future. + Set fieldsAppearingInNonSpatialExtents = nonSpatialExtents.stream() + .flatMap(af -> af.references().stream()) + .filter(FieldAttribute.class::isInstance) + .map(f -> ((FieldAttribute) f).field()) + .collect(Collectors.toSet()); + spatialExtents.stream() + .map(SpatialExtent::field) + .filter(FieldAttribute.class::isInstance) + .map(FieldAttribute.class::cast) + .filter(f -> isShape(f.field().getDataType()) && fieldsAppearingInNonSpatialExtents.contains(f.field()) == false) + .forEach(foundAttributes::add); + } + case EvalExec evalExec -> foundAttributes.removeAll(evalExec.references()); + case FilterExec filterExec -> foundAttributes.removeAll(filterExec.condition().references()); + case FieldExtractExec fieldExtractExec -> { + foundAttributes.retainAll(fieldExtractExec.attributesToExtract()); + return fieldExtractExec.withBoundAttributes(foundAttributes); + } + default -> { // Do nothing + } + } + return exec; + }); + } + + private static boolean isShape(DataType dataType) { + return dataType == DataType.GEO_SHAPE || dataType == DataType.CARTESIAN_SHAPE; + } + + private static Optional extractAggregateFunction(NamedExpression expr) { + return expr instanceof Alias as && as.child() instanceof AggregateFunction af ? Optional.of(af) : Optional.empty(); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java index ec996c5c84064..0fddfb652afff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FieldExtractExec.java @@ -7,9 +7,11 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -31,9 +33,10 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { ); private final List attributesToExtract; - private final Attribute sourceAttribute; + private final @Nullable Attribute sourceAttribute; + /** - * Attributes that many be extracted as doc values even if that makes them + * Attributes that may be extracted as doc values even if that makes them * less accurate. This is mostly used for geo fields which lose a lot of * precision in their doc values, but in some cases doc values provides * enough precision to do the job. @@ -43,17 +46,32 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize { */ private final Set docValuesAttributes; + /** + * Attributes of a shape whose extent can be extracted directly from the encoded geometry. + *

+ * This is never serialized between nodes and only used locally. + *

+ */ + private final Set boundAttributes; + private List lazyOutput; public FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract) { - this(source, child, attributesToExtract, Set.of()); + this(source, child, attributesToExtract, Set.of(), Set.of()); } - private FieldExtractExec(Source source, PhysicalPlan child, List attributesToExtract, Set docValuesAttributes) { + private FieldExtractExec( + Source source, + PhysicalPlan child, + List attributesToExtract, + Set docValuesAttributes, + Set boundAttributes + ) { super(source, child); this.attributesToExtract = attributesToExtract; this.sourceAttribute = extractSourceAttributesFrom(child); this.docValuesAttributes = docValuesAttributes; + this.boundAttributes = boundAttributes; } private FieldExtractExec(StreamInput in) throws IOException { @@ -78,7 +96,7 @@ public String getWriteableName() { return ENTRY.name; } - public static Attribute extractSourceAttributesFrom(PhysicalPlan plan) { + public static @Nullable Attribute extractSourceAttributesFrom(PhysicalPlan plan) { for (Attribute attribute : plan.outputSet()) { if (EsQueryExec.isSourceAttribute(attribute)) { return attribute; @@ -99,18 +117,22 @@ protected NodeInfo info() { @Override public UnaryExec replaceChild(PhysicalPlan newChild) { - return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes, boundAttributes); } public FieldExtractExec withDocValuesAttributes(Set docValuesAttributes) { - return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes); + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); + } + + public FieldExtractExec withBoundAttributes(Set boundAttributes) { + return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundAttributes); } public List attributesToExtract() { return attributesToExtract; } - public Attribute sourceAttribute() { + public @Nullable Attribute sourceAttribute() { return sourceAttribute; } @@ -118,8 +140,8 @@ public Set docValuesAttributes() { return docValuesAttributes; } - public boolean hasDocValuesAttribute(Attribute attr) { - return docValuesAttributes.contains(attr); + public Set boundAttributes() { + return boundAttributes; } @Override @@ -142,7 +164,7 @@ public PhysicalPlan estimateRowSize(State state) { @Override public int hashCode() { - return Objects.hash(attributesToExtract, docValuesAttributes, child()); + return Objects.hash(attributesToExtract, docValuesAttributes, boundAttributes, child()); } @Override @@ -158,12 +180,18 @@ public boolean equals(Object obj) { FieldExtractExec other = (FieldExtractExec) obj; return Objects.equals(attributesToExtract, other.attributesToExtract) && Objects.equals(docValuesAttributes, other.docValuesAttributes) + && Objects.equals(boundAttributes, other.boundAttributes) && Objects.equals(child(), other.child()); } @Override public String nodeString() { - return nodeName() + NodeUtils.limitedToString(attributesToExtract) + "<" + NodeUtils.limitedToString(docValuesAttributes) + ">"; + return Strings.format( + "%s<%s,%s>", + nodeName() + NodeUtils.limitedToString(attributesToExtract), + docValuesAttributes, + boundAttributes + ); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java index a1faa537ba052..225e10f99c853 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialExtentTests.java @@ -17,11 +17,11 @@ import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor; import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor.WrapLongitude; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.test.hamcrest.RectangleMatcher; +import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.RectangleMatcher; -import org.elasticsearch.xpack.esql.expression.WellKnownBinaryBytesRefMatcher; import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; import org.elasticsearch.xpack.esql.expression.function.FunctionName; import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; @@ -82,20 +82,7 @@ private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier List.of(fieldTypedData), "SpatialExtent[field=Attribute[channel=0]]", expectedType, - new WellKnownBinaryBytesRefMatcher<>( - RectangleMatcher.closeTo( - new Rectangle( - // Since we use integers locally which are later decoded to doubles, all computation is effectively done using - // floats, not doubles. - (float) result.getMinX(), - (float) result.getMaxX(), - (float) result.getMaxY(), - (float) result.getMinY() - ), - 1e-3, - pointType - ) - ) + new WellKnownBinaryBytesRefMatcher<>(RectangleMatcher.closeToFloat(result, 1e-3, pointType.encoder())) ); }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 9f6ef89008a24..c22fda9ad690d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -164,12 +164,14 @@ import static org.elasticsearch.xpack.esql.core.expression.function.scalar.FunctionTestUtils.l; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; import static org.elasticsearch.xpack.esql.parser.ExpressionBuilder.MAX_EXPRESSION_DEPTH; import static org.elasticsearch.xpack.esql.parser.LogicalPlanBuilder.MAX_QUERY_DEPTH; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasItem; @@ -199,6 +201,7 @@ public class PhysicalPlanOptimizerTests extends ESTestCase { private TestDataSource testData; private int allFieldRowSize; // TODO: Move this into testDataSource so tests that load other indexes can also assert on this private TestDataSource airports; + private TestDataSource airportsCityBoundaries; private TestDataSource airportsNoDocValues; // Test when spatial field is indexed but has no doc values private TestDataSource airportsNotIndexed; // Test when spatial field has doc values but is not indexed private TestDataSource airportsNotIndexedNorDocValues; // Test when spatial field is neither indexed nor has doc-values @@ -250,6 +253,13 @@ public void init() { // Some tests use data from the airports and countries indexes, so we load that here, and use it in the plan(q, airports) function. this.airports = makeTestDataSource("airports", "mapping-airports.json", functionRegistry, enrichResolution); + this.airportsCityBoundaries = makeTestDataSource( + "airports_city_boundaries", + "mapping-airport_city_boundaries.json", + functionRegistry, + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.DOC_VALUES, "city_boundary") + ); this.airportsNoDocValues = makeTestDataSource( "airports-no-doc-values", "mapping-airports_no_doc_values.json", @@ -2908,24 +2918,23 @@ public void testSpatialTypesAndStatsExtentUseDocValues() { * Before local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ - * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] - * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],FINAL,...] + * \_ExchangeExec[[...]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[..]] + * \_EsRelation[airports-no-doc-values][abbrev{f}#8, city{f}#14, city_location{f}#15, count..]]] * * After local optimizations: * * LimitExec[1000[INTEGER]] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, - * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] - * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] - * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ - * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] - * \_FieldExtractExec[location{f}#48][location{f}#48] - * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ - * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],FINAL,[...]] + * \_ExchangeExec[[...]] + * \_AggregateExec[[],[SPATIALEXTENT(location{f}#12,true[BOOLEAN]) AS extent, SPATIALCENTROID(location{f}#12,true[BOOLEAN]) AS cen + * troid],INITIAL,...] + * \_FilterExec[ISNOTNULL(location{f}#12)] + * \_FieldExtractExec[location{f}#12] + * \_EsQueryExec[airports-no-doc-values], indexMode[standard], query[][_doc{f}#59], limit[], sort[] estimatedRowSize[25] * * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] *

@@ -2965,6 +2974,156 @@ public void testSpatialTypesAndStatsExtentAndCentroidUseDocValues() { } } + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],null] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[ + * Aggregate[STANDARD,[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent]] + * \_EsRelation[airports][abbrev{f}#44, city{f}#50, city_location{f}#51, coun..]]] + * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] + *

+ * Also note that the type converting function is removed when it does not actually convert the type, + * ensuring that ReferenceAttributes are not created for the same field, and the optimization can still work. + */ + public void testSpatialTypesAndStatsExtentOfShapesUsesBinaryExtraction() { + for (String query : new String[] { "from airports_city_boundaries | stats extent = st_extent_agg(city_boundary)", }) { + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use extent extraction + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses extent extraction + System.out.println(plan); + var optimized = optimizedPlan(plan, testData.stats); + System.out.println(optimized); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not using doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is using a specific + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertChildIsExtractedAsBounds(agg, GEO_SHAPE); + } + } + + // This test verifies that the aggregation does not use spatial bounds extraction when the shape appears in an eval or filter. + public void testSpatialTypesAndStatsExtentOfShapesNegativeCases() { + for (String query : new String[] { """ + FROM airports_city_boundaries | \ + EVAL prefix = SUBSTRING(TO_STRING(city_boundary), 5) | \ + STATS extent = ST_EXTENT_AGG(city_boundary) BY prefix""", """ + FROM airports_city_boundaries \ + | WHERE STARTS_WITH(TO_STRING(city_boundary), "MULTIPOLYGON") \ + | STATS extent = ST_EXTENT_AGG(city_boundary)""", }) { + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + + var optimized = optimizedPlan(plan, testData.stats); + System.out.println(optimized); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var exec = agg.child() instanceof FieldExtractExec ? agg : as(agg.child(), UnaryExec.class); + assertChildIsExtractedAsDocValues(exec, withDocValues, GEO_SHAPE); + } + } + + /** + * Before local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALEXTENT(city_boundary{f}#13,true[BOOLEAN]) AS extent, SPATIALCENTROID(city_location{f}#12,true[BOOLEA + * N]) AS centroid],...] + * \_ExchangeExec[[..]] + * \_FragmentExec[filter=null, estimatedRowSize=0, reducer=[], fragment=[...]] + * \_EsRelation[airports_city_boundaries][abbrev{f}#8, airport{f}#9, city{f}#11, city_boundar..] + * + * After local optimizations: + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],FINAL,[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, + * maxPosX{r}#55, maxY{r}#56, minY{r}#57],21] + * \_ExchangeExec[[minNegX{r}#52, minPosX{r}#53, maxNegX{r}#54, maxPosX{r}#55, maxY{r}#56, minY{r}#57],true] + * \_AggregateExec[[],[SPATIALSTEXTENT(location{f}#48,true[BOOLEAN]) AS extent],INITIAL,[ + * minNegX{r}#73, minPosX{r}#74, maxNegX{rb#75, maxPosX{r}#76, maxY{r}#77, minY{r}#78],21] + * \_FieldExtractExec[location{f}#48][location{f}#48] + * \_EsQueryExec[airports], indexMode[standard], query[{"exists":{"field":"location","boost":1.0}}][ + * _doc{f}#79], limit[], sort[] estimatedRowSize[25] + * + */ + public void testMixedSpatialBoundsAndPointsExtracted() { + var query = """ + FROM airports_city_boundaries \ + | STATS extent = ST_EXTENT_AGG(city_boundary), centroid = ST_CENTROID_AGG(city_location)"""; + var withDocValues = false; + var testData = airportsCityBoundaries; + var plan = physicalPlan(query, testData); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + // Before optimization the aggregation does not use doc-values + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, false); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + as(fAgg.child(), EsRelation.class); + + // Now optimize the plan and assert the aggregation uses both doc-values and bounds extraction + System.out.println(plan); + var optimized = optimizedPlan(plan, testData.stats); + System.out.println(optimized); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not field-optimized. + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, withDocValues); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + // below the exchange (in data node) the aggregation is field optimized. + assertAggregation(agg, "extent", SpatialExtent.class, GEO_SHAPE, withDocValues); + var fieldExtractExec = as(agg.child(), FieldExtractExec.class); + assertThat(fieldExtractExec.boundAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_boundary"))); + assertThat(fieldExtractExec.docValuesAttributes().stream().map(a -> a.sourceText()).toList(), equalTo(List.of("city_location"))); + } + /** * This test does not have real index fields, and therefor asserts that doc-values field extraction does NOT occur. * Before local optimizations: @@ -6912,12 +7071,23 @@ private EsQueryExec assertChildIsGeoPointExtract(UnaryExec parent, boolean useDo } private EsQueryExec assertChildIsExtractedAsDocValues(UnaryExec parent, boolean useDocValues, DataType dataType) { + // TODO(gal) why is this OK To vacuously true? var extract = as(parent.child(), FieldExtractExec.class); + assertThat(extract.boundAttributes(), is(empty())); assertTrue( "Expect field attribute to be extracted as " + (useDocValues ? "doc-values" : "source"), extract.attributesToExtract() .stream() - .allMatch(attr -> extract.hasDocValuesAttribute(attr) == useDocValues && attr.dataType() == dataType) + .allMatch(attr -> extract.docValuesAttributes().contains(attr) == useDocValues && attr.dataType() == dataType) + ); + return source(extract.child()); + } + + private static EsQueryExec assertChildIsExtractedAsBounds(UnaryExec parent, DataType dataType) { + var extract = as(parent.child(), FieldExtractExec.class); + assertTrue( + "Expect field attribute to be extracted as bounds", + extract.attributesToExtract().stream().allMatch(attr -> extract.boundAttributes().contains(attr) && attr.dataType() == dataType) ); return source(extract.child()); } @@ -6978,13 +7148,14 @@ private static QueryBuilder findQueryBuilder(BoolQueryBuilder booleanQuery, Stri } private void assertFieldExtractionWithDocValues(FieldExtractExec extract, DataType dataType, String... fieldNames) { + var docValuesAttributes = extract.docValuesAttributes(); extract.attributesToExtract().forEach(attr -> { String name = attr.name(); if (asList(fieldNames).contains(name)) { - assertThat("Expected field '" + name + "' to use doc-values", extract.hasDocValuesAttribute(attr), equalTo(true)); + assertThat("Expected field '" + name + "' to use doc-values", docValuesAttributes.contains(attr), equalTo(true)); assertThat("Expected field '" + name + "' to have data type " + dataType, attr.dataType(), equalTo(dataType)); } else { - assertThat("Expected field '" + name + "' to NOT use doc-values", extract.hasDocValuesAttribute(attr), equalTo(false)); + assertThat("Expected field '" + name + "' to NOT use doc-values", docValuesAttributes.contains(attr), equalTo(false)); } }); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index e91fc6e49312d..78512636b57e9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -86,7 +86,10 @@ public PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fieldExt for (Attribute attr : fieldExtractExec.attributesToExtract()) { layout.append(attr); op = op.with( - new TestFieldExtractOperatorFactory(attr, PlannerUtils.extractPreference(fieldExtractExec.hasDocValuesAttribute(attr))), + new TestFieldExtractOperatorFactory( + attr, + PlannerUtils.extractPreference(fieldExtractExec.docValuesAttributes().contains(attr)) + ), layout.build() ); } diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java index 23505eda493af..224abd2002455 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/GeoShapeWithDocValuesFieldMapper.java @@ -208,7 +208,6 @@ public GeoShapeWithDocValuesFieldMapper build(MapperBuilderContext context) { } public static final class GeoShapeWithDocValuesFieldType extends AbstractShapeGeometryFieldType implements GeoShapeQueryable { - private final GeoFormatterFactory geoFormatterFactory; private final FieldValues scriptValues; @@ -298,6 +297,17 @@ public List parseStoredValues(List storedValues) { protected Function, List> getFormatter(String format) { return geoFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + // Extracting bounds for geo shapes is not implemented yet. + return false; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.GEO; + } } public static class TypeParser implements Mapper.TypeParser { diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java index e5d5354327f5a..2d586ac8eb86a 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/mapper/ShapeFieldMapper.java @@ -184,6 +184,16 @@ public String typeName() { protected Function, List> getFormatter(String format) { return GeometryFormatterFactory.getFormatter(format, Function.identity()); } + + @Override + protected boolean isBoundsExtractionSupported() { + return true; + } + + @Override + protected CoordinateEncoder coordinateEncoder() { + return CoordinateEncoder.CARTESIAN; + } } private final Builder builder;