Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ESQL: ST_EXTENT_AGG optimize envelope extraction from doc-values for cartesian_shape #118802

Merged
merged 31 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
9ce03ad
ESQL: ST_EXTENT_AGG binary extent optimization
GalLalouche Dec 4, 2024
a519779
ESQL: ST_EXTENT_AGG binary extent optimization
GalLalouche Dec 4, 2024
d0c329f
Update docs/changelog/118802.yaml
GalLalouche Dec 17, 2024
7d75dd9
Merge branch 'optimization/st_extent' of github.com:GalLalouche/elast…
GalLalouche Dec 17, 2024
9055219
Manually modify changelog
GalLalouche Dec 17, 2024
e61f7a8
Fix TODO
GalLalouche Dec 17, 2024
753c8d7
Small refactors
GalLalouche Dec 17, 2024
f736839
ESQL: ST_EXTENT_AGG binary extent optimization
GalLalouche Dec 4, 2024
b6ba18a
Update docs/changelog/118802.yaml
GalLalouche Dec 17, 2024
3f0b58f
Manually modify changelog
GalLalouche Dec 17, 2024
47966cf
Fix TODO
GalLalouche Dec 17, 2024
a8a2c95
Merge remote-tracking branch 'origin/main' into optimization/st_extent
craigtaverner Dec 18, 2024
4104176
Better changelog message
craigtaverner Dec 18, 2024
05116c0
Reverted remove line
craigtaverner Dec 18, 2024
e89600b
Merge branch 'optimization/st_extent' of github.com:GalLalouche/elast…
GalLalouche Dec 19, 2024
e2743ae
Merge branch 'main' into optimization/st_extent
craigtaverner Dec 19, 2024
72e3e34
Review fixes
GalLalouche Dec 19, 2024
d62e37d
Merge branch 'main' into optimization/st_extent
GalLalouche Dec 19, 2024
f823bc5
Merge branch 'main' into optimization/st_extent
craigtaverner Dec 19, 2024
71aadca
Merge branch 'optimization/st_extent' of github.com:GalLalouche/elast…
craigtaverner Dec 19, 2024
93e7129
Merge remote-tracking branch 'origin/main' into optimization/st_extent
craigtaverner Dec 19, 2024
48d8f94
Review fixes
GalLalouche Dec 19, 2024
58a491a
Fix test docs, revert MapperTestCase changes
GalLalouche Dec 19, 2024
bc4a693
Merge branch 'optimization/st_extent' of github.com:GalLalouche/elast…
GalLalouche Dec 19, 2024
c6875bc
Inline forColumnReader
GalLalouche Dec 19, 2024
7aa6734
Added missing test for bounds extraction from cartesian_shape doc-values
craigtaverner Dec 19, 2024
1d8ed8f
Merge branch 'optimization/st_extent' of github.com:GalLalouche/elast…
craigtaverner Dec 19, 2024
eff2f67
Merge branch 'main' into optimization/st_extent
craigtaverner Dec 19, 2024
ebfd723
Fixed the test that was testing the wrong thing
craigtaverner Dec 20, 2024
5553b68
Merge branch 'main' into optimization/st_extent
craigtaverner Dec 20, 2024
a9d6027
Added bounds extracting with grouping tests and fixed bug with multip…
craigtaverner Dec 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/118802.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 118802
summary: ST_EXTENT_AGG optimize envelope extraction from doc-values for cartesian_shape
area: "ES|QL"
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.elasticsearch.legacygeo.builders.ShapeBuilder;
import org.elasticsearch.legacygeo.parsers.ShapeParser;
import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.locationtech.spatial4j.shape.Point;
Expand Down Expand Up @@ -530,6 +531,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) {
protected Function<List<ShapeBuilder<?, ?, ?>>, List<Object>> getFormatter(String format) {
return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry);
}

@Override
protected boolean isBoundsExtractionSupported() {
// Extracting bounds for geo shapes is not implemented yet.
return false;
}

@Override
protected CoordinateEncoder coordinateEncoder() {
return CoordinateEncoder.GEO;
}
}

private final IndexVersion indexCreatedVersion;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,18 @@
*/
package org.elasticsearch.index.mapper;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.Orientation;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.WellKnownBinary;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.lucene.spatial.GeometryDocValueReader;

import java.io.IOException;
import java.nio.ByteOrder;
import java.util.Map;
import java.util.function.Function;

Expand Down Expand Up @@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) {
// we don't support null value fors shapes
return nullValue;
}

@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported()
? new BoundsBlockLoader(name(), coordinateEncoder())
: blockLoaderFromSource(blContext);
}

protected abstract boolean isBoundsExtractionSupported();

protected abstract CoordinateEncoder coordinateEncoder();

// Visible for testing
static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
private final String fieldName;
private final CoordinateEncoder encoder;

BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) {
this.fieldName = fieldName;
this.encoder = encoder;
}

@Override
public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException {
return new BlockLoader.AllReader() {
@Override
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException {
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
var reader = new GeometryDocValueReader();
try (var builder = factory.bytesRefs(docs.count())) {
for (int i = 0; i < docs.count(); i++) {
read(binaryDocValues, docs.get(i), reader, builder);
}
return builder.build();
}
}

@Override
public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException {
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
var reader = new GeometryDocValueReader();
read(binaryDocValues, docId, reader, (BytesRefBuilder) builder);
}

private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder)
throws IOException {
binaryDocValues.advanceExact(doc);
reader.reset(binaryDocValues.binaryValue());
var extent = reader.getExtent();
// This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to
// preserve its properties as a WKB shape, only to convert it back to ints when we compute the
// aggregation. An obvious optimization would be to avoid this back-and-forth conversion.
GalLalouche marked this conversation as resolved.
Show resolved Hide resolved
var rectangle = new Rectangle(
encoder.decodeX(extent.minX()),
encoder.decodeX(extent.maxX()),
encoder.decodeY(extent.maxY()),
encoder.decodeY(extent.minY())
);
builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN)));
}

@Override
public boolean canReuse(int startingDocID) {
return true;
}
};
}

@Override
public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) {
return factory.bytesRefs(expectedCount);
}
}
}

protected Explicit<Boolean> coerce;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -677,10 +677,15 @@ public enum FieldExtractPreference {
* Load the field from doc-values into a BlockLoader supporting doc-values.
*/
DOC_VALUES,
/**
* Loads the field by extracting the extent from the binary encoded representation
*/
EXTRACT_SPATIAL_BOUNDS,
/**
* No preference. Leave the choice of where to load the field from up to the FieldType.
*/
NONE
NONE;

}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.geo.Orientation;
import org.elasticsearch.geo.GeometryTestUtils;
import org.elasticsearch.geo.ShapeTestUtils;
import org.elasticsearch.geometry.Geometry;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor;
import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField;
import org.elasticsearch.lucene.spatial.CartesianShapeIndexer;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.hamcrest.RectangleMatcher;
import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher;

import java.io.IOException;
import java.util.Optional;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.IntStream;

public class AbstractShapeGeometryFieldMapperTests extends ESTestCase {
public void testCartesianBoundsBlockLoader() throws IOException {
testBoundsBlockLoaderAux(
CoordinateEncoder.CARTESIAN,
() -> ShapeTestUtils.randomGeometryWithoutCircle(0, false),
CartesianShapeIndexer::new,
SpatialEnvelopeVisitor::visitCartesian
);
}

// TODO when we turn this optimization on for geo, this test should pass.
public void ignoreTestGeoBoundsBlockLoader() throws IOException {
testBoundsBlockLoaderAux(
CoordinateEncoder.GEO,
() -> GeometryTestUtils.randomGeometryWithoutCircle(0, false),
field -> new GeoShapeIndexer(Orientation.RIGHT, field),
g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP)
);
}

private void testBoundsBlockLoaderAux(
CoordinateEncoder encoder,
Supplier<Geometry> generator,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parameter not used

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope. Still unused.

Function<String, ShapeIndexer> indexerFactory,
Function<Geometry, Optional<Rectangle>> visitor
) throws IOException {
var geometries = IntStream.range(0, 20).mapToObj(i -> generator.get()).toList();
var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder);
try (Directory directory = newDirectory()) {
try (var iw = new RandomIndexWriter(random(), directory)) {
for (Geometry geometry : geometries) {
var shape = new BinaryShapeDocValuesField("field", encoder);
shape.add(indexerFactory.apply("field").indexShape(geometry), geometry);
var doc = new Document();
doc.add(shape);
iw.addDocument(doc);
}
}
var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason for this only looking at every second geometry?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Verifying that the block loader extracts data from the correct docs.

try (DirectoryReader reader = DirectoryReader.open(directory)) {
LeafReaderContext ctx = reader.leaves().get(0);
TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices));
for (int i = 0; i < indices.length; i++) {
var idx = indices[i];
var geometry = geometries.get(idx);
var geoString = geometry.toString();
var geometryString = geoString.length() > 200 ? geoString.substring(0, 200) + "..." : geoString;
Rectangle r = visitor.apply(geometry).get();
assertThat(
Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometryString),
(BytesRef) block.get(i),
WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder))
);
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.elasticsearch.index.fielddata.LeafFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.index.translog.Translog;
Expand Down Expand Up @@ -87,8 +88,6 @@
import java.util.stream.IntStream;

import static java.util.stream.Collectors.toList;
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE;
import static org.elasticsearch.test.MapMatcher.assertMap;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.contains;
Expand Down Expand Up @@ -1420,7 +1419,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str
this(columnAtATimeReader, true, mapper, loaderFieldName);
}

private BlockLoader getBlockLoader(boolean columnReader) {
private BlockLoader getBlockLoader(FieldExtractPreference fieldExtractPreference) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this move makes things less clear. In most types there is a clear binary choice between row-reading and column-reading. Only for spatial types is there an actual fieldExtractPreference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's keep it this way and change it in the future, after we add more cases such as handling geo.

SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null);
return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() {
@Override
Expand All @@ -1434,8 +1433,8 @@ public IndexSettings indexSettings() {
}

@Override
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
return columnReader ? DOC_VALUES : NONE;
public FieldExtractPreference fieldExtractPreference() {
return fieldExtractPreference;
}

@Override
Expand Down Expand Up @@ -1493,7 +1492,9 @@ protected final void testBlockLoader(
BlockReaderSupport blockReaderSupport,
SourceLoader sourceLoader
) throws IOException {
BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader);
// EXTRACT_SPATIAL_BOUNDS is not currently supported in this test path.
var fieldExtractPreference = columnReader ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE;
BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference);
Function<Object, Object> valuesConvert = loadBlockExpected(blockReaderSupport, columnReader);
if (valuesConvert == null) {
assertNull(loader);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xpack.esql.expression;
package org.elasticsearch.test.hamcrest;

import org.elasticsearch.compute.aggregation.spatial.PointType;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.hamcrest.Description;
import org.hamcrest.Matchers;
import org.hamcrest.TypeSafeMatcher;
Expand All @@ -19,23 +21,31 @@
*/
public class RectangleMatcher extends TypeSafeMatcher<Rectangle> {
private final Rectangle r;
private final PointType pointType;
private final CoordinateEncoder coordinateEncoder;
private final double error;

public static TypeSafeMatcher<Rectangle> closeTo(Rectangle r, double error, PointType pointType) {
return new RectangleMatcher(r, error, pointType);
public static TypeSafeMatcher<Rectangle> closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) {
return new RectangleMatcher(r, error, coordinateEncoder);
}

private RectangleMatcher(Rectangle r, double error, PointType pointType) {
private RectangleMatcher(Rectangle r, double error, CoordinateEncoder coordinateEncoder) {
this.r = r;
this.pointType = pointType;
this.coordinateEncoder = coordinateEncoder;
this.error = error;
}

/**
* Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints.
*/
public static TypeSafeMatcher<Rectangle> closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) {
var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY());
return closeTo(normalized, v, encoder);
}

@Override
protected boolean matchesSafely(Rectangle other) {
// For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe.
boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX();
boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX();
boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX())
|| (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX()))
|| (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xpack.esql.expression;
package org.elasticsearch.test.hamcrest;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.geometry.Geometry;
Expand All @@ -23,6 +25,10 @@ public WellKnownBinaryBytesRefMatcher(Matcher<G> matcher) {
this.matcher = matcher;
}

public static <G extends Geometry> Matcher<BytesRef> encodes(TypeSafeMatcher<G> matcher) {
return new WellKnownBinaryBytesRefMatcher<G>(matcher);
}

@Override
public boolean matchesSafely(BytesRef bytesRef) {
return matcher.matches(fromBytesRef(bytesRef));
Expand Down
Loading