Skip to content

Commit

Permalink
ESQL: ST_EXTENT_AGG binary extent optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
GalLalouche committed Dec 16, 2024
1 parent 8deb32f commit 2369a33
Show file tree
Hide file tree
Showing 24 changed files with 648 additions and 141 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.elasticsearch.legacygeo.builders.ShapeBuilder;
import org.elasticsearch.legacygeo.parsers.ShapeParser;
import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.locationtech.spatial4j.shape.Point;
Expand Down Expand Up @@ -401,7 +402,6 @@ public void parse(
}

public static final class GeoShapeFieldType extends AbstractShapeGeometryFieldType<ShapeBuilder<?, ?, ?>> implements GeoShapeQueryable {

private String tree = Defaults.TREE;
private SpatialStrategy strategy = Defaults.STRATEGY;
private boolean pointsOnly = Defaults.POINTS_ONLY;
Expand Down Expand Up @@ -530,6 +530,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) {
protected Function<List<ShapeBuilder<?, ?, ?>>, List<Object>> getFormatter(String format) {
return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry);
}

@Override
protected boolean isBoundsExtractionSupported() {
// Extracting bounds for geo shapes is not implemented yet.
return false;
}

@Override
protected CoordinateEncoder coordinateEncoder() {
return CoordinateEncoder.GEO;
}
}

private final IndexVersion indexCreatedVersion;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,18 @@
*/
package org.elasticsearch.index.mapper;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.Orientation;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.WellKnownBinary;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.lucene.spatial.GeometryDocValueReader;

import java.io.IOException;
import java.nio.ByteOrder;
import java.util.Map;
import java.util.function.Function;

Expand Down Expand Up @@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) {
// we don't support null value fors shapes
return nullValue;
}

@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported()
? new BoundsBlockLoader(name(), coordinateEncoder())
: blockLoaderFromSource(blContext);
}

protected abstract boolean isBoundsExtractionSupported();

protected abstract CoordinateEncoder coordinateEncoder();

// Visible for testing
static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
private final String fieldName;
private final CoordinateEncoder encoder;

BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) {
this.fieldName = fieldName;
this.encoder = encoder;
}

@Override
public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException {
return new BlockLoader.AllReader() {
@Override
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException {
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
var reader = new GeometryDocValueReader();
try (var builder = factory.bytesRefs(docs.count())) {
for (int i = 0; i < docs.count(); i++) {
read(binaryDocValues, docs.get(i), reader, builder);
}
return builder.build();
}
}

@Override
public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException {
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
var reader = new GeometryDocValueReader();
read(binaryDocValues, docId, reader, (BytesRefBuilder) builder);
}

private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder)
throws IOException {
binaryDocValues.advanceExact(doc);
reader.reset(binaryDocValues.binaryValue());
var extent = reader.getExtent();
// This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to
// preserve its properties as a WKB shape, only to convert it back to ints when we compute the
// aggregation. An obvious optimization would be to avoid this back-and-forth conversion.
var rectangle = new Rectangle(
encoder.decodeX(extent.minX()),
encoder.decodeX(extent.maxX()),
encoder.decodeY(extent.maxY()),
encoder.decodeY(extent.minY())
);
builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN)));
}

@Override
public boolean canReuse(int startingDocID) {
return true;
}
};
}

@Override
public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) {
return factory.bytesRefs(expectedCount);
}
}
}

protected Explicit<Boolean> coerce;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -676,11 +676,27 @@ public enum FieldExtractPreference {
/**
* Load the field from doc-values into a BlockLoader supporting doc-values.
*/
DOC_VALUES,
DOC_VALUES(true),
/** Loads the field by extracting the extent from the binary encoded representation */
EXTRACT_SPATIAL_BOUNDS(false),
/**
* No preference. Leave the choice of where to load the field from up to the FieldType.
*/
NONE
NONE(false);

private final boolean isColumnReader;

FieldExtractPreference(boolean isColumnReader) {
this.isColumnReader = isColumnReader;
}

public static FieldExtractPreference forColumnReader(boolean columnReader) {
return columnReader ? DOC_VALUES : NONE;
}

public boolean isColumnReader() {
return isColumnReader;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.geo.Orientation;
import org.elasticsearch.geo.GeometryTestUtils;
import org.elasticsearch.geo.ShapeTestUtils;
import org.elasticsearch.geometry.Geometry;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor;
import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField;
import org.elasticsearch.lucene.spatial.CartesianShapeIndexer;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.hamcrest.RectangleMatcher;
import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher;

import java.io.IOException;
import java.util.Optional;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.IntStream;

public class AbstractShapeGeometryFieldMapperTests extends ESTestCase {
// TODO handle geo as well, this is actually bugged, since extracting the result ignores minneg etc.
public void testCartesianBoundsBlockLoader() throws IOException {
testBoundsBlockLoaderAux(
CoordinateEncoder.CARTESIAN,
() -> ShapeTestUtils.randomGeometryWithoutCircle(0, false),
field -> new CartesianShapeIndexer(field),
SpatialEnvelopeVisitor::visitCartesian
);
}

// TODO when we turn this optimization on for geo, handle this as well.
public void ignoreTestGeoBoundsBlockLoader() throws IOException {
testBoundsBlockLoaderAux(
CoordinateEncoder.GEO,
() -> GeometryTestUtils.randomGeometryWithoutCircle(0, false),
field -> new GeoShapeIndexer(Orientation.RIGHT, field),
g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP)
);
}

private void testBoundsBlockLoaderAux(
CoordinateEncoder encoder,
Supplier<Geometry> generator,
Function<String, ShapeIndexer> indexerFactory,
Function<Geometry, Optional<Rectangle>> visitor
) throws IOException {
var geometries = IntStream.range(0, 20).mapToObj(i -> ShapeTestUtils.randomGeometryWithoutCircle(0, false)).toList();
var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder);
try (Directory directory = newDirectory()) {
try (var iw = new RandomIndexWriter(random(), directory)) {
for (Geometry geometry : geometries) {
var shape = new BinaryShapeDocValuesField("field", encoder);
shape.add(indexerFactory.apply("field").indexShape(geometry), geometry);
var doc = new Document();
doc.add(shape);
iw.addDocument(doc);
}
}
var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray();
try (DirectoryReader reader = DirectoryReader.open(directory)) {
LeafReaderContext ctx = reader.leaves().get(0);
TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices));
for (int i = 0; i < indices.length; i++) {
var idx = indices[i];
Rectangle r = visitor.apply(geometries.get(idx)).get();
assertThat(
Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometries.get(idx)),
(BytesRef) block.get(i),
WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder))
);
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,6 @@ private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSo
MapperService mapper = syntheticSource ? createSytheticSourceMapperService(mapping) : createMapperService(mapping);
BlockReaderSupport blockReaderSupport = getSupportedReaders(mapper, "field.sub");
var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP);
testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader);
testBlockLoader(MappedFieldType.FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.elasticsearch.index.fielddata.LeafFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.index.translog.Translog;
Expand Down Expand Up @@ -87,8 +88,6 @@
import java.util.stream.IntStream;

import static java.util.stream.Collectors.toList;
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE;
import static org.elasticsearch.test.MapMatcher.assertMap;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.contains;
Expand Down Expand Up @@ -1420,7 +1419,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str
this(columnAtATimeReader, true, mapper, loaderFieldName);
}

private BlockLoader getBlockLoader(boolean columnReader) {
private BlockLoader getBlockLoader(FieldExtractPreference fieldExtractPreference) {
SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null);
return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() {
@Override
Expand All @@ -1434,8 +1433,8 @@ public IndexSettings indexSettings() {
}

@Override
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
return columnReader ? DOC_VALUES : NONE;
public FieldExtractPreference fieldExtractPreference() {
return fieldExtractPreference;
}

@Override
Expand Down Expand Up @@ -1484,16 +1483,20 @@ private void testBlockLoader(boolean syntheticSource, boolean columnReader) thro
);
}
var sourceLoader = mapper.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP);
testBlockLoader(columnReader, example, blockReaderSupport, sourceLoader);
testBlockLoader(FieldExtractPreference.forColumnReader(columnReader), example, blockReaderSupport, sourceLoader);
}

protected final void testBlockLoader(
boolean columnReader,
FieldExtractPreference fieldExtractPreference,
SyntheticSourceExample example,
BlockReaderSupport blockReaderSupport,
SourceLoader sourceLoader
) throws IOException {
BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader);
var columnReader = switch (fieldExtractPreference) {
case DOC_VALUES -> true;
case NONE, EXTRACT_SPATIAL_BOUNDS -> false;
};
BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference);
Function<Object, Object> valuesConvert = loadBlockExpected(blockReaderSupport, columnReader);
if (valuesConvert == null) {
assertNull(loader);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
* 2.0.
*/

package org.elasticsearch.xpack.esql.expression;
package org.elasticsearch.test.hamcrest;

import org.elasticsearch.compute.aggregation.spatial.PointType;
import org.elasticsearch.geometry.Rectangle;
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
import org.hamcrest.Description;
import org.hamcrest.Matchers;
import org.hamcrest.TypeSafeMatcher;
Expand All @@ -19,23 +19,31 @@
*/
public class RectangleMatcher extends TypeSafeMatcher<Rectangle> {
private final Rectangle r;
private final PointType pointType;
private final CoordinateEncoder coordinateEncoder;
private final double error;

public static TypeSafeMatcher<Rectangle> closeTo(Rectangle r, double error, PointType pointType) {
return new RectangleMatcher(r, error, pointType);
public static TypeSafeMatcher<Rectangle> closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) {
return new RectangleMatcher(r, error, coordinateEncoder);
}

private RectangleMatcher(Rectangle r, double error, PointType pointType) {
private RectangleMatcher(Rectangle r, double error, CoordinateEncoder coordinateEncoder) {
this.r = r;
this.pointType = pointType;
this.coordinateEncoder = coordinateEncoder;
this.error = error;
}

/**
* Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints.
*/
public static TypeSafeMatcher<Rectangle> closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) {
var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY());
return closeTo(normalized, v, encoder);
}

@Override
protected boolean matchesSafely(Rectangle other) {
// For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe.
boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX();
boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX();
boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX())
|| (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX()))
|| (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180));
Expand Down
Loading

0 comments on commit 2369a33

Please sign in to comment.