From 5a4c5206384b4fddb3b52ac7eace5e0236d38d90 Mon Sep 17 00:00:00 2001 From: "James A. Gill" Date: Thu, 25 Jul 2019 16:08:46 -0400 Subject: [PATCH] Use Flatbush instead of JTS STR RTree for spatial joins There are memory problems with spatial joins, potentially due to the large number of objects created by the JTS RTree. The JTS tree used for the build side of spatial joins creates many objects and may not report its memory usage precisely. A Flatbush creates very few objects, has a low memory footprint, and offers faster build/query times. --- presto-main/pom.xml | 5 -- .../presto/operator/PagesRTreeIndex.java | 56 +++++++++++-------- .../operator/PagesSpatialIndexSupplier.java | 37 +++--------- 3 files changed, 40 insertions(+), 58 deletions(-) diff --git a/presto-main/pom.xml b/presto-main/pom.xml index 36f8181a1ffb..fccd5033ffc3 100644 --- a/presto-main/pom.xml +++ b/presto-main/pom.xml @@ -16,11 +16,6 @@ - - org.locationtech.jts - jts-core - - com.esri.geometry esri-geometry-api diff --git a/presto-main/src/main/java/com/facebook/presto/operator/PagesRTreeIndex.java b/presto-main/src/main/java/com/facebook/presto/operator/PagesRTreeIndex.java index 000aa85e9755..412388356efb 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/PagesRTreeIndex.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/PagesRTreeIndex.java @@ -16,7 +16,10 @@ import com.esri.core.geometry.ogc.OGCGeometry; import com.esri.core.geometry.ogc.OGCPoint; import com.facebook.presto.Session; +import com.facebook.presto.geospatial.GeometryUtils; import com.facebook.presto.geospatial.Rectangle; +import com.facebook.presto.geospatial.rtree.Flatbush; +import com.facebook.presto.geospatial.rtree.HasExtent; import com.facebook.presto.operator.SpatialIndexBuilderOperator.SpatialPredicate; import com.facebook.presto.spi.Page; import com.facebook.presto.spi.PageBuilder; @@ -26,8 +29,6 @@ import io.airlift.slice.Slice; import it.unimi.dsi.fastutil.ints.IntArrayList; import 
it.unimi.dsi.fastutil.longs.LongArrayList; -import org.locationtech.jts.geom.Envelope; -import org.locationtech.jts.index.strtree.STRtree; import org.openjdk.jol.info.ClassLayout; import java.util.List; @@ -35,6 +36,7 @@ import java.util.Optional; import java.util.OptionalDouble; +import static com.facebook.presto.geospatial.GeometryUtils.getExtent; import static com.facebook.presto.geospatial.serde.GeometrySerde.deserialize; import static com.facebook.presto.operator.JoinUtils.channelsToPages; import static com.facebook.presto.operator.SyntheticAddress.decodePosition; @@ -54,25 +56,33 @@ public class PagesRTreeIndex private final List types; private final List outputChannels; private final List> channels; - private final STRtree rtree; + private final Flatbush rtree; private final int radiusChannel; private final SpatialPredicate spatialRelationshipTest; private final JoinFilterFunction filterFunction; private final Map partitions; public static final class GeometryWithPosition + implements HasExtent { private static final int INSTANCE_SIZE = ClassLayout.parseClass(GeometryWithPosition.class).instanceSize(); private final OGCGeometry ogcGeometry; private final int partition; private final int position; + private final Rectangle extent; public GeometryWithPosition(OGCGeometry ogcGeometry, int partition, int position) + { + this(ogcGeometry, partition, position, 0.0f); + } + + public GeometryWithPosition(OGCGeometry ogcGeometry, int partition, int position, double radius) { this.ogcGeometry = requireNonNull(ogcGeometry, "ogcGeometry is null"); this.partition = partition; this.position = position; + this.extent = GeometryUtils.getExtent(ogcGeometry, radius); } public OGCGeometry getGeometry() @@ -90,9 +100,16 @@ public int getPosition() return position; } - public long getEstimatedMemorySizeInBytes() + @Override + public Rectangle getExtent() + { + return extent; + } + + @Override + public long getEstimatedSizeInBytes() { - return INSTANCE_SIZE + 
ogcGeometry.estimateMemorySize(); + return INSTANCE_SIZE + ogcGeometry.estimateMemorySize() + extent.getEstimatedSizeInBytes(); } } @@ -102,7 +119,7 @@ public PagesRTreeIndex( List types, List outputChannels, List> channels, - STRtree rtree, + Flatbush rtree, Optional radiusChannel, SpatialPredicate spatialRelationshipTest, Optional filterFunctionFactory, @@ -119,14 +136,6 @@ public PagesRTreeIndex( this.partitions = requireNonNull(partitions, "partitions is null"); } - private static Envelope getEnvelope(OGCGeometry ogcGeometry) - { - com.esri.core.geometry.Envelope env = new com.esri.core.geometry.Envelope(); - ogcGeometry.getEsriGeometry().queryEnvelope(env); - - return new Envelope(env.getXMin(), env.getXMax(), env.getYMin(), env.getYMax()); - } - /** * Returns an array of addresses from {@link PagesIndex#valueAddresses} corresponding * to rows with matching geometries. @@ -155,11 +164,10 @@ public int[] findJoinPositions(int probePosition, Page probe, int probeGeometryC IntArrayList matchingPositions = new IntArrayList(); - Envelope envelope = getEnvelope(probeGeometry); - rtree.query(envelope, item -> { - GeometryWithPosition geometryWithPosition = (GeometryWithPosition) item; + Rectangle queryRectangle = getExtent(probeGeometry); + rtree.findIntersections(queryRectangle, geometryWithPosition -> { OGCGeometry buildGeometry = geometryWithPosition.getGeometry(); - if (partitions.isEmpty() || (probePartition == geometryWithPosition.getPartition() && (probeIsPoint || (buildGeometry instanceof OGCPoint) || testReferencePoint(envelope, buildGeometry, probePartition)))) { + if (partitions.isEmpty() || (probePartition == geometryWithPosition.getPartition() && (probeIsPoint || (buildGeometry instanceof OGCPoint) || testReferencePoint(queryRectangle, buildGeometry, probePartition)))) { if (radiusChannel == -1) { if (spatialRelationshipTest.apply(buildGeometry, probeGeometry, OptionalDouble.empty())) { matchingPositions.add(geometryWithPosition.getPosition()); @@ 
-176,18 +184,18 @@ public int[] findJoinPositions(int probePosition, Page probe, int probeGeometryC return matchingPositions.toIntArray(null); } - private boolean testReferencePoint(Envelope probeEnvelope, OGCGeometry buildGeometry, int partition) + private boolean testReferencePoint(Rectangle probeEnvelope, OGCGeometry buildGeometry, int partition) { - Envelope buildEnvelope = getEnvelope(buildGeometry); - Envelope intersection = buildEnvelope.intersection(probeEnvelope); - if (intersection.isNull()) { + Rectangle buildEnvelope = getExtent(buildGeometry); + Rectangle intersection = buildEnvelope.intersection(probeEnvelope); + if (intersection == null) { return false; } Rectangle extent = partitions.get(partition); - double x = intersection.getMinX(); - double y = intersection.getMinY(); + double x = intersection.getXMin(); + double y = intersection.getYMin(); return x >= extent.getXMin() && x < extent.getXMax() && y >= extent.getYMin() && y < extent.getYMax(); } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/PagesSpatialIndexSupplier.java b/presto-main/src/main/java/com/facebook/presto/operator/PagesSpatialIndexSupplier.java index e5ade38f3a9f..4da8b62c9a6e 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/PagesSpatialIndexSupplier.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/PagesSpatialIndexSupplier.java @@ -20,6 +20,7 @@ import com.esri.core.geometry.ogc.OGCGeometry; import com.facebook.presto.Session; import com.facebook.presto.geospatial.Rectangle; +import com.facebook.presto.geospatial.rtree.Flatbush; import com.facebook.presto.operator.PagesRTreeIndex.GeometryWithPosition; import com.facebook.presto.operator.SpatialIndexBuilderOperator.SpatialPredicate; import com.facebook.presto.spi.block.Block; @@ -28,10 +29,7 @@ import io.airlift.slice.Slice; import io.airlift.units.DataSize; import it.unimi.dsi.fastutil.longs.LongArrayList; -import org.locationtech.jts.geom.Envelope; -import 
org.locationtech.jts.index.strtree.AbstractNode; -import org.locationtech.jts.index.strtree.ItemBoundable; -import org.locationtech.jts.index.strtree.STRtree; +import it.unimi.dsi.fastutil.objects.ObjectArrayList; import org.openjdk.jol.info.ClassLayout; import java.util.List; @@ -39,7 +37,6 @@ import java.util.Optional; import java.util.function.Supplier; -import static com.facebook.presto.geospatial.GeometryUtils.getJtsEnvelope; import static com.facebook.presto.geospatial.serde.GeometrySerde.deserialize; import static com.facebook.presto.operator.PagesSpatialIndex.EMPTY_INDEX; import static com.facebook.presto.operator.SyntheticAddress.decodePosition; @@ -54,9 +51,6 @@ public class PagesSpatialIndexSupplier implements Supplier { private static final int INSTANCE_SIZE = ClassLayout.parseClass(PagesSpatialIndexSupplier.class).instanceSize(); - private static final int ENVELOPE_INSTANCE_SIZE = ClassLayout.parseClass(Envelope.class).instanceSize(); - private static final int STRTREE_INSTANCE_SIZE = ClassLayout.parseClass(STRtree.class).instanceSize(); - private static final int ABSTRACT_NODE_INSTANCE_SIZE = ClassLayout.parseClass(AbstractNode.class).instanceSize(); private final Session session; private final LongArrayList addresses; @@ -66,7 +60,7 @@ public class PagesSpatialIndexSupplier private final Optional radiusChannel; private final SpatialPredicate spatialRelationshipTest; private final Optional filterFunctionFactory; - private final STRtree rtree; + private final Flatbush rtree; private final Map partitions; private final long memorySizeInBytes; @@ -94,15 +88,14 @@ public PagesSpatialIndexSupplier( this.rtree = buildRTree(addresses, channels, geometryChannel, radiusChannel, partitionChannel); this.radiusChannel = radiusChannel; - this.memorySizeInBytes = INSTANCE_SIZE + - (rtree.isEmpty() ? 
0 : STRTREE_INSTANCE_SIZE + computeMemorySizeInBytes(rtree.getRoot())); + this.memorySizeInBytes = INSTANCE_SIZE + rtree.getEstimatedSizeInBytes(); } - private static STRtree buildRTree(LongArrayList addresses, List> channels, int geometryChannel, Optional radiusChannel, Optional partitionChannel) + private static Flatbush buildRTree(LongArrayList addresses, List> channels, int geometryChannel, Optional radiusChannel, Optional partitionChannel) { - STRtree rtree = new STRtree(); Operator relateOperator = OperatorFactoryLocal.getInstance().getOperator(Operator.Type.Relate); + ObjectArrayList geometries = new ObjectArrayList<>(); for (int position = 0; position < addresses.size(); position++) { long pageAddress = addresses.getLong(position); int blockIndex = decodeSliceIndex(pageAddress); @@ -137,11 +130,10 @@ private static STRtree buildRTree(LongArrayList addresses, List> cha partition = toIntExact(INTEGER.getLong(partitionBlock, blockPosition)); } - rtree.insert(getJtsEnvelope(ogcGeometry, radius), new GeometryWithPosition(ogcGeometry, partition, position)); + geometries.add(new GeometryWithPosition(ogcGeometry, partition, position, radius)); } - rtree.build(); - return rtree; + return new Flatbush<>(geometries.toArray(new GeometryWithPosition[] {})); } private static void accelerateGeometry(OGCGeometry ogcGeometry, Operator relateOperator) @@ -157,19 +149,6 @@ private static void accelerateGeometry(OGCGeometry ogcGeometry, Operator relateO } } - private long computeMemorySizeInBytes(AbstractNode root) - { - if (root.getLevel() == 0) { - return ABSTRACT_NODE_INSTANCE_SIZE + ENVELOPE_INSTANCE_SIZE + root.getChildBoundables().stream().mapToLong(child -> computeMemorySizeInBytes((ItemBoundable) child)).sum(); - } - return ABSTRACT_NODE_INSTANCE_SIZE + ENVELOPE_INSTANCE_SIZE + root.getChildBoundables().stream().mapToLong(child -> computeMemorySizeInBytes((AbstractNode) child)).sum(); - } - - private long computeMemorySizeInBytes(ItemBoundable item) - { - return 
ENVELOPE_INSTANCE_SIZE + ((GeometryWithPosition) item.getItem()).getEstimatedMemorySizeInBytes(); - } - // doesn't include memory used by channels and addresses which are shared with PagesIndex public DataSize getEstimatedSize() {