Skip to content

Commit

Permalink
PARQUET-2471: Add support for geometry logical type
Browse files Browse the repository at this point in the history
  • Loading branch information
wgtmac committed Jun 20, 2024
1 parent 9d04cf3 commit 4f24986
Show file tree
Hide file tree
Showing 15 changed files with 1,047 additions and 1 deletion.
5 changes: 5 additions & 0 deletions parquet-column/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
<version>${jts.version}</version>
</dependency>

<dependency>
<groupId>com.carrotsearch</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.parquet.column.statistics;

import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;

Expand All @@ -30,6 +31,7 @@ public class BinaryStatistics extends Statistics<Binary> {

private Binary max;
private Binary min;
private GeometryStatistics geometryStatistics = null;

/**
* @deprecated will be removed in 2.0.0. Use {@link Statistics#createStats(org.apache.parquet.schema.Type)} instead
Expand All @@ -41,6 +43,10 @@ public BinaryStatistics() {

BinaryStatistics(PrimitiveType type) {
super(type);
LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
if (logicalType instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) {
geometryStatistics = new GeometryStatistics();
}
}

private BinaryStatistics(BinaryStatistics other) {
Expand All @@ -49,6 +55,9 @@ private BinaryStatistics(BinaryStatistics other) {
initializeStats(other.min, other.max);
}
setNumNulls(other.getNumNulls());
if (other.geometryStatistics != null) {
geometryStatistics = other.geometryStatistics.copy();
}
}

@Override
Expand All @@ -62,6 +71,9 @@ public void updateStats(Binary value) {
} else if (comparator().compare(max, value) < 0) {
max = value.copy();
}
if (geometryStatistics != null) {
geometryStatistics.update(value);
}
}

@Override
Expand All @@ -72,6 +84,9 @@ public void mergeStatisticsMinMax(Statistics stats) {
} else {
updateStats(binaryStats.getMin(), binaryStats.getMax());
}
if (geometryStatistics != null) {
geometryStatistics.merge(binaryStats.geometryStatistics);
}
}

/**
Expand Down Expand Up @@ -190,4 +205,12 @@ public void setMinMax(Binary min, Binary max) {
public BinaryStatistics copy() {
return new BinaryStatistics(this);
}

public void setGeometryStatistics(GeometryStatistics geometryStatistics) {
this.geometryStatistics = geometryStatistics;
}

public GeometryStatistics getGeometryStatistics() {
return geometryStatistics;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.column.statistics;

import org.apache.parquet.Preconditions;
import org.apache.parquet.column.statistics.geometry.BoundingBox;
import org.apache.parquet.column.statistics.geometry.Covering;
import org.apache.parquet.column.statistics.geometry.EnvelopeCovering;
import org.apache.parquet.column.statistics.geometry.GeometryTypes;
import org.apache.parquet.io.api.Binary;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.io.WKBReader;

public class GeometryStatistics {

private final BoundingBox boundingBox;
private final Covering covering;
private final GeometryTypes geometryTypes;
private final WKBReader reader = new WKBReader();

public GeometryStatistics(BoundingBox boundingBox, Covering covering, GeometryTypes geometryTypes) {
this.boundingBox = boundingBox;
this.covering = covering;
this.geometryTypes = geometryTypes;
}

public GeometryStatistics() {
this(new BoundingBox(), new EnvelopeCovering(), new GeometryTypes());
}

public BoundingBox getBoundingBox() {
return boundingBox;
}

public Covering getCovering() {
return covering;
}

public GeometryTypes getGeometryTypes() {
return geometryTypes;
}

public void update(Binary value) {
if (value == null) {
return;
}
try {
Geometry geom = reader.read(value.getBytes());
update(geom);
} catch (ParseException e) {
abort();
}
}

public void update(Geometry geom) {
boundingBox.update(geom);
covering.update(geom);
geometryTypes.update(geom);
}

public void merge(GeometryStatistics other) {
Preconditions.checkArgument(other != null, "Cannot merge with null GeometryStatistics");
boundingBox.merge(other.boundingBox);
covering.merge(other.covering);
geometryTypes.merge(other.geometryTypes);
}

public void reset() {
boundingBox.reset();
covering.reset();
geometryTypes.reset();
}

public void abort() {
boundingBox.abort();
covering.abort();
geometryTypes.abort();
}

public GeometryStatistics copy() {
return new GeometryStatistics(boundingBox.copy(), covering.copy(), geometryTypes.copy());
}

@Override
public String toString() {
return "GeometryStatistics{" + "boundingBox="
+ boundingBox + ", covering="
+ covering + ", geometryTypes="
+ geometryTypes + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ public Builder withNumNulls(long numNulls) {
return this;
}

public Builder withGeometryStatistics(GeometryStatistics geometryStatistics) {
throw new UnsupportedOperationException("Please use the GeometryBuilder");
}

public Statistics<?> build() {
Statistics<?> stats = createStats(type);
if (min != null && max != null) {
Expand Down Expand Up @@ -178,6 +182,30 @@ public Statistics<?> build() {
}
}

// Builder for GEOMETRY type to handle GeometryStatistics
private static class GeometryBuilder extends Builder {

private GeometryStatistics geometryStatistics;

public GeometryBuilder(PrimitiveType type) {
super(type);
assert type.getPrimitiveTypeName() == PrimitiveTypeName.BINARY;
}

@Override
public Builder withGeometryStatistics(GeometryStatistics geometryStatistics) {
this.geometryStatistics = geometryStatistics;
return this;
}

@Override
public Statistics<?> build() {
BinaryStatistics stats = (BinaryStatistics) super.build();
stats.setGeometryStatistics(geometryStatistics);
return stats;
}
}

private final PrimitiveType type;
private final PrimitiveComparator<T> comparator;
private boolean hasNonNullValue;
Expand Down Expand Up @@ -269,6 +297,11 @@ public static Builder getBuilderForReading(PrimitiveType type) {
if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.Float16LogicalTypeAnnotation) {
return new Float16Builder(type);
}
return new Builder(type);
case BINARY:
if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) {
return new GeometryBuilder(type);
}
default:
return new Builder(type);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.column.statistics.geometry;

import org.apache.parquet.Preconditions;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Geometry;

public class BoundingBox {

private double xMin = Double.MAX_VALUE;
private double xMax = Double.MIN_VALUE;
private double yMin = Double.MAX_VALUE;
private double yMax = Double.MIN_VALUE;
private double zMin = Double.MAX_VALUE;
private double zMax = Double.MIN_VALUE;
private double mMin = Double.MAX_VALUE;
private double mMax = Double.MIN_VALUE;

public BoundingBox(
double xMin, double xMax, double yMin, double yMax, double zMin, double zMax, double mMin, double mMax) {
this.xMin = xMin;
this.xMax = xMax;
this.yMin = yMin;
this.yMax = yMax;
this.zMin = zMin;
this.zMax = zMax;
this.mMin = mMin;
this.mMax = mMax;
}

public BoundingBox() {}

public double getXMin() {
return xMin;
}

public double getXMax() {
return xMax;
}

public double getYMin() {
return yMin;
}

public double getYMax() {
return yMax;
}

public double getZMin() {
return zMin;
}

public double getZMax() {
return zMax;
}

public double getMMin() {
return mMin;
}

public double getMMax() {
return mMax;
}

public void update(Geometry geom) {
if (geom == null || geom.isEmpty()) {
return;
}
Coordinate[] coordinates = geom.getCoordinates();
for (Coordinate coordinate : coordinates) {
update(coordinate.getX(), coordinate.getY(), coordinate.getZ(), coordinate.getM());
}
}

public void update(double x, double y, double z, double m) {
xMin = Math.min(xMin, x);
xMax = Math.max(xMax, x);
yMin = Math.min(yMin, y);
yMax = Math.max(yMax, y);
zMin = Math.min(zMin, z);
zMax = Math.max(zMax, z);
mMin = Math.min(mMin, m);
mMax = Math.max(mMax, m);
}

public void merge(BoundingBox other) {
Preconditions.checkArgument(other != null, "Cannot merge with null bounding box");
xMin = Math.min(xMin, other.xMin);
xMax = Math.max(xMax, other.xMax);
yMin = Math.min(yMin, other.yMin);
yMax = Math.max(yMax, other.yMax);
zMin = Math.min(zMin, other.zMin);
zMax = Math.max(zMax, other.zMax);
mMin = Math.min(mMin, other.mMin);
mMax = Math.max(mMax, other.mMax);
}

public void reset() {
xMin = Double.MAX_VALUE;
xMax = Double.MIN_VALUE;
yMin = Double.MAX_VALUE;
yMax = Double.MIN_VALUE;
zMin = Double.MAX_VALUE;
zMax = Double.MIN_VALUE;
mMin = Double.MAX_VALUE;
mMax = Double.MIN_VALUE;
}

public void abort() {
xMin = Double.NaN;
xMax = Double.NaN;
yMin = Double.NaN;
yMax = Double.NaN;
zMin = Double.NaN;
zMax = Double.NaN;
mMin = Double.NaN;
mMax = Double.NaN;
}

public BoundingBox copy() {
return new BoundingBox(xMin, xMax, yMin, yMax, zMin, zMax, mMin, mMax);
}

@Override
public String toString() {
return "BoundingBox{" + "xMin="
+ xMin + ", xMax="
+ xMax + ", yMin="
+ yMin + ", yMax="
+ yMax + ", zMin="
+ zMin + ", zMax="
+ zMax + ", mMin="
+ mMin + ", mMax="
+ mMax + '}';
}
}
Loading

0 comments on commit 4f24986

Please sign in to comment.