diff --git a/parquet-column/pom.xml b/parquet-column/pom.xml index 4d5aa20f9e..a0ce5d142e 100644 --- a/parquet-column/pom.xml +++ b/parquet-column/pom.xml @@ -76,6 +76,12 @@ ${slf4j.version} + + org.locationtech.jts + jts-core + ${jts.version} + + com.carrotsearch junit-benchmarks diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java index 87d39bf16e..e4d72ebade 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java @@ -18,7 +18,9 @@ */ package org.apache.parquet.column.statistics; +import org.apache.parquet.column.statistics.geometry.GeometryStatistics; import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Types; @@ -30,6 +32,7 @@ public class BinaryStatistics extends Statistics { private Binary max; private Binary min; + private GeometryStatistics geometryStatistics = null; /** * @deprecated will be removed in 2.0.0. 
Use {@link Statistics#createStats(org.apache.parquet.schema.Type)} instead @@ -41,6 +44,13 @@ public BinaryStatistics() { BinaryStatistics(PrimitiveType type) { super(type); + LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation(); + if (logicalType instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) { + LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType = + (LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) logicalType; + geometryStatistics = new GeometryStatistics( + geometryLogicalType.getEdges(), geometryLogicalType.getCrs(), geometryLogicalType.getMetadata()); + } } private BinaryStatistics(BinaryStatistics other) { @@ -49,6 +59,9 @@ private BinaryStatistics(BinaryStatistics other) { initializeStats(other.min, other.max); } setNumNulls(other.getNumNulls()); + if (other.geometryStatistics != null) { + geometryStatistics = other.geometryStatistics.copy(); + } } @Override @@ -62,6 +75,9 @@ public void updateStats(Binary value) { } else if (comparator().compare(max, value) < 0) { max = value.copy(); } + if (geometryStatistics != null) { + geometryStatistics.update(value); + } } @Override @@ -72,6 +88,9 @@ public void mergeStatisticsMinMax(Statistics stats) { } else { updateStats(binaryStats.getMin(), binaryStats.getMax()); } + if (geometryStatistics != null) { + geometryStatistics.merge(binaryStats.geometryStatistics); + } } /** @@ -190,4 +209,12 @@ public void setMinMax(Binary min, Binary max) { public BinaryStatistics copy() { return new BinaryStatistics(this); } + + public void setGeometryStatistics(GeometryStatistics geometryStatistics) { + this.geometryStatistics = geometryStatistics; + } + + public GeometryStatistics getGeometryStatistics() { + return geometryStatistics; + } } diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java index 83070d49f1..f18bca9598 100644 --- 
a/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java @@ -20,6 +20,7 @@ import java.util.Arrays; import org.apache.parquet.column.UnknownColumnTypeException; +import org.apache.parquet.column.statistics.geometry.GeometryStatistics; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.Float16; import org.apache.parquet.schema.LogicalTypeAnnotation; @@ -64,6 +65,10 @@ public Builder withNumNulls(long numNulls) { return this; } + public Builder withGeometryStatistics(GeometryStatistics geometryStatistics) { + throw new UnsupportedOperationException("Please use the GeometryBuilder"); + } + public Statistics build() { Statistics stats = createStats(type); if (min != null && max != null) { @@ -178,6 +183,30 @@ public Statistics build() { } } + // Builder for GEOMETRY type to handle GeometryStatistics + private static class GeometryBuilder extends Builder { + + private GeometryStatistics geometryStatistics; + + public GeometryBuilder(PrimitiveType type) { + super(type); + assert type.getPrimitiveTypeName() == PrimitiveTypeName.BINARY; + } + + @Override + public Builder withGeometryStatistics(GeometryStatistics geometryStatistics) { + this.geometryStatistics = geometryStatistics; + return this; + } + + @Override + public Statistics build() { + BinaryStatistics stats = (BinaryStatistics) super.build(); + stats.setGeometryStatistics(geometryStatistics); + return stats; + } + } + private final PrimitiveType type; private final PrimitiveComparator comparator; private boolean hasNonNullValue; @@ -269,6 +298,11 @@ public static Builder getBuilderForReading(PrimitiveType type) { if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.Float16LogicalTypeAnnotation) { return new Float16Builder(type); } + return new Builder(type); + case BINARY: + if (type.getLogicalTypeAnnotation() instanceof 
LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) { + return new GeometryBuilder(type); + } default: return new Builder(type); } diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/BoundingBox.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/BoundingBox.java new file mode 100644 index 0000000000..5c4a57c50d --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/BoundingBox.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.parquet.column.statistics.geometry; + +import org.apache.parquet.Preconditions; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Geometry; + +public class BoundingBox { + + private double xMin = Double.POSITIVE_INFINITY; + private double xMax = Double.NEGATIVE_INFINITY; + private double yMin = Double.POSITIVE_INFINITY; + private double yMax = Double.NEGATIVE_INFINITY; + private double zMin = Double.POSITIVE_INFINITY; + private double zMax = Double.NEGATIVE_INFINITY; + private double mMin = Double.POSITIVE_INFINITY; + private double mMax = Double.NEGATIVE_INFINITY; + + public BoundingBox( + double xMin, double xMax, double yMin, double yMax, double zMin, double zMax, double mMin, double mMax) { + this.xMin = xMin; + this.xMax = xMax; + this.yMin = yMin; + this.yMax = yMax; + this.zMin = zMin; + this.zMax = zMax; + this.mMin = mMin; + this.mMax = mMax; + } + + public BoundingBox() {} + + public double getXMin() { + return xMin; + } + + public double getXMax() { + return xMax; + } + + public double getYMin() { + return yMin; + } + + public double getYMax() { + return yMax; + } + + public double getZMin() { + return zMin; + } + + public double getZMax() { + return zMax; + } + + public double getMMin() { + return mMin; + } + + public double getMMax() { + return mMax; + } + + void update(double minX, double maxX, double minY, double maxY, double minZ, double maxZ) { + xMin = Math.min(xMin, minX); + yMin = Math.min(yMin, minY); + xMax = Math.max(xMax, maxX); + yMax = Math.max(yMax, maxY); + zMin = Math.min(zMin, minZ); + zMax = Math.max(zMax, maxZ); + } + + // Method to update the bounding box with the coordinates of a Geometry object + // geometry can be changed by this method + void update(Geometry geometry) { + GeometryUtils.normalizeLongitude(geometry); + Envelope envelope = geometry.getEnvelopeInternal(); + double minX = envelope.getMinX(); + double minY = 
envelope.getMinY(); + double maxX = envelope.getMaxX(); + double maxY = envelope.getMaxY(); + + // JTS (Java Topology Suite) does not handle Z-coordinates directly in the Envelope class + // because it's primarily used for 2D geometries. However, we can iterate through the + // coordinates of the geometry to find the minimum and maximum Z values. + double minZ = Double.POSITIVE_INFINITY; + double maxZ = Double.NEGATIVE_INFINITY; + + Coordinate[] coordinates = geometry.getCoordinates(); + for (Coordinate coord : coordinates) { + if (!Double.isNaN(coord.getZ())) { + // Update zMin and zMax by iterating through the coordinates. + minZ = Math.min(minZ, coord.getZ()); + maxZ = Math.max(maxZ, coord.getZ()); + } + } + + update(minX, maxX, minY, maxY, minZ, maxZ); + } + + void merge(BoundingBox other) { + Preconditions.checkArgument(other != null, "Cannot merge with null bounding box"); + xMin = Math.min(xMin, other.xMin); + xMax = Math.max(xMax, other.xMax); + yMin = Math.min(yMin, other.yMin); + yMax = Math.max(yMax, other.yMax); + zMin = Math.min(zMin, other.zMin); + zMax = Math.max(zMax, other.zMax); + mMin = Math.min(mMin, other.mMin); + mMax = Math.max(mMax, other.mMax); + } + + public void reset() { + xMin = Double.POSITIVE_INFINITY; + xMax = Double.NEGATIVE_INFINITY; + yMin = Double.POSITIVE_INFINITY; + yMax = Double.NEGATIVE_INFINITY; + zMin = Double.POSITIVE_INFINITY; + zMax = Double.NEGATIVE_INFINITY; + mMin = Double.POSITIVE_INFINITY; + mMax = Double.NEGATIVE_INFINITY; + } + + public void abort() { + xMin = Double.NaN; + xMax = Double.NaN; + yMin = Double.NaN; + yMax = Double.NaN; + zMin = Double.NaN; + zMax = Double.NaN; + mMin = Double.NaN; + mMax = Double.NaN; + } + + public BoundingBox copy() { + return new BoundingBox(xMin, xMax, yMin, yMax, zMin, zMax, mMin, mMax); + } + + @Override + public String toString() { + return "BoundingBox{" + "xMin=" + + xMin + ", xMax=" + + xMax + ", yMin=" + + yMin + ", yMax=" + + yMax + ", zMin=" + + zMin + ", zMax=" + + 
zMax + ", mMin=" + + mMin + ", mMax=" + + mMax + '}'; + } +} diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/DummyBoundingBox.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/DummyBoundingBox.java new file mode 100644 index 0000000000..334b9ddb4a --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/DummyBoundingBox.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.parquet.column.statistics.geometry; + +import org.locationtech.jts.geom.Geometry; + +// Immutable dummy BoundingBox class +class DummyBoundingBox extends BoundingBox { + @Override + public void update(double minX, double maxX, double minY, double maxY, double minZ, double maxZ) { + // No-op + } + + @Override + public void update(Geometry geometry) { + // No-op + } + + @Override + public void merge(BoundingBox other) { + // No-op + } + + @Override + public void reset() { + // No-op + } + + @Override + public void abort() { + // No-op + } + + @Override + public BoundingBox copy() { + return this; // Return the same instance + } +} diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryStatistics.java new file mode 100644 index 0000000000..35874904f0 --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryStatistics.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.parquet.column.statistics.geometry; + +import java.nio.ByteBuffer; +import org.apache.parquet.Preconditions; +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.io.ParseException; +import org.locationtech.jts.io.WKBReader; + +public class GeometryStatistics { + + private static final BoundingBox DUMMY_BOUNDING_BOX = new DummyBoundingBox(); + + // Metadata that may impact the statistics calculation + private final LogicalTypeAnnotation.Edges edges; + private final String crs; + private final ByteBuffer metadata; + + private final BoundingBox boundingBox; + private final GeometryTypes geometryTypes; + private final WKBReader reader = new WKBReader(); + + public GeometryStatistics( + LogicalTypeAnnotation.Edges edges, + String crs, + ByteBuffer metadata, + BoundingBox boundingBox, + GeometryTypes geometryTypes) { + this.edges = edges; + this.crs = crs; + this.metadata = metadata; + this.boundingBox = supportsBoundingBox() ? boundingBox : DUMMY_BOUNDING_BOX; + this.geometryTypes = geometryTypes; + } + + public GeometryStatistics(LogicalTypeAnnotation.Edges edges, String crs, ByteBuffer metadata) { + this(edges, crs, metadata, new BoundingBox(), new GeometryTypes()); + } + + public BoundingBox getBoundingBox() { + return boundingBox; + } + + // public Map getCoverings() { + // return coverings; + // } + + public GeometryTypes getGeometryTypes() { + return geometryTypes; + } + + public void update(Binary value) { + if (value == null) { + return; + } + try { + Geometry geom = reader.read(value.getBytes()); + update(geom); + } catch (ParseException e) { + abort(); + } + } + + private void update(Geometry geom) { + if (supportsBoundingBox()) { + boundingBox.update(geom); + } + geometryTypes.update(geom); + } + + /** + * A bounding box is a rectangular region defined by two points, the lower left + * and upper right corners. 
It is used to represent the minimum and maximum + * coordinates of a geometry. Only planar geometries can have a bounding box. + */ + private boolean supportsBoundingBox() { + // Only planar geometries can have a bounding box + // based on the current specification + return edges == LogicalTypeAnnotation.Edges.PLANAR; + } + + /** + * A custom WKB-encoded polygon or multi-polygon to represent a covering of + * geometries. For example, it may be a bounding box, or an envelope of geometries + * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if + * an edge of geographic coordinates crosses the antimeridian). In addition, it can + * also be used to provide vendor-agnostic coverings like S2 or H3 grids. + */ + private boolean supportsCovering() { + // This version assumes only build coverings for planar edges + // In case of spherical edges, no coverings are built + return edges == LogicalTypeAnnotation.Edges.PLANAR; + } + + public void merge(GeometryStatistics other) { + Preconditions.checkArgument(other != null, "Cannot merge with null GeometryStatistics"); + + if (boundingBox != null && other.boundingBox != null) { + boundingBox.merge(other.boundingBox); + } + + if (geometryTypes != null && other.geometryTypes != null) { + geometryTypes.merge(other.geometryTypes); + } + } + + public void reset() { + boundingBox.reset(); + geometryTypes.reset(); + } + + public void abort() { + boundingBox.abort(); + geometryTypes.abort(); + } + + // Copy the statistics + public GeometryStatistics copy() { + return new GeometryStatistics( + edges, + crs, + metadata, + boundingBox != null ? boundingBox.copy() : null, + geometryTypes != null ? 
geometryTypes.copy() : null); + } + + @Override + public String toString() { + return "GeometryStatistics{" + "boundingBox=" + boundingBox + ", coverings=" + geometryTypes + '}'; + } +} diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryTypes.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryTypes.java new file mode 100644 index 0000000000..4c85382ba0 --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryTypes.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.parquet.column.statistics.geometry; + +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.parquet.Preconditions; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Geometry; + +public class GeometryTypes { + + private static final int UNKNOWN_TYPE_ID = -1; + private Set types = new HashSet<>(); + private boolean valid = true; + + public GeometryTypes(Set types) { + this.types = types; + } + + public GeometryTypes() {} + + public Set getTypes() { + return types; + } + + void update(Geometry geometry) { + if (!valid) { + return; + } + int code = getGeometryTypeCode(geometry); + if (code != UNKNOWN_TYPE_ID) { + types.add(code); + } else { + valid = false; + types.clear(); + } + } + + public void merge(GeometryTypes other) { + Preconditions.checkArgument(other != null, "Cannot merge with null GeometryTypes"); + if (!valid) { + return; + } + if (!other.valid) { + valid = false; + types.clear(); + return; + } + types.addAll(other.types); + } + + public void reset() { + types.clear(); + valid = true; + } + + public void abort() { + valid = false; + types.clear(); + } + + public GeometryTypes copy() { + return new GeometryTypes(new HashSet<>(types)); + } + + @Override + public String toString() { + return "GeometryTypes{" + "types=" + + types.stream().map(this::typeIdToString).collect(Collectors.toSet()) + '}'; + } + + private int getGeometryTypeId(Geometry geometry) { + switch (geometry.getGeometryType()) { + case Geometry.TYPENAME_POINT: + return 1; + case Geometry.TYPENAME_LINESTRING: + return 2; + case Geometry.TYPENAME_POLYGON: + return 3; + case Geometry.TYPENAME_MULTIPOINT: + return 4; + case Geometry.TYPENAME_MULTILINESTRING: + return 5; + case Geometry.TYPENAME_MULTIPOLYGON: + return 6; + case Geometry.TYPENAME_GEOMETRYCOLLECTION: + return 7; + default: + return UNKNOWN_TYPE_ID; + } + } + + /** + * This is from the following spec proposed: + *

+ * The geometry types of all geometries, or an empty array if they are not + * known. This is borrowed from `geometry_types` column metadata of GeoParquet [1] + * except that values in the list are WKB (ISO variant) integer codes [2]. Table + * below shows the most common geometry types and their codes: + *

+ * | Type | XY | XYZ | XYM | XYZM | + * | :----------------- | :--- | :--- | :--- | :--: | + * | Point | 0001 | 1001 | 2001 | 3001 | + * | LineString | 0002 | 1002 | 2002 | 3002 | + * | Polygon | 0003 | 1003 | 2003 | 3003 | + * | MultiPoint | 0004 | 1004 | 2004 | 3004 | + * | MultiLineString | 0005 | 1005 | 2005 | 3005 | + * | MultiPolygon | 0006 | 1006 | 2006 | 3006 | + * | GeometryCollection | 0007 | 1007 | 2007 | 3007 | + *

+ * In addition, the following rules are used: + * - A list of multiple values indicates that multiple geometry types are + * present (e.g. `[0003, 0006]`). + * - An empty array explicitly signals that the geometry types are not known. + * - The geometry types in the list must be unique (e.g. `[0001, 0001]` + * is not valid). + *

+ * Please refer to links below for more detail: + * [1] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.0.0/format-specs/geoparquet.md?plain=1#L91 + */ + private int getGeometryTypeCode(Geometry geometry) { + int typeId = getGeometryTypeId(geometry); + if (typeId == UNKNOWN_TYPE_ID) { + return UNKNOWN_TYPE_ID; + } + Coordinate[] coordinates = geometry.getCoordinates(); + boolean hasZ = false; + boolean hasM = false; + for (Coordinate coordinate : coordinates) { + if (!Double.isNaN(coordinate.getZ())) { + hasZ = true; + } + if (!Double.isNaN(coordinate.getM())) { + hasM = true; + } + if (hasZ && hasM) { + break; + } + } + if (hasZ) { + typeId += 1000; + } + if (hasM) { + typeId += 2000; + } + return typeId; + } + + private String typeIdToString(int typeId) { + String typeString; + switch (typeId % 1000) { + case 1: + typeString = Geometry.TYPENAME_POINT; + break; + case 2: + typeString = Geometry.TYPENAME_LINESTRING; + break; + case 3: + typeString = Geometry.TYPENAME_POLYGON; + break; + case 4: + typeString = Geometry.TYPENAME_MULTIPOINT; + break; + case 5: + typeString = Geometry.TYPENAME_MULTILINESTRING; + break; + case 6: + typeString = Geometry.TYPENAME_MULTIPOLYGON; + break; + case 7: + typeString = Geometry.TYPENAME_GEOMETRYCOLLECTION; + break; + default: + return "Unknown"; + } + if (typeId >= 3000) { + typeString += " (XYZM)"; + } else if (typeId >= 2000) { + typeString += " (XYM)"; + } else if (typeId >= 1000) { + typeString += " (XYZ)"; + } else { + typeString += " (XY)"; + } + return typeString; + } +} diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryUtils.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryUtils.java new file mode 100644 index 0000000000..f91eafe49b --- /dev/null +++ 
b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryUtils.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.parquet.column.statistics.geometry; + +import org.locationtech.jts.geom.CoordinateSequence; +import org.locationtech.jts.geom.CoordinateSequenceFilter; +import org.locationtech.jts.geom.Geometry; + +class GeometryUtils { + + public static void normalizeLongitude(Geometry geometry) { + if (geometry == null || geometry.isEmpty()) { + return; + } + + geometry.apply(new CoordinateSequenceFilter() { + @Override + public void filter(CoordinateSequence seq, int i) { + double x = seq.getX(i); + // Normalize the longitude to be within -180 to 180 range + while (x > 180) x -= 360; + while (x < -180) x += 360; + seq.setOrdinate(i, CoordinateSequence.X, x); + } + + @Override + public boolean isDone() { + return false; // Continue processing until all coordinates are processed + } + + @Override + public boolean isGeometryChanged() { + return true; // The geometry is changed as we are modifying the coordinates + } + }); + + geometry.geometryChanged(); // Notify the geometry that its coordinates have been changed + } +} diff --git 
a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java index 86099717df..3b6a210b4e 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java @@ -21,6 +21,7 @@ import java.nio.ByteBuffer; import java.util.List; import java.util.PrimitiveIterator; +import org.apache.parquet.column.statistics.geometry.GeometryStatistics; import org.apache.parquet.filter2.predicate.FilterPredicate.Visitor; import org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter; @@ -71,4 +72,12 @@ default List getRepetitionLevelHistogram() { default List getDefinitionLevelHistogram() { throw new UnsupportedOperationException("Definition level histogram is not implemented"); } + + /** + * @return the unmodifiable list of the geometry statistics for each page; + * used for converting to the related thrift object + */ + default List getGeometryStatistics() { + throw new UnsupportedOperationException("Geometry statistics is not implemented"); + } } diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java index ffbb82197b..7e41a976c8 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java @@ -38,8 +38,10 @@ import java.util.Set; import java.util.function.IntPredicate; import org.apache.parquet.column.MinMax; +import org.apache.parquet.column.statistics.BinaryStatistics; import org.apache.parquet.column.statistics.SizeStatistics; import org.apache.parquet.column.statistics.Statistics; +import 
org.apache.parquet.column.statistics.geometry.GeometryStatistics; import org.apache.parquet.filter2.predicate.Operators.And; import org.apache.parquet.filter2.predicate.Operators.Contains; import org.apache.parquet.filter2.predicate.Operators.Eq; @@ -56,6 +58,7 @@ import org.apache.parquet.filter2.predicate.Operators.UserDefined; import org.apache.parquet.filter2.predicate.UserDefinedPredicate; import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.PrimitiveComparator; import org.apache.parquet.schema.PrimitiveStringifier; import org.apache.parquet.schema.PrimitiveType; @@ -105,6 +108,8 @@ int translate(int arrayIndex) { private long[] repLevelHistogram; // might be null private long[] defLevelHistogram; + // might be null + private GeometryStatistics[] geometryStatistics; static String truncate(String str) { if (str.length() <= MAX_VALUE_LENGTH_FOR_TOSTRING) { @@ -182,6 +187,17 @@ public List getDefinitionLevelHistogram() { return LongLists.unmodifiable(LongArrayList.wrap(defLevelHistogram)); } + @Override + public List getGeometryStatistics() { + List geomStats = new ArrayList<>(); + if (geometryStatistics != null) { + for (GeometryStatistics stats : geometryStatistics) { + geomStats.add(stats.copy()); + } + } + return geomStats; + } + @Override public String toString() { try (Formatter formatter = new Formatter()) { @@ -494,6 +510,7 @@ public long getMinMaxSize() { private int nextPageIndex; private LongList repLevelHistogram = new LongArrayList(); private LongList defLevelHistogram = new LongArrayList(); + private List geometryStatistics = new ArrayList<>(); /** * @return a no-op builder that does not collect statistics objects and therefore returns {@code null} at @@ -584,10 +601,46 @@ public static ColumnIndex build( List maxValues, List repLevelHistogram, List defLevelHistogram) { + return build(type, boundaryOrder, nullPages, nullCounts, minValues, maxValues, null, null, null); + } 
+ + /** + * @param type + * the primitive type + * @param boundaryOrder + * the boundary order of the min/max values + * @param nullPages + * the null pages (one boolean value for each page that signifies whether the page consists of nulls + * entirely) + * @param nullCounts + * the number of null values for each page + * @param minValues + * the min values for each page + * @param maxValues + * the max values for each page + * @param repLevelHistogram + * the repetition level histogram for all levels of each page + * @param defLevelHistogram + * the definition level histogram for all levels of each page + * @param geometryStatistics + * the geometry statistics for each page (apply to GEOMETRY logical type only) + * @return the newly created {@link ColumnIndex} object based on the specified arguments + */ + public static ColumnIndex build( + PrimitiveType type, + BoundaryOrder boundaryOrder, + List nullPages, + List nullCounts, + List minValues, + List maxValues, + List repLevelHistogram, + List defLevelHistogram, + List geometryStatistics) { ColumnIndexBuilder builder = createNewBuilder(type, Integer.MAX_VALUE); - builder.fill(nullPages, nullCounts, minValues, maxValues, repLevelHistogram, defLevelHistogram); + builder.fill( + nullPages, nullCounts, minValues, maxValues, repLevelHistogram, defLevelHistogram, geometryStatistics); ColumnIndexBase columnIndex = builder.build(type); columnIndex.boundaryOrder = requireNonNull(boundaryOrder); return columnIndex; @@ -635,6 +688,16 @@ public void add(Statistics stats, SizeStatistics sizeStats) { defLevelHistogram = null; } + if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) { + assert stats instanceof BinaryStatistics; + BinaryStatistics binaryStats = (BinaryStatistics) stats; + if (geometryStatistics != null && binaryStats.getGeometryStatistics() != null) { + geometryStatistics.add(binaryStats.getGeometryStatistics()); + } else { + geometryStatistics = null; + } + } + 
++nextPageIndex; } @@ -648,7 +711,8 @@ private void fill( List minValues, List maxValues, List repLevelHistogram, - List defLevelHistogram) { + List defLevelHistogram, + List geometryStatistics) { clear(); int pageCount = nullPages.size(); if ((nullCounts != null && nullCounts.size() != pageCount) @@ -695,6 +759,9 @@ private void fill( if (defLevelHistogram != null) { this.defLevelHistogram.addAll(defLevelHistogram); } + if (geometryStatistics != null) { + this.geometryStatistics.addAll(geometryStatistics); + } } /** @@ -731,6 +798,10 @@ private ColumnIndexBase build(PrimitiveType type) { if (defLevelHistogram != null && !defLevelHistogram.isEmpty()) { columnIndex.defLevelHistogram = defLevelHistogram.toLongArray(); } + if (geometryStatistics != null && !geometryStatistics.isEmpty()) { + columnIndex.geometryStatistics = new GeometryStatistics[geometryStatistics.size()]; + geometryStatistics.toArray(columnIndex.geometryStatistics); + } return columnIndex; } @@ -777,6 +848,7 @@ private void clear() { pageIndexes.clear(); repLevelHistogram.clear(); defLevelHistogram.clear(); + geometryStatistics.clear(); } abstract void clearMinMax(); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 05629dd388..6e0a28d263 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -33,6 +33,7 @@ import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER; +import java.nio.ByteBuffer; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -146,6 +147,22 @@ protected LogicalTypeAnnotation fromString(List params) { protected LogicalTypeAnnotation fromString(List params) { return float16Type(); } + }, + 
GEOMETRY { + @Override + protected LogicalTypeAnnotation fromString(List params) { + if (params.size() < 2) { + throw new RuntimeException( + "Expecting at least 2 parameters for geometry logical type, got " + params.size()); + } + GeometryEncoding encoding = GeometryEncoding.valueOf(params.get(0)); + Edges edges = Edges.valueOf(params.get(1)); + String crs = params.size() > 2 ? params.get(2) : null; + String crs_encoding = params.size() > 3 ? params.get(3) : null; + ByteBuffer metadata = + params.size() > 4 ? ByteBuffer.wrap(params.get(4).getBytes()) : null; + return geometryType(encoding, edges, crs, crs_encoding, metadata); + } }; protected abstract LogicalTypeAnnotation fromString(List params); @@ -316,6 +333,11 @@ public static Float16LogicalTypeAnnotation float16Type() { return Float16LogicalTypeAnnotation.INSTANCE; } + public static GeometryLogicalTypeAnnotation geometryType( + GeometryEncoding encoding, Edges edges, String crs, String crs_encoding, ByteBuffer metadata) { + return new GeometryLogicalTypeAnnotation(encoding, edges, crs, crs_encoding, metadata); + } + public static class StringLogicalTypeAnnotation extends LogicalTypeAnnotation { private static final StringLogicalTypeAnnotation INSTANCE = new StringLogicalTypeAnnotation(); @@ -1091,6 +1113,129 @@ public int hashCode() { } } + /** + * Allowed for physical type: BYTE_ARRAY. + * + * Well-known binary (WKB) representations of geometries. It supports 2D or + * 3D geometries of the standard geometry types (Point, LineString, Polygon, + * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This + * is the preferred option for maximum portability. + * + * This encoding enables GeometryStatistics to be set in the column chunk + * and page index. + */ + public enum GeometryEncoding { + WKB + } + + /** + * Interpretation for edges of GEOMETRY logical type, i.e. whether the edge + * between points represent a straight cartesian line or the shortest line on + * the sphere. 
Please note that it only applies to polygons. + */ + public enum Edges { + PLANAR, + SPHERICAL + } + + public static class GeometryLogicalTypeAnnotation extends LogicalTypeAnnotation { + private final GeometryEncoding encoding; + private final Edges edges; + private final String crs; + private final String crs_encoding; + private final ByteBuffer metadata; + + private GeometryLogicalTypeAnnotation( + GeometryEncoding encoding, Edges edges, String crs, String crs_encoding, ByteBuffer metadata) { + Preconditions.checkArgument(encoding != null, "Geometry encoding is required"); + Preconditions.checkArgument(edges != null, "Geometry edges is required"); + this.encoding = encoding; + this.edges = edges; + this.crs = crs; + this.crs_encoding = crs_encoding; + this.metadata = metadata; + } + + @Override + @Deprecated + public OriginalType toOriginalType() { + return null; + } + + @Override + public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + return logicalTypeAnnotationVisitor.visit(this); + } + + @Override + LogicalTypeToken getType() { + return LogicalTypeToken.GEOMETRY; + } + + @Override + protected String typeParametersAsString() { + StringBuilder sb = new StringBuilder(); + sb.append("("); + sb.append(encoding); + sb.append(","); + sb.append(edges); + if (crs != null && !crs.isEmpty()) { + sb.append(","); + sb.append(crs); + } + // TODO: Fix it: there's a high probability that crs itself contains comma, + // so this may introduce ambiguity to the generated type parameters. 
+ if (metadata != null) { + sb.append(","); + sb.append(metadata); + } + sb.append(")"); + return sb.toString(); + } + + public GeometryEncoding getEncoding() { + return encoding; + } + + public Edges getEdges() { + return edges; + } + + public String getCrs() { + return crs; + } + + public String getCrs_encoding() { + return crs_encoding; + } + + public ByteBuffer getMetadata() { + return metadata; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof GeometryLogicalTypeAnnotation)) { + return false; + } + GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation) obj; + return (encoding == other.encoding) && (edges == other.edges) && Objects.equals(crs, other.crs); + } + + @Override + public int hashCode() { + return Objects.hash(encoding, crs, edges); + } + + @Override + PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) { + if (encoding == GeometryEncoding.WKB) { + return PrimitiveStringifier.WKB_STRINGIFIER; + } + return super.valueStringifier(primitiveType); + } + } + /** * Implement this interface to visit a logical type annotation in the schema. * The default implementation for each logical type specific visitor method is empty. 
@@ -1162,5 +1307,9 @@ default Optional visit(MapKeyValueTypeAnnotation mapKeyValueLogicalType) { default Optional visit(Float16LogicalTypeAnnotation float16LogicalType) { return empty(); } + + default Optional visit(GeometryLogicalTypeAnnotation geometryLogicalType) { + return empty(); + } } } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java index c46e94367f..bb5c8a9474 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java @@ -35,6 +35,9 @@ import java.util.concurrent.TimeUnit; import javax.naming.OperationNotSupportedException; import org.apache.parquet.io.api.Binary; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.io.ParseException; +import org.locationtech.jts.io.WKBReader; /** * Class that provides string representations for the primitive values. 
These string values are to be used for @@ -449,4 +452,20 @@ String stringifyNotNull(Binary value) { return Float16.toFloatString(value); } }; + + static final PrimitiveStringifier WKB_STRINGIFIER = new BinaryStringifierBase("WKB_STRINGIFIER") { + + @Override + String stringifyNotNull(Binary value) { + + Geometry geometry; + try { + WKBReader reader = new WKBReader(); + geometry = reader.read(value.getBytesUnsafe()); + return geometry.toText(); + } catch (ParseException e) { + return BINARY_INVALID; + } + } + }; } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index e74d7cde02..f08e20333d 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -271,6 +271,14 @@ public Optional visit( LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) { return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR); } + + @Override + public Optional visit( + LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) { + // ColumnOrder is undefined for GEOMETRY logical type. Use the default comparator for + // now. 
+ return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR); + } }) .orElseThrow(() -> new ShouldNeverHappenException( "No comparator logic implemented for BINARY logical type: " + logicalType)); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 5bc2f89f47..45985c7a31 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -571,6 +571,15 @@ public Optional visit( return checkBinaryPrimitiveType(enumLogicalType); } + @Override + public Optional visit( + LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) { + if (geometryLogicalType.getEncoding() != LogicalTypeAnnotation.GeometryEncoding.WKB) { + throw new RuntimeException("Only WKB geometry encoding is supported for now"); + } + return checkBinaryPrimitiveType(geometryLogicalType); + } + private Optional checkFixedPrimitiveType( int l, LogicalTypeAnnotation logicalTypeAnnotation) { Preconditions.checkState( diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml index ce023b5185..95a15ef098 100644 --- a/parquet-hadoop/pom.xml +++ b/parquet-hadoop/pom.xml @@ -135,6 +135,12 @@ jar compile + + org.locationtech.jts + jts-core + ${jts.version} + test + io.airlift aircompressor diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 194670f2df..6ac392e203 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -24,6 +24,7 @@ import static org.apache.parquet.format.Util.readFileMetaData; import static org.apache.parquet.format.Util.writeColumnMetaData; import static 
org.apache.parquet.format.Util.writePageHeader; +import static org.apache.parquet.schema.LogicalTypeAnnotation.Edges.PLANAR; import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; @@ -50,6 +51,7 @@ import org.apache.parquet.column.EncodingStats; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.column.statistics.BinaryStatistics; +import org.apache.parquet.column.statistics.geometry.GeometryTypes; import org.apache.parquet.column.values.bloomfilter.BloomFilter; import org.apache.parquet.crypto.AesCipher; import org.apache.parquet.crypto.AesGcmEncryptor; @@ -65,6 +67,7 @@ import org.apache.parquet.format.BloomFilterHash; import org.apache.parquet.format.BloomFilterHeader; import org.apache.parquet.format.BoundaryOrder; +import org.apache.parquet.format.BoundingBox; import org.apache.parquet.format.BsonType; import org.apache.parquet.format.ColumnChunk; import org.apache.parquet.format.ColumnCryptoMetaData; @@ -78,12 +81,16 @@ import org.apache.parquet.format.DateType; import org.apache.parquet.format.DecimalType; import org.apache.parquet.format.DictionaryPageHeader; +import org.apache.parquet.format.Edges; import org.apache.parquet.format.Encoding; import org.apache.parquet.format.EncryptionWithColumnKey; import org.apache.parquet.format.EnumType; import org.apache.parquet.format.FieldRepetitionType; import org.apache.parquet.format.FileMetaData; import org.apache.parquet.format.Float16Type; +import org.apache.parquet.format.GeometryEncoding; +import org.apache.parquet.format.GeometryStatistics; +import org.apache.parquet.format.GeometryType; import org.apache.parquet.format.IntType; import org.apache.parquet.format.JsonType; import org.apache.parquet.format.KeyValue; @@ -113,12 +120,8 @@ import org.apache.parquet.format.UUIDType; import org.apache.parquet.format.Uncompressed; import org.apache.parquet.format.XxHash; -import org.apache.parquet.hadoop.metadata.BlockMetaData; -import 
org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; -import org.apache.parquet.hadoop.metadata.ColumnPath; -import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.hadoop.metadata.*; import org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType; -import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.internal.column.columnindex.BinaryTruncator; import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder; import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder; @@ -346,6 +349,27 @@ static org.apache.parquet.format.TimeUnit convertUnit(LogicalTypeAnnotation.Time } } + static org.apache.parquet.format.GeometryEncoding convertGeometryEncoding( + LogicalTypeAnnotation.GeometryEncoding encoding) { + switch (encoding) { + case WKB: + return org.apache.parquet.format.GeometryEncoding.WKB; + default: + throw new RuntimeException("Unknown geometry encoding " + encoding); + } + } + + static org.apache.parquet.format.Edges convertEdges(LogicalTypeAnnotation.Edges edges) { + switch (edges) { + case PLANAR: + return org.apache.parquet.format.Edges.PLANAR; + case SPHERICAL: + return org.apache.parquet.format.Edges.SPHERICAL; + default: + throw new RuntimeException("Unknown edges " + edges); + } + } + private static class ConvertedTypeConverterVisitor implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor { @Override @@ -519,6 +543,26 @@ public Optional visit(LogicalTypeAnnotation.Float16LogicalTypeAnnot public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) { return of(LogicalType.UNKNOWN(new NullType())); } + + @Override + public Optional visit(LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) { + GeometryType geometryType = new GeometryType(); + if (geometryLogicalType.getEncoding() != null) { + geometryType.setEncoding(convertGeometryEncoding(geometryLogicalType.getEncoding())); + } + if 
(geometryLogicalType.getCrs() != null) { + geometryType.setCrs(geometryLogicalType.getCrs()); + } + if (geometryLogicalType.getCrs_encoding() != null) { + if (geometryLogicalType.getCrs_encoding().equalsIgnoreCase("WKB")) { + geometryType.setEncoding(GeometryEncoding.WKB); + } + } + if (geometryLogicalType.getEdges() != null) { + geometryType.setEdges(convertEdges(geometryLogicalType.getEdges())); + } + return of(LogicalType.GEOMETRY(geometryType)); + } } private void addRowGroup( @@ -582,6 +626,10 @@ private void addRowGroup( metaData.setSize_statistics(toParquetSizeStatistics(columnMetaData.getSizeStatistics())); } + if (columnMetaData.getGeometryStatistics() != null) { + metaData.setGeometry_stats(toParquetGeometryStatistics(columnMetaData.getGeometryStatistics())); + } + if (!encryptMetaData) { columnChunk.setMeta_data(metaData); } else { @@ -769,6 +817,37 @@ public static Statistics toParquetStatistics( return formatStats; } + private static GeometryStatistics toParquetStatistics( + org.apache.parquet.column.statistics.geometry.GeometryStatistics stats) { + GeometryStatistics formatStats = new GeometryStatistics(); + + if (stats.getBoundingBox() != null) { + formatStats.setBbox(toParquetBoundingBox(stats.getBoundingBox())); + } + List geometryTypes = new ArrayList<>(stats.getGeometryTypes().getTypes()); + Collections.sort(geometryTypes); + formatStats.setGeometry_types(geometryTypes); + + return formatStats; + } + + private static BoundingBox toParquetBoundingBox(org.apache.parquet.column.statistics.geometry.BoundingBox bbox) { + BoundingBox formatBbox = new BoundingBox(); + formatBbox.setXmin(bbox.getXMin()); + formatBbox.setXmax(bbox.getXMax()); + formatBbox.setYmin(bbox.getYMin()); + formatBbox.setYmax(bbox.getYMax()); + if (bbox.getZMin() <= bbox.getZMax()) { + formatBbox.setZmin(bbox.getZMin()); + formatBbox.setZmax(bbox.getZMax()); + } + if (bbox.getMMin() <= bbox.getMMax()) { + formatBbox.setMmin(bbox.getMMin()); + formatBbox.setMmax(bbox.getMMax()); + 
} + return formatBbox; + } + private static boolean withinLimit(org.apache.parquet.column.statistics.Statistics stats, int truncateLength) { if (stats.isSmallerThan(MAX_STATS_SIZE)) { return true; @@ -874,6 +953,65 @@ public org.apache.parquet.column.statistics.Statistics fromParquetStatistics( return fromParquetStatisticsInternal(createdBy, statistics, type, expectedOrder); } + private GeometryStatistics toParquetGeometryStatistics( + org.apache.parquet.column.statistics.geometry.GeometryStatistics geometryStatistics) { + if (geometryStatistics == null) { + return null; + } + + GeometryStatistics formatStats = new GeometryStatistics(); + + if (geometryStatistics.getBoundingBox() != null) { + formatStats.setBbox(toParquetBoundingBox(geometryStatistics.getBoundingBox())); + } + + if (geometryStatistics.getGeometryTypes() != null) { + List geometryTypes = + new ArrayList<>(geometryStatistics.getGeometryTypes().getTypes()); + Collections.sort(geometryTypes); + formatStats.setGeometry_types(geometryTypes); + } + + return formatStats; + } + + static org.apache.parquet.column.statistics.geometry.GeometryStatistics fromParquetStatistics( + GeometryStatistics formatGeomStats, PrimitiveType type) { + org.apache.parquet.column.statistics.geometry.BoundingBox bbox = null; + if (formatGeomStats.isSetBbox()) { + BoundingBox formatBbox = formatGeomStats.getBbox(); + bbox = new org.apache.parquet.column.statistics.geometry.BoundingBox( + formatBbox.getXmin(), + formatBbox.getXmax(), + formatBbox.getYmin(), + formatBbox.getYmax(), + formatBbox.isSetZmin() ? formatBbox.getZmin() : Double.NaN, + formatBbox.isSetZmax() ? formatBbox.getZmax() : Double.NaN, + formatBbox.isSetMmin() ? formatBbox.getMmin() : Double.NaN, + formatBbox.isSetMmax() ? 
formatBbox.getMmax() : Double.NaN); + } + org.apache.parquet.column.statistics.geometry.GeometryTypes geometryTypes = null; + if (formatGeomStats.isSetGeometry_types()) { + geometryTypes = new GeometryTypes(new HashSet<>(formatGeomStats.getGeometry_types())); + } + + // get the logical type annotation data from the type + LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation(); + if (logicalType instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) { + LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType = + (LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) logicalType; + return new org.apache.parquet.column.statistics.geometry.GeometryStatistics( + geometryLogicalType.getEdges(), + geometryLogicalType.getCrs(), + geometryLogicalType.getMetadata(), + bbox, + geometryTypes); + } + return new org.apache.parquet.column.statistics.geometry.GeometryStatistics( + // this case should not happen in normal cases + null, null, null, bbox, geometryTypes); + } + /** * Sort order for page and column statistics. 
Types are associated with sort * orders (e.g., UTF8 columns should use UNSIGNED) and column stats are @@ -1031,6 +1169,12 @@ public Optional visit( LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { return of(SortOrder.SIGNED); } + + @Override + public Optional visit( + LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) { + return of(SortOrder.UNKNOWN); + } }) .orElse(defaultSortOrder(primitive.getPrimitiveTypeName())); } @@ -1174,6 +1318,14 @@ LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) { return LogicalTypeAnnotation.uuidType(); case FLOAT16: return LogicalTypeAnnotation.float16Type(); + case GEOMETRY: + GeometryType geometry = type.getGEOMETRY(); + return LogicalTypeAnnotation.geometryType( + convertGeometryEncoding(geometry.getEncoding()), + convertEdges(geometry.getEdges()), + geometry.getCrs(), + geometry.getEncoding().name(), + null); default: throw new RuntimeException("Unknown logical type " + type); } @@ -1192,6 +1344,32 @@ private LogicalTypeAnnotation.TimeUnit convertTimeUnit(TimeUnit unit) { } } + private LogicalTypeAnnotation.GeometryEncoding convertGeometryEncoding(GeometryEncoding encoding) { + if (encoding == null) { + return null; + } + switch (encoding) { + case WKB: + return LogicalTypeAnnotation.GeometryEncoding.WKB; + default: + throw new RuntimeException("Unknown geometry encoding " + encoding); + } + } + + private static LogicalTypeAnnotation.Edges convertEdges(Edges edge) { + if (edge == null) { + return null; + } + switch (edge) { + case PLANAR: + return PLANAR; + case SPHERICAL: + return LogicalTypeAnnotation.Edges.SPHERICAL; + default: + throw new RuntimeException("Unknown geometry edge " + edge); + } + } + private static void addKeyValue(FileMetaData fileMetaData, String key, String value) { KeyValue keyValue = new KeyValue(key); keyValue.value = value; @@ -1608,7 +1786,8 @@ public ColumnChunkMetaData buildColumnChunkMetaData( metaData.num_values, 
metaData.total_compressed_size, metaData.total_uncompressed_size, - fromParquetSizeStatistics(metaData.size_statistics, type)); + fromParquetSizeStatistics(metaData.size_statistics, type), + metaData.geometry_stats == null ? null : fromParquetStatistics(metaData.geometry_stats, type)); } public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws IOException { @@ -2271,6 +2450,7 @@ public static ColumnIndex toParquetColumnIndex( if (defLevelHistogram != null && !defLevelHistogram.isEmpty()) { parquetColumnIndex.setDefinition_level_histograms(defLevelHistogram); } + return parquetColumnIndex; } @@ -2287,7 +2467,8 @@ public static org.apache.parquet.internal.column.columnindex.ColumnIndex fromPar parquetColumnIndex.getMin_values(), parquetColumnIndex.getMax_values(), parquetColumnIndex.getRepetition_level_histograms(), - parquetColumnIndex.getDefinition_level_histograms()); + parquetColumnIndex.getDefinition_level_histograms(), + null); } public static OffsetIndex toParquetOffsetIndex( diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java index 814b98c50f..ad26685e30 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java @@ -54,8 +54,10 @@ import org.apache.parquet.column.EncodingStats; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.column.page.DictionaryPage; +import org.apache.parquet.column.statistics.BinaryStatistics; import org.apache.parquet.column.statistics.SizeStatistics; import org.apache.parquet.column.statistics.Statistics; +import org.apache.parquet.column.statistics.geometry.GeometryStatistics; import org.apache.parquet.column.values.bloomfilter.BloomFilter; import org.apache.parquet.crypto.AesCipher; import org.apache.parquet.crypto.ColumnEncryptionProperties; @@ -1379,6 +1381,11 @@ public void endColumn() 
throws IOException { currentColumnIndexes.add(columnIndexBuilder.build()); } currentOffsetIndexes.add(offsetIndexBuilder.build(currentChunkFirstDataPage)); + // calculate the geometryStatistics from the BinaryStatistics + GeometryStatistics geometryStatistics = null; + if (currentStatistics instanceof BinaryStatistics) + geometryStatistics = ((BinaryStatistics) currentStatistics).getGeometryStatistics(); + currentBlock.addColumn(ColumnChunkMetaData.get( currentChunkPath, currentChunkType, @@ -1391,7 +1398,8 @@ public void endColumn() throws IOException { currentChunkValueCount, compressedLength, uncompressedLength, - currentSizeStatistics)); + currentSizeStatistics, + geometryStatistics)); this.currentBlock.setTotalByteSize(currentBlock.getTotalByteSize() + uncompressedLength); this.uncompressedLength = 0; this.compressedLength = 0; diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java index 14a949b0e0..0037b697f2 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java @@ -31,6 +31,7 @@ import org.apache.parquet.column.statistics.BooleanStatistics; import org.apache.parquet.column.statistics.SizeStatistics; import org.apache.parquet.column.statistics.Statistics; +import org.apache.parquet.column.statistics.geometry.GeometryStatistics; import org.apache.parquet.crypto.AesCipher; import org.apache.parquet.crypto.InternalColumnDecryptionSetup; import org.apache.parquet.crypto.InternalFileDecryptor; @@ -39,6 +40,7 @@ import org.apache.parquet.format.ColumnMetaData; import org.apache.parquet.format.converter.ParquetMetadataConverter; import org.apache.parquet.internal.hadoop.metadata.IndexReference; +import org.apache.parquet.schema.LogicalTypeAnnotation; import 
org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Types; @@ -114,7 +116,7 @@ public static ColumnChunkMetaData get( * @param totalUncompressedSize uncompressed data size * @return a column chunk metadata instance * @deprecated will be removed in 2.0.0. Use - * {@link #get(ColumnPath, PrimitiveType, CompressionCodecName, EncodingStats, Set, Statistics, long, long, long, long, long)} + * {@link #get(ColumnPath, PrimitiveType, CompressionCodecName, EncodingStats, Set, Statistics, long, long, long, long, long, SizeStatistics, GeometryStatistics)} * instead. */ @Deprecated @@ -169,6 +171,7 @@ public static ColumnChunkMetaData get( valueCount, totalSize, totalUncompressedSize, + null, null); } @@ -199,7 +202,25 @@ public static ColumnChunkMetaData get( long valueCount, long totalSize, long totalUncompressedSize, - SizeStatistics sizeStatistics) { + SizeStatistics sizeStatistics, + GeometryStatistics geometryStats) { + + LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation(); + if (logicalType instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) { + return new GeometryColumnChunkMetaData( + path, + type, + codec, + encodingStats, + encodings, + statistics, + firstDataPage, + dictionaryPageOffset, + valueCount, + totalSize, + totalUncompressedSize, + geometryStats); + } // to save space we store those always positive longs in ints when they fit. 
if (positiveLongFitsInAnInt(firstDataPage) @@ -395,6 +416,12 @@ public SizeStatistics getSizeStatistics() { throw new UnsupportedOperationException("SizeStatistics is not implemented"); } + /** @return the geometry stats for this column */ + @JsonIgnore + public GeometryStatistics getGeometryStatistics() { + return null; + } + /** * Method should be considered private * @@ -850,4 +877,105 @@ public SizeStatistics getSizeStatistics() { public boolean isEncrypted() { return true; } + + public GeometryStatistics getGeometryStatistics() { + return shadowColumnChunkMetaData.getGeometryStatistics(); + } +} + +class GeometryColumnChunkMetaData extends ColumnChunkMetaData { + + private final long firstDataPageOffset; + private final long dictionaryPageOffset; + private final long valueCount; + private final long totalSize; + private final long totalUncompressedSize; + private final Statistics statistics; + private final GeometryStatistics geometryStatistics; + + /** + * @param path column identifier + * @param type type of the column + * @param codec + * @param encodings + * @param statistics + * @param firstDataPageOffset + * @param dictionaryPageOffset + * @param valueCount + * @param totalSize + * @param totalUncompressedSize + * @param geometryStatistics + */ + GeometryColumnChunkMetaData( + ColumnPath path, + PrimitiveType type, + CompressionCodecName codec, + EncodingStats encodingStats, + Set encodings, + Statistics statistics, + long firstDataPageOffset, + long dictionaryPageOffset, + long valueCount, + long totalSize, + long totalUncompressedSize, + GeometryStatistics geometryStatistics) { + super(encodingStats, ColumnChunkProperties.get(path, type, codec, encodings)); + this.statistics = statistics; + this.firstDataPageOffset = firstDataPageOffset; + this.dictionaryPageOffset = dictionaryPageOffset; + this.valueCount = valueCount; + this.totalSize = totalSize; + this.totalUncompressedSize = totalUncompressedSize; + this.geometryStatistics = geometryStatistics; + 
} + + /** + * @return start of the column data offset + */ + public long getFirstDataPageOffset() { + return firstDataPageOffset; + } + + /** + * @return the location of the dictionary page if any + */ + public long getDictionaryPageOffset() { + return dictionaryPageOffset; + } + + /** + * @return count of values in this block of the column + */ + public long getValueCount() { + return valueCount; + } + + /** + * @return the totalUncompressedSize + */ + public long getTotalUncompressedSize() { + return totalUncompressedSize; + } + + /** + * @return the totalSize + */ + public long getTotalSize() { + return totalSize; + } + + public SizeStatistics getSizeStatistics() { + return null; + } + + /** + * @return the stats for this column + */ + public Statistics getStatistics() { + return statistics; + } + + public GeometryStatistics getGeometryStatistics() { + return geometryStatistics; + } } diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestGeometryTypeRoundTrip.java b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestGeometryTypeRoundTrip.java new file mode 100644 index 0000000000..aae9719ca3 --- /dev/null +++ b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestGeometryTypeRoundTrip.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
package org.apache.parquet.statistics;

import static org.apache.parquet.schema.LogicalTypeAnnotation.geometryType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.Preconditions;
import org.apache.parquet.column.statistics.geometry.GeometryStatistics;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.GroupFactory;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.internal.column.columnindex.ColumnIndex;
import org.apache.parquet.io.LocalInputFile;
import org.apache.parquet.io.LocalOutputFile;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.LogicalTypeAnnotation.Edges;
import org.apache.parquet.schema.LogicalTypeAnnotation.GeometryEncoding;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Types;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.io.WKBWriter;

/**
 * Round-trip tests for a required BINARY column annotated with the geometry logical type.
 *
 * <p>Each test encodes JTS points as WKB, writes them to a local Parquet file with the example
 * {@link Group} writer (dictionary encoding disabled), then reopens the file and inspects the
 * metadata of the first column chunk of the first row group.
 */
public class TestGeometryTypeRoundTrip {

  /** Name of the single geometry column used by every schema in this class. */
  private static final String GEOMETRY_COLUMN = "col_geom";

  @Rule
  public TemporaryFolder temp = new TemporaryFolder();

  private final GeometryFactory geomFactory = new GeometryFactory();

  /** Converts JTS Geometry objects to Well-Known Binary (WKB). */
  private final WKBWriter wkbWriter = new WKBWriter();

  /**
   * Returns a path inside the temporary folder that does not exist yet.
   *
   * <p>{@link TemporaryFolder#newFile()} creates the file, so it is deleted again before the path
   * is handed out for the writer to create.
   *
   * @throws IOException if the temporary file cannot be created
   */
  private Path newTempPath() throws IOException {
    File file = temp.newFile();
    Preconditions.checkArgument(file.delete(), "Could not remove temp file");
    return file.toPath();
  }

  /** Encodes the point {@code (x, y)} as WKB and wraps the bytes in a {@link Binary}. */
  private Binary wkbPoint(double x, double y) {
    return Binary.fromConstantByteArray(wkbWriter.write(geomFactory.createPoint(new Coordinate(x, y))));
  }

  /**
   * Builds a message type named {@code msg} with one required WKB geometry column.
   *
   * @param edges edge interpretation passed to the geometry logical type
   * @param crs coordinate reference system identifier, e.g. {@code "EPSG:4326"}
   */
  private static MessageType geometrySchema(Edges edges, String crs) {
    return Types.buildMessage()
        .required(BINARY)
        .as(geometryType(GeometryEncoding.WKB, edges, crs, "PROJJSON", null))
        .named(GEOMETRY_COLUMN)
        .named("msg");
  }

  /**
   * Writes one record per value to a fresh temporary Parquet file.
   *
   * @param schema schema produced by {@link #geometrySchema(Edges, String)}
   * @param values WKB-encoded geometries, written in order
   * @return the path of the written file
   * @throws IOException if the file cannot be created or written
   */
  private Path writeGeometries(MessageType schema, Binary... values) throws IOException {
    Configuration conf = new Configuration();
    GroupWriteSupport.setSchema(schema, conf);
    GroupFactory factory = new SimpleGroupFactory(schema);
    Path path = newTempPath();
    try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new LocalOutputFile(path))
        .withConf(conf)
        .withDictionaryEncoding(false)
        .build()) {
      for (Binary value : values) {
        writer.write(factory.newGroup().append(GEOMETRY_COLUMN, value));
      }
    }
    return path;
  }

  /** Returns the first column chunk of the first row group, asserting that it is present. */
  private static ColumnChunkMetaData firstColumnChunk(ParquetFileReader reader) {
    ColumnChunkMetaData columnChunkMetaData =
        reader.getRowGroups().get(0).getColumns().get(0);
    Assert.assertNotNull(columnChunkMetaData);
    return columnChunkMetaData;
  }

  /**
   * Writes two planar points and checks the record count, the bounding box reported by the
   * geometry statistics, and that a column index can be read.
   */
  @Test
  public void testEPSG4326BasicReadWriteGeometryValue() throws Exception {
    // EPSG:4326: Also known as WGS 84, it uses latitude and longitude coordinates.
    Path path = writeGeometries(
        geometrySchema(Edges.PLANAR, "EPSG:4326"), wkbPoint(1.0, 1.0), wkbPoint(2.0, 2.0));

    try (ParquetFileReader reader = ParquetFileReader.open(new LocalInputFile(path))) {
      Assert.assertEquals(2, reader.getRecordCount());

      ParquetMetadata footer = reader.getFooter();
      Assert.assertNotNull(footer);

      ColumnChunkMetaData columnChunkMetaData = firstColumnChunk(reader);

      GeometryStatistics geometryStatistics = columnChunkMetaData.getGeometryStatistics();
      Assert.assertNotNull(geometryStatistics);

      // The box must span exactly the two written points: (1, 1) and (2, 2).
      Assert.assertEquals(1.0, geometryStatistics.getBoundingBox().getXMin(), 0.0);
      Assert.assertEquals(2.0, geometryStatistics.getBoundingBox().getXMax(), 0.0);
      Assert.assertEquals(1.0, geometryStatistics.getBoundingBox().getYMin(), 0.0);
      Assert.assertEquals(2.0, geometryStatistics.getBoundingBox().getYMax(), 0.0);

      ColumnIndex columnIndex = reader.readColumnIndex(columnChunkMetaData);
      Assert.assertNotNull(columnIndex);
    }
  }

  /**
   * Writes two planar points and checks that geometry statistics and a column index are present.
   *
   * <p>NOTE(review): despite the name, nothing covering-specific is asserted here; add such
   * assertions once the expected covering output is confirmed.
   */
  @Test
  public void testEPSG4326BasicReadWriteGeometryValueWithCovering() throws Exception {
    // EPSG:4326: Also known as WGS 84, it uses latitude and longitude coordinates.
    Path path = writeGeometries(
        geometrySchema(Edges.PLANAR, "EPSG:4326"), wkbPoint(1.0, 1.0), wkbPoint(2.0, 2.0));

    try (ParquetFileReader reader = ParquetFileReader.open(new LocalInputFile(path))) {
      Assert.assertEquals(2, reader.getRecordCount());

      ParquetMetadata footer = reader.getFooter();
      Assert.assertNotNull(footer);

      ColumnChunkMetaData columnChunkMetaData = firstColumnChunk(reader);

      GeometryStatistics geometryStatistics = columnChunkMetaData.getGeometryStatistics();
      Assert.assertNotNull(geometryStatistics);

      ColumnIndex columnIndex = reader.readColumnIndex(columnChunkMetaData);
      Assert.assertNotNull(columnIndex);
    }
  }

  /**
   * Writes four points with spherical edges and verifies the file can be reopened with all
   * records and a column chunk present.
   *
   * <p>Bounding-box values are deliberately not asserted. NOTE(review): confirm which statistics
   * are produced for spherical edges before adding assertions on them.
   */
  @Test
  public void testEPSG3857BasicReadWriteGeometryValue() throws Exception {
    // EPSG:3857: Web Mercator projection, commonly used by web mapping applications.
    Path path = writeGeometries(
        geometrySchema(Edges.SPHERICAL, "EPSG:3857"),
        wkbPoint(-8237491.37, 4974209.75),
        wkbPoint(-8237491.37, 4974249.75),
        wkbPoint(-8237531.37, 4974209.75),
        wkbPoint(-8237531.37, 4974249.75));

    // Reading the file back is what makes this a round trip; writing alone cannot fail
    // on a footer that was serialized but is not readable.
    try (ParquetFileReader reader = ParquetFileReader.open(new LocalInputFile(path))) {
      Assert.assertEquals(4, reader.getRecordCount());

      ParquetMetadata footer = reader.getFooter();
      Assert.assertNotNull(footer);

      firstColumnChunk(reader);
    }
  }
}