getTypes() {
+ return types;
+ }
+
+ /** Adds the WKB type code of {@code geometry}; an unrecognized type marks this instance invalid and clears it. */
+ void update(Geometry geometry) {
+   if (valid) {
+     int typeCode = getGeometryTypeCode(geometry);
+     if (typeCode == UNKNOWN_TYPE_ID) {
+       valid = false;
+       types.clear();
+     } else {
+       types.add(typeCode);
+     }
+   }
+ }
+
+ /** Unions {@code other} into this set; does nothing when this is invalid, invalidates and clears this when {@code other} is invalid. */
+ public void merge(GeometryTypes other) {
+   Preconditions.checkArgument(other != null, "Cannot merge with null GeometryTypes");
+   if (valid) {
+     if (other.valid) {
+       types.addAll(other.types);
+     } else {
+       valid = false;
+       types.clear();
+     }
+   }
+ }
+
+ public void reset() {
+ types.clear(); // forget every recorded type code
+ valid = true; // update() and merge() take effect again
+ }
+
+ public void abort() {
+ valid = false; // update() and merge() return immediately while this is false
+ types.clear(); // an aborted instance holds no type codes
+ }
+
+ public GeometryTypes copy() {
+ return new GeometryTypes(new HashSet<>(types)); // the copy gets its own set; NOTE(review): confirm the constructor carries over `valid`
+ }
+
+ @Override
+ public String toString() {
+   Object names = types.stream().map(this::typeIdToString).collect(Collectors.toSet());
+   return "GeometryTypes{" + "types=" + names + '}';
+ }
+
+ /**
+  * Translates the type name reported by {@code geometry} into a base type id.
+  * The ids are the values of the XY column of the WKB code table documented on getGeometryTypeCode.
+  *
+  * @param geometry the geometry whose {@code getGeometryType()} name is inspected
+  * @return 1 through 7 for the seven recognized type names, otherwise {@code UNKNOWN_TYPE_ID}
+  */
+ private int getGeometryTypeId(Geometry geometry) {
+   String typeName = geometry.getGeometryType();
+   switch (typeName) {
+     case Geometry.TYPENAME_POINT: return 1;
+     case Geometry.TYPENAME_LINESTRING: return 2;
+     case Geometry.TYPENAME_POLYGON: return 3;
+     case Geometry.TYPENAME_MULTIPOINT: return 4;
+     case Geometry.TYPENAME_MULTILINESTRING: return 5;
+     case Geometry.TYPENAME_MULTIPOLYGON: return 6;
+     case Geometry.TYPENAME_GEOMETRYCOLLECTION: return 7;
+     default: return UNKNOWN_TYPE_ID;
+   }
+ }
+
+ /**
+  * Returns the WKB (ISO variant) integer code of {@code geometry}: its base type id, plus 1000 when any coordinate has a Z value and plus 2000 when any has an M value.
+  *
+  * This is from the following spec proposed:
+  *
+  * The geometry types of all geometries, or an empty array if they are not
+  * known. This is borrowed from `geometry_types` column metadata of GeoParquet [1]
+  * except that values in the list are WKB (ISO variant) integer codes [2]. Table
+  * below shows the most common geometry types and their codes:
+  *
+  * | Type | XY | XYZ | XYM | XYZM |
+  * | :----------------- | :--- | :--- | :--- | :--: |
+  * | Point | 0001 | 1001 | 2001 | 3001 |
+  * | LineString | 0002 | 1002 | 2002 | 3002 |
+  * | Polygon | 0003 | 1003 | 2003 | 3003 |
+  * | MultiPoint | 0004 | 1004 | 2004 | 3004 |
+  * | MultiLineString | 0005 | 1005 | 2005 | 3005 |
+  * | MultiPolygon | 0006 | 1006 | 2006 | 3006 |
+  * | GeometryCollection | 0007 | 1007 | 2007 | 3007 |
+  *
+  * In addition, the following rules are used:
+  * - A list of multiple values indicates that multiple geometry types are
+  * present (e.g. `[0003, 0006]`).
+  * - An empty array explicitly signals that the geometry types are not known.
+  * - The geometry types in the list must be unique (e.g. `[0001, 0001]`
+  * is not valid).
+  *
+  * Please refer to links below for more detail:
+  * [1] https://github.com/opengeospatial/geoparquet/blob/v1.0.0/format-specs/geoparquet.md?plain=1#L91
+  * [2] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
+  * @param geometry the geometry to classify
+  * @return the type code, or {@code UNKNOWN_TYPE_ID} when the geometry type is not recognized
+  */
+ private int getGeometryTypeCode(Geometry geometry) {
+   int typeId = getGeometryTypeId(geometry);
+   if (typeId == UNKNOWN_TYPE_ID) {
+     return UNKNOWN_TYPE_ID;
+   }
+   boolean hasZ = false;
+   boolean hasM = false;
+   // A dimension counts as present as soon as one coordinate carries a non-NaN value for it
+   for (Coordinate coordinate : geometry.getCoordinates()) {
+     hasZ = hasZ || !Double.isNaN(coordinate.getZ());
+     hasM = hasM || !Double.isNaN(coordinate.getM());
+     if (hasZ && hasM) {
+       break;
+     }
+   }
+   if (hasZ) {
+     typeId += 1000;
+   }
+   if (hasM) {
+     typeId += 2000;
+   }
+   return typeId;
+ }
+
+ /**
+  * Renders a WKB type code as the geometry type name followed by a dimension suffix.
+  * Codes below 1000 get the XY suffix and codes of 3000 or more get the XYZM suffix.
+  *
+  * For example, 1003 becomes the polygon type name followed by " (XYZ)".
+  *
+  * @param typeId the WKB integer code
+  * @return the label, or "Unknown" when {@code typeId % 1000} is not between 1 and 7
+  */
+ private String typeIdToString(int typeId) {
+   // Index 0 is a placeholder so that the base id (1-7) can be used as the index directly
+   String[] baseNames = {
+     null,
+     Geometry.TYPENAME_POINT,
+     Geometry.TYPENAME_LINESTRING,
+     Geometry.TYPENAME_POLYGON,
+     Geometry.TYPENAME_MULTIPOINT,
+     Geometry.TYPENAME_MULTILINESTRING,
+     Geometry.TYPENAME_MULTIPOLYGON,
+     Geometry.TYPENAME_GEOMETRYCOLLECTION
+   };
+   int baseId = typeId % 1000;
+   if (baseId < 1 || baseId > 7) {
+     return "Unknown";
+   }
+   // The remainder is positive here, so typeId is positive and only its thousands matter
+   final String suffix;
+   if (typeId < 1000) {
+     suffix = " (XY)";
+   } else if (typeId < 2000) {
+     suffix = " (XYZ)";
+   } else if (typeId < 3000) {
+     suffix = " (XYM)";
+   } else {
+     suffix = " (XYZM)";
+   }
+   return baseNames[baseId] + suffix;
+ }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryUtils.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryUtils.java
new file mode 100644
index 0000000000..f91eafe49b
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeometryUtils.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.column.statistics.geometry;
+
+import org.locationtech.jts.geom.CoordinateSequence;
+import org.locationtech.jts.geom.CoordinateSequenceFilter;
+import org.locationtech.jts.geom.Geometry;
+
+ class GeometryUtils {
+   /** Brings every X ordinate of {@code geometry} into [-180, 180] by whole turns of 360, in place; null or empty input is ignored. */
+   public static void normalizeLongitude(Geometry geometry) {
+     if (geometry == null || geometry.isEmpty()) {
+       return;
+     }
+     geometry.apply(new CoordinateSequenceFilter() {
+       @Override
+       public void filter(CoordinateSequence seq, int i) {
+         seq.setOrdinate(i, CoordinateSequence.X, wrapLongitude(seq.getX(i)));
+       }
+       @Override
+       public boolean isDone() {
+         return false; // Continue processing until all coordinates are processed
+       }
+       @Override
+       public boolean isGeometryChanged() {
+         return true; // The geometry is changed as we are modifying the coordinates
+       }
+     });
+     geometry.geometryChanged(); // Notify the geometry that its coordinates have been changed
+   }
+
+   /** Constant-time replacement for looping x -= 360 / x += 360 (which never ends for infinities or huge values); NaN and infinities are returned unchanged. */
+   private static double wrapLongitude(double x) {
+     if (Double.isNaN(x) || Double.isInfinite(x) || (x >= -180 && x <= 180)) { return x; }
+     double wrapped = Math.IEEEremainder(x, 360);
+     // Ties round to even, so an odd multiple of 180 may land on either edge: keep the side the loop would reach
+     if (Math.abs(wrapped) == 180) { return x > 0 ? 180 : -180; }
+     return wrapped == 0 ? 0.0 : wrapped; // the loop never produced -0.0
+   }
+ }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeospatialStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeospatialStatistics.java
new file mode 100644
index 0000000000..57b7de7fc9
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeospatialStatistics.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.statistics.geometry;
+
+import java.nio.ByteBuffer;
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.io.api.Binary;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.io.ParseException;
+import org.locationtech.jts.io.WKBReader;
+
+ /** Geospatial statistics built from WKB values: a bounding box and the geometry types encountered. */
+ public class GeospatialStatistics {
+
+   private static final BoundingBox DUMMY_BOUNDING_BOX = new DummyBoundingBox();
+
+   // Metadata that may impact the statistics calculation
+   private final String crs;
+   private final ByteBuffer metadata;
+
+   private final BoundingBox boundingBox;
+   private final GeometryTypes geometryTypes;
+   private final WKBReader reader = new WKBReader();
+
+   public GeospatialStatistics(String crs, ByteBuffer metadata, BoundingBox boundingBox, GeometryTypes geometryTypes) {
+     this.crs = crs;
+     this.metadata = metadata;
+     this.boundingBox = supportsBoundingBox() ? boundingBox : DUMMY_BOUNDING_BOX;
+     this.geometryTypes = geometryTypes;
+   }
+
+   public GeospatialStatistics(String crs, ByteBuffer metadata) {
+     this(crs, metadata, new BoundingBox(), new GeometryTypes());
+   }
+
+   public BoundingBox getBoundingBox() {
+     return boundingBox;
+   }
+
+   public GeometryTypes getGeometryTypes() {
+     return geometryTypes;
+   }
+
+   /** Parses {@code value} as WKB and folds it into the statistics; null is skipped and unparsable bytes abort them. */
+   public void update(Binary value) {
+     if (value == null) {
+       return;
+     }
+     try {
+       Geometry geom = reader.read(value.getBytes());
+       update(geom);
+     } catch (ParseException e) {
+       abort(); // one unreadable value invalidates both statistics
+     }
+   }
+
+   private void update(Geometry geom) {
+     if (supportsBoundingBox()) {
+       boundingBox.update(geom, crs);
+     }
+     geometryTypes.update(geom);
+   }
+
+   /**
+    * A bounding box is a rectangular region defined by two points, the lower left
+    * and upper right corners. It is used to represent the minimum and maximum
+    * coordinates of a geometry. Only planar geometries can have a bounding box.
+    */
+   private boolean supportsBoundingBox() {
+     // Only planar geometries can have a bounding box based on the current specification
+     return true;
+   }
+
+   /** Merges the bounding box and the geometry types of {@code other} into this instance. */
+   public void merge(GeospatialStatistics other) {
+     Preconditions.checkArgument(other != null, "Cannot merge with null GeospatialStatistics");
+     if (boundingBox != null && other.boundingBox != null) {
+       boundingBox.merge(other.boundingBox);
+     }
+     if (geometryTypes != null && other.geometryTypes != null) {
+       geometryTypes.merge(other.geometryTypes);
+     }
+   }
+
+   /** Resets the bounding box and the geometry types. */
+   public void reset() {
+     boundingBox.reset();
+     geometryTypes.reset();
+   }
+
+   /** Aborts the bounding box and the geometry types. */
+   public void abort() {
+     boundingBox.abort();
+     geometryTypes.abort();
+   }
+
+   /** Returns a new instance with the same crs and metadata and with copies of the bounding box and geometry types. */
+   public GeospatialStatistics copy() {
+     BoundingBox boxCopy = boundingBox != null ? boundingBox.copy() : null;
+     GeometryTypes typesCopy = geometryTypes != null ? geometryTypes.copy() : null;
+     return new GeospatialStatistics(crs, metadata, boxCopy, typesCopy);
+   }
+
+   @Override
+   public String toString() {
+     return "GeospatialStatistics{" + "boundingBox=" + boundingBox + ", geometryTypes=" + geometryTypes + '}';
+   }
+ }
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java
index 86099717df..5a7931a1c6 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java
@@ -21,6 +21,7 @@
import java.nio.ByteBuffer;
import java.util.List;
import java.util.PrimitiveIterator;
+import org.apache.parquet.column.statistics.geometry.GeospatialStatistics;
import org.apache.parquet.filter2.predicate.FilterPredicate.Visitor;
import org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter;
@@ -71,4 +72,12 @@ default List getRepetitionLevelHistogram() {
default List getDefinitionLevelHistogram() {
throw new UnsupportedOperationException("Definition level histogram is not implemented");
}
+
+ /**
+  * @return the unmodifiable list of the geometry statistics for each page; used for converting to the related thrift object
+  * @throws UnsupportedOperationException always, unless an implementation overrides this default
+  */
+ default List<GeospatialStatistics> getGeometryStatistics() {
+   throw new UnsupportedOperationException("Geospatial statistics is not implemented");
+ }
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
index e78b2ceae1..e67fddeeae 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
@@ -38,8 +38,10 @@
import java.util.Set;
import java.util.function.IntPredicate;
import org.apache.parquet.column.MinMax;
+import org.apache.parquet.column.statistics.BinaryStatistics;
import org.apache.parquet.column.statistics.SizeStatistics;
import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.column.statistics.geometry.GeospatialStatistics;
import org.apache.parquet.filter2.predicate.Operators.And;
import org.apache.parquet.filter2.predicate.Operators.Contains;
import org.apache.parquet.filter2.predicate.Operators.Eq;
@@ -56,6 +58,7 @@
import org.apache.parquet.filter2.predicate.Operators.UserDefined;
import org.apache.parquet.filter2.predicate.UserDefinedPredicate;
import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveComparator;
import org.apache.parquet.schema.PrimitiveStringifier;
import org.apache.parquet.schema.PrimitiveType;
@@ -105,6 +108,8 @@ int translate(int arrayIndex) {
private long[] repLevelHistogram;
// might be null
private long[] defLevelHistogram;
+ // might be null
+ private GeospatialStatistics[] geospatialStatistics;
static String truncate(String str) {
if (str.length() <= MAX_VALUE_LENGTH_FOR_TOSTRING) {
@@ -200,6 +205,17 @@ private String formatHistogram(long[] histogram, int pageIndex) {
return TOSTRING_MISSING_VALUE_MARKER;
}
+ @Override
+ public List<GeospatialStatistics> getGeometryStatistics() {
+   List<GeospatialStatistics> geomStats = new ArrayList<>();
+   if (geospatialStatistics != null) {
+     for (GeospatialStatistics stats : geospatialStatistics) {
+       geomStats.add(stats.copy()); // each element is a copy, not the instance held by this index
+     }
+   }
+   return java.util.Collections.unmodifiableList(geomStats); // the interface documents an unmodifiable list
+ }
+
@Override
public String toString() {
try (Formatter formatter = new Formatter()) {
@@ -521,6 +537,7 @@ public long getMinMaxSize() {
private int nextPageIndex;
private LongList repLevelHistogram = new LongArrayList();
private LongList defLevelHistogram = new LongArrayList();
+ private List geospatialStatistics = new ArrayList<>();
/**
* @return a no-op builder that does not collect statistics objects and therefore returns {@code null} at
@@ -611,10 +628,52 @@ public static ColumnIndex build(
List maxValues,
List repLevelHistogram,
List defLevelHistogram) {
+ return build(type, boundaryOrder, nullPages, nullCounts, minValues, maxValues, repLevelHistogram, defLevelHistogram, null); // forward the histograms; only the geospatial statistics are absent
+ }
+
+ /**
+ * @param type
+ * the primitive type
+ * @param boundaryOrder
+ * the boundary order of the min/max values
+ * @param nullPages
+ * the null pages (one boolean value for each page that signifies whether the page consists of nulls
+ * entirely)
+ * @param nullCounts
+ * the number of null values for each page
+ * @param minValues
+ * the min values for each page
+ * @param maxValues
+ * the max values for each page
+ * @param repLevelHistogram
+ * the repetition level histogram for all levels of each page
+ * @param defLevelHistogram
+ * the definition level histogram for all levels of each page
+ * @param geospatialStatistics
+ * the geometry statistics for each page (apply to GEOMETRY logical type only)
+ * @return the newly created {@link ColumnIndex} object based on the specified arguments
+ */
+ public static ColumnIndex build(
+ PrimitiveType type,
+ BoundaryOrder boundaryOrder,
+ List nullPages,
+ List nullCounts,
+ List minValues,
+ List maxValues,
+ List repLevelHistogram,
+ List defLevelHistogram,
+ List geospatialStatistics) {
ColumnIndexBuilder builder = createNewBuilder(type, Integer.MAX_VALUE);
- builder.fill(nullPages, nullCounts, minValues, maxValues, repLevelHistogram, defLevelHistogram);
+ builder.fill(
+ nullPages,
+ nullCounts,
+ minValues,
+ maxValues,
+ repLevelHistogram,
+ defLevelHistogram,
+ geospatialStatistics);
ColumnIndexBase> columnIndex = builder.build(type);
columnIndex.boundaryOrder = requireNonNull(boundaryOrder);
return columnIndex;
@@ -662,6 +721,16 @@ public void add(Statistics> stats, SizeStatistics sizeStats) {
defLevelHistogram = null;
}
+ if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) {
+ assert stats instanceof BinaryStatistics;
+ BinaryStatistics binaryStats = (BinaryStatistics) stats;
+ if (geospatialStatistics != null && binaryStats.getGeospatialStatistics() != null) {
+ geospatialStatistics.add(binaryStats.getGeospatialStatistics());
+ } else {
+ geospatialStatistics = null;
+ }
+ }
+
++nextPageIndex;
}
@@ -675,7 +744,8 @@ private void fill(
List minValues,
List maxValues,
List repLevelHistogram,
- List defLevelHistogram) {
+ List defLevelHistogram,
+ List geospatialStatistics) {
clear();
int pageCount = nullPages.size();
if ((nullCounts != null && nullCounts.size() != pageCount)
@@ -722,6 +792,9 @@ private void fill(
if (defLevelHistogram != null) {
this.defLevelHistogram.addAll(defLevelHistogram);
}
+ if (geospatialStatistics != null) {
+ this.geospatialStatistics.addAll(geospatialStatistics);
+ }
}
/**
@@ -758,6 +831,10 @@ private ColumnIndexBase> build(PrimitiveType type) {
if (defLevelHistogram != null && !defLevelHistogram.isEmpty()) {
columnIndex.defLevelHistogram = defLevelHistogram.toLongArray();
}
+ if (geospatialStatistics != null && !geospatialStatistics.isEmpty()) {
+ columnIndex.geospatialStatistics = new GeospatialStatistics[geospatialStatistics.size()];
+ geospatialStatistics.toArray(columnIndex.geospatialStatistics);
+ }
return columnIndex;
}
@@ -804,6 +881,7 @@ private void clear() {
pageIndexes.clear();
repLevelHistogram.clear();
defLevelHistogram.clear();
+ geospatialStatistics.clear();
}
abstract void clearMinMax();
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 05629dd388..25ba540cc4 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -33,6 +33,7 @@
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER;
+import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -43,6 +44,10 @@
import org.apache.parquet.Preconditions;
public abstract class LogicalTypeAnnotation {
+
+ // TODO: Move this to an external configuration
+ public static final String DEFAULT_GEOMETRY_CRS = "OGC:CRS84";
+
enum LogicalTypeToken {
MAP {
@Override
@@ -146,6 +151,33 @@ protected LogicalTypeAnnotation fromString(List params) {
protected LogicalTypeAnnotation fromString(List params) {
return float16Type();
}
+ },
+ GEOMETRY {
+   // Reads [0] as crs and [2] as metadata. NOTE(review): index 1 is skipped - confirm against typeParametersAsString()
+   @Override
+   protected LogicalTypeAnnotation fromString(List params) {
+     if (params.size() < 1) {
+       throw new RuntimeException("Expecting at least 1 parameter for geometry logical type, got " + params.size());
+     }
+     String crs = params.get(0); // present: the size was checked above
+     ByteBuffer metadata = params.size() > 2
+         ? ByteBuffer.wrap(params.get(2).getBytes(java.nio.charset.StandardCharsets.UTF_8)) : null;
+     return geometryType(crs, metadata);
+   }
+ },
+ GEOGRAPHY {
+   // Reads [0] as crs, [1] as edge algorithm (optional) and [2] as metadata (optional)
+   @Override
+   protected LogicalTypeAnnotation fromString(List params) {
+     if (params.size() < 1) {
+       throw new RuntimeException("Expecting at least 1 parameter for geography logical type, got " + params.size());
+     }
+     String crs = params.get(0); // present: the size was checked above
+     String edgeAlgorithm = params.size() > 1 ? params.get(1) : null;
+     ByteBuffer metadata = params.size() > 2
+         ? ByteBuffer.wrap(params.get(2).getBytes(java.nio.charset.StandardCharsets.UTF_8)) : null;
+     return geographyType(crs, edgeAlgorithm, metadata);
+   }
};
protected abstract LogicalTypeAnnotation fromString(List params);
@@ -316,6 +348,22 @@ public static Float16LogicalTypeAnnotation float16Type() {
return Float16LogicalTypeAnnotation.INSTANCE;
}
+ public static GeometryLogicalTypeAnnotation geometryType(String crs, ByteBuffer metadata) {
+ return new GeometryLogicalTypeAnnotation(crs, metadata); // both arguments are stored as given, without validation
+ }
+
+ public static GeometryLogicalTypeAnnotation geometryType(ByteBuffer metadata) {
+ return new GeometryLogicalTypeAnnotation(DEFAULT_GEOMETRY_CRS, metadata); // crs falls back to DEFAULT_GEOMETRY_CRS ("OGC:CRS84")
+ }
+
+ public static GeometryLogicalTypeAnnotation geometryType() {
+ return new GeometryLogicalTypeAnnotation(DEFAULT_GEOMETRY_CRS, null); // default crs, no metadata
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType(String crs, String edgeAlgorithm, ByteBuffer metadata) {
+ return new GeographyLogicalTypeAnnotation(crs, edgeAlgorithm, metadata); // arguments are stored as given; no default crs is applied here
+ }
+
public static class StringLogicalTypeAnnotation extends LogicalTypeAnnotation {
private static final StringLogicalTypeAnnotation INSTANCE = new StringLogicalTypeAnnotation();
@@ -1091,6 +1139,156 @@ public int hashCode() {
}
}
+ /** Logical type annotation for GEOMETRY values, carrying a CRS string and optional metadata bytes. */
+ public static class GeometryLogicalTypeAnnotation extends LogicalTypeAnnotation {
+   private final String crs;
+   private final ByteBuffer metadata;
+
+   private GeometryLogicalTypeAnnotation(String crs, ByteBuffer metadata) {
+     this.crs = crs;
+     this.metadata = metadata;
+   }
+
+   @Override
+   @Deprecated
+   public OriginalType toOriginalType() {
+     return null;
+   }
+
+   @Override
+   public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) {
+     return logicalTypeAnnotationVisitor.visit(this);
+   }
+
+   @Override
+   LogicalTypeToken getType() {
+     return LogicalTypeToken.GEOMETRY;
+   }
+
+   // NOTE(review): the output always starts with "(," and metadata is rendered through
+   // ByteBuffer.toString(); confirm that this matches what GEOMETRY.fromString expects to parse.
+   @Override
+   protected String typeParametersAsString() {
+     StringBuilder sb = new StringBuilder("(,");
+     if (crs != null && !crs.isEmpty()) {
+       sb.append(",").append(crs);
+     }
+     if (metadata != null) {
+       sb.append(",").append(metadata);
+     }
+     return sb.append(")").toString();
+   }
+
+   public String getCrs() {
+     return crs;
+   }
+
+   public ByteBuffer getMetadata() {
+     return metadata;
+   }
+
+   /** Two geometry annotations are equal when their crs values are equal; metadata is not compared. */
+   @Override
+   public boolean equals(Object obj) {
+     if (!(obj instanceof GeometryLogicalTypeAnnotation)) {
+       return false;
+     }
+     GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation) obj;
+     // crs is stored unchecked by the constructor and may be null, so crs.equals(...) could throw
+     return Objects.equals(crs, other.crs);
+   }
+
+   @Override
+   public int hashCode() {
+     return Objects.hash(crs);
+   }
+
+   @Override
+   PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+     return PrimitiveStringifier.WKB_STRINGIFIER;
+   }
+ }
+
+ /** Logical type annotation for GEOGRAPHY values, carrying a CRS string, an edge algorithm name and optional metadata bytes. */
+ public static class GeographyLogicalTypeAnnotation extends LogicalTypeAnnotation {
+   private final String crs;
+   private final String edgeAlgorithm;
+   private final ByteBuffer metadata;
+
+   private GeographyLogicalTypeAnnotation(String crs, String edgeAlgorithm, ByteBuffer metadata) {
+     this.crs = crs;
+     this.edgeAlgorithm = edgeAlgorithm;
+     this.metadata = metadata;
+   }
+
+   @Override
+   @Deprecated
+   public OriginalType toOriginalType() {
+     return null;
+   }
+
+   @Override
+   public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) {
+     return logicalTypeAnnotationVisitor.visit(this);
+   }
+
+   @Override
+   LogicalTypeToken getType() {
+     return LogicalTypeToken.GEOGRAPHY;
+   }
+
+   // NOTE(review): the output always starts with "(," and metadata is rendered through
+   // ByteBuffer.toString(); confirm that this matches what GEOGRAPHY.fromString expects to parse.
+   @Override
+   protected String typeParametersAsString() {
+     StringBuilder sb = new StringBuilder("(,");
+     if (crs != null && !crs.isEmpty()) {
+       sb.append(",").append(crs);
+     }
+     if (edgeAlgorithm != null) {
+       sb.append(",").append(edgeAlgorithm);
+     }
+     if (metadata != null) {
+       sb.append(",").append(metadata);
+     }
+     return sb.append(")").toString();
+   }
+
+   public String getCrs() {
+     return crs;
+   }
+
+   public String getEdgeAlgorithm() {
+     return edgeAlgorithm;
+   }
+
+   public ByteBuffer getMetadata() {
+     return metadata;
+   }
+
+   /** Two geography annotations are equal when crs and edge algorithm are both equal; metadata is not compared. */
+   @Override
+   public boolean equals(Object obj) {
+     if (!(obj instanceof GeographyLogicalTypeAnnotation)) {
+       return false;
+     }
+     GeographyLogicalTypeAnnotation other = (GeographyLogicalTypeAnnotation) obj;
+     // Null-safe: both fields are stored unchecked by the constructor
+     return Objects.equals(crs, other.crs) && Objects.equals(edgeAlgorithm, other.edgeAlgorithm);
+   }
+
+   @Override
+   public int hashCode() {
+     // Hashes exactly the fields that equals compares
+     return Objects.hash(crs, edgeAlgorithm);
+   }
+
+   @Override
+   PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+     return PrimitiveStringifier.WKB_STRINGIFIER;
+   }
+ }
+
/**
* Implement this interface to visit a logical type annotation in the schema.
* The default implementation for each logical type specific visitor method is empty.
@@ -1162,5 +1360,13 @@ default Optional visit(MapKeyValueTypeAnnotation mapKeyValueLogicalType) {
default Optional visit(Float16LogicalTypeAnnotation float16LogicalType) {
return empty();
}
+
+ default Optional visit(GeometryLogicalTypeAnnotation geometryLogicalType) {
+ return empty(); // no result unless the visitor overrides this method
+ }
+
+ default Optional visit(GeographyLogicalTypeAnnotation geographyLogicalType) {
+ return empty(); // no result unless the visitor overrides this method
+ }
}
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
index c46e94367f..bb5c8a9474 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
@@ -35,6 +35,9 @@
import java.util.concurrent.TimeUnit;
import javax.naming.OperationNotSupportedException;
import org.apache.parquet.io.api.Binary;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.io.ParseException;
+import org.locationtech.jts.io.WKBReader;
/**
* Class that provides string representations for the primitive values. These string values are to be used for
@@ -449,4 +452,20 @@ String stringifyNotNull(Binary value) {
return Float16.toFloatString(value);
}
};
+
+ /** Renders a binary value by parsing it as WKB and returning {@code Geometry.toText()}; bytes that do not parse yield {@code BINARY_INVALID}. */
+ static final PrimitiveStringifier WKB_STRINGIFIER = new BinaryStringifierBase("WKB_STRINGIFIER") {
+   @Override
+   String stringifyNotNull(Binary value) {
+     byte[] wkb = value.getBytesUnsafe();
+     String text;
+     // A new reader is created for every value rather than shared
+     try {
+       text = new WKBReader().read(wkb).toText();
+     } catch (ParseException e) {
+       text = BINARY_INVALID;
+     }
+     return text;
+   }
+ };
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index e74d7cde02..f08e20333d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -271,6 +271,14 @@ public Optional visit(
LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
}
+
+ @Override
+ public Optional visit(
+ LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
+ // ColumnOrder is undefined for GEOMETRY logical type. Use the default comparator for
+ // now. NOTE(review): no matching override for GeographyLogicalTypeAnnotation is visible in this hunk; confirm that a BINARY geography column cannot reach the orElseThrow below.
+ return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+ }
})
.orElseThrow(() -> new ShouldNeverHappenException(
"No comparator logic implemented for BINARY logical type: " + logicalType));
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index 5bc2f89f47..b69e3f7f6f 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -571,6 +571,18 @@ public Optional visit(
return checkBinaryPrimitiveType(enumLogicalType);
}
+ @Override
+ public Optional visit(
+ LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
+ return checkBinaryPrimitiveType(geometryLogicalType); // delegates to the same check the enum visitor above uses
+ }
+
+ @Override
+ public Optional visit(
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) {
+ return checkBinaryPrimitiveType(geographyLogicalType); // delegates to the same check the enum visitor above uses
+ }
+
private Optional checkFixedPrimitiveType(
int l, LogicalTypeAnnotation logicalTypeAnnotation) {
Preconditions.checkState(
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/statistics/geometry/BoundingBoxTest.java b/parquet-column/src/test/java/org/apache/parquet/column/statistics/geometry/BoundingBoxTest.java
new file mode 100644
index 0000000000..f0f01e5c86
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/statistics/geometry/BoundingBoxTest.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.statistics.geometry;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.GeometryFactory;
+import org.locationtech.jts.geom.Point;
+import org.locationtech.jts.geom.Polygon;
+
+ /** Exercises BoundingBox.update with a planar point and with polygons around the antimeridian. */
+ public class BoundingBoxTest {
+
+   private static final String CRS = "EPSG:4326";
+
+   /** A single 2D point yields a box whose minimum and maximum coincide on both axes. */
+   @Test
+   public void testUpdate() {
+     GeometryFactory geometryFactory = new GeometryFactory();
+     BoundingBox boundingBox = new BoundingBox();
+
+     Point point2D = geometryFactory.createPoint(new Coordinate(10, 20));
+     boundingBox.update(point2D, CRS);
+
+     assertBounds(boundingBox, 10.0, 10.0, 20.0, 20.0);
+   }
+
+   /**
+    * Feeds three triangles in turn: one east of the antimeridian, one crossing it and one west of it,
+    * checking the accumulated box after each update.
+    */
+   @Test
+   public void testWraparound() {
+     GeometryFactory geometryFactory = new GeometryFactory();
+     BoundingBox boundingBox = new BoundingBox();
+
+     // Longitude range [170, 175]: an ordinary box
+     Polygon east = geometryFactory.createPolygon(new Coordinate[] {
+       new Coordinate(170, 10), new Coordinate(175, 15), new Coordinate(170, 15), new Coordinate(170, 10)
+     });
+     boundingBox.update(east, CRS);
+     assertBounds(boundingBox, 170.0, 175.0, 10.0, 15.0);
+
+     // Vertices at 175 and -175: the expected box wraps, i.e. xMin (175) is greater than xMax (-175).
+     // NOTE(review): xMin is expected to move from 170 to 175 here - confirm that this is intended.
+     Polygon crossing = geometryFactory.createPolygon(new Coordinate[] {
+       new Coordinate(175, -10), new Coordinate(-175, -5), new Coordinate(175, -5), new Coordinate(175, -10)
+     });
+     boundingBox.update(crossing, CRS);
+     assertBounds(boundingBox, 175.0, -175.0, -10.0, 15.0);
+
+     // Longitude range [-170, -165]: only yMax is expected to change.
+     // NOTE(review): -165 lies outside the wrapped range [175, -175] - confirm that xMax staying at -175 is intended.
+     Polygon west = geometryFactory.createPolygon(new Coordinate[] {
+       new Coordinate(-170, 20), new Coordinate(-165, 25), new Coordinate(-170, 25), new Coordinate(-170, 20)
+     });
+     boundingBox.update(west, CRS);
+     assertBounds(boundingBox, 175.0, -175.0, -10.0, 25.0);
+   }
+
+   /**
+    * Asserts the four limits of {@code box} with a delta of 0.0.
+    * The checks run in the order xMin, xMax, yMin, yMax.
+    */
+   private static void assertBounds(BoundingBox box, double xMin, double xMax, double yMin, double yMax) {
+     Assert.assertEquals(xMin, box.getXMin(), 0.0);
+     Assert.assertEquals(xMax, box.getXMax(), 0.0);
+     Assert.assertEquals(yMin, box.getYMin(), 0.0);
+     Assert.assertEquals(yMax, box.getYMax(), 0.0);
+   }
+ }
diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml
index d4aa4b42a7..48e1d639e9 100644
--- a/parquet-hadoop/pom.xml
+++ b/parquet-hadoop/pom.xml
@@ -135,6 +135,12 @@
jar
compile