diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java b/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java index d8818c559983..330461f157fc 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java @@ -21,8 +21,8 @@ import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.PrimitiveColumnIO; -import org.apache.parquet.schema.DecimalMetadata; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; import javax.annotation.Nullable; @@ -34,7 +34,6 @@ import java.util.Optional; import static com.google.common.base.Preconditions.checkArgument; -import static org.apache.parquet.schema.OriginalType.DECIMAL; import static org.apache.parquet.schema.Type.Repetition.REPEATED; public final class ParquetTypeUtils @@ -80,7 +79,7 @@ public static ColumnIO getArrayElementColumn(ColumnIO columnIO) * } */ if (columnIO instanceof GroupColumnIO && - columnIO.getType().getOriginalType() == null && + columnIO.getType().getLogicalTypeAnnotation() == null && ((GroupColumnIO) columnIO).getChildrenCount() == 1 && !columnIO.getName().equals("array") && !columnIO.getName().equals(columnIO.getParent().getName() + "_tuple")) { @@ -221,11 +220,11 @@ public static ColumnIO lookupColumnById(GroupColumnIO groupColumnIO, int columnI public static Optional createDecimalType(RichColumnDescriptor descriptor) { - if (descriptor.getPrimitiveType().getOriginalType() != DECIMAL) { + if (descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof DecimalLogicalTypeAnnotation) { return Optional.empty(); } - DecimalMetadata decimalMetadata = descriptor.getPrimitiveType().getDecimalMetadata(); - return Optional.of(DecimalType.createDecimalType(decimalMetadata.getPrecision(), decimalMetadata.getScale())); + DecimalLogicalTypeAnnotation decimalLogicalType = (DecimalLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation(); + return Optional.of(DecimalType.createDecimalType(decimalLogicalType.getPrecision(), decimalLogicalType.getScale())); } /** diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java index 6eba0774a0e8..2386082331ff 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java @@ -35,8 +35,8 @@ import org.apache.parquet.internal.filter2.columnindex.RowRanges; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.TimeLogicalTypeAnnotation; import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.joda.time.DateTimeZone; @@ -105,13 +105,16 @@ public static PrimitiveColumnReader createReader(RichColumnDescriptor descriptor case INT32: return createDecimalColumnReader(descriptor).orElse(new IntColumnReader(descriptor)); case INT64: - if (descriptor.getPrimitiveType().getOriginalType() == OriginalType.TIME_MICROS) { + if (descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof TimeLogicalTypeAnnotation && + ((TimeLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation()).getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) { return new TimeMicrosColumnReader(descriptor); } - if (descriptor.getPrimitiveType().getOriginalType() == OriginalType.TIMESTAMP_MICROS) { + if (descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof TimestampLogicalTypeAnnotation && + ((TimestampLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation()).getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) { return new TimestampMicrosColumnReader(descriptor); } - if (descriptor.getPrimitiveType().getOriginalType() == OriginalType.TIMESTAMP_MILLIS) { + if (descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof TimestampLogicalTypeAnnotation && + ((TimestampLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation()).getUnit() == LogicalTypeAnnotation.TimeUnit.MILLIS) { return new Int64TimestampMillisColumnReader(descriptor); } if (descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof TimestampLogicalTypeAnnotation && diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetSchemaConverter.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetSchemaConverter.java index 1c5d07fe3eb6..c3a51f8a71ed 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetSchemaConverter.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetSchemaConverter.java @@ -28,7 +28,6 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type.Repetition; import org.apache.parquet.schema.Types; @@ -143,7 +142,7 @@ private org.apache.parquet.schema.Type getPrimitiveType(Type type, String name, .named(name); } if (DATE.equals(type)) { - return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name); + return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(LogicalTypeAnnotation.dateType()).named(name); } if (BIGINT.equals(type)) { return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition).named(name); diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java index 804c75e74ea5..72b6153f688d 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java @@ -15,8 +15,8 @@ import com.google.common.collect.Lists; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; @@ -24,8 +24,6 @@ import java.util.List; import static com.google.common.base.Preconditions.checkArgument; -import static org.apache.parquet.schema.OriginalType.LIST; -import static org.apache.parquet.schema.OriginalType.MAP; import static org.apache.parquet.schema.Type.Repetition.REPEATED; // Code from iceberg @@ -44,8 +42,8 @@ else if (type.isPrimitive()) { else { // if not a primitive, the typeId must be a group GroupType group = type.asGroupType(); - OriginalType annotation = group.getOriginalType(); - if (annotation == LIST) { + LogicalTypeAnnotation annotation = group.getLogicalTypeAnnotation(); + if (LogicalTypeAnnotation.listType().equals(annotation)) { checkArgument(!group.isRepetition(REPEATED), "Invalid list: top-level group is repeated: " + group); checkArgument(group.getFieldCount() == 1, @@ -70,7 +68,7 @@ else if (type.isPrimitive()) { visitor.fieldNames.pop(); } } - else if (annotation == MAP) { + else if (LogicalTypeAnnotation.mapType().equals(annotation)) { checkArgument(!group.isRepetition(REPEATED), "Invalid map: top-level group is repeated: " + group); checkArgument(group.getFieldCount() == 1, diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriters.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriters.java index 8909ea0d3b57..dcda643e2301 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriters.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriters.java @@ -47,9 +47,9 @@ import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.TimeLogicalTypeAnnotation; import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.joda.time.DateTimeZone; @@ -215,7 +215,7 @@ private static PrimitiveValueWriter getValueWriter(ValuesWriter valuesWriter, Ty return new DateValueWriter(valuesWriter, parquetType); } if (TIME_MICROS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIME_MICROS); + verifyParquetType(type, parquetType, TimeLogicalTypeAnnotation.class, isTime(LogicalTypeAnnotation.TimeUnit.MICROS)); return new TimeMicrosValueWriter(valuesWriter, parquetType); } if (type instanceof TimestampType) { @@ -224,28 +224,23 @@ private static PrimitiveValueWriter getValueWriter(ValuesWriter valuesWriter, Ty return new Int96TimestampValueWriter(valuesWriter, type, parquetType, parquetTimeZone.get()); } if (TIMESTAMP_MILLIS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIMESTAMP_MILLIS); verifyParquetType(type, parquetType, TimestampLogicalTypeAnnotation.class, isTimestamp(LogicalTypeAnnotation.TimeUnit.MILLIS)); return new TimestampMillisValueWriter(valuesWriter, type, parquetType); } if (TIMESTAMP_MICROS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIMESTAMP_MICROS); verifyParquetType(type, parquetType, TimestampLogicalTypeAnnotation.class, isTimestamp(LogicalTypeAnnotation.TimeUnit.MICROS)); return new BigintValueWriter(valuesWriter, type, parquetType); } if (TIMESTAMP_NANOS.equals(type)) { - verifyParquetType(type, parquetType, (OriginalType) null); // no OriginalType for timestamp NANOS verifyParquetType(type, parquetType, TimestampLogicalTypeAnnotation.class, isTimestamp(LogicalTypeAnnotation.TimeUnit.NANOS)); return new TimestampNanosValueWriter(valuesWriter, type, parquetType); } } if (TIMESTAMP_TZ_MILLIS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIMESTAMP_MILLIS); return new TimestampTzMillisValueWriter(valuesWriter, parquetType); } if (TIMESTAMP_TZ_MICROS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIMESTAMP_MICROS); return new TimestampTzMicrosValueWriter(valuesWriter, parquetType); } if (DOUBLE.equals(type)) { @@ -264,11 +259,6 @@ private static PrimitiveValueWriter getValueWriter(ValuesWriter valuesWriter, Ty throw new TrinoException(NOT_SUPPORTED, format("Unsupported type for Parquet writer: %s", type)); } - private static void verifyParquetType(Type type, PrimitiveType parquetType, OriginalType originalType) - { - checkArgument(parquetType.getOriginalType() == originalType, "Wrong Parquet type '%s' for Trino type '%s'", parquetType, type); - } - private static void verifyParquetType(Type type, PrimitiveType parquetType, Class annotationType, Predicate predicate) { checkArgument( @@ -277,6 +267,14 @@ private static void verifyParquetType(Type type, PrimitiveType parquetType, "Wrong Parquet type '%s' for Trino type '%s'", parquetType, type); } + private static Predicate isTime(LogicalTypeAnnotation.TimeUnit precision) + { + requireNonNull(precision, "precision is null"); + return annotation -> annotation.getUnit() == precision && + // isAdjustedToUTC=false indicates Local semantics (timestamps not normalized to UTC) + !annotation.isAdjustedToUTC(); + } + private static Predicate isTimestamp(LogicalTypeAnnotation.TimeUnit precision) { requireNonNull(precision, "precision is null"); diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnIndexBuilder.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnIndexBuilder.java index b0f8edb19d32..d6944356fc01 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnIndexBuilder.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnIndexBuilder.java @@ -24,6 +24,7 @@ import org.apache.parquet.internal.column.columnindex.ColumnIndex; import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder; import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Types; import org.testng.annotations.Test; @@ -51,9 +52,6 @@ import static org.apache.parquet.filter2.predicate.FilterApi.notEq; import static org.apache.parquet.filter2.predicate.FilterApi.userDefined; import static org.apache.parquet.filter2.predicate.LogicalInverter.invert; -import static org.apache.parquet.schema.OriginalType.DECIMAL; -import static org.apache.parquet.schema.OriginalType.UINT_8; -import static org.apache.parquet.schema.OriginalType.UTF8; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE; @@ -265,7 +263,7 @@ public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics fileCreatedBy, St if (max != null) { statistics.setMax(max.getBytes(UTF_8)); } - assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8))) + assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("Test column"))) .isInstanceOfSatisfying(BinaryStatistics.class, columnStatistics -> { assertFalse(columnStatistics.isEmpty()); @@ -437,7 +438,7 @@ private void testReadStatsBinaryUtf8OldWriter(Optional fileCreatedBy, St @Test(dataProvider = "allCreatedBy") public void testReadStatsBinaryUtf8(Optional fileCreatedBy) { - PrimitiveType varchar = new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8); + PrimitiveType varchar = Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("Test column"); Statistics statistics; // Stats written by Parquet after https://issues.apache.org/jira/browse/PARQUET-1025 @@ -477,7 +478,7 @@ public void testReadNullStats(Optional fileCreatedBy) columnStatistics -> assertTrue(columnStatistics.isEmpty())); // varchar - assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8))) + assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("Test column"))) .isInstanceOfSatisfying( BinaryStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty()));