Skip to content

Commit

Permalink
Use direct dependency on Parquet
Browse files: browse the repository at this point in the history
  • Loading branch information
electrum committed Jul 28, 2023
1 parent 82b3ace commit 4bb2d7f
Show file tree
Hide file tree
Showing 11 changed files with 284 additions and 41 deletions.
14 changes: 14 additions & 0 deletions lib/trino-hive-formats/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,26 @@
<scope>provided</scope>
</dependency>

<!-- TODO: remove after making Avro use Aircompressor -->
<dependency>
<groupId>com.github.luben</groupId>
<artifactId>zstd-jni</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-hadoop-toolkit</artifactId>
<scope>runtime</scope>
</dependency>

<!-- TODO: remove after making Avro use Aircompressor -->
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>testing</artifactId>
Expand Down
36 changes: 31 additions & 5 deletions lib/trino-parquet/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@
<artifactId>trino-plugin-toolkit</artifactId>
</dependency>

<dependency>
<groupId>io.trino.hive</groupId>
<artifactId>hive-apache</artifactId>
</dependency>

<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
Expand All @@ -71,6 +66,31 @@
<artifactId>joda-time</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-common</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-encoding</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-format-structures</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-spi</artifactId>
Expand Down Expand Up @@ -137,6 +157,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.trino.hive</groupId>
<artifactId>hive-apache</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.trino.tpch</groupId>
<artifactId>tpch</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@
import static java.lang.Boolean.TRUE;
import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.WRITER_TIMEZONE;
import static org.apache.parquet.format.Util.readFileMetaData;
import static org.apache.parquet.format.converter.ParquetMetadataConverterUtil.getLogicalTypeAnnotation;

Expand Down Expand Up @@ -401,7 +400,7 @@ private static void validateFileMetadata(ParquetDataSourceId dataSourceId, org.a
ParquetWriteValidation writeValidation = parquetWriteValidation.get();
writeValidation.validateTimeZone(
dataSourceId,
Optional.ofNullable(fileMetaData.getKeyValueMetaData().get(WRITER_TIMEZONE)));
Optional.ofNullable(fileMetaData.getKeyValueMetaData().get("writer.time.zone")));
writeValidation.validateColumns(dataSourceId, fileMetaData.getSchema());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@
import static java.nio.charset.StandardCharsets.US_ASCII;
import static java.util.Collections.nCopies;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.WRITER_TIMEZONE;
import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0;

public class ParquetWriter
Expand Down Expand Up @@ -349,7 +348,7 @@ Slice getFooter(List<RowGroup> rowGroups, MessageType messageType)
fileMetaData.setSchema(MessageTypeConverter.toParquetSchema(messageType));
// Added based on org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport
parquetTimeZone.ifPresent(dateTimeZone -> fileMetaData.setKey_value_metadata(
ImmutableList.of(new KeyValue(WRITER_TIMEZONE).setValue(dateTimeZone.getID()))));
ImmutableList.of(new KeyValue("writer.time.zone").setValue(dateTimeZone.getID()))));
long totalRows = rowGroups.stream().mapToLong(RowGroup::getNum_rows).sum();
fileMetaData.setNum_rows(totalRows);
fileMetaData.setRow_groups(ImmutableList.copyOf(rowGroups));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.parquet.format.converter;

import org.apache.parquet.format.ConvertedType;
import org.apache.parquet.format.LogicalType;
import org.apache.parquet.format.SchemaElement;
import org.apache.parquet.schema.LogicalTypeAnnotation;

/**
 * Static pass-through helpers for a handful of {@link ParquetMetadataConverter}
 * conversion methods.
 *
 * <p>NOTE(review): this class is declared in the converter's own package,
 * presumably so it can reach methods that are not public there - confirm
 * against the Parquet version in use.
 */
public final class ParquetMetadataConverterUtil
{
    // Static-only holder; the private constructor prevents instantiation.
    private ParquetMetadataConverterUtil() {}

    /**
     * Delegates to {@code converter.getLogicalTypeAnnotation(type, element)}.
     *
     * @param converter the converter instance that performs the lookup
     * @param type the converted type to translate
     * @param element the schema element passed through to the converter
     * @return whatever the converter returns for the given arguments
     */
    public static LogicalTypeAnnotation getLogicalTypeAnnotation(ParquetMetadataConverter converter, ConvertedType type, SchemaElement element)
    {
        LogicalTypeAnnotation resolved = converter.getLogicalTypeAnnotation(type, element);
        return resolved;
    }

    /**
     * Delegates to {@code converter.getLogicalTypeAnnotation(type)}.
     *
     * @param converter the converter instance that performs the lookup
     * @param type the format-level logical type to translate
     * @return whatever the converter returns for the given logical type
     */
    public static LogicalTypeAnnotation getLogicalTypeAnnotation(ParquetMetadataConverter converter, LogicalType type)
    {
        LogicalTypeAnnotation resolved = converter.getLogicalTypeAnnotation(type);
        return resolved;
    }

    /**
     * Delegates to {@code converter.convertToLogicalType(annotation)}.
     *
     * @param converter the converter instance that performs the conversion
     * @param annotation the schema-level annotation to translate
     * @return whatever the converter returns for the given annotation
     */
    public static LogicalType convertToLogicalType(ParquetMetadataConverter converter, LogicalTypeAnnotation annotation)
    {
        LogicalType converted = converter.convertToLogicalType(annotation);
        return converted;
    }
}
25 changes: 20 additions & 5 deletions plugin/trino-delta-lake/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,6 @@
<artifactId>hadoop-apache</artifactId>
</dependency>

<dependency>
<groupId>io.trino.hive</groupId>
<artifactId>hive-apache</artifactId>
</dependency>

<dependency>
<groupId>jakarta.annotation</groupId>
<artifactId>jakarta.annotation-api</artifactId>
Expand All @@ -184,6 +179,26 @@
<artifactId>antlr4-runtime</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-common</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-format-structures</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
</dependency>

<dependency>
<groupId>org.roaringbitmap</groupId>
<artifactId>RoaringBitmap</artifactId>
Expand Down
26 changes: 20 additions & 6 deletions plugin/trino-hive/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,26 @@
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-common</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-format-structures</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
</dependency>

<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
Expand Down Expand Up @@ -326,12 +346,6 @@
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>testing</artifactId>
Expand Down
23 changes: 17 additions & 6 deletions plugin/trino-hudi/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@
<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-hive</artifactId>
<exclusions>
<!-- TODO: remove when removed from trino-hive -->
<exclusion>
<groupId>io.trino.hive</groupId>
<artifactId>hive-apache</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
Expand All @@ -110,11 +117,6 @@
<artifactId>trino-plugin-toolkit</artifactId>
</dependency>

<dependency>
<groupId>io.trino.hive</groupId>
<artifactId>hive-apache</artifactId>
</dependency>

<dependency>
<groupId>jakarta.annotation</groupId>
<artifactId>jakarta.annotation-api</artifactId>
Expand All @@ -135,6 +137,16 @@
<artifactId>avro</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
</dependency>

<dependency>
<groupId>org.weakref</groupId>
<artifactId>jmxutils</artifactId>
Expand Down Expand Up @@ -396,5 +408,4 @@
</plugin>
</plugins>
</build>

</project>
43 changes: 38 additions & 5 deletions plugin/trino-iceberg/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@
<groupId>io.airlift</groupId>
<artifactId>http-client</artifactId>
</exclusion>
<!-- TODO: remove when removed from trino-hive -->
<exclusion>
<groupId>io.trino.hive</groupId>
<artifactId>hive-apache</artifactId>
</exclusion>
</exclusions>
</dependency>

Expand All @@ -166,11 +171,6 @@
<artifactId>trino-plugin-toolkit</artifactId>
</dependency>

<dependency>
<groupId>io.trino.hive</groupId>
<artifactId>hive-apache</artifactId>
</dependency>

<dependency>
<groupId>io.trino.hive</groupId>
<artifactId>hive-thrift</artifactId>
Expand Down Expand Up @@ -234,6 +234,26 @@
<artifactId>iceberg-parquet</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-common</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-format-structures</artifactId>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
</dependency>

<dependency>
<groupId>org.jdbi</groupId>
<artifactId>jdbi3-core</artifactId>
Expand Down Expand Up @@ -559,6 +579,19 @@
</dependencies>

<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<configuration>
<ignoredNonTestScopedDependencies>
<ignoredNonTestScopedDependency>org.apache.parquet:parquet-common</ignoredNonTestScopedDependency>
</ignoredNonTestScopedDependencies>
</configuration>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.antlr</groupId>
Expand Down
Loading

0 comments on commit 4bb2d7f

Please sign in to comment.