From decd761fe86b38bc7ba7674f92e9d5ad50eb3c69 Mon Sep 17 00:00:00 2001
From: Raunaq Morarka
Date: Wed, 10 May 2023 11:46:06 +0530
Subject: [PATCH] Fix reporting of row group size by parquet writer

Output of parquet-tools

Before
Row group 25:  count: 2240892  26.32 B records  start: 1479039171  total(compressed): 56.243 MB  total(uncompressed): 56.243 MB

After
Row group 25:  count: 2244256  26.34 B records  start: 1479178837  total(compressed): 56.370 MB  total(uncompressed): 167.418 MB
---
 .../src/main/java/io/trino/parquet/writer/ParquetWriter.java | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java
index ad519113224..31ed9c3bd1d 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java
@@ -374,9 +374,10 @@ Slice getFooter(List<RowGroup> rowGroups, MessageType messageType)
 
     private void updateRowGroups(List<ColumnMetaData> columnMetaData)
     {
-        long totalBytes = columnMetaData.stream().mapToLong(ColumnMetaData::getTotal_compressed_size).sum();
+        long totalCompressedBytes = columnMetaData.stream().mapToLong(ColumnMetaData::getTotal_compressed_size).sum();
+        long totalBytes = columnMetaData.stream().mapToLong(ColumnMetaData::getTotal_uncompressed_size).sum();
         ImmutableList<org.apache.parquet.format.ColumnChunk> columnChunks = columnMetaData.stream().map(ParquetWriter::toColumnChunk).collect(toImmutableList());
-        rowGroupBuilder.add(new RowGroup(columnChunks, totalBytes, rows));
+        rowGroupBuilder.add(new RowGroup(columnChunks, totalBytes, rows).setTotal_compressed_size(totalCompressedBytes));
     }
 
     private static org.apache.parquet.format.ColumnChunk toColumnChunk(ColumnMetaData metaData)
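
Note (not part of the patch): a minimal sketch of how the corrected footer metadata could be
verified with parquet-mr's ParquetFileReader, assuming a hypothetical file path. After this
change, a row group's getTotalByteSize() should report the uncompressed size while
getCompressedSize() reports the compressed size, matching the parquet-tools output above.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.parquet.hadoop.ParquetFileReader;
    import org.apache.parquet.hadoop.metadata.BlockMetaData;
    import org.apache.parquet.hadoop.metadata.ParquetMetadata;
    import org.apache.parquet.hadoop.util.HadoopInputFile;

    public class RowGroupSizeCheck
    {
        public static void main(String[] args)
                throws Exception
        {
            // Hypothetical path to a file produced by the Trino parquet writer
            Path path = new Path("/tmp/example.parquet");
            try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
                ParquetMetadata footer = reader.getFooter();
                for (BlockMetaData block : footer.getBlocks()) {
                    // Print per-row-group counts and the uncompressed vs compressed sizes from the footer
                    System.out.printf("rows=%d total(uncompressed)=%d total(compressed)=%d%n",
                            block.getRowCount(), block.getTotalByteSize(), block.getCompressedSize());
                }
            }
        }
    }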