Use ChunkedSliceOutput to chunk and buffer writes in parquet #18564

Merged 3 commits on Aug 10, 2023
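Rough summary of the approach, paraphrased from the diff rather than an authored description: instead of keeping each page header and compressed page as a separate `BytesInput`-backed output, the writer appends them to a single `ChunkedSliceOutput`, which allocates memory in bounded chunks and replays those chunks into the file. The sketch below uses only the `ChunkedSliceOutput` constructor and methods that actually appear in this diff; the surrounding class and variable names are illustrative.

```java
import io.airlift.slice.SliceOutput;
import io.trino.plugin.base.io.ChunkedSliceOutput;

final class ChunkedBufferingDemo
{
    static void demo(byte[] somePageBytes, SliceOutput fileOutput)
    {
        // same bounds PrimitiveColumnWriter uses in this PR: chunks start at 8 kB and cap at 2 MB
        ChunkedSliceOutput buffer = new ChunkedSliceOutput(8 * 1024, 2 * 1024 * 1024);

        // writes grow the buffer chunk by chunk instead of resizing a single large byte[]
        buffer.writeBytes(somePageBytes);

        // size() counts bytes written; getRetainedSize() is what feeds ColumnWriter#getRetainedBytes
        System.out.printf("buffered=%s retained=%s%n", buffer.size(), buffer.getRetainedSize());

        // replay the buffered chunks into the file output, as ParquetDataOutput#writeData now does
        buffer.getSlices().forEach(fileOutput::writeBytes);
    }
}
```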
@@ -17,7 +17,6 @@
import io.airlift.compress.snappy.SnappyCompressor;
import io.airlift.compress.zstd.ZstdCompressor;
import io.airlift.slice.Slices;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.format.CompressionCodec;

import java.io.ByteArrayOutputStream;
@@ -68,7 +67,7 @@ public ParquetDataOutput compress(byte[] input)
try (GZIPOutputStream outputStream = new GZIPOutputStream(byteArrayOutputStream)) {
outputStream.write(input, 0, input.length);
}
return createDataOutput(BytesInput.from(byteArrayOutputStream));
return createDataOutput(byteArrayOutputStream);
}
}

@@ -15,8 +15,9 @@

import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import org.apache.parquet.bytes.BytesInput;
import io.trino.plugin.base.io.ChunkedSliceOutput;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import static java.util.Objects.requireNonNull;
@@ -29,7 +30,7 @@ static ParquetDataOutput createDataOutput(Slice slice)
return new ParquetDataOutput()
{
@Override
public long size()
public int size()
{
return slice.length();
}
@@ -42,22 +43,41 @@ public void writeData(SliceOutput sliceOutput)
};
}

static ParquetDataOutput createDataOutput(BytesInput bytesInput)
static ParquetDataOutput createDataOutput(ChunkedSliceOutput chunkedSliceOutput)
{
requireNonNull(bytesInput, "bytesInput is null");
requireNonNull(chunkedSliceOutput, "chunkedSliceOutput is null");
return new ParquetDataOutput()
{
@Override
public long size()
public int size()
{
return bytesInput.size();
return chunkedSliceOutput.size();
}

@Override
public void writeData(SliceOutput sliceOutput)
{
chunkedSliceOutput.getSlices().forEach(sliceOutput::writeBytes);
}
};
}

static ParquetDataOutput createDataOutput(ByteArrayOutputStream byteArrayOutputStream)
{
requireNonNull(byteArrayOutputStream, "byteArrayOutputStream is null");
return new ParquetDataOutput()
{
@Override
public int size()
{
return byteArrayOutputStream.size();
}

@Override
public void writeData(SliceOutput sliceOutput)
{
try {
bytesInput.writeAllTo(sliceOutput);
byteArrayOutputStream.writeTo(sliceOutput);
}
catch (IOException e) {
throw new RuntimeException(e);
@@ -69,7 +89,7 @@ public void writeData(SliceOutput sliceOutput)
/**
* Number of bytes that will be written.
*/
long size();
int size();

/**
* Writes data to the output. The output must be exactly
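For orientation, a hedged sketch of how a list of `ParquetDataOutput`s is drained into the file; only `size()` and `writeData(SliceOutput)` come from the interface above (assumed to be in the same package or on the classpath), the helper itself is illustrative.

```java
import io.airlift.slice.SliceOutput;
import java.util.List;

final class DataOutputSinkSketch
{
    static long writeAll(List<ParquetDataOutput> outputs, SliceOutput fileOutput)
    {
        long written = 0;
        for (ParquetDataOutput output : outputs) {
            // size() is now an int: each output covers at most one buffered page header, page, or column chunk
            written += output.size();
            output.writeData(fileOutput);
        }
        // callers can reconcile `written` against file offsets; totals still sum up as longs
        return written;
    }
}
```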
@@ -203,6 +203,7 @@ public void close()
try (outputStream) {
columnWriters.forEach(ColumnWriter::close);
flush();
columnWriters = ImmutableList.of();
writeFooter();
}
bufferedBytes = 0;
@@ -298,7 +299,9 @@ private void flush()
if (rows == 0) {
// Avoid writing empty row groups as these are ignored by the reader
verify(
bufferDataList.stream().allMatch(buffer -> buffer.getData().size() == 0),
bufferDataList.stream()
.flatMap(bufferData -> bufferData.getData().stream())
.allMatch(dataOutput -> dataOutput.size() == 0),
"Buffer should be empty when there are no rows");
return;
}
@@ -20,6 +20,7 @@
import io.trino.parquet.writer.repdef.RepLevelWriterProvider;
import io.trino.parquet.writer.repdef.RepLevelWriterProviders;
import io.trino.parquet.writer.valuewriter.PrimitiveValueWriter;
import io.trino.plugin.base.io.ChunkedSliceOutput;
import jakarta.annotation.Nullable;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.ColumnDescriptor;
@@ -35,7 +36,6 @@

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -51,13 +51,14 @@
import static io.trino.parquet.writer.repdef.DefLevelWriterProvider.getRootDefinitionLevelWriter;
import static io.trino.parquet.writer.repdef.RepLevelWriterProvider.RepetitionLevelWriter;
import static io.trino.parquet.writer.repdef.RepLevelWriterProvider.getRootRepetitionLevelWriter;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;

public class PrimitiveColumnWriter
implements ColumnWriter
{
private static final int INSTANCE_SIZE = instanceSize(PrimitiveColumnWriter.class);
private static final int MINIMUM_OUTPUT_BUFFER_CHUNK_SIZE = 8 * 1024;
private static final int MAXIMUM_OUTPUT_BUFFER_CHUNK_SIZE = 2 * 1024 * 1024;

private final ColumnDescriptor columnDescriptor;
private final CompressionCodec compressionCodec;
@@ -86,7 +87,7 @@ public class PrimitiveColumnWriter

private final int maxDefinitionLevel;

private final List<ParquetDataOutput> pageBuffer = new ArrayList<>();
private final ChunkedSliceOutput compressedOutputStream;

@Nullable
private final ParquetCompressor compressor;
@@ -109,6 +110,7 @@ public PrimitiveColumnWriter(ColumnDescriptor columnDescriptor, PrimitiveValueWr
this.compressor = getCompressor(compressionCodec);
this.pageSizeThreshold = pageSizeThreshold;
this.columnStatistics = Statistics.createStats(columnDescriptor.getPrimitiveType());
this.compressedOutputStream = new ChunkedSliceOutput(MINIMUM_OUTPUT_BUFFER_CHUNK_SIZE, MAXIMUM_OUTPUT_BUFFER_CHUNK_SIZE);
}

@Override
@@ -200,36 +202,35 @@ private void flushCurrentPageToBuffer()
definitionLevelWriter.getBytes(),
primitiveValueWriter.getBytes())
.toByteArray();
long uncompressedSize = pageDataBytes.length;
int uncompressedSize = pageDataBytes.length;
ParquetDataOutput pageData = (compressor != null)
? compressor.compress(pageDataBytes)
: createDataOutput(Slices.wrappedBuffer(pageDataBytes));
long compressedSize = pageData.size();
int compressedSize = pageData.size();

Statistics<?> statistics = primitiveValueWriter.getStatistics();
statistics.incrementNumNulls(currentPageNullCounts);
columnStatistics.mergeStatistics(statistics);

ByteArrayOutputStream pageHeaderOutputStream = new ByteArrayOutputStream();
parquetMetadataConverter.writeDataPageV1Header(toIntExact(uncompressedSize),
toIntExact(compressedSize),
int writtenBytesSoFar = compressedOutputStream.size();
parquetMetadataConverter.writeDataPageV1Header(uncompressedSize,
compressedSize,
valueCount,
repetitionLevelWriter.getEncoding(),
definitionLevelWriter.getEncoding(),
primitiveValueWriter.getEncoding(),
pageHeaderOutputStream);
ParquetDataOutput pageHeader = createDataOutput(BytesInput.from(pageHeaderOutputStream));
compressedOutputStream);
int pageHeaderSize = compressedOutputStream.size() - writtenBytesSoFar;

dataPagesWithEncoding.merge(parquetMetadataConverter.getEncoding(primitiveValueWriter.getEncoding()), 1, Integer::sum);

// update total stats
totalUnCompressedSize += pageHeader.size() + uncompressedSize;
long pageCompressedSize = pageHeader.size() + compressedSize;
totalUnCompressedSize += pageHeaderSize + uncompressedSize;
int pageCompressedSize = pageHeaderSize + compressedSize;
totalCompressedSize += pageCompressedSize;
totalValues += valueCount;

pageBuffer.add(pageHeader);
pageBuffer.add(pageData);
pageData.writeData(compressedOutputStream);
pageBufferedBytes += pageCompressedSize;

// Add encoding should be called after ValuesWriter#getBytes() and before ValuesWriter#reset()
@@ -250,30 +251,30 @@ private void flushCurrentPageToBuffer()
private List<ParquetDataOutput> getDataStreams()
throws IOException
{
List<ParquetDataOutput> dictPage = new ArrayList<>();
ImmutableList.Builder<ParquetDataOutput> outputs = ImmutableList.builder();
if (valueCount > 0) {
flushCurrentPageToBuffer();
}
// write dict page if possible
DictionaryPage dictionaryPage = primitiveValueWriter.toDictPageAndClose();
if (dictionaryPage != null) {
long uncompressedSize = dictionaryPage.getUncompressedSize();
int uncompressedSize = dictionaryPage.getUncompressedSize();
byte[] pageBytes = dictionaryPage.getBytes().toByteArray();
ParquetDataOutput pageData = compressor != null
? compressor.compress(pageBytes)
: createDataOutput(Slices.wrappedBuffer(pageBytes));
long compressedSize = pageData.size();
int compressedSize = pageData.size();

ByteArrayOutputStream dictStream = new ByteArrayOutputStream();
parquetMetadataConverter.writeDictionaryPageHeader(
toIntExact(uncompressedSize),
toIntExact(compressedSize),
uncompressedSize,
compressedSize,
dictionaryPage.getDictionarySize(),
dictionaryPage.getEncoding(),
dictStream);
ParquetDataOutput pageHeader = createDataOutput(BytesInput.from(dictStream));
dictPage.add(pageHeader);
dictPage.add(pageData);
ParquetDataOutput pageHeader = createDataOutput(dictStream);
outputs.add(pageHeader);
outputs.add(pageData);
totalCompressedSize += pageHeader.size() + compressedSize;
totalUnCompressedSize += pageHeader.size() + uncompressedSize;
dictionaryPagesWithEncoding.merge(new ParquetMetadataConverter().getEncoding(dictionaryPage.getEncoding()), 1, Integer::sum);
@@ -282,10 +283,8 @@ private List<ParquetDataOutput> getDataStreams()
}
getDataStreamsCalled = true;

return ImmutableList.<ParquetDataOutput>builder()
.addAll(dictPage)
.addAll(pageBuffer)
.build();
outputs.add(createDataOutput(compressedOutputStream));
return outputs.build();
}

@Override
@@ -298,6 +297,7 @@ public long getBufferedBytes()
public long getRetainedBytes()
Member: Is it called after a flush? (Is there something like a flush here?)

Member (author): There is no "flush" here; there is a "close" followed by a getBuffer to extract the buffered pages. This is called by the connector page sink for every writer after writing each page.

{
return INSTANCE_SIZE +
compressedOutputStream.getRetainedSize() +
primitiveValueWriter.getAllocatedSize() +
definitionLevelWriter.getAllocatedSize() +
repetitionLevelWriter.getAllocatedSize();
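Putting the `PrimitiveColumnWriter` changes together: the page header is serialized straight into the shared `ChunkedSliceOutput`, its length is recovered from the buffer's size delta, and the compressed page bytes follow it in the same buffer, which is why `getDataStreams()` can emit one final output covering all data pages. Below is a minimal sketch of that pattern under the same chunk bounds; in the real code the header is written by `parquetMetadataConverter.writeDataPageV1Header`, here a pre-serialized `byte[]` stands in for it, and the class itself is illustrative.

```java
import io.airlift.slice.SliceOutput;
import io.trino.plugin.base.io.ChunkedSliceOutput;

final class ColumnChunkBufferSketch
{
    private final ChunkedSliceOutput compressedOutputStream = new ChunkedSliceOutput(8 * 1024, 2 * 1024 * 1024);
    private long totalCompressedSize;

    void appendPage(byte[] serializedPageHeader, byte[] compressedPageData)
    {
        // size-delta trick from flushCurrentPageToBuffer: measure the header by how much the buffer grew
        int writtenBytesSoFar = compressedOutputStream.size();
        compressedOutputStream.writeBytes(serializedPageHeader);
        int pageHeaderSize = compressedOutputStream.size() - writtenBytesSoFar;

        // compressed page bytes land right behind their header in the same buffer
        compressedOutputStream.writeBytes(compressedPageData);
        totalCompressedSize += pageHeaderSize + compressedPageData.length;
    }

    long totalCompressedSize()
    {
        return totalCompressedSize;
    }

    // corresponds to the final outputs.add(createDataOutput(compressedOutputStream)) above:
    // all buffered page headers and payloads are replayed in order as one output
    void writeColumnChunk(SliceOutput fileOutput)
    {
        compressedOutputStream.getSlices().forEach(fileOutput::writeBytes);
    }
}
```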
@@ -37,6 +37,7 @@

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
@@ -66,11 +67,23 @@ private ParquetTestUtils() {}

public static Slice writeParquetFile(ParquetWriterOptions writerOptions, List<Type> types, List<String> columnNames, List<io.trino.spi.Page> inputPages)
throws IOException
{
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
ParquetWriter writer = createParquetWriter(outputStream, writerOptions, types, columnNames);

for (io.trino.spi.Page inputPage : inputPages) {
checkArgument(types.size() == inputPage.getChannelCount());
writer.write(inputPage);
}
writer.close();
return Slices.wrappedBuffer(outputStream.toByteArray());
}

public static ParquetWriter createParquetWriter(OutputStream outputStream, ParquetWriterOptions writerOptions, List<Type> types, List<String> columnNames)
{
checkArgument(types.size() == columnNames.size());
ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(types, columnNames, false, false);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
ParquetWriter writer = new ParquetWriter(
return new ParquetWriter(
outputStream,
schemaConverter.getMessageType(),
schemaConverter.getPrimitiveTypes(),
@@ -80,13 +93,6 @@ public static Slice writeParquetFile(ParquetWriterOptions writerOptions, List<Ty
false,
Optional.of(DateTimeZone.getDefault()),
Optional.empty());

for (io.trino.spi.Page inputPage : inputPages) {
checkArgument(types.size() == inputPage.getChannelCount());
writer.write(inputPage);
}
writer.close();
return Slices.wrappedBuffer(outputStream.toByteArray());
}

public static ParquetReader createParquetReader(
@@ -34,13 +34,16 @@
import org.apache.parquet.schema.PrimitiveType;
import org.testng.annotations.Test;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext;
import static io.trino.parquet.ParquetTestUtils.createParquetWriter;
import static io.trino.parquet.ParquetTestUtils.generateInputPages;
import static io.trino.parquet.ParquetTestUtils.writeParquetFile;
import static io.trino.spi.type.BigintType.BIGINT;
@@ -143,4 +146,33 @@ public void testColumnReordering()
assertThat(offsets).isSorted();
}
}

@Test
public void testWriterMemoryAccounting()
throws IOException
{
List<String> columnNames = ImmutableList.of("columnA", "columnB");
List<Type> types = ImmutableList.of(INTEGER, INTEGER);

ParquetWriter writer = createParquetWriter(
new ByteArrayOutputStream(),
ParquetWriterOptions.builder()
.setMaxPageSize(DataSize.ofBytes(1024))
.build(),
types,
columnNames);
List<io.trino.spi.Page> inputPages = generateInputPages(types, 1000, 100);

long previousRetainedBytes = 0;
for (io.trino.spi.Page inputPage : inputPages) {
checkArgument(types.size() == inputPage.getChannelCount());
writer.write(inputPage);
long currentRetainedBytes = writer.getRetainedBytes();
assertThat(currentRetainedBytes).isGreaterThanOrEqualTo(previousRetainedBytes);
previousRetainedBytes = currentRetainedBytes;
}
assertThat(previousRetainedBytes).isGreaterThanOrEqualTo(2 * Integer.BYTES * 1000 * 100);
writer.close();
assertThat(previousRetainedBytes - writer.getRetainedBytes()).isGreaterThanOrEqualTo(2 * Integer.BYTES * 1000 * 100);
}
}