diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/DataPage.java b/presto-parquet/src/main/java/io/prestosql/parquet/DataPage.java index a7be3adf163d..1d8765388f11 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/DataPage.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/DataPage.java @@ -18,9 +18,9 @@ public abstract class DataPage { protected final int valueCount; - public DataPage(int compressedSize, int uncompressedSize, int valueCount) + public DataPage(int uncompressedSize, int valueCount) { - super(compressedSize, uncompressedSize); + super(uncompressedSize); this.valueCount = valueCount; } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/DataPageV1.java b/presto-parquet/src/main/java/io/prestosql/parquet/DataPageV1.java index 6b5d6d07af29..6282e745774d 100755 --- a/presto-parquet/src/main/java/io/prestosql/parquet/DataPageV1.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/DataPageV1.java @@ -37,7 +37,7 @@ public DataPageV1( ParquetEncoding definitionLevelEncoding, ParquetEncoding valuesEncoding) { - super(slice.length(), uncompressedSize, valueCount); + super(uncompressedSize, valueCount); this.slice = requireNonNull(slice, "slice is null"); this.statistics = statistics; this.repetitionLevelEncoding = repetitionLevelEncoding; @@ -80,7 +80,6 @@ public String toString() .add("definitionLevelEncoding", definitionLevelEncoding) .add("valuesEncoding", valuesEncoding) .add("valueCount", valueCount) - .add("compressedSize", compressedSize) .add("uncompressedSize", uncompressedSize) .toString(); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/DataPageV2.java b/presto-parquet/src/main/java/io/prestosql/parquet/DataPageV2.java index 42dc6a82a887..bbc614b3a39b 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/DataPageV2.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/DataPageV2.java @@ -43,7 +43,7 @@ public DataPageV2( Statistics statistics, boolean isCompressed) { - super(repetitionLevels.length() + definitionLevels.length() + slice.length(), uncompressedSize, valueCount); + super(uncompressedSize, valueCount); this.rowCount = rowCount; this.nullCount = nullCount; this.repetitionLevels = requireNonNull(repetitionLevels, "repetitionLevels slice is null"); @@ -107,7 +107,6 @@ public String toString() .add("statistics", statistics) .add("isCompressed", isCompressed) .add("valueCount", valueCount) - .add("compressedSize", compressedSize) .add("uncompressedSize", uncompressedSize) .toString(); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/DictionaryPage.java b/presto-parquet/src/main/java/io/prestosql/parquet/DictionaryPage.java index fd6684ecaa88..9a011fd723b4 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/DictionaryPage.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/DictionaryPage.java @@ -15,10 +15,7 @@ import io.airlift.slice.Slice; -import java.util.Arrays; - import static com.google.common.base.MoreObjects.toStringHelper; -import static io.airlift.slice.Slices.wrappedBuffer; import static java.util.Objects.requireNonNull; public class DictionaryPage @@ -38,8 +35,8 @@ public DictionaryPage(Slice slice, int dictionarySize, ParquetEncoding encoding) public DictionaryPage(Slice slice, int uncompressedSize, int dictionarySize, ParquetEncoding encoding) { - super(requireNonNull(slice, "slice is null").length(), uncompressedSize); - this.slice = slice; + super(uncompressedSize); + this.slice = requireNonNull(slice, "slice is null"); this.dictionarySize = dictionarySize; this.encoding = requireNonNull(encoding, "encoding is null"); } @@ -59,11 +56,6 @@ public ParquetEncoding getEncoding() return encoding; } - public DictionaryPage copy() - { - return new DictionaryPage(wrappedBuffer(Arrays.copyOf(slice.getBytes(), slice.length())), getUncompressedSize(), dictionarySize, encoding); - } - @Override public String toString() { @@ -71,7 +63,6 @@ public String toString() .add("slice", slice) .add("dictionarySize", dictionarySize) .add("encoding", encoding) - .add("compressedSize", compressedSize) .add("uncompressedSize", uncompressedSize) .toString(); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/Page.java b/presto-parquet/src/main/java/io/prestosql/parquet/Page.java index b383ac268131..f5dff2a4a1d9 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/Page.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/Page.java @@ -15,20 +15,13 @@ public abstract class Page { - protected final int compressedSize; protected final int uncompressedSize; - public Page(int compressedSize, int uncompressedSize) + public Page(int uncompressedSize) { - this.compressedSize = compressedSize; this.uncompressedSize = uncompressedSize; } - public int getCompressedSize() - { - return compressedSize; - } - public int getUncompressedSize() { return uncompressedSize; diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/ParquetCompressionUtils.java b/presto-parquet/src/main/java/io/prestosql/parquet/ParquetCompressionUtils.java index 7ccf6bcf9b2b..bcbfe698434f 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/ParquetCompressionUtils.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/ParquetCompressionUtils.java @@ -31,6 +31,7 @@ import static io.airlift.slice.SizeOf.SIZE_OF_LONG; import static io.airlift.slice.Slices.EMPTY_SLICE; import static io.airlift.slice.Slices.wrappedBuffer; +import static java.lang.Math.toIntExact; import static java.util.Objects.requireNonNull; import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET; @@ -137,8 +138,7 @@ private static Slice decompressFramed(Decompressor decompressor, Slice input, in private static int decompress(Decompressor decompressor, Slice input, int inputOffset, int inputLength, byte[] output, int outputOffset) { byte[] byteArray = (byte[]) input.getBase(); - int byteArrayOffset = inputOffset + (int) (input.getAddress() - ARRAY_BYTE_BASE_OFFSET); - int size = decompressor.decompress(byteArray, byteArrayOffset, inputLength, output, outputOffset, output.length - outputOffset); - return size; + int byteArrayOffset = inputOffset + toIntExact(input.getAddress() - ARRAY_BYTE_BASE_OFFSET); + return decompressor.decompress(byteArray, byteArrayOffset, inputLength, output, outputOffset, output.length - outputOffset); } } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/ParquetTypeUtils.java b/presto-parquet/src/main/java/io/prestosql/parquet/ParquetTypeUtils.java index 7a2b077576d9..89b536c178aa 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/ParquetTypeUtils.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/ParquetTypeUtils.java @@ -238,7 +238,7 @@ public static long getShortDecimalValue(byte[] bytes) } for (int i = 0; i < bytes.length; i++) { - value |= ((long) bytes[bytes.length - i - 1] & 0xFFL) << (8 * i); + value |= (bytes[bytes.length - i - 1] & 0xFFL) << (8 * i); } return value; diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/RichColumnDescriptor.java b/presto-parquet/src/main/java/io/prestosql/parquet/RichColumnDescriptor.java index 004f3aca8a33..1f87974c9027 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/RichColumnDescriptor.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/RichColumnDescriptor.java @@ -22,23 +22,16 @@ public class RichColumnDescriptor extends ColumnDescriptor { - private final PrimitiveType primitiveType; private final boolean required; public RichColumnDescriptor( ColumnDescriptor descriptor, PrimitiveType primitiveType) { - super(descriptor.getPath(), primitiveType.getPrimitiveTypeName(), primitiveType.getTypeLength(), descriptor.getMaxRepetitionLevel(), descriptor.getMaxDefinitionLevel()); - this.primitiveType = primitiveType; + super(descriptor.getPath(), primitiveType, descriptor.getMaxRepetitionLevel(), descriptor.getMaxDefinitionLevel()); this.required = primitiveType.getRepetition() != OPTIONAL; } - public PrimitiveType getPrimitiveType() - { - return primitiveType; - } - public boolean isRequired() { return required; diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/BinaryDictionary.java b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/BinaryDictionary.java index db2fd8549f4b..2430486a3eab 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/BinaryDictionary.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/BinaryDictionary.java @@ -44,14 +44,14 @@ public BinaryDictionary(DictionaryPage dictionaryPage, Integer length) for (int i = 0; i < content.length; i++) { int len = readIntLittleEndian(dictionaryBytes, offset); offset += 4; - content[i] = Binary.fromByteArray(dictionaryBytes, offset, len); + content[i] = Binary.fromReusedByteArray(dictionaryBytes, offset, len); offset += len; } } else { checkArgument(length > 0, "Invalid byte array length: %s", length); for (int i = 0; i < content.length; i++) { - content[i] = Binary.fromByteArray(dictionaryBytes, offset, length); + content[i] = Binary.fromReusedByteArray(dictionaryBytes, offset, length); offset += length; } } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/Dictionary.java b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/Dictionary.java index 65ba8fb29bae..c7044f93aa6e 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/Dictionary.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/Dictionary.java @@ -20,19 +20,12 @@ public abstract class Dictionary { - private final ParquetEncoding encoding; - public Dictionary(ParquetEncoding encoding) { checkArgument( encoding == ParquetEncoding.PLAIN_DICTIONARY || encoding == ParquetEncoding.PLAIN, - " dictionary does not support encoding: %s", encoding); - this.encoding = encoding; - } - - public ParquetEncoding getEncoding() - { - return encoding; + "Dictionary does not support encoding: %s", + encoding); } public Binary decodeToBinary(int id) diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/predicate/Predicate.java b/presto-parquet/src/main/java/io/prestosql/parquet/predicate/Predicate.java index 682e54bc5490..ecf1ed254d60 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/predicate/Predicate.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/predicate/Predicate.java @@ -22,21 +22,6 @@ public interface Predicate { - Predicate TRUE = new Predicate() - { - @Override - public boolean matches(long numberOfRows, Map> statistics, ParquetDataSourceId id, boolean failOnCorruptedParquetStatistics) - { - return true; - } - - @Override - public boolean matches(Map dictionaries) - { - return true; - } - }; - /** * Should the Parquet Reader process a file section with the specified statistics. * diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/predicate/TupleDomainParquetPredicate.java b/presto-parquet/src/main/java/io/prestosql/parquet/predicate/TupleDomainParquetPredicate.java index 0537cf64fd5e..76585fbb513b 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/predicate/TupleDomainParquetPredicate.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/predicate/TupleDomainParquetPredicate.java @@ -92,12 +92,12 @@ public boolean matches(long numberOfRows, Map> s Statistics columnStatistics = statistics.get(column); if (columnStatistics == null || columnStatistics.isEmpty()) { // no stats for column + continue; } - else { - Domain domain = getDomain(effectivePredicateDomain.getType(), numberOfRows, columnStatistics, id, column.toString(), failOnCorruptedParquetStatistics); - if (effectivePredicateDomain.intersect(domain).isNone()) { - return false; - } + + Domain domain = getDomain(effectivePredicateDomain.getType(), numberOfRows, columnStatistics, id, column.toString(), failOnCorruptedParquetStatistics); + if (effectivePredicateDomain.intersect(domain).isNone()) { + return false; } } return true; @@ -212,8 +212,8 @@ public static Domain getDomain(Type type, long rowCount, Statistics statistic if (isVarcharType(type) && statistics instanceof BinaryStatistics) { BinaryStatistics binaryStatistics = (BinaryStatistics) statistics; - Slice minSlice = Slices.wrappedBuffer(binaryStatistics.getMin().getBytes()); - Slice maxSlice = Slices.wrappedBuffer(binaryStatistics.getMax().getBytes()); + Slice minSlice = Slices.wrappedBuffer(binaryStatistics.genericGetMin().getBytes()); + Slice maxSlice = Slices.wrappedBuffer(binaryStatistics.genericGetMax().getBytes()); if (minSlice.compareTo(maxSlice) > 0) { failWithCorruptionException(failOnCorruptedParquetStatistics, column, id, binaryStatistics); return Domain.create(ValueSet.all(type), hasNullValue); diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/reader/ColumnChunkDescriptor.java b/presto-parquet/src/main/java/io/prestosql/parquet/reader/ColumnChunkDescriptor.java index deefda69efec..8cae486af842 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/reader/ColumnChunkDescriptor.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/reader/ColumnChunkDescriptor.java @@ -20,21 +20,11 @@ public class ColumnChunkDescriptor { private final ColumnDescriptor columnDescriptor; private final ColumnChunkMetaData columnChunkMetaData; - private final int size; - public ColumnChunkDescriptor( - ColumnDescriptor columnDescriptor, - ColumnChunkMetaData columnChunkMetaData, - int size) + public ColumnChunkDescriptor(ColumnDescriptor columnDescriptor, ColumnChunkMetaData columnChunkMetaData) { this.columnDescriptor = columnDescriptor; this.columnChunkMetaData = columnChunkMetaData; - this.size = size; - } - - public int getSize() - { - return size; } public ColumnDescriptor getColumnDescriptor() diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/reader/ParquetColumnChunk.java b/presto-parquet/src/main/java/io/prestosql/parquet/reader/ParquetColumnChunk.java index b288e60a4b7f..22a5488f4e6b 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/reader/ParquetColumnChunk.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/reader/ParquetColumnChunk.java @@ -49,11 +49,6 @@ public ParquetColumnChunk( this.pos = offset; } - public ColumnChunkDescriptor getDescriptor() - { - return descriptor; - } - protected PageHeader readPageHeader() throws IOException { @@ -91,11 +86,6 @@ public PageReader readAllPages() return new PageReader(descriptor.getColumnChunkMetaData().getCodec(), pages, dictionaryPage); } - public int getPosition() - { - return pos; - } - private Slice getSlice(int size) { Slice slice = wrappedBuffer(buf, pos, size); diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/reader/ParquetReader.java b/presto-parquet/src/main/java/io/prestosql/parquet/reader/ParquetReader.java index b07b1ec0d8a3..8ccf2f62dee9 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/reader/ParquetReader.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/reader/ParquetReader.java @@ -70,13 +70,12 @@ public class ParquetReader private int currentBlock; private BlockMetaData currentBlockMetadata; - private long currentPosition; private long currentGroupRowCount; private long nextRowInGroup; private int batchSize; private int nextBatchSize = INITIAL_BATCH_SIZE; private final PrimitiveColumnReader[] columnReaders; - private long[] maxBytesPerCell; + private final long[] maxBytesPerCell; private long maxCombinedBytesPerRow; private final long maxReadBlockBytes; private int maxBatchSize = MAX_VECTOR_LENGTH; @@ -107,11 +106,6 @@ public void close() dataSource.close(); } - public long getPosition() - { - return currentPosition; - } - public int nextBatch() { if (nextRowInGroup >= currentGroupRowCount && !advanceToNextRowGroup()) { @@ -123,7 +117,6 @@ public int nextBatch() batchSize = toIntExact(min(batchSize, currentGroupRowCount - nextRowInGroup)); nextRowInGroup += batchSize; - currentPosition += batchSize; Arrays.stream(columnReaders) .forEach(reader -> reader.prepareNextRead(batchSize)); return batchSize; @@ -216,7 +209,8 @@ private ColumnChunk readPrimitive(PrimitiveField field) int totalSize = toIntExact(metadata.getTotalSize()); byte[] buffer = allocateBlock(totalSize); dataSource.readFully(startingPosition, buffer); - ColumnChunkDescriptor descriptor = new ColumnChunkDescriptor(columnDescriptor, metadata, totalSize); + ColumnChunkDescriptor descriptor = new ColumnChunkDescriptor(columnDescriptor, metadata); + @SuppressWarnings("resource") ParquetColumnChunk columnChunk = new ParquetColumnChunk(descriptor, buffer, 0); columnReader.setPageReader(columnChunk.readAllPages()); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/reader/PrimitiveColumnReader.java b/presto-parquet/src/main/java/io/prestosql/parquet/reader/PrimitiveColumnReader.java index 95d9c80f1bfc..36621d89b3af 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/reader/PrimitiveColumnReader.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/reader/PrimitiveColumnReader.java @@ -31,7 +31,6 @@ import it.unimi.dsi.fastutil.ints.IntList; import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.bytes.BytesUtils; -import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder; import org.apache.parquet.io.ParquetDecodingException; @@ -156,11 +155,6 @@ public void prepareNextRead(int batchSize) nextBatchSize = batchSize; } - public ColumnDescriptor getDescriptor() - { - return columnDescriptor; - } - public ColumnChunk readPrimitive(Field field) { IntList definitionLevels = new IntArrayList();