From b1475a706bcb3188d52409fdd18af0eac1fc7b69 Mon Sep 17 00:00:00 2001 From: Gabor Szadovszky Date: Mon, 30 Sep 2024 08:19:14 +0200 Subject: [PATCH] GH-3013: Fix potential ClassCastException at reading DELTA_BYTE_ARRAY encoding (#3019) --- .../parquet/column/impl/ColumnReaderBase.java | 1 - .../deltastrings/DeltaByteArrayReader.java | 2 +- .../impl/TestCorruptDeltaByteArrays.java | 44 ++++++++++++++++++- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java index 98bb56eb38..87418409f4 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java @@ -728,7 +728,6 @@ private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int } if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) - && previousReader != null && dataColumn instanceof RequiresPreviousReader) { // previous reader can only be set if reading sequentially ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader); diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayReader.java index 2ef156683c..4af28313d6 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayReader.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayReader.java @@ -88,7 +88,7 @@ public Binary readBytes() { */ @Override public void setPreviousReader(ValuesReader reader) { - if (reader != null) { + if (reader instanceof DeltaByteArrayReader) { this.previous = ((DeltaByteArrayReader) reader).previous; } } diff --git a/parquet-column/src/test/java/org/apache/parquet/column/impl/TestCorruptDeltaByteArrays.java b/parquet-column/src/test/java/org/apache/parquet/column/impl/TestCorruptDeltaByteArrays.java index 4bfce8eea7..105376bfea 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/impl/TestCorruptDeltaByteArrays.java +++ b/parquet-column/src/test/java/org/apache/parquet/column/impl/TestCorruptDeltaByteArrays.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -42,15 +43,17 @@ import org.apache.parquet.column.values.ValuesWriter; import org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader; import org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter; +import org.apache.parquet.column.values.dictionary.DictionaryValuesReader; import org.apache.parquet.io.api.Binary; import org.apache.parquet.io.api.PrimitiveConverter; import org.apache.parquet.schema.PrimitiveType; import org.junit.Assert; import org.junit.Test; +import org.mockito.Mockito; public class TestCorruptDeltaByteArrays { @Test - public void testCorruptDeltaByteArrayVerisons() { + public void testCorruptDeltaByteArrayVersions() { assertTrue(CorruptDeltaByteArrays.requiresSequentialReads( "parquet-mr version 1.6.0 (build abcd)", Encoding.DELTA_BYTE_ARRAY)); assertTrue(CorruptDeltaByteArrays.requiresSequentialReads((String) null, Encoding.DELTA_BYTE_ARRAY)); @@ -74,7 +77,7 @@ public void testCorruptDeltaByteArrayVerisons() { } @Test - public void testEncodingRequiresSequentailRead() { + public void testEncodingRequiresSequentialRead() { ParsedVersion impala = new ParsedVersion("impala", "1.2.0", "abcd"); assertFalse(CorruptDeltaByteArrays.requiresSequentialReads(impala, Encoding.DELTA_BYTE_ARRAY)); ParsedVersion broken = new ParsedVersion("parquet-mr", "1.8.0-SNAPSHOT", "abcd"); @@ -262,6 +265,43 @@ public void addBinary(Binary value) { Assert.assertEquals(values, actualValues); } + @Test + public void testPreviousReaderSetting() { + Binary previous = Binary.fromString("<<>>"); + DeltaByteArrayReader previousReader = new DeltaByteArrayReader(); + setPrevious(previousReader, previous); + + DeltaByteArrayReader reader = new DeltaByteArrayReader(); + reader.setPreviousReader(previousReader); + assertSame(previous, getPrevious(reader)); + + reader.setPreviousReader(null); + assertSame("The previous field should have not changed", previous, getPrevious(reader)); + + reader.setPreviousReader(Mockito.mock(DictionaryValuesReader.class)); + assertSame("The previous field should have not changed", previous, getPrevious(reader)); + } + + private Binary getPrevious(DeltaByteArrayReader reader) { + try { + Field previousField = reader.getClass().getDeclaredField("previous"); + previousField.setAccessible(true); + return (Binary) previousField.get(reader); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new AssertionError("Unable to get the private field \"previous\" of the reader" + reader, e); + } + } + + private void setPrevious(DeltaByteArrayReader reader, Binary previous) { + try { + Field previousField = reader.getClass().getDeclaredField("previous"); + previousField.setAccessible(true); + previousField.set(reader, previous); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new AssertionError("Unable to set the private field \"previous\" of the reader" + reader, e); + } + } + public void corruptWriter(DeltaByteArrayWriter writer, String data) throws Exception { Field previous = writer.getClass().getDeclaredField("previous"); previous.setAccessible(true);