diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java
index 7ce908743b3e..955734917ff8 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java
@@ -61,6 +61,7 @@ public final class HiveSessionProperties
     private static final String RESPECT_TABLE_FORMAT = "respect_table_format";
     private static final String PARQUET_PREDICATE_PUSHDOWN_ENABLED = "parquet_predicate_pushdown_enabled";
     private static final String PARQUET_OPTIMIZED_READER_ENABLED = "parquet_optimized_reader_enabled";
+    private static final String PARQUET_USE_COLUMN_NAME = "parquet_use_column_names";
     private static final String MAX_SPLIT_SIZE = "max_split_size";
     private static final String MAX_INITIAL_SPLIT_SIZE = "max_initial_split_size";
     public static final String RCFILE_OPTIMIZED_WRITER_ENABLED = "rcfile_optimized_writer_enabled";
@@ -212,6 +213,11 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon
                         "Experimental: Parquet: Enable predicate pushdown for Parquet",
                         hiveClientConfig.isParquetPredicatePushdownEnabled(),
                         false),
+                booleanSessionProperty(
+                        PARQUET_USE_COLUMN_NAME,
+                        "Experimental: Parquet: Access Parquet columns using names from the file",
+                        hiveClientConfig.isUseParquetColumnNames(),
+                        false),
                 dataSizeSessionProperty(
                         MAX_SPLIT_SIZE,
                         "Max split size",
@@ -379,6 +385,11 @@ public static boolean isParquetPredicatePushdownEnabled(ConnectorSession session
         return session.getProperty(PARQUET_PREDICATE_PUSHDOWN_ENABLED, Boolean.class);
     }

+    public static boolean isUseParquetColumnNames(ConnectorSession session)
+    {
+        return session.getProperty(PARQUET_USE_COLUMN_NAME, Boolean.class);
+    }
+
     public static DataSize getMaxSplitSize(ConnectorSession session)
     {
         return session.getProperty(MAX_SPLIT_SIZE, DataSize.class);
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java
index ddac655c168e..5dd5a33f081b 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java
@@ -15,7 +15,6 @@
 import com.facebook.presto.hive.FileFormatDataSourceStats;
 import com.facebook.presto.hive.HdfsEnvironment;
-import com.facebook.presto.hive.HiveClientConfig;
 import com.facebook.presto.hive.HiveColumnHandle;
 import com.facebook.presto.hive.HivePageSourceFactory;
 import com.facebook.presto.hive.parquet.predicate.ParquetPredicate;
@@ -57,6 +56,7 @@
 import static com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA;
 import static com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedReaderEnabled;
 import static com.facebook.presto.hive.HiveSessionProperties.isParquetPredicatePushdownEnabled;
+import static com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames;
 import static com.facebook.presto.hive.HiveUtil.getDeserializerClassName;
 import static com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource;
 import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getColumnIO;
@@ -80,20 +80,13 @@ public class ParquetPageSourceFactory
             .build();

     private final TypeManager typeManager;
-    private final boolean useParquetColumnNames;
     private final HdfsEnvironment hdfsEnvironment;
     private final FileFormatDataSourceStats stats;

     @Inject
-    public ParquetPageSourceFactory(TypeManager typeManager, HiveClientConfig config, HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats)
-    {
-        this(typeManager, requireNonNull(config, "hiveClientConfig is null").isUseParquetColumnNames(), hdfsEnvironment, stats);
-    }
-
-    public ParquetPageSourceFactory(TypeManager typeManager, boolean useParquetColumnNames, HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats)
+    public ParquetPageSourceFactory(TypeManager typeManager, HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats)
     {
         this.typeManager = requireNonNull(typeManager, "typeManager is null");
-        this.useParquetColumnNames = useParquetColumnNames;
         this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
         this.stats = requireNonNull(stats, "stats is null");
     }
@@ -129,7 +122,7 @@ public Optional createPageSource(
                 fileSize,
                 schema,
                 columns,
-                useParquetColumnNames,
+                isUseParquetColumnNames(session),
                 typeManager,
                 isParquetPredicatePushdownEnabled(session),
                 effectivePredicate,
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetRecordCursorProvider.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetRecordCursorProvider.java
index 7b65cef10e53..d39f7f16779e 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetRecordCursorProvider.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetRecordCursorProvider.java
@@ -15,7 +15,6 @@
 import com.facebook.presto.hive.FileFormatDataSourceStats;
 import com.facebook.presto.hive.HdfsEnvironment;
-import com.facebook.presto.hive.HiveClientConfig;
 import com.facebook.presto.hive.HiveColumnHandle;
 import com.facebook.presto.hive.HiveRecordCursorProvider;
 import com.facebook.presto.spi.ConnectorSession;
@@ -35,6 +34,7 @@
 import java.util.Set;

 import static com.facebook.presto.hive.HiveSessionProperties.isParquetPredicatePushdownEnabled;
+import static com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames;
 import static com.facebook.presto.hive.HiveUtil.getDeserializerClassName;
 import static java.util.Objects.requireNonNull;

@@ -46,19 +46,12 @@ public class ParquetRecordCursorProvider
             .add("parquet.hive.serde.ParquetHiveSerDe")
             .build();

-    private final boolean useParquetColumnNames;
     private final HdfsEnvironment hdfsEnvironment;
     private final FileFormatDataSourceStats stats;

     @Inject
-    public ParquetRecordCursorProvider(HiveClientConfig hiveClientConfig, HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats)
+    public ParquetRecordCursorProvider(HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats)
     {
-        this(requireNonNull(hiveClientConfig, "hiveClientConfig is null").isUseParquetColumnNames(), hdfsEnvironment, stats);
-    }
-
-    public ParquetRecordCursorProvider(boolean useParquetColumnNames, HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats)
-    {
-        this.useParquetColumnNames = useParquetColumnNames;
         this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
         this.stats = requireNonNull(stats, "stats is null");
     }
@@ -91,7 +84,7 @@ public Optional createRecordCursor(
                 fileSize,
                 schema,
                 columns,
-                useParquetColumnNames,
+                isUseParquetColumnNames(session),
                 typeManager,
                 isParquetPredicatePushdownEnabled(session),
                 effectivePredicate,
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java b/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java
index cac1b76414ac..481f42af89ae 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java
@@ -74,7 +74,7 @@ public static Set getDefaultHiveDataStreamFactories(HiveC
                 .add(new RcFilePageSourceFactory(TYPE_MANAGER, testHdfsEnvironment, stats))
                 .add(new OrcPageSourceFactory(TYPE_MANAGER, hiveClientConfig, testHdfsEnvironment, stats))
                 .add(new DwrfPageSourceFactory(TYPE_MANAGER, testHdfsEnvironment, stats))
-                .add(new ParquetPageSourceFactory(TYPE_MANAGER, hiveClientConfig, testHdfsEnvironment, stats))
+                .add(new ParquetPageSourceFactory(TYPE_MANAGER, testHdfsEnvironment, stats))
                 .build();
     }

@@ -82,7 +82,7 @@ public static Set getDefaultHiveRecordCursorProvider(H
     {
         HdfsEnvironment testHdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig);
         return ImmutableSet.builder()
-                .add(new ParquetRecordCursorProvider(hiveClientConfig, testHdfsEnvironment, new FileFormatDataSourceStats()))
+                .add(new ParquetRecordCursorProvider(testHdfsEnvironment, new FileFormatDataSourceStats()))
                 .add(new GenericHiveRecordCursorProvider(testHdfsEnvironment))
                 .build();
     }
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveFileFormats.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveFileFormats.java
index 93f82def99ae..14bb386c75c8 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveFileFormats.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveFileFormats.java
@@ -101,10 +101,13 @@ public class TestHiveFileFormats
         extends AbstractTestHiveFileFormats
 {
     private static final FileFormatDataSourceStats STATS = new FileFormatDataSourceStats();
-    private static TestingConnectorSession parquetCursorSession = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setParquetOptimizedReaderEnabled(false).setParquetPredicatePushdownEnabled(false), new OrcFileWriterConfig()).getSessionProperties());
-    private static TestingConnectorSession parquetCursorPushdownSession = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setParquetOptimizedReaderEnabled(false).setParquetPredicatePushdownEnabled(true), new OrcFileWriterConfig()).getSessionProperties());
-    private static TestingConnectorSession parquetPageSourceSession = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setParquetOptimizedReaderEnabled(true).setParquetPredicatePushdownEnabled(false), new OrcFileWriterConfig()).getSessionProperties());
-    private static TestingConnectorSession parquetPageSourcePushdown = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setParquetOptimizedReaderEnabled(true).setParquetPredicatePushdownEnabled(true), new OrcFileWriterConfig()).getSessionProperties());
+    private static TestingConnectorSession parquetCursorSession = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false, false, false), new OrcFileWriterConfig()).getSessionProperties());
+    private static TestingConnectorSession parquetCursorSessionUseName = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false, false, true), new OrcFileWriterConfig()).getSessionProperties());
+    private static TestingConnectorSession parquetCursorPushdownSession = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false, true, false), new OrcFileWriterConfig()).getSessionProperties());
+    private static TestingConnectorSession parquetCursorPushdownSessionUseName = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false, true, true), new OrcFileWriterConfig()).getSessionProperties());
+    private static TestingConnectorSession parquetPageSourceSession = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(true, false, false), new OrcFileWriterConfig()).getSessionProperties());
+    private static TestingConnectorSession parquetPageSourceSessionUseName = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(true, false, true), new OrcFileWriterConfig()).getSessionProperties());
+    private static TestingConnectorSession parquetPageSourcePushdown = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(true, true, false), new OrcFileWriterConfig()).getSessionProperties());

     private static final DateTimeZone HIVE_STORAGE_TIME_ZONE = DateTimeZone.forID("Asia/Katmandu");

@@ -346,12 +349,12 @@ public void testParquet(int rowCount)
                 .withColumns(testColumns)
                 .withRowsCount(rowCount)
                 .withSession(parquetCursorSession)
-                .isReadableByRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT, STATS));
+                .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS));
         assertThatFileFormat(PARQUET)
                 .withColumns(testColumns)
                 .withRowsCount(rowCount)
                 .withSession(parquetCursorPushdownSession)
-                .isReadableByRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT, STATS));
+                .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS));
     }

     @Test(dataProvider = "rowCount")
@@ -364,14 +367,14 @@ public void testParquetCaseInsensitiveColumnLookup(int rowCount)
                 .withWriteColumns(writeColumns)
                 .withReadColumns(readColumns)
                 .withRowsCount(rowCount)
-                .withSession(parquetCursorSession)
-                .isReadableByRecordCursor(new ParquetRecordCursorProvider(true, HDFS_ENVIRONMENT, STATS));
+                .withSession(parquetCursorSessionUseName)
+                .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS));
         assertThatFileFormat(PARQUET)
                 .withWriteColumns(writeColumns)
                 .withReadColumns(readColumns)
                 .withRowsCount(rowCount)
-                .withSession(parquetCursorPushdownSession)
-                .isReadableByRecordCursor(new ParquetRecordCursorProvider(true, HDFS_ENVIRONMENT, STATS));
+                .withSession(parquetCursorPushdownSessionUseName)
+                .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS));
     }

     @Test(dataProvider = "rowCount")
@@ -383,12 +386,12 @@ public void testParquetPageSource(int rowCount)
                 .withColumns(testColumns)
                 .withSession(parquetPageSourceSession)
                 .withRowsCount(rowCount)
-                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS));
+                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));
         assertThatFileFormat(PARQUET)
                 .withColumns(testColumns)
                 .withSession(parquetPageSourcePushdown)
                 .withRowsCount(rowCount)
-                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS));
+                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));
     }

     @Test(dataProvider = "rowCount")
@@ -398,7 +401,6 @@ public void testParquetPageSourceSchemaEvolution(int rowCount)
         List writeColumns = getTestColumnsSupportedByParquet();

         // test index-based access
-        boolean useParquetColumnNames = false;
         List readColumns = writeColumns.stream()
                 .map(column -> new TestColumn(
                         column.getName() + "_new",
@@ -412,16 +414,15 @@ public void testParquetPageSourceSchemaEvolution(int rowCount)
                 .withReadColumns(readColumns)
                 .withSession(parquetPageSourceSession)
                 .withRowsCount(rowCount)
-                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, useParquetColumnNames, HDFS_ENVIRONMENT, STATS));
+                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));

         // test name-based access
-        useParquetColumnNames = true;
         readColumns = Lists.reverse(writeColumns);
         assertThatFileFormat(PARQUET)
                 .withWriteColumns(writeColumns)
                 .withReadColumns(readColumns)
-                .withSession(parquetPageSourceSession)
-                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, useParquetColumnNames, HDFS_ENVIRONMENT, STATS));
+                .withSession(parquetPageSourceSessionUseName)
+                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));
     }

     @Test(dataProvider = "rowCount")
@@ -434,14 +435,14 @@ public void testParquetUseColumnNames(int rowCount)
                 .withWriteColumns(writeColumns)
                 .withReadColumns(readColumns)
                 .withRowsCount(rowCount)
-                .withSession(parquetCursorSession)
-                .isReadableByRecordCursor(new ParquetRecordCursorProvider(true, HDFS_ENVIRONMENT, STATS));
+                .withSession(parquetCursorSessionUseName)
+                .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS));
         assertThatFileFormat(PARQUET)
                 .withWriteColumns(writeColumns)
                 .withReadColumns(readColumns)
                 .withRowsCount(rowCount)
-                .withSession(parquetCursorPushdownSession)
-                .isReadableByRecordCursor(new ParquetRecordCursorProvider(true, HDFS_ENVIRONMENT, STATS));
+                .withSession(parquetCursorPushdownSessionUseName)
+                .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS));
     }

     private static List getTestColumnsSupportedByParquet()
@@ -492,8 +493,8 @@ public void testParquetThrift(int rowCount)
         File file = new File(this.getClass().getClassLoader().getResource("addressbook.parquet").getPath());
         FileSplit split = new FileSplit(new Path(file.getAbsolutePath()), 0, file.length(), new String[0]);

-        HiveRecordCursorProvider cursorProvider = new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT, STATS);
-        testCursorProvider(cursorProvider, split, PARQUET, testColumns, 1);
+        HiveRecordCursorProvider cursorProvider = new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS);
+        testCursorProvider(cursorProvider, split, PARQUET, testColumns, SESSION, 1);
     }

     @Test(dataProvider = "rowCount")
@@ -564,23 +565,23 @@ public void testTruncateVarcharColumn()
                 .withWriteColumns(ImmutableList.of(writeColumn))
                 .withReadColumns(ImmutableList.of(readColumn))
                 .withSession(parquetCursorSession)
-                .isReadableByRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT, STATS));
+                .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS));
         assertThatFileFormat(PARQUET)
                 .withWriteColumns(ImmutableList.of(writeColumn))
                 .withReadColumns(ImmutableList.of(readColumn))
                 .withSession(parquetCursorPushdownSession)
-                .isReadableByRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT, STATS));
+                .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS));
         assertThatFileFormat(PARQUET)
                 .withWriteColumns(ImmutableList.of(writeColumn))
                 .withReadColumns(ImmutableList.of(readColumn))
                 .withSession(parquetPageSourceSession)
-                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS));
+                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));
         assertThatFileFormat(PARQUET)
                 .withWriteColumns(ImmutableList.of(writeColumn))
                 .withReadColumns(ImmutableList.of(readColumn))
                 .withSession(parquetPageSourcePushdown)
-                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS));
+                .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));

         assertThatFileFormat(AVRO)
                 .withWriteColumns(ImmutableList.of(writeColumn))
@@ -627,20 +628,20 @@ public void testFailForLongVarcharPartitionColumn()
         assertThatFileFormat(PARQUET)
                 .withColumns(columns)
                 .withSession(parquetCursorSession)
-                .isFailingForRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);
+                .isFailingForRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);
         assertThatFileFormat(PARQUET)
                 .withColumns(columns)
                 .withSession(parquetCursorPushdownSession)
-                .isFailingForRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);
+                .isFailingForRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);

         assertThatFileFormat(PARQUET)
                 .withColumns(columns)
                 .withSession(parquetPageSourceSession)
-                .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);
+                .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);
         assertThatFileFormat(PARQUET)
                 .withColumns(columns)
                 .withSession(parquetPageSourcePushdown)
-                .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);
+                .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);

         assertThatFileFormat(SEQUENCEFILE)
                 .withColumns(columns)
@@ -655,6 +656,7 @@ private void testCursorProvider(HiveRecordCursorProvider cursorProvider,
             FileSplit split,
             HiveStorageFormat storageFormat,
             List testColumns,
+            ConnectorSession session,
             int rowCount)
     {
         Properties splitProperties = new Properties();
@@ -674,7 +676,7 @@ private void testCursorProvider(HiveRecordCursorProvider cursorProvider,
                 ImmutableSet.of(cursorProvider),
                 ImmutableSet.of(),
                 configuration,
-                SESSION,
+                session,
                 split.getPath(),
                 OptionalInt.empty(),
                 split.getStart(),
@@ -785,6 +787,15 @@ private FileFormatAssertion assertThatFileFormat(HiveStorageFormat hiveStorageFo
                 .withStorageFormat(hiveStorageFormat);
     }

+    private static HiveClientConfig createParquetHiveClientConfig(boolean enableOptimizedReader, boolean enablePredicatePushDown, boolean useParquetColumnNames)
+    {
+        HiveClientConfig config = new HiveClientConfig();
+        config.setParquetOptimizedReaderEnabled(enableOptimizedReader)
+                .setParquetPredicatePushdownEnabled(enablePredicatePushDown)
+                .setUseParquetColumnNames(useParquetColumnNames);
+        return config;
+    }
+
     private class FileFormatAssertion
     {
         private final String formatName;
@@ -912,7 +923,7 @@ private void assertRead(Optional pageSourceFactory, Optio
                 testPageSourceFactory(pageSourceFactory.get(), split, storageFormat, readColumns, session, rowsCount);
             }
             if (cursorProvider.isPresent()) {
-                testCursorProvider(cursorProvider.get(), split, storageFormat, readColumns, rowsCount);
+                testCursorProvider(cursorProvider.get(), split, storageFormat, readColumns, session, rowsCount);
             }
         }
         finally {
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/FileFormat.java b/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/FileFormat.java
index 2db85a55f0d2..88cf0e591d78 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/FileFormat.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/FileFormat.java
@@ -192,7 +192,7 @@ public boolean supportsDate()
         @Override
         public ConnectorPageSource createFileFormatReader(ConnectorSession session, HdfsEnvironment hdfsEnvironment, File targetFile, List columnNames, List columnTypes)
         {
-            HivePageSourceFactory pageSourceFactory = new ParquetPageSourceFactory(TYPE_MANAGER, false, hdfsEnvironment, new FileFormatDataSourceStats());
+            HivePageSourceFactory pageSourceFactory = new ParquetPageSourceFactory(TYPE_MANAGER, hdfsEnvironment, new FileFormatDataSourceStats());
             return createPageSource(pageSourceFactory, session, targetFile, columnNames, columnTypes, HiveStorageFormat.PARQUET);
         }

@@ -298,7 +298,7 @@ public boolean supportsDate()
         @Override
         public ConnectorPageSource createFileFormatReader(ConnectorSession session, HdfsEnvironment hdfsEnvironment, File targetFile, List columnNames, List columnTypes)
         {
-            HiveRecordCursorProvider cursorProvider = new ParquetRecordCursorProvider(false, hdfsEnvironment, new FileFormatDataSourceStats());
+            HiveRecordCursorProvider cursorProvider = new ParquetRecordCursorProvider(hdfsEnvironment, new FileFormatDataSourceStats());
             return createPageSource(cursorProvider, session, targetFile, columnNames, columnTypes, HiveStorageFormat.PARQUET);
         }
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/AbstractTestParquetReader.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/AbstractTestParquetReader.java
index 8afff85fd1f8..0a24e8191855 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/AbstractTestParquetReader.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/AbstractTestParquetReader.java
@@ -209,7 +209,7 @@ public void testCustomSchemaArrayOfStucts()
         Type structType = RowType.from(asList(field("a", BIGINT), field("b", BOOLEAN), field("c", VARCHAR)));
         tester.testSingleLevelArrayRoundTrip(
                 getStandardListObjectInspector(getStandardStructObjectInspector(structFieldNames, asList(javaLongObjectInspector, javaBooleanObjectInspector, javaStringObjectInspector))),
-                values, values, new ArrayType(structType), Optional.of(customSchemaArrayOfStucts));
+                values, values, "self", new ArrayType(structType), Optional.of(customSchemaArrayOfStucts));
     }

     @Test
@@ -545,7 +545,7 @@ public void testComplexNestedStructs()
         Iterable> mapsStringString = createNullableTestMaps(mapStringKeys, stringPrimitives);

         List struct1FieldNames = asList("mapIntStringField", "stringArrayField", "intField");
-        Iterable stucts1 = createNullableTestStructs(mapsIntString, arraysString, intPrimitives);
+        Iterable structs1 = createNullableTestStructs(mapsIntString, arraysString, intPrimitives);
         ObjectInspector struct1ObjectInspector = getStandardStructObjectInspector(struct1FieldNames,
                 asList(
                         getStandardMapObjectInspector(javaIntObjectInspector, javaStringObjectInspector),
@@ -557,7 +557,7 @@ public void testComplexNestedStructs()
                 field("intField", INTEGER)));

         List struct2FieldNames = asList("mapIntStringField", "stringArrayField", "structField");
-        Iterable structs2 = createNullableTestStructs(mapsIntString, arraysString, stucts1);
+        Iterable structs2 = createNullableTestStructs(mapsIntString, arraysString, structs1);
         ObjectInspector struct2ObjectInspector = getStandardStructObjectInspector(struct2FieldNames,
                 asList(
                         getStandardMapObjectInspector(javaIntObjectInspector, javaStringObjectInspector),
@@ -581,7 +581,7 @@ public void testComplexNestedStructs()
                 field("booleanField", BOOLEAN)));

         List struct4FieldNames = asList("mapIntDoubleField", "booleanArrayField", "structField");
-        Iterable stucts4 = createNullableTestStructs(mapsIntDouble, arraysBoolean, structs3);
+        Iterable structs4 = createNullableTestStructs(mapsIntDouble, arraysBoolean, structs3);
         ObjectInspector struct4ObjectInspector = getStandardStructObjectInspector(struct4FieldNames,
                 asList(
                         getStandardMapObjectInspector(javaIntObjectInspector, javaDoubleObjectInspector),
@@ -604,7 +604,7 @@ public void testComplexNestedStructs()
                 getStandardMapObjectInspector(javaStringObjectInspector, javaStringObjectInspector));
         List types = ImmutableList.of(struct1Type, struct2Type, struct3Type, struct4Type, mapType(INTEGER, DOUBLE), new ArrayType(BOOLEAN), mapType(VARCHAR, VARCHAR));

-        Iterable[] values = new Iterable[] {stucts1, structs2, structs3, stucts4, mapsIntDouble, arraysBoolean, mapsStringString};
+        Iterable[] values = new Iterable[] {structs1, structs2, structs3, structs4, mapsIntDouble, arraysBoolean, mapsStringString};
         tester.assertRoundTrip(objectInspectors, values, values, structFieldNames, types, Optional.empty());
     }

@@ -920,7 +920,7 @@ public void testSchemaWithRepeatedOptionalRequiredFields()
                         getStandardListObjectInspector(javaStringObjectInspector),
                         getStandardListObjectInspector(
                                 getStandardStructObjectInspector(contactsFieldNames, asList(javaStringObjectInspector, javaStringObjectInspector))))),
-                values, values, addressBookType, Optional.of(parquetSchema));
+                values, values, "address_book", addressBookType, Optional.of(parquetSchema));
     }

     @Test
@@ -946,7 +946,7 @@ public void testSchemaWithOptionalOptionalRequiredFields()
         ObjectInspector cInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaStringObjectInspector));
         ObjectInspector bInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cInspector));
         ObjectInspector aInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bInspector));
-        tester.testRoundTrip(aInspector, aValues, aValues, aType, Optional.of(parquetSchema));
+        tester.testRoundTrip(aInspector, aValues, aValues, "a", aType, Optional.of(parquetSchema));
     }

     @Test
@@ -972,7 +972,7 @@ public void testSchemaWithOptionalRequiredOptionalFields()
         ObjectInspector cInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
         ObjectInspector bInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cInspector));
         ObjectInspector aInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bInspector));
-        tester.testRoundTrip(aInspector, aValues, aValues, aType, Optional.of(parquetSchema));
+        tester.testRoundTrip(aInspector, aValues, aValues, "a", aType, Optional.of(parquetSchema));
     }

     @Test
@@ -998,7 +998,7 @@ public void testSchemaWithRequiredRequiredOptionalFields()
         ObjectInspector cInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
         ObjectInspector bInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cInspector));
         ObjectInspector aInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bInspector));
-        tester.testRoundTrip(aInspector, aValues, aValues, aType, Optional.of(parquetSchema));
+        tester.testRoundTrip(aInspector, aValues, aValues, "a", aType, Optional.of(parquetSchema));
     }

     @Test
@@ -1024,7 +1024,7 @@ public void testSchemaWithRequiredOptionalOptionalFields()
         ObjectInspector cInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
         ObjectInspector bInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cInspector));
         ObjectInspector aInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bInspector));
-        tester.testRoundTrip(aInspector, aValues, aValues, aType, Optional.of(parquetSchema));
+        tester.testRoundTrip(aInspector, aValues, aValues, "a", aType, Optional.of(parquetSchema));
     }

     @Test
@@ -1050,7 +1050,7 @@ public void testSchemaWithRequiredOptionalRequiredFields()
         ObjectInspector cInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaStringObjectInspector));
         ObjectInspector bInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cInspector));
         ObjectInspector aInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bInspector));
-        tester.testRoundTrip(aInspector, aValues, aValues, aType, Optional.of(parquetSchema));
+        tester.testRoundTrip(aInspector, aValues, aValues, "a", aType, Optional.of(parquetSchema));
     }

     @Test
@@ -1137,7 +1137,7 @@ public void testOldAvroArray()
                 "  }" +
                 "} ");
         Iterable> nonNullArrayElements = createTestArrays(intsBetween(0, 31_234));
-        tester.testSingleLevelArrayRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), nonNullArrayElements, nonNullArrayElements, new ArrayType(INTEGER), Optional.of(parquetMrAvroSchema));
+        tester.testSingleLevelArrayRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), nonNullArrayElements, nonNullArrayElements, "my_list", new ArrayType(INTEGER), Optional.of(parquetMrAvroSchema));
     }

     @Test
@@ -1152,7 +1152,7 @@ public void testNewAvroArray()
                 "    } " +
                 "}");
         Iterable> values = createTestArrays(limit(cycle(asList(1, null, 3, 5, null, null, null, 7, 11, null, 13, 17)), 30_000));
-        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, new ArrayType(INTEGER), Optional.of(parquetMrAvroSchema));
+        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, "my_list", new ArrayType(INTEGER), Optional.of(parquetMrAvroSchema));
     }

     /**
@@ -1171,7 +1171,7 @@ public void testArraySchemas()
                 "  }" +
                 "} ");
         Iterable> nonNullArrayElements = createTestArrays(intsBetween(0, 31_234));
-        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), nonNullArrayElements, nonNullArrayElements, new ArrayType(INTEGER), Optional.of(parquetMrNullableSpecSchema));
+        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), nonNullArrayElements, nonNullArrayElements, "my_list", new ArrayType(INTEGER), Optional.of(parquetMrNullableSpecSchema));

         MessageType parquetMrNonNullSpecSchema = parseMessageType("message hive_schema {" +
                 "  required group my_list (LIST){" +
@@ -1191,7 +1191,7 @@ public void testArraySchemas()
                 "  }" +
                 "  }" +
                 "} ");
-        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, new ArrayType(INTEGER), Optional.of(sparkSchema));
+        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, "my_list", new ArrayType(INTEGER), Optional.of(sparkSchema));

         MessageType hiveSchema = parseMessageType("message hive_schema {" +
                 "  optional group my_list (LIST){" +
@@ -1200,7 +1200,7 @@ public void testArraySchemas()
                 "  }" +
                 "  }" +
                 "} ");
-        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, new ArrayType(INTEGER), Optional.of(hiveSchema));
+        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, "my_list", new ArrayType(INTEGER), Optional.of(hiveSchema));

         MessageType customNamingSchema = parseMessageType("message hive_schema {" +
                 "  optional group my_list (LIST){" +
@@ -1209,7 +1209,7 @@ public void testArraySchemas()
                 "  }" +
                 "  }" +
                 "} ");
-        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, new ArrayType(INTEGER), Optional.of(customNamingSchema));
+        tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, "my_list", new ArrayType(INTEGER), Optional.of(customNamingSchema));
     }

     /**
@@ -1233,7 +1233,7 @@ public void testMapSchemas()
                 "    } " +
                 "  }" +
                 "} ");
-        tester.testRoundTrip(getStandardMapObjectInspector(javaStringObjectInspector, javaIntObjectInspector), values, values, mapType(VARCHAR, INTEGER), Optional.of(map));
+        tester.testRoundTrip(getStandardMapObjectInspector(javaStringObjectInspector, javaIntObjectInspector), values, values, "my_map", mapType(VARCHAR, INTEGER), Optional.of(map));

         // Map (nullable map, non-null values)
         map = parseMessageType("message hive_schema {" +
@@ -1244,7 +1244,7 @@ public void testMapSchemas()
                 "    } " +
                 "  }" +
                 "} ");
-        tester.testRoundTrip(getStandardMapObjectInspector(javaStringObjectInspector, javaIntObjectInspector), values, values, mapType(VARCHAR, INTEGER), Optional.of(map));
+        tester.testRoundTrip(getStandardMapObjectInspector(javaStringObjectInspector, javaIntObjectInspector), values, values, "my_map", mapType(VARCHAR, INTEGER), Optional.of(map));

         // Map (non-null map, nullable values)
         map = parseMessageType("message hive_schema {" +
@@ -1303,7 +1303,7 @@ public void testMapSchemas()
                 "    } " +
                 "  }" +
                 "  } ");
-        tester.testRoundTrip(getStandardMapObjectInspector(javaStringObjectInspector, javaIntObjectInspector), nullableValues, nullableValues, mapType(VARCHAR, INTEGER), Optional.of(map));
+        tester.testRoundTrip(getStandardMapObjectInspector(javaStringObjectInspector, javaIntObjectInspector), nullableValues, nullableValues, "my_map", mapType(VARCHAR, INTEGER), Optional.of(map));

         // Map (nullable map, nullable values)
         map = parseMessageType("message hive_schema {" +
@@ -1314,7 +1314,7 @@ public void testMapSchemas()
                 "    } " +
                 "  }" +
                 "  } ");
-        tester.testRoundTrip(getStandardMapObjectInspector(javaStringObjectInspector, javaIntObjectInspector), nullableValues, nullableValues, mapType(VARCHAR, INTEGER), Optional.of(map));
+        tester.testRoundTrip(getStandardMapObjectInspector(javaStringObjectInspector, javaIntObjectInspector), nullableValues, nullableValues, "my_map", mapType(VARCHAR, INTEGER), Optional.of(map));
     }

     @Test
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java
index e48457fa59bb..d6a2df37e811 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java
@@ -194,6 +194,18 @@ public void testSingleLevelArrayRoundTrip(ObjectInspector objectInspector, Itera
         testRoundTrip(singletonList(objectInspector), new Iterable[] {writeValues}, new Iterable[] {readValues}, TEST_COLUMN, singletonList(type), parquetSchema, true);
     }

+    public void testRoundTrip(ObjectInspector objectInspector, Iterable writeValues, Iterable readValues, String columnName, Type type, Optional parquetSchema)
+            throws Exception
+    {
+        testRoundTrip(singletonList(objectInspector), new Iterable[] {writeValues}, new Iterable[] {readValues}, singletonList(columnName), singletonList(type), parquetSchema, false);
+    }
+
+    public void testSingleLevelArrayRoundTrip(ObjectInspector objectInspector, Iterable writeValues, Iterable readValues, String columnName, Type type, Optional parquetSchema)
+            throws Exception
+    {
+        testRoundTrip(singletonList(objectInspector), new Iterable[] {writeValues}, new Iterable[] {readValues}, singletonList(columnName), singletonList(type), parquetSchema, true);
+    }
+
     public void testRoundTrip(List objectInspectors, Iterable[] writeValues, Iterable[] readValues, List columnNames, List columnTypes, Optional parquetSchema, boolean singleLevelArray)
             throws Exception
     {
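
Note: with this change, name-based Parquet column access is no longer fixed when ParquetPageSourceFactory and ParquetRecordCursorProvider are constructed; both readers consult the parquet_use_column_names session property on each split, and the HiveClientConfig value (isUseParquetColumnNames()) only supplies the default. A minimal sketch of how a test could build such a session, using only classes that appear in the diff above (variable names here are illustrative, not part of the change):

    // assumes the fluent HiveClientConfig setters and the static
    // HiveSessionProperties.isUseParquetColumnNames(...) introduced in this diff
    ConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(
                    new HiveClientConfig().setUseParquetColumnNames(true),
                    new OrcFileWriterConfig())
                    .getSessionProperties());
    boolean byName = HiveSessionProperties.isUseParquetColumnNames(session); // true

Because the toggle is now a session property, a client can presumably switch between index-based and name-based column resolution per query (e.g. SET SESSION <catalog>.parquet_use_column_names = true) without reconfiguring or restarting the connector.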