diff --git a/docs/src/main/sphinx/connector/hive.rst b/docs/src/main/sphinx/connector/hive.rst
index aafa5748497..3946d81310f 100644
--- a/docs/src/main/sphinx/connector/hive.rst
+++ b/docs/src/main/sphinx/connector/hive.rst
@@ -1637,10 +1637,10 @@ with Parquet files performed by the Hive connector.
      - ``true``
    * - ``parquet.optimized-writer.enabled``
      - Whether the optimized writer is used when writing Parquet files.
-       Set this property to ``true`` to use the optimized parquet writer by
+       Set this property to ``false`` to disable the optimized parquet writer by
        default. The equivalent catalog session property is
        ``parquet_optimized_writer_enabled``.
-     - ``false``
+     - ``true``
    * - ``parquet.optimized-writer.validation-percentage``
      - Percentage of parquet files to validate after write by re-reading the whole file
        when ``parquet.optimized-writer.enabled`` is set to ``true``.
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriterOptions.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriterOptions.java
index 8ac15de1e3d..7b7af7591ac 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriterOptions.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriterOptions.java
@@ -13,11 +13,10 @@
  */
 package io.trino.parquet.writer;
 
+import com.google.common.primitives.Ints;
 import io.airlift.units.DataSize;
 import org.apache.parquet.hadoop.ParquetWriter;
 
-import static java.lang.Math.toIntExact;
-
 public class ParquetWriterOptions
 {
     private static final DataSize DEFAULT_MAX_ROW_GROUP_SIZE = DataSize.ofBytes(ParquetWriter.DEFAULT_BLOCK_SIZE);
@@ -35,8 +34,8 @@ public static ParquetWriterOptions.Builder builder()
 
     private ParquetWriterOptions(DataSize maxBlockSize, DataSize maxPageSize, int batchSize)
     {
-        this.maxRowGroupSize = toIntExact(maxBlockSize.toBytes());
-        this.maxPageSize = toIntExact(maxPageSize.toBytes());
+        this.maxRowGroupSize = Ints.saturatedCast(maxBlockSize.toBytes());
+        this.maxPageSize = Ints.saturatedCast(maxPageSize.toBytes());
         this.batchSize = batchSize;
     }
 
diff --git a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/session/PropertyMetadataUtil.java b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/session/PropertyMetadataUtil.java
index 2f139416421..412e3020b82 100644
--- a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/session/PropertyMetadataUtil.java
+++ b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/session/PropertyMetadataUtil.java
@@ -15,10 +15,12 @@
 
 import io.airlift.units.DataSize;
 import io.airlift.units.Duration;
+import io.trino.spi.TrinoException;
 import io.trino.spi.session.PropertyMetadata;
 
 import java.util.function.Consumer;
 
+import static io.trino.spi.StandardErrorCode.INVALID_SESSION_PROPERTY;
 import static io.trino.spi.type.VarcharType.VARCHAR;
 
 public final class PropertyMetadataUtil
@@ -47,6 +49,20 @@ public static PropertyMetadata<DataSize> dataSizeProperty(String name, String de
                 DataSize::toString);
     }
 
+    public static void validateMinDataSize(String name, DataSize value, DataSize min)
+    {
+        if (value.compareTo(min) < 0) {
+            throw new TrinoException(INVALID_SESSION_PROPERTY, "%s must be at least %s: %s".formatted(name, min, value));
+        }
+    }
+
+    public static void validateMaxDataSize(String name, DataSize value, DataSize max)
+    {
+        if (value.compareTo(max) > 0) {
+            throw new TrinoException(INVALID_SESSION_PROPERTY, "%s must be at most %s: %s".formatted(name, max, value));
+        }
+    }
+
     public static PropertyMetadata<Duration> durationProperty(String name, String description, Duration defaultValue, boolean hidden)
     {
         return durationProperty(name, description, defaultValue, value -> {}, hidden);
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorSmokeTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorSmokeTest.java
index de5f5a9b978..d4c1ba86759 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorSmokeTest.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorSmokeTest.java
@@ -19,7 +19,6 @@
 import io.airlift.json.ObjectMapperProvider;
 import io.airlift.units.Duration;
 import io.trino.plugin.deltalake.transactionlog.writer.S3NativeTransactionLogSynchronizer;
-import io.trino.plugin.hive.parquet.ParquetWriterConfig;
 import io.trino.testing.QueryRunner;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -30,7 +29,6 @@
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
-import static com.google.common.base.Verify.verify;
 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG;
 import static io.trino.testing.TestingNames.randomNameSuffix;
@@ -49,7 +47,6 @@ public class TestDeltaLakeConnectorSmokeTest
     protected QueryRunner createDeltaLakeQueryRunner(Map<String, String> connectorProperties)
             throws Exception
     {
-        verify(!new ParquetWriterConfig().isParquetOptimizedWriterEnabled(), "This test assumes the optimized Parquet writer is disabled by default");
         return DeltaLakeQueryRunner.createS3DeltaLakeQueryRunner(
                 DELTA_CATALOG,
                 SCHEMA,
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeDelete.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeDelete.java
index c278f84ea1e..7bfa4760eaa 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeDelete.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeDelete.java
@@ -16,7 +16,6 @@
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import io.trino.plugin.hive.containers.HiveMinioDataLake;
-import io.trino.plugin.hive.parquet.ParquetWriterConfig;
 import io.trino.testing.AbstractTestQueryFramework;
 import io.trino.testing.QueryRunner;
 import io.trino.tpch.TpchTable;
@@ -24,7 +23,6 @@
 
 import java.util.Set;
 
-import static com.google.common.base.Verify.verify;
 import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG;
 import static io.trino.testing.TestingNames.randomNameSuffix;
 import static java.lang.String.format;
@@ -42,8 +40,6 @@ public class TestDeltaLakeDelete
     protected QueryRunner createQueryRunner()
             throws Exception
     {
-        verify(!new ParquetWriterConfig().isParquetOptimizedWriterEnabled(), "This test assumes the optimized Parquet writer is disabled by default");
-
         hiveMinioDataLake = closeAfterClass(new HiveMinioDataLake(bucketName));
         hiveMinioDataLake.start();
         QueryRunner queryRunner = DeltaLakeQueryRunner.createS3DeltaLakeQueryRunner(
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePreferredPartitioning.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePreferredPartitioning.java
index ad3b7d3f220..831f134db72 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePreferredPartitioning.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePreferredPartitioning.java
@@ -16,12 +16,10 @@
 import com.google.common.collect.ImmutableMap;
 import io.trino.Session;
 import io.trino.plugin.hive.containers.HiveMinioDataLake;
-import io.trino.plugin.hive.parquet.ParquetWriterConfig;
 import io.trino.testing.AbstractTestQueryFramework;
 import io.trino.testing.QueryRunner;
 import org.testng.annotations.Test;
 
-import static com.google.common.base.Verify.verify;
 import static io.trino.SystemSessionProperties.TASK_PARTITIONED_WRITER_COUNT;
 import static io.trino.SystemSessionProperties.USE_PREFERRED_WRITE_PARTITIONING;
 import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG;
@@ -39,10 +37,6 @@ public class TestDeltaLakePreferredPartitioning
     protected QueryRunner createQueryRunner()
             throws Exception
     {
-        verify(
-                !new ParquetWriterConfig().isParquetOptimizedWriterEnabled(),
-                "This test assumes the optimized Parquet writer is disabled by default");
-
         HiveMinioDataLake hiveMinioDataLake = closeAfterClass(new HiveMinioDataLake(TEST_BUCKET_NAME));
         hiveMinioDataLake.start();
         return createS3DeltaLakeQueryRunner(
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeUpdate.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeUpdate.java
index d0e160106ea..47630dccffb 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeUpdate.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeUpdate.java
@@ -15,13 +15,11 @@
 
 import com.google.common.collect.ImmutableMap;
 import io.trino.plugin.hive.containers.HiveMinioDataLake;
-import io.trino.plugin.hive.parquet.ParquetWriterConfig;
 import io.trino.testing.AbstractTestQueryFramework;
 import io.trino.testing.QueryRunner;
 import io.trino.tpch.TpchTable;
 import org.testng.annotations.Test;
 
-import static com.google.common.base.Verify.verify;
 import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG;
 import static io.trino.testing.TestingNames.randomNameSuffix;
 import static java.lang.String.format;
@@ -42,8 +40,6 @@ public TestDeltaLakeUpdate()
     protected QueryRunner createQueryRunner()
             throws Exception
     {
-        verify(!new ParquetWriterConfig().isParquetOptimizedWriterEnabled(), "This test assumes the optimized Parquet writer is disabled by default");
-
         HiveMinioDataLake hiveMinioDataLake = closeAfterClass(new HiveMinioDataLake(bucketName));
         hiveMinioDataLake.start();
         QueryRunner queryRunner = DeltaLakeQueryRunner.createS3DeltaLakeQueryRunner(
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java
index a61ab0ca41a..a5a6f8c5a5b 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java
@@ -40,6 +40,12 @@
 import static com.google.common.collect.ImmutableSet.toImmutableSet;
 import static io.trino.plugin.base.session.PropertyMetadataUtil.dataSizeProperty;
 import static io.trino.plugin.base.session.PropertyMetadataUtil.durationProperty;
+import static io.trino.plugin.base.session.PropertyMetadataUtil.validateMaxDataSize;
+import static io.trino.plugin.base.session.PropertyMetadataUtil.validateMinDataSize;
+import static io.trino.plugin.hive.parquet.ParquetWriterConfig.PARQUET_WRITER_MAX_BLOCK_SIZE;
+import static io.trino.plugin.hive.parquet.ParquetWriterConfig.PARQUET_WRITER_MAX_PAGE_SIZE;
+import static io.trino.plugin.hive.parquet.ParquetWriterConfig.PARQUET_WRITER_MIN_BLOCK_SIZE;
+import static io.trino.plugin.hive.parquet.ParquetWriterConfig.PARQUET_WRITER_MIN_PAGE_SIZE;
 import static io.trino.spi.StandardErrorCode.INVALID_SESSION_PROPERTY;
 import static io.trino.spi.session.PropertyMetadata.booleanProperty;
 import static io.trino.spi.session.PropertyMetadata.doubleProperty;
@@ -427,11 +433,19 @@ public HiveSessionProperties(
                         PARQUET_WRITER_BLOCK_SIZE,
                         "Parquet: Writer block size",
                         parquetWriterConfig.getBlockSize(),
+                        value -> {
+                            validateMinDataSize(PARQUET_WRITER_BLOCK_SIZE, value, DataSize.valueOf(PARQUET_WRITER_MIN_BLOCK_SIZE));
+                            validateMaxDataSize(PARQUET_WRITER_BLOCK_SIZE, value, DataSize.valueOf(PARQUET_WRITER_MAX_BLOCK_SIZE));
+                        },
                         false),
                 dataSizeProperty(
                         PARQUET_WRITER_PAGE_SIZE,
                         "Parquet: Writer page size",
                         parquetWriterConfig.getPageSize(),
+                        value -> {
+                            validateMinDataSize(PARQUET_WRITER_PAGE_SIZE, value, DataSize.valueOf(PARQUET_WRITER_MIN_PAGE_SIZE));
+                            validateMaxDataSize(PARQUET_WRITER_PAGE_SIZE, value, DataSize.valueOf(PARQUET_WRITER_MAX_PAGE_SIZE));
+                        },
                         false),
                 integerProperty(
                         PARQUET_WRITER_BATCH_SIZE,
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetWriterConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetWriterConfig.java
index 386ab6b5904..a5b433983cf 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetWriterConfig.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetWriterConfig.java
@@ -17,6 +17,8 @@
 import io.airlift.configuration.ConfigDescription;
 import io.airlift.configuration.LegacyConfig;
 import io.airlift.units.DataSize;
+import io.airlift.units.MaxDataSize;
+import io.airlift.units.MinDataSize;
 import io.trino.parquet.writer.ParquetWriterOptions;
 import org.apache.parquet.hadoop.ParquetWriter;
 
@@ -25,13 +27,20 @@
 public class ParquetWriterConfig
 {
-    private boolean parquetOptimizedWriterEnabled;
+    public static final String PARQUET_WRITER_MIN_BLOCK_SIZE = "4MB";
+    public static final String PARQUET_WRITER_MAX_BLOCK_SIZE = "2GB";
+    public static final String PARQUET_WRITER_MIN_PAGE_SIZE = "8kB";
+    public static final String PARQUET_WRITER_MAX_PAGE_SIZE = "8MB";
+
+    private boolean parquetOptimizedWriterEnabled = true;
 
     private DataSize blockSize = DataSize.ofBytes(ParquetWriter.DEFAULT_BLOCK_SIZE);
     private DataSize pageSize = DataSize.ofBytes(ParquetWriter.DEFAULT_PAGE_SIZE);
     private int batchSize = ParquetWriterOptions.DEFAULT_BATCH_SIZE;
     private double validationPercentage = 5;
 
+    @MinDataSize(PARQUET_WRITER_MIN_BLOCK_SIZE)
+    @MaxDataSize(PARQUET_WRITER_MAX_BLOCK_SIZE)
     public DataSize getBlockSize()
     {
         return blockSize;
@@ -45,6 +54,8 @@ public ParquetWriterConfig setBlockSize(DataSize blockSize)
         return this;
     }
 
+    @MinDataSize(PARQUET_WRITER_MIN_PAGE_SIZE)
+    @MaxDataSize(PARQUET_WRITER_MAX_PAGE_SIZE)
     public DataSize getPageSize()
     {
         return pageSize;
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestParquetPageSkipping.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestParquetPageSkipping.java
index db9d69d6844..9a3ba2eec77 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestParquetPageSkipping.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestParquetPageSkipping.java
@@ -70,7 +70,7 @@ private void buildSortedTables(String tableName, String sortByColumnName, String
         assertUpdate(
                 Session.builder(getSession())
                         .setCatalogSessionProperty(catalog, "parquet_writer_page_size", "10000B")
-                        .setCatalogSessionProperty(catalog, "parquet_writer_block_size", "100GB")
+                        .setCatalogSessionProperty(catalog, "parquet_writer_block_size", "2GB")
                        .build(),
                format("INSERT INTO %s SELECT *, ARRAY[rand(), rand(), rand()] FROM tpch.tiny.orders", tableName),
                15000);
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java
index 7b546d2d3f3..918e631692d 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java
@@ -2077,6 +2077,10 @@ public void testEmptyBucketedTable()
             if ((storageFormat == HiveStorageFormat.AVRO) && (compressionCodec == HiveCompressionCodec.LZ4)) {
                 continue;
             }
+            if ((storageFormat == HiveStorageFormat.PARQUET) && (compressionCodec == HiveCompressionCodec.LZ4)) {
+                // TODO (https://github.com/trinodb/trino/issues/9142) Support LZ4 compression with native Parquet writer
+                continue;
+            }
             testEmptyBucketedTable(storageFormat, compressionCodec, true);
         }
         testEmptyBucketedTable(storageFormat, HiveCompressionCodec.GZIP, false);
@@ -5159,10 +5163,10 @@ public void testParquetTimestampPredicatePushdown(HiveTimestampPrecision timesta
     }
 
     @Test(dataProvider = "timestampPrecisionAndValues")
-    public void testParquetTimestampPredicatePushdownOptimizedWriter(HiveTimestampPrecision timestampPrecision, LocalDateTime value)
+    public void testParquetTimestampPredicatePushdownHiveWriter(HiveTimestampPrecision timestampPrecision, LocalDateTime value)
     {
         Session session = Session.builder(getSession())
-                .setCatalogSessionProperty("hive", "parquet_optimized_writer_enabled", "true")
+                .setCatalogSessionProperty("hive", "parquet_optimized_writer_enabled", "false")
                 .build();
         doTestParquetTimestampPredicatePushdown(session, timestampPrecision, value);
     }
@@ -5271,11 +5275,11 @@ public void testParquetDictionaryPredicatePushdown()
     }
 
     @Test
-    public void testParquetDictionaryPredicatePushdownWithOptimizedWriter()
+    public void testParquetDictionaryPredicatePushdownWithHiveWriter()
     {
         testParquetDictionaryPredicatePushdown(
                 Session.builder(getSession())
-                        .setCatalogSessionProperty("hive", "parquet_optimized_writer_enabled", "true")
+                        .setCatalogSessionProperty("hive", "parquet_optimized_writer_enabled", "false")
                         .build());
     }
 
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java
index 255a89365d7..9eb86dc2a14 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java
@@ -26,6 +26,8 @@ protected QueryRunner createQueryRunner()
         return HiveQueryRunner.builder()
                 .setHiveProperties(
                         ImmutableMap.of(
+                                // TODO (https://github.com/trinodb/trino/issues/9359) use optimized writer
+                                "parquet.optimized-writer.enabled", "false",
                                 "parquet.use-column-index", "true",
                                 "parquet.max-buffer-size", "1MB",
                                 "parquet.optimized-reader.enabled", "false"))
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkippingWithOptimizedReader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkippingWithOptimizedReader.java
index f68b673d913..64af89fd0ad 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkippingWithOptimizedReader.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkippingWithOptimizedReader.java
@@ -26,6 +26,8 @@ protected QueryRunner createQueryRunner()
         return HiveQueryRunner.builder()
                 .setHiveProperties(
                         ImmutableMap.of(
+                                // TODO (https://github.com/trinodb/trino/issues/9359) use optimized writer
+                                "parquet.optimized-writer.enabled", "false",
                                 "parquet.use-column-index", "true",
                                 "parquet.max-buffer-size", "1MB",
                                 "parquet.optimized-reader.enabled", "true"))
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetWriterConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetWriterConfig.java
index 3f94e09be0c..76daf2d5025 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetWriterConfig.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetWriterConfig.java
@@ -32,7 +32,7 @@ public class TestParquetWriterConfig
     public void testDefaults()
     {
         assertRecordedDefaults(recordDefaults(ParquetWriterConfig.class)
-                .setParquetOptimizedWriterEnabled(false)
+                .setParquetOptimizedWriterEnabled(true)
                 .setBlockSize(DataSize.ofBytes(ParquetWriter.DEFAULT_BLOCK_SIZE))
                 .setPageSize(DataSize.ofBytes(ParquetWriter.DEFAULT_PAGE_SIZE))
                 .setBatchSize(ParquetWriterOptions.DEFAULT_BATCH_SIZE)
@@ -46,32 +46,32 @@ public void testLegacyProperties()
                 ParquetWriterConfig.class,
                 Map.of(
                         "parquet.optimized-writer.enabled", "true",
-                        "parquet.writer.block-size", "2PB",
-                        "parquet.writer.page-size", "3PB"),
+                        "parquet.writer.block-size", "33MB",
+                        "parquet.writer.page-size", "7MB"),
                 Map.of(
                         "parquet.experimental-optimized-writer.enabled", "true",
-                        "hive.parquet.writer.block-size", "2PB",
-                        "hive.parquet.writer.page-size", "3PB"),
+                        "hive.parquet.writer.block-size", "33MB",
+                        "hive.parquet.writer.page-size", "7MB"),
                 Map.of(
                         "hive.parquet.optimized-writer.enabled", "true",
-                        "hive.parquet.writer.block-size", "2PB",
-                        "hive.parquet.writer.page-size", "3PB"));
+                        "hive.parquet.writer.block-size", "33MB",
+                        "hive.parquet.writer.page-size", "7MB"));
     }
 
     @Test
     public void testExplicitPropertyMappings()
     {
         Map<String, String> properties = Map.of(
-                "parquet.optimized-writer.enabled", "true",
+                "parquet.optimized-writer.enabled", "false",
                 "parquet.writer.block-size", "234MB",
-                "parquet.writer.page-size", "11MB",
+                "parquet.writer.page-size", "6MB",
                 "parquet.writer.batch-size", "100",
                 "parquet.optimized-writer.validation-percentage", "10");
 
         ParquetWriterConfig expected = new ParquetWriterConfig()
-                .setParquetOptimizedWriterEnabled(true)
+                .setParquetOptimizedWriterEnabled(false)
                 .setBlockSize(DataSize.of(234, MEGABYTE))
-                .setPageSize(DataSize.of(11, MEGABYTE))
+                .setPageSize(DataSize.of(6, MEGABYTE))
                 .setBatchSize(100)
                 .setValidationPercentage(10);
 
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCompatibility.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCompatibility.java
index 54a556c0968..6fcb989fbda 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCompatibility.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCompatibility.java
@@ -165,10 +165,6 @@ public void testInsertAllSupportedDataTypesWithTrino(TestHiveStorageFormats.Stor
     public void testTimestampFieldWrittenByOptimizedParquetWriterCanBeReadByHive()
             throws Exception
     {
-        // only admin user is allowed to change session properties
-        setAdminRole(onTrino().getConnection());
-        setSessionProperty(onTrino().getConnection(), "hive.parquet_optimized_writer_enabled", "true");
-
         String tableName = "parquet_table_timestamp_created_in_trino";
         onTrino().executeQuery("DROP TABLE IF EXISTS " + tableName);
         onTrino().executeQuery("CREATE TABLE " + tableName + "(timestamp_precision varchar, a_timestamp timestamp) WITH (format = 'PARQUET')");
@@ -199,10 +195,6 @@ public void testTimestampFieldWrittenByOptimizedParquetWriterCanBeReadByHive()
     public void testSmallDecimalFieldWrittenByOptimizedParquetWriterCanBeReadByHive()
             throws Exception
     {
-        // only admin user is allowed to change session properties
-        setAdminRole(onTrino().getConnection());
-        setSessionProperty(onTrino().getConnection(), "hive.parquet_optimized_writer_enabled", "true");
-
         String tableName = "parquet_table_small_decimal_created_in_trino";
         onTrino().executeQuery("DROP TABLE IF EXISTS " + tableName);
         onTrino().executeQuery("CREATE TABLE " + tableName + " (a_decimal DECIMAL(5,0)) WITH (format='PARQUET')");
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveSparkCompatibility.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveSparkCompatibility.java
index 2dea41dc742..942747255f0 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveSparkCompatibility.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveSparkCompatibility.java
@@ -394,10 +394,10 @@ public void testReadTrinoCreatedParquetTable()
     }
 
     @Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
-    public void testReadTrinoCreatedParquetTableWithNativeWriter()
+    public void testReadTrinoCreatedParquetTableWithHiveWriter()
     {
-        onTrino().executeQuery("SET SESSION " + TRINO_CATALOG + ".parquet_optimized_writer_enabled = true");
-        testReadTrinoCreatedTable("using_native_parquet", "PARQUET");
+        onTrino().executeQuery("SET SESSION " + TRINO_CATALOG + ".parquet_optimized_writer_enabled = false");
+        testReadTrinoCreatedTable("using_hive_parquet", "PARQUET");
     }
 
     private void testReadTrinoCreatedTable(String tableName, String tableFormat)
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveStorageFormats.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveStorageFormats.java
index 2c1b96d2f73..f864ad11a43 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveStorageFormats.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveStorageFormats.java
@@ -244,8 +244,8 @@ public static StorageFormat[] storageFormatsWithConfiguration()
     {
         return new StorageFormat[] {
                 storageFormat("ORC", ImmutableMap.of("hive.orc_optimized_writer_validate", "true")),
-                storageFormat("PARQUET"),
-                storageFormat("PARQUET", ImmutableMap.of("hive.parquet_optimized_writer_enabled", "true")),
+                storageFormat("PARQUET", ImmutableMap.of("hive.parquet_optimized_writer_validation_percentage", "100")),
+                storageFormat("PARQUET", ImmutableMap.of("hive.parquet_optimized_writer_enabled", "false")),
                 storageFormat("RCBINARY", ImmutableMap.of("hive.rcfile_optimized_writer_validate", "true")),
                 storageFormat("RCTEXT", ImmutableMap.of("hive.rcfile_optimized_writer_validate", "true")),
                 storageFormat("SEQUENCEFILE"),
@@ -788,13 +788,13 @@ public void testLargeParquetInsert()
     }
 
     @Test(groups = STORAGE_FORMATS_DETAILED)
-    public void testLargeParquetInsertWithNativeWriter()
+    public void testLargeParquetInsertWithHiveWriter()
     {
         DataSize reducedRowGroupSize = DataSize.ofBytes(ParquetWriter.DEFAULT_PAGE_SIZE / 4);
         runLargeInsert(storageFormat(
                 "PARQUET",
                 ImmutableMap.of(
-                        "hive.parquet_optimized_writer_enabled", "true",
+                        "hive.parquet_optimized_writer_enabled", "false",
                         "hive.parquet_writer_page_size", reducedRowGroupSize.toBytesValueString(),
                         "task_scale_writers_enabled", "false",
                         "task_writer_count", "1")));