diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConfig.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConfig.java index 25774ed49187..419fe56b09dd 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConfig.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConfig.java @@ -88,15 +88,6 @@ public class HiveConfig private boolean assumeCanonicalPartitionKeys; - private boolean useOrcColumnNames; - private boolean orcBloomFiltersEnabled; - private DataSize orcMaxMergeDistance = new DataSize(1, MEGABYTE); - private DataSize orcMaxBufferSize = new DataSize(8, MEGABYTE); - private DataSize orcTinyStripeThreshold = new DataSize(8, MEGABYTE); - private DataSize orcStreamBufferSize = new DataSize(8, MEGABYTE); - private DataSize orcMaxReadBlockSize = new DataSize(16, MEGABYTE); - private boolean orcLazyReadSmallRanges = true; - private boolean rcfileWriterValidate; private boolean skipDeletionForAlter; @@ -492,112 +483,6 @@ public HiveConfig setWriteValidationThreads(int writeValidationThreads) return this; } - public boolean isUseOrcColumnNames() - { - return useOrcColumnNames; - } - - @Config("hive.orc.use-column-names") - @ConfigDescription("Access ORC columns using names from the file") - public HiveConfig setUseOrcColumnNames(boolean useOrcColumnNames) - { - this.useOrcColumnNames = useOrcColumnNames; - return this; - } - - @NotNull - public DataSize getOrcMaxMergeDistance() - { - return orcMaxMergeDistance; - } - - @Config("hive.orc.max-merge-distance") - public HiveConfig setOrcMaxMergeDistance(DataSize orcMaxMergeDistance) - { - this.orcMaxMergeDistance = orcMaxMergeDistance; - return this; - } - - @NotNull - public DataSize getOrcMaxBufferSize() - { - return orcMaxBufferSize; - } - - @Config("hive.orc.max-buffer-size") - public HiveConfig setOrcMaxBufferSize(DataSize orcMaxBufferSize) - { - this.orcMaxBufferSize = orcMaxBufferSize; - return this; - } - - @NotNull - public DataSize getOrcStreamBufferSize() - { - return orcStreamBufferSize; - } - - @Config("hive.orc.stream-buffer-size") - public HiveConfig setOrcStreamBufferSize(DataSize orcStreamBufferSize) - { - this.orcStreamBufferSize = orcStreamBufferSize; - return this; - } - - @NotNull - public DataSize getOrcTinyStripeThreshold() - { - return orcTinyStripeThreshold; - } - - @Config("hive.orc.tiny-stripe-threshold") - public HiveConfig setOrcTinyStripeThreshold(DataSize orcTinyStripeThreshold) - { - this.orcTinyStripeThreshold = orcTinyStripeThreshold; - return this; - } - - @NotNull - public DataSize getOrcMaxReadBlockSize() - { - return orcMaxReadBlockSize; - } - - @Config("hive.orc.max-read-block-size") - public HiveConfig setOrcMaxReadBlockSize(DataSize orcMaxReadBlockSize) - { - this.orcMaxReadBlockSize = orcMaxReadBlockSize; - return this; - } - - @Deprecated - public boolean isOrcLazyReadSmallRanges() - { - return orcLazyReadSmallRanges; - } - - // TODO remove config option once efficacy is proven - @Deprecated - @Config("hive.orc.lazy-read-small-ranges") - @ConfigDescription("ORC read small disk ranges lazily") - public HiveConfig setOrcLazyReadSmallRanges(boolean orcLazyReadSmallRanges) - { - this.orcLazyReadSmallRanges = orcLazyReadSmallRanges; - return this; - } - - public boolean isOrcBloomFiltersEnabled() - { - return orcBloomFiltersEnabled; - } - - @Config("hive.orc.bloom-filters.enabled") - public HiveConfig setOrcBloomFiltersEnabled(boolean orcBloomFiltersEnabled) - { - this.orcBloomFiltersEnabled = orcBloomFiltersEnabled; - return this; - } - public boolean isRcfileWriterValidate() { return rcfileWriterValidate; diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveModule.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveModule.java index 38b2820eb2a1..6c53afaff854 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveModule.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveModule.java @@ -23,6 +23,7 @@ import io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore; import io.prestosql.plugin.hive.orc.OrcFileWriterFactory; import io.prestosql.plugin.hive.orc.OrcPageSourceFactory; +import io.prestosql.plugin.hive.orc.OrcReaderConfig; import io.prestosql.plugin.hive.orc.OrcWriterConfig; import io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory; import io.prestosql.plugin.hive.parquet.ParquetReaderConfig; @@ -109,6 +110,7 @@ public void configure(Binder binder) Multibinder fileWriterFactoryBinder = newSetBinder(binder, HiveFileWriterFactory.class); binder.bind(OrcFileWriterFactory.class).in(Scopes.SINGLETON); newExporter(binder).export(OrcFileWriterFactory.class).withGeneratedName(); + configBinder(binder).bindConfig(OrcReaderConfig.class); configBinder(binder).bindConfig(OrcWriterConfig.class); fileWriterFactoryBinder.addBinding().to(OrcFileWriterFactory.class).in(Scopes.SINGLETON); fileWriterFactoryBinder.addBinding().to(RcFileFileWriterFactory.class).in(Scopes.SINGLETON); diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveSessionProperties.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveSessionProperties.java index 0fc1d3a496a6..ebc1118725bf 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveSessionProperties.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveSessionProperties.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableList; import io.airlift.units.DataSize; import io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode; +import io.prestosql.plugin.hive.orc.OrcReaderConfig; import io.prestosql.plugin.hive.orc.OrcWriterConfig; import io.prestosql.plugin.hive.parquet.ParquetReaderConfig; import io.prestosql.plugin.hive.parquet.ParquetWriterConfig; @@ -107,6 +108,7 @@ public static InsertExistingPartitionsBehavior valueOf(String value, boolean imm @Inject public HiveSessionProperties( HiveConfig hiveConfig, + OrcReaderConfig orcReaderConfig, OrcWriterConfig orcWriterConfig, ParquetReaderConfig parquetReaderConfig, ParquetWriterConfig parquetWriterConfig) @@ -134,37 +136,37 @@ public HiveSessionProperties( booleanProperty( ORC_BLOOM_FILTERS_ENABLED, "ORC: Enable bloom filters for predicate pushdown", - hiveConfig.isOrcBloomFiltersEnabled(), + orcReaderConfig.isBloomFiltersEnabled(), false), dataSizeProperty( ORC_MAX_MERGE_DISTANCE, "ORC: Maximum size of gap between two reads to merge into a single read", - hiveConfig.getOrcMaxMergeDistance(), + orcReaderConfig.getMaxMergeDistance(), false), dataSizeProperty( ORC_MAX_BUFFER_SIZE, "ORC: Maximum size of a single read", - hiveConfig.getOrcMaxBufferSize(), + orcReaderConfig.getMaxBufferSize(), false), dataSizeProperty( ORC_STREAM_BUFFER_SIZE, "ORC: Size of buffer for streaming reads", - hiveConfig.getOrcStreamBufferSize(), + orcReaderConfig.getStreamBufferSize(), false), dataSizeProperty( ORC_TINY_STRIPE_THRESHOLD, "ORC: Threshold below which an ORC stripe or file will read in its entirety", - hiveConfig.getOrcTinyStripeThreshold(), + orcReaderConfig.getTinyStripeThreshold(), false), dataSizeProperty( ORC_MAX_READ_BLOCK_SIZE, "ORC: Soft max size of Presto blocks produced by ORC reader", - hiveConfig.getOrcMaxReadBlockSize(), + orcReaderConfig.getMaxBlockSize(), false), booleanProperty( ORC_LAZY_READ_SMALL_RANGES, "Experimental: ORC: Read small file segments lazily", - hiveConfig.isOrcLazyReadSmallRanges(), + orcReaderConfig.isLazyReadSmallRanges(), false), dataSizeProperty( ORC_STRING_STATISTICS_LIMIT, diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/orc/OrcPageSourceFactory.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/orc/OrcPageSourceFactory.java index bf9ea4ab5909..be1c6e237591 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/orc/OrcPageSourceFactory.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/orc/OrcPageSourceFactory.java @@ -27,7 +27,6 @@ import io.prestosql.plugin.hive.FileFormatDataSourceStats; import io.prestosql.plugin.hive.HdfsEnvironment; import io.prestosql.plugin.hive.HiveColumnHandle; -import io.prestosql.plugin.hive.HiveConfig; import io.prestosql.plugin.hive.HivePageSourceFactory; import io.prestosql.spi.PrestoException; import io.prestosql.spi.connector.ConnectorPageSource; @@ -83,9 +82,9 @@ public class OrcPageSourceFactory private final FileFormatDataSourceStats stats; @Inject - public OrcPageSourceFactory(TypeManager typeManager, HiveConfig config, HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats) + public OrcPageSourceFactory(TypeManager typeManager, OrcReaderConfig config, HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats) { - this(typeManager, requireNonNull(config, "config is null").isUseOrcColumnNames(), hdfsEnvironment, stats); + this(typeManager, requireNonNull(config, "config is null").isUseColumnNames(), hdfsEnvironment, stats); } public OrcPageSourceFactory(TypeManager typeManager, boolean useOrcColumnNames, HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats) diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/orc/OrcReaderConfig.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/orc/OrcReaderConfig.java new file mode 100644 index 000000000000..7912d9bc0501 --- /dev/null +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/orc/OrcReaderConfig.java @@ -0,0 +1,141 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.hive.orc; + +import io.airlift.configuration.Config; +import io.airlift.configuration.ConfigDescription; +import io.airlift.units.DataSize; + +import javax.validation.constraints.NotNull; + +import static io.airlift.units.DataSize.Unit.MEGABYTE; + +public class OrcReaderConfig +{ + private boolean useColumnNames; + private boolean bloomFiltersEnabled; + + private DataSize maxMergeDistance = new DataSize(1, MEGABYTE); + private DataSize maxBufferSize = new DataSize(8, MEGABYTE); + private DataSize tinyStripeThreshold = new DataSize(8, MEGABYTE); + private DataSize streamBufferSize = new DataSize(8, MEGABYTE); + private DataSize maxBlockSize = new DataSize(16, MEGABYTE); + private boolean lazyReadSmallRanges = true; + + public boolean isUseColumnNames() + { + return useColumnNames; + } + + @Config("hive.orc.use-column-names") + @ConfigDescription("Access ORC columns using names from the file") + public OrcReaderConfig setUseColumnNames(boolean useColumnNames) + { + this.useColumnNames = useColumnNames; + return this; + } + + public boolean isBloomFiltersEnabled() + { + return bloomFiltersEnabled; + } + + @Config("hive.orc.bloom-filters.enabled") + public OrcReaderConfig setBloomFiltersEnabled(boolean bloomFiltersEnabled) + { + this.bloomFiltersEnabled = bloomFiltersEnabled; + return this; + } + + @NotNull + public DataSize getMaxMergeDistance() + { + return maxMergeDistance; + } + + @Config("hive.orc.max-merge-distance") + public OrcReaderConfig setMaxMergeDistance(DataSize maxMergeDistance) + { + this.maxMergeDistance = maxMergeDistance; + return this; + } + + @NotNull + public DataSize getMaxBufferSize() + { + return maxBufferSize; + } + + @Config("hive.orc.max-buffer-size") + public OrcReaderConfig setMaxBufferSize(DataSize maxBufferSize) + { + this.maxBufferSize = maxBufferSize; + return this; + } + + @NotNull + public DataSize getTinyStripeThreshold() + { + return tinyStripeThreshold; + } + + @Config("hive.orc.tiny-stripe-threshold") + public OrcReaderConfig setTinyStripeThreshold(DataSize tinyStripeThreshold) + { + this.tinyStripeThreshold = tinyStripeThreshold; + return this; + } + + @NotNull + public DataSize getStreamBufferSize() + { + return streamBufferSize; + } + + @Config("hive.orc.stream-buffer-size") + public OrcReaderConfig setStreamBufferSize(DataSize streamBufferSize) + { + this.streamBufferSize = streamBufferSize; + return this; + } + + @NotNull + public DataSize getMaxBlockSize() + { + return maxBlockSize; + } + + @Config("hive.orc.max-read-block-size") + public OrcReaderConfig setMaxBlockSize(DataSize maxBlockSize) + { + this.maxBlockSize = maxBlockSize; + return this; + } + + @Deprecated + public boolean isLazyReadSmallRanges() + { + return lazyReadSmallRanges; + } + + // TODO remove config option once efficacy is proven + @Deprecated + @Config("hive.orc.lazy-read-small-ranges") + @ConfigDescription("ORC read small disk ranges lazily") + public OrcReaderConfig setLazyReadSmallRanges(boolean lazyReadSmallRanges) + { + this.lazyReadSmallRanges = lazyReadSmallRanges; + return this; + } +} diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/HiveTestUtils.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/HiveTestUtils.java index 5f4ac377db12..a5250ad611d0 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/HiveTestUtils.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/HiveTestUtils.java @@ -26,6 +26,7 @@ import io.prestosql.plugin.hive.gcs.HiveGcsConfig; import io.prestosql.plugin.hive.orc.OrcFileWriterFactory; import io.prestosql.plugin.hive.orc.OrcPageSourceFactory; +import io.prestosql.plugin.hive.orc.OrcReaderConfig; import io.prestosql.plugin.hive.orc.OrcWriterConfig; import io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory; import io.prestosql.plugin.hive.parquet.ParquetReaderConfig; @@ -81,6 +82,7 @@ public static HiveSessionProperties getHiveSessionProperties(HiveConfig hiveConf { return new HiveSessionProperties( hiveConfig, + new OrcReaderConfig(), new OrcWriterConfig(), new ParquetReaderConfig(), new ParquetWriterConfig()); @@ -91,7 +93,7 @@ public static Set getDefaultHivePageSourceFactories(HiveC FileFormatDataSourceStats stats = new FileFormatDataSourceStats(); return ImmutableSet.builder() .add(new RcFilePageSourceFactory(TYPE_MANAGER, hdfsEnvironment, stats)) - .add(new OrcPageSourceFactory(TYPE_MANAGER, hiveConfig, hdfsEnvironment, stats)) + .add(new OrcPageSourceFactory(TYPE_MANAGER, new OrcReaderConfig(), hdfsEnvironment, stats)) .add(new ParquetPageSourceFactory(TYPE_MANAGER, hdfsEnvironment, stats)) .build(); } diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveConfig.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveConfig.java index b24711b5666d..e839fd4a50ca 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveConfig.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveConfig.java @@ -64,15 +64,7 @@ public void testDefaults() .setWriteValidationThreads(16) .setTextMaxLineLength(new DataSize(100, Unit.MEGABYTE)) .setUseParquetColumnNames(false) - .setUseOrcColumnNames(false) .setAssumeCanonicalPartitionKeys(false) - .setOrcBloomFiltersEnabled(false) - .setOrcMaxMergeDistance(new DataSize(1, Unit.MEGABYTE)) - .setOrcMaxBufferSize(new DataSize(8, Unit.MEGABYTE)) - .setOrcStreamBufferSize(new DataSize(8, Unit.MEGABYTE)) - .setOrcTinyStripeThreshold(new DataSize(8, Unit.MEGABYTE)) - .setOrcMaxReadBlockSize(new DataSize(16, Unit.MEGABYTE)) - .setOrcLazyReadSmallRanges(true) .setRcfileWriterValidate(false) .setSkipDeletionForAlter(false) .setSkipTargetCleanupOnRollback(false) @@ -130,14 +122,6 @@ public void testExplicitPropertyMappings() .put("hive.assume-canonical-partition-keys", "true") .put("hive.text.max-line-length", "13MB") .put("hive.parquet.use-column-names", "true") - .put("hive.orc.use-column-names", "true") - .put("hive.orc.bloom-filters.enabled", "true") - .put("hive.orc.max-merge-distance", "22kB") - .put("hive.orc.max-buffer-size", "44kB") - .put("hive.orc.stream-buffer-size", "55kB") - .put("hive.orc.tiny-stripe-threshold", "61kB") - .put("hive.orc.max-read-block-size", "66kB") - .put("hive.orc.lazy-read-small-ranges", "false") .put("hive.rcfile.writer.validate", "true") .put("hive.skip-deletion-for-alter", "true") .put("hive.skip-target-cleanup-on-rollback", "true") @@ -192,15 +176,7 @@ public void testExplicitPropertyMappings() .setWriteValidationThreads(11) .setTextMaxLineLength(new DataSize(13, Unit.MEGABYTE)) .setUseParquetColumnNames(true) - .setUseOrcColumnNames(true) .setAssumeCanonicalPartitionKeys(true) - .setOrcBloomFiltersEnabled(true) - .setOrcMaxMergeDistance(new DataSize(22, Unit.KILOBYTE)) - .setOrcMaxBufferSize(new DataSize(44, Unit.KILOBYTE)) - .setOrcStreamBufferSize(new DataSize(55, Unit.KILOBYTE)) - .setOrcTinyStripeThreshold(new DataSize(61, Unit.KILOBYTE)) - .setOrcMaxReadBlockSize(new DataSize(66, Unit.KILOBYTE)) - .setOrcLazyReadSmallRanges(false) .setRcfileWriterValidate(true) .setSkipDeletionForAlter(true) .setSkipTargetCleanupOnRollback(true) diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveFileFormats.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveFileFormats.java index 0a413fffa7f1..f985289dc01d 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveFileFormats.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveFileFormats.java @@ -23,6 +23,7 @@ import io.prestosql.orc.OrcWriterOptions; import io.prestosql.plugin.hive.orc.OrcFileWriterFactory; import io.prestosql.plugin.hive.orc.OrcPageSourceFactory; +import io.prestosql.plugin.hive.orc.OrcReaderConfig; import io.prestosql.plugin.hive.orc.OrcWriterConfig; import io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory; import io.prestosql.plugin.hive.parquet.ParquetReaderConfig; @@ -312,6 +313,7 @@ public void testOrcOptimizedWriter(int rowCount) { ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties( new HiveConfig(), + new OrcReaderConfig(), new OrcWriterConfig() .setValidationPercentage(100.0), new ParquetReaderConfig(), diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestOrcPageSourceMemoryTracking.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestOrcPageSourceMemoryTracking.java index 431eb92005ee..00f77749a4e7 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestOrcPageSourceMemoryTracking.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestOrcPageSourceMemoryTracking.java @@ -31,6 +31,10 @@ import io.prestosql.operator.project.CursorProcessor; import io.prestosql.operator.project.PageProcessor; import io.prestosql.plugin.hive.orc.OrcPageSourceFactory; +import io.prestosql.plugin.hive.orc.OrcReaderConfig; +import io.prestosql.plugin.hive.orc.OrcWriterConfig; +import io.prestosql.plugin.hive.parquet.ParquetReaderConfig; +import io.prestosql.plugin.hive.parquet.ParquetWriterConfig; import io.prestosql.spi.Page; import io.prestosql.spi.block.Block; import io.prestosql.spi.classloader.ThreadContextClassLoader; @@ -43,6 +47,7 @@ import io.prestosql.sql.gen.PageFunctionCompiler; import io.prestosql.sql.planner.plan.PlanNodeId; import io.prestosql.sql.relational.RowExpression; +import io.prestosql.testing.TestingConnectorSession; import io.prestosql.testing.TestingSplit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -100,7 +105,6 @@ import static io.prestosql.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.prestosql.plugin.hive.HiveTestUtils.SESSION; import static io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER; -import static io.prestosql.plugin.hive.HiveTestUtils.getHiveSession; import static io.prestosql.spi.type.VarcharType.createUnboundedVarcharType; import static io.prestosql.sql.relational.Expressions.field; import static io.prestosql.testing.TestingHandles.TEST_TABLE_HANDLE; @@ -251,8 +255,13 @@ public void testMaxReadBytes(int rowCount) throws Exception { int maxReadBytes = 1_000; - ConnectorSession session = getHiveSession(new HiveConfig() - .setOrcMaxReadBlockSize(new DataSize(maxReadBytes, BYTE))); + ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties( + new HiveConfig(), + new OrcReaderConfig() + .setMaxBlockSize(new DataSize(maxReadBytes, BYTE)), + new OrcWriterConfig(), + new ParquetReaderConfig(), + new ParquetWriterConfig()).getSessionProperties()); FileFormatDataSourceStats stats = new FileFormatDataSourceStats(); // Build a table where every row gets larger, so we can test that the "batchSize" reduces diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/orc/TestOrcReaderConfig.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/orc/TestOrcReaderConfig.java new file mode 100644 index 000000000000..036f0b69d19c --- /dev/null +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/orc/TestOrcReaderConfig.java @@ -0,0 +1,69 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.hive.orc; + +import com.google.common.collect.ImmutableMap; +import io.airlift.units.DataSize; +import io.airlift.units.DataSize.Unit; +import org.testng.annotations.Test; + +import java.util.Map; + +import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; +import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; +import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; + +public class TestOrcReaderConfig +{ + @Test + public void testDefaults() + { + assertRecordedDefaults(recordDefaults(OrcReaderConfig.class) + .setUseColumnNames(false) + .setBloomFiltersEnabled(false) + .setMaxMergeDistance(new DataSize(1, Unit.MEGABYTE)) + .setMaxBufferSize(new DataSize(8, Unit.MEGABYTE)) + .setStreamBufferSize(new DataSize(8, Unit.MEGABYTE)) + .setTinyStripeThreshold(new DataSize(8, Unit.MEGABYTE)) + .setMaxBlockSize(new DataSize(16, Unit.MEGABYTE)) + .setLazyReadSmallRanges(true)); + } + + @Test + public void testExplicitPropertyMappings() + { + Map properties = new ImmutableMap.Builder() + .put("hive.orc.use-column-names", "true") + .put("hive.orc.bloom-filters.enabled", "true") + .put("hive.orc.max-merge-distance", "22kB") + .put("hive.orc.max-buffer-size", "44kB") + .put("hive.orc.stream-buffer-size", "55kB") + .put("hive.orc.tiny-stripe-threshold", "61kB") + .put("hive.orc.max-read-block-size", "66kB") + .put("hive.orc.lazy-read-small-ranges", "false") + .build(); + + OrcReaderConfig expected = new OrcReaderConfig() + .setUseColumnNames(true) + .setBloomFiltersEnabled(true) + .setMaxMergeDistance(new DataSize(22, Unit.KILOBYTE)) + .setMaxBufferSize(new DataSize(44, Unit.KILOBYTE)) + .setStreamBufferSize(new DataSize(55, Unit.KILOBYTE)) + .setTinyStripeThreshold(new DataSize(61, Unit.KILOBYTE)) + .setMaxBlockSize(new DataSize(66, Unit.KILOBYTE)) + .setLazyReadSmallRanges(false); + + assertFullMapping(properties, expected); + } +} diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/ParquetTester.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/ParquetTester.java index 93c17055ef45..1cd3c032bbd9 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/ParquetTester.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/ParquetTester.java @@ -24,6 +24,7 @@ import io.prestosql.plugin.hive.HiveSessionProperties; import io.prestosql.plugin.hive.HiveStorageFormat; import io.prestosql.plugin.hive.benchmark.FileFormat; +import io.prestosql.plugin.hive.orc.OrcReaderConfig; import io.prestosql.plugin.hive.orc.OrcWriterConfig; import io.prestosql.plugin.hive.parquet.write.MapKeyValuesSchemaConverter; import io.prestosql.plugin.hive.parquet.write.SingleLevelArrayMapKeyValuesSchemaConverter; @@ -339,6 +340,7 @@ static void assertMaxReadBytes( new HiveConfig() .setHiveStorageFormat(HiveStorageFormat.PARQUET) .setUseParquetColumnNames(false), + new OrcReaderConfig(), new OrcWriterConfig(), new ParquetReaderConfig() .setMaxReadBlockSize(maxReadBlockSize),