From d475e569375561186bb3372b61a884756d5382fb Mon Sep 17 00:00:00 2001
From: Raunaq Morarka
Date: Wed, 25 Sep 2024 18:42:42 +0530
Subject: [PATCH] Simplify schema field in HiveSplit for ORC, Parquet and RCFile tables

Extract the properties required for ORC, Parquet and RCFile into explicit fields and use a Map only when necessary. This avoids repeated look-ups in the schema Map and reduces the size of HiveSplit for the more common cases.
---
 .../hive/BackgroundHiveSplitLoader.java | 4 +-
 .../plugin/hive/HivePageSourceFactory.java | 3 +-
 .../plugin/hive/HivePageSourceProvider.java | 9 ++--
 .../java/io/trino/plugin/hive/HiveSplit.java | 13 +++---
 .../trino/plugin/hive/InternalHiveSplit.java | 6 +--
 .../java/io/trino/plugin/hive/Schema.java | 43 +++++++++++++++++++
 .../hive/avro/AvroPageSourceFactory.java | 7 ++-
 .../hive/line/LinePageSourceFactory.java | 15 +++---
 .../plugin/hive/orc/OrcPageSourceFactory.java | 17 +++----
 .../parquet/ParquetPageSourceFactory.java | 14 +++---
 .../hive/rcfile/RcFilePageSourceFactory.java | 21 ++++-----
 .../io/trino/plugin/hive/util/HiveUtil.java | 2 +-
 .../hive/util/InternalHiveSplitFactory.java | 20 ++++++---
 .../plugin/hive/TestHiveFileFormats.java | 4 +-
 .../trino/plugin/hive/TestHivePageSink.java | 4 +-
 .../io/trino/plugin/hive/TestHiveSplit.java | 2 +-
 .../plugin/hive/TestHiveSplitSource.java | 4 +-
 .../TestNodeLocalDynamicSplitPruning.java | 8 +---
 .../hive/TestOrcPageSourceMemoryTracking.java | 19 ++++----
 .../hive/orc/TestOrcPageSourceFactory.java | 12 ++----
 .../plugin/hive/orc/TestOrcPredicates.java | 9 +++-
 .../plugin/hive/parquet/ParquetUtil.java | 4 +-
 22 files changed, 130 insertions(+), 110 deletions(-)
 create mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/Schema.java
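Note below the fold (text in this region is conventionally ignored by git am): a minimal, self-contained sketch of the idea behind this change. The serialization library name and the full-ACID flag become plain fields on a compact record, so readers stop probing a per-split Map, and for natively supported formats the serde properties Map can stay empty. The record mirrors the Schema.java added below; the SchemaSketch wrapper and the hard-coded ORC serde class name are illustrative only.

import java.util.Map;

public class SchemaSketch
{
    // Mirrors the record introduced in Schema.java below
    record Schema(String serializationLibraryName, boolean isFullAcidTable, Map<String, String> serdeProperties) {}

    public static void main(String[] args)
    {
        // An ORC split carries two scalar fields plus an empty map ...
        Schema schema = new Schema("org.apache.hadoop.hive.ql.io.orc.OrcSerde", false, Map.of());
        // ... and direct field access replaces the former schema.get("serialization.lib") look-up
        System.out.println(schema.serializationLibraryName());
    }
}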
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java index 99e32b168a29..0d0395a0fb5e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java @@ -115,11 +115,11 @@ import static io.trino.plugin.hive.util.HiveBucketing.getBucketingVersion; import static io.trino.plugin.hive.util.HiveTypeUtil.typeSupported; import static io.trino.plugin.hive.util.HiveUtil.checkCondition; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; import static io.trino.plugin.hive.util.HiveUtil.getFooterCount; import static io.trino.plugin.hive.util.HiveUtil.getHeaderCount; import static io.trino.plugin.hive.util.HiveUtil.getInputFormatName; import static io.trino.plugin.hive.util.HiveUtil.getPartitionKeyColumnHandles; +import static io.trino.plugin.hive.util.HiveUtil.getSerializationLibraryName; import static io.trino.plugin.hive.util.PartitionMatchSupplier.createPartitionMatchSupplier; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static java.lang.Integer.parseInt; @@ -414,7 +414,7 @@ private ListenableFuture<Void> loadPartition(HivePartitionMetadata partition) if (tableBucketInfo.isPresent()) { throw new TrinoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported"); } - HiveStorageFormat targetStorageFormat = getSymlinkStorageFormat(getDeserializerClassName(schema)); + HiveStorageFormat targetStorageFormat = getSymlinkStorageFormat(getSerializationLibraryName(schema)); ListMultimap<Location, Location> targets = getTargetLocationsByParentFromSymlink(location); InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java index 52dfabaf2e08..43979da2c704 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java @@ -19,7 +19,6 @@ import io.trino.spi.predicate.TupleDomain; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.OptionalInt; @@ -32,7 +31,7 @@ Optional<ReaderPageSource> createPageSource( long length, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + Schema schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, Optional<AcidInfo> acidInfo,
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java index 325f2292da83..1ce82c26c55e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java @@ -72,7 +72,6 @@ import static io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter; import static io.trino.plugin.hive.util.HiveBucketing.getHiveBucketFilter; import static io.trino.plugin.hive.util.HiveTypeUtil.getHiveTypeForDereferences; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; import static io.trino.plugin.hive.util.HiveUtil.getInputFormatName; import static io.trino.plugin.hive.util.HiveUtil.getPrefilledColumnValue; import static java.util.Objects.requireNonNull; @@ -162,8 +161,8 @@ public ConnectorPageSource createPageSource( } throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Unsupported input format: serde=%s, format=%s, partition=%s, path=%s".formatted( - getDeserializerClassName(hiveSplit.getSchema()), - getInputFormatName(hiveSplit.getSchema()).orElse(null), + hiveSplit.getSchema().serializationLibraryName(), + getInputFormatName(hiveSplit.getSchema().serdeProperties()).orElse(null), hiveSplit.getPartitionName(), hiveSplit.getPath())); } @@ -177,7 +176,7 @@ public static Optional<ConnectorPageSource> createHivePageSource( long length, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + Schema schema, TupleDomain<HiveColumnHandle> effectivePredicate, TypeManager typeManager, Optional<BucketConversion> bucketConversion, @@ -196,7 +195,7 @@ public static Optional<ConnectorPageSource> createHivePageSource( Optional<BucketAdaptation> bucketAdaptation = createBucketAdaptation(bucketConversion, tableBucketNumber, regularAndInterimColumnMappings); Optional<BucketValidator> bucketValidator = createBucketValidator(path, bucketValidation, tableBucketNumber, regularAndInterimColumnMappings); - CoercionContext coercionContext = new CoercionContext(getTimestampPrecision(session), extractHiveStorageFormat(getDeserializerClassName(schema))); + CoercionContext coercionContext = new CoercionContext(getTimestampPrecision(session), extractHiveStorageFormat(schema.serializationLibraryName())); for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) { List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, typeManager, coercionContext);
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java index a9076cb42ba0..6b7cb0fd5e51 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java
@@ -34,7 +34,6 @@ import static io.airlift.slice.SizeOf.estimatedSizeOf; import static io.airlift.slice.SizeOf.instanceSize; import static io.airlift.slice.SizeOf.sizeOf; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.joining; @@ -49,7 +48,7 @@ public class HiveSplit private final long length; private final long estimatedFileSize; private final long fileModifiedTime; - private final Map<String, String> schema; + private final Schema schema; private final List<HivePartitionKey> partitionKeys; private final List<HostAddress> addresses; private final String partitionName; @@ -70,7 +69,7 @@ public HiveSplit( @JsonProperty("length") long length, @JsonProperty("estimatedFileSize") long estimatedFileSize, @JsonProperty("fileModifiedTime") long fileModifiedTime, - @JsonProperty("schema") Map<String, String> schema, + @JsonProperty("schema") Schema schema, @JsonProperty("partitionKeys") List<HivePartitionKey> partitionKeys, @JsonProperty("readBucketNumber") OptionalInt readBucketNumber, @JsonProperty("tableBucketNumber") OptionalInt tableBucketNumber, @@ -108,7 +107,7 @@ public HiveSplit( long length, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + Schema schema, List<HivePartitionKey> partitionKeys, List<HostAddress> addresses, OptionalInt readBucketNumber, @@ -191,7 +190,7 @@ public long getFileModifiedTime() } @JsonProperty - public Map<String, String> getSchema() + public Schema getSchema() { return schema; } @@ -270,7 +269,7 @@ public long getRetainedSizeInBytes() { return INSTANCE_SIZE + estimatedSizeOf(path) - + estimatedSizeOf(schema, key -> estimatedSizeOf((String) key), value -> estimatedSizeOf((String) value)) + + schema.getRetainedSizeInBytes() + estimatedSizeOf(partitionKeys, HivePartitionKey::estimatedSizeInBytes) + estimatedSizeOf(addresses, HostAddress::getRetainedSizeInBytes) + estimatedSizeOf(partitionName) @@ -294,7 +293,7 @@ public Map<String, String> getSplitInfo() .put("hosts", addresses.stream().map(HostAddress::toString).collect(joining(","))) .put("forceLocalScheduling", String.valueOf(forceLocalScheduling)) .put("partitionName", partitionName) - .put("deserializerClassName", getDeserializerClassName(schema)) + .put("serializationLibraryName", schema.serializationLibraryName()) .buildOrThrow(); }
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java index a15ea63e9ca5..c181fb2fa146 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java @@ -46,7 +46,7 @@ public class InternalHiveSplit private final long end; private final long estimatedFileSize; private final long fileModifiedTime; - private final Map<String, String> schema; + private final Schema schema; private final List<HivePartitionKey> partitionKeys; private final List<InternalHiveBlock> blocks; private final String partitionName; @@ -70,7 +70,7 @@ public InternalHiveSplit( long end, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + Schema schema, List<HivePartitionKey> partitionKeys, List<InternalHiveBlock> blocks, OptionalInt readBucketNumber, @@ -144,7 +144,7 @@ public long getFileModifiedTime() return fileModifiedTime; } - public Map<String, String> getSchema() + public Schema getSchema() { return schema; }
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/Schema.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/Schema.java new file mode 100644 index 000000000000..9ed591927af5 --- /dev/null +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/Schema.java @@
-0,0 +1,43 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.hive; + +import io.airlift.slice.SizeOf; + +import java.util.Map; + +import static io.airlift.slice.SizeOf.estimatedSizeOf; +import static io.airlift.slice.SizeOf.instanceSize; +import static java.util.Objects.requireNonNull; + +public record Schema( + String serializationLibraryName, + boolean isFullAcidTable, + Map<String, String> serdeProperties) +{ + private static final int INSTANCE_SIZE = instanceSize(Schema.class); + + public Schema + { + requireNonNull(serializationLibraryName, "serializationLibraryName is null"); + requireNonNull(serdeProperties, "serdeProperties is null"); + } + + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + + estimatedSizeOf(serializationLibraryName) + + estimatedSizeOf(serdeProperties, SizeOf::estimatedSizeOf, SizeOf::estimatedSizeOf); + } +}
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java index ac58e390e562..3d7379ae4ad5 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java @@ -61,7 +61,6 @@ import static io.trino.plugin.hive.ReaderPageSource.noProjectionAdaptation; import static io.trino.plugin.hive.avro.AvroHiveFileUtils.getCanonicalToGivenFieldName; import static io.trino.plugin.hive.avro.AvroHiveFileUtils.wrapInUnionWithNull; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; import static io.trino.plugin.hive.util.HiveUtil.splitError; import static io.trino.spi.type.TimestampType.createTimestampType; import static java.lang.Math.min; @@ -88,7 +87,7 @@ public Optional<ReaderPageSource> createPageSource( long length, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + io.trino.plugin.hive.Schema schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, Optional<AcidInfo> acidInfo, @@ -96,7 +95,7 @@ boolean originalFile, AcidTransaction transaction) { - if (!AVRO_SERDE_CLASS.equals(getDeserializerClassName(schema))) { + if (!AVRO_SERDE_CLASS.equals(schema.serializationLibraryName())) { return Optional.empty(); } checkArgument(acidInfo.isEmpty(), "Acid is not supported"); @@ -116,7 +115,7 @@ public Optional<ReaderPageSource> createPageSource( Schema tableSchema; try { - tableSchema = AvroHiveFileUtils.determineSchemaOrThrowException(trinoFileSystem, schema); + tableSchema = AvroHiveFileUtils.determineSchemaOrThrowException(trinoFileSystem, schema.serdeProperties()); } catch (IOException | org.apache.avro.AvroTypeException e) { throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, "Unable to load or parse schema", e);
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java index 7498bdc9a87b..4f46899c96d5 ---
a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java @@ -31,6 +31,7 @@ import io.trino.plugin.hive.HivePageSourceFactory; import io.trino.plugin.hive.ReaderColumns; import io.trino.plugin.hive.ReaderPageSource; +import io.trino.plugin.hive.Schema; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorSession; @@ -39,7 +40,6 @@ import java.io.InputStream; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.OptionalInt; @@ -50,7 +50,6 @@ import static io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT; import static io.trino.plugin.hive.HivePageSourceProvider.projectBaseColumns; import static io.trino.plugin.hive.ReaderPageSource.noProjectionAdaptation; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; import static io.trino.plugin.hive.util.HiveUtil.getFooterCount; import static io.trino.plugin.hive.util.HiveUtil.getHeaderCount; import static io.trino.plugin.hive.util.HiveUtil.splitError; @@ -83,7 +82,7 @@ public Optional<ReaderPageSource> createPageSource( long length, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + Schema schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, Optional<AcidInfo> acidInfo, @@ -91,19 +90,19 @@ boolean originalFile, AcidTransaction transaction) { - if (!lineReaderFactory.getHiveInputFormatClassNames().contains(schema.get(FILE_INPUT_FORMAT)) || - !lineDeserializerFactory.getHiveSerDeClassNames().contains(getDeserializerClassName(schema))) { + if (!lineReaderFactory.getHiveInputFormatClassNames().contains(schema.serdeProperties().get(FILE_INPUT_FORMAT)) || + !lineDeserializerFactory.getHiveSerDeClassNames().contains(schema.serializationLibraryName())) { return Optional.empty(); } checkArgument(acidInfo.isEmpty(), "Acid is not supported"); // get header and footer count - int headerCount = getHeaderCount(schema); + int headerCount = getHeaderCount(schema.serdeProperties()); if (headerCount > 1) { checkArgument(start == 0, "Multiple header rows are not supported for a split file"); } - int footerCount = getFooterCount(schema); + int footerCount = getFooterCount(schema.serdeProperties()); if (footerCount > 0) { checkArgument(start == 0, "Footer not supported for a split file"); } @@ -124,7 +123,7 @@ public Optional<ReaderPageSource> createPageSource( projectedReaderColumns.stream() .map(column -> new Column(column.getName(), column.getType(), column.getBaseHiveColumnIndex())) .collect(toImmutableList()), - schema); + schema.serdeProperties()); } // Skip empty inputs
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java index 68f4fde033be..ab39cccf5ca1 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java @@ -41,6 +41,7 @@ import io.trino.plugin.hive.HivePageSourceFactory; import io.trino.plugin.hive.ReaderColumns; import io.trino.plugin.hive.ReaderPageSource; +import io.trino.plugin.hive.Schema; import io.trino.plugin.hive.acid.AcidSchema; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.coercions.TypeCoercer; @@ -95,10 +96,7 @@ import static io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation.mergedRowColumns;
import static io.trino.plugin.hive.orc.OrcPageSource.handleException; import static io.trino.plugin.hive.orc.OrcTypeTranslator.createCoercer; -import static io.trino.plugin.hive.util.AcidTables.isFullAcidTable; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; import static io.trino.plugin.hive.util.HiveUtil.splitError; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.IntegerType.INTEGER; @@ -159,12 +157,9 @@ public OrcPageSourceFactory( this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); } - public static Map<String, String> stripUnnecessaryProperties(Map<String, String> schema) + public static boolean stripUnnecessaryProperties(String serializationLibraryName) { - if (ORC_SERDE_CLASS.equals(getDeserializerClassName(schema)) && !isFullAcidTable(schema)) { - return ImmutableMap.of(SERIALIZATION_LIB, schema.get(SERIALIZATION_LIB)); - } - return schema; + return ORC_SERDE_CLASS.equals(serializationLibraryName); } @Override @@ -175,7 +170,7 @@ public Optional<ReaderPageSource> createPageSource( long length, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + Schema schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, Optional<AcidInfo> acidInfo, @@ -183,7 +178,7 @@ boolean originalFile, AcidTransaction transaction) { - if (!ORC_SERDE_CLASS.equals(getDeserializerClassName(schema))) { + if (!ORC_SERDE_CLASS.equals(schema.serializationLibraryName())) { return Optional.empty(); } @@ -206,7 +201,7 @@ public Optional<ReaderPageSource> createPageSource( readerColumnHandles, columns, isUseOrcColumnNames(session), - isFullAcidTable(schema), + schema.isFullAcidTable(), effectivePredicate, legacyTimeZone, orcReaderOptions
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java index dcd88f2523d6..f3481ac4e253 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java @@ -44,6 +44,7 @@ import io.trino.plugin.hive.HivePageSourceFactory; import io.trino.plugin.hive.ReaderColumns; import io.trino.plugin.hive.ReaderPageSource; +import io.trino.plugin.hive.Schema; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.coercions.TypeCoercer; import io.trino.spi.TrinoException; @@ -97,8 +98,6 @@ import static io.trino.plugin.hive.HiveSessionProperties.useParquetBloomFilter; import static io.trino.plugin.hive.parquet.ParquetPageSource.handleException; import static io.trino.plugin.hive.parquet.ParquetTypeTranslator.createCoercer; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.BigintType.BIGINT; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -146,12 +145,9 @@ public ParquetPageSourceFactory( domainCompactionThreshold = hiveConfig.getDomainCompactionThreshold(); } - public static Map<String, String> stripUnnecessaryProperties(Map<String, String> schema) + public static boolean stripUnnecessaryProperties(String serializationLibraryName) { - if (PARQUET_SERDE_CLASS_NAMES.contains(getDeserializerClassName(schema))) { - return ImmutableMap.of(SERIALIZATION_LIB,
schema.get(SERIALIZATION_LIB)); - } - return schema; + return PARQUET_SERDE_CLASS_NAMES.contains(serializationLibraryName); } @Override @@ -162,7 +158,7 @@ public Optional<ReaderPageSource> createPageSource( long length, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + Schema schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, Optional<AcidInfo> acidInfo, @@ -170,7 +166,7 @@ boolean originalFile, AcidTransaction transaction) { - if (!PARQUET_SERDE_CLASS_NAMES.contains(getDeserializerClassName(schema))) { + if (!PARQUET_SERDE_CLASS_NAMES.contains(schema.serializationLibraryName())) { return Optional.empty(); }
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java index 3fe0d461804e..d9121f6adddc 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java @@ -35,6 +35,7 @@ import io.trino.plugin.hive.HivePageSourceFactory; import io.trino.plugin.hive.ReaderColumns; import io.trino.plugin.hive.ReaderPageSource; +import io.trino.plugin.hive.Schema; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorPageSource; @@ -46,7 +47,6 @@ import java.io.InputStream; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.OptionalInt; @@ -58,9 +58,7 @@ import static io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT; import static io.trino.plugin.hive.HivePageSourceProvider.projectBaseColumns; import static io.trino.plugin.hive.ReaderPageSource.noProjectionAdaptation; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; import static io.trino.plugin.hive.util.HiveUtil.splitError; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static java.lang.Math.min; import static java.util.Objects.requireNonNull; @@ -79,12 +77,9 @@ public RcFilePageSourceFactory(TrinoFileSystemFactory fileSystemFactory, HiveCon this.timeZone = hiveConfig.getRcfileDateTimeZone(); } - public static Map<String, String> stripUnnecessaryProperties(Map<String, String> schema) + public static boolean stripUnnecessaryProperties(String serializationLibraryName) { - if (LAZY_BINARY_COLUMNAR_SERDE_CLASS.equals(getDeserializerClassName(schema))) { - return ImmutableMap.of(SERIALIZATION_LIB, schema.get(SERIALIZATION_LIB)); - } - return schema; + return LAZY_BINARY_COLUMNAR_SERDE_CLASS.equals(serializationLibraryName); } @Override @@ -95,7 +90,7 @@ public Optional<ReaderPageSource> createPageSource( long length, long estimatedFileSize, long fileModifiedTime, - Map<String, String> schema, + Schema schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, Optional<AcidInfo> acidInfo, @@ -104,12 +99,12 @@ AcidTransaction transaction) { ColumnEncodingFactory columnEncodingFactory; - String deserializerClassName = getDeserializerClassName(schema); - if (deserializerClassName.equals(LAZY_BINARY_COLUMNAR_SERDE_CLASS)) { + String serializationLibraryName = schema.serializationLibraryName(); + if (serializationLibraryName.equals(LAZY_BINARY_COLUMNAR_SERDE_CLASS)) { columnEncodingFactory = new BinaryColumnEncodingFactory(timeZone); } - else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) { - columnEncodingFactory = new TextColumnEncodingFactory(TextEncodingOptions.fromSchema(schema)); + else if
(serializationLibraryName.equals(COLUMNAR_SERDE_CLASS)) { + columnEncodingFactory = new TextColumnEncodingFactory(TextEncodingOptions.fromSchema(schema.serdeProperties())); } else { return Optional.empty();
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java index f1f2fac0f37e..0ec3ca3e21ed 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java @@ -207,7 +207,7 @@ public static long parseHiveTimestamp(String value) return HIVE_TIMESTAMP_PARSER.parseMillis(value) * MICROSECONDS_PER_MILLISECOND; } - public static String getDeserializerClassName(Map<String, String> schema) + public static String getSerializationLibraryName(Map<String, String> schema) { String name = schema.get(SERIALIZATION_LIB); checkCondition(name != null, HIVE_INVALID_METADATA, "Table or partition is missing Hive deserializer property: %s", SERIALIZATION_LIB);
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java index 987e4260fcbc..64f5ec7655b4 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java @@ -25,6 +25,7 @@ import io.trino.plugin.hive.HiveStorageFormat; import io.trino.plugin.hive.InternalHiveSplit; import io.trino.plugin.hive.InternalHiveSplit.InternalHiveBlock; +import io.trino.plugin.hive.Schema; import io.trino.plugin.hive.fs.BlockLocation; import io.trino.plugin.hive.fs.TrinoFileStatus; import io.trino.plugin.hive.orc.OrcPageSourceFactory; @@ -45,13 +46,15 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.hive.HiveColumnHandle.isPathColumnHandle; +import static io.trino.plugin.hive.util.AcidTables.isFullAcidTable; +import static io.trino.plugin.hive.util.HiveUtil.getSerializationLibraryName; import static java.util.Objects.requireNonNull; public class InternalHiveSplitFactory { private final String partitionName; private final HiveStorageFormat storageFormat; - private final Map<String, String> strippedSchema; + private final Schema strippedSchema; private final List<HivePartitionKey> partitionKeys; private final Optional<Domain> pathDomain; private final Map<Integer, HiveTypeName> hiveColumnCoercions; @@ -91,13 +94,18 @@ public InternalHiveSplitFactory( checkArgument(minimumTargetSplitSizeInBytes > 0, "minimumTargetSplitSize must be > 0, found: %s", minimumTargetSplitSize); } - private static Map<String, String> stripUnnecessaryProperties(Map<String, String> schema) + private static Schema stripUnnecessaryProperties(Map<String, String> schema) { // Sending the full schema with every split is costly and can be avoided for formats supported natively - schema = OrcPageSourceFactory.stripUnnecessaryProperties(schema); - schema = ParquetPageSourceFactory.stripUnnecessaryProperties(schema); - schema = RcFilePageSourceFactory.stripUnnecessaryProperties(schema); - return schema; + String serializationLibraryName = getSerializationLibraryName(schema); + boolean isFullAcidTable = isFullAcidTable(schema); + Map<String, String> serdeProperties = schema; + if (RcFilePageSourceFactory.stripUnnecessaryProperties(serializationLibraryName) + || OrcPageSourceFactory.stripUnnecessaryProperties(serializationLibraryName) + ||
ParquetPageSourceFactory.stripUnnecessaryProperties(serializationLibraryName)) { + serdeProperties = ImmutableMap.of(); + } + return new Schema(serializationLibraryName, isFullAcidTable, serdeProperties); } public String getPartitionName()
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java index 14b2dc78dc93..cbcd55728728 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java @@ -155,7 +155,6 @@ import static io.trino.plugin.hive.util.HiveTypeTranslator.toHiveType; import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.CharType.createCharType; @@ -974,7 +973,6 @@ private static void testPageSourceFactory( Map<String, String> splitProperties = ImmutableMap.<String, String>builder() .put(FILE_INPUT_FORMAT, storageFormat.getInputFormat()) - .put(SERIALIZATION_LIB, storageFormat.getSerde()) .put(LIST_COLUMNS, String.join(",", splitPropertiesColumnNames.build())) .put(LIST_COLUMN_TYPES, String.join(",", splitPropertiesColumnTypes.build())) .buildOrThrow(); @@ -1008,7 +1006,7 @@ private static void testPageSourceFactory( fileSize, paddedFileSize, 12345, - splitProperties, + new Schema(storageFormat.getSerde(), false, splitProperties), TupleDomain.all(), TESTING_TYPE_MANAGER, Optional.empty(),
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java index 9be058858671..5cce3e02e27d 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java @@ -85,7 +85,6 @@ import static io.trino.plugin.hive.util.HiveTypeTranslator.toHiveType; import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.DateType.DATE; import static io.trino.spi.type.DoubleType.DOUBLE; @@ -329,7 +328,6 @@ private static ConnectorPageSource createPageSource(TrinoFileSystemFactory fileS long length = fileSystemFactory.create(ConnectorIdentity.ofUser("test")).newInputFile(location).length(); Map<String, String> splitProperties = ImmutableMap.<String, String>builder() .put(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat()) - .put(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerde()) .put(LIST_COLUMNS, Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toImmutableList()))) .put(LIST_COLUMN_TYPES, Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toImmutableList()))) .buildOrThrow(); @@ -340,7 +338,7 @@ private static ConnectorPageSource createPageSource(TrinoFileSystemFactory fileS length, length, 0, - splitProperties, + new Schema(config.getHiveStorageFormat().getSerde(), false, splitProperties), ImmutableList.of(), ImmutableList.of(), OptionalInt.empty(),
diff --git
a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java index aefac56ae591..177933ba6987 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java @@ -68,7 +68,7 @@ public void testJsonRoundTrip() 87, 88, Instant.now().toEpochMilli(), - schema, + new Schema("abc", true, schema), partitionKeys, addresses, OptionalInt.empty(),
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java index 89a60c4cc08b..95f629147881 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java @@ -236,7 +236,7 @@ public void testReaderWaitsForSplits() // wait for thread to get the split ConnectorSplit split = splits.get(800, TimeUnit.MILLISECONDS); - assertThat(((HiveSplit) split).getSchema()).containsEntry("id", "33"); + assertThat(((HiveSplit) split).getSchema().serdeProperties()).containsEntry("id", "33"); } finally { // make sure the thread exits @@ -331,7 +331,7 @@ private TestSplit(int id, OptionalInt bucketNumber, DataSize fileSize, BooleanSu fileSize.toBytes(), fileSize.toBytes(), Instant.now().toEpochMilli(), - ImmutableMap.of("id", String.valueOf(id)), + new Schema("abc", false, ImmutableMap.of("id", String.valueOf(id))), ImmutableList.of(), ImmutableList.of(new InternalHiveBlock(0, fileSize.toBytes(), ImmutableList.of())), bucketNumber,
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java index ea7527832cf6..da8dd68b1d6f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java @@ -48,12 +48,10 @@ import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; import static io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories; import static io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.testing.TestingHandles.TEST_CATALOG_HANDLE; import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static java.util.concurrent.CompletableFuture.completedFuture; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT; import static org.assertj.core.api.Assertions.assertThat; class TestNodeLocalDynamicSplitPruning @@ -117,10 +115,6 @@ private static ConnectorPageSource createTestingPageSource(HiveTransactionHandle TrinoFileSystemFactory fileSystemFactory = new MemoryFileSystemFactory(); fileSystemFactory.create(ConnectorIdentity.ofUser("test")).newOutputFile(location).create().close(); - Map<String, String> splitProperties = ImmutableMap.<String, String>builder() - .put(FILE_INPUT_FORMAT, hiveConfig.getHiveStorageFormat().getInputFormat()) - .put(SERIALIZATION_LIB, hiveConfig.getHiveStorageFormat().getSerde()) - .buildOrThrow(); HiveSplit split = new HiveSplit( "", location.toString(), 0, 0, 0, 0, -
splitProperties, + new Schema(hiveConfig.getHiveStorageFormat().getSerde(), false, ImmutableMap.of()), ImmutableList.of(new HivePartitionKey(PARTITION_COLUMN.getName(), "42")), ImmutableList.of(), OptionalInt.of(1),
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java index c7595047ed4f..c630dafbd401 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java @@ -60,7 +60,6 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterOptions; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.io.orc.Writer; @@ -83,7 +82,6 @@ import java.time.Instant; import java.util.Arrays; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.OptionalInt; import java.util.Properties; @@ -111,7 +109,6 @@ import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_COMMENTS; import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; import static io.trino.sql.relational.Expressions.field; import static io.trino.testing.TestingHandles.TEST_CATALOG_HANDLE; @@ -124,7 +121,6 @@ import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; import static java.util.stream.Collectors.toList; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT; import static org.apache.hadoop.hive.ql.io.orc.CompressionKind.ZLIB; import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector; @@ -491,7 +487,7 @@ public void testScanFilterAndProjectOperator() private class TestPreparer { private final FileSplit fileSplit; - private final Map<String, String> schema; + private final Schema schema; private final List<HiveColumnHandle> columns; private final List<Type> types; private final String partitionName; @@ -509,12 +505,13 @@ public TestPreparer(String tempFilePath, List<TestColumn> testColumns, int numRo throws Exception { OrcSerde serde = new OrcSerde(); - schema = ImmutableMap.<String, String>builder() - .put(LIST_COLUMNS, testColumns.stream().map(TestColumn::getName).collect(Collectors.joining(","))) - .put(LIST_COLUMN_TYPES, testColumns.stream().map(TestColumn::getType).collect(Collectors.joining(","))) - .put(FILE_INPUT_FORMAT, OrcInputFormat.class.getName()) - .put(SERIALIZATION_LIB, serde.getClass().getName()) - .buildOrThrow(); + schema = new Schema( + serde.getClass().getName(), + false, + ImmutableMap.<String, String>builder() + .put(LIST_COLUMNS, testColumns.stream().map(TestColumn::getName).collect(Collectors.joining(","))) + .put(LIST_COLUMN_TYPES, testColumns.stream().map(TestColumn::getType).collect(Collectors.joining(","))) + .buildOrThrow()); partitionKeys = testColumns.stream()
.filter(TestColumn::isPartitionKey)
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java index 9a03efc52380..2a3d5d2ae486 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java @@ -26,6 +26,7 @@ import io.trino.plugin.hive.HiveConfig; import io.trino.plugin.hive.HivePageSourceFactory; import io.trino.plugin.hive.ReaderPageSource; +import io.trino.plugin.hive.Schema; import io.trino.spi.Page; import io.trino.spi.connector.ConnectorPageSource; import io.trino.spi.predicate.Domain; @@ -51,15 +52,12 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.io.Resources.getResource; -import static io.trino.hive.thrift.metastore.hive_metastoreConstants.FILE_INPUT_FORMAT; import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; import static io.trino.plugin.hive.HiveStorageFormat.ORC; -import static io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL; import static io.trino.plugin.hive.HiveTestUtils.SESSION; import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; import static io.trino.plugin.hive.util.HiveTypeTranslator.toHiveType; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.VarcharType.VARCHAR; @@ -328,13 +326,9 @@ private static HiveColumnHandle toHiveColumnHandle(NationColumn nationColumn, in Optional.empty()); } - private static Map<String, String> createSchema() + private static Schema createSchema() { - return ImmutableMap.<String, String>builder() - .put(SERIALIZATION_LIB, ORC.getSerde()) - .put(FILE_INPUT_FORMAT, ORC.getInputFormat()) - .put(TRANSACTIONAL, "true") - .buildOrThrow(); + return new Schema(ORC.getSerde(), true, ImmutableMap.of()); } private static void assertEqualsByColumns(Set<NationColumn> columns, List<Nation> actualRows, List<Nation> expectedRows)
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java index bca72e5a8b6c..788e76db6cbd 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java @@ -31,6 +31,7 @@ import io.trino.plugin.hive.HiveConfig; import io.trino.plugin.hive.HivePageSourceProvider; import io.trino.plugin.hive.NodeVersion; +import io.trino.plugin.hive.Schema; import io.trino.plugin.hive.WriterKind; import io.trino.plugin.hive.util.HiveTypeTranslator; import io.trino.spi.Page; @@ -183,7 +184,13 @@ private static ConnectorPageSource createPageSource( length, length, inputFile.lastModified().toEpochMilli(), - getTableProperties(), + new Schema( + ORC.getSerde(), + false, + ImmutableMap.<String, String>builder() + .put(LIST_COLUMNS, COLUMNS.stream().map(HiveColumnHandle::getName).collect(Collectors.joining(","))) + .put(LIST_COLUMN_TYPES, COLUMNS.stream().map(HiveColumnHandle::getHiveType).map(HiveType::toString).collect(Collectors.joining(","))) + .buildOrThrow()), effectivePredicate, TESTING_TYPE_MANAGER, Optional.empty(),
diff --git
a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java index c1bba68fdd76..c2bb087ec90f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java @@ -21,6 +21,7 @@ import io.trino.plugin.hive.HiveConfig; import io.trino.plugin.hive.HivePageSourceFactory; import io.trino.plugin.hive.HiveStorageFormat; +import io.trino.plugin.hive.Schema; import io.trino.spi.connector.ConnectorPageSource; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.predicate.TupleDomain; @@ -43,7 +44,6 @@ import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; import static io.trino.plugin.hive.util.HiveTypeTranslator.toHiveType; -import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; final class ParquetUtil { @@ -96,7 +96,7 @@ private static ConnectorPageSource createPageSource(ConnectorSession session, Fi parquetFile.length(), parquetFile.length(), parquetFile.lastModified(), - ImmutableMap.of(SERIALIZATION_LIB, HiveStorageFormat.PARQUET.getSerde()), + new Schema(HiveStorageFormat.PARQUET.getSerde(), false, ImmutableMap.of()), columns, domain, Optional.empty(),