diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java
index 4b12227bf59a..a9442a0bd195 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java
@@ -210,8 +210,10 @@ public static Optional<ConnectorPageSource> createHivePageSource(
         Optional<BucketAdaptation> bucketAdaptation = createBucketAdaptation(bucketConversion, tableBucketNumber, regularAndInterimColumnMappings);
         Optional<BucketValidator> bucketValidator = createBucketValidator(path, bucketValidation, tableBucketNumber, regularAndInterimColumnMappings);
 
+        HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
+
         for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) {
-            List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager);
+            List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager, timestampPrecision);
             Optional<ReaderPageSource> readerWithProjections = pageSourceFactory.createPageSource(
                     configuration,
@@ -243,13 +245,13 @@ public static Optional<ConnectorPageSource> createHivePageSource(
                         bucketValidator,
                         adapter,
                         typeManager,
-                        getTimestampPrecision(session),
+                        timestampPrecision,
                         pageSource));
             }
         }
 
         for (HiveRecordCursorProvider provider : cursorProviders) {
-            List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, false, typeManager);
+            List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, false, typeManager, timestampPrecision);
             Optional<ReaderRecordCursorWithProjections> readerWithProjections = provider.createRecordCursor(
                     configuration,
                     session,
@@ -544,7 +546,7 @@ public static List<ColumnMapping> extractRegularAndInterimColumnMappings(List<ColumnMapping> columnMappings)
-    public static List<HiveColumnHandle> toColumnHandles(List<ColumnMapping> regularColumnMappings, boolean doCoercion, TypeManager typeManager)
+    public static List<HiveColumnHandle> toColumnHandles(List<ColumnMapping> regularColumnMappings, boolean doCoercion, TypeManager typeManager, HiveTimestampPrecision timestampPrecision)
     {
         return regularColumnMappings.stream()
                 .map(columnMapping -> {
@@ -560,14 +562,14 @@ public static List<HiveColumnHandle> toColumnHandles(List<ColumnMapping> regularColumnMappings, boolean doCoercion, TypeManager typeManager)
                                     projectedColumn.getDereferenceIndices(),
                                     projectedColumn.getDereferenceNames(),
                                     fromHiveType,
-                                    fromHiveType.getType(typeManager));
+                                    fromHiveType.getType(typeManager, timestampPrecision));
                     });
 
                     return new HiveColumnHandle(
                             columnHandle.getBaseColumnName(),
                             columnHandle.getBaseHiveColumnIndex(),
                             fromHiveTypeBase,
-                            fromHiveTypeBase.getType(typeManager),
+                            fromHiveTypeBase.getType(typeManager, timestampPrecision),
                             newColumnProjectionInfo,
                             columnHandle.getColumnType(),
                             columnHandle.getComment());
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java
index 700fa9499b88..edfe07f869d8 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java
@@ -24,6 +24,7 @@
 
 import java.util.Optional;
 
+import static io.trino.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP;
 import static io.trino.spi.type.TimestampType.createTimestampType;
 
 public final class OrcTypeTranslator
@@ -32,7 +33,7 @@ private OrcTypeTranslator() {}
 
     public static Optional<TypeCoercer<? extends Type, ? extends Type>> createCoercer(OrcTypeKind fromOrcType, Type toTrinoType, HiveTimestampPrecision timestampPrecision)
     {
-        if (fromOrcType.equals(OrcTypeKind.TIMESTAMP) && toTrinoType instanceof VarcharType varcharType) {
+        if (fromOrcType == TIMESTAMP && toTrinoType instanceof VarcharType varcharType) {
             TimestampType timestampType = createTimestampType(timestampPrecision.getPrecision());
             if (timestampType.isShort()) {
                 return Optional.of(new ShortTimestampToVarcharCoercer(timestampType, varcharType));
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
index cb73475640e1..aee099959a18 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
@@ -34,7 +34,6 @@
 import io.trino.plugin.hive.HiveColumnHandle;
 import io.trino.plugin.hive.HiveConfig;
 import io.trino.plugin.hive.HivePageSourceFactory;
-import io.trino.plugin.hive.HiveTimestampPrecision;
 import io.trino.plugin.hive.ReaderColumns;
 import io.trino.plugin.hive.ReaderPageSource;
 import io.trino.plugin.hive.acid.AcidTransaction;
@@ -45,7 +44,6 @@
 import io.trino.spi.connector.EmptyPageSource;
 import io.trino.spi.predicate.TupleDomain;
 import io.trino.spi.type.Type;
-import io.trino.spi.type.TypeManager;
 import org.apache.hadoop.conf.Configuration;
 import org.joda.time.DateTimeZone;
 
@@ -62,7 +60,6 @@
 import static io.trino.plugin.hive.HiveErrorCode.HIVE_BAD_DATA;
 import static io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT;
 import static io.trino.plugin.hive.HivePageSourceProvider.projectBaseColumns;
-import static io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision;
 import static io.trino.plugin.hive.ReaderPageSource.noProjectionAdaptation;
 import static io.trino.plugin.hive.util.HiveClassNames.COLUMNAR_SERDE_CLASS;
 import static io.trino.plugin.hive.util.HiveClassNames.LAZY_BINARY_COLUMNAR_SERDE_CLASS;
@@ -77,15 +74,13 @@ public class RcFilePageSourceFactory
 {
     private static final DataSize BUFFER_SIZE = DataSize.of(8, Unit.MEGABYTE);
 
-    private final TypeManager typeManager;
     private final TrinoFileSystemFactory fileSystemFactory;
     private final FileFormatDataSourceStats stats;
     private final DateTimeZone timeZone;
 
     @Inject
-    public RcFilePageSourceFactory(TypeManager typeManager, TrinoFileSystemFactory fileSystemFactory, FileFormatDataSourceStats stats, HiveConfig hiveConfig)
+    public RcFilePageSourceFactory(TrinoFileSystemFactory fileSystemFactory, FileFormatDataSourceStats stats, HiveConfig hiveConfig)
     {
-        this.typeManager = requireNonNull(typeManager, "typeManager is null");
         this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
         this.stats = requireNonNull(stats, "stats is null");
         this.timeZone = hiveConfig.getRcfileDateTimeZone();
@@ -168,9 +163,8 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) {
 
         try {
             ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
-            HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
             for (HiveColumnHandle column : projectedReaderColumns) {
-                readColumns.put(column.getBaseHiveColumnIndex(), column.getHiveType().getType(typeManager, timestampPrecision));
+                readColumns.put(column.getBaseHiveColumnIndex(), column.getType());
             }
 
             RcFileReader rcFileReader = new RcFileReader(
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java
index 5a419fd6d119..b57daea8eacf 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java
@@ -206,7 +206,7 @@ public static Set<HivePageSourceFactory> getDefaultHivePageSourceFactories(HdfsEnvironment
             .add(new RegexPageSourceFactory(fileSystemFactory, stats, hiveConfig))
             .add(new SimpleTextFilePageSourceFactory(fileSystemFactory, stats, hiveConfig))
             .add(new SimpleSequenceFilePageSourceFactory(fileSystemFactory, stats, hiveConfig))
-            .add(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, fileSystemFactory, stats, hiveConfig))
+            .add(new RcFilePageSourceFactory(fileSystemFactory, stats, hiveConfig))
             .add(new OrcPageSourceFactory(new OrcReaderConfig(), fileSystemFactory, stats, hiveConfig))
             .add(new ParquetPageSourceFactory(fileSystemFactory, stats, new ParquetReaderConfig(), hiveConfig))
             .build();
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
index 7414db5793a3..4904e01a6892 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
@@ -282,7 +282,7 @@ public void testRcTextPageSource(int rowCount, long fileSizePadding)
                 .withColumns(TEST_COLUMNS)
                 .withRowsCount(rowCount)
                 .withFileSizePadding(fileSizePadding)
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
     }
 
     @Test(dataProvider = "rowCount")
@@ -299,7 +299,7 @@ public void testRcTextOptimizedWriter(int rowCount)
                 .withRowsCount(rowCount)
                 .withFileWriterFactory(new RcFileFileWriterFactory(FILE_SYSTEM_FACTORY, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
                 .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
     }
 
     @Test(dataProvider = "rowCount")
@@ -316,7 +316,7 @@ public void testRcBinaryPageSource(int rowCount)
         assertThatFileFormat(RCBINARY)
                 .withColumns(testColumns)
                 .withRowsCount(rowCount)
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
     }
 
     @Test(dataProvider = "rowCount")
@@ -341,7 +341,7 @@ public void testRcBinaryOptimizedWriter(int rowCount)
                 // generic Hive writer corrupts timestamps
                 .withSkipGenericWriterTest()
                 .withFileWriterFactory(new RcFileFileWriterFactory(FILE_SYSTEM_FACTORY, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
                 .withColumns(testColumnsNoTimestamps)
                 .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
     }
@@ -571,13 +571,13 @@ public void testTruncateVarcharColumn()
         assertThatFileFormat(RCTEXT)
                 .withWriteColumns(ImmutableList.of(writeColumn))
                 .withReadColumns(ImmutableList.of(readColumn))
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
                 .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
 
         assertThatFileFormat(RCBINARY)
                 .withWriteColumns(ImmutableList.of(writeColumn))
                 .withReadColumns(ImmutableList.of(readColumn))
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
                 .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
 
         assertThatFileFormat(ORC)
@@ -804,7 +804,7 @@ public void testRCTextProjectedColumnsPageSource(int rowCount)
                 .withWriteColumns(writeColumns)
                 .withReadColumns(readColumns)
                 .withRowsCount(rowCount)
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
     }
 
     @Test(dataProvider = "rowCount")
@@ -837,7 +837,7 @@ public void testRCBinaryProjectedColumns(int rowCount)
                 // generic Hive writer corrupts timestamps
                 .withSkipGenericWriterTest()
                 .withFileWriterFactory(new RcFileFileWriterFactory(FILE_SYSTEM_FACTORY, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
     }
 
     @Test(dataProvider = "rowCount")
@@ -867,7 +867,7 @@ public void testRCBinaryProjectedColumnsPageSource(int rowCount)
                 // generic Hive writer corrupts timestamps
                 .withSkipGenericWriterTest()
                 .withFileWriterFactory(new RcFileFileWriterFactory(FILE_SYSTEM_FACTORY, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
-                .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
+                .isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
     }
 
     @Test
@@ -884,12 +884,12 @@ public void testFailForLongVarcharPartitionColumn()
 
         assertThatFileFormat(RCTEXT)
                 .withColumns(columns)
-                .isFailingForPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
+                .isFailingForPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
                 .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
 
         assertThatFileFormat(RCBINARY)
                 .withColumns(columns)
-                .isFailingForPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
+                .isFailingForPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
                 .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
 
         assertThatFileFormat(ORC)
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java
index 49760a125d60..ee450abc9663 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java
@@ -55,7 +55,6 @@
 import static io.trino.parquet.writer.ParquetSchemaConverter.HIVE_PARQUET_USE_LEGACY_DECIMAL_ENCODING;
 import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY;
 import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS;
-import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER;
 import static org.joda.time.DateTimeZone.UTC;
 
 public final class StandardFileFormats
@@ -73,7 +72,7 @@ public HiveStorageFormat getFormat()
         @Override
         public Optional<HivePageSourceFactory> getHivePageSourceFactory(HdfsEnvironment hdfsEnvironment)
         {
-            return Optional.of(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_FILE_SYSTEM_FACTORY, new FileFormatDataSourceStats(), new HiveConfig().setRcfileTimeZone("UTC")));
+            return Optional.of(new RcFilePageSourceFactory(HDFS_FILE_SYSTEM_FACTORY, new FileFormatDataSourceStats(), new HiveConfig().setRcfileTimeZone("UTC")));
         }
 
         @Override
@@ -104,7 +103,7 @@ public HiveStorageFormat getFormat()
         @Override
         public Optional<HivePageSourceFactory> getHivePageSourceFactory(HdfsEnvironment hdfsEnvironment)
        {
-            return Optional.of(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_FILE_SYSTEM_FACTORY, new FileFormatDataSourceStats(), new HiveConfig().setRcfileTimeZone("UTC")));
+            return Optional.of(new RcFilePageSourceFactory(HDFS_FILE_SYSTEM_FACTORY, new FileFormatDataSourceStats(), new HiveConfig().setRcfileTimeZone("UTC")));
         }
 
         @Override
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java
index 606c3c9c1b24..ca77700d3047 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java
@@ -16,7 +16,9 @@
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
 import io.trino.jdbc.TrinoArray;
+import io.trino.plugin.hive.HiveTimestampPrecision;
 import io.trino.tempto.assertions.QueryAssert.Row;
 import io.trino.tempto.fulfillment.table.MutableTablesState;
 import io.trino.tempto.fulfillment.table.TableDefinition;
@@ -28,10 +30,13 @@
 
 import java.math.BigDecimal;
 import java.sql.JDBCType;
+import java.sql.SQLException;
+import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.function.Function;
@@ -41,10 +46,12 @@
 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.common.collect.ImmutableSet.toImmutableSet;
 import static io.airlift.testing.Assertions.assertEqualsIgnoreOrder;
+import static io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS;
 import static io.trino.tempto.assertions.QueryAssert.Row.row;
 import static io.trino.tempto.assertions.QueryAssert.assertThat;
 import static io.trino.tempto.context.ThreadLocalTestContextHolder.testContext;
 import static io.trino.tempto.fulfillment.table.TableHandle.tableHandle;
+import static io.trino.tests.product.utils.JdbcDriverUtils.setSessionProperty;
 import static io.trino.tests.product.utils.QueryExecutors.onHive;
 import static io.trino.tests.product.utils.QueryExecutors.onTrino;
 import static java.lang.String.format;
@@ -60,6 +67,7 @@
 import static java.sql.JDBCType.SMALLINT;
 import static java.sql.JDBCType.STRUCT;
 import static java.sql.JDBCType.VARCHAR;
+import static java.util.Collections.nCopies;
 import static java.util.Locale.ENGLISH;
 import static java.util.Objects.requireNonNull;
 import static java.util.stream.Collectors.toList;
@@ -364,6 +372,132 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) {
                 .buildOrThrow();
     }
 
+    protected void doTestHiveCoercionWithDifferentTimestampPrecision(HiveTableDefinition tableDefinition)
+    {
+        String tableName = mutableTableInstanceOf(tableDefinition).getNameInDatabase();
+
+        // Insert all the data with nanoseconds precision
+        setHiveTimestampPrecision(NANOSECONDS);
+        onTrino().executeQuery(
+                """
+                INSERT INTO %s
+                    SELECT
+                        (CAST(ROW (timestamp_value, -1) AS ROW(keep TIMESTAMP(9), si2i SMALLINT))),
+                        ARRAY [CAST(ROW (timestamp_value, -1) AS ROW (keep TIMESTAMP(9), si2i SMALLINT))],
+                        MAP (ARRAY [2], ARRAY [CAST(ROW (timestamp_value, -1) AS ROW (keep TIMESTAMP(9), si2i SMALLINT))]),
+                        1
+                    FROM (VALUES
+                        (TIMESTAMP '2121-07-15 15:30:12.123499'),
+                        (TIMESTAMP '2121-07-15 15:30:12.123500'),
+                        (TIMESTAMP '2121-07-15 15:30:12.123501'),
+                        (TIMESTAMP '2121-07-15 15:30:12.123499999'),
+                        (TIMESTAMP '2121-07-15 15:30:12.123500000'),
+                        (TIMESTAMP '2121-07-15 15:30:12.123500001')) AS t (timestamp_value)
+                """.formatted(tableName));
+
+        onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_row_to_row timestamp_row_to_row struct<keep:timestamp, si2i:int>", tableName));
+        onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_list_to_list timestamp_list_to_list array<struct<keep:timestamp, si2i:int>>", tableName));
+        onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_map_to_map timestamp_map_to_map map<int,struct<keep:timestamp, si2i:int>>", tableName));
+
+        for (HiveTimestampPrecision hiveTimestampPrecision : HiveTimestampPrecision.values()) {
+            setHiveTimestampPrecision(hiveTimestampPrecision);
+            assertThat(onTrino().executeQuery("SHOW COLUMNS FROM " + tableName).project(1, 2)).containsExactlyInOrder(
+                    row("timestamp_row_to_row", "row(keep timestamp(%d), si2i integer)".formatted(hiveTimestampPrecision.getPrecision())),
+                    row("timestamp_list_to_list", "array(row(keep timestamp(%d), si2i integer))".formatted(hiveTimestampPrecision.getPrecision())),
+                    row("timestamp_map_to_map", "map(integer, row(keep timestamp(%d), si2i integer))".formatted(hiveTimestampPrecision.getPrecision())),
+                    row("id", "bigint"));
+
+            List<String> allColumns = ImmutableList.of(
+                    "timestamp_row_to_row",
+                    "timestamp_list_to_list",
+                    "timestamp_map_to_map",
+                    "id");
+
+            // For Trino, remove unsupported columns
+            List<String> trinoReadColumns = removeUnsupportedColumnsForTrino(allColumns, tableName);
+            Map<String, List<Object>> expectedTrinoResults = Maps.filterKeys(
+                    expectedRowsForEngineProvider(Engine.TRINO, hiveTimestampPrecision),
+                    trinoReadColumns::contains);
+
+            String trinoReadQuery = format("SELECT %s FROM %s", String.join(", ", trinoReadColumns), tableName);
+            assertQueryResults(Engine.TRINO, trinoReadQuery, expectedTrinoResults, trinoReadColumns, 6, tableName);
+
+            List<String> hiveReadColumns = removeUnsupportedColumnsForHive(allColumns, tableName);
+            Map<String, List<Object>> expectedHiveResults = Maps.filterKeys(
+                    expectedRowsForEngineProvider(Engine.HIVE, hiveTimestampPrecision),
+                    hiveReadColumns::contains);
+
+            String hiveSelectQuery = format("SELECT %s FROM %s", String.join(", ", hiveReadColumns), tableName);
+            assertQueryResults(Engine.HIVE, hiveSelectQuery, expectedHiveResults, hiveReadColumns, 6, tableName);
+        }
+    }
+
+    protected Map<String, List<Object>> expectedRowsForEngineProvider(Engine engine, HiveTimestampPrecision timestampPrecision)
+    {
+        if (engine == Engine.HIVE) {
+            List<Object> baseData = ImmutableList.of(
+                    "{\"keep\":\"2121-07-15 15:30:12.123499\",\"si2i\":-1}",
+                    "{\"keep\":\"2121-07-15 15:30:12.1235\",\"si2i\":-1}",
+                    "{\"keep\":\"2121-07-15 15:30:12.123501\",\"si2i\":-1}",
+                    "{\"keep\":\"2121-07-15 15:30:12.123499999\",\"si2i\":-1}",
+                    "{\"keep\":\"2121-07-15 15:30:12.1235\",\"si2i\":-1}",
+                    "{\"keep\":\"2121-07-15 15:30:12.123500001\",\"si2i\":-1}");
+            return ImmutableMap.<String, List<Object>>builder()
+                    .put("timestamp_row_to_row", baseData)
+                    .put("timestamp_list_to_list", baseData.stream()
+                            .map(ImmutableList::of)
+                            .map(Objects::toString)
+                            .collect(toImmutableList()))
+                    .put("timestamp_map_to_map", baseData.stream()
+                            .map("{2:%s}"::formatted)
+                            .collect(toImmutableList()))
+                    .put("id", nCopies(6, 1))
+                    .buildOrThrow();
+        }
+
+        List<Timestamp> timestampValue = switch (timestampPrecision) {
+            case MILLISECONDS -> ImmutableList.of(
+                    Timestamp.valueOf("2121-07-15 15:30:12.123"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.124"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.124"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.123"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.124"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.124"));
+            case MICROSECONDS -> ImmutableList.of(
+                    Timestamp.valueOf("2121-07-15 15:30:12.123499"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.1235"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.123501"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.1235"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.1235"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.1235"));
+            case NANOSECONDS -> ImmutableList.of(
+                    Timestamp.valueOf("2121-07-15 15:30:12.123499"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.1235"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.123501"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.123499999"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.1235"),
+                    Timestamp.valueOf("2121-07-15 15:30:12.123500001"));
+        };
+
+        List<Object> baseData = timestampValue.stream()
+                .map(timestamp -> rowBuilder()
+                        .addField("keep", timestamp)
+                        .addField("si2i", -1)
+                        .build())
+                .collect(toImmutableList());
+
+        return ImmutableMap.<String, List<Object>>builder()
+                .put("timestamp_row_to_row", baseData)
+                .put("timestamp_list_to_list", baseData.stream()
+                        .map(ImmutableList::of)
+                        .collect(toImmutableList()))
+                .put("timestamp_map_to_map", baseData.stream()
+                        .map(entry -> ImmutableMap.of(2, entry))
+                        .collect(toImmutableList()))
+                .put("id", nCopies(6, 1))
+                .buildOrThrow();
+    }
+
     protected List<String> removeUnsupportedColumnsForHive(List<String> columns, String tableName)
     {
         // TODO: assert exceptions being thrown for each column
@@ -460,10 +594,16 @@ protected Map<String, String> expectedExceptionsWithHiveContext()
                 .put(columnContext("1.2", "orc", "map_to_map"), "Unknown encoding kind: DIRECT_V2")
                 // Parquet
                 .put(columnContext("1.2", "parquet", "list_to_list"), "java.lang.UnsupportedOperationException: Cannot inspect java.util.ArrayList")
+                .put(columnContext("1.2", "parquet", "timestamp_row_to_row"), "Timestamp value coerced to a different value due to zone difference in HiveServer")
+                .put(columnContext("1.2", "parquet", "timestamp_list_to_list"), "java.lang.UnsupportedOperationException: Cannot inspect java.util.ArrayList")
+                .put(columnContext("1.2", "parquet", "timestamp_map_to_map"), "java.lang.UnsupportedOperationException: Cannot inspect java.util.ArrayList")
                 // Rcbinary
                 .put(columnContext("1.2", "rcbinary", "row_to_row"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct")
                 .put(columnContext("1.2", "rcbinary", "list_to_list"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray")
                 .put(columnContext("1.2", "rcbinary", "map_to_map"), "java.util.HashMap cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap")
+                .put(columnContext("1.2", "rcbinary", "timestamp_row_to_row"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct")
+                .put(columnContext("1.2", "rcbinary", "timestamp_list_to_list"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray")
+                .put(columnContext("1.2", "rcbinary", "timestamp_map_to_map"), "java.util.HashMap cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap")
                 //
                 // 2.1
                 // Parquet
@@ -490,6 +630,9 @@ protected Map<String, String> expectedExceptionsWithHiveContext()
                 .put(columnContext("3.1", "rcbinary", "row_to_row"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct")
                 .put(columnContext("3.1", "rcbinary", "list_to_list"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray")
                 .put(columnContext("3.1", "rcbinary", "map_to_map"), "java.util.LinkedHashMap cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap")
+                .put(columnContext("3.1", "rcbinary", "timestamp_row_to_row"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct")
+                .put(columnContext("3.1", "rcbinary", "timestamp_list_to_list"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray")
+                .put(columnContext("3.1", "rcbinary", "timestamp_map_to_map"), "java.util.LinkedHashMap cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap")
                 .buildOrThrow();
     }
@@ -527,7 +670,7 @@ private void assertQueryResults(
         for (int sqlIndex = 1; sqlIndex <= columns.size(); sqlIndex++) {
             String column = columns.get(sqlIndex - 1);
 
-            if (column.equals("row_to_row") || column.equals("map_to_map")) {
+            if (column.contains("row_to_row") || column.contains("map_to_map")) {
                 assertEqualsIgnoreOrder(
                         actual.column(sqlIndex),
                         column(expectedRows, sqlIndex),
@@ -535,7 +678,7 @@ private void assertQueryResults(
                 continue;
             }
 
-            if (column.equals("list_to_list")) {
+            if (column.contains("list_to_list")) {
                 assertEqualsIgnoreOrder(
                         engine == Engine.TRINO ? extract(actual.column(sqlIndex)) : actual.column(sqlIndex),
                         column(expectedRows, sqlIndex),
@@ -636,6 +779,10 @@ private void assertColumnTypes(
                 .put("timestamp_to_string", VARCHAR)
                 .put("timestamp_to_bounded_varchar", VARCHAR)
                 .put("timestamp_to_smaller_varchar", VARCHAR)
+                .put("timestamp_to_varchar", VARCHAR)
+                .put("timestamp_row_to_row", engine == Engine.TRINO ? JAVA_OBJECT : STRUCT) // row
+                .put("timestamp_list_to_list", ARRAY) // list
+                .put("timestamp_map_to_map", JAVA_OBJECT) // map
                 .buildOrThrow();
 
         assertThat(queryResult)
@@ -763,4 +910,14 @@ private static QueryResult execute(Engine engine, String sql, QueryExecutor.QueryParam... params)
     {
         return engine.queryExecutor().executeQuery(sql, params);
     }
+
+    private static void setHiveTimestampPrecision(HiveTimestampPrecision hiveTimestampPrecision)
+    {
+        try {
+            setSessionProperty(onTrino().getConnection(), "hive.timestamp_precision", hiveTimestampPrecision.name());
+        }
+        catch (SQLException e) {
+            throw new RuntimeException(e);
+        }
+    }
 }
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java
index 2004b547219d..0f1a1fc8b3fd 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java
@@ -24,6 +24,7 @@
 
 import java.util.Optional;
 
+import static io.trino.tempto.Requirements.compose;
 import static io.trino.tempto.assertions.QueryAssert.Row.row;
 import static io.trino.tempto.assertions.QueryAssert.assertThat;
 import static io.trino.tempto.fulfillment.table.MutableTableRequirement.State.CREATED;
@@ -43,10 +44,18 @@ public class TestHiveCoercionOnPartitionedTable
             .setNoData()
             .build();
 
+    public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_TEXTFILE = tableDefinitionForTimestampCoercionBuilder("TEXTFILE", Optional.empty(), Optional.of("DELIMITED FIELDS TERMINATED BY '|'"))
+            .setNoData()
+            .build();
+
     public static final HiveTableDefinition HIVE_COERCION_PARQUET = tableDefinitionBuilder("PARQUET", Optional.empty(), Optional.empty())
             .setNoData()
             .build();
 
+    public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_PARQUET = tableDefinitionForTimestampCoercionBuilder("PARQUET", Optional.empty(), Optional.empty())
+            .setNoData()
+            .build();
+
     public static final HiveTableDefinition HIVE_COERCION_AVRO = avroTableDefinitionBuilder()
             .setNoData()
             .build();
@@ -55,14 +64,26 @@ public class TestHiveCoercionOnPartitionedTable
             .setNoData()
             .build();
 
+    public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_ORC = tableDefinitionForTimestampCoercionBuilder("ORC", Optional.empty(), Optional.empty())
+            .setNoData()
+            .build();
+
     public static final HiveTableDefinition HIVE_COERCION_RCTEXT = tableDefinitionBuilder("RCFILE", Optional.of("RCTEXT"), Optional.of("SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'"))
             .setNoData()
             .build();
 
+    public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_RCTEXT = tableDefinitionForTimestampCoercionBuilder("RCFILE", Optional.of("RCTEXT"), Optional.of("SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'"))
+            .setNoData()
+            .build();
+
     public static final HiveTableDefinition HIVE_COERCION_RCBINARY = tableDefinitionBuilder("RCFILE", Optional.of("RCBINARY"), Optional.of("SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'"))
             .setNoData()
             .build();
 
+    public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_RCBINARY = tableDefinitionForTimestampCoercionBuilder("RCFILE", Optional.of("RCBINARY"), Optional.of("SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'"))
+            .setNoData()
+            .build();
+
     private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBuilder(String fileFormat, Optional<String> recommendTableName, Optional<String> rowFormat)
     {
         String tableName = format("%s_hive_coercion", recommendTableName.orElse(fileFormat).toLowerCase(ENGLISH));
@@ -108,6 +129,21 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBuilder(String fileFormat, Optional<String> recommendTableName, Optional<String> rowFormat)
                 "STORED AS " + fileFormat);
     }
 
+    private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionForTimestampCoercionBuilder(String fileFormat, Optional<String> tablePrefix, Optional<String> rowFormat)
+    {
+        String tableName = format("%s_hive_timestamp_coercion", tablePrefix.orElse(fileFormat).toLowerCase(ENGLISH));
+        return HiveTableDefinition.builder(tableName)
+                .setCreateTableDDLTemplate("" +
+                        "CREATE TABLE %NAME%(" +
+                        "    timestamp_row_to_row       STRUCT<keep:TIMESTAMP, si2i:SMALLINT>, " +
+                        "    timestamp_list_to_list     ARRAY<STRUCT<keep:TIMESTAMP, si2i:SMALLINT>>, " +
+                        "    timestamp_map_to_map       MAP<INT, STRUCT<keep:TIMESTAMP, si2i:SMALLINT>>" +
+                        ") " +
+                        "PARTITIONED BY (id BIGINT) " +
+                        rowFormat.map(s -> format("ROW FORMAT %s ", s)).orElse("") +
+                        "STORED AS " + fileFormat);
+    }
+
     private static HiveTableDefinition.HiveTableDefinitionBuilder avroTableDefinitionBuilder()
     {
         return HiveTableDefinition.builder("avro_hive_coercion")
@@ -126,7 +162,9 @@ public static final class TextRequirements
         @Override
         public Requirement getRequirements(Configuration configuration)
         {
-            return MutableTableRequirement.builder(HIVE_COERCION_TEXTFILE).withState(CREATED).build();
+            return compose(
+                    MutableTableRequirement.builder(HIVE_COERCION_TEXTFILE).withState(CREATED).build(),
+                    MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_TEXTFILE).withState(CREATED).build());
         }
     }
 
@@ -136,7 +174,9 @@ public static final class OrcRequirements
         @Override
         public Requirement getRequirements(Configuration configuration)
         {
-            return MutableTableRequirement.builder(HIVE_COERCION_ORC).withState(CREATED).build();
+            return compose(
+                    MutableTableRequirement.builder(HIVE_COERCION_ORC).withState(CREATED).build(),
+                    MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_ORC).withState(CREATED).build());
         }
     }
 
@@ -146,7 +186,9 @@ public static final class RcTextRequirements
         @Override
         public Requirement getRequirements(Configuration configuration)
         {
-            return MutableTableRequirement.builder(HIVE_COERCION_RCTEXT).withState(CREATED).build();
+            return compose(
+                    MutableTableRequirement.builder(HIVE_COERCION_RCTEXT).withState(CREATED).build(),
+                    MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_RCTEXT).withState(CREATED).build());
         }
     }
 
@@ -156,7 +198,9 @@ public static final class RcBinaryRequirements
         @Override
         public Requirement getRequirements(Configuration configuration)
         {
-            return MutableTableRequirement.builder(HIVE_COERCION_RCBINARY).withState(CREATED).build();
+            return compose(
+                    MutableTableRequirement.builder(HIVE_COERCION_RCBINARY).withState(CREATED).build(),
+                    MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_RCBINARY).withState(CREATED).build());
         }
     }
 
@@ -166,7 +210,9 @@ public static final class ParquetRequirements
         @Override
         public Requirement getRequirements(Configuration configuration)
         {
-            return MutableTableRequirement.builder(HIVE_COERCION_PARQUET).withState(CREATED).build();
+            return compose(
+                    MutableTableRequirement.builder(HIVE_COERCION_PARQUET).withState(CREATED).build(),
+                    MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_PARQUET).withState(CREATED).build());
         }
     }
 
@@ -187,6 +233,13 @@ public void testHiveCoercionTextFile()
         doTestHiveCoercion(HIVE_COERCION_TEXTFILE);
     }
 
+    @Requires(TextRequirements.class)
+    @Test(groups = {HIVE_COERCION, JDBC})
+    public void testHiveCoercionWithDifferentTimestampPrecisionTextFile()
+    {
+        doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_TEXTFILE);
+    }
+
     @Requires(OrcRequirements.class)
     @Test(groups = {HIVE_COERCION, JDBC})
     public void testHiveCoercionOrc()
@@ -194,6 +247,13 @@ public void testHiveCoercionOrc()
         doTestHiveCoercion(HIVE_COERCION_ORC);
     }
 
+    @Requires(OrcRequirements.class)
+    @Test(groups = {HIVE_COERCION, JDBC})
+    public void testHiveCoercionWithDifferentTimestampPrecisionOrc()
+    {
+        doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_ORC);
+    }
+
     @Requires(RcTextRequirements.class)
     @Test(groups = {HIVE_COERCION, JDBC})
     public void testHiveCoercionRcText()
@@ -201,6 +261,13 @@ public void testHiveCoercionRcText()
         doTestHiveCoercion(HIVE_COERCION_RCTEXT);
     }
 
+    @Requires(RcTextRequirements.class)
+    @Test(groups = {HIVE_COERCION, JDBC})
+    public void testHiveCoercionWithDifferentTimestampPrecisionRcText()
+    {
+        doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_RCTEXT);
+    }
+
     @Requires(RcBinaryRequirements.class)
     @Test(groups = {HIVE_COERCION, JDBC})
     public void testHiveCoercionRcBinary()
@@ -208,6 +275,13 @@ public void testHiveCoercionRcBinary()
         doTestHiveCoercion(HIVE_COERCION_RCBINARY);
     }
 
+    @Requires(RcBinaryRequirements.class)
+    @Test(groups = {HIVE_COERCION, JDBC})
+    public void testHiveCoercionWithDifferentTimestampPrecisionRcBinary()
+    {
+        doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_RCBINARY);
+    }
+
     @Requires(ParquetRequirements.class)
     @Test(groups = {HIVE_COERCION, JDBC})
     public void testHiveCoercionParquet()
@@ -215,6 +289,13 @@ public void testHiveCoercionParquet()
         doTestHiveCoercion(HIVE_COERCION_PARQUET);
     }
 
+    @Requires(ParquetRequirements.class)
+    @Test(groups = {HIVE_COERCION, JDBC})
+    public void testHiveCoercionWithDifferentTimestampPrecisionParquet()
+    {
+        doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_PARQUET);
+    }
+
     @Requires(AvroRequirements.class)
     @Test(groups = {HIVE_COERCION, JDBC})
     public void testHiveCoercionAvro()
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java
index a430a1fd2ff6..68adf1a33e60 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java
@@ -24,6 +24,7 @@
 
 import java.util.Map;
 
+import static io.trino.tempto.Requirements.compose;
 import static io.trino.tempto.fulfillment.table.MutableTableRequirement.State.CREATED;
 import static io.trino.tests.product.TestGroups.HIVE_COERCION;
 import static io.trino.tests.product.TestGroups.JDBC;
@@ -37,6 +38,10 @@ public class TestHiveCoercionOnUnpartitionedTable
             .setNoData()
             .build();
 
+    public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_ORC = tableDefinitionForTimestampCoercionBuilder("ORC")
+            .setNoData()
+            .build();
+
     private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBuilder(String fileFormat)
     {
         String tableName = format("%s_hive_coercion_unpartitioned", fileFormat.toLowerCase(ENGLISH));
@@ -79,13 +84,28 @@ char_to_smaller_char CHAR(3),
                 STORED AS\s""" + fileFormat);
     }
 
+    private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionForTimestampCoercionBuilder(String fileFormat)
+    {
+        String tableName = format("%s_hive_timestamp_coercion_unpartitioned", fileFormat.toLowerCase(ENGLISH));
+        return HiveTableDefinition.builder(tableName)
+                .setCreateTableDDLTemplate("""
+                        CREATE TABLE %NAME%(
+                            timestamp_row_to_row        STRUCT<keep:TIMESTAMP, si2i:SMALLINT>,
+                            timestamp_list_to_list      ARRAY<STRUCT<keep:TIMESTAMP, si2i:SMALLINT>>,
+                            timestamp_map_to_map        MAP<INT, STRUCT<keep:TIMESTAMP, si2i:SMALLINT>>,
+                            id                          BIGINT)
+                        STORED AS\s""" + fileFormat);
+    }
+
     public static final class OrcRequirements
             implements RequirementsProvider
     {
         @Override
         public Requirement getRequirements(Configuration configuration)
         {
-            return MutableTableRequirement.builder(HIVE_COERCION_ORC).withState(CREATED).build();
+            return compose(
+                    MutableTableRequirement.builder(HIVE_COERCION_ORC).withState(CREATED).build(),
+                    MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_ORC).withState(CREATED).build());
         }
     }
 
@@ -96,6 +116,13 @@ public void testHiveCoercionOrc()
     {
         doTestHiveCoercion(HIVE_COERCION_ORC);
     }
 
+    @Requires(OrcRequirements.class)
+    @Test(groups = {HIVE_COERCION, JDBC})
+    public void testHiveCoercionWithDifferentTimestampPrecision()
+    {
+        doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_ORC);
+    }
+
     @Override
     protected Map<String, String> expectedExceptionsWithTrinoContext()
     {