diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index 1bb6a337f349..384aad066581 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -557,6 +557,10 @@ private ConnectorTableMetadata doGetTableMetadata(ConnectorSession session, Sche Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()) .orElseThrow(() -> new TableNotFoundException(tableName)); + if (isIcebergTable(table) || isDeltaLakeTable(table)) { + throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Not a Hive table '%s'", tableName)); + } + if (!translateHiveViews && isHiveOrPrestoView(table)) { throw new TableNotFoundException(tableName); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java index e7ee90cb6e44..8d0ebda51797 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java @@ -43,7 +43,11 @@ import static com.google.common.base.MoreObjects.firstNonNull; import static com.google.common.base.Strings.nullToEmpty; import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA; +import static io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT; +import static io.trino.plugin.hive.HiveType.HIVE_INT; import static io.trino.plugin.hive.metastore.util.Memoizers.memoizeLast; +import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable; +import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; @@ -68,31 +72,41 @@ public static Database convertDatabase(com.amazonaws.services.glue.model.Databas public static Table convertTable(com.amazonaws.services.glue.model.Table glueTable, String dbName) { - // TODO (https://github.com/trinodb/trino/issues/10902) glueTable.getStorageDescriptor() is an optional field in Glue - requireNonNull(glueTable.getStorageDescriptor(), () -> format("Table StorageDescriptor is null for table %s.%s (%s)", dbName, glueTable.getName(), glueTable)); - Map tableParameters = convertParameters(glueTable.getParameters()); - StorageDescriptor sd = glueTable.getStorageDescriptor(); - Table.Builder tableBuilder = Table.builder() .setDatabaseName(dbName) .setTableName(glueTable.getName()) .setOwner(Optional.ofNullable(glueTable.getOwner())) // Athena treats missing table type as EXTERNAL_TABLE. .setTableType(firstNonNull(glueTable.getTableType(), EXTERNAL_TABLE.name())) - .setDataColumns(convertColumns(sd.getColumns(), sd.getSerdeInfo().getSerializationLibrary())) .setParameters(tableParameters) .setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText())) .setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText())); - if (glueTable.getPartitionKeys() != null) { - tableBuilder.setPartitionColumns(convertColumns(glueTable.getPartitionKeys(), sd.getSerdeInfo().getSerializationLibrary())); + StorageDescriptor sd = glueTable.getStorageDescriptor(); + if (sd == null) { + if (isIcebergTable(tableParameters) || isDeltaLakeTable(tableParameters)) { + // Iceberg and Delta Lake tables do not use the StorageDescriptor field, but we need to return a Table so the caller can check that + // the table is an Iceberg/Delta table and decide whether to redirect or fail. + tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty()))); + tableBuilder.getStorageBuilder().setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.PARQUET)); + } + else { + throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Table StorageDescriptor is null for table %s.%s (%s)", dbName, glueTable.getName(), glueTable)); + } } else { - tableBuilder.setPartitionColumns(ImmutableList.of()); + tableBuilder.setDataColumns(convertColumns(sd.getColumns(), sd.getSerdeInfo().getSerializationLibrary())); + if (glueTable.getPartitionKeys() != null) { + tableBuilder.setPartitionColumns(convertColumns(glueTable.getPartitionKeys(), sd.getSerdeInfo().getSerializationLibrary())); + } + else { + tableBuilder.setPartitionColumns(ImmutableList.of()); + } + // No benefit to memoizing here, just reusing the implementation + new StorageConverter().setStorageBuilder(sd, tableBuilder.getStorageBuilder(), tableParameters); } - // No benefit to memoizing here, just reusing the implementation - new StorageConverter().setStorageBuilder(sd, tableBuilder.getStorageBuilder(), tableParameters); + return tableBuilder.build(); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java index 5025214fa718..31e5c59972c1 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java @@ -90,6 +90,7 @@ import java.lang.reflect.Method; import java.math.BigDecimal; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; import java.util.Properties; @@ -1128,13 +1129,23 @@ public static boolean isHiveSystemSchema(String schemaName) public static boolean isDeltaLakeTable(Table table) { - return table.getParameters().containsKey(SPARK_TABLE_PROVIDER_KEY) - && table.getParameters().get(SPARK_TABLE_PROVIDER_KEY).toLowerCase(ENGLISH).equals(DELTA_LAKE_PROVIDER); + return isDeltaLakeTable(table.getParameters()); + } + + public static boolean isDeltaLakeTable(Map tableParameters) + { + return tableParameters.containsKey(SPARK_TABLE_PROVIDER_KEY) + && tableParameters.get(SPARK_TABLE_PROVIDER_KEY).toLowerCase(ENGLISH).equals(DELTA_LAKE_PROVIDER); } public static boolean isIcebergTable(Table table) { - return ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(table.getParameters().get(ICEBERG_TABLE_TYPE_NAME)); + return isIcebergTable(table.getParameters()); + } + + public static boolean isIcebergTable(Map tableParameters) + { + return ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(tableParameters.get(ICEBERG_TABLE_TYPE_NAME)); } public static boolean isSparkBucketedTable(Table table) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java index cc32eb3f3c2e..5b4344b4f357 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java @@ -15,8 +15,10 @@ import com.amazonaws.services.glue.AWSGlueAsync; import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; +import com.amazonaws.services.glue.model.CreateTableRequest; import com.amazonaws.services.glue.model.Database; import com.amazonaws.services.glue.model.DeleteDatabaseRequest; +import com.amazonaws.services.glue.model.DeleteTableRequest; import com.amazonaws.services.glue.model.EntityNotFoundException; import com.amazonaws.services.glue.model.GetDatabasesRequest; import com.amazonaws.services.glue.model.GetDatabasesResult; @@ -89,6 +91,12 @@ import static io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults; import static io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.DECIMAL_TYPE; import static io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.decimalOf; +import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER; +import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME; +import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE; +import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY; +import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable; +import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; import static io.trino.spi.connector.RetryMode.NO_RETRIES; import static io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE; import static io.trino.spi.statistics.ColumnStatisticType.MIN_VALUE; @@ -102,6 +110,7 @@ import static java.util.UUID.randomUUID; import static java.util.concurrent.TimeUnit.DAYS; import static org.apache.hadoop.hive.common.FileUtils.makePartName; +import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.testng.Assert.assertEquals; @@ -1088,6 +1097,49 @@ public void testInvalidColumnStatisticsMetadata() } } + @Test + public void testTableWithoutStorageDescriptor() + { + // StorageDescriptor is an Optional field for Glue tables. Iceberg and Delta Lake tables may not have it set. + SchemaTableName table = temporaryTable("test_missing_storage_descriptor"); + DeleteTableRequest deleteTableRequest = new DeleteTableRequest() + .withDatabaseName(table.getSchemaName()) + .withName(table.getTableName()); + try { + TableInput tableInput = new TableInput() + .withName(table.getTableName()) + .withTableType(EXTERNAL_TABLE.name()); + glueClient.createTable(new CreateTableRequest() + .withDatabaseName(database) + .withTableInput(tableInput)); + + assertThatThrownBy(() -> metastore.getTable(table.getSchemaName(), table.getTableName())) + .hasMessageStartingWith("Table StorageDescriptor is null for table"); + glueClient.deleteTable(deleteTableRequest); + + // Iceberg table + tableInput = tableInput.withParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE)); + glueClient.createTable(new CreateTableRequest() + .withDatabaseName(database) + .withTableInput(tableInput)); + assertTrue(isIcebergTable(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow())); + glueClient.deleteTable(deleteTableRequest); + + // Delta Lake table + tableInput = tableInput.withParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER)); + glueClient.createTable(new CreateTableRequest() + .withDatabaseName(database) + .withTableInput(tableInput)); + assertTrue(isDeltaLakeTable(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow())); + } + finally { + // Table cannot be dropped through HiveMetastore since a TableHandle cannot be created + glueClient.deleteTable(new DeleteTableRequest() + .withDatabaseName(table.getSchemaName()) + .withName(table.getTableName())); + } + } + private Block singleValueBlock(long value) { return BigintType.BIGINT.createBlockBuilder(null, 1).writeLong(value).build();