Skip to content

Commit

Permalink
Handle missing StorageDescriptor in Hive Glue tables
Browse files Browse the repository at this point in the history
  • Loading branch information
alexjo2144 authored and findepi committed Feb 24, 2022
1 parent 4a44c7c commit 9d8651e
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,10 @@ private ConnectorTableMetadata doGetTableMetadata(ConnectorSession session, Sche
Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
.orElseThrow(() -> new TableNotFoundException(tableName));

if (isIcebergTable(table) || isDeltaLakeTable(table)) {
throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Not a Hive table '%s'", tableName));
}

if (!translateHiveViews && isHiveOrPrestoView(table)) {
throw new TableNotFoundException(tableName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,11 @@
import static com.google.common.base.MoreObjects.firstNonNull;
import static com.google.common.base.Strings.nullToEmpty;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT;
import static io.trino.plugin.hive.HiveType.HIVE_INT;
import static io.trino.plugin.hive.metastore.util.Memoizers.memoizeLast;
import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable;
import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
Expand All @@ -68,31 +72,41 @@ public static Database convertDatabase(com.amazonaws.services.glue.model.Databas

public static Table convertTable(com.amazonaws.services.glue.model.Table glueTable, String dbName)
{
// TODO (https://github.com/trinodb/trino/issues/10902) glueTable.getStorageDescriptor() is an optional field in Glue
requireNonNull(glueTable.getStorageDescriptor(), () -> format("Table StorageDescriptor is null for table %s.%s (%s)", dbName, glueTable.getName(), glueTable));

Map<String, String> tableParameters = convertParameters(glueTable.getParameters());
StorageDescriptor sd = glueTable.getStorageDescriptor();

Table.Builder tableBuilder = Table.builder()
.setDatabaseName(dbName)
.setTableName(glueTable.getName())
.setOwner(Optional.ofNullable(glueTable.getOwner()))
// Athena treats missing table type as EXTERNAL_TABLE.
.setTableType(firstNonNull(glueTable.getTableType(), EXTERNAL_TABLE.name()))
.setDataColumns(convertColumns(sd.getColumns(), sd.getSerdeInfo().getSerializationLibrary()))
.setParameters(tableParameters)
.setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText()))
.setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText()));

if (glueTable.getPartitionKeys() != null) {
tableBuilder.setPartitionColumns(convertColumns(glueTable.getPartitionKeys(), sd.getSerdeInfo().getSerializationLibrary()));
StorageDescriptor sd = glueTable.getStorageDescriptor();
if (sd == null) {
if (isIcebergTable(tableParameters) || isDeltaLakeTable(tableParameters)) {
// Iceberg and Delta Lake tables do not use the StorageDescriptor field, but we need to return a Table so the caller can check that
// the table is an Iceberg/Delta table and decide whether to redirect or fail.
tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty())));
tableBuilder.getStorageBuilder().setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.PARQUET));
}
else {
throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Table StorageDescriptor is null for table %s.%s (%s)", dbName, glueTable.getName(), glueTable));
}
}
else {
tableBuilder.setPartitionColumns(ImmutableList.of());
tableBuilder.setDataColumns(convertColumns(sd.getColumns(), sd.getSerdeInfo().getSerializationLibrary()));
if (glueTable.getPartitionKeys() != null) {
tableBuilder.setPartitionColumns(convertColumns(glueTable.getPartitionKeys(), sd.getSerdeInfo().getSerializationLibrary()));
}
else {
tableBuilder.setPartitionColumns(ImmutableList.of());
}
// No benefit to memoizing here, just reusing the implementation
new StorageConverter().setStorageBuilder(sd, tableBuilder.getStorageBuilder(), tableParameters);
}
// No benefit to memoizing here, just reusing the implementation
new StorageConverter().setStorageBuilder(sd, tableBuilder.getStorageBuilder(), tableParameters);

return tableBuilder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.Properties;
Expand Down Expand Up @@ -1128,13 +1129,23 @@ public static boolean isHiveSystemSchema(String schemaName)

public static boolean isDeltaLakeTable(Table table)
{
return table.getParameters().containsKey(SPARK_TABLE_PROVIDER_KEY)
&& table.getParameters().get(SPARK_TABLE_PROVIDER_KEY).toLowerCase(ENGLISH).equals(DELTA_LAKE_PROVIDER);
return isDeltaLakeTable(table.getParameters());
}

public static boolean isDeltaLakeTable(Map<String, String> tableParameters)
{
return tableParameters.containsKey(SPARK_TABLE_PROVIDER_KEY)
&& tableParameters.get(SPARK_TABLE_PROVIDER_KEY).toLowerCase(ENGLISH).equals(DELTA_LAKE_PROVIDER);
}

public static boolean isIcebergTable(Table table)
{
return ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(table.getParameters().get(ICEBERG_TABLE_TYPE_NAME));
return isIcebergTable(table.getParameters());
}

public static boolean isIcebergTable(Map<String, String> tableParameters)
{
return ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(tableParameters.get(ICEBERG_TABLE_TYPE_NAME));
}

public static boolean isSparkBucketedTable(Table table)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@

import com.amazonaws.services.glue.AWSGlueAsync;
import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder;
import com.amazonaws.services.glue.model.CreateTableRequest;
import com.amazonaws.services.glue.model.Database;
import com.amazonaws.services.glue.model.DeleteDatabaseRequest;
import com.amazonaws.services.glue.model.DeleteTableRequest;
import com.amazonaws.services.glue.model.EntityNotFoundException;
import com.amazonaws.services.glue.model.GetDatabasesRequest;
import com.amazonaws.services.glue.model.GetDatabasesResult;
Expand Down Expand Up @@ -89,6 +91,12 @@
import static io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults;
import static io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.DECIMAL_TYPE;
import static io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.decimalOf;
import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER;
import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME;
import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE;
import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY;
import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable;
import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable;
import static io.trino.spi.connector.RetryMode.NO_RETRIES;
import static io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE;
import static io.trino.spi.statistics.ColumnStatisticType.MIN_VALUE;
Expand All @@ -102,6 +110,7 @@
import static java.util.UUID.randomUUID;
import static java.util.concurrent.TimeUnit.DAYS;
import static org.apache.hadoop.hive.common.FileUtils.makePartName;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.testng.Assert.assertEquals;
Expand Down Expand Up @@ -1088,6 +1097,49 @@ public void testInvalidColumnStatisticsMetadata()
}
}

@Test
public void testTableWithoutStorageDescriptor()
{
// StorageDescriptor is an Optional field for Glue tables. Iceberg and Delta Lake tables may not have it set.
SchemaTableName table = temporaryTable("test_missing_storage_descriptor");
DeleteTableRequest deleteTableRequest = new DeleteTableRequest()
.withDatabaseName(table.getSchemaName())
.withName(table.getTableName());
try {
TableInput tableInput = new TableInput()
.withName(table.getTableName())
.withTableType(EXTERNAL_TABLE.name());
glueClient.createTable(new CreateTableRequest()
.withDatabaseName(database)
.withTableInput(tableInput));

assertThatThrownBy(() -> metastore.getTable(table.getSchemaName(), table.getTableName()))
.hasMessageStartingWith("Table StorageDescriptor is null for table");
glueClient.deleteTable(deleteTableRequest);

// Iceberg table
tableInput = tableInput.withParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE));
glueClient.createTable(new CreateTableRequest()
.withDatabaseName(database)
.withTableInput(tableInput));
assertTrue(isIcebergTable(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow()));
glueClient.deleteTable(deleteTableRequest);

// Delta Lake table
tableInput = tableInput.withParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER));
glueClient.createTable(new CreateTableRequest()
.withDatabaseName(database)
.withTableInput(tableInput));
assertTrue(isDeltaLakeTable(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow()));
}
finally {
// Table cannot be dropped through HiveMetastore since a TableHandle cannot be created
glueClient.deleteTable(new DeleteTableRequest()
.withDatabaseName(table.getSchemaName())
.withName(table.getTableName()));
}
}

private Block singleValueBlock(long value)
{
return BigintType.BIGINT.createBlockBuilder(null, 1).writeLong(value).build();
Expand Down

0 comments on commit 9d8651e

Please sign in to comment.