Support DML operations on Delta tables with name column mapping #16183

Merged 2 commits on Apr 18, 2023
@@ -162,8 +162,8 @@ public AbstractDeltaLakePageSink(
case REGULAR:
dataColumnHandles.add(column);
dataColumnsInputIndex.add(inputIndex);
-                dataColumnNames.add(column.getName());
-                dataColumnTypes.add(column.getType());
+                dataColumnNames.add(column.getPhysicalName());
+                dataColumnTypes.add(column.getPhysicalType());
break;
case SYNTHESIZED:
processSynthesizedColumn(column);
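Context for this hunk: under Delta's 'name' column mapping mode, every column has a logical name (what users query by) and a generated physical name (what is written into data files), so the page sink has to switch from getName() to getPhysicalName(). A minimal sketch of the distinction, using a made-up handle class and names rather than Trino's actual DeltaLakeColumnHandle:

import static java.util.Objects.requireNonNull;

// Illustrative stand-in for a column handle under 'name' column mapping:
// it carries both the logical name and the generated physical name.
record MappedColumn(String name, String physicalName)
{
    MappedColumn
    {
        requireNonNull(name, "name is null");
        requireNonNull(physicalName, "physicalName is null");
    }
}

class PageSinkNamingDemo
{
    public static void main(String[] args)
    {
        MappedColumn column = new MappedColumn("customer_id", "col-5f422f40-e970-4b50-8e1b-95a97f8c3e60");
        // The writer must emit the physical name into the data file;
        // only the Delta schema metadata knows the logical name.
        System.out.println("Parquet column: " + column.physicalName());
    }
}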
@@ -364,11 +364,10 @@ private FileWriter createParquetFileWriter(String path, List<DeltaLakeColumnHandle
Closeable rollbackAction = () -> fileSystem.deleteFile(path);

List<Type> parquetTypes = dataColumns.stream()
-                .map(column -> toParquetType(typeOperators, column.getType()))
+                .map(column -> toParquetType(typeOperators, column.getPhysicalType()))
.collect(toImmutableList());

List<String> dataColumnNames = dataColumns.stream()
-                .map(DeltaLakeColumnHandle::getName)
+                .map(DeltaLakeColumnHandle::getPhysicalName)
.collect(toImmutableList());
ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(
parquetTypes,
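The Parquet footer written by createParquetFileWriter has to match the physical schema recorded in the Delta transaction log. Under name mapping, the log's schemaString carries per-field delta.columnMapping.id and delta.columnMapping.physicalName entries; a hedged example of what that looks like (the id and physical name values are invented — real tables generate their own):

class ColumnMappingSchemaExample
{
    public static void main(String[] args)
    {
        // Example metaData schemaString for a table with
        // delta.columnMapping.mode = 'name'.
        String schemaString = """
                {"type":"struct","fields":[
                  {"name":"col","type":"integer","nullable":true,"metadata":{
                    "delta.columnMapping.id":1,
                    "delta.columnMapping.physicalName":"col-5f422f40-e970-4b50-8e1b-95a97f8c3e60"}}]}""";
        System.out.println(schemaString);
    }
}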

Large diffs are not rendered by default.

@@ -185,8 +185,8 @@ public long getRowCount()
public DataFileInfo getDataFileInfo()
throws IOException
{
-        List<String> dataColumnNames = columnHandles.stream().map(DeltaLakeColumnHandle::getName).collect(toImmutableList());
-        List<Type> dataColumnTypes = columnHandles.stream().map(DeltaLakeColumnHandle::getType).collect(toImmutableList());
+        List<String> dataColumnNames = columnHandles.stream().map(DeltaLakeColumnHandle::getPhysicalName).collect(toImmutableList());
+        List<Type> dataColumnTypes = columnHandles.stream().map(DeltaLakeColumnHandle::getPhysicalType).collect(toImmutableList());
return new DataFileInfo(
relativeFilePath,
getWrittenBytes(),
@@ -375,7 +375,7 @@ else if (isValidInRange(minValue)) {
columnStatsBuilder.setDistinctValuesCount(Estimate.of(partitioningColumnsDistinctValues.get(column).size()));
}
if (statistics.isPresent()) {
-            DeltaLakeColumnStatistics deltaLakeColumnStatistics = statistics.get().getColumnStatistics().get(column.getName());
+            DeltaLakeColumnStatistics deltaLakeColumnStatistics = statistics.get().getColumnStatistics().get(column.getPhysicalName());
if (deltaLakeColumnStatistics != null && column.getColumnType() != PARTITION_KEY) {
deltaLakeColumnStatistics.getTotalSizeInBytes().ifPresent(size -> columnStatsBuilder.setDataSize(Estimate.of(size)));
columnStatsBuilder.setDistinctValuesCount(Estimate.of(deltaLakeColumnStatistics.getNdvSummary().cardinality()));
@@ -257,7 +257,7 @@ private void writeJsonStats(BlockBuilder entryBlockBuilder, RowType entryType, A
private Map<String, Type> getColumnTypeMapping(MetadataEntry deltaMetadata)
{
return extractSchema(deltaMetadata, typeManager).stream()
-                .collect(toImmutableMap(DeltaLakeColumnMetadata::getName, DeltaLakeColumnMetadata::getType));
+                .collect(toImmutableMap(DeltaLakeColumnMetadata::getPhysicalName, DeltaLakeColumnMetadata::getPhysicalColumnType));
}

private Optional<String> getStatsString(DeltaLakeJsonFileStatistics parsedStats)
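The last two hunks are two sides of the same requirement: per-file statistics in the transaction log (minValues, maxValues, nullCount) are keyed by physical column names, so the statistics reader and writeJsonStats both need physical names and types. A hypothetical add-file stats payload illustrating the keying (all values invented):

class PhysicalNameStatsExample
{
    public static void main(String[] args)
    {
        // Every per-column map is keyed by the physical name, so a
        // lookup by the logical name would return null.
        String stats = """
                {"numRecords":2,
                 "minValues":{"col-5f422f40-e970-4b50-8e1b-95a97f8c3e60":1},
                 "maxValues":{"col-5f422f40-e970-4b50-8e1b-95a97f8c3e60":5},
                 "nullCount":{"col-5f422f40-e970-4b50-8e1b-95a97f8c3e60":0}}""";
        System.out.println(stats);
    }
}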
@@ -27,6 +27,7 @@
import static io.trino.tempto.assertions.QueryAssert.assertThat;
import static io.trino.testing.TestingNames.randomNameSuffix;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_DATABRICKS;
+import static io.trino.tests.product.TestGroups.DELTA_LAKE_EXCLUDE_104;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_EXCLUDE_73;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_EXCLUDE_91;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_OSS;
@@ -71,7 +72,7 @@ public void testAddColumnWithCommentOnTrino()
}
}

-    @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_73, PROFILE_SPECIFIC_TESTS})
+    @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_73, DELTA_LAKE_EXCLUDE_91, PROFILE_SPECIFIC_TESTS})
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testAddColumnUnsupportedWriterVersion()
{
@@ -81,14 +82,14 @@ public void testAddColumnUnsupportedWriterVersion()
        onDelta().executeQuery(format("" +
                "CREATE TABLE default.%s (col int) " +
                "USING DELTA LOCATION 's3://%s/%s'" +
-                "TBLPROPERTIES ('delta.minWriterVersion'='5')",
+                "TBLPROPERTIES ('delta.minWriterVersion'='6')",
tableName,
bucketName,
tableDirectory));

try {
assertQueryFailure(() -> onTrino().executeQuery("ALTER TABLE delta.default." + tableName + " ADD COLUMN new_col int"))
-                    .hasMessageMatching(".* Table .* requires Delta Lake writer version 5 which is not supported");
+                    .hasMessageMatching(".* Table .* requires Delta Lake writer version 6 which is not supported");
}
finally {
dropDeltaTableWithRetry("default." + tableName);
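The bump from minWriterVersion 5 to 6 throughout these tests follows from the feature itself: enabling delta.columnMapping.mode = 'name' implicitly requires minReaderVersion 2 and minWriterVersion 5, the version this change teaches Trino to write to, so version 6 becomes the next unsupported version to pin the failure tests against. A sketch of how such a name-mapped table would be created (table name and location are placeholders):

class NameMappingTableExample
{
    public static void main(String[] args)
    {
        // Spark SQL of the kind these product tests send through onDelta();
        // setting the mapping mode implicitly raises the protocol to
        // minReaderVersion 2 / minWriterVersion 5.
        String createTable = """
                CREATE TABLE default.test_name_mapping (col int)
                USING DELTA LOCATION 's3://bucket/table-dir'
                TBLPROPERTIES ('delta.columnMapping.mode'='name')""";
        System.out.println(createTable);
    }
}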
@@ -196,14 +197,14 @@ public void testCommentOnTableUnsupportedWriterVersion()
        onDelta().executeQuery(format("" +
                "CREATE TABLE default.%s (col int) " +
                "USING DELTA LOCATION 's3://%s/%s'" +
-                "TBLPROPERTIES ('delta.minWriterVersion'='5')",
+                "TBLPROPERTIES ('delta.minWriterVersion'='6')",
tableName,
bucketName,
tableDirectory));

try {
assertQueryFailure(() -> onTrino().executeQuery("COMMENT ON TABLE delta.default." + tableName + " IS 'test comment'"))
-                    .hasMessageMatching(".* Table .* requires Delta Lake writer version 5 which is not supported");
+                    .hasMessageMatching(".* Table .* requires Delta Lake writer version 6 which is not supported");
}
finally {
onTrino().executeQuery("DROP TABLE delta.default." + tableName);
@@ -242,20 +243,44 @@ public void testCommentOnColumnUnsupportedWriterVersion()
        onDelta().executeQuery(format("" +
                "CREATE TABLE default.%s (col int) " +
                "USING DELTA LOCATION 's3://%s/%s'" +
-                "TBLPROPERTIES ('delta.minWriterVersion'='5')",
+                "TBLPROPERTIES ('delta.minWriterVersion'='6')",
tableName,
bucketName,
tableDirectory));

try {
assertQueryFailure(() -> onTrino().executeQuery("COMMENT ON COLUMN delta.default." + tableName + ".col IS 'test column comment'"))
-                    .hasMessageMatching(".* Table .* requires Delta Lake writer version 5 which is not supported");
+                    .hasMessageMatching(".* Table .* requires Delta Lake writer version 6 which is not supported");
}
finally {
onTrino().executeQuery("DROP TABLE delta.default." + tableName);
}
}

+    @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_73, DELTA_LAKE_EXCLUDE_91, DELTA_LAKE_EXCLUDE_104, PROFILE_SPECIFIC_TESTS})
+    @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
+    public void testOptimizeUnsupportedWriterVersion()
+    {
+        String tableName = "test_dl_optimize_unsupported_writer_" + randomNameSuffix();
+        String tableDirectory = "databricks-compatibility-test-" + tableName;
+
+        onDelta().executeQuery(format("" +
+                "CREATE TABLE default.%s (col int) " +
+                "USING DELTA LOCATION 's3://%s/%s'" +
+                "TBLPROPERTIES ('delta.minWriterVersion'='6')",
+                tableName,
+                bucketName,
+                tableDirectory));
+
+        try {
+            assertQueryFailure(() -> onTrino().executeQuery("ALTER TABLE delta.default." + tableName + " EXECUTE OPTIMIZE"))
+                    .hasMessageMatching(".* Table .* requires Delta Lake writer version 6 which is not supported");
+        }
+        finally {
+            dropDeltaTableWithRetry(tableName);
+        }
+    }

@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testTrinoAlterTablePreservesTableMetadata()
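The new testOptimizeUnsupportedWriterVersion pins the failure mode at writer version 6. The implied counterpart, which the diff does not show, is that the same statement against a writer version 5 table with name mapping is now expected to succeed; a hedged sketch of that invocation (table name is a placeholder):

class OptimizeNameMappedTableExample
{
    public static void main(String[] args)
    {
        // Statement the Trino side would run against a supported,
        // name-mapped table; with this change it should no longer be
        // rejected for its writer version.
        String optimize = "ALTER TABLE delta.default.test_name_mapping EXECUTE OPTIMIZE";
        System.out.println(optimize);
    }
}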