Skip to content

Commit

Permalink
Respect Thrift delete-files-on-drop for dropping partitions
Browse files Browse the repository at this point in the history
  • Loading branch information
ebyhr committed Oct 12, 2022
1 parent 3198397 commit a87fc63
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 1 deletion.
2 changes: 1 addition & 1 deletion docs/src/main/sphinx/connector/hive.rst
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ Property Name Description
* - ``hive.metastore.client.keytab``
- Hive metastore client keytab location.
* - ``hive.metastore.thrift.delete-files-on-drop``
- Actively delete the files for drop table operations, for cases when the
- Actively delete the files for drop table or partition operations, for cases when the
metastore does not delete the files. Default is ``false``.

.. _hive-glue-metastore:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,59 @@ public void testRecreateTable()
}
}

@Test
public void testRecreatePartition()
{
    // Scenario: create partition part=1, fill it, delete all rows, then create the same
    // empty partition again and check that the table reads as empty.
    String table = "test_recreate_partition_" + randomTableSuffix();
    String qualifiedName = "%s.%s".formatted(schemaName, table);
    String partitionPrefix = "%s/%s/part=1".formatted(schemaName, table);

    // The same statements are issued more than once, so build them up front
    String createEmptyPartition = "CALL system.create_empty_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, table);
    String selectAll = "SELECT * FROM " + qualifiedName;

    assertUpdate("CREATE TABLE " + qualifiedName + "(col int, part int) WITH (partitioned_by = ARRAY['part'])");
    try {
        // On S3 (not MinIO) creating an empty partition produces a special empty file
        assertUpdate(createEmptyPartition);
        assertUpdate("INSERT INTO " + qualifiedName + " VALUES (1, 1)", 1);
        assertQuery(selectAll, "VALUES (1, 1)");

        // Expect two objects under the partition prefix: the directory marker and the data file
        assertThat(getS3ObjectSummaries(partitionPrefix)).hasSize(2);

        // With the Thrift metastore on S3 (not MinIO), DELETE leaves some files behind
        // when the 'hive.metastore.thrift.delete-files-on-drop' config property is false.
        // In that case the SELECT after recreating the partition would not come back empty.
        assertUpdate("DELETE FROM " + qualifiedName);
        assertThat(getS3ObjectSummaries(partitionPrefix)).hasSize(0);

        // Recreating the partition must expose no rows from before the DELETE
        assertUpdate(createEmptyPartition);
        assertQueryReturnsEmptyResult(selectAll);
    }
    finally {
        // Always drop the table, even when an assertion above fails
        assertUpdate("DROP TABLE " + qualifiedName);
    }
}

@Test
public void testUnregisterPartitionNotRemoveData()
{
    // Verify unregister_partition procedure doesn't remove physical data even when 'hive.metastore.thrift.delete-files-on-drop' config property is true
    // Use a prefix specific to this test so that the table is not mistaken for one created by testRecreatePartition
    String tableName = "test_unregister_partition_" + randomTableSuffix();
    String schemaTableName = "%s.%s".formatted(schemaName, tableName);

    assertUpdate("CREATE TABLE " + schemaTableName + "(col int, part int) WITH (partitioned_by = ARRAY['part'])");
    try {
        // Write a single row into partition part=1
        assertUpdate("INSERT INTO " + schemaTableName + " VALUES (1, 1)", 1);
        assertQuery("SELECT * FROM " + schemaTableName, "VALUES (1, 1)");

        // After unregistering the only partition the table must read as empty
        assertUpdate("CALL system.unregister_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName));
        assertQueryReturnsEmptyResult("SELECT * FROM " + schemaTableName);

        // Registering the same partition again (no explicit location is passed) must make the
        // original row visible again, which shows the data survived the unregister call
        assertUpdate("CALL system.register_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName));
        assertQuery("SELECT * FROM " + schemaTableName, "VALUES (1, 1)");
    }
    finally {
        // Always drop the table, even when an assertion above fails
        assertUpdate("DROP TABLE " + schemaTableName);
    }
}

private List<S3ObjectSummary> getS3ObjectSummaries(String prefix)
{
return s3Client.listObjectsV2(writableBucket, prefix).getObjectSummaries();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1139,7 +1139,12 @@ public void dropPartition(String databaseName, String tableName, List<String> pa
.stopOnIllegalExceptions()
.run("dropPartition", stats.getDropPartition().wrap(() -> {
try (ThriftMetastoreClient client = createMetastoreClient()) {
Partition partition = client.getPartition(databaseName, tableName, parts);
client.dropPartition(databaseName, tableName, parts, deleteData);
String partitionLocation = partition.getSd().getLocation();
if (deleteFilesOnDrop && deleteData && !isNullOrEmpty(partitionLocation) && isManagedTable(client.getTable(databaseName, tableName))) {
deleteDirRecursive(new Path(partitionLocation));
}
}
return null;
}));
Expand Down

0 comments on commit a87fc63

Please sign in to comment.