Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't delete Hive and Iceberg schema locations with nonempty subdirectories #10485

Merged
merged 2 commits into from
Jan 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ public synchronized void dropDatabase(ConnectorSession session, String schemaNam
boolean deleteData = location.map(path -> {
HdfsContext context = new HdfsContext(session);
try (FileSystem fs = hdfsEnvironment.getFileSystem(context, path)) {
return !fs.listFiles(path, false).hasNext();
return !fs.listLocatedStatus(path).hasNext();
}
catch (IOException | RuntimeException e) {
log.warn(e, "Could not check schema directory '%s'", path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ public boolean dropNamespace(ConnectorSession session, String namespace)
boolean deleteData = location.map(path -> {
HdfsContext context = new HdfsContext(session);
try (FileSystem fs = hdfsEnvironment.getFileSystem(context, path)) {
return !fs.listFiles(path, false).hasNext();
return !fs.listLocatedStatus(path).hasNext();
}
catch (IOException e) {
log.warn(e, "Could not check schema directory '%s'", path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,51 +54,68 @@ public void testCreateDropSchema()
}

@Test
public void testDropSchemaWithLocationWithoutExternalFiles()
public void testDropSchemaFiles()
{
String schemaName = "schema_with_empty_location_" + randomTableSuffix();
String schemaDir = warehouseDirectory + "/schema-with-empty-location/";
String schemaName = "schema_without_location_" + randomTableSuffix();
String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName);

onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir));
onTrino().executeQuery(format("CREATE SCHEMA %s", schemaName));
Comment on lines -59 to +62
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These lines aren't actually changed; I just switched the order of this test and the next after renaming them, and they're so similar that git didn't realize.

assertFileExistence(schemaDir, true, "schema directory exists after creating schema");
onTrino().executeQuery("DROP SCHEMA " + schemaName);
assertFileExistence(schemaDir, false, "schema directory exists after dropping schema");
}

@Test
public void testDropSchemaFilesWithoutLocation()
public void testDropSchemaFilesWithLocation()
{
String schemaName = "schema_without_location_" + randomTableSuffix();
String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName);
String schemaName = "schema_with_empty_location_" + randomTableSuffix();
String schemaDir = warehouseDirectory + "/schema-with-empty-location/";

onTrino().executeQuery(format("CREATE SCHEMA %s", schemaName));
onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir));
assertFileExistence(schemaDir, true, "schema directory exists after creating schema");
onTrino().executeQuery("DROP SCHEMA " + schemaName);
assertFileExistence(schemaDir, false, "schema directory exists after dropping schema");
}

@Test
public void testDropSchemaFilesWithLocationWithExternalFile()
@Test // specified location, external file in subdir
public void testDropWithExternalFilesInSubdirectory()
{
String schemaName = "schema_with_nonempty_location_" + randomTableSuffix();
String schemaDir = warehouseDirectory + "/schema-with-nonempty-location/";
// Use subdirectory to make sure file check is recursive
String subDir = schemaDir + "subdir/";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the case of an EMPTY subdirectory also covered (i.e., that it does not prevent the schema from being deleted)?

Copy link
Member Author

@jirassimok jirassimok Jan 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed that behavior so that a schema location containing an empty subdirectory won't be deleted either, and I added a test for that as well.

String externalFile = subDir + "external-file";

// Create file in schema directory before creating schema
String externalFile = schemaDir + "external-file";
hdfsClient.createDirectory(schemaDir);
// Create file below schema directory before creating schema
hdfsClient.createDirectory(subDir);
hdfsClient.saveFile(externalFile, "");

onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir));
assertFileExistence(schemaDir, true, "schema directory exists after creating schema");
assertFileExistence(externalFile, true, "external file exists after creating schema");
onTrino().executeQuery("DROP SCHEMA " + schemaName);
assertFileExistence(schemaDir, true, "schema directory exists after dropping schema");
assertFileExistence(externalFile, true, "external file exists after dropping schema");

hdfsClient.delete(externalFile);
hdfsClient.delete(schemaDir);
}

// Tests create/drop schema transactions with default schema location
@Test
/**
 * Checks that an externally created, empty subdirectory of a schema directory
 * still exists after the schema is dropped.
 * <p>
 * The schema is created without a location property. The test computes the
 * directory it expects the schema to use as
 * {@code warehouseDirectory + "/" + schemaName + ".db/"}
 * (presumably the connector's default schema location; confirm against the
 * test environment configuration).
 */
@Test // default location, empty external subdir
public void testDropSchemaFilesWithEmptyExternalSubdir()
{
String schemaName = "schema_with_empty_subdirectory_" + randomTableSuffix();
String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName);
String externalSubdir = schemaDir + "external-subdir/";

// Create the subdirectory directly through the HDFS client, before the schema exists
hdfsClient.createDirectory(externalSubdir);

onTrino().executeQuery("CREATE SCHEMA " + schemaName);
assertFileExistence(externalSubdir, true, "external subdirectory exists after creating schema");
onTrino().executeQuery("DROP SCHEMA " + schemaName);
// The subdirectory holds no files, yet it is expected to survive the drop
assertFileExistence(externalSubdir, true, "external subdirectory exists after dropping schema");

// Manual cleanup: the drop is expected to have left the schema directory behind
hdfsClient.delete(schemaDir);
}

@Test // default location, transactions without external files
public void testDropSchemaFilesTransactions()
{
String schemaName = "schema_directory_transactions_" + randomTableSuffix();
Expand All @@ -122,8 +139,8 @@ public void testDropSchemaFilesTransactions()
assertFileExistence(schemaDir, false, "schema directory exists after dropping schema");
}

@Test
public void testDropSchemaFilesTransactionsWithExternalFile()
@Test // specified location, transaction with top-level external file
public void testDropTransactionsWithExternalFiles()
{
String schemaName = "schema_transactions_with_external_files_" + randomTableSuffix();
String schemaDir = warehouseDirectory + "/schema-transactions-with-external-files/";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,52 +46,69 @@ public void useIceberg()
}

@Test(groups = ICEBERG)
public void testDropSchemaWithLocationWithoutExternalFiles()
public void testDropSchemaFiles()
{
String schemaName = "schema_with_empty_location_" + randomTableSuffix();
String schemaDir = warehouseDirectory + "/schema-with-empty-location/";
String schemaName = "schema_without_location_" + randomTableSuffix();
String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName);

onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir));
onTrino().executeQuery(format("CREATE SCHEMA %s", schemaName));
assertFileExistence(schemaDir, true, "schema directory exists after creating schema");
onTrino().executeQuery("DROP SCHEMA " + schemaName);
assertFileExistence(schemaDir, false, "schema directory exists after dropping schema");
}

@Test(groups = ICEBERG)
public void testDropSchemaFilesWithoutLocation()
public void testDropSchemaFilesWithLocation()
{
String schemaName = "schema_without_location_" + randomTableSuffix();
String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName);
String schemaName = "schema_with_empty_location_" + randomTableSuffix();
String schemaDir = warehouseDirectory + "/schema-with-empty-location/";

onTrino().executeQuery(format("CREATE SCHEMA %s", schemaName));
onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir));
assertFileExistence(schemaDir, true, "schema directory exists after creating schema");
onTrino().executeQuery("DROP SCHEMA " + schemaName);
assertFileExistence(schemaDir, false, "schema directory exists after dropping schema");
}

@Test(groups = ICEBERG)
public void testDropSchemaFilesWithLocationWithExternalFile()
@Test(groups = ICEBERG) // specified location, external file in subdir
public void testDropWithExternalFilesInSubdirectory()
{
String schemaName = "schema_with_nonempty_location_" + randomTableSuffix();
String schemaDir = warehouseDirectory + "/schema-with-nonempty-location/";
// Use subdirectory to make sure file check is recursive
String subDir = schemaDir + "subdir/";
String externalFile = subDir + "external-file";

// Create file in schema directory before creating schema
String externalFile = schemaDir + "external-file";
hdfsClient.createDirectory(schemaDir);
// Create file below schema directory before creating schema
hdfsClient.createDirectory(subDir);
hdfsClient.saveFile(externalFile, "");

onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir));
assertFileExistence(schemaDir, true, "schema directory exists after creating schema");
assertFileExistence(externalFile, true, "external file exists after creating schema");
onTrino().executeQuery("DROP SCHEMA " + schemaName);
assertFileExistence(schemaDir, true, "schema directory exists after dropping schema");
assertFileExistence(externalFile, true, "external file exists after dropping schema");

hdfsClient.delete(externalFile);
hdfsClient.delete(schemaDir);
}

@Test(groups = ICEBERG)
public void testDropSchemaWithExternalFileWithoutLocation()
/**
 * Checks that an externally created, empty subdirectory of a schema directory
 * still exists after the schema is dropped.
 * <p>
 * The schema is created without a location property. The test computes the
 * directory it expects the schema to use as
 * {@code warehouseDirectory + "/" + schemaName + ".db/"}
 * (presumably the connector's default schema location; confirm against the
 * test environment configuration).
 */
@Test(groups = ICEBERG) // make sure empty directories are noticed as well
public void testDropSchemaFilesWithEmptyExternalSubdir()
{
String schemaName = "schema_with_empty_subdirectory_" + randomTableSuffix();
String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName);
String externalSubdir = schemaDir + "external-subdir/";

// Create the subdirectory directly through the HDFS client, before the schema exists
hdfsClient.createDirectory(externalSubdir);

onTrino().executeQuery("CREATE SCHEMA " + schemaName);
assertFileExistence(externalSubdir, true, "external subdirectory exists after creating schema");
onTrino().executeQuery("DROP SCHEMA " + schemaName);
// The subdirectory holds no files, yet it is expected to survive the drop
assertFileExistence(externalSubdir, true, "external subdirectory exists after dropping schema");

// Manual cleanup: the drop is expected to have left the schema directory behind
hdfsClient.delete(schemaDir);
}

@Test(groups = ICEBERG) // default location, external file at top level
public void testDropWithExternalFiles()
{
String schemaName = "schema_with_files_in_default_location_" + randomTableSuffix();
String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName);
Expand Down