From 5d24a2b4997bdc7d8d5e5fdbaf5917e329fd2c2e Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 25 Nov 2021 11:53:07 +0100 Subject: [PATCH] Allow ignoring schema location cleanup It is possible that schema location is set to something invalid, or something that used to be valid but is not anymore. In such case, schema location cleanup would fail. --- .../java/io/trino/plugin/hive/HiveConfig.java | 14 ++++++++++++++ .../io/trino/plugin/hive/HiveMetadata.java | 2 +- .../plugin/hive/HiveSessionProperties.java | 11 +++++++++++ .../SemiTransactionalHiveMetastore.java | 19 +++++++++++++------ .../io/trino/plugin/hive/TestHiveConfig.java | 3 +++ 5 files changed, 42 insertions(+), 7 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java index 634ff350ad84..5f157a0b2e32 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java @@ -80,6 +80,7 @@ public class HiveConfig private boolean forceLocalScheduling; private boolean recursiveDirWalkerEnabled; private boolean ignoreAbsentPartitions; + private boolean ignoreSchemaLocationCleanupFailure; private int maxConcurrentFileRenames = 20; private int maxConcurrentMetastoreDrops = 20; @@ -329,6 +330,19 @@ public HiveConfig setIgnoreAbsentPartitions(boolean ignoreAbsentPartitions) return this; } + public boolean isIgnoreSchemaLocationCleanupFailure() + { + return ignoreSchemaLocationCleanupFailure; + } + + @Config("hive.ignore-schema-location-cleanup-failure") + @ConfigDescription("Allows to ignore failures related to file system cleanup during DROP SCHEMA for situations when schema location is misconfigured or no longer reachable") + public HiveConfig setIgnoreSchemaLocationCleanupFailure(boolean ignoreSchemaLocationCleanupFailure) + { + this.ignoreSchemaLocationCleanupFailure = ignoreSchemaLocationCleanupFailure; + return this; + } + @NotNull public DataSize getMaxSplitSize() { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index 66b96b17f074..f13406e53e50 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -809,7 +809,7 @@ public void createSchema(ConnectorSession session, String schemaName, Map delegate.createDatabase(identity, database)); } - public synchronized void dropDatabase(HiveIdentity identity, String schemaName) + public synchronized void dropDatabase(ConnectorSession session, String schemaName) { + HiveIdentity identity = new HiveIdentity(session); HdfsContext context = new HdfsContext( identity.getUsername() .map(ConnectorIdentity::ofUser) @@ -393,10 +395,15 @@ public synchronized void dropDatabase(HiveIdentity identity, String schemaName) log.info("Skipped deleting schema location with external files (%s)", path); } } - catch (IOException e) { - throw new TrinoException( - HIVE_FILESYSTEM_ERROR, - format("Error checking or deleting schema directory '%s'", path), e); + catch (IOException | RuntimeException e) { + if (isIgnoreSchemaLocationCleanupFailure(session)) { + log.warn(e, "Failure when checking or deleting schema directory '%s'", path); + } + else { + throw new TrinoException( + HIVE_FILESYSTEM_ERROR, + format("Error checking or deleting schema directory '%s'", path), e); + } } }); }); @@ -2246,7 +2253,7 @@ private void rollbackShared() .map(Column::getName) .collect(toImmutableList()); List partitionNames = delegate.getPartitionNamesByFilter( - identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionColumnNames, TupleDomain.all()) + identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionColumnNames, TupleDomain.all()) .orElse(ImmutableList.of()); for (List partitionNameBatch : Iterables.partition(partitionNames, 10)) { Collection> partitions = delegate.getPartitionsByNames(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionNameBatch).values(); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java index b99c79533c01..4fb1a98712f8 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java @@ -57,6 +57,7 @@ public void testDefaults() .setMaxConcurrentMetastoreUpdates(20) .setRecursiveDirWalkerEnabled(false) .setIgnoreAbsentPartitions(false) + .setIgnoreSchemaLocationCleanupFailure(false) .setHiveStorageFormat(HiveStorageFormat.ORC) .setHiveCompressionCodec(HiveCompressionCodec.GZIP) .setRespectTableFormat(true) @@ -130,6 +131,7 @@ public void testExplicitPropertyMappings() .put("hive.writer-sort-buffer-size", "13MB") .put("hive.recursive-directories", "true") .put("hive.ignore-absent-partitions", "true") + .put("hive.ignore-schema-location-cleanup-failure", "true") .put("hive.storage-format", "SEQUENCEFILE") .put("hive.compression-codec", "NONE") .put("hive.respect-table-format", "false") @@ -208,6 +210,7 @@ public void testExplicitPropertyMappings() .setMaxConcurrentMetastoreUpdates(100) .setRecursiveDirWalkerEnabled(true) .setIgnoreAbsentPartitions(true) + .setIgnoreSchemaLocationCleanupFailure(true) .setHiveStorageFormat(HiveStorageFormat.SEQUENCEFILE) .setHiveCompressionCodec(HiveCompressionCodec.NONE) .setRespectTableFormat(false)