diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index 564f128dd269..68d41e198c7b 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -1275,12 +1275,24 @@ private static String validateAvroSchemaLiteral(String avroSchemaLiteral) private static Location getValidatedExternalLocation(String location) { + Location validated; try { - return Location.of(location); + validated = Location.of(location); } catch (IllegalArgumentException e) { throw new TrinoException(INVALID_TABLE_PROPERTY, "External location is not a valid file system URI: " + location, e); } + + // TODO (https://github.com/trinodb/trino/issues/17803) We cannot accept locations with double slash until all relevant Hive connector components are migrated off Hadoop Path. + // Hadoop Path "normalizes location", e.g.: + // - removes double slashes (such locations are rejected), + // - removes trailing slash (such locations are accepted; foo/bar and foo/bar/ are treated as equivalent, and rejecting locations with trailing slash could pose UX issues) + // - replaces file:/// with file:/ (such locations are accepted). + if (validated.path().contains("//")) { + throw new TrinoException(INVALID_TABLE_PROPERTY, "Unsupported location that cannot be internally represented: " + location); + } + + return validated; } private void checkExternalPath(HdfsContext context, Path path) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveWriteUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveWriteUtils.java index b6c061ea4775..89152c58c3f4 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveWriteUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveWriteUtils.java @@ -453,7 +453,10 @@ public static Location getTableDefaultLocation(Database database, HdfsContext co } } - return Location.of(location).appendPath(escapeTableName(tableName)); + // Note: this results in `databaseLocation` being a "normalized location", e.g. not containing double slashes. + // TODO (https://github.com/trinodb/trino/issues/17803): We need to use normalized location until all relevant Hive connector components are migrated off Hadoop Path. + Location databaseLocation = Location.of(databasePath.toString()); + return databaseLocation.appendPath(escapeTableName(tableName)); } public static boolean pathExists(HdfsContext context, HdfsEnvironment hdfsEnvironment, Path path) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java index e0f2b5d797af..4df2b89396ac 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java @@ -24,11 +24,9 @@ import java.nio.file.Path; import java.util.HashSet; -import java.util.List; import java.util.Optional; import java.util.Set; -import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.hive.metastore.glue.GlueHiveMetastore.createTestingGlueHiveMetastore; import static io.trino.spi.security.SelectedRole.Type.ROLE; import static io.trino.testing.TestingNames.randomNameSuffix; @@ -123,9 +121,14 @@ public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, St String location = locationPattern.formatted(bucketName, schemaName, tableName); String partitionQueryPart = (partitioned ? ",partitioned_by = ARRAY['col_int']" : ""); - assertUpdate("CREATE TABLE " + tableName + "(col_str, col_int)" + + String create = "CREATE TABLE " + tableName + "(col_str, col_int)" + "WITH (external_location = '" + location + "'" + partitionQueryPart + ") " + - "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)", 3); + "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)"; + if (locationPattern.contains("double_slash")) { + assertQueryFails(create, "\\QUnsupported location that cannot be internally represented: " + location); + return; + } + assertUpdate(create, 3); assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); String actualTableLocation = getTableLocation(tableName); @@ -154,7 +157,9 @@ public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, S assertThat(getSchemaLocation(schemaName)).isEqualTo(schemaLocation); assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_str varchar, col_int int)" + partitionQueryPart); - String expectedTableLocation = (schemaLocation.endsWith("/") ? schemaLocation : schemaLocation + "/") + tableName; + String expectedTableLocation = ((schemaLocation.endsWith("/") ? schemaLocation : schemaLocation + "/") + tableName) + // Hive normalizes double slash + .replaceAll("(? super.testOptimizeWithProvidedTableLocation(partitioned, locationPattern)) + .hasMessageStartingWith("Unsupported location that cannot be internally represented: ") + .hasStackTraceContaining("SQL: CREATE TABLE test_optimize_"); + return; + } + super.testOptimizeWithProvidedTableLocation(partitioned, locationPattern); + } + @Test(dataProvider = "locationPatternsDataProvider") public void testAnalyzeWithProvidedTableLocation(boolean partitioned, String locationPattern) { @@ -186,9 +203,14 @@ public void testAnalyzeWithProvidedTableLocation(boolean partitioned, String loc String location = locationPattern.formatted(bucketName, schemaName, tableName); String partitionQueryPart = (partitioned ? ",partitioned_by = ARRAY['col_int']" : ""); - assertUpdate("CREATE TABLE " + tableName + "(col_str, col_int)" + + String create = "CREATE TABLE " + tableName + "(col_str, col_int)" + "WITH (external_location = '" + location + "'" + partitionQueryPart + ") " + - "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)", 3); + "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)"; + if (locationPattern.contains("double_slash")) { + assertQueryFails(create, "\\QUnsupported location that cannot be internally represented: " + location); + return; + } + assertUpdate(create, 3); assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); @@ -286,13 +308,4 @@ public void testSchemaNameEscape() assertUpdate("DROP SCHEMA \"" + schemaName + "\""); } - - @Override - protected List locationPatterns() - { - // TODO https://github.com/trinodb/trino/issues/17803 Fix correctness issue with double slash - return super.locationPatterns().stream() - .filter(location -> !location.contains("double_slash")) - .collect(toImmutableList()); - } }