From 379f39230884315ea73084cb8e330fda1272fbd8 Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Mon, 10 Jul 2023 11:52:49 +0200 Subject: [PATCH 1/3] Support updates when Delta table location has two trailing slashes --- .../java/io/trino/filesystem/Location.java | 14 ++++++ .../io/trino/filesystem/TestLocation.java | 43 +++++++++++++++++++ .../plugin/deltalake/DeltaLakeMergeSink.java | 2 +- .../hive/BaseS3AndGlueMetastoreTest.java | 15 ------- 4 files changed, 58 insertions(+), 16 deletions(-) diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/Location.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/Location.java index 972e043ab83b..7a4b0a0db4e8 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/Location.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/Location.java @@ -178,6 +178,20 @@ public String fileName() return path.substring(path.lastIndexOf('/') + 1); } + /** + * Returns a new location with the same parent directory as the current location, + * but with the filename corresponding to the specified name. + * The location must be a valid file location. + */ + public Location sibling(String name) + { + requireNonNull(name, "name is null"); + checkArgument(!name.isEmpty(), "name is empty"); + verifyValidFileLocation(); + + return this.withPath(location.substring(0, location.lastIndexOf('/') + 1) + name, path.substring(0, path.lastIndexOf('/') + 1) + name); + } + /** * Creates a new location with all characters removed after the last slash in the path. * This should only be used once, as recursive calls for blob paths may lead to incorrect results. diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/TestLocation.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/TestLocation.java index 304c7625a5c1..0b7dddee4ecd 100644 --- a/lib/trino-filesystem/src/test/java/io/trino/filesystem/TestLocation.java +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/TestLocation.java @@ -362,6 +362,49 @@ private static void assertFileName(String locationString, String fileName) assertThat(location.fileName()).isEqualTo(fileName); } + @Test + void testSibling() + { + assertSiblingFailure("/", "sibling", IllegalStateException.class, "File location must contain a path: /"); + assertSiblingFailure("//", "sibling", IllegalStateException.class, "File location must contain a path: /"); + assertSiblingFailure("file:/", "sibling", IllegalStateException.class, "File location must contain a path: file:/"); + assertSiblingFailure("file://", "sibling", IllegalStateException.class, "File location must contain a path: file://"); + assertSiblingFailure("file:///", "sibling", IllegalStateException.class, "File location must contain a path: file:///"); + assertSiblingFailure("s3://bucket/", "sibling", IllegalStateException.class, "File location must contain a path: s3://bucket/"); + assertSiblingFailure("scheme://userInfo@host/path/", "sibling", IllegalStateException.class, "File location cannot end with '/'"); + + assertSiblingFailure("scheme://userInfo@host/path/filename", null, NullPointerException.class, "name is null"); + assertSiblingFailure("scheme://userInfo@host/path/filename", "", IllegalArgumentException.class, "name is empty"); + + assertSibling("scheme://userInfo@host/path/name", "sibling", "scheme://userInfo@host/path/sibling"); + assertSibling("scheme://userInfo@host/path//name", "sibling", "scheme://userInfo@host/path//sibling"); + assertSibling("scheme://userInfo@host/path///name", "sibling", "scheme://userInfo@host/path///sibling"); + assertSibling("scheme://userInfo@host/level1/level2/name", "sibling", "scheme://userInfo@host/level1/level2/sibling"); + assertSibling("scheme://userInfo@host/level1//level2/name", "sibling", "scheme://userInfo@host/level1//level2/sibling"); + + assertSibling("file:/path/name", "sibling", "file:/path/sibling"); + assertSibling("s3://bucket/directory/filename with spaces", "sibling", "s3://bucket/directory/sibling"); + assertSibling("/path/name", "sibling", "/path/sibling"); + assertSibling("/name", "sibling", "/sibling"); + } + + private static void assertSiblingFailure(String locationString, String siblingName, Class exceptionClass, String exceptionMessage) + { + assertThatThrownBy(() -> Location.of(locationString).sibling(siblingName)) + .isInstanceOf(exceptionClass) + .hasMessageContaining(exceptionMessage); + } + + private static void assertSibling(String locationString, String siblingName, String expectedLocationString) + { + // fileName method only works with valid file locations + Location location = Location.of(locationString); + location.verifyValidFileLocation(); + Location siblingLocation = location.sibling(siblingName); + + assertLocation(siblingLocation, Location.of(expectedLocationString)); + } + @Test void testParentDirectory() { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMergeSink.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMergeSink.java index 968ddb77c34b..ec1b074bd40d 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMergeSink.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMergeSink.java @@ -330,7 +330,7 @@ private List rewriteFile(String sourcePath, FileDeletion deletion) Location sourceLocation = Location.of(sourcePath); String sourceRelativePath = relativePath(tablePath, sourcePath); - Location targetLocation = sourceLocation.parentDirectory().appendPath(session.getQueryId() + "_" + randomUUID()); + Location targetLocation = sourceLocation.sibling(session.getQueryId() + "_" + randomUUID()); String targetRelativePath = relativePath(tablePath, targetLocation.toString()); FileWriter fileWriter = createParquetFileWriter(targetLocation, dataColumns); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java index 49d6456d1a47..578627ed2c6d 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java @@ -36,7 +36,6 @@ import static com.google.common.base.Verify.verify; import static com.google.common.collect.Sets.union; -import static io.trino.plugin.hive.BaseS3AndGlueMetastoreTest.LocationPattern.TWO_TRAILING_SLASHES; import static io.trino.plugin.hive.S3Assert.s3Path; import static io.trino.testing.DataProviders.cartesianProduct; import static io.trino.testing.DataProviders.toDataProvider; @@ -45,7 +44,6 @@ import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; public abstract class BaseS3AndGlueMetastoreTest extends AbstractTestQueryFramework @@ -109,12 +107,6 @@ public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, Lo assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); - if (locationPattern == TWO_TRAILING_SLASHES && !partitioned && getClass().getName().contains(".deltalake.")) { - // TODO (https://github.com/trinodb/trino/issues/17966): updates fail when Delta table is declared with location ending with two slashes - assertThatThrownBy(() -> query("UPDATE " + tableName + " SET col_str = 'other' WHERE col_int = 2")) - .hasMessageMatching("path \\[(s3://.*)/([-a-zA-Z0-9_]+)] must be a subdirectory of basePath \\[(\\1)//]"); - return; - } assertUpdate("UPDATE " + tableName + " SET col_str = 'other' WHERE col_int = 2", 1); assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str3', 3), ('str4', 4)"); @@ -186,13 +178,6 @@ public void testMergeWithProvidedTableLocation(boolean partitioned, LocationPatt " WHEN NOT MATCHED THEN INSERT VALUES ('str4', 4)", 1); assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); - if (locationPattern == TWO_TRAILING_SLASHES && !partitioned && getClass().getName().contains(".deltalake.")) { - // TODO (https://github.com/trinodb/trino/issues/17966): merge fails when Delta table is declared with location ending with two slashes - assertThatThrownBy(() -> query("MERGE INTO " + tableName + " USING (VALUES 2) t(x) ON col_int = x" + - " WHEN MATCHED THEN UPDATE SET col_str = 'other'")) - .hasMessageMatching("path \\[(s3://.*)/([-a-zA-Z0-9_]+)] must be a subdirectory of basePath \\[(\\1)//]"); - return; - } assertUpdate("MERGE INTO " + tableName + " USING (VALUES 2) t(x) ON col_int = x" + " WHEN MATCHED THEN UPDATE SET col_str = 'other'", 1); assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str3', 3), ('str4', 4)"); From 89a8df14a05085e1b96ca416d316f7bb782a0dcb Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Mon, 10 Jul 2023 12:04:06 +0200 Subject: [PATCH 2/3] Obtain safely a sibling location --- .../writer/S3NativeTransactionLogSynchronizer.java | 2 +- .../main/java/io/trino/plugin/hive/orc/OriginalFilesUtils.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/writer/S3NativeTransactionLogSynchronizer.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/writer/S3NativeTransactionLogSynchronizer.java index 9734313bfb8b..f102c1549795 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/writer/S3NativeTransactionLogSynchronizer.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/writer/S3NativeTransactionLogSynchronizer.java @@ -79,7 +79,7 @@ public boolean isUnsafe() public void write(ConnectorSession session, String clusterId, Location newLogEntryPath, byte[] entryContents) { TrinoFileSystem fileSystem = fileSystemFactory.create(session); - Location locksDirectory = newLogEntryPath.parentDirectory().appendPath(LOCK_DIRECTORY); + Location locksDirectory = newLogEntryPath.sibling(LOCK_DIRECTORY); String newEntryFilename = newLogEntryPath.fileName(); Optional myLockInfo = Optional.empty(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OriginalFilesUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OriginalFilesUtils.java index e2942718c056..8ae8660c3857 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OriginalFilesUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OriginalFilesUtils.java @@ -54,7 +54,7 @@ public static long getPrecedingRowCount( long rowCount = 0; for (OriginalFileInfo originalFileInfo : originalFileInfos) { if (originalFileInfo.getName().compareTo(splitPath.fileName()) < 0) { - Location path = splitPath.parentDirectory().appendPath(originalFileInfo.getName()); + Location path = splitPath.sibling(originalFileInfo.getName()); TrinoInputFile inputFile = fileSystemFactory.create(identity) .newInputFile(path, originalFileInfo.getFileSize()); rowCount += getRowsInFile(inputFile, options, stats); From 2303135048e97cfe684342df90c8863f2c650c35 Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Tue, 11 Jul 2023 10:17:58 +0200 Subject: [PATCH 3/3] empty