diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/HiveFileIterator.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/HiveFileIterator.java index 373ad14e3d0c..ef9ae138963f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/HiveFileIterator.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/HiveFileIterator.java @@ -32,7 +32,6 @@ import static io.trino.plugin.hive.HiveErrorCode.HIVE_FILE_NOT_FOUND; import static io.trino.plugin.hive.fs.HiveFileIterator.NestedDirectoryPolicy.FAIL; import static io.trino.plugin.hive.fs.HiveFileIterator.NestedDirectoryPolicy.RECURSE; -import static java.net.URLDecoder.decode; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.Path.SEPARATOR_CHAR; @@ -46,8 +45,8 @@ public enum NestedDirectoryPolicy FAIL } - private final String pathPrefix; private final Table table; + private final Location location; private final TrinoFileSystem fileSystem; private final DirectoryLister directoryLister; private final HdfsNamenodeStats namenodeStats; @@ -62,8 +61,8 @@ public HiveFileIterator( HdfsNamenodeStats namenodeStats, NestedDirectoryPolicy nestedDirectoryPolicy) { - this.pathPrefix = location.toString(); this.table = requireNonNull(table, "table is null"); + this.location = requireNonNull(location, "location is null"); this.fileSystem = requireNonNull(fileSystem, "fileSystem is null"); this.directoryLister = requireNonNull(directoryLister, "directoryLister is null"); this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null"); @@ -80,7 +79,7 @@ protected TrinoFileStatus computeNext() // Ignore hidden files and directories if (nestedDirectoryPolicy == RECURSE) { // Search the full sub-path under the listed prefix for hidden directories - if (isHiddenOrWithinHiddenParentDirectory(new Path(status.getPath()), pathPrefix)) { + if (isHiddenOrWithinHiddenParentDirectory(Location.of(status.getPath()), location)) { continue; } } @@ -121,9 +120,10 @@ static boolean isHiddenFileOrDirectory(Path path) } @VisibleForTesting - static boolean isHiddenOrWithinHiddenParentDirectory(Path path, String prefix) + static boolean isHiddenOrWithinHiddenParentDirectory(Location path, Location rootLocation) { - String pathString = decode(path.toUri().toString()); + String pathString = path.toString(); + String prefix = rootLocation.toString(); checkArgument(pathString.startsWith(prefix), "path %s does not start with prefix %s", pathString, prefix); return containsHiddenPathPartAfterIndex(pathString, prefix.endsWith("/") ? prefix.length() : prefix.length() + 1); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestCachingDirectoryListerRecursiveFilesOnly.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestCachingDirectoryListerRecursiveFilesOnly.java index 2e1996642706..9a0f050fe138 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestCachingDirectoryListerRecursiveFilesOnly.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestCachingDirectoryListerRecursiveFilesOnly.java @@ -30,6 +30,7 @@ import static io.trino.plugin.hive.HiveQueryRunner.TPCH_SCHEMA; import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThat; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; @@ -100,9 +101,11 @@ public void testRecursiveDirectoriesWithSpecialCharacters() { // Create partitioned table with special characters assertUpdate("CREATE TABLE recursive_directories_with_special_chars (payload varchar, event_name varchar) WITH (format = 'ORC', partitioned_by = ARRAY['event_name'])"); - assertUpdate("INSERT INTO recursive_directories_with_special_chars VALUES ('data', 'level1|level2'), ('data', 'level1 | level2')", 2); + String values = "VALUES (VARCHAR 'data', VARCHAR 'level1|level2'), ('data', 'level1 | level2'), ('plus sign', 'foo+bar')"; + assertUpdate("INSERT INTO recursive_directories_with_special_chars " + values, 3); // Check that all files can be read - assertQuery("SELECT count(*) FROM recursive_directories_with_special_chars", "VALUES (2)"); + assertThat(query("TABLE recursive_directories_with_special_chars")) + .matches(values); assertUpdate("DROP TABLE recursive_directories_with_special_chars"); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestHiveFileIterator.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestHiveFileIterator.java index 83f435cb4679..009ab19b0137 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestHiveFileIterator.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestHiveFileIterator.java @@ -13,6 +13,7 @@ */ package io.trino.plugin.hive.fs; +import io.trino.filesystem.Location; import org.apache.hadoop.fs.Path; import org.testng.annotations.Test; @@ -27,18 +28,21 @@ public class TestHiveFileIterator @Test public void testRelativeHiddenPathDetection() { - String root = new Path("file:///root-path").toUri().getPath(); - assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, ".hidden/child"), root)); - assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, "_hidden.txt"), root)); - String rootWithSlash = new Path("file:///root-path/").toUri().getPath(); - assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, ".hidden/child"), rootWithSlash)); - assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, "_hidden.txt"), rootWithSlash)); - String rootWithinHidden = new Path("file:///root/.hidden/listing-root").toUri().getPath(); - assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithinHidden, "file.txt"), rootWithinHidden)); - String rootHiddenEnding = new Path("file:///root/hidden-ending_").toUri().getPath(); - assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(rootHiddenEnding, "file.txt"), rootHiddenEnding)); - String insideNonAlphanumericPrefix = new Path("file:///root/With spaces and | pipes/.hidden").toUri().getPath(); - assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(insideNonAlphanumericPrefix, "file.txt"), insideNonAlphanumericPrefix)); + assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/.hidden/child"), Location.of("file:///root-path"))); + assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/_hidden.txt"), Location.of("file:///root-path"))); + + // root path with trailing slash + assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/.hidden/child"), Location.of("file:///root-path/"))); + assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/_hidden.txt"), Location.of("file:///root-path/"))); + + // root path containing .hidden + assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/.hidden/listing-root/file.txt"), Location.of("file:///root/.hidden/listing-root"))); + + // root path ending with an underscore + assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/hidden-ending_/file.txt"), Location.of("file:///root/hidden-ending_"))); + + // root path containing "arbitrary" characters + assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/With spaces and | pipes/.hidden/file.txt"), Location.of("file:///root/With spaces and | pipes/.hidden"))); } @Test