Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix recursive listing from Hive partitions containing non-alphabetic values #18167

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import static io.trino.plugin.hive.HiveErrorCode.HIVE_FILE_NOT_FOUND;
import static io.trino.plugin.hive.fs.HiveFileIterator.NestedDirectoryPolicy.FAIL;
import static io.trino.plugin.hive.fs.HiveFileIterator.NestedDirectoryPolicy.RECURSE;
import static java.net.URLDecoder.decode;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.Path.SEPARATOR_CHAR;

Expand All @@ -46,8 +45,8 @@ public enum NestedDirectoryPolicy
FAIL
}

private final String pathPrefix;
private final Table table;
private final Location location;
private final TrinoFileSystem fileSystem;
private final DirectoryLister directoryLister;
private final HdfsNamenodeStats namenodeStats;
Expand All @@ -62,8 +61,8 @@ public HiveFileIterator(
HdfsNamenodeStats namenodeStats,
NestedDirectoryPolicy nestedDirectoryPolicy)
{
this.pathPrefix = location.toString();
this.table = requireNonNull(table, "table is null");
this.location = requireNonNull(location, "location is null");
this.fileSystem = requireNonNull(fileSystem, "fileSystem is null");
this.directoryLister = requireNonNull(directoryLister, "directoryLister is null");
this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null");
Expand All @@ -80,7 +79,7 @@ protected TrinoFileStatus computeNext()
// Ignore hidden files and directories
if (nestedDirectoryPolicy == RECURSE) {
// Search the full sub-path under the listed prefix for hidden directories
if (isHiddenOrWithinHiddenParentDirectory(new Path(status.getPath()), pathPrefix)) {
if (isHiddenOrWithinHiddenParentDirectory(Location.of(status.getPath()), location)) {
continue;
}
}
Expand Down Expand Up @@ -121,9 +120,10 @@ static boolean isHiddenFileOrDirectory(Path path)
}

@VisibleForTesting
static boolean isHiddenOrWithinHiddenParentDirectory(Path path, String prefix)
static boolean isHiddenOrWithinHiddenParentDirectory(Location path, Location rootLocation)
{
String pathString = decode(path.toUri().toString());
String pathString = path.toString();
String prefix = rootLocation.toString();
checkArgument(pathString.startsWith(prefix), "path %s does not start with prefix %s", pathString, prefix);
return containsHiddenPathPartAfterIndex(pathString, prefix.endsWith("/") ? prefix.length() : prefix.length() + 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static io.trino.plugin.hive.HiveQueryRunner.TPCH_SCHEMA;
import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES;
import static java.lang.String.format;
import static org.assertj.core.api.Assertions.assertThat;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;

Expand Down Expand Up @@ -100,9 +101,11 @@ public void testRecursiveDirectoriesWithSpecialCharacters()
{
// Create partitioned table with special characters
assertUpdate("CREATE TABLE recursive_directories_with_special_chars (payload varchar, event_name varchar) WITH (format = 'ORC', partitioned_by = ARRAY['event_name'])");
assertUpdate("INSERT INTO recursive_directories_with_special_chars VALUES ('data', 'level1|level2'), ('data', 'level1 | level2')", 2);
String values = "VALUES (VARCHAR 'data', VARCHAR 'level1|level2'), ('data', 'level1 | level2'), ('plus sign', 'foo+bar')";
assertUpdate("INSERT INTO recursive_directories_with_special_chars " + values, 3);
// Check that all files can be read
assertQuery("SELECT count(*) FROM recursive_directories_with_special_chars", "VALUES (2)");
assertThat(query("TABLE recursive_directories_with_special_chars"))
.matches(values);
assertUpdate("DROP TABLE recursive_directories_with_special_chars");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.plugin.hive.fs;

import io.trino.filesystem.Location;
import org.apache.hadoop.fs.Path;
import org.testng.annotations.Test;

Expand All @@ -27,18 +28,21 @@ public class TestHiveFileIterator
@Test
public void testRelativeHiddenPathDetection()
{
String root = new Path("file:///root-path").toUri().getPath();
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, ".hidden/child"), root));
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, "_hidden.txt"), root));
String rootWithSlash = new Path("file:///root-path/").toUri().getPath();
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, ".hidden/child"), rootWithSlash));
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, "_hidden.txt"), rootWithSlash));
String rootWithinHidden = new Path("file:///root/.hidden/listing-root").toUri().getPath();
assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithinHidden, "file.txt"), rootWithinHidden));
String rootHiddenEnding = new Path("file:///root/hidden-ending_").toUri().getPath();
assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(rootHiddenEnding, "file.txt"), rootHiddenEnding));
String insideNonAlphanumericPrefix = new Path("file:///root/With spaces and | pipes/.hidden").toUri().getPath();
assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(insideNonAlphanumericPrefix, "file.txt"), insideNonAlphanumericPrefix));
assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/.hidden/child"), Location.of("file:///root-path")));
assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/_hidden.txt"), Location.of("file:///root-path")));

// root path with trailing slash
assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/.hidden/child"), Location.of("file:///root-path/")));
assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/_hidden.txt"), Location.of("file:///root-path/")));

// root path containing .hidden
assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/.hidden/listing-root/file.txt"), Location.of("file:///root/.hidden/listing-root")));

// root path ending with an underscore
assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/hidden-ending_/file.txt"), Location.of("file:///root/hidden-ending_")));

// root path containing "arbitrary" characters
assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/With spaces and | pipes/.hidden/file.txt"), Location.of("file:///root/With spaces and | pipes/.hidden")));
}

@Test
Expand Down