Skip to content

Commit

Permalink
Fix recursive listing from Hive partitions containing non-alphabetic …
Browse files Browse the repository at this point in the history
…values

Fix query failure when `hive.recursive-directories` is used and the
partition directory contains any url-encoded characters, i.e. when
partition value contains pretty much anything other than letters/digits.
  • Loading branch information
findepi committed Jul 7, 2023
1 parent f838546 commit c710b41
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import static io.trino.plugin.hive.HiveErrorCode.HIVE_FILE_NOT_FOUND;
import static io.trino.plugin.hive.fs.HiveFileIterator.NestedDirectoryPolicy.FAIL;
import static io.trino.plugin.hive.fs.HiveFileIterator.NestedDirectoryPolicy.RECURSE;
import static java.net.URLDecoder.decode;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.Path.SEPARATOR_CHAR;

Expand All @@ -46,8 +45,8 @@ public enum NestedDirectoryPolicy
FAIL
}

private final String pathPrefix;
private final Table table;
private final Location location;
private final TrinoFileSystem fileSystem;
private final DirectoryLister directoryLister;
private final HdfsNamenodeStats namenodeStats;
Expand All @@ -62,8 +61,8 @@ public HiveFileIterator(
HdfsNamenodeStats namenodeStats,
NestedDirectoryPolicy nestedDirectoryPolicy)
{
this.pathPrefix = location.toString();
this.table = requireNonNull(table, "table is null");
this.location = requireNonNull(location, "location is null");
this.fileSystem = requireNonNull(fileSystem, "fileSystem is null");
this.directoryLister = requireNonNull(directoryLister, "directoryLister is null");
this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null");
Expand All @@ -80,7 +79,7 @@ protected TrinoFileStatus computeNext()
// Ignore hidden files and directories
if (nestedDirectoryPolicy == RECURSE) {
// Search the full sub-path under the listed prefix for hidden directories
if (isHiddenOrWithinHiddenParentDirectory(new Path(status.getPath()), pathPrefix)) {
if (isHiddenOrWithinHiddenParentDirectory(Location.of(status.getPath()), location)) {
continue;
}
}
Expand Down Expand Up @@ -121,9 +120,10 @@ static boolean isHiddenFileOrDirectory(Path path)
}

@VisibleForTesting
static boolean isHiddenOrWithinHiddenParentDirectory(Path path, String prefix)
static boolean isHiddenOrWithinHiddenParentDirectory(Location path, Location rootLocation)
{
String pathString = decode(path.toUri().toString());
String pathString = path.toString();
String prefix = rootLocation.toString();
checkArgument(pathString.startsWith(prefix), "path %s does not start with prefix %s", pathString, prefix);
return containsHiddenPathPartAfterIndex(pathString, prefix.endsWith("/") ? prefix.length() : prefix.length() + 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static io.trino.plugin.hive.HiveQueryRunner.TPCH_SCHEMA;
import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES;
import static java.lang.String.format;
import static org.assertj.core.api.Assertions.assertThat;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;

Expand Down Expand Up @@ -100,9 +101,11 @@ public void testRecursiveDirectoriesWithSpecialCharacters()
{
// Create partitioned table with special characters
assertUpdate("CREATE TABLE recursive_directories_with_special_chars (payload varchar, event_name varchar) WITH (format = 'ORC', partitioned_by = ARRAY['event_name'])");
assertUpdate("INSERT INTO recursive_directories_with_special_chars VALUES ('data', 'level1|level2'), ('data', 'level1 | level2')", 2);
String values = "VALUES (VARCHAR 'data', VARCHAR 'level1|level2'), ('data', 'level1 | level2'), ('plus sign', 'foo+bar')";
assertUpdate("INSERT INTO recursive_directories_with_special_chars " + values, 3);
// Check that all files can be read
assertQuery("SELECT count(*) FROM recursive_directories_with_special_chars", "VALUES (2)");
assertThat(query("TABLE recursive_directories_with_special_chars"))
.matches(values);
assertUpdate("DROP TABLE recursive_directories_with_special_chars");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.plugin.hive.fs;

import io.trino.filesystem.Location;
import org.apache.hadoop.fs.Path;
import org.testng.annotations.Test;

Expand All @@ -27,18 +28,21 @@ public class TestHiveFileIterator
@Test
public void testRelativeHiddenPathDetection()
{
String root = new Path("file:///root-path").toUri().getPath();
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, ".hidden/child"), root));
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, "_hidden.txt"), root));
String rootWithSlash = new Path("file:///root-path/").toUri().getPath();
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, ".hidden/child"), rootWithSlash));
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, "_hidden.txt"), rootWithSlash));
String rootWithinHidden = new Path("file:///root/.hidden/listing-root").toUri().getPath();
assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithinHidden, "file.txt"), rootWithinHidden));
String rootHiddenEnding = new Path("file:///root/hidden-ending_").toUri().getPath();
assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(rootHiddenEnding, "file.txt"), rootHiddenEnding));
String insideNonAlphanumericPrefix = new Path("file:///root/With spaces and | pipes/.hidden").toUri().getPath();
assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(insideNonAlphanumericPrefix, "file.txt"), insideNonAlphanumericPrefix));
assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/.hidden/child"), Location.of("file:///root-path")));
assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/_hidden.txt"), Location.of("file:///root-path")));

// root path with trailing slash
assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/.hidden/child"), Location.of("file:///root-path/")));
assertTrue(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root-path/_hidden.txt"), Location.of("file:///root-path/")));

// root path containing .hidden
assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/.hidden/listing-root/file.txt"), Location.of("file:///root/.hidden/listing-root")));

// root path ending with an underscore
assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/hidden-ending_/file.txt"), Location.of("file:///root/hidden-ending_")));

// root path containing "arbitrary" characters
assertFalse(isHiddenOrWithinHiddenParentDirectory(Location.of("file:///root/With spaces and | pipes/.hidden/file.txt"), Location.of("file:///root/With spaces and | pipes/.hidden")));
}

@Test
Expand Down

0 comments on commit c710b41

Please sign in to comment.