From 91d7bbc3138cd5938f309b5c3da7097bf1c8a1ab Mon Sep 17 00:00:00 2001 From: Shardul Mahadik Date: Tue, 14 Apr 2020 00:40:58 -0700 Subject: [PATCH] Presto: Pass custom configuration object when using FileSystemUtils --- .../transport/presto/FileSystemClient.java | 4 +++- .../transport/utils/FileSystemUtils.java | 23 +++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/transportable-udfs-presto/src/main/java/com/linkedin/transport/presto/FileSystemClient.java b/transportable-udfs-presto/src/main/java/com/linkedin/transport/presto/FileSystemClient.java index 69622a50..f964433d 100644 --- a/transportable-udfs-presto/src/main/java/com/linkedin/transport/presto/FileSystemClient.java +++ b/transportable-udfs-presto/src/main/java/com/linkedin/transport/presto/FileSystemClient.java @@ -53,7 +53,9 @@ public String copyToLocalFile(String remoteFilename) { Path remotePath = new Path(remoteFilename); Path localPath = new Path(Paths.get(getAndCreateLocalDir(), new File(remoteFilename).getName()).toString()); FileSystem fs = remotePath.getFileSystem(conf); - String resolvedRemoteFilename = FileSystemUtils.resolveLatest(remoteFilename); + // It is important to pass the custom configuration object to FileSystemUtils since we load some extra + // properties from etc/**.xml in getConfiguration() for Presto + String resolvedRemoteFilename = FileSystemUtils.resolveLatest(remoteFilename, conf); Path resolvedRemotePath = new Path(resolvedRemoteFilename); fs.copyToLocalFile(resolvedRemotePath, localPath); return localPath.toString(); diff --git a/transportable-udfs-utils/src/main/java/com/linkedin/transport/utils/FileSystemUtils.java b/transportable-udfs-utils/src/main/java/com/linkedin/transport/utils/FileSystemUtils.java index 68ce0f36..79b7b3c8 100644 --- a/transportable-udfs-utils/src/main/java/com/linkedin/transport/utils/FileSystemUtils.java +++ b/transportable-udfs-utils/src/main/java/com/linkedin/transport/utils/FileSystemUtils.java @@ -14,16 +14,12 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.JobConf; /** * This Utils class handles multiple utilities methods related with Hadoop FileSystem. * */ public class FileSystemUtils { - public static final String MAPREDUCE_FRAMEWORK_NAME = "mapreduce.framework.name"; - public static final String MAPRED_JOB_TRACKER = "mapred.job.tracker"; - public static final String LOCAL = "local"; private FileSystemUtils() { // Empty on purpose @@ -35,12 +31,18 @@ private FileSystemUtils() { * @return the Path's FileSystem if we are not in local mode, local FileSystem if we are. */ public static FileSystem getFileSystem(String filePath) { + return getFileSystem(filePath, new Configuration()); + } + + /** + * Same as {@link #getFileSystem(String)} but allows passing a {@link Configuration} used to resolve the path + */ + public static FileSystem getFileSystem(String filePath, Configuration conf) { FileSystem fs; - JobConf conf = new JobConf(); try { fs = new Path(filePath).getFileSystem(conf); } catch (IOException e) { - throw new RuntimeException("Failed to load the HDFS file system.", e); + throw new RuntimeException("Failed to load the file system for path: " + filePath, e); } return fs; @@ -76,11 +78,18 @@ public static FileSystem getLocalFileSystem() { * @throws IOException when the filesystem could not resolve the path */ public static String resolveLatest(String path) throws IOException { + return resolveLatest(path, new Configuration()); + } + + /** + * Same as {@link #resolveLatest(String)} but allows passing a {@link Configuration} used to resolve the path + */ + public static String resolveLatest(String path, Configuration conf) throws IOException { if (!StringUtils.isBlank(path)) { path = path.trim(); String[] split = path.split("#LATEST"); String retval = split[0]; - FileSystem fs = getFileSystem(path); + FileSystem fs = getFileSystem(path, conf); for (int i = 1; i < split.length; ++i) { retval = resolveLatestHelper(retval, fs, true) + split[i]; }