From c7ba6a7c804592ba4117442df27e47da8db3dcba Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Thu, 9 Apr 2015 17:23:41 +0900 Subject: [PATCH 1/9] [SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path escape spaces in the arguments. --- .../apache/spark/deploy/PythonRunner.scala | 2 +- .../scala/org/apache/spark/util/Utils.scala | 14 ++++--- .../org/apache/spark/util/UtilsSuite.scala | 39 +++++++++++++++---- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 53e18c4bcec23..724d15e31b198 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -82,7 +82,7 @@ object PythonRunner { s"spark-submit is currently only supported for local files: $path") } val windows = Utils.isWindows || testWindows - var formattedPath = if (windows) Utils.formatWindowsPath(path) else path + var formattedPath = Utils.formatPath(path, windows) // Strip the URI scheme from the path formattedPath = diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 0fdfaf300e95d..a0d5f748108d3 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1659,9 +1659,14 @@ private[spark] object Utils extends Logging { val windowsDrive = "([a-zA-Z])".r /** - * Format a Windows path such that it can be safely passed to a URI. + * Format a path such that it can be safely passed to a URI. */ - def formatWindowsPath(path: String): String = path.replace("\\", "/") + def formatPath(path: String, windows: Boolean): String = { + val formatted = path.replace(" ", "%20") + + // In Windows, the file separator is a backslash, but this is inconsistent with the URI format + if (windows) formatted.replace("\\", "/") else formatted + } /** * Indicates whether Spark is currently running unit tests. @@ -1762,9 +1767,8 @@ private[spark] object Utils extends Logging { */ def resolveURI(path: String, testWindows: Boolean = false): URI = { - // In Windows, the file separator is a backslash, but this is inconsistent with the URI format val windows = isWindows || testWindows - val formattedPath = if (windows) formatWindowsPath(path) else path + val formattedPath = formatPath(path, windows) val uri = new URI(formattedPath) if (uri.getPath == null) { @@ -1801,7 +1805,7 @@ private[spark] object Utils extends Logging { Array.empty } else { paths.split(",").filter { p => - val formattedPath = if (windows) formatWindowsPath(p) else p + val formattedPath = formatPath(p, windows) val uri = new URI(formattedPath) Option(uri.getScheme).getOrElse("file") match { case windowsDrive(d) if windows => false diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 5d93086082189..0a78189f2fb0a 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -28,6 +28,7 @@ import java.util.Locale import com.google.common.base.Charsets.UTF_8 import com.google.common.io.Files import org.scalatest.FunSuite +import org.apache.commons.lang3.SystemUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -233,13 +234,16 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assert(new URI(Utils.resolveURIs(before, testWindows)) === new URI(after)) assert(new URI(Utils.resolveURIs(after, testWindows)) === new URI(after)) } - val cwd = System.getProperty("user.dir") + val rawCwd = System.getProperty("user.dir") + val cwd = if (SystemUtils.IS_OS_WINDOWS) s"/$rawCwd".replace("\\", "/") else rawCwd assertResolves("hdfs:/root/spark.jar", "hdfs:/root/spark.jar") assertResolves("hdfs:///root/spark.jar#app.jar", "hdfs:/root/spark.jar#app.jar") assertResolves("spark.jar", s"file:$cwd/spark.jar") assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar#app.jar") + assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt") assertResolves("C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true) + assertResolves("C:/path to/file.txt", "file:/C:/path%20to/file.txt", testWindows = true) assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt", testWindows = true) @@ -258,14 +262,16 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assert(resolve(resolve(after)) === after) assert(resolve(resolve(resolve(after))) === after) } - val cwd = System.getProperty("user.dir") + val rawCwd = System.getProperty("user.dir") + val cwd = if (SystemUtils.IS_OS_WINDOWS) s"/$rawCwd".replace("\\", "/") else rawCwd assertResolves("jar1,jar2", s"file:$cwd/jar1,file:$cwd/jar2") assertResolves("file:/jar1,file:/jar2", "file:/jar1,file:/jar2") assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3") - assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4#jar5") - assertResolves("hdfs:/jar1,file:/jar2,jar3,C:\\pi.py#py.pi", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi", testWindows = true) + assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4#jar5,file:$cwd/path%20to/jar6") + assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi,file:/C:/path%20to/jar4", + testWindows = true) } test("nonLocalPaths") { @@ -280,6 +286,8 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assert(Utils.nonLocalPaths("local:/spark.jar,file:/smart.jar,family.py") === Array.empty) assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar") === Array("hdfs:/spark.jar", "s3:/smart.jar")) + assert(Utils.nonLocalPaths("hdfs:/path to/spark.jar,path to/a.jar,s3:/path to/smart.jar") === + Array("hdfs:/path to/spark.jar", "s3:/path to/smart.jar")) assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar,local.py,file:/hello/pi.py") === Array("hdfs:/spark.jar", "s3:/smart.jar")) assert(Utils.nonLocalPaths("local.py,hdfs:/spark.jar,file:/hello/pi.py,s3:/smart.jar") === @@ -293,6 +301,11 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assert(Utils.nonLocalPaths("local:///C:/some/path.jar", testWindows = true) === Array.empty) assert(Utils.nonLocalPaths("hdfs:/a.jar,C:/my.jar,s3:/another.jar", testWindows = true) === Array("hdfs:/a.jar", "s3:/another.jar")) + assert(Utils.nonLocalPaths( + "hdfs:/path to/spark.jar,C:\\path to\\a.jar,s3:/path to/smart.jar" + , testWindows = true + ) === + Array("hdfs:/path to/spark.jar", "s3:/path to/smart.jar")) assert(Utils.nonLocalPaths("D:/your.jar,hdfs:/a.jar,s3:/another.jar", testWindows = true) === Array("hdfs:/a.jar", "s3:/another.jar")) assert(Utils.nonLocalPaths("hdfs:/a.jar,s3:/another.jar,e:/our.jar", testWindows = true) === @@ -392,7 +405,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { val targetDir = new File(tempDir, "target-dir") Files.write("some text", sourceFile, UTF_8) - val path = new Path("file://" + sourceDir.getAbsolutePath) + val path = + if (SystemUtils.IS_OS_WINDOWS) { + new Path("file:/" + sourceDir.getAbsolutePath.replace("\\", "/")) + } else { + new Path("file://" + sourceDir.getAbsolutePath) + } val conf = new Configuration() val fs = Utils.getHadoopFileSystem(path.toString, conf) @@ -412,7 +430,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { val destInnerFile = new File(destInnerDir, sourceFile.getName) assert(destInnerFile.isFile()) - val filePath = new Path("file://" + sourceFile.getAbsolutePath) + val filePath = + if (SystemUtils.IS_OS_WINDOWS) { + new Path("file:/" + sourceFile.getAbsolutePath.replace("\\", "/")) + } else { + new Path("file://" + sourceFile.getAbsolutePath) + } val testFileDir = new File(tempDir, "test-filename") val testFileName = "testFName" val testFilefs = Utils.getHadoopFileSystem(filePath.toString, conf) From 649da829ce1b2ec3de6c8b2fa9c2ba9f7c3355d1 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 10 Apr 2015 01:35:59 +0900 Subject: [PATCH 2/9] Fix classpath handling --- .../src/main/scala/org/apache/spark/repl/SparkILoop.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 8dc0e0c965923..8bb97c6ed2565 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -206,7 +206,7 @@ class SparkILoop( // e.g. file:/C:/my/path.jar -> C:/my/path.jar SparkILoop.getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") } } else { - SparkILoop.getAddedJars + SparkILoop.getAddedJars.map { jar => new URI(jar).getPath} } // work around for Scala bug val totalClassPath = addedJars.foldLeft( @@ -1109,7 +1109,7 @@ object SparkILoop extends Logging { if (settings.classpath.isDefault) settings.classpath.value = sys.props("java.class.path") - getAddedJars.foreach(settings.classpath.append(_)) + getAddedJars.map(jar => new URI(jar).getPath).foreach(settings.classpath.append(_)) repl process settings } From 10f1c734282041c20307a7c2ada7308e3c8b0c5e Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 10 Apr 2015 14:29:52 +0900 Subject: [PATCH 3/9] Added a comment --- .../src/main/scala/org/apache/spark/repl/SparkILoop.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 8bb97c6ed2565..3443f1a5b89d0 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -206,6 +206,7 @@ class SparkILoop( // e.g. file:/C:/my/path.jar -> C:/my/path.jar SparkILoop.getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") } } else { + // We need new URI(jar).getPath here for the case that `jar` includes encoded white space (%20). SparkILoop.getAddedJars.map { jar => new URI(jar).getPath} } // work around for Scala bug From 016128d00a87a3846060a56e49880a543d6fa6c4 Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Fri, 17 Apr 2015 21:26:10 +0900 Subject: [PATCH 4/9] fixed to use File.toURI() --- .../scala/org/apache/spark/util/Utils.scala | 28 ++++++------------- .../org/apache/spark/util/UtilsSuite.scala | 13 ++++----- 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index a0d5f748108d3..660fc41dcd98d 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1766,27 +1766,15 @@ private[spark] object Utils extends Logging { * converted into an absolute path with a file:// scheme. */ def resolveURI(path: String, testWindows: Boolean = false): URI = { - - val windows = isWindows || testWindows - val formattedPath = formatPath(path, windows) - - val uri = new URI(formattedPath) - if (uri.getPath == null) { - throw new IllegalArgumentException(s"Given path is malformed: $uri") - } - - Option(uri.getScheme) match { - case Some(windowsDrive(d)) if windows => - new URI("file:/" + uri.toString.stripPrefix("/")) - case None => - // Preserve fragments for HDFS file name substitution (denoted by "#") - // For instance, in "abc.py#xyz.py", "xyz.py" is the name observed by the application - val fragment = uri.getFragment - val part = new File(uri.getPath).toURI - new URI(part.getScheme, part.getPath, fragment) - case Some(other) => - uri + try { + val uri = new URI(path) + if (uri.getScheme() != null) { + return uri + } + } catch { + case e: URISyntaxException => } + new File(path).getAbsoluteFile().toURI() } /** Resolve a comma-separated list of paths. */ diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 0a78189f2fb0a..2c9e914af8a5a 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -239,16 +239,15 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assertResolves("hdfs:/root/spark.jar", "hdfs:/root/spark.jar") assertResolves("hdfs:///root/spark.jar#app.jar", "hdfs:/root/spark.jar#app.jar") assertResolves("spark.jar", s"file:$cwd/spark.jar") - assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar#app.jar") + assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar%23app.jar") assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt") - assertResolves("C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("C:/path to/file.txt", "file:/C:/path%20to/file.txt", testWindows = true) + assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt", testWindows = true) assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt", testWindows = true) - intercept[IllegalArgumentException] { Utils.resolveURI("file:foo") } - intercept[IllegalArgumentException] { Utils.resolveURI("file:foo:baby") } + assertResolves("file:foo", s"file:foo") + assertResolves("file:foo:baby", s"file:foo:baby") } test("resolveURIs with multiple paths") { @@ -268,9 +267,9 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assertResolves("file:/jar1,file:/jar2", "file:/jar1,file:/jar2") assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3") assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4#jar5,file:$cwd/path%20to/jar6") + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4%23jar5,file:$cwd/path%20to/jar6") assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi,file:/C:/path%20to/jar4", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4", testWindows = true) } From 84c33d039c8772eb7f47c277da4d36dd53f8afe7 Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Wed, 22 Apr 2015 16:44:26 +0900 Subject: [PATCH 5/9] - use resolveURI in nonLocalPaths - run tests for Windows path only on Windows --- .../scala/org/apache/spark/util/Utils.scala | 3 +-- .../org/apache/spark/util/UtilsSuite.scala | 23 +++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 660fc41dcd98d..f8367d2dc5102 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1793,8 +1793,7 @@ private[spark] object Utils extends Logging { Array.empty } else { paths.split(",").filter { p => - val formattedPath = formatPath(p, windows) - val uri = new URI(formattedPath) + val uri = resolveURI(p, windows) Option(uri.getScheme).getOrElse("file") match { case windowsDrive(d) if windows => false case "local" | "file" => false diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 2c9e914af8a5a..e672530d4c7de 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -241,8 +241,10 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assertResolves("spark.jar", s"file:$cwd/spark.jar") assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar%23app.jar") assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt") - assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt", testWindows = true) + if (SystemUtils.IS_OS_WINDOWS) { + assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true) + assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt", testWindows = true) + } assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt", testWindows = true) @@ -268,9 +270,11 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3") assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4%23jar5,file:$cwd/path%20to/jar6") - assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4", - testWindows = true) + if (SystemUtils.IS_OS_WINDOWS) { + assertResolves( """hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4", + testWindows = true) + } } test("nonLocalPaths") { @@ -285,8 +289,8 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assert(Utils.nonLocalPaths("local:/spark.jar,file:/smart.jar,family.py") === Array.empty) assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar") === Array("hdfs:/spark.jar", "s3:/smart.jar")) - assert(Utils.nonLocalPaths("hdfs:/path to/spark.jar,path to/a.jar,s3:/path to/smart.jar") === - Array("hdfs:/path to/spark.jar", "s3:/path to/smart.jar")) + assert(Utils.nonLocalPaths("hdfs:/spark.jar,path to/a.jar,s3:/smart.jar") === + Array("hdfs:/spark.jar", "s3:/smart.jar")) assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar,local.py,file:/hello/pi.py") === Array("hdfs:/spark.jar", "s3:/smart.jar")) assert(Utils.nonLocalPaths("local.py,hdfs:/spark.jar,file:/hello/pi.py,s3:/smart.jar") === @@ -300,11 +304,6 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assert(Utils.nonLocalPaths("local:///C:/some/path.jar", testWindows = true) === Array.empty) assert(Utils.nonLocalPaths("hdfs:/a.jar,C:/my.jar,s3:/another.jar", testWindows = true) === Array("hdfs:/a.jar", "s3:/another.jar")) - assert(Utils.nonLocalPaths( - "hdfs:/path to/spark.jar,C:\\path to\\a.jar,s3:/path to/smart.jar" - , testWindows = true - ) === - Array("hdfs:/path to/spark.jar", "s3:/path to/smart.jar")) assert(Utils.nonLocalPaths("D:/your.jar,hdfs:/a.jar,s3:/another.jar", testWindows = true) === Array("hdfs:/a.jar", "s3:/another.jar")) assert(Utils.nonLocalPaths("hdfs:/a.jar,s3:/another.jar,e:/our.jar", testWindows = true) === From e03f2898c3bee31c5acfce1a864f81d6f0b39da1 Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Fri, 8 May 2015 20:40:48 +0900 Subject: [PATCH 6/9] removed testWindows from Utils.resolveURI and Utils.resolveURIs. replaced SystemUtils.IS_OS_WINDOWS to Utils.isWindows. removed Utils.formatPath from PythonRunner.scala. --- .../apache/spark/deploy/PythonRunner.scala | 23 +++++----- .../scala/org/apache/spark/util/Utils.scala | 8 ++-- .../spark/deploy/PythonRunnerSuite.scala | 24 +++++----- .../org/apache/spark/util/UtilsSuite.scala | 44 +++++++++---------- 4 files changed, 51 insertions(+), 48 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 724d15e31b198..b5412f20844b1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -18,6 +18,8 @@ package org.apache.spark.deploy import java.net.URI +import java.io.File +import scala.util.Try import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ @@ -81,16 +83,13 @@ object PythonRunner { throw new IllegalArgumentException("Launching Python applications through " + s"spark-submit is currently only supported for local files: $path") } - val windows = Utils.isWindows || testWindows - var formattedPath = Utils.formatPath(path, windows) - - // Strip the URI scheme from the path - formattedPath = - new URI(formattedPath).getScheme match { - case null => formattedPath - case Utils.windowsDrive(d) if windows => formattedPath - case _ => new URI(formattedPath).getPath - } + // get path when scheme is file. + val uri = Try(new URI(path)).getOrElse(new File(path).toURI) + var formattedPath = uri.getScheme match { + case null => path + case "file" | "local" => uri.getPath + case _ => null + } // Guard against malformed paths potentially throwing NPE if (formattedPath == null) { @@ -99,7 +98,9 @@ object PythonRunner { // In Windows, the drive should not be prefixed with "/" // For instance, python does not understand "/C:/path/to/sheep.py" - formattedPath = if (windows) formattedPath.stripPrefix("/") else formattedPath + if (formattedPath.matches("/[a-zA-Z]:/.*")) { + formattedPath = formattedPath.stripPrefix("/") + } formattedPath } diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index f8367d2dc5102..d8e3d6d14ef3d 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1765,7 +1765,7 @@ private[spark] object Utils extends Logging { * If the supplied path does not contain a scheme, or is a relative path, it will be * converted into an absolute path with a file:// scheme. */ - def resolveURI(path: String, testWindows: Boolean = false): URI = { + def resolveURI(path: String): URI = { try { val uri = new URI(path) if (uri.getScheme() != null) { @@ -1778,11 +1778,11 @@ private[spark] object Utils extends Logging { } /** Resolve a comma-separated list of paths. */ - def resolveURIs(paths: String, testWindows: Boolean = false): String = { + def resolveURIs(paths: String): String = { if (paths == null || paths.trim.isEmpty) { "" } else { - paths.split(",").map { p => Utils.resolveURI(p, testWindows) }.mkString(",") + paths.split(",").map { p => Utils.resolveURI(p) }.mkString(",") } } @@ -1793,7 +1793,7 @@ private[spark] object Utils extends Logging { Array.empty } else { paths.split(",").filter { p => - val uri = resolveURI(p, windows) + val uri = resolveURI(p) Option(uri.getScheme).getOrElse("file") match { case windowsDrive(d) if windows => false case "local" | "file" => false diff --git a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala index bb6251fb4bfbe..ded049a6942f6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy import org.scalatest.FunSuite +import org.apache.spark.util.Utils class PythonRunnerSuite extends FunSuite { @@ -28,10 +29,13 @@ class PythonRunnerSuite extends FunSuite { assert(PythonRunner.formatPath("file:///spark.py") === "/spark.py") assert(PythonRunner.formatPath("local:/spark.py") === "/spark.py") assert(PythonRunner.formatPath("local:///spark.py") === "/spark.py") - assert(PythonRunner.formatPath("C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py") - assert(PythonRunner.formatPath("/C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py") assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py") + if (Utils.isWindows) { + assert(PythonRunner.formatPath("C:\\a\\b\\spark.py", testWindows = true) === "C:/a/b/spark.py") + assert(PythonRunner.formatPath("C:\\a b\\spark.py", testWindows = true) === + "C:/a b/spark.py") + } intercept[IllegalArgumentException] { PythonRunner.formatPath("one:two") } intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:s3:xtremeFS") } intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:/path/to/some.py") } @@ -45,14 +49,14 @@ class PythonRunnerSuite extends FunSuite { Array("/app.py", "/spark.py")) assert(PythonRunner.formatPaths("me.py,file:/you.py,local:/we.py") === Array("me.py", "/you.py", "/we.py")) - assert(PythonRunner.formatPaths("C:/a/b/spark.py", testWindows = true) === - Array("C:/a/b/spark.py")) - assert(PythonRunner.formatPaths("/C:/a/b/spark.py", testWindows = true) === - Array("C:/a/b/spark.py")) - assert(PythonRunner.formatPaths("C:/free.py,pie.py", testWindows = true) === - Array("C:/free.py", "pie.py")) - assert(PythonRunner.formatPaths("lovely.py,C:/free.py,file:/d:/fry.py", testWindows = true) === - Array("lovely.py", "C:/free.py", "d:/fry.py")) + if (Utils.isWindows) { + assert(PythonRunner.formatPaths("C:\\a\\b\\spark.py", testWindows = true) === + Array("C:/a/b/spark.py")) + assert(PythonRunner.formatPaths("C:\\free.py,pie.py", testWindows = true) === + Array("C:/free.py", "pie.py")) + assert(PythonRunner.formatPaths("lovely.py,C:\\free.py,file:/d:/fry.py", testWindows = true) === + Array("lovely.py", "C:/free.py", "d:/fry.py")) + } intercept[IllegalArgumentException] { PythonRunner.formatPaths("one:two,three") } intercept[IllegalArgumentException] { PythonRunner.formatPaths("two,three,four:five:six") } intercept[IllegalArgumentException] { PythonRunner.formatPaths("hdfs:/some.py,foo.py") } diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index e672530d4c7de..655b221451b13 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -28,7 +28,6 @@ import java.util.Locale import com.google.common.base.Charsets.UTF_8 import com.google.common.io.Files import org.scalatest.FunSuite -import org.apache.commons.lang3.SystemUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -222,58 +221,57 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { } test("resolveURI") { - def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = { + def assertResolves(before: String, after: String): Unit = { // This should test only single paths assume(before.split(",").length === 1) // Repeated invocations of resolveURI should yield the same result - def resolve(uri: String): String = Utils.resolveURI(uri, testWindows).toString + def resolve(uri: String): String = Utils.resolveURI(uri).toString assert(resolve(after) === after) assert(resolve(resolve(after)) === after) assert(resolve(resolve(resolve(after))) === after) // Also test resolveURIs with single paths - assert(new URI(Utils.resolveURIs(before, testWindows)) === new URI(after)) - assert(new URI(Utils.resolveURIs(after, testWindows)) === new URI(after)) + assert(new URI(Utils.resolveURIs(before)) === new URI(after)) + assert(new URI(Utils.resolveURIs(after)) === new URI(after)) } val rawCwd = System.getProperty("user.dir") - val cwd = if (SystemUtils.IS_OS_WINDOWS) s"/$rawCwd".replace("\\", "/") else rawCwd + val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd assertResolves("hdfs:/root/spark.jar", "hdfs:/root/spark.jar") assertResolves("hdfs:///root/spark.jar#app.jar", "hdfs:/root/spark.jar#app.jar") assertResolves("spark.jar", s"file:$cwd/spark.jar") assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar%23app.jar") assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt") - if (SystemUtils.IS_OS_WINDOWS) { - assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt", testWindows = true) + if (Utils.isWindows) { + assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt") + assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt") } - assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt", testWindows = true) + assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt") + assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt") + assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt") assertResolves("file:foo", s"file:foo") assertResolves("file:foo:baby", s"file:foo:baby") } test("resolveURIs with multiple paths") { - def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = { + def assertResolves(before: String, after: String): Unit = { assume(before.split(",").length > 1) - assert(Utils.resolveURIs(before, testWindows) === after) - assert(Utils.resolveURIs(after, testWindows) === after) + assert(Utils.resolveURIs(before) === after) + assert(Utils.resolveURIs(after) === after) // Repeated invocations of resolveURIs should yield the same result - def resolve(uri: String): String = Utils.resolveURIs(uri, testWindows) + def resolve(uri: String): String = Utils.resolveURIs(uri) assert(resolve(after) === after) assert(resolve(resolve(after)) === after) assert(resolve(resolve(resolve(after))) === after) } val rawCwd = System.getProperty("user.dir") - val cwd = if (SystemUtils.IS_OS_WINDOWS) s"/$rawCwd".replace("\\", "/") else rawCwd + val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd assertResolves("jar1,jar2", s"file:$cwd/jar1,file:$cwd/jar2") assertResolves("file:/jar1,file:/jar2", "file:/jar1,file:/jar2") assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3") assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4%23jar5,file:$cwd/path%20to/jar6") - if (SystemUtils.IS_OS_WINDOWS) { - assertResolves( """hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4", - testWindows = true) + if (Utils.isWindows) { + assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4") } } @@ -404,7 +402,7 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { Files.write("some text", sourceFile, UTF_8) val path = - if (SystemUtils.IS_OS_WINDOWS) { + if (Utils.isWindows) { new Path("file:/" + sourceDir.getAbsolutePath.replace("\\", "/")) } else { new Path("file://" + sourceDir.getAbsolutePath) @@ -429,7 +427,7 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assert(destInnerFile.isFile()) val filePath = - if (SystemUtils.IS_OS_WINDOWS) { + if (Utils.isWindows) { new Path("file:/" + sourceFile.getAbsolutePath.replace("\\", "/")) } else { new Path("file://" + sourceFile.getAbsolutePath) From 1784239d84865badbd98465506ae385f0c6875c7 Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Fri, 8 May 2015 20:43:12 +0900 Subject: [PATCH 7/9] removed Utils.formatPath. --- core/src/main/scala/org/apache/spark/util/Utils.scala | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index d8e3d6d14ef3d..faea704962be6 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1658,16 +1658,6 @@ private[spark] object Utils extends Logging { */ val windowsDrive = "([a-zA-Z])".r - /** - * Format a path such that it can be safely passed to a URI. - */ - def formatPath(path: String, windows: Boolean): String = { - val formatted = path.replace(" ", "%20") - - // In Windows, the file separator is a backslash, but this is inconsistent with the URI format - if (windows) formatted.replace("\\", "/") else formatted - } - /** * Indicates whether Spark is currently running unit tests. */ From ed460473328a4d8972e12c37acdf07ec5fd6b86e Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Mon, 11 May 2015 10:58:17 +0900 Subject: [PATCH 8/9] avoid scalastyle errors. --- .../scala/org/apache/spark/deploy/PythonRunnerSuite.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala index ded049a6942f6..40000ee7101b9 100644 --- a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala @@ -32,7 +32,8 @@ class PythonRunnerSuite extends FunSuite { assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py") if (Utils.isWindows) { - assert(PythonRunner.formatPath("C:\\a\\b\\spark.py", testWindows = true) === "C:/a/b/spark.py") + assert(PythonRunner.formatPath("C:\\a\\b\\spark.py", testWindows = true) === + "C:/a/b/spark.py") assert(PythonRunner.formatPath("C:\\a b\\spark.py", testWindows = true) === "C:/a b/spark.py") } @@ -54,7 +55,8 @@ class PythonRunnerSuite extends FunSuite { Array("C:/a/b/spark.py")) assert(PythonRunner.formatPaths("C:\\free.py,pie.py", testWindows = true) === Array("C:/free.py", "pie.py")) - assert(PythonRunner.formatPaths("lovely.py,C:\\free.py,file:/d:/fry.py", testWindows = true) === + assert(PythonRunner.formatPaths("lovely.py,C:\\free.py,file:/d:/fry.py", + testWindows = true) === Array("lovely.py", "C:/free.py", "d:/fry.py")) } intercept[IllegalArgumentException] { PythonRunner.formatPaths("one:two,three") } From 3f9a18841ed764d9aac001a927aa4196e46705b3 Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Tue, 12 May 2015 09:13:31 -0700 Subject: [PATCH 9/9] modified some errors. --- .../main/scala/org/apache/spark/deploy/PythonRunner.scala | 4 ++-- .../scala/org/apache/spark/deploy/PythonRunnerSuite.scala | 5 +++-- .../src/main/scala/org/apache/spark/repl/SparkILoop.scala | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index b5412f20844b1..c2ed43a5397d6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -19,10 +19,10 @@ package org.apache.spark.deploy import java.net.URI import java.io.File -import scala.util.Try import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ +import scala.util.Try import org.apache.spark.api.python.PythonUtils import org.apache.spark.util.{RedirectThread, Utils} @@ -98,7 +98,7 @@ object PythonRunner { // In Windows, the drive should not be prefixed with "/" // For instance, python does not understand "/C:/path/to/sheep.py" - if (formattedPath.matches("/[a-zA-Z]:/.*")) { + if (Utils.isWindows && formattedPath.matches("/[a-zA-Z]:/.*")) { formattedPath = formattedPath.stripPrefix("/") } formattedPath diff --git a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala index 40000ee7101b9..80f2cc02516fe 100644 --- a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy import org.scalatest.FunSuite + import org.apache.spark.util.Utils class PythonRunnerSuite extends FunSuite { @@ -29,9 +30,9 @@ class PythonRunnerSuite extends FunSuite { assert(PythonRunner.formatPath("file:///spark.py") === "/spark.py") assert(PythonRunner.formatPath("local:/spark.py") === "/spark.py") assert(PythonRunner.formatPath("local:///spark.py") === "/spark.py") - assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) === - "C:/a/b/spark.py") if (Utils.isWindows) { + assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) === + "C:/a/b/spark.py") assert(PythonRunner.formatPath("C:\\a\\b\\spark.py", testWindows = true) === "C:/a/b/spark.py") assert(PythonRunner.formatPath("C:\\a b\\spark.py", testWindows = true) === diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 3443f1a5b89d0..c806e9c82d067 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -207,7 +207,7 @@ class SparkILoop( SparkILoop.getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") } } else { // We need new URI(jar).getPath here for the case that `jar` includes encoded white space (%20). - SparkILoop.getAddedJars.map { jar => new URI(jar).getPath} + SparkILoop.getAddedJars.map { jar => new URI(jar).getPath } } // work around for Scala bug val totalClassPath = addedJars.foldLeft(