diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index acbaba6791850..1c92af45f869a 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -35,6 +35,10 @@ private[spark] object PythonUtils { pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.8.2.1-src.zip").mkString(File.separator) } pythonPath ++= SparkContext.jarOfObject(this) + sys.env.get("PYSPARK_ARCHIVES_PATH") match { + case Some(path) => pythonPath += path + case None => // do nothing + } pythonPath.mkString(File.pathSeparator) } diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index cdc3ad51492e7..c1effd3c8a718 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -371,11 +371,6 @@ private[spark] class Client( env(ENV_DIST_CLASSPATH) = dcp } - sys.env.get("PYTHONPATH") match { - case Some(pythonPath) => env("PYTHONPATH") = pythonPath - case None => // do nothing - } - env } diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index 3e23cb8e616f1..6c1101d8ac42e 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -75,7 +75,12 @@ class ExecutorRunnable( val localResources = prepareLocalResources ctx.setLocalResources(localResources) - + // From SPARK-1920 and SPARK-1520 we know PySpark on Yarn can not work when the assembly jar are + // package by JDK 1.7+, so we ship PySpark archives to executors by Yarn with --py-files, and + // add this path to PYTHONPATH. + for ((k, v) <- localResources if k.contains("spark-pyspark")) { + env("PYSPARK_ARCHIVES_PATH") = k + } ctx.setEnvironment(env) val credentials = UserGroupInformation.getCurrentUser().getCredentials() @@ -299,12 +304,6 @@ class ExecutorRunnable( } System.getenv().filterKeys(_.startsWith("SPARK")).foreach { case (k, v) => env(k) = v } - - sys.env.get("PYTHONPATH") match { - case Some(pythonPath) => env("PYTHONPATH") = pythonPath - case None => // do nothing - } - env } }