
Commit 9f31dac

update code and add comments
1 parent f72987c commit 9f31dac

3 files changed, +13 −8 lines


core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 6 additions & 2 deletions
@@ -328,8 +328,12 @@ object SparkSubmit {
       }
     }

-    if (args.isPython && System.getenv("PYSPARK_ARCHIVES_PATH") != null) {
-      args.files = mergeFileLists(args.files, System.getenv("PYSPARK_ARCHIVES_PATH"))
+    // In yarn mode for a python app, if PYSPARK_ARCHIVES_PATH is in the user environment
+    // add pyspark archives to files that can be distributed with the job
+    if (args.isPython && clusterManager == YARN){
+      sys.env.get("PYSPARK_ARCHIVES_PATH").map { archives =>
+        args.files = mergeFileLists(args.files, Utils.resolveURIs(archives))
+      }
     }

     // If we're running a R app, set the main class to our specific R runner
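For context, the new SparkSubmit code switches from a System.getenv null check to sys.env.get, which returns an Option[String]. A minimal standalone sketch of that pattern, assuming a hypothetical mergeFileLists helper in place of Spark's internal one (Utils.resolveURIs is omitted here):

object PySparkArchivesSketch {
  // Hypothetical stand-in for SparkSubmit's internal mergeFileLists:
  // joins comma-separated file lists, skipping null or empty entries.
  def mergeFileLists(lists: String*): String =
    lists.filter(s => s != null && s.nonEmpty).mkString(",")

  def main(args: Array[String]): Unit = {
    var files = "app.py"
    // sys.env.get returns an Option, so the block only runs when the
    // variable is actually set; no explicit null check is needed.
    sys.env.get("PYSPARK_ARCHIVES_PATH").foreach { archives =>
      files = mergeFileLists(files, archives)
    }
    println(files)
  }
}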

yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala

Lines changed: 7 additions & 2 deletions
@@ -326,10 +326,15 @@ private[spark] class Client(
     distCacheMgr.setDistFilesEnv(env)
     distCacheMgr.setDistArchivesEnv(env)

-    if (System.getenv("PYSPARK_ARCHIVES_PATH") != null) {
-      val pythonPath = System.getenv("PYSPARK_ARCHIVES_PATH").split(",").map(
+    // If PYSPARK_ARCHIVES_PATH is in the user environment, set PYTHONPATH to be passed
+    // on to the ApplicationMaster and the executors.
+    sys.env.get("PYSPARK_ARCHIVES_PATH").map { archives =>
+      // archives will be distributed to each machine's working directory, so strip the
+      // path prefix
+      val pythonPath = archives.split(",").map(
         p => (new Path(p)).getName).mkString(":")
       env("PYTHONPATH") = pythonPath
+      sparkConf.setExecutorEnv("PYTHONPATH", pythonPath)
     }

     // Pick up any environment variables for the AM provided through spark.yarn.appMasterEnv.*
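The key detail in the Client change is that YARN localizes each distributed archive into the container's working directory, so PYTHONPATH only needs the bare file names. A standalone sketch of that transformation, using plain string handling to approximate Hadoop's Path.getName (the archive URIs below are illustrative):

object PythonPathSketch {
  // Turn a comma-separated list of archive URIs into a colon-separated
  // PYTHONPATH of bare file names, as the Client code does above.
  def toPythonPath(archives: String): String =
    archives.split(",").map(p => p.substring(p.lastIndexOf('/') + 1)).mkString(":")

  def main(args: Array[String]): Unit = {
    val archives = "hdfs:///libs/pyspark.zip,hdfs:///libs/py4j-src.zip"
    println(toPythonPath(archives))  // prints: pyspark.zip:py4j-src.zip
  }
}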

yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala

Lines changed: 0 additions & 4 deletions
@@ -285,10 +285,6 @@ class ExecutorRunnable(
       YarnSparkHadoopUtil.addPathToEnvironment(env, key, value)
     }

-    if (System.getenv("PYTHONPATH") != null) {
-      env("PYTHONPATH") = System.getenv("PYTHONPATH")
-    }
-
     // Keep this for backwards compatibility but users should move to the config
     sys.env.get("SPARK_YARN_USER_ENV").foreach { userEnvs =>
       YarnSparkHadoopUtil.setEnvFromInputString(env, userEnvs)
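With this removal the executor no longer copies PYTHONPATH from the launcher's environment; it is expected to arrive through the spark.executorEnv.* mechanism that Client.scala now populates via setExecutorEnv. A rough sketch, under the assumption that a spark.executorEnv.NAME key becomes an environment variable named NAME on the executor (the config values are illustrative):

object ExecutorEnvSketch {
  val Prefix = "spark.executorEnv."

  // Collect spark.executorEnv.* entries into an env-var map.
  def executorEnv(conf: Map[String, String]): Map[String, String] =
    conf.collect { case (k, v) if k.startsWith(Prefix) => k.stripPrefix(Prefix) -> v }

  def main(args: Array[String]): Unit = {
    val conf = Map(
      "spark.executorEnv.PYTHONPATH" -> "pyspark.zip:py4j-src.zip",
      "spark.master" -> "yarn-cluster")
    println(executorEnv(conf))  // Map(PYTHONPATH -> pyspark.zip:py4j-src.zip)
  }
}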
