
Commit 4533df2

Python arguments patch + tests + docs
1 parent e103225 commit 4533df2

6 files changed: 93 additions, 27 deletions

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala

Lines changed: 18 additions & 4 deletions
@@ -47,8 +47,8 @@ private[spark] object ClientArguments {
         mainAppResource = Some(PythonMainAppResource(mainPyFile))
       case Array("--primary-java-resource", primaryJavaResource: String) =>
         mainAppResource = Some(JavaMainAppResource(primaryJavaResource))
-      case Array("--main-class", clazz: String) =>
-        mainClass = Some(clazz)
+      case Array("--main-class", m_class: String) =>
+        mainClass = Some(m_class)
       case Array("--other-py-files", pyFiles: String) =>
         otherPyFiles = pyFiles.split(",")
       case Array("--arg", arg: String) =>
@@ -77,7 +77,13 @@ private[spark] class Client(

   private val driverJavaOptions = submissionSparkConf.get(
     org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS)
-
+  /**
+   * Run command that initializes a DriverSpec that is updated after each
+   * KubernetesSubmissionStep in the sequence that is passed in. The final
+   * driver spec is used to build the driver container, driver pod, and
+   * Kubernetes resources.
+   */
   def run(): Unit = {
     var currentDriverSpec = new KubernetesDriverSpec(
       driverPod = new PodBuilder().build(),
@@ -146,6 +152,8 @@ private[spark] object Client {
     val appName = sparkConf.getOption("spark.app.name").getOrElse("spark")
     val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}"
     val master = resolveK8sMaster(sparkConf.get("spark.master"))
+    // This orchestrator determines which steps are needed to resolve the client
+    // arguments that are passed in. Use cases include Scala/Java and Python submissions.
     val submissionStepsOrchestrator = new KubernetesSubmissionStepsOrchestrator(
       namespace,
       kubernetesAppId,
@@ -177,7 +185,13 @@
         loggingPodStatusWatcher).run()
     }
   }
-
+  /**
+   * Entry point from SparkSubmit in spark-core.
+   *
+   * @param args Array of strings with alternating identifiers and values, which
+   *             ClientArguments parses using the identifiers that precede the values.
+   */
   def main(args: Array[String]): Unit = {
     val parsedArguments = ClientArguments.fromCommandLineArgs(args)
     val sparkConf = new SparkConf()
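
For context on the run() doc comment above: each submission step consumes a KubernetesDriverSpec and returns an updated copy, and run() threads a single spec through the ordered sequence. A minimal sketch of that contract, inferred from the diffs in this commit (the trait name and prepareSubmission signature appear in the new NonPythonArgumentResolver below; resolveDriverSpec is a hypothetical helper name):

trait KubernetesSubmissionStep {
  def prepareSubmission(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec
}

// run() updates a `var currentDriverSpec` in sequence; the same contract can be
// expressed as a fold over the ordered steps.
def resolveDriverSpec(
    initialSpec: KubernetesDriverSpec,
    steps: Seq[KubernetesSubmissionStep]): KubernetesDriverSpec =
  steps.foldLeft(initialSpec) { (spec, step) => step.prepareSubmission(spec) }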

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesSubmissionStepsOrchestrator.scala

Lines changed: 5 additions & 5 deletions
@@ -20,7 +20,7 @@ import org.apache.spark.SparkConf
 import org.apache.spark.deploy.kubernetes.ConfigurationUtils
 import org.apache.spark.deploy.kubernetes.config._
 import org.apache.spark.deploy.kubernetes.constants._
-import org.apache.spark.deploy.kubernetes.submit.submitsteps.{BaseSubmissionStep, DependencyResolutionStep, DriverKubernetesCredentialsStep, InitContainerBootstrapStep, KubernetesSubmissionStep, PythonStep}
+import org.apache.spark.deploy.kubernetes.submit.submitsteps._
 import org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer.InitContainerStepsOrchestrator
 import org.apache.spark.launcher.SparkLauncher
 import org.apache.spark.util.Utils
@@ -91,10 +91,10 @@ private[spark] class KubernetesSubmissionStepsOrchestrator(
       submissionSparkConf)
     val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep(
       submissionSparkConf, kubernetesAppId)
-    val pythonStep = mainAppResource match {
+    val pythonResolverStep = mainAppResource match {
       case PythonMainAppResource(mainPyResource) =>
-        Option(new PythonStep(mainPyResource, additionalPythonFiles, filesDownloadPath))
-      case _ => Option.empty[KubernetesSubmissionStep]
+        Option(new PythonStep(mainPyResource, additionalPythonFiles, appArgs, filesDownloadPath))
+      case _ => Option(new NonPythonArgumentResolver(appArgs))
     }
     val initContainerBootstrapStep = if ((sparkJars ++ sparkFiles).exists { uri =>
       Option(Utils.resolveURI(uri).getScheme).getOrElse("file") != "local"
@@ -130,6 +130,6 @@ private[spark] class KubernetesSubmissionStepsOrchestrator(
       kubernetesCredentialsStep,
       dependencyResolutionStep) ++
       initContainerBootstrapStep.toSeq ++
-      pythonStep.toSeq
+      pythonResolverStep.toSeq
   }
 }
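
The behavioral change in this hunk: the non-Python branch previously yielded Option.empty, leaving argument resolution to BaseSubmissionStep; now every submission produces exactly one argument-resolving step, since both branches return Some(...) and pythonResolverStep.toSeq always contributes one element. For reference, a sketch of the resource ADT being matched, assuming MainAppResource is a sealed trait (the case-class names come from the diffs; the field name is a guess):

sealed trait MainAppResource
case class JavaMainAppResource(primaryResource: String) extends MainAppResource
case class PythonMainAppResource(primaryResource: String) extends MainAppResource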

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseSubmissionStep.scala

Lines changed: 0 additions & 4 deletions
@@ -89,10 +89,6 @@ private[spark] class BaseSubmissionStep(
         .withName(ENV_DRIVER_MAIN_CLASS)
         .withValue(mainClass)
         .endEnv()
-      .addNewEnv()
-        .withName(ENV_DRIVER_ARGS)
-        .withValue(appArgs.mkString(" "))
-        .endEnv()
       .withNewResources()
         .addToRequests("cpu", driverCpuQuantity)
         .addToRequests("memory", driverMemoryQuantity)
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/NonPythonArgumentResolver.scala

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.deploy.kubernetes.submit.submitsteps
+
+import io.fabric8.kubernetes.api.model.ContainerBuilder
+
+import org.apache.spark.deploy.kubernetes.constants._
+import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils
+
+private[spark] class NonPythonArgumentResolver(
+    appArgs: Array[String]) extends KubernetesSubmissionStep {
+
+  override def prepareSubmission(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = {
+    val withNonPythonArgumentsResolvedContainer = new ContainerBuilder(driverSpec.driverContainer)
+      .addNewEnv()
+        .withName(ENV_DRIVER_ARGS)
+        .withValue(appArgs.mkString(" "))
+        .endEnv()
+    driverSpec.copy(driverContainer = withNonPythonArgumentsResolvedContainer.build())
+  }
+}
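
A hypothetical usage sketch of the new step; initialDriverSpec stands in for whatever spec the preceding steps produced:

// appArgs end up space-joined in the ENV_DRIVER_ARGS environment variable.
val step = new NonPythonArgumentResolver(Array("--input", "/data/in", "--iterations", "10"))
val resolved = step.prepareSubmission(initialDriverSpec)
// resolved.driverContainer now carries ENV_DRIVER_ARGS = "--input /data/in --iterations 10"

Arguments that themselves contain spaces would be mangled by mkString(" "), but that limitation predates this patch, since BaseSubmissionStep joined appArgs the same way.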

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala

Lines changed: 15 additions & 1 deletion
@@ -16,18 +16,32 @@
  */
 package org.apache.spark.deploy.kubernetes.submit.submitsteps

-import io.fabric8.kubernetes.api.model.ContainerBuilder
+import org.apache.spark.internal.Logging

+import io.fabric8.kubernetes.api.model.ContainerBuilder
 import org.apache.spark.deploy.kubernetes.constants._
 import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils

+
 private[spark] class PythonStep(
     primaryPyFile: String,
     otherPyFiles: Seq[String],
+    appArgs: Array[String],
     filesDownloadPath: String) extends KubernetesSubmissionStep {

   override def prepareSubmission(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = {
+    val arguments: Array[String] = otherPyFiles.toList match {
+      case Nil => null +: appArgs
+      case a :: b => a match {
+        case _ if a == "" && b == Nil => null +: appArgs
+        case _ => appArgs
+      }
+    }
     val withPythonPrimaryFileContainer = new ContainerBuilder(driverSpec.driverContainer)
+      .addNewEnv()
+        .withName(ENV_DRIVER_ARGS)
+        .withValue(arguments.mkString(" "))
+        .endEnv()
       .addNewEnv()
         .withName(ENV_PYSPARK_PRIMARY)
         .withValue(KubernetesFileUtils.resolveFilePath(primaryPyFile, filesDownloadPath))
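
The nested match above is dense, so a standalone sketch of the same decision may read more clearly (hypothetical helper name; my reading of the intent). When no extra Python files were passed, either an empty seq or the single empty string produced by splitting an empty --other-py-files value, a null placeholder is prepended to appArgs, presumably so the driver-side launcher still finds a value in the py-files position; otherwise appArgs passes through unchanged:

// Hypothetical standalone equivalent of the `arguments` computation above.
def resolvePySparkArguments(otherPyFiles: Seq[String], appArgs: Array[String]): Array[String] =
  otherPyFiles match {
    case Seq() | Seq("") => null +: appArgs // placeholder keeps the py-files slot occupied
    case _ => appArgs
  }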

resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala

Lines changed: 20 additions & 13 deletions
@@ -72,7 +72,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter {
     kubernetesTestComponents.deleteNamespace()
   }

-  test("Run PySpark Job on file from SUBMITTER") {
+  test("Run PySpark Job on file from SUBMITTER with --py-files") {
     assume(testBackend.name == MINIKUBE_TEST_BACKEND)

     launchStagingServer(SSLOptions(), None)
@@ -82,7 +82,10 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter {
       .set(EXECUTOR_DOCKER_IMAGE,
         System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest"))

-    runPySparkPiAndVerifyCompletion(PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION, Seq.empty[String])
+    runPySparkPiAndVerifyCompletion(
+      PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION,
+      Seq(PYSPARK_SORT_CONTAINER_LOCAL_FILE_LOCATION)
+    )
   }

   test("Run PySpark Job on file from CONTAINER with spark.jar defined") {
@@ -154,7 +157,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter {
     runSparkApplicationAndVerifyCompletion(
       JavaMainAppResource(SUBMITTER_LOCAL_MAIN_APP_RESOURCE),
       GROUP_BY_MAIN_CLASS,
-      "The Result is",
+      Array("The Result is"),
       Array.empty[String],
       Seq.empty[String])
   }
@@ -218,7 +221,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter {
     runSparkApplicationAndVerifyCompletion(
       JavaMainAppResource(SUBMITTER_LOCAL_MAIN_APP_RESOURCE),
       FILE_EXISTENCE_MAIN_CLASS,
-      s"File found at /opt/spark/${testExistenceFile.getName} with correct contents.",
+      Array(s"File found at /opt/spark/${testExistenceFile.getName} with correct contents."),
       Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS),
       Seq.empty[String])
   }
@@ -250,7 +253,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter {
     runSparkApplicationAndVerifyCompletion(
       JavaMainAppResource(appResource),
       SPARK_PI_MAIN_CLASS,
-      "Pi is roughly 3",
+      Array("Pi is roughly 3"),
       Array.empty[String],
       Seq.empty[String])
   }
@@ -260,15 +263,15 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter {
     runSparkApplicationAndVerifyCompletion(
       PythonMainAppResource(appResource),
       PYSPARK_PI_MAIN_CLASS,
-      "Pi is roughly 3",
-      Array("5"),
+      Array("(10/10)", "Pi is roughly 3"),
+      Array("10"),
       otherPyFiles)
   }

   private def runSparkApplicationAndVerifyCompletion(
       appResource: MainAppResource,
       mainClass: String,
-      expectedLogOnCompletion: String,
+      expectedLogOnCompletion: Array[String],
       appArgs: Array[String],
       otherPyFiles: Seq[String]): Unit = {
     val clientArguments = ClientArguments(
@@ -284,11 +287,13 @@
       .getItems
       .get(0)
     Eventually.eventually(TIMEOUT, INTERVAL) {
-      assert(kubernetesTestComponents.kubernetesClient
-        .pods()
-        .withName(driverPod.getMetadata.getName)
-        .getLog
-        .contains(expectedLogOnCompletion), "The application did not complete.")
+      expectedLogOnCompletion.foreach { e =>
+        assert(kubernetesTestComponents.kubernetesClient
+          .pods()
+          .withName(driverPod.getMetadata.getName)
+          .getLog
+          .contains(e), "The application did not complete.")
+      }
     }
   }
@@ -357,6 +362,8 @@ private[spark] object KubernetesSuite {
   val PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION =
     "local:///opt/spark/examples/src/main/python/pi.py"
   val PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION = "src/test/python/pi.py"
+  val PYSPARK_SORT_CONTAINER_LOCAL_FILE_LOCATION =
+    "local:///opt/spark/examples/src/main/python/sort.py"
   val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" +
     ".integrationtest.jobs.FileExistenceTest"
   val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" +
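
Widening expectedLogOnCompletion from String to Array[String] is what lets the PySpark Pi test assert both that all partitions ran ("(10/10)") and that the result line appeared. The polling assertion uses ScalaTest's Eventually; a minimal sketch of the pattern, with illustrative patience values and a hypothetical fetchDriverLog helper standing in for the fabric8 pods().withName(...).getLog chain:

import org.scalatest.concurrent.{Eventually, PatienceConfiguration}
import org.scalatest.time.{Minutes, Seconds, Span}

// Retry the block until it passes or the timeout elapses.
val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes))
val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds))
Eventually.eventually(TIMEOUT, INTERVAL) {
  val log = fetchDriverLog() // hypothetical helper returning the driver pod's log as a String
  expectedLogOnCompletion.foreach { expected =>
    assert(log.contains(expected), s"Did not find '$expected' in the driver log.")
  }
}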
