
Commit 30b6606

[SPARK-436] Remove SPARK_JAVA_OPTS from CLI (apache#129)
* [SPARK-436] remove SPARK_JAVA_OPTS from CLI
* use --conf in tests
* add spark.mesos.uris at the beginning of spark-submit, not the end
1 parent 32bd9df commit 30b6606
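
At a glance, this commit replaces the old mechanism, which collected `-Dkey=value` properties and handed them to spark-submit through the `SPARK_JAVA_OPTS` environment variable, with plain `--conf key=value` arguments on the spark-submit command line. The following is a minimal sketch of the two approaches, not code from this repository; the property value, class name, and jar URL are illustrative placeholders taken from the docs examples below.

```python
props = {"spark.mesos.executor.docker.image": "mesosphere/spark:example"}

# Old approach (removed by this commit): serialize the properties into -D
# flags and pass them to spark-submit via the SPARK_JAVA_OPTS env var.
old_env = {"SPARK_JAVA_OPTS": " ".join(
    "-D{}={}".format(k, v) for k, v in props.items())}

# New approach: express the same properties as --conf arguments, prepended
# to whatever the user supplied in --submit-args.
submit_args = "--class MySampleClass http://external.website/mysparkapp.jar"
new_args = [arg
            for k, v in props.items()
            for arg in ("--conf", "{}={}".format(k, v))] + submit_args.split()

print(old_env)   # {'SPARK_JAVA_OPTS': '-Dspark.mesos.executor.docker.image=...'}
print(new_args)  # ['--conf', 'spark.mesos.executor.docker.image=...', '--class', ...]
```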

File tree

dispatcher/cli/dcos_spark/spark_submit.py
docs/history-server.md
docs/run-job.md
docs/security.md
docs/troubleshooting.md
tests/test.py

6 files changed (+76, -58 lines)


dispatcher/cli/dcos_spark/spark_submit.py

Lines changed: 37 additions & 20 deletions
@@ -154,10 +154,22 @@ def show_help():
     return 0


-def submit_job(dispatcher, args, docker_image, verbose=False):
-    (props, args) = partition(args.split(" "), lambda a: a.startswith("-D"))
-
-    props = props + ["-Dspark.mesos.executor.docker.image=" + docker_image]
+def submit_job(dispatcher, submit_args, docker_image, verbose=False):
+    """
+    Run spark-submit.
+
+    :param dispatcher: Spark Dispatcher URL. Used to construct --master.
+    :type dispatcher: string
+    :param args: --submit-args value from `dcos spark run`
+    :type args: string
+    :param docker_image: Docker image to run the driver and executors in.
+    :type docker_image: string
+    :param verbose: If true, prints verbose information to stdout.
+    :type verbose: boolean
+    """
+    args = ["--conf",
+            "spark.mesos.executor.docker.image={}".format(docker_image)] + \
+        submit_args.split()

     hdfs_url = _get_spark_hdfs_url()
     if hdfs_url is not None:
@@ -166,10 +178,12 @@ def submit_job(dispatcher, args, docker_image, verbose=False):
             hdfs_url += '/'
         hdfs_config_url = urllib.parse.urljoin(hdfs_url, 'hdfs-site.xml')
         site_config_url = urllib.parse.urljoin(hdfs_url, 'core-site.xml')
-        props = props + ["-Dspark.mesos.uris={0},{1}".format(hdfs_config_url,
-                                                             site_config_url)]
+        args = ["--conf", "spark.mesos.uris={0},{1}".format(
+            hdfs_config_url,
+            site_config_url)] + \
+            args

-    response = run(dispatcher, args, verbose, props)
+    response = run(dispatcher, args, verbose)
     if response[0] is not None:
         print("Run job succeeded. Submission id: " +
               response[0]['submissionId'])
@@ -266,13 +280,16 @@ def check_java():
     return False


-def run(dispatcher, args, verbose, props=[]):
+def run(dispatcher, args, verbose):
     """
-    This method runs spark_submit with the passed in parameters.
-    ie: ./bin/spark-submit --deploy-mode cluster --class
-    org.apache.spark.examples.SparkPi --master mesos://10.127.131.174:8077
-    --executor-memory 1G --total-executor-cores 100 --driver-memory 1G
-    http://10.127.131.174:8000/spark-examples_2.10-1.3.0-SNAPSHOT.jar 30
+    Runs spark-submit.
+
+    :param dispatcher: Spark Dispatcher URL. Used to construct --master.
+    :type dispatcher: string
+    :param args: Extra arguments to spark-submit
+    :type args: list[string]
+    :param verbose: If true, prints verbose information to stdout.
+    :type verbose: boolean
     """
     if not check_java():
         return (None, 1)
@@ -285,11 +302,12 @@ def run(dispatcher, args, verbose, props=[]):

     command = _get_command(dispatcher, args)

-    extra_env = {"SPARK_JAVA_OPTS": ' '.join(props)}
-    env = dict(os.environ, **extra_env)
-    # On Windows python 2 complains about unicode in env
-    if util.is_windows_platform() and sys.version_info[0] < 3:
-        env = dict([str(key), str(value)] for key, value in env.iteritems())
+    # On Windows, python 2 complains about unicode in env.
+    env = dict([str(key), str(value)]
+               for key, value in os.environ.iteritems()) \
+        if util.is_windows_platform() and sys.version_info[0] < 3 \
+        else os.environ
+
     process = subprocess.Popen(
         command,
         env=env,
@@ -302,9 +320,8 @@ def run(dispatcher, args, verbose, props=[]):
         proxy_thread.proxy.shutdown()
         proxy_thread.join()

-    if verbose is True:
+    if verbose:
         print("Ran command: " + " ".join(command))
-        print("With added env vars: {0}".format(extra_env))
         print("Stdout:")
         print(stdout)
         print("Stderr:")

docs/history-server.md

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ your cluster][10] and run:

 1. Run jobs with the event log enabled:

-        $ dcos spark run --submit-args="-Dspark.eventLog.enabled=true -Dspark.eventLog.dir=hdfs://hdfs/history ... --class MySampleClass http://external.website/mysparkapp.jar"
+        $ dcos spark run --submit-args="--conf spark.eventLog.enabled=true --conf spark.eventLog.dir=hdfs://hdfs/history ... --class MySampleClass http://external.website/mysparkapp.jar"

 1. Visit your job in the dispatcher at
    `http://<dcos_url>/service/spark/`. It will include a link to the

docs/run-job.md

Lines changed: 4 additions & 8 deletions
@@ -43,19 +43,15 @@ you can create a configuration file.
 ## Submission

 All properties are submitted through the `--submit-args` option to
-`dcos spark run`. These are ultimately passed to the [`spark-submit`
-script][13].
+`dcos spark run`. These are ultimately passed to the
+[`spark-submit` script][13]. View `dcos spark run --help` for a list
+of all these options. Example

 Certain common properties have their own special names. You can view
 these through `dcos spark run --help`. Here is an example of using
 `--supervise`:

-    $ dcos spark run --submit-args="--supervise --class MySampleClass http://external.website/mysparkapp.jar 30`
-
-Or you can set arbitrary properties as java system properties by using
-`-D<prop>=<value>`:
-
-    $ dcos spark run --submit-args="-Dspark.executor.memory=4g --class MySampleClass http://external.website/mysparkapp.jar 30`
+    $ dcos spark run --submit-args="--conf spark.executor.memory=4g --supervise --class MySampleClass http://external.website/mysparkapp.jar 30`

 ## Configuration file


docs/security.md

Lines changed: 8 additions & 8 deletions
@@ -31,29 +31,29 @@ Follow these instructions to authenticate in strict mode:
 1. Assign Permissions

    First, allow Spark to run tasks as root:
-
+
    ```
    $ curl -k -L -X PUT \
      -H "Authorization: token=$(dcos config show core.dcos_acs_token)" \
      "$(dcos config show core.dcos_url)/acs/api/v1/acls/dcos:mesos:master:task:user:root" \
      -d '{"description":"Allows root to execute tasks"}' \
      -H 'Content-Type: application/json'
-
+
    $ curl -k -L -X PUT \
      -H "Authorization: token=$(dcos config show core.dcos_acs_token)" \
      "$(dcos config show core.dcos_url)/acs/api/v1/acls/dcos:mesos:master:task:user:root/users/${SERVICE_ACCOUNT_NAME}/create"
    ```

    Now, you must allow Spark to register under the desired role. This is the value used for `service.role` when installing Spark (default: `*`):
-
+
    ```
    $ export ROLE=<service.role value>
    $ curl -k -L -X PUT \
      -H "Authorization: token=$(dcos config show core.dcos_acs_token)" \
      "$(dcos config show core.dcos_url)/acs/api/v1/acls/dcos:mesos:master:framework:role:${ROLE}" \
      -d '{"description":"Allows ${ROLE} to register as a framework with the Mesos master"}' \
      -H 'Content-Type: application/json'
-
+
    $ curl -k -L -X PUT \
      -H "Authorization: token=$(dcos config show core.dcos_acs_token)" \
      "$(dcos config show core.dcos_url)/acs/api/v1/acls/dcos:mesos:master:framework:role:${ROLE}/users/${SERVICE_ACCOUNT_NAME}/create"
@@ -86,11 +86,11 @@ Follow these instructions to authenticate in strict mode:
 1. Submit a Job

    We've now installed the Spark Dispatcher, which is authenticating itself to the Mesos master. Spark jobs are also frameworks that must authenticate. The dispatcher will pass the secret along to the jobs, so all that's left to do is configure our jobs to use DC/OS authentication:
-
+
    ```
-   $ PROPS="-Dspark.mesos.driverEnv.MESOS_MODULES=file:///opt/mesosphere/etc/mesos-scheduler-modules/dcos_authenticatee_module.json "
-   $ PROPS+="-Dspark.mesos.driverEnv.MESOS_AUTHENTICATEE=com_mesosphere_dcos_ClassicRPCAuthenticatee "
-   $ PROPS+="-Dspark.mesos.principal=<principal>"
+   $ PROPS="--conf spark.mesos.driverEnv.MESOS_MODULES=file:///opt/mesosphere/etc/mesos-scheduler-modules/dcos_authenticatee_module.json "
+   $ PROPS+="--conf spark.mesos.driverEnv.MESOS_AUTHENTICATEE=com_mesosphere_dcos_ClassicRPCAuthenticatee "
+   $ PROPS+="--conf spark.mesos.principal=<principal>"
    $ dcos spark run --submit-args="${PROPS} ..."
    ```


docs/troubleshooting.md

Lines changed: 1 addition & 1 deletion
@@ -39,4 +39,4 @@ the `--verbose` flag.
 To debug authentication in a Spark job, enable Java security debug
 output:

-    $ dcos spark run --submit-args="-Dsun.security.krb5.debug=true..."
+    $ dcos spark run --submit-args="--conf sun.security.krb5.debug=true..."

tests/test.py

Lines changed: 25 additions & 20 deletions
@@ -54,7 +54,7 @@ def test_jar():
     _run_tests(jar_url,
                spark_job_runner_args,
                "All tests passed",
-               {"--class": 'com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'})
+               ["--class", 'com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'])


 def test_teragen():
@@ -63,7 +63,7 @@ def test_teragen():
     _run_tests(jar_url,
                "1g hdfs:///terasort_in",
                "Number of records written",
-               {"--class": "com.github.ehiggs.spark.terasort.TeraGen"})
+               ["--class", "com.github.ehiggs.spark.terasort.TeraGen"])


 def test_python():
@@ -74,7 +74,7 @@ def test_python():
     _run_tests(python_script_url,
                "30",
                "Pi is roughly 3",
-               {"--py-files": py_file_url})
+               ["--py-files", py_file_url])


 @pytest.mark.skip(reason="must be run manually against a kerberized HDFS")
@@ -94,8 +94,10 @@ def test_kerberos():
         "http://infinity-artifacts.s3.amazonaws.com/spark/sparkjob-assembly-1.0.jar",
         "hdfs:///krb5.conf",
         "number of words in",
-        {"--class": "HDFSWordCount", "--principal": principal, "--keytab": keytab},
-        {"sun.security.krb5.debug": "true"})
+        ["--class", "HDFSWordCount",
+         "--principal", principal,
+         "--keytab", keytab,
+         "--conf", "sun.security.krb5.debug=true"])


 def test_r():
@@ -111,8 +113,8 @@ def test_cni():
     _run_tests(SPARK_EXAMPLES,
                "",
                "Pi is roughly 3",
-               {"--class": "org.apache.spark.examples.SparkPi"},
-               {"spark.mesos.network.name": "dcos"})
+               ["--conf", "spark.mesos.network.name=dcos",
+                "--class", "org.apache.spark.examples.SparkPi"])


 def test_s3():
@@ -123,18 +125,23 @@ def test_s3():
         s3.s3n_url('linecount.txt'),
         s3.s3n_url("linecount-out"))

+    args = ["--conf",
+            "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
+                os.environ["AWS_ACCESS_KEY_ID"]),
+            "--conf",
+            "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
+                os.environ["AWS_SECRET_ACCESS_KEY"]),
+            "--class", "S3Job"]
     _run_tests(_upload_file(os.environ["SCALA_TEST_JAR_PATH"]),
                app_args,
                "",
-               {"--class": "S3Job"},
-               {"spark.mesos.driverEnv.AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
-                "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"]})
+               args)

     assert len(list(s3.list("linecount-out"))) > 0


 def _hdfs_enabled():
-    return os.environ.get("HDFS_DISABLED") is None
+    return os.environ.get("HDFS_ENABLED") != "false"


 def _require_hdfs():
@@ -248,30 +255,28 @@ def _run_janitor(service_name):
             auth=shakedown.dcos_acs_token()))


-def _run_tests(app_url, app_args, expected_output, args={}, config={}):
-    task_id = _submit_job(app_url, app_args, args, config)
+def _run_tests(app_url, app_args, expected_output, args=[]):
+    task_id = _submit_job(app_url, app_args, args)
     LOGGER.info('Waiting for task id={} to complete'.format(task_id))
     shakedown.wait_for_task_completion(task_id)
     log = _task_log(task_id)
     LOGGER.info("task log: {}".format(log))
     assert expected_output in log


-def _submit_job(app_url, app_args, args={}, config={}):
+def _submit_job(app_url, app_args, args=[]):
     if _is_strict():
         config['spark.mesos.driverEnv.MESOS_MODULES'] = \
             'file:///opt/mesosphere/etc/mesos-scheduler-modules/dcos_authenticatee_module.json'
         config['spark.mesos.driverEnv.MESOS_AUTHENTICATEE'] = 'com_mesosphere_dcos_ClassicRPCAuthenticatee'
         config['spark.mesos.principal'] = 'service-acct'
-    args_str = ' '.join('{0} {1}'.format(k, v) for k,v in args.items())
-    config_str = ' '.join('-D{0}={1}'.format(k, v) for k,v in config.items())
-    submit_args = ' '.join(
-        arg for arg in
-        ["-Dspark.driver.memory=2g", args_str, app_url, app_args, config_str]
-        if arg != "")
+    args_str = ' '.join(args + ["--conf", "spark.driver.memory=2g"])
+    submit_args = ' '.join([args_str, app_url, app_args])
     cmd = 'dcos --log-level=DEBUG spark --verbose run --submit-args="{0}"'.format(submit_args)
+
     LOGGER.info("Running {}".format(cmd))
     stdout = subprocess.check_output(cmd, shell=True).decode('utf-8')
+
     LOGGER.info("stdout: {}".format(stdout))

     regex = r"Submission id: (\S+)"
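
As a quick check of the updated test helpers, the sketch below (standalone, not part of the test suite) reproduces how `_submit_job` now folds the flat list-style `args` into the `--submit-args` string; the example class, jar URL, and application argument are placeholders.

```python
args = ["--class", "org.apache.spark.examples.SparkPi"]
app_url = "http://example.com/spark-examples.jar"
app_args = "30"

# Mirrors the new _submit_job: join the list-style args plus the default
# driver-memory --conf pair, then append the application URL and its args.
args_str = ' '.join(args + ["--conf", "spark.driver.memory=2g"])
submit_args = ' '.join([args_str, app_url, app_args])
cmd = 'dcos --log-level=DEBUG spark --verbose run --submit-args="{0}"'.format(submit_args)

print(cmd)
# dcos --log-level=DEBUG spark --verbose run --submit-args="--class org.apache.spark.examples.SparkPi --conf spark.driver.memory=2g http://example.com/spark-examples.jar 30"
```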
