@@ -60,11 +60,14 @@ def rm_r(path):

 def run_cmd(cmd):
     """Given a command as a list of arguments will attempt to execute the
-    command and, on failure, print an error message"""
+    command from the determined SPARK_HOME directory and, on failure, print
+    an error message"""

     if not isinstance(cmd, list):
         cmd = cmd.split()
     try:
+        # prepend SPARK_HOME onto the first element of the command
+        cmd[0] = os.path.join(SPARK_HOME, *filter(lambda x: x, cmd[0].split(os.path.sep)))
         subprocess.check_call(cmd)
     except subprocess.CalledProcessError as e:
         exit_from_command_with_retcode(e.cmd, e.returncode)
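The rewritten run_cmd() re-roots the first token of the command under SPARK_HOME, so commands keep resolving even though a later hunk comments out the os.chdir(SPARK_HOME) call in main(). A minimal standalone sketch of that transformation, using a hypothetical SPARK_HOME value rather than the one the script computes:

import os

SPARK_HOME = "/opt/spark"  # hypothetical example value; the script derives its own

def prepend_spark_home(cmd):
    # split the first token on the path separator, drop empty pieces (e.g. from a
    # leading or doubled separator), and re-anchor the result under SPARK_HOME;
    # remaining arguments pass through unchanged
    parts = [p for p in cmd[0].split(os.path.sep) if p]
    return [os.path.join(SPARK_HOME, *parts)] + cmd[1:]

print(prepend_spark_home(["dev/lint-python"]))
# ['/opt/spark/dev/lint-python']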
@@ -194,9 +197,8 @@ def exec_sbt(sbt_args=[]):


 def get_hadoop_profiles(hadoop_version):
-    """Return a list of profiles indicating which Hadoop version to use from a Hadoop version tag."""
-
-    #amplab_jenkins_build_profile = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE")
+    """Return a list of profiles indicating which Hadoop version to use from
+    a Hadoop version tag."""

     sbt_maven_hadoop_profiles = {
         "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.0.4"],
@@ -224,11 +226,14 @@ def get_build_profiles(hadoop_version="hadoop2.3",
     base_profiles = ["-Pkinesis-asl"]
     hive_profiles = ["-Phive", "-Phive-thriftserver"]
     hadoop_profiles = get_hadoop_profiles(hadoop_version)
-
+
+    build_profiles = hadoop_profiles
     # first, check and add the base profiles
-    if base_profiles: build_profiles = build_profile + base_profiles
+    if base_profiles:
+        build_profiles = build_profiles + base_profiles
     # second, check and add the hive profiles
-    if hive_profiles: build_profiles = build_profile + hive_profiles
+    if hive_profiles:
+        build_profiles = build_profiles + hive_profiles

     return build_profiles

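With the build_profile typo fixed, get_build_profiles() now seeds the list from the Hadoop profiles and appends the base and Hive profiles in order. A rough sketch of the resulting flag list; the hadoop2.3 mapping shown here is an assumption, since only the hadoop1.0 entry is visible in the hunk above:

hadoop_profiles = ["-Phadoop-2.3", "-Dhadoop.version=2.3.0"]  # assumed hadoop2.3 entry
base_profiles = ["-Pkinesis-asl"]
hive_profiles = ["-Phive", "-Phive-thriftserver"]

build_profiles = hadoop_profiles + base_profiles + hive_profiles
print(" ".join(build_profiles))
# -Phadoop-2.3 -Dhadoop.version=2.3.0 -Pkinesis-asl -Phive -Phive-thriftserver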
@@ -238,7 +243,7 @@ def build_spark_maven(hadoop_version):
     mvn_goals = ["clean", "package", "-DskipTests"]
     profiles_and_goals = build_profiles + mvn_goals

-    print "[info] Building Spark (w/Hive 0.13.1) with these arguments:",
+    print "[info] Building Spark (w/Hive 0.13.1) using Maven with these arguments:",
     print " ".join(profiles_and_goals)

     exec_maven(profiles_and_goals)
@@ -251,7 +256,7 @@ def build_spark_sbt(hadoop_version):
                  "streaming-kafka-assembly/assembly"]
     profiles_and_goals = build_profiles + sbt_goals

-    print "[info] Building Spark (w/Hive 0.13.1) with these arguments:",
+    print "[info] Building Spark (w/Hive 0.13.1) using SBT with these arguments:",
     print " ".join(profiles_and_goals)

     exec_sbt(profiles_and_goals)
@@ -296,9 +301,31 @@ def determine_test_modules(test_env):

         # find any sql files
         sql_files = [f for f in changed_files
-                     if any(f.startswith(p) for p in ["sql/",
-                                                      "bin/spark-sql",
-                                                      "sbin/start-thriftserver.sh"])]
+                     if any(f.startswith(p) for p in
+                            ["sql/",
+                             "bin/spark-sql",
+                             "sbin/start-thriftserver.sh",
+                             "examples/src/main/java/org/apache/spark/examples/sql/",
+                             "examples/src/main/scala/org/apache/spark/examples/sql/"])]
+        mllib_files = [f for f in changed_files
+                       if any(f.startswith(p) for p in
+                              ["examples/src/main/java/org/apache/spark/examples/mllib/",
+                               "examples/src/main/scala/org/apache/spark/examples/mllib",
+                               "data/mllib/",
+                               "mllib/"])]
+        streaming_files = [f for f in changed_files
+                           if any(f.startswith(p) for p in
+                                  ["examples/scala-2.10/",
+                                   "examples/src/main/java/org/apache/spark/examples/streaming/",
+                                   "examples/src/main/scala/org/apache/spark/examples/streaming/",
+                                   "external/",
+                                   "extras/java8-tests/",
+                                   "extras/kinesis-asl/",
+                                   "streaming/"])]
+        graphx_files = [f for f in changed_files
+                        if any(f.startswith(p) for p in
+                               ["examples/src/main/scala/org/apache/spark/examples/graphx/",
+                                "graphx/"])]

         non_sql_files = set(changed_files).difference(set(sql_files))

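Each of the four lists above is a plain prefix match of the changed file paths against a hard-coded set of module prefixes, so a single file can land in several lists. A small sketch of that matching against hypothetical changed paths:

def matching_files(changed_files, prefixes):
    # keep any changed path that starts with one of the module's prefixes
    return [f for f in changed_files if any(f.startswith(p) for p in prefixes)]

changed_files = ["mllib/src/main/scala/Example.scala",  # hypothetical paths
                 "docs/mllib-guide.md"]
print(matching_files(changed_files, ["mllib/", "data/mllib/"]))
# ['mllib/src/main/scala/Example.scala']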
@@ -309,38 +336,66 @@ def determine_test_modules(test_env):
             test_suite.append("SQL")
             if not non_sql_files:
                 print "[info] Detected no changes except in SQL. Will only run SQL tests."
+        if mllib_files:
+            print "[info] Detected changes in MLlib. Will run MLlib test suite."
+            test_suite.append("MLLIB")
+        if streaming_files:
+            print "[info] Detected changes in Streaming. Will run Streaming test suite."
+            test_suite.append("STREAMING")
+        if graphx_files:
+            print "[info] Detected changes in GraphX. Will run GraphX test suite."
+            test_suite.append("GRAPHX")
+
         return set(test_suite)
     else:
         # we aren't in the Amplab environment so simply run all tests
-        test_suite.append("CORE")
-        test_suite.append("SQL")
+        test_suite.append("ALL")
         return set(test_suite)


 def run_scala_tests_maven(test_profiles):
     mvn_test_goals = ["test", "--fail-at-end"]
     profiles_and_goals = test_profiles + mvn_test_goals

-    print "[info] Running Spark tests with these arguments:",
+    print "[info] Running Spark tests using Maven with these arguments:",
     print " ".join(profiles_and_goals)

     exec_maven(profiles_and_goals)


 def run_scala_tests_sbt(test_modules, test_profiles):
-    # if we only have changes in SQL build a custom test list
-    if "SQL" in test_modules and "CORE" not in test_modules:
-        sbt_test_goals = ["catalyst/test",
-                          "sql/test",
-                          "hive/test",
-                          "hive-thriftserver/test",
-                          "mllib/test"]
-    else:
+    if "ALL" in test_modules:
         sbt_test_goals = ["test"]
+    else:
+        # if we only have changes in SQL build a custom test list
+        if "SQL" in test_modules and "CORE" not in test_modules:
+            sbt_test_goals = ["catalyst/test",
+                              "sql/test",
+                              "hive/test",
+                              "hive-thriftserver/test",
+                              "mllib/test",
+                              "examples/test"]
+        if "MLLIB" in test_modules and "CORE" not in test_modules:
+            sbt_test_goals = sbt_test_goals + ["mllib/test",
+                                               "examples/test"]
+        if "STREAMING" in test_modules and "CORE" not in test_modules:
+            sbt_test_goals = sbt_test_goals + ["streaming/test",
+                                               "streaming-flume/test",
+                                               "streaming-flume-sink/test",
+                                               "streaming-kafka/test",
+                                               "streaming-mqtt/test",
+                                               "streaming-twitter/test",
+                                               "streaming-zeromq/test",
+                                               "examples/test"]
+        if "GRAPHX" in test_modules and "CORE" not in test_modules:
+            sbt_test_goals = sbt_test_goals + ["graphx/test",
+                                               "examples/test"]
+        if not sbt_test_goals:
+            sbt_test_goals = ["test"]

     profiles_and_goals = test_profiles + sbt_test_goals

-    print "[info] Running Spark tests with these arguments:",
+    print "[info] Running Spark tests using SBT with these arguments:",
     print " ".join(profiles_and_goals)

     exec_sbt(profiles_and_goals)
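One thing to note in the else branch above: sbt_test_goals is only created by the SQL case, so a change set that matches, say, only MLlib would reach sbt_test_goals = sbt_test_goals + [...] (and the final if not sbt_test_goals guard) with the name still unbound. A defensive sketch of the same module-to-goal mapping, not part of the patch, that seeds the list first:

def sbt_goals_for(test_modules):
    # sketch only: mirrors the mapping in the hunk above, but the goal list is
    # seeded up front so an MLLIB/STREAMING/GRAPHX-only change cannot raise a
    # NameError, and the ["test"] fallback still applies when nothing matches
    if "ALL" in test_modules:
        return ["test"]
    goals = []
    if "SQL" in test_modules and "CORE" not in test_modules:
        goals += ["catalyst/test", "sql/test", "hive/test",
                  "hive-thriftserver/test", "mllib/test", "examples/test"]
    if "MLLIB" in test_modules and "CORE" not in test_modules:
        goals += ["mllib/test", "examples/test"]
    if "STREAMING" in test_modules and "CORE" not in test_modules:
        goals += ["streaming/test", "streaming-flume/test", "streaming-flume-sink/test",
                  "streaming-kafka/test", "streaming-mqtt/test", "streaming-twitter/test",
                  "streaming-zeromq/test", "examples/test"]
    if "GRAPHX" in test_modules and "CORE" not in test_modules:
        goals += ["graphx/test", "examples/test"]
    return goals if goals else ["test"]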
@@ -393,7 +448,7 @@ def main():
         print "ensure the $HOME environment variable is set properly."
         sys.exit(1)

-    os.chdir(SPARK_HOME)
+    # os.chdir(SPARK_HOME)

     rm_r(os.path.join(SPARK_HOME, "work"))
     rm_r(os.path.join(USER_HOME, ".ivy2/local/org.apache.spark"))
@@ -418,12 +473,15 @@ def main():
         # to reflect the environment settings
         build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
         hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.3")
-        test_env = "amplab_jenkins"
+        test_env = "amplab_jenkins"
     else:
         # else we're running locally and can use local settings
         build_tool = "sbt"
         hadoop_version = "hadoop2.3"
-        test_env = "local"
+        test_env = "local"
+
+    print "[info] Using build tool", build_tool, "with profile", hadoop_version,
+    print "under environment", test_env

     # license checks
     run_apache_rat_checks()