
Commit 4a73bed

[SPARK-29991][INFRA] Support Hive 1.2 and Hive 2.3 (default) in PR builder
### What changes were proposed in this pull request?

Currently, the Apache Spark PR builder uses `hive-1.2` for `hadoop-2.7` and `hive-2.3` for `hadoop-3.2`. This PR aims to:

- support `[test-hive1.2]` in the PR builder;
- support `[test-hive2.3]` in the PR builder, to be consistent with and independent of the default profile. After this PR, all PR builders use Hive 2.3 by default (because Spark uses Hive 2.3 by default as of c98e5eb);
- use the default profile in the AppVeyor build.

Note that this was previously reverted due to an unexpected test failure in `ThriftServerPageSuite`, which was investigated in #26706. This PR fixes that by letting the suite run in its own forked JVM. There is no explicit evidence for this fix; it was my speculation, but it does fix the failure.

### Why are the changes needed?

The new tags allow us more flexibility.

### Does this PR introduce any user-facing change?

No. (This is a dev-only change.)

### How was this patch tested?

Check the Jenkins triggers in this PR.

Default:

```
========================================================================
Building Spark
========================================================================
[info] Building Spark using SBT with these arguments: -Phadoop-2.7 -Phive-2.3 -Phive-thriftserver -Pmesos -Pspark-ganglia-lgpl -Phadoop-cloud -Phive -Pkubernetes -Pkinesis-asl -Pyarn test:package streaming-kinesis-asl-assembly/assembly
```

`[test-hive1.2][test-hadoop3.2]`:

```
========================================================================
Building Spark
========================================================================
[info] Building Spark using SBT with these arguments: -Phadoop-3.2 -Phive-1.2 -Phadoop-cloud -Pyarn -Pspark-ganglia-lgpl -Phive -Phive-thriftserver -Pmesos -Pkubernetes -Pkinesis-asl test:package streaming-kinesis-asl-assembly/assembly
```

`[test-maven][test-hive-2.3]`:

```
========================================================================
Building Spark
========================================================================
[info] Building Spark using Maven with these arguments: -Phadoop-2.7 -Phive-2.3 -Pspark-ganglia-lgpl -Pyarn -Phive -Phadoop-cloud -Pkinesis-asl -Pmesos -Pkubernetes -Phive-thriftserver clean package -DskipTests
```

Closes #26710 from HyukjinKwon/SPARK-29991.

Authored-by: HyukjinKwon <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
1 parent b182ed8 commit 4a73bed

File tree

4 files changed, +58 -30 lines changed


appveyor.yml

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ install:
 build_script:
   # '-Djna.nosys=true' is required to avoid kernel32.dll load failure.
   # See SPARK-28759.
-  - cmd: mvn -DskipTests -Psparkr -Phive -Phive-1.2 -Djna.nosys=true package
+  - cmd: mvn -DskipTests -Psparkr -Phive -Djna.nosys=true package

 environment:
   NOT_CRAN: true

dev/run-tests-jenkins.py

Lines changed: 5 additions & 0 deletions
@@ -182,6 +182,11 @@ def main():
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"
     if "test-hadoop3.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
+    # Switch the Hive profile based on the PR title:
+    if "test-hive1.2" in ghprb_pull_title:
+        os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2"
+    if "test-hive2.3" in ghprb_pull_title:
+        os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3"

     build_display_name = os.environ["BUILD_DISPLAY_NAME"]
     build_url = os.environ["BUILD_URL"]
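
For illustration, here is a minimal, self-contained sketch (not part of the commit) of how these title tags are meant to flow into the Hive profile choice: the Jenkins wrapper only exports `AMPLAB_JENKINS_BUILD_HIVE_PROFILE`, and `dev/run-tests.py` (below) falls back to `hive2.3` when no tag is present. The helper name `pick_hive_profile` is hypothetical and only mirrors the logic added above.

```python
import os

def pick_hive_profile(ghprb_pull_title):
    # Hypothetical helper mirroring the tag handling added above; it is not
    # part of the commit. The Jenkins wrapper only exports an environment variable.
    if "test-hive1.2" in ghprb_pull_title:
        os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2"
    if "test-hive2.3" in ghprb_pull_title:
        os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3"
    # dev/run-tests.py reads the variable back with a hive2.3 default.
    return os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3")

print(pick_hive_profile("[SPARK-29991][INFRA] Some PR title"))                # hive2.3 (default)
print(pick_hive_profile("[SPARK-29991][INFRA][test-hive1.2] Some PR title"))  # hive1.2
```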

dev/run-tests.py

Lines changed: 50 additions & 28 deletions
@@ -179,7 +179,8 @@ def run_apache_rat_checks():
     run_cmd([os.path.join(SPARK_HOME, "dev", "check-license")])


-def run_scala_style_checks(build_profiles):
+def run_scala_style_checks(extra_profiles):
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     set_title_and_block("Running Scala style checks", "BLOCK_SCALA_STYLE")
     profiles = " ".join(build_profiles)
     print("[info] Checking Scala style using SBT with these profiles: ", profiles)
@@ -283,8 +284,8 @@ def get_hadoop_profiles(hadoop_version):
     """

     sbt_maven_hadoop_profiles = {
-        "hadoop2.7": ["-Phadoop-2.7", "-Phive-1.2"],
-        "hadoop3.2": ["-Phadoop-3.2", "-Phive-2.3"],
+        "hadoop2.7": ["-Phadoop-2.7"],
+        "hadoop3.2": ["-Phadoop-3.2"],
     }

     if hadoop_version in sbt_maven_hadoop_profiles:
@@ -295,9 +296,28 @@ def get_hadoop_profiles(hadoop_version):
         sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))


-def build_spark_maven(hadoop_version):
+def get_hive_profiles(hive_version):
+    """
+    For the given Hive version tag, return a list of Maven/SBT profile flags for
+    building and testing against that Hive version.
+    """
+
+    sbt_maven_hive_profiles = {
+        "hive1.2": ["-Phive-1.2"],
+        "hive2.3": ["-Phive-2.3"],
+    }
+
+    if hive_version in sbt_maven_hive_profiles:
+        return sbt_maven_hive_profiles[hive_version]
+    else:
+        print("[error] Could not find", hive_version, "in the list. Valid options",
+              " are", sbt_maven_hive_profiles.keys())
+        sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
+
+
+def build_spark_maven(extra_profiles):
     # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     mvn_goals = ["clean", "package", "-DskipTests"]
     profiles_and_goals = build_profiles + mvn_goals

@@ -306,9 +326,9 @@ def build_spark_maven(hadoop_version):
     exec_maven(profiles_and_goals)


-def build_spark_sbt(hadoop_version):
+def build_spark_sbt(extra_profiles):
     # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     sbt_goals = ["test:package",  # Build test jars as some tests depend on them
                  "streaming-kinesis-asl-assembly/assembly"]
     profiles_and_goals = build_profiles + sbt_goals
@@ -318,10 +338,10 @@ def build_spark_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)


-def build_spark_unidoc_sbt(hadoop_version):
+def build_spark_unidoc_sbt(extra_profiles):
     set_title_and_block("Building Unidoc API Documentation", "BLOCK_DOCUMENTATION")
     # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     sbt_goals = ["unidoc"]
     profiles_and_goals = build_profiles + sbt_goals

@@ -331,9 +351,9 @@ def build_spark_unidoc_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)


-def build_spark_assembly_sbt(hadoop_version, checkstyle=False):
+def build_spark_assembly_sbt(extra_profiles, checkstyle=False):
     # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     sbt_goals = ["assembly/package"]
     profiles_and_goals = build_profiles + sbt_goals
     print("[info] Building Spark assembly using SBT with these arguments: ",
@@ -343,25 +363,25 @@ def build_spark_assembly_sbt(hadoop_version, checkstyle=False):
     if checkstyle:
         run_java_style_checks(build_profiles)

-    build_spark_unidoc_sbt(hadoop_version)
+    build_spark_unidoc_sbt(extra_profiles)


-def build_apache_spark(build_tool, hadoop_version):
-    """Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
-    `maven`). Defaults to using `sbt`."""
+def build_apache_spark(build_tool, extra_profiles):
+    """Will build Spark with the extra profiles and the passed in build tool
+    (either `sbt` or `maven`). Defaults to using `sbt`."""

     set_title_and_block("Building Spark", "BLOCK_BUILD")

     rm_r("lib_managed")

     if build_tool == "maven":
-        build_spark_maven(hadoop_version)
+        build_spark_maven(extra_profiles)
     else:
-        build_spark_sbt(hadoop_version)
+        build_spark_sbt(extra_profiles)


-def detect_binary_inop_with_mima(hadoop_version):
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+def detect_binary_inop_with_mima(extra_profiles):
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     set_title_and_block("Detecting binary incompatibilities with MiMa", "BLOCK_MIMA")
     profiles = " ".join(build_profiles)
     print("[info] Detecting binary incompatibilities with MiMa using SBT with these profiles: ",
@@ -395,14 +415,14 @@ def run_scala_tests_sbt(test_modules, test_profiles):
     exec_sbt(profiles_and_goals)


-def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
+def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags):
     """Function to properly execute all tests passed in as a set from the
     `determine_test_suites` function"""
     set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")

     test_modules = set(test_modules)

-    test_profiles = get_hadoop_profiles(hadoop_version) + \
+    test_profiles = extra_profiles + \
         list(set(itertools.chain.from_iterable(m.build_profile_flags for m in test_modules)))

     if excluded_tags:
@@ -555,6 +575,7 @@ def main():
         # to reflect the environment settings
         build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
         hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
+        hive_version = os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3")
         test_env = "amplab_jenkins"
         # add path for Python3 in Jenkins if we're calling from a Jenkins machine
         # TODO(sknapp): after all builds are ported to the ubuntu workers, change this to be:
@@ -564,10 +585,12 @@ def main():
         # else we're running locally and can use local settings
         build_tool = "sbt"
         hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
+        hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
         test_env = "local"

     print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
-          "under environment", test_env)
+          "and Hive profile", hive_version, "under environment", test_env)
+    extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version)

     changed_modules = None
     changed_files = None
@@ -601,8 +624,7 @@ def main():
     if not changed_files or any(f.endswith(".scala")
                                 or f.endswith("scalastyle-config.xml")
                                 for f in changed_files):
-        build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-        run_scala_style_checks(build_profiles)
+        run_scala_style_checks(extra_profiles)
     should_run_java_style_checks = False
     if not changed_files or any(f.endswith(".java")
                                 or f.endswith("checkstyle.xml")
@@ -630,18 +652,18 @@ def main():
         run_build_tests()

     # spark build
-    build_apache_spark(build_tool, hadoop_version)
+    build_apache_spark(build_tool, extra_profiles)

     # backwards compatibility checks
     if build_tool == "sbt":
         # Note: compatibility tests only supported in sbt for now
-        detect_binary_inop_with_mima(hadoop_version)
+        detect_binary_inop_with_mima(extra_profiles)
         # Since we did not build assembly/package before running dev/mima, we need to
         # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks)
+        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)

     # run the test suites
-    run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
+    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags)

     modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
     if modules_with_python_tests:
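
To make the resulting flags concrete, here is a small standalone sketch (assuming the version-to-flag mappings shown in the diff above) of how `extra_profiles` is composed once Hadoop and Hive are selected independently. The helper `extra_profiles_for` is hypothetical; in the script itself the composition happens in `main()` via `get_hadoop_profiles(...) + get_hive_profiles(...)`.

```python
# Standalone sketch of the profile composition; the dictionaries copy the
# mappings from the diff above, while extra_profiles_for is only an
# illustrative stand-in for the composition done in main().
sbt_maven_hadoop_profiles = {
    "hadoop2.7": ["-Phadoop-2.7"],
    "hadoop3.2": ["-Phadoop-3.2"],
}
sbt_maven_hive_profiles = {
    "hive1.2": ["-Phive-1.2"],
    "hive2.3": ["-Phive-2.3"],
}

def extra_profiles_for(hadoop_version, hive_version):
    # Any Hadoop/Hive combination is now valid, since the Hive flag is no
    # longer baked into the Hadoop mapping.
    return sbt_maven_hadoop_profiles[hadoop_version] + sbt_maven_hive_profiles[hive_version]

# Default PR builder configuration after this change:
print(extra_profiles_for("hadoop2.7", "hive2.3"))  # ['-Phadoop-2.7', '-Phive-2.3']
# The [test-hive1.2][test-hadoop3.2] combination from the test section:
print(extra_profiles_for("hadoop3.2", "hive1.2"))  # ['-Phadoop-3.2', '-Phive-1.2']
```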

project/SparkBuild.scala

Lines changed: 2 additions & 1 deletion
@@ -476,7 +476,8 @@ object SparkParallelTestGrouping {
     "org.apache.spark.ml.classification.LinearSVCSuite",
     "org.apache.spark.sql.SQLQueryTestSuite",
     "org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite",
-    "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite"
+    "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite",
+    "org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite"
   )

   private val DEFAULT_TEST_GROUP = "default_test_group"
