From c19cd28cca2565cfc238e9989e707a3eee7d2993 Mon Sep 17 00:00:00 2001 From: wangfei Date: Wed, 5 Nov 2014 16:47:24 -0800 Subject: [PATCH 1/4] make right version info for beeline --- assembly/pom.xml | 1 + bin/compute-classpath.cmd | 17 +++++++++++++---- bin/compute-classpath.sh | 20 ++++++++++++++------ sql/hive-thriftserver/pom.xml | 24 ++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 31a01e4d8e1de..e60ccdc1dbea8 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -119,6 +119,7 @@ *:* org/datanucleus/** + org/apache/hive/beeline/** META-INF/*.SF META-INF/*.DSA META-INF/*.RSA diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd index a4c099fb45b14..8ac858758c692 100644 --- a/bin/compute-classpath.cmd +++ b/bin/compute-classpath.cmd @@ -56,21 +56,30 @@ if exist "%FWDIR%RELEASE" ( set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR% -rem When Hive support is needed, Datanucleus jars must be included on the classpath. -rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. -rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is -rem built with Hive, so look for them there. if exist "%FWDIR%RELEASE" ( set datanucleus_dir=%FWDIR%lib ) else ( set datanucleus_dir=%FWDIR%lib_managed\jars ) + +rem When Hive support is needed, Datanucleus jars must be included on the classpath. +rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. +rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is +rem built with Hive, so look for them there. set "datanucleus_jars=" for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do ( set datanucleus_jars=!datanucleus_jars!;%%d ) set CLASSPATH=%CLASSPATH%;%datanucleus_jars% +rem Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version +rem from MANIFEST.MF. +for %%d in ("%datanucleus_dir%\hive-beeline*.jar") do ( + set hivebeeline_jar=!hivebeeline_jar!;%%d +) +set CLASSPATH=%CLASSPATH%;%hivebeeline_jar% + + set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 905bbaf99b374..49c6528140bee 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -98,21 +98,25 @@ fi CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR" -# When Hive support is needed, Datanucleus jars must be included on the classpath. -# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. -# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is -# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark -# assembly is built for Hive, before actually populating the CLASSPATH with the jars. -# Note that this check order is faster (by up to half a second) in the case where Hive is not used. if [ -f "$FWDIR/RELEASE" ]; then datanucleus_dir="$FWDIR"/lib else datanucleus_dir="$FWDIR"/lib_managed/jars fi +# When Hive support is needed, Datanucleus jars must be included on the classpath. +# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. +# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is +# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark +# assembly is built for Hive, before actually populating the CLASSPATH with the jars. +# Note that this check order is faster (by up to half a second) in the case where Hive is not used. datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")" datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)" +# Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version +# from MANIFEST.MF. +hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline*\\.jar")" + if [ -n "$datanucleus_jars" ]; then hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null) if [ -n "$hive_files" ]; then @@ -121,6 +125,10 @@ if [ -n "$datanucleus_jars" ]; then fi fi +if [ -n "$hivebeeline_jar" ]; then + CLASSPATH="$CLASSPATH:$hivebeeline_jar" +fi + # Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1 if [[ $SPARK_TESTING == 1 ]]; then CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes" diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 8db3010624100..9a4548010cb93 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -95,6 +95,30 @@ true + + + org.apache.maven.plugins + maven-dependency-plugin + 2.4 + + + copy-dependencies + package + + copy-dependencies + + + + ${basedir}/../../lib_managed/jars + false + false + true + org.spark-project.hive + hive-beeline + + + + From 80117011af4f981d0989f46630d1d13b4f898a37 Mon Sep 17 00:00:00 2001 From: wangfei Date: Wed, 5 Nov 2014 21:56:00 -0800 Subject: [PATCH 2/4] bug fix --- bin/compute-classpath.cmd | 4 ++-- bin/compute-classpath.sh | 6 +++--- sql/hive-thriftserver/pom.xml | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd index 8ac858758c692..ad5760f90b8d0 100644 --- a/bin/compute-classpath.cmd +++ b/bin/compute-classpath.cmd @@ -72,8 +72,8 @@ for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do ( ) set CLASSPATH=%CLASSPATH%;%datanucleus_jars% -rem Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version -rem from MANIFEST.MF. +rem SPARK-4261: make right version info for beeline, copy hive-beeline*.jar to "lib_managed/jars/". +rem Here add beeline jar to classpath. for %%d in ("%datanucleus_dir%\hive-beeline*.jar") do ( set hivebeeline_jar=!hivebeeline_jar!;%%d ) diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 49c6528140bee..3948cf3c3286c 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -113,9 +113,9 @@ fi datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")" datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)" -# Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version -# from MANIFEST.MF. -hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline*\\.jar")" +# SPARK-4261: make right version info for beeline, copy hive-beeline*.jar to "lib_managed/jars/". +# Here add beeline jar to classpath. +hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline-.*\\.jar")" if [ -n "$datanucleus_jars" ]; then hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null) diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 9a4548010cb93..c5fd740eb225a 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -95,7 +95,7 @@ true - + org.apache.maven.plugins maven-dependency-plugin @@ -108,13 +108,13 @@ copy-dependencies - + ${basedir}/../../lib_managed/jars false false true org.spark-project.hive - hive-beeline + hive-beeline From 4230322c6d7e85aa5896ee76f520e8f2ec225ab7 Mon Sep 17 00:00:00 2001 From: scwf Date: Thu, 6 Nov 2014 00:50:57 -0800 Subject: [PATCH 3/4] inherit version config from the parent pom --- pom.xml | 5 +++++ sql/hive-thriftserver/pom.xml | 1 - sql/hive/pom.xml | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index eb613531b8a5f..e8997dfe40f56 100644 --- a/pom.xml +++ b/pom.xml @@ -870,6 +870,11 @@ build-helper-maven-plugin 1.8 + + org.apache.maven.plugins + maven-dependency-plugin + 2.9 + net.alchim31.maven scala-maven-plugin diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index c5fd740eb225a..4a4255699e6dd 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -99,7 +99,6 @@ org.apache.maven.plugins maven-dependency-plugin - 2.4 copy-dependencies diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 67e36a951e506..2d7c2c49be0c2 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -188,7 +188,6 @@ org.apache.maven.plugins maven-dependency-plugin - 2.4 copy-dependencies From 204bab104adb5dc5f0257ca9d3578f0f78e46391 Mon Sep 17 00:00:00 2001 From: scwf Date: Thu, 6 Nov 2014 20:24:44 -0800 Subject: [PATCH 4/4] fix beeline version when sbt --- project/SparkBuild.scala | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 33618f5401768..a5ab68b9c1ad9 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -273,13 +273,14 @@ object Assembly { jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" + Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" }, mergeStrategy in assembly := { - case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard - case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard - case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard - case "log4j.properties" => MergeStrategy.discard - case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines - case "reference.conf" => MergeStrategy.concat - case _ => MergeStrategy.first + case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard + case PathList("org", "apache", "hive", "beeline", xs @ _*) => MergeStrategy.discard + case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard + case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard + case "log4j.properties" => MergeStrategy.discard + case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines + case "reference.conf" => MergeStrategy.concat + case _ => MergeStrategy.first } )