diff --git a/assembly/pom.xml b/assembly/pom.xml index 78fb908f9a9ef..82ed4a297819f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -119,6 +119,7 @@ *:* org/datanucleus/** + org/apache/hive/beeline/** META-INF/*.SF META-INF/*.DSA META-INF/*.RSA diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd index a4c099fb45b14..ad5760f90b8d0 100644 --- a/bin/compute-classpath.cmd +++ b/bin/compute-classpath.cmd @@ -56,21 +56,30 @@ if exist "%FWDIR%RELEASE" ( set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR% -rem When Hive support is needed, Datanucleus jars must be included on the classpath. -rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. -rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is -rem built with Hive, so look for them there. if exist "%FWDIR%RELEASE" ( set datanucleus_dir=%FWDIR%lib ) else ( set datanucleus_dir=%FWDIR%lib_managed\jars ) + +rem When Hive support is needed, Datanucleus jars must be included on the classpath. +rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. +rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is +rem built with Hive, so look for them there. set "datanucleus_jars=" for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do ( set datanucleus_jars=!datanucleus_jars!;%%d ) set CLASSPATH=%CLASSPATH%;%datanucleus_jars% +rem SPARK-4261: so that beeline reports the correct version info, hive-beeline*.jar is copied to "lib_managed/jars/". +rem Add the beeline jar to the classpath here. 
+for %%d in ("%datanucleus_dir%\hive-beeline*.jar") do ( + set hivebeeline_jar=!hivebeeline_jar!;%%d +) +set CLASSPATH=%CLASSPATH%;%hivebeeline_jar% + + set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 685051eeed9f1..5adb267816df8 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -96,21 +96,25 @@ fi CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR" -# When Hive support is needed, Datanucleus jars must be included on the classpath. -# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. -# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is -# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark -# assembly is built for Hive, before actually populating the CLASSPATH with the jars. -# Note that this check order is faster (by up to half a second) in the case where Hive is not used. if [ -f "$FWDIR/RELEASE" ]; then datanucleus_dir="$FWDIR"/lib else datanucleus_dir="$FWDIR"/lib_managed/jars fi +# When Hive support is needed, Datanucleus jars must be included on the classpath. +# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. +# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is +# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark +# assembly is built for Hive, before actually populating the CLASSPATH with the jars. +# Note that this check order is faster (by up to half a second) in the case where Hive is not used. 
datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar$")" datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)" +# SPARK-4261: so that beeline reports the correct version info, hive-beeline*.jar is copied to "lib_managed/jars/". +# Add the beeline jar(s) to the classpath here; match only names ending in .jar and join multiple matches with ':'. +hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline-.*\\.jar$" | tr "\n" : | sed s/:$//g)" + if [ -n "$datanucleus_jars" ]; then hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null) if [ -n "$hive_files" ]; then @@ -119,6 +123,10 @@ if [ -n "$datanucleus_jars" ]; then fi fi +if [ -n "$hivebeeline_jar" ]; then + CLASSPATH="$CLASSPATH:$hivebeeline_jar" +fi + # Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1 if [[ $SPARK_TESTING == 1 ]]; then CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/test-classes" diff --git a/pom.xml b/pom.xml index f42257265ede3..6cf5953ba0209 100644 --- a/pom.xml +++ b/pom.xml @@ -870,6 +870,11 @@ build-helper-maven-plugin 1.8 + + org.apache.maven.plugins + maven-dependency-plugin + 2.9 + net.alchim31.maven scala-maven-plugin diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 39ac27f820d89..8dd2d2b5f6d92 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -302,13 +302,14 @@ object Assembly { } }, mergeStrategy in assembly := { - case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard - case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard - case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard - case "log4j.properties" => MergeStrategy.discard - case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines - case "reference.conf" => MergeStrategy.concat - case _ => MergeStrategy.first + case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard + case PathList("org", "apache", "hive", "beeline", xs @ _*) => MergeStrategy.discard 
+ case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard + case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard + case "log4j.properties" => MergeStrategy.discard + case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines + case "reference.conf" => MergeStrategy.concat + case _ => MergeStrategy.first } ) diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 490cfbce654d7..dc03b3ca7c49a 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -95,6 +95,29 @@ true + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-dependencies + package + + copy-dependencies + + + + ${basedir}/../../lib_managed/jars + false + false + true + org.spark-project.hive + hive-beeline + + + + diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index f6805b942153a..a6a38744d85c9 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -185,7 +185,6 @@ org.apache.maven.plugins maven-dependency-plugin - 2.4 copy-dependencies