apache · scwf · Nov 6, 2014 · Nov 6, 2014 · Nov 6, 2014 · Nov 7, 2014
diff --git a/assembly/pom.xml b/assembly/pom.xml
@@ -119,6 +119,7 @@
               <artifact>*:*</artifact>
               <excludes>
                 <exclude>org/datanucleus/**</exclude>
+                <exclude>org/apache/hive/beeline/**</exclude>
                 <exclude>META-INF/*.SF</exclude>
                 <exclude>META-INF/*.DSA</exclude>
                 <exclude>META-INF/*.RSA</exclude>

diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
@@ -56,21 +56,30 @@ if exist "%FWDIR%RELEASE" (
 
 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
 
-rem When Hive support is needed, Datanucleus jars must be included on the classpath.
-rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
-rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
-rem built with Hive, so look for them there.
 if exist "%FWDIR%RELEASE" (
   set datanucleus_dir=%FWDIR%lib
 ) else (
   set datanucleus_dir=%FWDIR%lib_managed\jars
 )
+
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
 set "datanucleus_jars="
 for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
   set datanucleus_jars=!datanucleus_jars!;%%d
 )
 set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
 
+rem SPARK-4261: add the hive-beeline jar (copied next to the datanucleus jars) to the classpath so
+rem beeline reports the right version info. Reset the variable first, as done for datanucleus_jars.
+set "hivebeeline_jar="
+for %%d in ("%datanucleus_dir%\hive-beeline*.jar") do (
+  set hivebeeline_jar=!hivebeeline_jar!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%hivebeeline_jar%
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes

diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
@@ -96,21 +96,25 @@ fi
 
 CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 
-# When Hive support is needed, Datanucleus jars must be included on the classpath.
-# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
-# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
-# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
-# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
-# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
 if [ -f "$FWDIR/RELEASE" ]; then
   datanucleus_dir="$FWDIR"/lib
 else
   datanucleus_dir="$FWDIR"/lib_managed/jars
 fi
 
+# When Hive support is needed, Datanucleus jars must be included on the classpath.
+# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
+# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
+# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
 datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar$")"
 datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"
 
+# SPARK-4261: pick up the hive-beeline jar(s) so that beeline reports the right version info.
+hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline-.*\\.jar$")"
+hivebeeline_jar="$(echo "$hivebeeline_jar" | tr "\n" : | sed s/:$//g)"
+
 if [ -n "$datanucleus_jars" ]; then
   hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
   if [ -n "$hive_files" ]; then
@@ -119,6 +123,10 @@ if [ -n "$datanucleus_jars" ]; then
   fi
 fi
 
+if [ -n "$hivebeeline_jar" ]; then
+  CLASSPATH="$CLASSPATH:$hivebeeline_jar"
+fi
+
 # Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
 if [[ $SPARK_TESTING == 1 ]]; then
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/test-classes"

diff --git a/pom.xml b/pom.xml
@@ -870,6 +870,11 @@
           <artifactId>build-helper-maven-plugin</artifactId>
           <version>1.8</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-dependency-plugin</artifactId>
+          <version>2.9</version>
+        </plugin>
         <plugin>
           <groupId>net.alchim31.maven</groupId>
           <artifactId>scala-maven-plugin</artifactId>

diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
@@ -302,13 +302,14 @@ object Assembly {
       }
     },
     mergeStrategy in assembly := {
-      case PathList("org", "datanucleus", xs @ _*)             => MergeStrategy.discard
-      case m if m.toLowerCase.endsWith("manifest.mf")          => MergeStrategy.discard
-      case m if m.toLowerCase.matches("meta-inf.*\\.sf$")      => MergeStrategy.discard
-      case "log4j.properties"                                  => MergeStrategy.discard
-      case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
-      case "reference.conf"                                    => MergeStrategy.concat
-      case _                                                   => MergeStrategy.first
+      case PathList("org", "datanucleus", xs @ _*)               => MergeStrategy.discard
+      case PathList("org", "apache", "hive", "beeline", xs @ _*) => MergeStrategy.discard
+      case m if m.toLowerCase.endsWith("manifest.mf")            => MergeStrategy.discard
+      case m if m.toLowerCase.matches("meta-inf.*\\.sf$")        => MergeStrategy.discard
+      case "log4j.properties"                                    => MergeStrategy.discard
+      case m if m.toLowerCase.startsWith("meta-inf/services/")   => MergeStrategy.filterDistinctLines
+      case "reference.conf"                                      => MergeStrategy.concat
+      case _                                                     => MergeStrategy.first
     }
   )
 

diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
@@ -95,6 +95,29 @@
           <skip>true</skip>
         </configuration>
       </plugin>
+      <!-- Copy the hive-beeline jar to spark/lib_managed/jars for bin/compute-classpath.{sh,cmd} -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-dependencies</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <!-- basedir is spark/sql/hive-thriftserver/, so ../../ resolves to the Spark root -->
+              <outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
+              <overWriteReleases>false</overWriteReleases>
+              <overWriteSnapshots>false</overWriteSnapshots>
+              <overWriteIfNewer>true</overWriteIfNewer>
+              <includeGroupIds>org.spark-project.hive</includeGroupIds>
+              <includeArtifactIds>hive-beeline</includeArtifactIds>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 </project>
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
@@ -185,7 +185,6 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
-        <version>2.4</version>
         <executions>
           <execution>
             <id>copy-dependencies</id>