From c19cd28cca2565cfc238e9989e707a3eee7d2993 Mon Sep 17 00:00:00 2001
From: wangfei <wangfei1@huawei.com>
Date: Wed, 5 Nov 2014 16:47:24 -0800
Subject: [PATCH 1/4] make right version info for beeline

---
 assembly/pom.xml              |  1 +
 bin/compute-classpath.cmd     | 17 +++++++++++++----
 bin/compute-classpath.sh      | 20 ++++++++++++++------
 sql/hive-thriftserver/pom.xml | 24 ++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 31a01e4d8e1de..e60ccdc1dbea8 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -119,6 +119,7 @@
               <artifact>*:*</artifact>
               <excludes>
                 <exclude>org/datanucleus/**</exclude>
+                <exclude>org/apache/hive/beeline/**</exclude>
                 <exclude>META-INF/*.SF</exclude>
                 <exclude>META-INF/*.DSA</exclude>
                 <exclude>META-INF/*.RSA</exclude>
diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index a4c099fb45b14..8ac858758c692 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -56,21 +56,30 @@ if exist "%FWDIR%RELEASE" (
 
 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
 
-rem When Hive support is needed, Datanucleus jars must be included on the classpath.
-rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
-rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
-rem built with Hive, so look for them there.
 if exist "%FWDIR%RELEASE" (
   set datanucleus_dir=%FWDIR%lib
 ) else (
   set datanucleus_dir=%FWDIR%lib_managed\jars
 )
+
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
 set "datanucleus_jars="
 for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
   set datanucleus_jars=!datanucleus_jars!;%%d
 )
 set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
 
+rem Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version
+rem from MANIFEST.MF.
+for %%d in ("%datanucleus_dir%\hive-beeline*.jar") do (
+  set hivebeeline_jar=!hivebeeline_jar!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%hivebeeline_jar%
+
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 905bbaf99b374..49c6528140bee 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -98,21 +98,25 @@ fi
 
 CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 
-# When Hive support is needed, Datanucleus jars must be included on the classpath.
-# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
-# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
-# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
-# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
-# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
 if [ -f "$FWDIR/RELEASE" ]; then
   datanucleus_dir="$FWDIR"/lib
 else
   datanucleus_dir="$FWDIR"/lib_managed/jars
 fi
 
+# When Hive support is needed, Datanucleus jars must be included on the classpath.
+# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
+# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
+# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
 datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
 datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"
 
+# Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version
+# from MANIFEST.MF.
+hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline*\\.jar")"
+
 if [ -n "$datanucleus_jars" ]; then
   hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
   if [ -n "$hive_files" ]; then
@@ -121,6 +125,10 @@ if [ -n "$datanucleus_jars" ]; then
   fi
 fi
 
+if [ -n "$hivebeeline_jar" ]; then
+    CLASSPATH="$CLASSPATH:$hivebeeline_jar"
+fi
+
 # Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
 if [[ $SPARK_TESTING == 1 ]]; then
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index 8db3010624100..9a4548010cb93 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -95,6 +95,30 @@
           <skip>true</skip>
         </configuration>
       </plugin>
+      <!-- Deploy datanucleus jars to the spark/lib_managed/jars directory -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>copy-dependencies</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <!-- basedir is spark/sql/hive/ -->
+              <outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
+              <overWriteReleases>false</overWriteReleases>
+              <overWriteSnapshots>false</overWriteSnapshots>
+              <overWriteIfNewer>true</overWriteIfNewer>
+              <includeGroupIds>org.spark-project.hive</includeGroupIds>
+              <includeClassifiers>hive-beeline</includeClassifiers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 </project>

From 80117011af4f981d0989f46630d1d13b4f898a37 Mon Sep 17 00:00:00 2001
From: wangfei <wangfei1@huawei.com>
Date: Wed, 5 Nov 2014 21:56:00 -0800
Subject: [PATCH 2/4] fix hive-beeline jar lookup pattern and dependency filter

---
 bin/compute-classpath.cmd     | 4 ++--
 bin/compute-classpath.sh      | 6 +++---
 sql/hive-thriftserver/pom.xml | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 8ac858758c692..ad5760f90b8d0 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -72,8 +72,8 @@ for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
 )
 set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
 
-rem Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version
-rem from MANIFEST.MF.
+rem SPARK-4261: so that beeline reports the correct version info, hive-beeline*.jar is copied to
+rem "lib_managed/jars/" rather than merged into the assembly jar. Add it to the classpath here.
 for %%d in ("%datanucleus_dir%\hive-beeline*.jar") do (
   set hivebeeline_jar=!hivebeeline_jar!;%%d
 )
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 49c6528140bee..3948cf3c3286c 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -113,9 +113,9 @@ fi
 datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
 datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"
 
-# Exclude hive-beeline*.jar from uber jar, since beeline need the Implementation-Version
-# from MANIFEST.MF.
-hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline*\\.jar")"
+# SPARK-4261: so that beeline reports the correct version info, hive-beeline*.jar is copied to
+# "lib_managed/jars/" rather than merged into the assembly jar. Add it to the classpath here.
+hivebeeline_jar="$(find "$datanucleus_dir" 2>/dev/null | grep "hive-beeline-.*\\.jar")"
 
 if [ -n "$datanucleus_jars" ]; then
   hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index 9a4548010cb93..c5fd740eb225a 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -95,7 +95,7 @@
           <skip>true</skip>
         </configuration>
       </plugin>
-      <!-- Deploy datanucleus jars to the spark/lib_managed/jars directory -->
+      <!-- Deploy hive-beeline jar to the spark/lib_managed/jars directory -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
@@ -108,13 +108,13 @@
               <goal>copy-dependencies</goal>
             </goals>
             <configuration>
-              <!-- basedir is spark/sql/hive/ -->
+              <!-- basedir is spark/sql/hive-thriftserver/ -->
               <outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
               <overWriteReleases>false</overWriteReleases>
               <overWriteSnapshots>false</overWriteSnapshots>
               <overWriteIfNewer>true</overWriteIfNewer>
               <includeGroupIds>org.spark-project.hive</includeGroupIds>
-              <includeClassifiers>hive-beeline</includeClassifiers>
+              <includeArtifactIds>hive-beeline</includeArtifactIds>
             </configuration>
           </execution>
         </executions>

From 4230322c6d7e85aa5896ee76f520e8f2ec225ab7 Mon Sep 17 00:00:00 2001
From: scwf <wangfei1@huawei.com>
Date: Thu, 6 Nov 2014 00:50:57 -0800
Subject: [PATCH 3/4] inherit version config from the parent pom

---
 pom.xml                       | 5 +++++
 sql/hive-thriftserver/pom.xml | 1 -
 sql/hive/pom.xml              | 1 -
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index eb613531b8a5f..e8997dfe40f56 100644
--- a/pom.xml
+++ b/pom.xml
@@ -870,6 +870,11 @@
           <artifactId>build-helper-maven-plugin</artifactId>
           <version>1.8</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-dependency-plugin</artifactId>
+          <version>2.9</version>
+        </plugin>
         <plugin>
           <groupId>net.alchim31.maven</groupId>
           <artifactId>scala-maven-plugin</artifactId>
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index c5fd740eb225a..4a4255699e6dd 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -99,7 +99,6 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
-        <version>2.4</version>
         <executions>
           <execution>
             <id>copy-dependencies</id>
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 67e36a951e506..2d7c2c49be0c2 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -188,7 +188,6 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
-        <version>2.4</version>
         <executions>
           <execution>
             <id>copy-dependencies</id>

From 204bab104adb5dc5f0257ca9d3578f0f78e46391 Mon Sep 17 00:00:00 2001
From: scwf <wangfei1@huawei.com>
Date: Thu, 6 Nov 2014 20:24:44 -0800
Subject: [PATCH 4/4] fix beeline version when building with sbt

---
 project/SparkBuild.scala | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 33618f5401768..a5ab68b9c1ad9 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -273,13 +273,14 @@ object Assembly {
     jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" +
       Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" },
     mergeStrategy in assembly := {
-      case PathList("org", "datanucleus", xs @ _*)             => MergeStrategy.discard
-      case m if m.toLowerCase.endsWith("manifest.mf")          => MergeStrategy.discard
-      case m if m.toLowerCase.matches("meta-inf.*\\.sf$")      => MergeStrategy.discard
-      case "log4j.properties"                                  => MergeStrategy.discard
-      case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
-      case "reference.conf"                                    => MergeStrategy.concat
-      case _                                                   => MergeStrategy.first
+      case PathList("org", "datanucleus", xs @ _*)               => MergeStrategy.discard
+      case PathList("org", "apache", "hive", "beeline", xs @ _*) => MergeStrategy.discard
+      case m if m.toLowerCase.endsWith("manifest.mf")            => MergeStrategy.discard
+      case m if m.toLowerCase.matches("meta-inf.*\\.sf$")        => MergeStrategy.discard
+      case "log4j.properties"                                    => MergeStrategy.discard
+      case m if m.toLowerCase.startsWith("meta-inf/services/")   => MergeStrategy.filterDistinctLines
+      case "reference.conf"                                      => MergeStrategy.concat
+      case _                                                     => MergeStrategy.first
     }
   )