Skip to content

Commit f6ff2a6

Browse files
liancheng authored and pwendell committed
[SPARK-2410][SQL] Merging Hive Thrift/JDBC server
(This is a replacement of #1399, trying to fix potential `HiveThriftServer2` port collision between parallel builds. Please refer to [these comments](#1399 (comment)) for details.) JIRA issue: [SPARK-2410](https://issues.apache.org/jira/browse/SPARK-2410) Merging the Hive Thrift/JDBC server from [branch-1.0-jdbc](https://github.com/apache/spark/tree/branch-1.0-jdbc). Thanks chenghao-intel for his initial contribution of the Spark SQL CLI. Author: Cheng Lian <[email protected]> Closes #1600 from liancheng/jdbc and squashes the following commits: ac4618b [Cheng Lian] Uses random port for HiveThriftServer2 to avoid collision with parallel builds 090beea [Cheng Lian] Revert changes related to SPARK-2678, decided to move them to another PR 21c6cf4 [Cheng Lian] Updated Spark SQL programming guide docs fe0af31 [Cheng Lian] Reordered spark-submit options in spark-shell[.cmd] 199e3fb [Cheng Lian] Disabled MIMA for hive-thriftserver 1083e9d [Cheng Lian] Fixed failed test suites 7db82a1 [Cheng Lian] Fixed spark-submit application options handling logic 9cc0f06 [Cheng Lian] Starts beeline with spark-submit cfcf461 [Cheng Lian] Updated documents and build scripts for the newly added hive-thriftserver profile 061880f [Cheng Lian] Addressed all comments by @pwendell 7755062 [Cheng Lian] Adapts test suites to spark-submit settings 40bafef [Cheng Lian] Fixed more license header issues e214aab [Cheng Lian] Added missing license headers b8905ba [Cheng Lian] Fixed minor issues in spark-sql and start-thriftserver.sh f975d22 [Cheng Lian] Updated docs for Hive compatibility and Shark migration guide draft 3ad4e75 [Cheng Lian] Starts spark-sql shell with spark-submit a5310d1 [Cheng Lian] Make HiveThriftServer2 play well with spark-submit 61f39f4 [Cheng Lian] Starts Hive Thrift server via spark-submit 2c4c539 [Cheng Lian] Cherry picked the Hive Thrift server
1 parent 2bbf235 commit f6ff2a6

File tree

54 files changed

+1781
-96
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

54 files changed

+1781
-96
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,4 @@ metastore_db/
5757
metastore/
5858
warehouse/
5959
TempStatsStore/
60+
sql/hive-thriftserver/test_warehouses

assembly/pom.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,16 @@
165165
</dependency>
166166
</dependencies>
167167
</profile>
168+
<profile>
169+
<id>hive-thriftserver</id>
170+
<dependencies>
171+
<dependency>
172+
<groupId>org.apache.spark</groupId>
173+
<artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
174+
<version>${project.version}</version>
175+
</dependency>
176+
</dependencies>
177+
</profile>
168178
<profile>
169179
<id>spark-ganglia-lgpl</id>
170180
<dependencies>

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
<groupId>org.apache.spark</groupId>
2929
<artifactId>spark-bagel_2.10</artifactId>
3030
<properties>
31-
<sbt.project.name>bagel</sbt.project.name>
31+
<sbt.project.name>bagel</sbt.project.name>
3232
</properties>
3333
<packaging>jar</packaging>
3434
<name>Spark Project Bagel</name>

bin/beeline

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
# Figure out where Spark is installed
21+
FWDIR="$(cd `dirname $0`/..; pwd)"
22+
23+
# Find the java binary
24+
if [ -n "${JAVA_HOME}" ]; then
25+
RUNNER="${JAVA_HOME}/bin/java"
26+
else
27+
if [ `command -v java` ]; then
28+
RUNNER="java"
29+
else
30+
echo "JAVA_HOME is not set" >&2
31+
exit 1
32+
fi
33+
fi
34+
35+
# Compute classpath using external script
36+
classpath_output=$($FWDIR/bin/compute-classpath.sh)
37+
if [[ "$?" != "0" ]]; then
38+
echo "$classpath_output"
39+
exit 1
40+
else
41+
CLASSPATH=$classpath_output
42+
fi
43+
44+
CLASS="org.apache.hive.beeline.BeeLine"
45+
exec "$RUNNER" -cp "$CLASSPATH" $CLASS "$@"

bin/compute-classpath.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ if [ -n "$SPARK_PREPEND_CLASSES" ]; then
5252
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
5353
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
5454
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
55+
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive-thriftserver/target/scala-$SCALA_VERSION/classes"
5556
CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes"
5657
fi
5758

bin/spark-shell

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,11 @@ function main(){
4646
# (see https://github.com/sbt/sbt/issues/562).
4747
stty -icanon min 1 -echo > /dev/null 2>&1
4848
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
49-
$FWDIR/bin/spark-submit spark-shell "$@" --class org.apache.spark.repl.Main
49+
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
5050
stty icanon echo > /dev/null 2>&1
5151
else
5252
export SPARK_SUBMIT_OPTS
53-
$FWDIR/bin/spark-submit spark-shell "$@" --class org.apache.spark.repl.Main
53+
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
5454
fi
5555
}
5656

bin/spark-shell.cmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ rem
1919

2020
set SPARK_HOME=%~dp0..
2121

22-
cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd spark-shell %* --class org.apache.spark.repl.Main
22+
cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd spark-shell --class org.apache.spark.repl.Main %*

bin/spark-sql

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
#
21+
# Shell script for starting the Spark SQL CLI
22+
23+
# Enter posix mode for bash
24+
set -o posix
25+
26+
# Figure out where Spark is installed
27+
FWDIR="$(cd `dirname $0`/..; pwd)"
28+
29+
if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
30+
echo "Usage: ./sbin/spark-sql [options]"
31+
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
32+
exit 0
33+
fi
34+
35+
CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
36+
exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@

core/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
<groupId>org.apache.spark</groupId>
2929
<artifactId>spark-core_2.10</artifactId>
3030
<properties>
31-
<sbt.project.name>core</sbt.project.name>
31+
<sbt.project.name>core</sbt.project.name>
3232
</properties>
3333
<packaging>jar</packaging>
3434
<name>Spark Project Core</name>

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ object SparkSubmit {
4646
private val CLUSTER = 2
4747
private val ALL_DEPLOY_MODES = CLIENT | CLUSTER
4848

49+
// A special jar name that indicates the class being run is inside of Spark itself, and therefore
50+
// no user jar is needed.
51+
private val SPARK_INTERNAL = "spark-internal"
52+
4953
// Special primary resource names that represent shells rather than application jars.
5054
private val SPARK_SHELL = "spark-shell"
5155
private val PYSPARK_SHELL = "pyspark-shell"
@@ -257,7 +261,9 @@ object SparkSubmit {
257261
// In yarn-cluster mode, use yarn.Client as a wrapper around the user class
258262
if (clusterManager == YARN && deployMode == CLUSTER) {
259263
childMainClass = "org.apache.spark.deploy.yarn.Client"
260-
childArgs += ("--jar", args.primaryResource)
264+
if (args.primaryResource != SPARK_INTERNAL) {
265+
childArgs += ("--jar", args.primaryResource)
266+
}
261267
childArgs += ("--class", args.mainClass)
262268
if (args.childArgs != null) {
263269
args.childArgs.foreach { arg => childArgs += ("--arg", arg) }
@@ -332,7 +338,7 @@ object SparkSubmit {
332338
* Return whether the given primary resource represents a user jar.
333339
*/
334340
private def isUserJar(primaryResource: String): Boolean = {
335-
!isShell(primaryResource) && !isPython(primaryResource)
341+
!isShell(primaryResource) && !isPython(primaryResource) && !isInternal(primaryResource)
336342
}
337343

338344
/**
@@ -349,6 +355,10 @@ object SparkSubmit {
349355
primaryResource.endsWith(".py") || primaryResource == PYSPARK_SHELL
350356
}
351357

358+
private[spark] def isInternal(primaryResource: String): Boolean = {
359+
primaryResource == SPARK_INTERNAL
360+
}
361+
352362
/**
353363
* Merge a sequence of comma-separated file lists, some of which may be null to indicate
354364
* no files, into a single comma-separated string.

0 commit comments

Comments (0)