Commit a6cd311

liancheng authored and pwendell committed
[SPARK-2678][Core][SQL] A workaround for SPARK-2678
JIRA issues:
- Main: [SPARK-2678](https://issues.apache.org/jira/browse/SPARK-2678)
- Related: [SPARK-2874](https://issues.apache.org/jira/browse/SPARK-2874)

Related PR:
- apache#1715

This PR is both a fix for SPARK-2874 and a workaround for SPARK-2678. Fixing SPARK-2678 completely requires API-level changes that need further discussion, and we decided not to include it in the Spark 1.1 release. Since SPARK-2678 currently only affects Spark SQL scripts, this workaround is enough for Spark 1.1. The command line option handling logic in the bash scripts looks somewhat dirty and duplicated, but it provides a cleaner user interface and retains full backward compatibility for now.

Author: Cheng Lian <[email protected]>

Closes apache#1801 from liancheng/spark-2874 and squashes the following commits:

8045d7a [Cheng Lian] Make sure test suites pass
8493a9e [Cheng Lian] Using eval to retain quoted arguments
aed523f [Cheng Lian] Fixed typo in bin/spark-sql
f12a0b1 [Cheng Lian] Worked arount SPARK-2678
daee105 [Cheng Lian] Fixed usage messages of all Spark SQL related scripts
1 parent 4878911
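For context, the SPARK-2874 half of this change lands in SparkSubmitArguments: once the primary resource is seen, everything after it is forwarded to the user program untouched, even flags that spark-submit itself understands. A hypothetical invocation (jar and class names made up), mirroring the test added in SparkSubmitSuite below:

    # Everything after userjar.jar now goes verbatim to the user application,
    # including --master, which previously collided with spark-submit's own flag:
    ./bin/spark-submit --name myApp --class Foo userjar.jar --master local some --weird args
    # => the application's main() receives: --master local some --weird args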

8 files changed: +164, -75 lines

bin/beeline

Lines changed: 7 additions & 22 deletions
@@ -17,29 +17,14 @@
 # limitations under the License.
 #

-# Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+#
+# Shell script for starting BeeLine

-# Find the java binary
-if [ -n "${JAVA_HOME}" ]; then
-  RUNNER="${JAVA_HOME}/bin/java"
-else
-  if [ `command -v java` ]; then
-    RUNNER="java"
-  else
-    echo "JAVA_HOME is not set" >&2
-    exit 1
-  fi
-fi
+# Enter posix mode for bash
+set -o posix

-# Compute classpath using external script
-classpath_output=$($FWDIR/bin/compute-classpath.sh)
-if [[ "$?" != "0" ]]; then
-  echo "$classpath_output"
-  exit 1
-else
-  CLASSPATH=$classpath_output
-fi
+# Figure out where Spark is installed
+FWDIR="$(cd `dirname $0`/..; pwd)"

 CLASS="org.apache.hive.beeline.BeeLine"
-exec "$RUNNER" -cp "$CLASSPATH" $CLASS "$@"
+exec "$FWDIR/bin/spark-class" $CLASS "$@"

bin/spark-sql

Lines changed: 62 additions & 4 deletions
@@ -23,14 +23,72 @@
 # Enter posix mode for bash
 set -o posix

+CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
+
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"

-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./sbin/spark-sql [options]"
+function usage {
+  echo "Usage: ./sbin/spark-sql [options] [cli option]"
+  pattern="usage"
+  pattern+="\|Spark assembly has been built with Hive"
+  pattern+="\|NOTE: SPARK_PREPEND_CLASSES is set"
+  pattern+="\|Spark Command: "
+  pattern+="\|--help"
+  pattern+="\|======="
+
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  echo
+  echo "CLI options:"
+  $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+}
+
+function ensure_arg_number {
+  arg_number=$1
+  at_least=$2
+
+  if [[ $arg_number -lt $at_least ]]; then
+    usage
+    exit 1
+  fi
+}
+
+if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+  usage
   exit 0
 fi

-CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
-exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
+CLI_ARGS=()
+SUBMISSION_ARGS=()
+
+while (($#)); do
+  case $1 in
+    -d | --define | --database | -f | -h | --hiveconf | --hivevar | -i | -p)
+      ensure_arg_number $# 2
+      CLI_ARGS+=($1); shift
+      CLI_ARGS+=($1); shift
+      ;;
+
+    -e)
+      ensure_arg_number $# 2
+      CLI_ARGS+=($1); shift
+      CLI_ARGS+=(\"$1\"); shift
+      ;;
+
+    -s | --silent)
+      CLI_ARGS+=($1); shift
+      ;;
+
+    -v | --verbose)
+      # Both SparkSubmit and SparkSQLCLIDriver recognizes -v | --verbose
+      CLI_ARGS+=($1)
+      SUBMISSION_ARGS+=($1); shift
+      ;;
+
+    *)
+      SUBMISSION_ARGS+=($1); shift
+      ;;
+  esac
+done
+
+eval exec "$FWDIR"/bin/spark-submit --class $CLASS ${SUBMISSION_ARGS[*]} spark-internal ${CLI_ARGS[*]}
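A hypothetical session showing how the loop above splits one command line (option values made up); the final eval exec is what keeps the quoting around -e's SQL string intact:

    # --master falls through to SUBMISSION_ARGS; --hiveconf and -e become CLI_ARGS
    ./bin/spark-sql --master local[2] --hiveconf hive.cli.print.header=true -e "SELECT 1"

    # which effectively runs:
    #   spark-submit --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver \
    #     --master local[2] spark-internal --hiveconf hive.cli.print.header=true -e "SELECT 1"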

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 14 additions & 25 deletions
@@ -220,6 +220,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
   /** Fill in values by parsing user options. */
   private def parseOpts(opts: Seq[String]): Unit = {
     var inSparkOpts = true
+    val EQ_SEPARATED_OPT="""(--[^=]+)=(.+)""".r

     // Delineates parsing of Spark options from parsing of user options.
     parse(opts)
@@ -322,33 +323,21 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         verbose = true
         parse(tail)

+      case EQ_SEPARATED_OPT(opt, value) :: tail =>
+        parse(opt :: value :: tail)
+
+      case value :: tail if value.startsWith("-") =>
+        SparkSubmit.printErrorAndExit(s"Unrecognized option '$value'.")
+
       case value :: tail =>
-        if (inSparkOpts) {
-          value match {
-            // convert --foo=bar to --foo bar
-            case v if v.startsWith("--") && v.contains("=") && v.split("=").size == 2 =>
-              val parts = v.split("=")
-              parse(Seq(parts(0), parts(1)) ++ tail)
-            case v if v.startsWith("-") =>
-              val errMessage = s"Unrecognized option '$value'."
-              SparkSubmit.printErrorAndExit(errMessage)
-            case v =>
-              primaryResource =
-                if (!SparkSubmit.isShell(v) && !SparkSubmit.isInternal(v)) {
-                  Utils.resolveURI(v).toString
-                } else {
-                  v
-                }
-              inSparkOpts = false
-              isPython = SparkSubmit.isPython(v)
-              parse(tail)
+        primaryResource =
+          if (!SparkSubmit.isShell(value) && !SparkSubmit.isInternal(value)) {
+            Utils.resolveURI(value).toString
+          } else {
+            value
           }
-        } else {
-          if (!value.isEmpty) {
-            childArgs += value
-          }
-          parse(tail)
-        }
+        isPython = SparkSubmit.isPython(value)
+        childArgs ++= tail

       case Nil =>
     }
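The EQ_SEPARATED_OPT regex subsumes the old split("=") special case. A sketch of the effect (flag usage hypothetical):

    # Both spellings are rewritten to space-separated form before option matching:
    ./bin/spark-submit --name=myApp --class=Foo userjar.jar
    ./bin/spark-submit --name myApp --class Foo userjar.jar

    # Because (.+) captures everything after the first '=', a value that itself
    # contains '=' (rejected by the old split-size check) now parses cleanly:
    ./bin/spark-submit --some-opt=key=value userjar.jar    # --some-opt is hypothetical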

core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

Lines changed: 12 additions & 0 deletions
@@ -106,6 +106,18 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     appArgs.childArgs should be (Seq("some", "--weird", "args"))
   }

+  test("handles arguments to user program with name collision") {
+    val clArgs = Seq(
+      "--name", "myApp",
+      "--class", "Foo",
+      "userjar.jar",
+      "--master", "local",
+      "some",
+      "--weird", "args")
+    val appArgs = new SparkSubmitArguments(clArgs)
+    appArgs.childArgs should be (Seq("--master", "local", "some", "--weird", "args"))
+  }
+
   test("handles YARN cluster mode") {
     val clArgs = Seq(
       "--deploy-mode", "cluster",

sbin/start-thriftserver.sh

Lines changed: 46 additions & 4 deletions
@@ -26,11 +26,53 @@ set -o posix
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"

-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./sbin/start-thriftserver [options]"
+CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
+
+function usage {
+  echo "Usage: ./sbin/start-thriftserver [options] [thrift server options]"
+  pattern="usage"
+  pattern+="\|Spark assembly has been built with Hive"
+  pattern+="\|NOTE: SPARK_PREPEND_CLASSES is set"
+  pattern+="\|Spark Command: "
+  pattern+="\|======="
+  pattern+="\|--help"
+
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  echo
+  echo "Thrift server options:"
+  $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+}
+
+function ensure_arg_number {
+  arg_number=$1
+  at_least=$2
+
+  if [[ $arg_number -lt $at_least ]]; then
+    usage
+    exit 1
+  fi
+}
+
+if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+  usage
   exit 0
 fi

-CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
-exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
+THRIFT_SERVER_ARGS=()
+SUBMISSION_ARGS=()
+
+while (($#)); do
+  case $1 in
+    --hiveconf)
+      ensure_arg_number $# 2
+      THRIFT_SERVER_ARGS+=($1); shift
+      THRIFT_SERVER_ARGS+=($1); shift
+      ;;
+
+    *)
+      SUBMISSION_ARGS+=($1); shift
+      ;;
+  esac
+done
+
+eval exec "$FWDIR"/bin/spark-submit --class $CLASS ${SUBMISSION_ARGS[*]} spark-internal ${THRIFT_SERVER_ARGS[*]}
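Likewise for the Thrift server script, a hypothetical launch (port number made up): --master is routed to spark-submit, while each --hiveconf pair is held back for HiveThriftServer2:

    ./sbin/start-thriftserver.sh --master local --hiveconf hive.server2.thrift.port=10001

    # which effectively runs:
    #   spark-submit --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 \
    #     --master local spark-internal --hiveconf hive.server2.thrift.port=10001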

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala

Lines changed: 0 additions & 1 deletion
@@ -40,7 +40,6 @@ private[hive] object HiveThriftServer2 extends Logging {
   val optionsProcessor = new ServerOptionsProcessor("HiveThriftServer2")

   if (!optionsProcessor.process(args)) {
-    logWarning("Error starting HiveThriftServer2 with given arguments")
     System.exit(-1)
   }
sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala

Lines changed: 10 additions & 9 deletions
@@ -20,22 +20,23 @@ package org.apache.spark.sql.hive.thriftserver

 import java.io.{BufferedReader, InputStreamReader, PrintWriter}

+import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.scalatest.{BeforeAndAfterAll, FunSuite}

 class CliSuite extends FunSuite with BeforeAndAfterAll with TestUtils {
   val WAREHOUSE_PATH = TestUtils.getWarehousePath("cli")
   val METASTORE_PATH = TestUtils.getMetastorePath("cli")

   override def beforeAll() {
-    val pb = new ProcessBuilder(
-      "../../bin/spark-sql",
-      "--master",
-      "local",
-      "--hiveconf",
-      s"javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=$METASTORE_PATH;create=true",
-      "--hiveconf",
-      "hive.metastore.warehouse.dir=" + WAREHOUSE_PATH)
-
+    val jdbcUrl = s"jdbc:derby:;databaseName=$METASTORE_PATH;create=true"
+    val commands =
+      s"""../../bin/spark-sql
+         | --master local
+         | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}="$jdbcUrl"
+         | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$WAREHOUSE_PATH
+       """.stripMargin.split("\\s+")
+
+    val pb = new ProcessBuilder(commands: _*)
     process = pb.start()
     outputWriter = new PrintWriter(process.getOutputStream, true)
     inputReader = new BufferedReader(new InputStreamReader(process.getInputStream))

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala

Lines changed: 13 additions & 10 deletions
@@ -25,6 +25,7 @@ import java.io.{BufferedReader, InputStreamReader}
 import java.net.ServerSocket
 import java.sql.{Connection, DriverManager, Statement}

+import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.scalatest.{BeforeAndAfterAll, FunSuite}

 import org.apache.spark.Logging
@@ -63,16 +64,18 @@ class HiveThriftServer2Suite extends FunSuite with BeforeAndAfterAll with TestUt
     // Forking a new process to start the Hive Thrift server. The reason to do this is it is
     // hard to clean up Hive resources entirely, so we just start a new process and kill
     // that process for cleanup.
-    val defaultArgs = Seq(
-      "../../sbin/start-thriftserver.sh",
-      "--master local",
-      "--hiveconf",
-      "hive.root.logger=INFO,console",
-      "--hiveconf",
-      s"javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=$METASTORE_PATH;create=true",
-      "--hiveconf",
-      s"hive.metastore.warehouse.dir=$WAREHOUSE_PATH")
-    val pb = new ProcessBuilder(defaultArgs ++ args)
+    val jdbcUrl = s"jdbc:derby:;databaseName=$METASTORE_PATH;create=true"
+    val command =
+      s"""../../sbin/start-thriftserver.sh
+         | --master local
+         | --hiveconf hive.root.logger=INFO,console
+         | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}="$jdbcUrl"
+         | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$METASTORE_PATH
+         | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=$HOST
+         | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_PORT}=$PORT
+       """.stripMargin.split("\\s+")
+
+    val pb = new ProcessBuilder(command ++ args: _*)
     val environment = pb.environment()
     environment.put("HIVE_SERVER2_THRIFT_PORT", PORT.toString)
     environment.put("HIVE_SERVER2_THRIFT_BIND_HOST", HOST)
