Skip to content

Commit 7e8f010

Browse files
committed
better error messages and jar handling
1 parent e7b3941 commit 7e8f010

File tree

3 files changed

+37
-7
lines changed

3 files changed

+37
-7
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ private[hive] class HiveQLDialect extends Dialect {
6565
class HiveContext(sc: SparkContext) extends SQLContext(sc) {
6666
self =>
6767

68+
import HiveContext._
69+
6870
/**
6971
* When true, enables an experimental feature where metastore tables that use the parquet SerDe
7072
* are automatically converted to use the Spark SQL parquet table scan, instead of the Hive
@@ -103,18 +105,18 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
103105
* Spark SQL for execution.
104106
*/
105107
protected[hive] def hiveMetastoreVersion: String =
106-
getConf("spark.sql.hive.metastore.version", "0.13.1")
108+
getConf(HIVE_METASTORE_VERSION, "0.13.1")
107109

108110
/**
109111
* The location of the jars that should be used to instantiate the HiveMetastoreClient. This
110112
* property can be one of three options:
111-
* - a comma-separated list of jar files that could be passed to a URLClassLoader
113+
* - a colon-separated list of jar files or directories for hive and hadoop.
112114
* - builtin - attempt to discover the jars that were used to load Spark SQL and use those. This
113115
* option is only valid when using the execution version of Hive.
114116
* - maven - download the correct version of hive on demand from maven.
115117
*/
116118
protected[hive] def hiveMetastoreJars: String =
117-
getConf("spark.sql.hive.metastore.jars", "builtin")
119+
getConf(HIVE_METASTORE_JARS, "builtin")
118120

119121
@transient
120122
protected[sql] lazy val substitutor = new VariableSubstitution()
@@ -173,8 +175,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
173175
throw new IllegalArgumentException(
174176
"Builtin jars can only be used when hive execution version == hive metastore version. " +
175177
s"Execution: ${hiveExecutionVersion} != Metastore: ${hiveMetastoreVersion}. " +
176-
"Specify a vaild path to the correct hive jars using spark.sql.hive.metastore.jars " +
177-
s"or change spark.sql.hive.metastore.version to ${hiveExecutionVersion}.")
178+
s"Specify a valid path to the correct hive jars using $HIVE_METASTORE_JARS " +
179+
s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
178180
}
179181
val jars = getClass.getClassLoader match {
180182
case urlClassLoader: java.net.URLClassLoader => urlClassLoader.getURLs
@@ -198,7 +200,17 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
198200
s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using maven.")
199201
IsolatedClientLoader.forVersion(hiveMetastoreVersion, allConfig )
200202
} else {
201-
val jars = hiveMetastoreJars.split(",").map(new java.net.URL(_))
203+
// Convert to files and expand any directories.
204+
val jars =
205+
hiveMetastoreJars
206+
.split(":")
207+
.map(new java.io.File(_))
208+
.flatMap {
209+
case f if f.isDirectory => f.listFiles()
210+
case f => f :: Nil
211+
}
212+
.map(_.toURI.toURL)
213+
202214
logInfo(
203215
s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using $jars")
204216
new IsolatedClientLoader(
@@ -460,7 +472,10 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
460472
}
461473

462474

463-
private object HiveContext {
475+
private[hive] object HiveContext {
476+
val HIVE_METASTORE_VERSION: String = "spark.sql.hive.metastore.version"
477+
val HIVE_METASTORE_JARS: String = "spark.sql.hive.metastore.jars"
478+
464479
protected val primitiveTypes =
465480
Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType,
466481
ShortType, DateType, TimestampType, BinaryType)

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import org.apache.spark.Logging
3030
import org.apache.spark.deploy.SparkSubmitUtils
3131

3232
import org.apache.spark.sql.catalyst.util.quietly
33+
import org.apache.spark.sql.hive.HiveContext
3334

3435
/** Factory for `IsolatedClientLoader` with specific versions of hive. */
3536
object IsolatedClientLoader {
@@ -166,6 +167,12 @@ class IsolatedClientLoader(
166167
.getConstructors.head
167168
.newInstance(version, config)
168169
.asInstanceOf[ClientInterface]
170+
} catch {
171+
case ReflectionException(cnf: NoClassDefFoundError) =>
172+
throw new ClassNotFoundException(
173+
s"$cnf when creating Hive client using classpath: ${execJars.mkString(", ")}\n" +
174+
"Please make sure that jars for your version of hive and hadoop are included in the " +
175+
s"paths passed to ${HiveContext.HIVE_METASTORE_JARS}.")
169176
} finally {
170177
Thread.currentThread.setContextClassLoader(baseClassLoader)
171178
}

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ package org.apache.spark.sql.hive.client
1919

2020
import scala.reflect._
2121

22+
/** Unwraps reflection exceptions. */
23+
private[client] object ReflectionException {
24+
def unapply(a: Throwable): Option[Throwable] = a match {
25+
case ite: java.lang.reflect.InvocationTargetException => Option(ite.getCause)
26+
case _ => None
27+
}
28+
}
29+
2230
/**
2331
* Provides implicit functions on any object for calling methods reflectively.
2432
*/

0 commit comments

Comments
 (0)