
Commit 38c3c0e

[SPARK-12855][SQL] Remove parser dialect developer API
This pull request removes the public developer API for plugging external parsers into Spark SQL. Given that everything a parser depends on (e.g. logical plans and expressions) is internal and not stable, external parsers would break with every release of Spark. It is a bad idea to create the illusion that Spark actually supports pluggable parsers, and doing so also reduces the incentive for third-party projects to contribute parser improvements back to Spark.

Author: Reynold Xin <[email protected]>

Closes #10801 from rxin/SPARK-12855.
1 parent: b8cb548
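
For context, the removed API let users swap in a parser by class name through spark.sql.dialect. A sketch of what that looked like, reconstructed from the test deleted from SQLQuerySuite below (assumes a SQLContext in scope as sqlContext):

    import org.apache.spark.sql.catalyst.CatalystQl
    import org.apache.spark.sql.catalyst.parser.ParserConf

    // A custom dialect, as in the deleted SQLQuerySuite test.
    class MyDialect(conf: ParserConf) extends CatalystQl(conf)

    // Before this commit, a SQLContext could be pointed at it:
    sqlContext.setConf("spark.sql.dialect", classOf[MyDialect].getCanonicalName())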

13 files changed (+16, -139 lines)

project/MimaExcludes.scala
Lines changed: 3 additions & 1 deletion

@@ -53,7 +53,9 @@ object MimaExcludes {
         ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.jdbc"),
         ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.jsonFile"),
         ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.jsonRDD"),
-        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.load")
+        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.load"),
+        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.dialectClassName"),
+        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.getSQLDialect")
       ) ++ Seq(
         ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.SparkContext.emptyRDD"),
         ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.broadcast.HttpBroadcastFactory")
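
These excludes are needed because MiMa would otherwise flag the deleted SQLContext methods as binary incompatibilities. The general form, with a hypothetical method name for illustration (import as conventionally used in this file):

    import com.typesafe.tools.mima.core._

    // Whitelist a deliberately removed method so the binary compatibility
    // check does not fail the build (hypothetical method name).
    ProblemFilters.exclude[MissingMethodProblem](
      "org.apache.spark.sql.SQLContext.someRemovedMethod")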

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
Lines changed: 1 addition & 1 deletion

@@ -31,7 +31,7 @@ import org.apache.spark.unsafe.types.CalendarInterval
 import org.apache.spark.util.random.RandomSampler
 
 /**
- * This class translates a HQL String to a Catalyst [[LogicalPlan]] or [[Expression]].
+ * This class translates SQL to Catalyst [[LogicalPlan]]s or [[Expression]]s.
  */
 private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends ParserDialect {
   object Token {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala
Lines changed: 1 addition & 5 deletions

@@ -17,16 +17,12 @@
 
 package org.apache.spark.sql.catalyst
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 
 /**
- * Root class of SQL Parser Dialect, and we don't guarantee the binary
- * compatibility for the future release, let's keep it as the internal
- * interface for advanced user.
+ * Interface for a parser.
 */
-@DeveloperApi
 trait ParserDialect {
   /** Creates LogicalPlan for a given SQL string. */
   def parsePlan(sqlText: String): LogicalPlan
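
Only parsePlan appears in this hunk; judging from the methods DDLParser overrides later in this commit, the full trait after the change plausibly reads (the last two signatures are inferred, not shown here):

    package org.apache.spark.sql.catalyst

    import org.apache.spark.sql.catalyst.expressions.Expression
    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    /**
     * Interface for a parser.
     */
    trait ParserDialect {
      /** Creates LogicalPlan for a given SQL string. */
      def parsePlan(sqlText: String): LogicalPlan

      /** Creates Expression for a given SQL string (inferred from DDLParser's override). */
      def parseExpression(sqlText: String): Expression

      /** Creates TableIdentifier for a given SQL string (inferred from DDLParser's override). */
      def parseTableIdentifier(sqlText: String): TableIdentifier
    }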

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/errors/package.scala
Lines changed: 0 additions & 2 deletions

@@ -38,8 +38,6 @@ package object errors {
     }
   }
 
-  class DialectException(msg: String, cause: Throwable) extends Exception(msg, cause)
-
   /**
   * Wraps any exceptions that are thrown while executing `f` in a
   * [[catalyst.errors.TreeNodeException TreeNodeException]], attaching the provided `tree`.

sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
Lines changed: 0 additions & 20 deletions

@@ -278,11 +278,6 @@ private[spark] object SQLConf {
     doc = "When true, common subexpressions will be eliminated.",
     isPublic = false)
 
-  val DIALECT = stringConf(
-    "spark.sql.dialect",
-    defaultValue = Some("sql"),
-    doc = "The default SQL dialect to use.")
-
   val CASE_SENSITIVE = booleanConf("spark.sql.caseSensitive",
     defaultValue = Some(true),
     doc = "Whether the query analyzer should be case sensitive or not.")

@@ -524,21 +519,6 @@ private[sql] class SQLConf extends Serializable with CatalystConf with ParserConf
     new java.util.HashMap[String, String]())
 
   /** ************************ Spark SQL Params/Hints ******************* */
-  // TODO: refactor so that these hints accessors don't pollute the name space of SQLContext?
-
-  /**
-   * The SQL dialect that is used when parsing queries. This defaults to 'sql' which uses
-   * a simple SQL parser provided by Spark SQL. This is currently the only option for users of
-   * SQLContext.
-   *
-   * When using a HiveContext, this value defaults to 'hiveql', which uses the Hive 0.12.0 HiveQL
-   * parser. Users can change this to 'sql' if they want to run queries that aren't supported by
-   * HiveQL (e.g., SELECT 1).
-   *
-   * Note that the choice of dialect does not affect things like what tables are available or
-   * how query execution is performed.
-   */
-  private[spark] def dialect: String = getConf(DIALECT)
 
   private[spark] def useCompression: Boolean = getConf(COMPRESS_CACHED)

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
Lines changed: 3 additions & 32 deletions

@@ -24,7 +24,6 @@ import java.util.concurrent.atomic.AtomicReference
 import scala.collection.JavaConverters._
 import scala.collection.immutable
 import scala.reflect.runtime.universe.TypeTag
-import scala.util.control.NonFatal
 
 import org.apache.spark.{SparkContext, SparkException}
 import org.apache.spark.annotation.{DeveloperApi, Experimental}

@@ -33,13 +32,11 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}
 import org.apache.spark.sql.{execution => sparkexecution}
 import org.apache.spark.sql.SQLConf.SQLConfEntry
-import org.apache.spark.sql.catalyst.{InternalRow, ParserDialect, _}
+import org.apache.spark.sql.catalyst.{InternalRow, _}
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.encoders.encoderFor
-import org.apache.spark.sql.catalyst.errors.DialectException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
-import org.apache.spark.sql.catalyst.parser.ParserConf
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.execution._

@@ -206,30 +203,10 @@ class SQLContext private[sql](
   protected[sql] lazy val optimizer: Optimizer = new SparkOptimizer(this)
 
   @transient
-  protected[sql] val ddlParser = new DDLParser(sqlParser)
+  protected[sql] val sqlParser: ParserDialect = new SparkSQLParser(new SparkQl(conf))
 
   @transient
-  protected[sql] val sqlParser = new SparkSQLParser(getSQLDialect())
-
-  protected[sql] def getSQLDialect(): ParserDialect = {
-    try {
-      val clazz = Utils.classForName(dialectClassName)
-      clazz.getConstructor(classOf[ParserConf])
-        .newInstance(conf)
-        .asInstanceOf[ParserDialect]
-    } catch {
-      case NonFatal(e) =>
-        // Since we didn't find the available SQL Dialect, it will fail even for SET command:
-        // SET spark.sql.dialect=sql; Let's reset as default dialect automatically.
-        val dialect = conf.dialect
-        // reset the sql dialect
-        conf.unsetConf(SQLConf.DIALECT)
-        // throw out the exception, and the default sql dialect will take effect for next query.
-        throw new DialectException(
-          s"""Instantiating dialect '$dialect' failed.
-             |Reverting to default dialect '${conf.dialect}'""".stripMargin, e)
-    }
-  }
+  protected[sql] val ddlParser: DDLParser = new DDLParser(sqlParser)
 
   protected[sql] def parseSql(sql: String): LogicalPlan = ddlParser.parse(sql, false)

@@ -239,12 +216,6 @@ class SQLContext private[sql](
   protected[sql] def executePlan(plan: LogicalPlan) =
     new sparkexecution.QueryExecution(this, plan)
 
-  protected[sql] def dialectClassName = if (conf.dialect == "sql") {
-    classOf[SparkQl].getCanonicalName
-  } else {
-    conf.dialect
-  }
-
   /**
   * Add a jar to SQLContext
   */
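
After this change the parser chain is hard-wired rather than loaded reflectively from spark.sql.dialect. A rough sketch of how a statement now flows through it (the division of labor between the three parsers is inferred from their names and wiring, not stated in this commit):

    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    // Fixed chain: DDLParser -> SparkSQLParser -> SparkQl.
    // DDLParser tries data source DDL first and otherwise falls back to
    // sqlParser; SparkSQLParser handles Spark-side commands such as SET and
    // delegates ordinary SQL to SparkQl (inferred behavior).
    // Assumes a SQLContext in scope as `sqlContext`.
    val plan: LogicalPlan = sqlContext.parseSql("SELECT 1")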

sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
Lines changed: 1 addition & 7 deletions

@@ -201,13 +201,7 @@ case class SetCommand(kv: Option[(String, Option[String])]) extends RunnableCommand
     case Some((key, None)) =>
       val runFunc = (sqlContext: SQLContext) => {
         val value =
-          try {
-            if (key == SQLConf.DIALECT.key) {
-              sqlContext.conf.dialect
-            } else {
-              sqlContext.getConf(key)
-            }
-          } catch {
+          try sqlContext.getConf(key) catch {
            case _: NoSuchElementException => "<undefined>"
          }
        Seq(Row(key, value))
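
With the spark.sql.dialect special case gone, SET <key> reads straight from the conf, and an unset key yields "<undefined>". For example (assumes a SQLContext in scope as sqlContext):

    // Produces a single Row(key, value), e.g. [spark.sql.caseSensitive, true]
    sqlContext.sql("SET spark.sql.caseSensitive").show()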

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
Lines changed: 3 additions & 2 deletions

@@ -37,9 +37,10 @@ class DDLParser(fallback: => ParserDialect)
 
   override def parseExpression(sql: String): Expression = fallback.parseExpression(sql)
 
-  override def parseTableIdentifier(sql: String): TableIdentifier =
-
+  override def parseTableIdentifier(sql: String): TableIdentifier = {
     fallback.parseTableIdentifier(sql)
+  }
+
   def parse(input: String, exceptionOnError: Boolean): LogicalPlan = {
     try {
       parsePlan(input)
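
DDLParser receives its fallback as a by-name parameter (fallback: => ParserDialect), so the delegating overrides above evaluate it lazily on each call rather than at construction time. A minimal sketch of that pattern, with hypothetical names:

    // Hypothetical illustration of by-name delegation.
    trait MiniParser { def parse(sql: String): String }

    // `fallback` is by-name: evaluated on each use, not at construction,
    // so the delegate may be created after this parser exists.
    class MiniDDLParser(fallback: => MiniParser) extends MiniParser {
      def parse(sql: String): String =
        if (sql.trim.toUpperCase.startsWith("CREATE")) "ddl plan"
        else fallback.parse(sql)
    }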

sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Lines changed: 1 addition & 1 deletion

@@ -1064,7 +1064,7 @@ object functions extends LegacyFunctions {
    * @group normal_funcs
    */
   def expr(expr: String): Column = {
-    val parser = SQLContext.getActive().map(_.getSQLDialect()).getOrElse(new CatalystQl())
+    val parser = SQLContext.getActive().map(_.sqlParser).getOrElse(new CatalystQl())
     Column(parser.parseExpression(expr))
   }
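
From the user's side, expr behaves as before: the string is parsed by the active context's parser, or by a bare CatalystQl when no context is active. A typical use, assuming a hypothetical DataFrame df with a numeric column a:

    import org.apache.spark.sql.functions._

    // Parses "a + 1" into a Column using the active SQLContext's parser.
    val withIncrement = df.select(expr("a + 1").as("a_plus_one"))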

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Lines changed: 0 additions & 20 deletions

@@ -23,7 +23,6 @@ import java.sql.Timestamp
 import org.apache.spark.AccumulatorSuite
 import org.apache.spark.sql.catalyst.CatalystQl
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
-import org.apache.spark.sql.catalyst.errors.DialectException
 import org.apache.spark.sql.catalyst.parser.ParserConf
 import org.apache.spark.sql.execution.{aggregate, SparkQl}
 import org.apache.spark.sql.execution.joins.{CartesianProduct, SortMergeJoin}

@@ -32,8 +31,6 @@ import org.apache.spark.sql.test.{SharedSQLContext, TestSQLContext}
 import org.apache.spark.sql.test.SQLTestData._
 import org.apache.spark.sql.types._
 
-/** A SQL Dialect for testing purpose, and it can not be nested type */
-class MyDialect(conf: ParserConf) extends CatalystQl(conf)
 
 class SQLQuerySuite extends QueryTest with SharedSQLContext {
   import testImplicits._

@@ -148,23 +145,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       .count(), Row(24, 1) :: Row(14, 1) :: Nil)
   }
 
-  test("SQL Dialect Switching to a new SQL parser") {
-    val newContext = new SQLContext(sparkContext)
-    newContext.setConf("spark.sql.dialect", classOf[MyDialect].getCanonicalName())
-    assert(newContext.getSQLDialect().getClass === classOf[MyDialect])
-    assert(newContext.sql("SELECT 1").collect() === Array(Row(1)))
-  }
-
-  test("SQL Dialect Switch to an invalid parser with alias") {
-    val newContext = new SQLContext(sparkContext)
-    newContext.sql("SET spark.sql.dialect=MyTestClass")
-    intercept[DialectException] {
-      newContext.sql("SELECT 1")
-    }
-    // test if the dialect set back to DefaultSQLDialect
-    assert(newContext.getSQLDialect().getClass === classOf[SparkQl])
-  }
-
   test("SPARK-4625 support SORT BY in SimpleSQLParser & DSL") {
     checkAnswer(
       sql("SELECT a FROM testData2 SORT BY a"),
